├── __init__.py ├── util ├── __init__.py ├── preprocess.py ├── util.py └── input_helpers.py ├── Sentence_Modeling ├── __init__.py ├── test.py ├── test_mask.py ├── Siamese_network.py └── Sentence_Model.py ├── word2vec_pretrain ├── __init__.py ├── visualization.py └── preparation.py ├── README.md ├── plot_figures.py ├── visualization.py ├── coorrence.py ├── test_Model.py ├── helper.py ├── DeepModel.py ├── reload_model.py ├── tensor_construction.py ├── MultiGran_Model.py ├── tensor.py ├── Dynamic ├── MT_Dynamic_MultiGranModel.py └── MT_Dynamic_Arch.py ├── train_test_idf.py ├── train.py ├── MultiTask_MultiGranModel.py └── Multi_task_Arch.py /__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /util/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Sentence_Modeling/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /word2vec_pretrain/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DLDisambiguation 2 | ## A multi-task normalization model with multi-view CNN 3 | 4 | ## The goal is to perform medical concept normalization and link medical short texts to entities in ICD-10. -------------------------------------------------------------------------------- /plot_figures.py: -------------------------------------------------------------------------------- 1 | import pylab as pl 2 | 3 | x1 = [1, 2, 3, 4, 5, 6]# Make x, y arrays for each graph 4 | x = [0.001, 0.01, 0.1, 1, 10, 100]# Make x, y arrays for each graph 5 | y1 = [0.8487, 0.8562, 0.8578, 0.8581, 0.8255, 0.7031] 6 | x2 = x1 7 | y2 = [0.9116, 0.9148, 0.9101, 0.9081, 0.8992, 0.7140] 8 | 9 | import matplotlib.pyplot as plt 10 | 11 | ax = plt.subplot(111, xlabel='lambda', ylabel='F1 score') 12 | for item in ([ax.title, ax.xaxis.label, ax.yaxis.label] + ax.get_xticklabels() + ax.get_yticklabels()): 13 | item.set_fontsize(15) 14 | 15 | plot1, = ax.plot(x1, y1, 'rs-', label='Disease')# use pylab to plot x and y : Give your plots names 16 | plot2, = ax.plot(x2, y2, 'b^-', label='Procedure') 17 | 18 | # pl.title('MTL performance with different lambda of constraints')# give plot a title 19 | pl.xticks(x1, x, rotation=0) 20 | 21 | pl.legend(handles=[plot1, plot2], numpoints=1, fontsize=15)# make legend 22 | pl.show()# show the plot on the screen 23 | -------------------------------------------------------------------------------- /util/preprocess.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import re 6 | import numpy as np 7 | from tensorflow.contrib import learn # pylint: disable=g-bad-import-order 8 | 9 | TOKENIZER_RE = re.compile(r"[A-Z]{2,}(?![a-z])|[A-Z][a-z]+(?=[A-Z])|[\'\w\-]+", 10 | re.UNICODE) 11 | 12 | def tokenizer(iterator): 13 | for value in iterator: 14 | yield list(value) 15 | 16 | 17 | class MyVocabularyProcessor(learn.preprocessing.VocabularyProcessor): 18 | def 
__init__(self, 19 | max_document_length, 20 | min_frequency=0, 21 | vocabulary=None, 22 | tokenizer_fn=tokenizer): 23 | self.sup = super(MyVocabularyProcessor, self) 24 | self.sup.__init__(max_document_length, min_frequency, vocabulary, tokenizer_fn) 25 | 26 | def transform(self, raw_documents): 27 | """Transform documents to word-id matrix. 28 | Convert words to ids with vocabulary fitted with fit or the one 29 | provided in the constructor. 30 | Args: 31 | raw_documents: An iterable which yield either str or unicode. 32 | Yields: 33 | x: iterable, [n_samples, max_document_length]. Word-id matrix. 34 | """ 35 | for tokens in self._tokenizer(raw_documents): 36 | word_ids = np.zeros(self.max_document_length, np.int64) 37 | for idx, token in enumerate(tokens): 38 | if idx >= self.max_document_length: 39 | break 40 | word_ids[idx] = self.vocabulary_.get(token) 41 | yield word_ids 42 | -------------------------------------------------------------------------------- /word2vec_pretrain/visualization.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf-8 3 | 4 | import sys 5 | reload(sys) 6 | sys.setdefaultencoding('utf-8') 7 | 8 | from matplotlib import rc 9 | rc('font',**{'family':'sans-serif','sans-serif':['AR PL KaitiM GB']}) 10 | 11 | import numpy as np 12 | import matplotlib.pyplot as plt 13 | from sklearn.manifold import TSNE 14 | from gensim.models import Word2Vec 15 | 16 | def checkSimilarity(model_file, word): 17 | 18 | model = Word2Vec.load(model_file) 19 | arr = model.most_similar([word]) 20 | for x in arr: 21 | print(str(x[0] + ":" + str(x[1]))) 22 | 23 | def main(): 24 | # model_file = "../data/word2vec/character.model" 25 | model_file = "../data/word2vec_new/word.model" 26 | checkSimilarity(model_file, "左") 27 | 28 | # character_wv_file = '../data/word2vec/character_model.txt' 29 | # word_wv_file = '../data/word2vec/word_model.txt' 30 | # 31 | # embeddings_file = word_wv_file 32 | # wv, vocabulary = load_embeddings(embeddings_file) 33 | # 34 | # tsne = TSNE(n_components=2, random_state=0) 35 | # np.set_printoptions(suppress=True) 36 | # Y = tsne.fit_transform(wv[:1000, :]) 37 | # 38 | # plt.scatter(Y[:, 0], Y[:, 1]) 39 | # for label, x, y in zip(vocabulary, Y[:, 0], Y[:, 1]): 40 | # plt.annotate(label, xy=(x, y), xytext=(0, 0), textcoords='offset points') 41 | # plt.show() 42 | 43 | 44 | def load_embeddings(file_name): 45 | 46 | # with codecs.open(file_name, 'r', 'utf-8') as f_in: 47 | lines = open(file_name).readlines()[1:] 48 | vocabulary, wv = zip(*[line.strip().split(' ', 1) for line in lines]) 49 | 50 | wv = np.loadtxt(wv) 51 | return wv, vocabulary 52 | 53 | if __name__ == '__main__': 54 | main() -------------------------------------------------------------------------------- /Sentence_Modeling/test.py: -------------------------------------------------------------------------------- 1 | from DLDisambiguation.util.input_helpers import InputHelper 2 | from DLDisambiguation.util.preprocess import MyVocabularyProcessor 3 | import tensorflow as tf 4 | import os 5 | 6 | # tf.flags.DEFINE_string("test_file", "../data/validation_data_0724_opr.txt", "training file (default: None)") 7 | tf.flags.DEFINE_string("test_file", "../data/test_data_0816_des.txt", "training file (default: None)") 8 | 9 | FLAGS = tf.flags.FLAGS 10 | inpH = InputHelper() 11 | max_document_length = 10 12 | y_is_value = True 13 | 14 | model_dir = "./Exp/runs/Description1502955472" 15 | # model_dir = "./runs/Description1500991322" # 0.760 16 
| # model_dir = "./runs/1500428748" # 0.69 17 | # model_dir = "./runs/Description1500983617" # 0.767 18 | # model_dir = "./runs/Description1501058401" # 0.754 19 | # model_dir = "./runs/Operation1501000120" # 0.809 20 | 21 | checkpoint_dir = os.path.join(model_dir, "checkpoints") 22 | print(checkpoint_dir) 23 | ckpt = tf.train.get_checkpoint_state(checkpoint_dir) 24 | checkpoint_file = ckpt.model_checkpoint_path 25 | 26 | vocab_file = os.path.join(checkpoint_dir, "vocab") 27 | # load vocabulary model 28 | vocab_processor = MyVocabularyProcessor(max_document_length, min_frequency=0) 29 | vocab_processor = vocab_processor.restore(vocab_file) 30 | 31 | test_x1, test_x2, test_y = inpH.getTestIndexedDataSet(FLAGS.test_file, "\t", vocab_processor, max_document_length, 32 | y_is_value) 33 | 34 | graph = tf.Graph() 35 | 36 | with graph.as_default(): 37 | sess = tf.Session() 38 | with sess.as_default(): 39 | # Load the saved meta graph and restore variables 40 | saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file)) 41 | sess.run(tf.initialize_all_variables()) 42 | saver.restore(sess, checkpoint_file) 43 | 44 | # Get the placeholders from the graph by name 45 | # the output is a list with only one element 46 | input_x1 = graph.get_operation_by_name("input_x1").outputs[0] 47 | input_x2 = graph.get_operation_by_name("input_x2").outputs[0] 48 | input_y = graph.get_operation_by_name("input_y").outputs[0] 49 | 50 | accuracy_o = graph.get_operation_by_name("accuracy/accuracy").outputs[0] 51 | 52 | accu = sess.run([accuracy_o], {input_x1: test_x1, input_x2: test_x2, input_y: test_y}) 53 | print(accu) 54 | -------------------------------------------------------------------------------- /visualization.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import numpy as np 3 | from matplotlib.font_manager import FontManager, FontProperties 4 | import matplotlib.pyplot as plt 5 | 6 | plt.rcParams['font.sans-serif'] = ['SimHei'] 7 | from util.input_helpers import InputHelper 8 | 9 | max_document_length = 20 10 | task_num = 2 11 | name = "des" if task_num == 1 else "opr" 12 | 13 | arr = np.load( 14 | "/Users/luoyi/Documents/Python/DLDisambiguation/Tensor_files/0823/Length" + str( 15 | max_document_length) + "/test_" + name + ".npy") 16 | 17 | inpH = InputHelper() 18 | 19 | data_file_test = "/Users/luoyi/Documents/Python/DLDisambiguation/data/test_data_0823_" + name + ".txt" 20 | x_test_mention, x_test_entity, y_test = inpH.getTsvTestData(data_file_test, "\t", max_document_length, y_value=False) 21 | 22 | sample_n = 1 23 | dir_ = "./fig/" 24 | 25 | 26 | # sample_n = len(arr) 27 | 28 | def getChineseFont(): 29 | return FontProperties(fname='/Users/luoyi/Downloads/msyh.ttf') 30 | 31 | 32 | sample_index = 352 33 | row_n, col_n = 1, 4 34 | plt.subplots(row_n, col_n, figsize=(20, 10)) 35 | 36 | for j in range(col_n): 37 | t = arr[sample_index, :, :, j] 38 | # x = t 39 | # t = (x - np.min(x)) / (np.max(x) - np.min(x)) 40 | # t = np.abs(t) 41 | plt.subplot(row_n, col_n, j + 1) 42 | plt.imshow(t, vmin=-1, vmax=1, interpolation='nearest', cmap=plt.cm.magma) 43 | # plt.imshow(t, vmin=-1, vmax=1, interpolation='nearest', cmap=plt.cm.bone) 44 | # plt.imshow(t, vmin=-1, vmax=1, interpolation='nearest', cmap=plt.cm.hsv) 45 | # plt.imshow(t, vmin=-1, vmax=1, interpolation='nearest', cmap=plt.cm.prism) 46 | # plt.imshow(t, vmin=-1, vmax=1, interpolation='nearest', cmap=plt.cm.flag) 47 | # plt.imshow(t, vmin=-1, vmax=1, interpolation='nearest', 
cmap=plt.cm.autumn) 48 | # plt.imshow(t, vmin=-1, vmax=1, interpolation='nearest', cmap=plt.cm.cool) 49 | # plt.imshow(t, vmin=-1, vmax=1, interpolation='nearest', cmap=plt.cm.jet) 50 | # plt.imshow(t, vmin=-1, vmax=1, interpolation='nearest', cmap=plt.get_cmap('gray')) 51 | 52 | plt.title(str(j)) 53 | 54 | # plt.ylabel(unicode(x_test_mention[sample_index]), fontproperties=getChineseFont()) 55 | # plt.xlabel(unicode(x_test_entity[sample_index]), fontproperties=getChineseFont()) 56 | 57 | # plt.show() 58 | plt.colorbar(fraction=0.046, pad=0.04) 59 | plt.savefig(dir_ + "outfile" + str(sample_index) + name + "_magma.jpg") 60 | -------------------------------------------------------------------------------- /coorrence.py: -------------------------------------------------------------------------------- 1 | # !/usr/bin/env python 2 | # coding=utf-8 3 | 4 | # # discover relationships between disease and procedure from labeled_data and save in coorrence_file 5 | 6 | file_des_opr = open("./data/exp0823/filter_pos_data_review_final.txt") 7 | res = open("coorrence_file.txt", "w+") 8 | 9 | entity_names = [] 10 | map_dict = [] 11 | 12 | line = file_des_opr.readline() 13 | while line != "": 14 | tmp = line.split("\t") 15 | des_e_name = tmp[1].strip() 16 | opr_e_name = tmp[3].strip() 17 | 18 | if entity_names.__contains__(des_e_name): 19 | i = entity_names.index(des_e_name) 20 | if opr_e_name in map_dict[i].keys(): 21 | map_dict[i][opr_e_name] += 1 22 | else: 23 | map_dict[i][opr_e_name] = 1 24 | else: 25 | entity_names.append(des_e_name) 26 | map_dict.append({opr_e_name: 1}) 27 | line = file_des_opr.readline() 28 | 29 | length = len(entity_names) 30 | for i in range(length): 31 | res.write(entity_names[i] + "\t") 32 | map_dict_des = map_dict[i] 33 | for k, v in map_dict_des.items(): 34 | res.write(k + ":" + str(v) + "_") 35 | res.write("\n") 36 | 37 | # # discover relationships between disease and procedure from Database and save in new_co_file.file 38 | 39 | # # !/usr/bin/env python 40 | # # coding=utf-8 41 | # import MySQLdb 42 | # import codecs 43 | 44 | # conn = MySQLdb.connect("localhost", "root", "10081008", "medical", charset='utf8') 45 | # cursor = conn.cursor() 46 | # cursor.execute('select S050100, S050501 from d2014_2015 where S050100 != "" and S050501 != "" limit 10000000;') 47 | # values = cursor.fetchall() 48 | # print("Finished data loading...") 49 | # 50 | # cursor.execute('select 疾病名称 from Norm6;') 51 | # disease_tuple = cursor.fetchall() 52 | # disease_list = [i[0] for i in disease_tuple] 53 | # 54 | # cursor.execute('select 手术名称 from Treatment;') 55 | # operation_tuple = cursor.fetchall() 56 | # operation_list = [i[0] for i in operation_tuple] 57 | # print("Finished Disease and Operation Names loading...") 58 | # 59 | # co_file = codecs.open("./data/new_co_file.txt", "w+", encoding="utf-8") 60 | # map_dict = {} 61 | # for i in values: 62 | # d_name = i[0] 63 | # o_name = i[1] 64 | # if d_name in disease_list and o_name in operation_list: 65 | # if d_name in map_dict.keys(): 66 | # o_dict = map_dict[d_name] 67 | # if o_name in o_dict.keys(): 68 | # map_dict[d_name][o_name] += 1 69 | # else: 70 | # map_dict[d_name][o_name] = 1 71 | # else: 72 | # map_dict[d_name] = {o_name: 1} 73 | # 74 | # for k, v in map_dict.iteritems(): 75 | # co_file.write(k + "\t") 76 | # for o_name, num in v.iteritems(): 77 | # co_file.write(o_name + ":"+str(num) + "_") 78 | # co_file.write("\n") 79 | # co_file.close() 80 | -------------------------------------------------------------------------------- 
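A note on coorrence.py above: the active counting loop keeps a parallel list of disease names (entity_names) plus a list of per-disease dicts, so every record triggers a linear entity_names.index() scan. The same disease-to-procedure aggregation can be written with a nested defaultdict; the sketch below is illustrative only (count_cooccurrence is a hypothetical helper, not part of this repository), assuming the same tab-separated input with the disease entity in column 1 and the procedure entity in column 3.

from collections import defaultdict

def count_cooccurrence(lines):
    # Count how often each procedure entity co-occurs with each disease entity.
    counts = defaultdict(lambda: defaultdict(int))
    for line in lines:
        tmp = line.split("\t")
        des_e_name, opr_e_name = tmp[1].strip(), tmp[3].strip()
        counts[des_e_name][opr_e_name] += 1
    return counts

# Writing the result in the same "disease<TAB>procedure:count_..." format used above:
# for des, oprs in count_cooccurrence(file_des_opr).items():
#     res.write(des + "\t" + "".join(k + ":" + str(v) + "_" for k, v in oprs.items()) + "\n")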
/test_Model.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import os 3 | from DLDisambiguation.util.input_helpers import InputHelper 4 | from tensor import Tensor 5 | import numpy as np 6 | from util.util import write_evaluation_file 7 | 8 | tf.flags.DEFINE_string("train_dir", "./", "Training dir root") 9 | 10 | FLAGS = tf.flags.FLAGS 11 | FLAGS._parse_flags() 12 | 13 | task_num = 2 14 | inpH = InputHelper() 15 | max_document_length = 20 16 | 17 | name = "des" if task_num == 1 else "opr" 18 | 19 | # load in model 20 | model_dir = "./runs/Single_task21501595265" 21 | checkpoint_dir = os.path.join(model_dir, "checkpoints") 22 | print(checkpoint_dir) 23 | ckpt = tf.train.get_checkpoint_state(checkpoint_dir) 24 | checkpoint_file = ckpt.model_checkpoint_path 25 | 26 | lstm_dir = "Description1500991322" if task_num == 1 else "Operation1501000120" 27 | lstm_dir = os.path.join("./Sentence_Modeling/runs", lstm_dir) 28 | 29 | # load data 30 | load_Tensor = True 31 | 32 | data_file = os.path.join(FLAGS.train_dir, "data/training_data_0724_" + name + ".txt") 33 | data_file_test = os.path.join(FLAGS.train_dir, "data/test_data_0724_" + name + ".txt") 34 | data_file_val = os.path.join(FLAGS.train_dir, "data/validation_data_0724_" + name + ".txt") 35 | 36 | x_test_mention, x_test_entity, y_test = inpH.getTsvTestData(data_file_test, "\t", max_document_length, y_value=False) 37 | 38 | if load_Tensor: 39 | mydir = "./Length" + str(max_document_length) + "/" 40 | x_test_tensor = np.load(mydir + "test_" + name + ".npy") 41 | 42 | else: 43 | x_test_tensor = Tensor(x_test_mention, x_test_entity, len(x_test_entity), max_document_length, task_num, 44 | lstm_dir).get_tensor() 45 | x_test_tensor = x_test_tensor.transpose((0, 2, 3, 1)) 46 | 47 | graph = tf.Graph() 48 | 49 | eval_file = open(os.path.join(model_dir, "test_analysis.txt"), "w+") 50 | rightfile = open(os.path.join(model_dir, "right_analysis.txt"), "w+") 51 | wrongfile = open(os.path.join(model_dir, "wrong_analysis.txt"), "w+") 52 | 53 | with graph.as_default(): 54 | sess = tf.Session() 55 | 56 | with sess.as_default(): 57 | # Load the saved meta graph and restore variables 58 | saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file)) 59 | sess.run(tf.initialize_all_variables()) 60 | saver.restore(sess, checkpoint_file) 61 | 62 | for var in tf.trainable_variables(): 63 | print(var.name) 64 | print(var) 65 | print(sess.run(var)) 66 | 67 | # Get the placeholders from the graph by name 68 | # the output is a list with only one element 69 | input_tensor = graph.get_operation_by_name("input_tensor").outputs[0] 70 | input_y = graph.get_operation_by_name("input_y").outputs[0] 71 | droppout = graph.get_operation_by_name("dropout_keep_prob").outputs[0] 72 | 73 | predictions = graph.get_operation_by_name("output/predictions").outputs[0] 74 | accuracy = graph.get_operation_by_name("accuracy/accuracy").outputs[0] 75 | scores = graph.get_operation_by_name("output/scores").outputs[0] 76 | 77 | pre, accu, scores = sess.run( 78 | [predictions, accuracy, scores], {input_tensor: x_test_tensor, input_y: y_test, droppout: 1}) 79 | 80 | eval_file.write("Accu: " + str(accu) + "\n") 81 | length = len(x_test_mention) 82 | labels = np.argmax(y_test, 1) 83 | 84 | write_evaluation_file(eval_file, rightfile, wrongfile, labels, pre, x_test_mention, x_test_entity) 85 | -------------------------------------------------------------------------------- /helper.py: 
-------------------------------------------------------------------------------- 1 | # combine segmented segments to complete ones 2 | # generate new_training_data_.txt 3 | 4 | # file = open("./data/" + name + "_data_0810.txt") 5 | # line = file.readline().strip() 6 | # new_f = open("./data/new_training_data_.txt", "w+") 7 | # 8 | # while line != "": 9 | # tmp = line.split("\t") 10 | # line = file.readline().strip() 11 | # unormalized = "".join(tmp[1].split(" ")) 12 | # normalized = "".join(tmp[2].split(" ")) 13 | # new_f.write(unormalized + "\t" + normalized + "\n") 14 | # 15 | # new_f.close() 16 | # file.close() 17 | # 18 | # file = open("./data/new_training_data_.txt", "r") 19 | # data1 = open("./data/training_data.txt", "w+") 20 | # data2 = open("./data/testing_data.txt", "w+") 21 | # 22 | # line = file.readline() 23 | # cnt = 0 24 | # while line != "": 25 | # if cnt < 15000: 26 | # data1.write(line) 27 | # elif cnt < 20000: 28 | # data2.write(line) 29 | # else: 30 | # break 31 | # line = file.readline() 32 | # cnt += 1 33 | # file.close() 34 | # data1.close() 35 | # data2.close() 36 | 37 | key_set = ["train", "test"] 38 | # key_set = ["training", "validation", "test"] 39 | 40 | for key in key_set: 41 | prex = "_data_0823" 42 | # prex = "_dynamic_data" 43 | file = open("./data/exp0823/data_augment_" + key + ".txt") 44 | line = file.readline().strip() 45 | new_f = open("./data/" + key + prex + "_des.txt", "w+") 46 | new_f_o = open("./data/" + key + prex + "_opr.txt", "w+") 47 | 48 | while line != "": 49 | tmp = line.split("\t") 50 | line = file.readline().strip() 51 | label = tmp[0] 52 | unormalized_d = tmp[1] 53 | normalized_d = tmp[2] 54 | new_f.write(label + "\t" + unormalized_d + "\t" + normalized_d + "\n") 55 | 56 | label2 = tmp[3] 57 | unormalized_o = tmp[4] 58 | normalized_o = tmp[5] 59 | new_f_o.write(label2 + "\t" + unormalized_o + "\t" + normalized_o + "\n") 60 | 61 | new_f.close() 62 | new_f_o.close() 63 | file.close() 64 | 65 | # # generate dynamic dataset 66 | # import random 67 | 68 | # name = "validation" 69 | # file_name = "./data/exp0803/" + name + "_data_0803.lpy.csv" 70 | # file_t = open(file_name) 71 | # line = file_t.readline() 72 | # 73 | # res_file = open("./data/exp0803/" + name + "_dynamic_data.txt", "w+") 74 | # cnt = 0 75 | # while line != "": 76 | # cnt += 1 77 | # if cnt > 80000: 78 | # break 79 | # res = line.split("\t") 80 | # random_n = random.random() 81 | # if random_n < 0.6: 82 | # res_file.write(line) 83 | # else: 84 | # res_file.write("\t".join(res[:3]) + "\n") 85 | # line = file_t.readline() 86 | # res_file.close() 87 | 88 | ########### Analyze results of models 89 | 90 | # file2 = open("./runs/Exp/Single_task11502361344/right_cases.txt") 91 | # file1 = open("./runs/Exp/Single_task11502361227/right_cases.txt") 92 | # 93 | # line = file1.readline() 94 | # arr1 = [] 95 | # while line != "": 96 | # arr1.append(line) 97 | # line = file1.readline() 98 | # line = file2.readline() 99 | # 100 | # arr2 = [] 101 | # while line != "": 102 | # if line in arr1: 103 | # arr1.remove(line) 104 | # arr2.append(line) 105 | # line = file2.readline() 106 | # 107 | # ans = open("ans.txt", "w+") 108 | # ans2 = open("ans_overlap.txt", "w+") 109 | # for i in arr1: 110 | # ans.write(i) 111 | # 112 | # for i in arr2: 113 | # ans2.write(i) 114 | # ans.close() 115 | # ans2.close() 116 | -------------------------------------------------------------------------------- /DeepModel.py: -------------------------------------------------------------------------------- 1 | 
#!/usr/bin/env python 2 | # encoding=utf-8 3 | 4 | import tensorflow as tf 5 | 6 | # no use any more...*********************************** Change to MultiGran_Model.py 7 | class CNNModel(object): 8 | 9 | def __init__(self, max_len, filter_sizes, num_filters, l2_reg_lambda=0.0): 10 | channel_num = 4 11 | 12 | # Placeholders for input, output and dropout 13 | self.input_tensor = tf.placeholder(tf.float32, [None, max_len, max_len, 4], name="input_tensor") 14 | self.input_y = tf.placeholder(tf.float32, [None, 2], name="input_y") 15 | self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob") 16 | 17 | # Keeping track of l2 regularization loss (optional) 18 | l2_loss = tf.constant(0.0) 19 | 20 | # Create a convolution + maxpool layer for each filter size 21 | pooled_outputs = [] 22 | for i, filter_size in enumerate(filter_sizes): 23 | filter_shape = [filter_size, filter_size, channel_num, num_filters] 24 | 25 | with tf.name_scope("conv-maxpool-%s" % filter_size): 26 | # Convolution Layer 27 | W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W") 28 | b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b") 29 | conv = tf.nn.conv2d( 30 | self.input_tensor, 31 | W, 32 | strides=[1, 1, 1, 1], 33 | padding="VALID", 34 | name="conv") 35 | # Apply nonlinearity 36 | h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu") 37 | 38 | # Maxpooling over the outputs 39 | pooled = tf.nn.max_pool( 40 | h, 41 | # ksize=[1, filter_size, filter_size, 1], 42 | ksize=[1, max_len - filter_size + 1, max_len - filter_size + 1, 1], 43 | # ksize=[1, max_len - filter_size + 1, 1, 1], 44 | strides=[1, 1, 1, 1], 45 | padding='VALID', 46 | name="pool") 47 | pooled_outputs.append(pooled) 48 | 49 | # Combine all the pooled features 50 | num_filters_total = num_filters * len(filter_sizes) 51 | self.h_pool = tf.concat(pooled_outputs, 3) # 128 52 | self.h_pool_flat = tf.reshape(self.h_pool, [-1, num_filters_total]) # 128 53 | 54 | # Add dropout 55 | with tf.name_scope("dropout"): 56 | self.h_drop = tf.nn.dropout(self.h_pool_flat, self.dropout_keep_prob, name="hidden_output_drop") 57 | 58 | # Final (unnormalized) scores and predictions 59 | with tf.name_scope("output"): 60 | W = tf.get_variable( 61 | "W_output", 62 | shape=[num_filters_total, 2], 63 | initializer=tf.contrib.layers.xavier_initializer()) 64 | b = tf.Variable(tf.constant(0.1, shape=[2]), name="b") 65 | l2_loss += tf.nn.l2_loss(W) 66 | l2_loss += tf.nn.l2_loss(b) 67 | self.scores = tf.nn.xw_plus_b(self.h_drop, W, b, name="scores") 68 | self.predictions = tf.argmax(self.scores, 1, name="predictions") 69 | 70 | # Calculate Mean cross-entropy loss 71 | with tf.name_scope("loss"): 72 | losses = tf.nn.softmax_cross_entropy_with_logits(logits=self.scores, labels=self.input_y) 73 | self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss 74 | 75 | # Accuracy 76 | with tf.name_scope("accuracy"): 77 | correct_predictions = tf.equal(self.predictions, tf.argmax(self.input_y, 1)) 78 | self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy") 79 | -------------------------------------------------------------------------------- /Sentence_Modeling/test_mask.py: -------------------------------------------------------------------------------- 1 | from DLDisambiguation.util.input_helpers import InputHelper 2 | from DLDisambiguation.util.preprocess import MyVocabularyProcessor 3 | import tensorflow as tf 4 | import os 5 | import numpy as np 6 | 7 | 8 | def writeToFile(m, file): 9 | l = len(m) 10 | for i in 
range(l): 11 | l_col = len(m[i]) 12 | file.write("[") 13 | for j in range(l_col): 14 | file.write(str(m[i][j]) + " ") 15 | file.write("]\n") 16 | file.write("\n") 17 | 18 | inpH = InputHelper() 19 | max_document_length = 10 20 | y_is_value = True 21 | 22 | 23 | def get_data(vocab_processor, inpH, train_x1, train_x2, train_y, max_document_length): 24 | train_x1_i = np.asarray(list(vocab_processor.transform(train_x1))) 25 | train_x2_i = np.asarray(list(vocab_processor.transform(train_x2))) 26 | 27 | mask_train_x1 = np.zeros([len(train_x1_i), max_document_length]) 28 | mask_train_x2 = np.zeros([len(train_x2_i), max_document_length]) 29 | 30 | new_mask_x1, new_mask_x2 = inpH.padding_and_generate_mask(train_x1, train_x2, mask_train_x1, mask_train_x2) 31 | return (train_x1_i, train_x2_i, new_mask_x1, new_mask_x2, train_y) 32 | 33 | 34 | model_dir = "./Exp/runs/Description1502868912" 35 | 36 | checkpoint_dir = os.path.join(model_dir, "checkpoints") 37 | print(checkpoint_dir) 38 | ckpt = tf.train.get_checkpoint_state(checkpoint_dir) 39 | checkpoint_file = ckpt.model_checkpoint_path 40 | 41 | vocab_file = os.path.join(checkpoint_dir, "vocab") 42 | # load vocabulary model 43 | vocab_processor = MyVocabularyProcessor(max_document_length, min_frequency=0) 44 | vocab_processor = vocab_processor.restore(vocab_file) 45 | 46 | task_num = 1 47 | name = "des" if task_num == 1 else "opr" 48 | test_file = "../data/test_data_0816_" + name + ".txt" 49 | test_x1, test_x2, test_y = inpH.getTsvTestData(test_file, "\t", max_document_length, y_is_value) 50 | 51 | test_set = get_data(vocab_processor, inpH, test_x1, test_x2, test_y, max_document_length) 52 | 53 | filename = model_dir + "/test_look.txt" 54 | file = open(filename, "w+") 55 | 56 | # param_f = open(model_dir + "/params.txt", "w+") 57 | graph = tf.Graph() 58 | 59 | with graph.as_default(): 60 | sess = tf.Session() 61 | with sess.as_default(): 62 | # Load the saved meta graph and restore variables 63 | saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file)) 64 | sess.run(tf.initialize_all_variables()) 65 | saver.restore(sess, checkpoint_file) 66 | 67 | # for var in tf.trainable_variables(): 68 | # print(var.name) 69 | # print(var) 70 | # print(sess.run(var)) 71 | 72 | # Get the placeholders from the graph by name 73 | # the output is a list with only one element 74 | input_x1 = graph.get_operation_by_name("input_x1").outputs[0] 75 | mask_x1 = graph.get_operation_by_name("mask_x1").outputs[0] 76 | input_x2 = graph.get_operation_by_name("input_x2").outputs[0] 77 | mask_x2 = graph.get_operation_by_name("mask_x2").outputs[0] 78 | input_y = graph.get_operation_by_name("input_y").outputs[0] 79 | 80 | accuracy = graph.get_operation_by_name("accuracy/accuracy").outputs[0] 81 | r1 = graph.get_operation_by_name("sentence_embedding/Representation1").outputs[0] 82 | r2 = graph.get_operation_by_name("sentence_embedding/Representation2").outputs[0] 83 | 84 | accu, vr1, vr2 = sess.run([accuracy, r1, r2], {input_x1: test_set[0], mask_x1: test_set[2], 85 | input_x2: test_set[1], mask_x2: test_set[3], 86 | input_y: test_set[4]}) 87 | 88 | # r(input_size, None, hidden_n * 2) => (None, input_size, hidden_n * 2) 89 | representation1 = np.transpose(vr1, (1, 0, 2)) 90 | representation2 = np.transpose(vr2, (1, 0, 2)) 91 | 92 | test_x1 = list(test_x1) 93 | test_x2 = list(test_x2) 94 | test_y = list(test_y) 95 | sample_n = len(representation1) 96 | for i in range(sample_n): 97 | matrix1 = representation1[i] 98 | matrix2 = representation2[i] 99 | file.write("sample " + 
str(i) + "\n") 100 | file.write(str(test_x1[i]) + " " + str(test_x2[i]) + " " + str(test_y[i]) + "\n") 101 | writeToFile(matrix1, file) 102 | writeToFile(matrix2, file) 103 | 104 | file.write("\n") 105 | 106 | print(accu) 107 | -------------------------------------------------------------------------------- /reload_model.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import os 3 | from DLDisambiguation.util.input_helpers import InputHelper 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | 7 | 8 | def plot_conv(sess, t_vars, name): 9 | var_conv = [v for v in t_vars if name in v.name] 10 | W = var_conv[0] # [2, 2, 1, 8] 11 | W = sess.run(W) 12 | 13 | length = W.shape[-1] 14 | 15 | row_n = 2 if length == 8 else 4 16 | col_n = length / row_n 17 | plt.subplots(row_n, col_n) 18 | 19 | for i in range(length): 20 | axes = plt.subplot(row_n, col_n, i + 1) 21 | map = W[:, :, 0, i] 22 | plt.imshow(map,cmap=plt.cm.magma) 23 | # plt.imshow(map, cmap=plt.get_cmap('gray')) 24 | # plt.xlabel(i) 25 | axes.set_xticks([]) 26 | axes.set_yticks([]) 27 | # plt.colorbar(fraction=0.046, pad=0.04) 28 | plt.savefig(dir_ + "map" + name + ".jpg") 29 | 30 | 31 | def plot_activation(sample_index, k, row_n, col_n, conv, name): 32 | for i in range(col_n): 33 | k += 1 34 | m = plt.subplot(row_n, col_n, k) 35 | if i == col_n / 2: 36 | m.set_title(name) 37 | # plt.imshow(conv[sample_index, :, :, i], vmin=-1, vmax=1, interpolation='nearest', cmap=plt.cm.jet) 38 | plt.imshow(conv[sample_index, :, :, i], interpolation='nearest', cmap=plt.cm.magma) 39 | # plt.imshow(conv[sample_index, :, :, i], cmap=plt.cm.jet, aspect='auto') 40 | # plt.imshow(conv[sample_index, :, :, i], cmap=plt.get_cmap('gray'), aspect='auto') 41 | m.set_xticks([]) 42 | m.set_yticks([]) 43 | # plt.title(name) 44 | # plt.colorbar(fraction=0.046, pad=0.04) 45 | return k 46 | 47 | 48 | def plot_activations(conv1, conv2, conv3, conv4): 49 | length = conv1.shape[-1] 50 | row_n = 4 51 | col_n = length 52 | plt.subplots(row_n, col_n, figsize=(20, 10)) 53 | sample_idnex = 352 54 | 55 | k = 0 56 | k = plot_activation(sample_idnex, k, row_n, col_n, conv1, "Str") 57 | k = plot_activation(sample_idnex, k, row_n, col_n, conv2, "Character Embedding") 58 | k = plot_activation(sample_idnex, k, row_n, col_n, conv3, "Word Embedding") 59 | plot_activation(sample_idnex, k, row_n, col_n, conv4, "Sentence Embedding") 60 | 61 | # plt.colorbar() 62 | plt.savefig(dir_ + str(sample_idnex) + "activations_color" + ".jpg") 63 | 64 | 65 | tf.flags.DEFINE_string("train_dir", "./", "Training dir root") 66 | FLAGS = tf.flags.FLAGS 67 | FLAGS._parse_flags() 68 | 69 | task_num = 1 70 | inpH = InputHelper() 71 | max_document_length = 10 72 | dir_ = "fig/" 73 | 74 | model_dir = "./runs/NewExp/Single_task11503543419" 75 | checkpoint_dir = os.path.join(model_dir, "checkpoints") 76 | print(checkpoint_dir) 77 | ckpt = tf.train.get_checkpoint_state(checkpoint_dir) 78 | checkpoint_file = ckpt.model_checkpoint_path 79 | # 80 | # mydir = "./Length" + str(max_document_length) + "/" 81 | # x_test_tensor = np.load(mydir + "test_des" + ".npy") 82 | x_test_tensor = np.load("./Tensor_files/0823/Length10/test_des.npy") 83 | graph = tf.Graph() 84 | 85 | with graph.as_default(): 86 | sess = tf.Session() 87 | 88 | with sess.as_default(): 89 | # Load the saved meta graph and restore variables 90 | saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file)) 91 | sess.run(tf.initialize_all_variables()) 92 | 
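# NOTE: saver.restore() on the next line overwrites whatever the initializer above just
# wrote, so running tf.initialize_all_variables() first is effectively redundant when the
# meta graph and the checkpoint come from the same training run; tf.initialize_all_variables()
# is also the pre-TF-0.12 name of tf.global_variables_initializer().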
saver.restore(sess, checkpoint_file) 93 | 94 | # PLOT Conv Filters 95 | t_vars = tf.trainable_variables() 96 | for var in t_vars: 97 | print(var.name) 98 | print(var) 99 | print(sess.run(var)) 100 | plot_conv(sess, t_vars, "conv1") 101 | plot_conv(sess, t_vars, "conv1_1") 102 | plot_conv(sess, t_vars, "conv1_2") 103 | plot_conv(sess, t_vars, "conv1_3") 104 | 105 | input_tensor = graph.get_operation_by_name("input_tensor").outputs[0] 106 | droppout = graph.get_operation_by_name("dropout_keep_prob").outputs[0] 107 | 108 | conv1 = graph.get_operation_by_name("conv1/conv1").outputs[0] 109 | conv2 = graph.get_operation_by_name("conv1_1/conv1").outputs[0] 110 | conv3 = graph.get_operation_by_name("conv1_2/conv1").outputs[0] 111 | conv4 = graph.get_operation_by_name("conv1_3/conv1").outputs[0] 112 | 113 | conv_layer1, conv_layer2, conv_layer3, conv_layer4 = sess.run([conv1, conv2, conv3, conv4], 114 | feed_dict={input_tensor: x_test_tensor}) 115 | plot_activations(conv_layer1, conv_layer2, conv_layer3, conv_layer4) 116 | -------------------------------------------------------------------------------- /word2vec_pretrain/preparation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf-8 3 | 4 | from sklearn.feature_extraction.text import TfidfVectorizer 5 | import jieba 6 | import re 7 | import gensim 8 | import codecs 9 | import os 10 | from DLDisambiguation.util.util import preprocess_unit 11 | 12 | def generateProcessedCorpusFile(src_file_path, character_file_path, word_file_path): 13 | """ 14 | 载入语料,去掉NULL的无效数据, 15 | 分词用空格隔开每个词并生成word_src.txt 16 | 将每个字用空格隔开并生成character_src.txt 17 | :param src_file_path: 18 | :param character_file_path: 19 | :param word_file_path: 20 | :return: 21 | """ 22 | data = [] 23 | file = codecs.open(src_file_path, "r") 24 | line = file.readline() 25 | 26 | character_file = codecs.open(character_file_path, "w+", "utf-8") 27 | word_file = codecs.open(word_file_path, "w+", "utf-8") 28 | 29 | while line != "": 30 | d = line.strip().decode("utf-8") 31 | # 处理"无","NA"."NULL".""的情况 32 | if len(d) != 1 and d != "NA" and d != "NULL" and d != "": 33 | data.append(d) 34 | line = file.readline() 35 | 36 | for i in data: 37 | res = preprocess_unit(i) 38 | characters = list("".join(res)) 39 | if len(characters) == 0: 40 | continue 41 | character_file.write(" ".join(characters) + "\n") 42 | character_file.close() 43 | print("Finished character model!") 44 | 45 | for i in data: 46 | res = preprocess_unit(i) 47 | x = jieba.cut(res) 48 | words = list(x) 49 | if len(words) == 0: 50 | continue 51 | word_file.write(" ".join(words) + "\n") 52 | word_file.close() 53 | print("Finished Word model!") 54 | 55 | 56 | def get_IDF(corpus_file, idf_file): 57 | file = open(corpus_file) 58 | line = file.readline() 59 | counter = 0 60 | corpus = [] 61 | 62 | while line != "": 63 | counter += 1 64 | 65 | line = preprocess_unit(line) 66 | cut_words = list(jieba.cut(line)) 67 | cut_words_valid = [i for i in cut_words if re.sub("\w+", "", i) != ""] 68 | content = " ".join(cut_words_valid) 69 | corpus.append(content) 70 | line = file.readline() 71 | file.close() 72 | 73 | # compute idf model 74 | vectorizer = TfidfVectorizer( 75 | use_idf=True, 76 | norm=None, 77 | smooth_idf=False, # idf = ln(N+1 / ni+1) 78 | sublinear_tf=False, # tf = 1+ln(tf) 79 | binary=False, 80 | min_df=1, max_df=1.0, max_features=None, 81 | strip_accents='unicode', 82 | ngram_range=(1, 1), preprocessor=None, stop_words=None, tokenizer=None, vocabulary=None 83 | 
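# NOTE: with use_idf=True, smooth_idf=False and sublinear_tf=False as set above, scikit-learn
# computes idf(t) = ln(N / df(t)) + 1; the smoothed form ln((1 + N) / (1 + df(t))) + 1 is only
# used when smooth_idf=True, so the short "idf = ln(N+1 / ni+1)" comment above corresponds to
# the smoothed variant rather than the setting chosen here.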
) 84 | X = vectorizer.fit_transform(corpus) 85 | idf = vectorizer.idf_ 86 | name_idf_dict = dict(zip(vectorizer.get_feature_names(), idf)) 87 | 88 | # write IDF_Model 89 | idf_file = open(idf_file, "w+") 90 | for k, v in name_idf_dict.items(): 91 | idf_file.write(k.encode("utf-8") + ":" + str(v) + "\n") 92 | idf_file.close() 93 | print("IDF_Model Finished!") 94 | 95 | 96 | class MySentences(object): 97 | def __init__(self, fname): 98 | self.fname = fname 99 | 100 | def __iter__(self): 101 | for line in open(self.fname): 102 | yield line.split() 103 | 104 | 105 | def checkPath(x): 106 | if not os.path.exists(x): 107 | open(x, 'a').close() 108 | 109 | 110 | def compute_word2vec(embedding_dir, type): 111 | load_model_flag = False 112 | src_file = embedding_dir + type + "_src.txt" 113 | model_file = embedding_dir + type + '.model' 114 | wv_file = embedding_dir + type + 'character_model.txt' 115 | 116 | if not load_model_flag: 117 | sentences = MySentences(src_file) # a memory-friendly iterator 118 | model = gensim.models.Word2Vec(sentences) 119 | 120 | checkPath(model_file) 121 | checkPath(wv_file) 122 | 123 | model.save(model_file) # save model 124 | model.wv.save_word2vec_format(wv_file, binary=False) # save word2vec txt 125 | print("Word2vec model finished!\n") 126 | else: 127 | model = gensim.models.Word2Vec.load(model_file) # load in model 128 | 129 | 130 | if __name__ == "__main__": 131 | 132 | corpus_file = "../data/d2013_operation.txt" 133 | idf_model = "../data/idfModel_operation.txt" 134 | 135 | # corpus_file = "../data/db_description_d2013.txt" 136 | # idf_model = "../data/idfModel.txt" 137 | 138 | task_num = 1 139 | if task_num == 1: 140 | embedding_dir = "../data/word2vec_new/" 141 | else: 142 | embedding_dir = "../data/operation/" 143 | 144 | # generate IDF Model 145 | get_IDF(corpus_file, idf_model) 146 | 147 | # produce character_src_file and word_src_file from corpus file 148 | # generateProcessedCorpusFile(corpus_file, character_src_file, word_src_file) 149 | 150 | # generate word2vec for characters and words 151 | # compute_word2vec(embedding_dir, "character") 152 | # compute_word2vec(embedding_dir, "word") -------------------------------------------------------------------------------- /tensor_construction.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding=utf-8 3 | 4 | import os 5 | import numpy as np 6 | import tensorflow as tf 7 | 8 | from DLDisambiguation.util.input_helpers import InputHelper 9 | from tensor import Tensor 10 | 11 | tf.flags.DEFINE_string("train_dir", "./", "Training dir root") 12 | tf.flags.DEFINE_string("max_sequence_len", 10, "length") 13 | tf.flags.DEFINE_string("max_sequence_len2", 20, "length") 14 | FLAGS = tf.flags.FLAGS 15 | FLAGS._parse_flags() 16 | 17 | 18 | def generate_Tensor(mention, entity, mention2, entity2, mention3, entity3, max_len, task_n): 19 | lstm_dir = "Description1502868912" if task_n == 1 else "Operation1502954903" # 0810数据集 10 + 20 20 | # lstm_dir = "Description1501554142" if task_n == 1 else "Operation1501588184" # 旧数据集 8万 LEN = 10 21 | bilstm_dir = os.path.join("./Sentence_Modeling/Exp/runs", lstm_dir) 22 | 23 | men_arr = np.concatenate((mention, mention2, mention3)) 24 | entity_arr = np.concatenate((entity, entity2, entity3)) 25 | tensor = Tensor(men_arr, entity_arr, len(men_arr), max_len, task_n, bilstm_dir).get_tensor() 26 | tensor = tensor.transpose((0, 2, 3, 1)) 27 | 28 | g1 = len(mention) 29 | g2 = len(np.concatenate((mention, mention2))) 30 | 
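# The tensor above was built on the concatenated train + dev + test pairs, so the return
# below slices it back into the three splits by their original lengths; g2 is simply
# g1 + len(mention2), and the second np.concatenate is only used for counting.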
return tensor[:g1], tensor[g1:g2], tensor[g2:] 31 | 32 | 33 | def generate_Tensor_no_dev(mention, entity, mention3, entity3, max_len, task_n): 34 | lstm_dir = "Description1503482587" if task_n == 1 else "Operation1503500979" # 0823 35 | # lstm_dir = "Description1503276064" if task_n == 1 else "Operation1503277387" # 0816 36 | # lstm_dir = "Description1503227493" if task_n == 1 else "Operation1502964494" # 37 | # lstm_dir = "Description1502964352" if task_n == 1 else "Operation1502964494" # 0810数据集 10 + 20 38 | print lstm_dir 39 | bilstm_dir = os.path.join("./Sentence_Modeling/Exp0823/runs", lstm_dir) 40 | 41 | men_arr = np.concatenate((mention, mention3)) 42 | entity_arr = np.concatenate((entity, entity3)) 43 | tensor = Tensor(men_arr, entity_arr, len(men_arr), max_len, task_n, bilstm_dir).get_tensor() 44 | tensor = tensor.transpose((0, 2, 3, 1)) 45 | 46 | g1 = len(mention) 47 | return tensor[:g1], tensor[g1:] 48 | 49 | 50 | def prepara_dynamic_tensor(inputH, training_path, dev_path, test_path, max_len, max_len2): 51 | indi1, x1_train, x2_train, x3_train, x4_train, y_train, y2_train = inputH.getTsvTestData_Mul_Labels_Dyna( 52 | training_path, "\t", max_len) 53 | indi2, x1_dev, x2_dev, x3_dev, x4_dev, y_dev, y2_dev = inputH.getTsvTestData_Mul_Labels_Dyna(dev_path, "\t", 54 | max_len) 55 | indi3, x1_test, x2_test, x3_test, x4_test, y_test, y2_test = inputH.getTsvTestData_Mul_Labels_Dyna(test_path, "\t", 56 | max_len) 57 | 58 | print("Finished Loading") 59 | x_train_tensor, x_dev_tensor, x_test_tensor = generate_Tensor(x1_train, x2_train, x1_dev, x2_dev, x1_test, x2_test, 60 | max_len, 1) 61 | print("Finished constructing tensors!") 62 | 63 | dir_t = "./0816/" 64 | np.save(dir_t + "train_des", x_train_tensor) 65 | np.save(dir_t + "dev_des", x_dev_tensor) 66 | np.save(dir_t + "test_des", x_test_tensor) 67 | print("Save description tensors!") 68 | 69 | x_train_tensor_o, x_dev_tensor_o, x_test_tensor_o = generate_Tensor(x3_train, x4_train, x3_dev, x4_dev, x3_test, 70 | x4_test, max_len2, 2) 71 | 72 | np.save(dir_t + "train_opr", x_train_tensor_o) 73 | np.save(dir_t + "dev_opr", x_dev_tensor_o) 74 | np.save(dir_t + "test_opr", x_test_tensor_o) 75 | 76 | np.save(dir_t + "train_indi_opr", indi1) 77 | np.save(dir_t + "dev_indi_opr", indi2) 78 | np.save(dir_t + "test_indi_opr", indi3) 79 | 80 | def prepara_tensor_y_seperate(inputH, data_file, data_file_val, data_file_test, sep, max_len, name, task_num): 81 | x_train_mention, x_train_entity, y_train = inputH.getTsvTestData(data_file, sep, max_len, y_value=False) 82 | # x_dev_mention, x_dev_entity, y_dev = inputH.getTsvTestData(data_file_val, sep, max_len, y_value=False) 83 | x_test_mention, x_test_entity, y_test = inputH.getTsvTestData(data_file_test, sep, max_len, y_value=False) 84 | 85 | print("Finished Loading") 86 | # x_train_tensor, x_dev_tensor, x_test_tensor = generate_Tensor(x_train_mention, x_train_entity, x_dev_mention, 87 | # x_dev_entity, x_test_mention, x_test_entity, 88 | # max_len, task_num) 89 | x_train_tensor, x_test_tensor = generate_Tensor_no_dev(x_train_mention, x_train_entity, 90 | x_test_mention, x_test_entity, 91 | max_len, task_num) 92 | print("Finished constructing tensors!") 93 | 94 | print("Length") 95 | print(len(x_train_mention)) 96 | mydir = "Tensor_files/0823/No_IDF/Length" + str(max_len) + "/" 97 | np.save(mydir + "train_" + name, x_train_tensor) 98 | # np.save(mydir + "dev_" + name, x_dev_tensor) 99 | np.save(mydir + "test_" + name, x_test_tensor) 100 | print("Save tensors!") 101 | 102 | 103 | def main(): 104 | # Load 
data 105 | print("Loading data...") 106 | inputH = InputHelper() 107 | 108 | task_num = 1 109 | name = "des" if task_num == 1 else "opr" 110 | 111 | # train_f = "./data/exp0803/training_dynamic_data.txt" 112 | # dev_f = "./data/exp0803/validation_dynamic_data.txt" 113 | # test_f = "./data/exp0803/test_dynamic_data.txt" 114 | # prepara_dynamic_tensor(inputH, train_f, dev_f, test_f, FLAGS.max_sequence_len, FLAGS.max_sequence_len2) 115 | 116 | time_gen = "0823" 117 | data_file = os.path.join(FLAGS.train_dir, "data/train_data_" + time_gen + "_" + name + ".txt") 118 | data_file_test = os.path.join(FLAGS.train_dir, "data/test_data_" + time_gen + "_" + name + ".txt") 119 | data_file_val = data_file_test 120 | # data_file_val = os.path.join(FLAGS.train_dir, "data/validation_data_" + time_gen + "_" + name + ".txt") 121 | prepara_tensor_y_seperate(inputH, data_file, data_file_val, data_file_test, "\t", FLAGS.max_sequence_len, name, 122 | task_num) 123 | 124 | 125 | if __name__ == '__main__': 126 | main() 127 | -------------------------------------------------------------------------------- /util/util.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf-8 3 | import re 4 | import numpy as np 5 | 6 | 7 | def write_evaluation_file(eval_file, right_output_file, wrong_output_file, labels, predictions, mentions, entities, 8 | indi=None): 9 | tp, fp, tn, fn = 0, 0, 0, 0 10 | length = len(predictions) 11 | right_output_file.write("mention\tentity\tprediction\tlabel\n") 12 | wrong_output_file.write("mention\tentity\tprediction\tlabel\n") 13 | 14 | for i in range(length): 15 | if indi != None and indi[i] == 0: 16 | continue 17 | str_m = mentions[i] + "\t" + entities[i] + "\t" + str(predictions[i]) + "\t" + str(labels[i]) + "\n" 18 | if predictions[i] == 1 and labels[i] == 1: 19 | tp += 1.0 20 | right_output_file.write(str_m) 21 | elif predictions[i] == 1 and labels[i] == 0: 22 | fn += 1.0 23 | wrong_output_file.write(str_m) 24 | elif predictions[i] == 0 and labels[i] == 1: 25 | fp += 1.0 26 | wrong_output_file.write(str_m) 27 | else: 28 | tn += 1.0 29 | right_output_file.write(str_m) 30 | 31 | eval_file.write("True positive: " + str(tp) + "\n") 32 | eval_file.write("False positive: " + str(fp) + "\n") 33 | eval_file.write("True negative: " + str(tn) + "\n") 34 | eval_file.write("False negative: " + str(fn) + "\n") 35 | 36 | precision = tp / (tp + fp) 37 | recall = tp / (tp + fn) if tp + fn != 0.0 else 0.0001 38 | f1 = 2 * precision * recall / (recall + precision) 39 | 40 | eval_file.write("Precision:" + str(precision) + "\n") 41 | eval_file.write("Recall:" + str(recall) + "\n") 42 | eval_file.write("F1:" + str(f1) + "\n\n") 43 | 44 | 45 | def write_evaluation_file_multi(eval_file, right_output_file, wrong_output_file, labels, predictions, labels2, 46 | predictions2, mentions1, entities1, mentions2, entities2): 47 | tp, fp, tn, fn = 0, 0, 0, 0 48 | length = len(predictions) 49 | right_output_file.write("mention_description\tentity_description\tprediction_description\tlabel" 50 | "\tmention_operation\tentity_operation\tprediction_operation\tlabel \n") 51 | wrong_output_file.write("mention_description\tentity_description\tprediction_description\tlabel" 52 | "\tmention_operation\tentity_operation\tprediction_operation\tlabel \n") 53 | 54 | for i in range(length): 55 | str_m = mentions1[i] + "\t" + entities1[i] + "\t" + str(predictions[i]) + "\t" + str(labels[i]) + "\t" + \ 56 | mentions2[i] + "\t" + entities2[i] + "\t" + 
str(predictions2[i]) + "\t" + str(labels2[i]) + "\n" 57 | if predictions[i] == 1 and labels[i] == 1: 58 | tp += 1.0 59 | right_output_file.write(str_m) 60 | elif predictions[i] == 1 and labels[i] == 0: 61 | fn += 1.0 62 | wrong_output_file.write(str_m) 63 | elif predictions[i] == 0 and labels[i] == 1: 64 | fp += 1.0 65 | wrong_output_file.write(str_m) 66 | else: 67 | tn += 1.0 68 | right_output_file.write(str_m) 69 | 70 | eval_file.write("True positive: " + str(tp) + "\n") 71 | eval_file.write("False positive: " + str(fp) + "\n") 72 | eval_file.write("True negative: " + str(tn) + "\n") 73 | eval_file.write("False negative: " + str(fn) + "\n") 74 | 75 | precision = tp / (tp + fp) 76 | recall = tp / (tp + fn) 77 | f1 = 2 * precision * recall / (recall + precision) 78 | 79 | eval_file.write("Precision:" + str(precision) + "\n") 80 | eval_file.write("Recall:" + str(recall) + "\n") 81 | eval_file.write("F1:" + str(f1) + "\n") 82 | 83 | 84 | def preprocess_unit(str): 85 | res_0 = re.sub(ur"\w+", '', str) 86 | res_0 = re.sub(ur"[-( )\( \), \.;;、:° \s+ \*\[ \] \+ ?? \,]", '', res_0) 87 | 88 | # res_0 = str.replace(' ', '') 89 | # res_0 = re.sub(ur"\u3000", '', res_0) # 将中文的空格用英文空格代替,后面可以处理 90 | 91 | # res_0 = re.sub(ur"\[?[(]?\w+[.]?\w+\]?[)]?$", '', res_0) # 去除掉ICD编码 eg:I20.222 92 | # 93 | # res_0 = re.sub(r"\w\d+.\d+", '', res_0) # 去除掉ICD编码 eg:I20.222 94 | # res_0 = re.sub(r"\w\d+.?x+\d+$", '', res_0) # 去除掉尾部的编码 eg:I20.x222 95 | # 96 | # res_0 = re.sub(r"\s\w+", "", res_0) # 去掉空格后的字母,eg: 心肌梗塞急性 NOS 97 | # res_0 = re.sub(ur"\[\w+\]", "", res_0).strip() # 去掉括号中的字母解释,eg: [NSSMD] 98 | # res_0 = re.sub(ur"(\w+)", "", res_0).strip() # 去掉括号中的字母解释,eg: (NSSMD) 99 | # res_0 = re.sub(ur"\(\w+\)", "", res_0).strip() # 去掉括号中的字母解释,eg: (NSSMD) 100 | # 101 | # res = re.split(ur"[( )\( \), \.;;、:° \s+ \*\[ \] \+ ?? 
\,]", res_0) 102 | # res = filter(lambda x: len(x) != 1 and len(x) != 0, res) 103 | # 104 | # return "".join(res) 105 | return res_0 106 | 107 | 108 | def preprocess_arr(arr): 109 | res = [] 110 | for i in arr: 111 | res.append(preprocess_unit(i.decode("utf-8"))) 112 | return res 113 | 114 | 115 | def loadIDFModel(file_path): 116 | file = open(file_path) 117 | idfModel = dict() 118 | for line in file.readlines(): 119 | segs = line.strip().split(":") 120 | name, idf = segs[0].decode("utf-8"), float(segs[1]) 121 | idfModel[name] = idf 122 | return idfModel 123 | 124 | 125 | def load_data(path): 126 | file = open(path) 127 | line = file.readline() 128 | res = [] 129 | y = [] 130 | 131 | while line != "": 132 | tmp = line.split(",") 133 | res.append([tmp[0], tmp[1]]) 134 | y.append(tmp[2]) 135 | line = file.readline() 136 | file.close() 137 | return res, y 138 | 139 | 140 | def loadWord2Vec(filename): 141 | vocab = [] 142 | embd = [] 143 | file = open(filename, 'r') 144 | line = file.readline() 145 | 146 | while line != "": 147 | line = file.readline() # jump the first line 148 | if line == "": 149 | break 150 | row = line.strip().split(' ') 151 | vocab.append(row[0]) 152 | embedding_float = [float(i) for i in row[1:]] 153 | embd.append(embedding_float) 154 | print('Loaded Word2vec model!') 155 | file.close() 156 | return vocab, embd 157 | 158 | 159 | def getEmbedding(filename): 160 | vocab, embd = loadWord2Vec(filename) 161 | 162 | # add unknown symbol 163 | vocab.append("") 164 | embd.append([0.0] * 100) 165 | 166 | vocab_size = len(vocab) 167 | embedding_dim = len(embd[0]) 168 | embedding = np.asarray(embd) 169 | return vocab, vocab_size, embedding_dim, embedding 170 | -------------------------------------------------------------------------------- /MultiGran_Model.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding=utf-8 3 | import numpy as np 4 | import tensorflow as tf 5 | 6 | 7 | class MultiGranModel(object): 8 | def _conv(self, name, in_, ksize, reuse=False): 9 | num_filters = ksize[3] 10 | 11 | with tf.variable_scope(name, reuse=reuse) as scope: 12 | # different CNN for different views 13 | # W = tf.get_variable("weights", ksize, initializer=tf.contrib.layers.xavier_initializer()) 14 | W = tf.Variable(tf.truncated_normal(ksize, stddev=0.1), name="W") 15 | biases = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b") 16 | 17 | # same CNN for different views 18 | # W = tf.get_variable("weights", ksize, initializer=tf.contrib.layers.xavier_initializer()) 19 | # W = tf.get_variable("weights", ksize, initializer=tf.truncated_normal_initializer(stddev=0.1)) 20 | # biases = tf.get_variable("biases", [num_filters], initializer=tf.constant_initializer(0.1)) 21 | 22 | conv = tf.nn.conv2d(in_, W, strides=[1, 1, 1, 1], padding="VALID") 23 | h = tf.nn.relu(tf.nn.bias_add(conv, biases), name=scope.name) 24 | 25 | return h 26 | 27 | def _maxpool(self, name, in_, ksize, strides): 28 | pool = tf.nn.max_pool(in_, ksize=ksize, strides=strides, padding='VALID', name=name) 29 | print name, pool.get_shape().as_list() 30 | return pool 31 | 32 | def __init__(self, max_len, filter_sizes, pool_sizes, num_filters, l2_reg_lambda=0.0, type_CNN=2): 33 | channel_num = 4 34 | 35 | # Placeholders for input, output and dropout 36 | self.input_tensor = tf.placeholder(tf.float32, [None, max_len, max_len, channel_num], name="input_tensor") 37 | self.input_y = tf.placeholder(tf.float32, [None, 2], name="input_y") 38 | self.dropout_keep_prob 
= tf.placeholder(tf.float32, name="dropout_keep_prob") 39 | 40 | # Keeping track of l2 regularization loss (optional) 41 | l2_loss = tf.constant(0.0) 42 | 43 | # Create a convolution + maxpool layer for each filter size 44 | pooled_outputs = [] 45 | 46 | input_tensor = tf.expand_dims(self.input_tensor, 4) # N x W x H x V => N x W x H x V x C 47 | input_tensor = tf.transpose(input_tensor, perm=[3, 0, 1, 2, 4]) # N x W x H x V x C => V x N x W x H x C 48 | 49 | if type_CNN == 1: 50 | filter_shape1 = [filter_sizes[0], filter_sizes[1], 4, num_filters / 2] 51 | p_size1 = [1, 2, 2, 1] 52 | filter_shape2 = [filter_sizes[2], filter_sizes[3], num_filters / 2, num_filters] 53 | p_size2 = [1, 2, 2, 1] 54 | 55 | conv1 = self._conv("conv1", self.input_tensor, filter_shape1) 56 | pool1 = self._maxpool('pool1', conv1, ksize=p_size1, strides=[1, 1, 1, 1]) 57 | conv2 = self._conv('conv2', pool1, filter_shape2) 58 | pool2 = self._maxpool('pool2', conv2, ksize=p_size2, strides=[1, 1, 1, 1]) 59 | 60 | dim = np.prod(pool2.get_shape().as_list()[1:]) 61 | y = tf.reshape(pool2, [-1, dim]) 62 | else: 63 | for i in range(channel_num): 64 | # set reuse True for i > 0, for weight-sharing 65 | reuse_f = (i != 0) 66 | view = tf.gather(input_tensor, i) # N x W x H x C 67 | 68 | filter_shape1 = [filter_sizes[0], filter_sizes[1], 1, num_filters / 2] 69 | p_size1 = [1, pool_sizes[0], pool_sizes[1], 1] 70 | 71 | conv1 = self._conv('conv1', view, filter_shape1, reuse=reuse_f) 72 | pool1 = self._maxpool('pool1', conv1, ksize=p_size1, strides=[1, 1, 1, 1]) 73 | 74 | if len(filter_sizes) == 4: 75 | filter_shape2 = [filter_sizes[2], filter_sizes[3], num_filters / 2, num_filters] 76 | p_size2 = [1, pool_sizes[2], pool_sizes[3], 1] 77 | 78 | conv2 = self._conv('conv2', pool1, filter_shape2, reuse=reuse_f) 79 | pool2 = self._maxpool('pool2', conv2, ksize=p_size2, strides=[1, 1, 1, 1]) 80 | 81 | dim = np.prod(pool2.get_shape().as_list()[1:]) 82 | reshape = tf.reshape(pool2, [-1, dim]) 83 | else: 84 | dim = np.prod(pool1.get_shape().as_list()[1:]) 85 | reshape = tf.reshape(pool1, [-1, dim]) 86 | 87 | pooled_outputs.append(reshape) 88 | 89 | with tf.name_scope("view_pooling"): 90 | x = tf.stack(pooled_outputs) # 4 * N * dim 91 | x = tf.transpose(x, perm=[1, 2, 0]) # N * dim * 4 92 | reshape = tf.reshape(x, [-1, 4]) # (N * dim) * 4 93 | print reshape.get_shape().as_list() 94 | 95 | Weights = tf.Variable(tf.truncated_normal([4, 1], 0, 0.1), name="W") 96 | 97 | y = tf.matmul(reshape, Weights, name="view_pooling") 98 | y = tf.reshape(y, [-1, dim]) 99 | print y.get_shape().as_list() 100 | print("DIM:!" 
+ str(dim)) 101 | 102 | # Add dropout 103 | with tf.name_scope("dropout"): 104 | self.h_drop = tf.nn.dropout(y, self.dropout_keep_prob, name="hidden_output_drop") 105 | print self.h_drop.get_shape().as_list() 106 | 107 | with tf.name_scope("fc1"): 108 | dim_ = dim / 2 109 | # dim_ = 100 110 | # W = tf.get_variable("W", [dim, dim_], initializer=tf.contrib.layers.xavier_initializer()) 111 | W = tf.Variable(name="W", initial_value=tf.truncated_normal(shape=[dim, dim_], stddev=0.1)) 112 | b = tf.Variable(tf.constant(0.1, shape=[dim_]), name="b") 113 | 114 | l2_loss += tf.nn.l2_loss(W) 115 | l2_loss += tf.nn.l2_loss(b) 116 | self.fc1 = tf.nn.relu(tf.matmul(self.h_drop, W) + b) 117 | self.fc_drop1 = tf.nn.dropout(self.fc1, self.dropout_keep_prob) 118 | 119 | # with tf.name_scope("fc2"): 120 | # dim__ = dim_ / 2 121 | # # dim_ = 100 122 | # W = tf.Variable(name="W", initial_value=tf.truncated_normal(shape=[dim_, dim__], stddev=0.1)) 123 | # b = tf.Variable(tf.constant(0.1, shape=[dim__]), name="b") 124 | # 125 | # l2_loss += tf.nn.l2_loss(W) 126 | # l2_loss += tf.nn.l2_loss(b) 127 | # self.fc2 = tf.nn.relu(tf.matmul(self.fc_drop1, W) + b) 128 | # self.fc_drop2 = tf.nn.dropout(self.fc2, self.dropout_keep_prob) 129 | 130 | # Final (unnormalized) scores and predictions 131 | with tf.name_scope("output"): 132 | # W = tf.get_variable("W_output", [dim_, 2], initializer=tf.contrib.layers.xavier_initializer()) 133 | W = tf.Variable(name="W_output", initial_value=tf.truncated_normal(shape=[dim_, 2], stddev=0.1)) 134 | b = tf.Variable(tf.constant(0.1, shape=[2]), name="b") 135 | 136 | l2_loss += tf.nn.l2_loss(W) 137 | l2_loss += tf.nn.l2_loss(b) 138 | self.scores = tf.nn.xw_plus_b(self.fc_drop1, W, b, name="scores") 139 | self.predictions = tf.argmax(self.scores, 1, name="predictions") 140 | 141 | # Calculate Mean cross-entropy loss 142 | with tf.name_scope("loss"): 143 | losses = tf.nn.softmax_cross_entropy_with_logits(logits=self.scores, labels=self.input_y) 144 | self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss 145 | 146 | # Accuracy 147 | with tf.name_scope("accuracy"): 148 | correct_predictions = tf.equal(self.predictions, tf.argmax(self.input_y, 1)) 149 | self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy") 150 | -------------------------------------------------------------------------------- /Sentence_Modeling/Siamese_network.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | class SiameseLSTM(object): 4 | """ 5 | A LSTM based deep Siamese network 6 | Uses an character embedding layer, followed by a biLSTM and Energy Loss layer. 
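    Concretely, each of the two inputs is looked up in an embedding matrix initialised from the
    supplied embedding_arr, encoded by its own BiLSTM (variable scopes side1/side2), masked and
    mean-pooled over its time steps, and the two pooled vectors are compared with a Euclidean
    distance trained under a contrastive loss; at prediction time a distance below the 0.5 margin
    is treated as a matching pair.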
7 | """ 8 | 9 | def __init__(self, sequence_length, vocab_processer, embedding_size, hidden_unit_size, l2_reg_lambda, batch_size, 10 | embedding_arr): 11 | # Placeholders for input, output 12 | self.input_x1 = tf.placeholder(tf.int32, [None, sequence_length], name="input_x1") 13 | self.mask_x1 = tf.placeholder(tf.float32, [None, sequence_length], name="mask_x1") 14 | 15 | self.input_x2 = tf.placeholder(tf.int32, [None, sequence_length], name="input_x2") 16 | self.mask_x2 = tf.placeholder(tf.float32, [None, sequence_length], name="mask_x2") 17 | self.input_y = tf.placeholder(tf.float32, [None], name="input_y") 18 | 19 | self.hidden_n = hidden_unit_size 20 | self.vocab_processer = vocab_processer 21 | 22 | # Keeping track of l2 regularization loss (optional) 23 | l2_loss = tf.constant(0.0, name="l2_loss") 24 | 25 | # Embedding layer 26 | with tf.name_scope("embedding"): 27 | self.W = tf.Variable(tf.constant(embedding_arr, dtype=tf.float32), trainable=True, name="W") 28 | 29 | self.embedded_chars1 = tf.nn.embedding_lookup(self.W, self.input_x1) 30 | self.embedded_chars2 = tf.nn.embedding_lookup(self.W, self.input_x2) 31 | 32 | # Create a convolution + maxpool layer for each filter size 33 | with tf.name_scope("output"): 34 | # Siamese Network with the same Bi-LSTM(side, side) 35 | # self.out1 = self.BiRNN(self.embedded_chars1, self.mask_x1, "side", embedding_size, sequence_length, 36 | # reuse_f=None) 37 | # self.out2 = self.BiRNN(self.embedded_chars2, self.mask_x2, "side", embedding_size, sequence_length, 38 | # reuse_f=True) 39 | 40 | # Siamese Network with different Bi-LSTMs(side1, side2) 41 | # out1 shape(None, 20) 42 | self.out1 = self.BiRNN(self.embedded_chars1, self.mask_x1, "side1", embedding_size, sequence_length) 43 | self.out2 = self.BiRNN(self.embedded_chars2, self.mask_x2, "side2", embedding_size, sequence_length) 44 | 45 | # cosine distance 46 | # normalize_a = tf.nn.l2_normalize(self.out1, 1) 47 | # normalize_b = tf.nn.l2_normalize(self.out2, 1) 48 | # self.distance = tf.subtract(1.0, abs( 49 | # tf.reduce_sum(tf.multiply(normalize_a, normalize_b), axis=1, name="distance"))) 50 | 51 | # Euclidean distance : distance shape:(None) 52 | 53 | self.distance = tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(self.out1, self.out2)), 1)) 54 | 55 | with tf.name_scope("sentence_embedding"): 56 | self.representation1 = self.get_Representation(self.embedded_chars1, "side1", embedding_size, 57 | sequence_length) 58 | self.representation2 = self.get_Representation(self.embedded_chars2, "side2", embedding_size, 59 | sequence_length) 60 | 61 | self.representation1 = tf.identity(self.representation1, name="Representation1") 62 | self.representation2 = tf.identity(self.representation2, name="Representation2") 63 | 64 | with tf.name_scope("loss"): 65 | 66 | self.loss = self.contrastive_loss(self.input_y, self.distance, batch_size) 67 | 68 | with tf.name_scope("accuracy"): 69 | # predict_label = tf.subtract(1.0, tf.round(self.distance)) 70 | 71 | margin = 0.5 72 | self.predict_label = tf.cast(tf.less(self.distance, margin), "float32", name="prediction") 73 | # predict_label = tf.subtract(tf.Variable(1.0), tf.round(self.distance)) 74 | 75 | correct_predictions = tf.equal(self.predict_label, self.input_y) 76 | self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float32"), name="accuracy") 77 | 78 | def BiRNN(self, x, mask, scope, embedding_size, sequence_length, reuse_f=None): 79 | n_input = embedding_size 80 | n_steps = sequence_length 81 | 82 | x = tf.transpose(x, [1, 0, 2]) # (batch_size, 
n_steps, n_input) => (n_steps, batch_size, n_input) 83 | # Reshape to (n_steps*batch_size, n_input) 84 | x = tf.reshape(x, [-1, n_input]) 85 | # Split to get a list of 'n_steps' tensors of shape (batch_size, n_input) 86 | x = tf.split(x, n_steps, 0) 87 | 88 | with tf.name_scope("fw" + scope), tf.variable_scope("fw" + scope, reuse=reuse_f): 89 | print(tf.get_variable_scope().name) 90 | fw_cell = tf.contrib.rnn.BasicLSTMCell(self.hidden_n, forget_bias=1.0, state_is_tuple=True) 91 | 92 | with tf.name_scope("bw" + scope), tf.variable_scope("bw" + scope, reuse=reuse_f): 93 | print(tf.get_variable_scope().name) 94 | bw_cell = tf.contrib.rnn.BasicLSTMCell(self.hidden_n, forget_bias=1.0, state_is_tuple=True) 95 | 96 | with tf.name_scope("fwbw" + scope), tf.variable_scope("fwbw" + scope, reuse=reuse_f): 97 | outputs, _, output_state_bw = tf.contrib.rnn.static_bidirectional_rnn(fw_cell, bw_cell, x, dtype=tf.float32) 98 | 99 | outputs = tf.stack(outputs) # list of Tensor(None*(2*hidden)) => steps * None * (2*hidden) 100 | 101 | outputs = tf.transpose(outputs, [1, 0, 2]) # steps * None * (2*hidden) => None * steps * (2 * hidden) 102 | print(outputs.get_shape().as_list()) 103 | 104 | # Use mask 105 | outputs = outputs * mask[:, :, None] 106 | # mean pooling to get the vector 107 | x = tf.reduce_sum(mask, 1)[:, None] 108 | outputs = tf.reduce_sum(outputs, 1) / x 109 | print(outputs.get_shape().as_list()) 110 | return outputs 111 | 112 | def get_Representation(self, x, scope, embedding_size, sequence_length): 113 | n_input = embedding_size 114 | n_steps = sequence_length 115 | 116 | x = tf.transpose(x, [1, 0, 2]) 117 | # Reshape to (n_steps*batch_size, n_input) 118 | x = tf.reshape(x, [-1, n_input]) 119 | # Split to get a list of 'n_steps' tensors of shape (batch_size, n_input) 120 | x = tf.split(x, n_steps, 0) 121 | 122 | with tf.name_scope("fw" + scope), tf.variable_scope("fw" + scope, reuse=True): 123 | print(tf.get_variable_scope().name) 124 | fw_cell = tf.contrib.rnn.BasicLSTMCell(self.hidden_n, forget_bias=1.0, state_is_tuple=True) 125 | with tf.name_scope("bw" + scope), tf.variable_scope("bw" + scope, reuse=True): 126 | print(tf.get_variable_scope().name) 127 | bw_cell = tf.contrib.rnn.BasicLSTMCell(self.hidden_n, forget_bias=1.0, state_is_tuple=True) 128 | with tf.name_scope("fwbw" + scope), tf.variable_scope("fwbw" + scope, reuse=True): 129 | # Outputs list contains the depth-concatenated fw and bw vectors for each input. 
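        # Note: unlike BiRNN above, get_Representation rebuilds the fw/bw cells with reuse=True,
        # so it shares the weights already trained in BiRNN, but it returns the raw per-timestep
        # outputs. Masking and IDF-based attention are applied later by the caller
        # (see Tensor.getAttention_M in tensor.py), whereas BiRNN masks and mean-pools internally.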
130 | # output shape -- [time][batch][cell_fw.output_size + cell_bw.output_size] 131 | outputs, _, output_state_bw = tf.contrib.rnn.static_bidirectional_rnn(fw_cell, bw_cell, x, dtype=tf.float32) 132 | return outputs 133 | 134 | def contrastive_loss(self, y, d, batch_size): 135 | tmp = y * tf.square(d) 136 | margin = 1 137 | tmp2 = (1 - y) * tf.square(tf.maximum((margin - d), 0)) 138 | return tf.reduce_sum(tmp + tmp2) / batch_size / 2 139 | 140 | # dis = tf.subtract(tf.Variable(1.0), d) 141 | # tmp = tf.square(dis) / 4 142 | # margin = 0.4 143 | # tmp2 = tf.multiply(tf.cast(tf.less(margin, d), "float"), tf.square(d)) 144 | # return tf.reduce_sum(y * tmp + (1 - y) * tmp2) 145 | -------------------------------------------------------------------------------- /tensor.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf-8 3 | import gensim, jieba, os 4 | from scipy import spatial 5 | from util.util import preprocess_arr 6 | import numpy as np 7 | import tensorflow as tf 8 | 9 | from util.input_helpers import InputHelper 10 | from util.preprocess import MyVocabularyProcessor 11 | from util.util import loadIDFModel 12 | 13 | 14 | class Tensor(object): 15 | def __init__(self, m, e, batch_size, sequence_length, task_num, lstm_dir): 16 | """ 17 | initialize tensors 18 | :param m: mention list 19 | :param e: entity list 20 | :param batch_size: batch_size 21 | :param sequence_length: default sentence length 22 | :param task_num: the type of task(1--description, 2--operation) 23 | """ 24 | channel_num = 4 25 | if task_num == 1: # task1-description disambiguation 26 | word2vec_dir = "./data/word2vec" 27 | self.idfModel_file = "./data/description_idf.txt" 28 | # self.idfModel_file = "./data/idfModel.txt" 29 | 30 | else: # task2-operation disambiguation 31 | word2vec_dir = "./data/operation" 32 | self.idfModel_file = "./data/operation_idf.txt" 33 | # self.idfModel_file = "./data/idfModel_operation.txt" 34 | 35 | character_model_file = os.path.join(word2vec_dir, 'character.model') 36 | word_mode_file = os.path.join(word2vec_dir, 'word.model') 37 | self.bilstm_dir = lstm_dir 38 | 39 | self.mentions = preprocess_arr(m) 40 | self.entities = preprocess_arr(e) 41 | self.sequence_length = sequence_length 42 | self.batch_size = batch_size 43 | 44 | self.character_embedding_model = gensim.models.Word2Vec.load(character_model_file) 45 | self.word_embedding_model = gensim.models.Word2Vec.load(word_mode_file) 46 | 47 | self.tensor = np.zeros(shape=(batch_size, channel_num, sequence_length, sequence_length)) 48 | self.init_matrices() 49 | 50 | def init_matrices(self): 51 | """ 52 | initialize four matrices in the tensor 53 | :return: 54 | """ 55 | sentence_embedding_m, sentence_embedding_e = self.getSentence_Embedding(self.mentions, self.entities, 56 | self.sequence_length) 57 | np.save("no_att_sentece_m_opr.npy", sentence_embedding_m) 58 | np.save("no_att_sentece_e_opr.npy", sentence_embedding_e) 59 | # sentence_embedding_m = np.load("0823_sentence_m.npy") 60 | # sentence_embedding_e = np.load("0823_sentence_e.npy") 61 | 62 | print("Sentence Embedding Finished!") 63 | 64 | for sample_index in range(self.batch_size): 65 | len_mention = len(self.mentions[sample_index].decode("utf-8")) 66 | len_entity = len(self.entities[sample_index].decode("utf-8")) 67 | 68 | # for word matching 69 | words_m = list(jieba.cut(self.mentions[sample_index])) 70 | words_e = list(jieba.cut(self.entities[sample_index])) 71 | len_w_m = len(words_m) 72 | len_w_e = 
len(words_e) 73 | 74 | for i in range(len_w_m): 75 | for j in range(len_w_e): 76 | 77 | words_sim = 1 - spatial.distance.cosine( 78 | self.get_embedding(words_m[i], self.word_embedding_model), 79 | self.get_embedding(words_e[j], self.word_embedding_model)) 80 | 81 | # assign the word_pair_sim to the character_pairs which construct the words 82 | for character_i in words_m[i]: 83 | for character_j in words_e[j]: 84 | self.tensor[sample_index][2][ 85 | self.mentions[sample_index].index(character_i), self.entities[sample_index].index( 86 | character_j)] = words_sim 87 | 88 | for i in range(len_mention): 89 | for j in range(len_entity): 90 | # for sentence matching 91 | self.tensor[sample_index][3][i][j] = 1 - spatial.distance.cosine( 92 | sentence_embedding_m[sample_index][i], sentence_embedding_e[sample_index][j]) 93 | 94 | # for string matching 95 | if self.mentions[sample_index][i] == self.entities[sample_index][j]: 96 | self.tensor[sample_index][0][i][j] = 1 97 | 98 | # for character matching 99 | character_embedding_mi = self.get_embedding(self.mentions[sample_index][i], 100 | self.character_embedding_model) 101 | character_embedding_ei = self.get_embedding(self.entities[sample_index][j], 102 | self.character_embedding_model) 103 | self.tensor[sample_index][1][i][j] = 1 - spatial.distance.cosine(character_embedding_ei, 104 | character_embedding_mi) 105 | 106 | print("Tensor Completed!") 107 | 108 | def get_tensor(self): 109 | return self.tensor 110 | 111 | 112 | def get_embedding(self, word, model): 113 | if word in model.wv.vocab.keys(): 114 | index = model.wv.index2word.index(word) 115 | return model.wv.syn0[index] 116 | # return model.wv.vocab[word] 117 | else: 118 | vector_length = 100 119 | return np.ones([vector_length]) 120 | 121 | 122 | def getIDFWeights(self, x_names, x_index, vocab_id_w, idfModel): 123 | res = [] # idf_weights 124 | length = len(x_names) 125 | 126 | for i in range(length): 127 | name = x_names[i] 128 | index = x_index[i] 129 | character_idfs = dict() 130 | default_idf = 1 # for unknown word, assign idf=1 131 | 132 | name_segs = list(jieba.cut(name)) 133 | 134 | for name in name_segs: 135 | for character in name: 136 | if name in idfModel.keys(): 137 | character_idfs[character] = idfModel[name] 138 | else: 139 | character_idfs[character] = default_idf 140 | 141 | weight_per_name = [] 142 | for character_index in index: 143 | character = vocab_id_w[character_index] 144 | if character in character_idfs.keys(): 145 | weight_per_name.append(character_idfs[character]) 146 | else: 147 | weight_per_name.append(default_idf) 148 | res.append(weight_per_name) 149 | res_arr = np.asarray(res) 150 | 151 | # *** normalize the idf weights 152 | row_sums = res_arr.sum(axis=1) 153 | res_arr = res_arr / row_sums[:, np.newaxis] 154 | return res_arr 155 | 156 | 157 | def getAttention(self, r, x, index, vocab_id_w, idfModel): 158 | # r(input_size, None, hidden_n * 2) => (None, input_size, hidden_n * 2) 159 | representation = np.transpose(r, (1, 0, 2)) 160 | 161 | weights = self.getIDFWeights(x, index, vocab_id_w, idfModel) # shape: batch_size * sequence_length 162 | 163 | # weights transform from 2D to 3 D and then 3D*3D broadcasting 164 | representation = representation * weights[:, :, np.newaxis] 165 | return representation 166 | 167 | 168 | def getAttention_M(self, r, m, x, index, vocab_id_w, idfModel): 169 | # r(input_size, None, hidden_n * 2) => (None, input_size, hidden_n * 2) 170 | representation = np.transpose(r, (1, 0, 2)) 171 | representation = representation * m[:, :, 
np.newaxis] 172 | 173 | weights = self.getIDFWeights(x, index, vocab_id_w, idfModel) # shape: batch_size * sequence_length 174 | 175 | # weights transform from 2D to 3 D and then 3D*3D broadcasting 176 | representation = representation * weights[:, :, np.newaxis] 177 | return representation 178 | def no_attention(self, r, m, x, index, vocab_id_w, idfModel): 179 | # r(input_size, None, hidden_n * 2) => (None, input_size, hidden_n * 2) 180 | representation = np.transpose(r, (1, 0, 2)) 181 | representation = representation * m[:, :, np.newaxis] 182 | 183 | # weights = self.getIDFWeights(x, index, vocab_id_w, idfModel) # shape: batch_size * sequence_length 184 | 185 | # weights transform from 2D to 3 D and then 3D*3D broadcasting 186 | # representation = representation * weights[:, :, np.newaxis] 187 | return representation 188 | 189 | def getSentence_Embedding(self, x1, x2, max_document_length): 190 | checkpoint_dir = os.path.abspath(os.path.join(self.bilstm_dir, "checkpoints")) 191 | ckpt = tf.train.get_checkpoint_state(checkpoint_dir) 192 | checkpoint_file = ckpt.model_checkpoint_path 193 | 194 | vocab_file = os.path.join(checkpoint_dir, "vocab") 195 | 196 | inpH = InputHelper() 197 | vocab_processor = MyVocabularyProcessor(max_document_length, min_frequency=0) 198 | vocab_processor = vocab_processor.restore(vocab_file) 199 | 200 | tmp = [] 201 | (x1_index, x2_index, mask_x1, mask_x2, tmp) = inpH.get_data(vocab_processor, x1, x2, tmp, 202 | max_document_length) 203 | # x1_index, x2_index = inpH.toVocabularyIndexVector(x1, x2, vocab_file, max_document_length) 204 | 205 | idfModel = loadIDFModel(self.idfModel_file) 206 | 207 | # # load vocabulary model 208 | # vocab_processor = MyVocabularyProcessor(max_document_length, min_frequency=0) 209 | # vocab_processor = vocab_processor.restore(vocab_file) 210 | 211 | # Extract word:id mapping from the object. 
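            # vocab_dict maps character -> id; vocab_id_w below reverses it (id -> character) so
            # that the integer ids produced by the vocabulary processor can be turned back into
            # characters when looking up per-character IDF weights in getIDFWeights().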
212 | vocab_dict = vocab_processor.vocabulary_._mapping 213 | vocab_id_w = dict((y, x) for x, y in vocab_dict.iteritems()) 214 | 215 | print("\nGenerating Sentence Embedding Result...\n") 216 | graph = tf.Graph() 217 | 218 | with graph.as_default(): 219 | sess = tf.Session() 220 | 221 | with sess.as_default(): 222 | # Load the saved meta graph and restore variables 223 | saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file)) 224 | sess.run(tf.initialize_all_variables()) 225 | saver.restore(sess, checkpoint_file) 226 | 227 | # Get the placeholders from the graph by name 228 | # the output is a list with only one element 229 | input_x1 = graph.get_operation_by_name("input_x1").outputs[0] 230 | input_x2 = graph.get_operation_by_name("input_x2").outputs[0] 231 | 232 | sentence_representation1 = graph.get_operation_by_name("sentence_embedding/Representation1").outputs[0] 233 | sentence_representation2 = graph.get_operation_by_name("sentence_embedding/Representation2").outputs[0] 234 | print "Sentence vector shape after sentence modeling" 235 | print sentence_representation2.get_shape().as_list() 236 | 237 | r1, r2 = sess.run([sentence_representation1, sentence_representation2], 238 | {input_x1: x1_index, input_x2: x2_index}) 239 | 240 | # Applied Attention_mechanism 241 | representation1 = self.getAttention_M(r1, mask_x1, x1, x1_index, vocab_id_w, idfModel) 242 | representation2 = self.getAttention_M(r2, mask_x2, x2, x2_index, vocab_id_w, idfModel) 243 | # representation1 = self.no_attention(r1, mask_x1, x1, x1_index, vocab_id_w, idfModel) 244 | # representation2 = self.no_attention(r2, mask_x2, x2, x2_index, vocab_id_w, idfModel) 245 | 246 | return representation1, representation2 247 | -------------------------------------------------------------------------------- /Dynamic/MT_Dynamic_MultiGranModel.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding=utf-8 3 | import numpy as np 4 | import tensorflow as tf 5 | 6 | 7 | class MT_Dynamic_MultiGranModel(object): 8 | def _conv(self, name, in_, ksize, reuse=False): 9 | num_filters = ksize[3] 10 | 11 | with tf.variable_scope(name, reuse=reuse) as scope: 12 | # same CNN for different views 13 | W = tf.get_variable("weights", ksize, initializer=tf.truncated_normal_initializer(stddev=0.1)) 14 | biases = tf.get_variable("biases", [num_filters], initializer=tf.constant_initializer(0.1)) 15 | 16 | conv = tf.nn.conv2d(in_, W, strides=[1, 1, 1, 1], padding="VALID") 17 | h = tf.nn.relu(tf.nn.bias_add(conv, biases), name=scope.name) 18 | 19 | return h 20 | 21 | def _maxpool(self, name, in_, ksize, strides): 22 | pool = tf.nn.max_pool(in_, ksize=ksize, strides=strides, padding='VALID', name=name) 23 | print name, pool.get_shape().as_list() 24 | return pool 25 | 26 | def __init__(self, max_len1, max_len2, filter_sizes, num_filters, l2_reg_lambda=0.0): 27 | channel_num = 4 28 | 29 | # Placeholders for input, output and dropout 30 | self.input_tensor = tf.placeholder(tf.float32, [None, max_len1, max_len1, 4], name="input_tensor_description") 31 | self.input_tensor_o = tf.placeholder(tf.float32, [None, max_len2, max_len2, 4], name="input_tensor_operation") 32 | 33 | self.input_y_description = tf.placeholder(tf.float32, [None, 2], name="input_y_description") 34 | self.input_y_operation = tf.placeholder(tf.float32, [None, 2], name="input_y_operation") 35 | 36 | self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob") 37 | 38 | self.gamma = 
tf.placeholder(tf.float32, [None], name="des_mtl_param") 39 | self.mask_opr = tf.placeholder(tf.float32, [None], name="mask_opr") 40 | 41 | self.gamma_ = tf.expand_dims(self.gamma, axis=1) 42 | 43 | self.matrix = tf.placeholder(tf.float32, [None, 1], name="cooccurence") 44 | 45 | # Keeping track of l2 regularization loss (optional) 46 | l2_loss_d = tf.constant(0.0) 47 | l2_loss_operation = tf.constant(0.0) 48 | 49 | # Create a convolution + maxpool layer for each filter size 50 | pooled_outputs = [] 51 | pooled_outputs_operation = [] 52 | 53 | input_tensor = tf.expand_dims(self.input_tensor, 4) # N x W x H x V => N x W x H x V x C 54 | input_tensor = tf.transpose(input_tensor, 55 | perm=[3, 0, 1, 2, 4]) # N x W x H x V x C => V x N x W x H x C 56 | 57 | input_tensor_operation = tf.expand_dims(self.input_tensor_o, 4) # N x W x H x V => N x W x H x V x C 58 | input_tensor_operation = tf.transpose(input_tensor_operation, 59 | perm=[3, 0, 1, 2, 4]) # N x W x H x V x C => V x N x W x H x C 60 | 61 | with tf.name_scope("CNN_Description"): 62 | for i in range(channel_num): 63 | # set reuse True for i > 0, for weight-sharing 64 | reuse_f = (i != 0) 65 | with tf.variable_scope("CNN_Description", reuse=reuse_f): 66 | view = tf.gather(input_tensor, i) # N x W x H x C 67 | 68 | filter_shape1 = [filter_sizes[0], filter_sizes[0], 1, num_filters / 2] 69 | filter_shape2 = [filter_sizes[1], filter_sizes[1], num_filters / 2, num_filters] 70 | p_size1 = [1, 3, 3, 1] 71 | p_size2 = [1, 5, 5, 1] 72 | 73 | conv1 = self._conv('conv1', view, filter_shape1, reuse=reuse_f) 74 | pool1 = self._maxpool('pool1', conv1, ksize=p_size1, strides=[1, 1, 1, 1]) 75 | 76 | # conv2 = self._conv('conv2', pool1, filter_shape2, reuse=reuse_f) 77 | # pool2 = self._maxpool('pool2', conv2, ksize=p_size2, strides=[1, 1, 1, 1]) 78 | 79 | dim1 = np.prod(pool1.get_shape().as_list()[1:]) 80 | reshape = tf.reshape(pool1, [-1, dim1]) 81 | 82 | pooled_outputs.append(reshape) 83 | 84 | with tf.name_scope("CNN_Operation"): 85 | for i in range(channel_num): 86 | # set reuse True for i > 0, for weight-sharing 87 | reuse_f = (i != 0) 88 | 89 | with tf.variable_scope("CNN_Operation", reuse=reuse_f): 90 | view = tf.gather(input_tensor_operation, i) # N x W x H x C 91 | 92 | filter_shape1 = [filter_sizes[0], filter_sizes[0], 1, num_filters / 2] 93 | filter_shape2 = [filter_sizes[1], filter_sizes[1], num_filters / 2, num_filters] 94 | p_size1 = [1, 2, 2, 1] 95 | p_size2 = [1, 5, 5, 1] 96 | 97 | conv1 = self._conv('conv1', view, filter_shape1, reuse=reuse_f) 98 | pool1 = self._maxpool('pool1', conv1, ksize=p_size1, strides=[1, 1, 1, 1]) 99 | 100 | # conv2 = self._conv('conv2', pool1, filter_shape2, reuse=reuse_f) 101 | # pool2 = self._maxpool('pool2', conv2, ksize=p_size2, strides=[1, 1, 1, 1]) 102 | 103 | dim2 = np.prod(pool1.get_shape().as_list()[1:]) 104 | reshape = tf.reshape(pool1, [-1, dim2]) 105 | 106 | pooled_outputs_operation.append(reshape) 107 | 108 | with tf.name_scope("Descriptipn_view_pooling"): 109 | x = tf.stack(pooled_outputs) # 4 * N * 7744 110 | x = tf.transpose(x, perm=[1, 2, 0]) # N * 7744 * 4 111 | reshape = tf.reshape(x, [-1, 4]) 112 | print reshape.get_shape().as_list() 113 | 114 | Weights = tf.Variable(tf.random_uniform([4, 1], 0.0, 1.0), name="W") 115 | 116 | y_d = tf.matmul(reshape, Weights, name="view_pooling") 117 | y_d = tf.reshape(y_d, [-1, dim1]) 118 | print y_d.get_shape().as_list() 119 | 120 | with tf.name_scope("Operation_view_pooling"): 121 | x = tf.stack(pooled_outputs_operation) # 4 * N * 7744 122 | x = 
tf.transpose(x, perm=[1, 2, 0]) # N * 7744 * 4 123 | reshape = tf.reshape(x, [-1, 4]) 124 | print reshape.get_shape().as_list() 125 | 126 | Weights = tf.Variable(tf.random_uniform([4, 1], 0.0, 1.0), name="W") 127 | 128 | y_o = tf.matmul(reshape, Weights, name="view_pooling") 129 | y_o = tf.reshape(y_o, [-1, dim2]) 130 | y_o = y_o * tf.expand_dims(self.mask_opr, axis=1) 131 | print y_o.get_shape().as_list() 132 | 133 | # Add dropout 134 | with tf.name_scope("dropout"): 135 | self.h_drop_d = tf.nn.dropout(y_d, self.dropout_keep_prob, name="hidden_output_description_drop") 136 | self.h_drop_o = tf.nn.dropout(y_o, self.dropout_keep_prob, name="hidden_output_operation_drop") 137 | print self.h_drop_d.get_shape().as_list() 138 | print self.h_drop_o.get_shape().as_list() 139 | 140 | with tf.name_scope("FC"): 141 | dim = 100 142 | W1 = tf.Variable(name="W1", initial_value=tf.truncated_normal(shape=[dim1, dim], stddev=0.1)) 143 | b1 = tf.Variable(tf.constant(0.1, shape=[dim]), name="b1") 144 | 145 | self.fc_d = tf.nn.relu(tf.matmul(self.h_drop_d, W1) + b1) 146 | self.fc_drop_d = tf.nn.dropout(self.fc_d, self.dropout_keep_prob) 147 | 148 | W2 = tf.Variable(name="W2", initial_value=tf.truncated_normal(shape=[dim2, dim], stddev=0.1)) 149 | b2 = tf.Variable(tf.constant(0.1, shape=[dim]), name="b2") 150 | 151 | self.fc_o = tf.nn.relu(tf.matmul(self.h_drop_o, W2) + b2) 152 | self.fc_drop_o = tf.nn.dropout(self.fc_o, self.dropout_keep_prob) 153 | 154 | # Share Layer Construction 155 | with tf.name_scope("Multitask"): 156 | 157 | layer1 = self.fc_drop_d * self.gamma_ 158 | layer2 = self.fc_drop_o * 0.5 159 | self.shared_layer = tf.add(layer1, layer2, name="shared_Layer") 160 | 161 | print self.shared_layer.get_shape().as_list() 162 | 163 | W1 = tf.get_variable(name="tt1_W", shape=[dim], 164 | initializer=tf.truncated_normal_initializer(stddev=0.1)) 165 | W2 = tf.get_variable(name="st1_W", shape=[dim], 166 | initializer=tf.truncated_normal_initializer(stddev=0.1)) 167 | W3 = tf.get_variable(name="st2_W", shape=[dim], 168 | initializer=tf.truncated_normal_initializer(stddev=0.1)) 169 | W4 = tf.get_variable(name="tt2_W", shape=[dim], 170 | initializer=tf.truncated_normal_initializer(stddev=0.1)) 171 | 172 | self.task1_r = tf.add(tf.multiply(self.shared_layer, W2), tf.multiply(self.fc_drop_d, W1), 173 | name="description_r") 174 | self.task2_r = tf.add(tf.multiply(self.shared_layer, W3), tf.multiply(self.fc_drop_o, W4), 175 | name="operation_r") 176 | print self.task1_r.get_shape().as_list() 177 | 178 | with tf.name_scope("FC2"): 179 | W1 = tf.Variable(name="W1", initial_value=tf.truncated_normal(shape=[dim, dim / 2], stddev=0.1)) 180 | b1 = tf.Variable(tf.constant(0.1, shape=[dim / 2]), name="b1") 181 | 182 | self.task1_representation = tf.nn.relu(tf.matmul(self.task1_r, W1) + b1) 183 | self.task1_representation = tf.nn.dropout(self.task1_representation, self.dropout_keep_prob) 184 | 185 | W2 = tf.Variable(name="W2", initial_value=tf.truncated_normal(shape=[dim, dim / 2], stddev=0.1)) 186 | b2 = tf.Variable(tf.constant(0.1, shape=[dim / 2]), name="b2") 187 | 188 | self.task2_representation = tf.nn.relu(tf.matmul(self.task2_r, W2) + b2) 189 | self.task2_representation = tf.nn.dropout(self.task2_representation, self.dropout_keep_prob) 190 | 191 | # Final (unnormalized) scores and predictions 192 | with tf.name_scope("output"): 193 | W_d = tf.get_variable(name="W_d", shape=[dim / 2, 2], 194 | initializer=tf.truncated_normal_initializer(stddev=0.1)) 195 | b_d = tf.Variable(tf.constant(0.1, shape=[2]), name="b_d") 
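                # W_d / b_d form the 2-way softmax head for the description task; the operation
                # head W_o / b_o is built just below. Both heads contribute to the L2 term that
                # is scaled by l2_reg_lambda in the loss.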
196 | 197 | l2_loss_d += tf.nn.l2_loss(W_d) 198 | l2_loss_d += tf.nn.l2_loss(b_d) 199 | 200 | # W_o = tf.Variable(name="W_output_o", initial_value=tf.random_normal([dim, 2], stddev=0.1)) 201 | W_o = tf.get_variable(name="W_o", shape=[dim / 2, 2], 202 | initializer=tf.truncated_normal_initializer(stddev=0.1)) 203 | b_o = tf.Variable(tf.constant(0.1, shape=[2]), name="b_o") 204 | 205 | l2_loss_operation += tf.nn.l2_loss(W_o) 206 | l2_loss_operation += tf.nn.l2_loss(b_o) 207 | 208 | self.scores_d = tf.nn.xw_plus_b(self.task1_representation, W_d, b_d, name="scores1") 209 | self.scores_o = tf.nn.xw_plus_b(self.task2_representation, W_o, b_o, name="scores2") 210 | 211 | self.relation_d = tf.nn.softmax(self.scores_d, name="relation1") 212 | self.relation_o = tf.nn.softmax(self.scores_o, name="relation2") 213 | 214 | self.predictions_d = tf.argmax(self.scores_d, 1, name="predictions1") 215 | self.predictions_o = tf.argmax(self.scores_o, 1, name="predictions2") 216 | 217 | # Calculate Mean cross-entropy loss 218 | with tf.name_scope("loss"): 219 | losses1 = tf.nn.softmax_cross_entropy_with_logits(logits=self.scores_d, labels=self.input_y_description) 220 | losses2 = tf.nn.softmax_cross_entropy_with_logits(logits=self.scores_o, 221 | labels=self.input_y_operation) * self.mask_opr 222 | 223 | constraints = self.matrix * tf.reduce_sum(tf.square(self.relation_d - self.relation_o), axis=1) 224 | self.constraints = tf.identity(constraints, name="constraints") 225 | 226 | self.loss = tf.reduce_mean(losses1) + tf.reduce_mean(losses2) + l2_reg_lambda * ( 227 | l2_loss_d + l2_loss_operation) + tf.reduce_mean(constraints * self.mask_opr) 228 | 229 | # Accuracy 230 | with tf.name_scope("accuracy"): 231 | correct_predictions_d = tf.cast(tf.equal(self.predictions_d, tf.argmax(self.input_y_description, 1)), 232 | "float") 233 | 234 | label_opr = tf.argmax(self.input_y_operation, 1) 235 | correct_predictions_o = tf.multiply(tf.cast(tf.equal(self.predictions_o, label_opr), "float"), 236 | self.mask_opr) 237 | 238 | self.accuracy_d = tf.reduce_mean(correct_predictions_d, name="accuracy_d") 239 | self.accuracy_o = tf.div(tf.reduce_sum(correct_predictions_o), 240 | tf.reduce_sum(self.mask_opr), name="accuracy_o") 241 | -------------------------------------------------------------------------------- /util/input_helpers.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf-8 3 | 4 | import codecs 5 | import gc 6 | import sys 7 | 8 | import numpy as np 9 | 10 | from preprocess import MyVocabularyProcessor 11 | from util import preprocess_unit, preprocess_arr 12 | 13 | reload(sys) 14 | sys.setdefaultencoding("utf-8") 15 | 16 | 17 | class InputHelper(object): 18 | def get_data(self, vocab_processor, train_x1, train_x2, train_y, max_document_length): 19 | """ 20 | Use vocab_processor to index mention and entity pairs and then pad them and return mask arrs 21 | :param vocab_processor: 22 | :param train_x1: 23 | :param train_x2: 24 | :param train_y: 25 | :param max_document_length: 26 | :return: 27 | """ 28 | train_x1_i = np.asarray(list(vocab_processor.transform(train_x1))) 29 | train_x2_i = np.asarray(list(vocab_processor.transform(train_x2))) 30 | 31 | mask_train_x1 = np.zeros([len(train_x1_i), max_document_length]) 32 | mask_train_x2 = np.zeros([len(train_x2_i), max_document_length]) 33 | 34 | new_mask_x1, new_mask_x2 = self.padding_and_generate_mask(train_x1, train_x2, mask_train_x1, mask_train_x2) 35 | return (train_x1_i, train_x2_i, new_mask_x1, 
new_mask_x2, train_y) 36 | 37 | def padding_and_generate_mask(self, x1, x2, new_mask_x1, new_mask_x2): 38 | """ 39 | Pad the sentence and return mask array for mention and entity pair 40 | :param x1: 41 | :param x2: 42 | :param new_mask_x1: 43 | :param new_mask_x2: 44 | :return: 45 | """ 46 | 47 | for i, (x1, x2) in enumerate(zip(x1, x2)): 48 | # whether to remove sentences with length larger than maxlen 49 | if len(x1) == 0 or len(x2) == 0: 50 | print("") 51 | new_mask_x1[i, 0:len(x1)] = 1.0 52 | new_mask_x2[i, 0:len(x2)] = 1.0 53 | return new_mask_x1, new_mask_x2 54 | 55 | def add_y_helper(self, y_value, y_arr, is_positive_label): 56 | """ 57 | add 1/0 or [0,1]/[1, 0] in y_arr which depends on y_value flag 58 | :param y_value: 59 | :param y_arr: 60 | :param is_positive_label: 61 | :return: 62 | """ 63 | if y_value == True: 64 | if is_positive_label: 65 | y_arr.append(1) 66 | else: 67 | y_arr.append(0) 68 | else: 69 | if is_positive_label: 70 | y_arr.append(np.array([0, 1])) 71 | else: 72 | y_arr.append(np.array([1, 0])) 73 | return y_arr 74 | 75 | def batch_iter(self, data, batch_size, num_epochs, shuffle=True): 76 | """ 77 | Generates a batch iterator for a data set. 78 | :param data: 79 | :param batch_size: 80 | :param num_epochs: 81 | :param shuffle: 82 | :return: 83 | """ 84 | data = np.asarray(data) 85 | print(data) 86 | print(data.shape) 87 | data_size = len(data) 88 | num_batches_per_epoch = int(len(data) / batch_size) 89 | 90 | if shuffle: 91 | print "Shuffle!!!!" 92 | for epoch in range(num_epochs): 93 | # Shuffle the data at each epoch 94 | if shuffle: 95 | 96 | shuffle_indices = np.random.permutation(np.arange(data_size)) 97 | shuffled_data = data[shuffle_indices] 98 | else: 99 | shuffled_data = data 100 | for batch_num in range(num_batches_per_epoch): 101 | start_index = batch_num * batch_size 102 | end_index = min((batch_num + 1) * batch_size, data_size) 103 | yield shuffled_data[start_index:end_index] 104 | 105 | def getTestIndexedDataSet(self, data_path, sep, vocab_processor, max_document_length, y_value): 106 | """ 107 | Read in labeled test data and use previous vocabulary processor to index them 108 | :param data_path: 109 | :param sep: 110 | :param vocab_processor: 111 | :param max_document_length: 112 | :param y_value: 113 | :return: 114 | """ 115 | x1_temp, x2_temp, y = self.getTsvTestData(data_path, sep, max_document_length, y_value) 116 | 117 | x1 = np.asarray(list(vocab_processor.transform(x1_temp))) 118 | x2 = np.asarray(list(vocab_processor.transform(x2_temp))) 119 | return x1, x2, y 120 | 121 | def toVocabularyIndexVector(self, datax1, datax2, vocab_path, max_document_length): 122 | """ 123 | Transform the word list to vocabulary_index vectors 124 | :param datax1: 125 | :param datax2: 126 | :param vocab_path: 127 | :param max_document_length: 128 | :return: 129 | """ 130 | # Build vocabulary 131 | vocab_processor = MyVocabularyProcessor(max_document_length, min_frequency=0) 132 | vocab_processor = vocab_processor.restore(vocab_path) 133 | print(len(vocab_processor.vocabulary_)) 134 | 135 | datax1 = preprocess_arr(datax1) 136 | datax2 = preprocess_arr(datax2) 137 | x1 = np.asarray(list(vocab_processor.transform(datax1))) 138 | x2 = np.asarray(list(vocab_processor.transform(datax2))) 139 | # Randomly shuffle data 140 | del vocab_processor 141 | gc.collect() 142 | return x1, x2 143 | 144 | def getTsvTestData(self, filepath, sep, sequence_length, y_value=False): 145 | """ 146 | load the data(label, mention, entity) from labeled files 147 | :param filepath: 148 | 
:return: three lists(label_list, mention_list, entity_list) 149 | """ 150 | print("Loading testing/labelled data from " + filepath) 151 | x1, x2 = [], [] 152 | y = [] 153 | line_num = 0 154 | for line in codecs.open(filepath, "r", "utf-8"): 155 | line_num += 1 156 | l = line.strip().split(sep) 157 | if len(l) < 3: 158 | continue 159 | 160 | l[1] = preprocess_unit(l[1]) 161 | l[2] = preprocess_unit(l[2]) 162 | 163 | if len(l[1]) == 0 or len(l[2]) == 0: 164 | continue 165 | 166 | # truncate when length is bigger than the max_length 167 | if len(l[1]) > sequence_length or len(l[2]) > sequence_length: 168 | l[1] = l[1][:sequence_length] 169 | l[2] = l[2][:sequence_length] 170 | 171 | x1.append(l[1]) 172 | x2.append(l[2]) 173 | y = self.add_y_helper(y_value, y, int(l[0]) == 1) 174 | if line_num != len(y): 175 | print("ei") 176 | print(line_num) 177 | return np.asarray(x1), np.asarray(x2), np.asarray(y) 178 | 179 | def getTsvTestData_Mul(self, filepath, sep, sequence_length, y_value=False): 180 | """ 181 | load the data(label, mention, entity) from labeled mutlti-task files 182 | :param filepath: 183 | :return: three lists(label_list, mention_list, entity_list) 184 | """ 185 | print("Loading testing/labelled data from " + filepath) 186 | x1, x2, x3, x4 = [], [], [], [] 187 | y = [] 188 | y2 = [] 189 | for line in codecs.open(filepath, "r", "utf-8"): 190 | l = line.strip().split(sep) 191 | if len(l) < 5: 192 | continue 193 | 194 | l[1] = preprocess_unit(l[1]) 195 | l[2] = preprocess_unit(l[2]) 196 | l[3] = preprocess_unit(l[3]) 197 | l[4] = preprocess_unit(l[4]) 198 | 199 | # truncate when length is bigger than the max_length 200 | if len(l[1]) > sequence_length or len(l[2]) > sequence_length or len(l[3]) > sequence_length or len( 201 | l[4]) > sequence_length: 202 | l[1] = l[1][:sequence_length] 203 | l[2] = l[2][:sequence_length] 204 | l[3] = l[3][:sequence_length] 205 | l[4] = l[4][:sequence_length] 206 | 207 | x1.append(l[1]) 208 | x2.append(l[2]) 209 | x3.append(l[3]) 210 | x4.append(l[4]) 211 | y = self.add_y_helper(y_value, y, int(l[0]) == 1) 212 | y2 = self.add_y_helper(y_value, y2, int(l[0]) == 1) 213 | return np.asarray(x1), np.asarray(x2), np.asarray(x3), np.asarray(x4), np.asarray(y), np.asarray(y2) 214 | 215 | def getTsvTestData_Mul(self, filepath, sep, sequence_length, y_value=False): 216 | """ 217 | load the data(label, mention, entity) from labeled mutlti-task files 218 | :param filepath: 219 | :return: three lists(label_list, mention_list, entity_list) 220 | """ 221 | print("Loading testing/labelled data from " + filepath) 222 | x1, x2, x3, x4 = [], [], [], [] 223 | y = [] 224 | y2 = [] 225 | for line in codecs.open(filepath, "r", "utf-8"): 226 | l = line.strip().split(sep) 227 | if len(l) < 5: 228 | continue 229 | 230 | l[1] = preprocess_unit(l[1]) 231 | l[2] = preprocess_unit(l[2]) 232 | l[3] = preprocess_unit(l[3]) 233 | l[4] = preprocess_unit(l[4]) 234 | 235 | # truncate when length is bigger than the max_length 236 | if len(l[1]) > sequence_length or len(l[2]) > sequence_length or len(l[3]) > sequence_length or len( 237 | l[4]) > sequence_length: 238 | l[1] = l[1][:sequence_length] 239 | l[2] = l[2][:sequence_length] 240 | l[3] = l[3][:sequence_length] 241 | l[4] = l[4][:sequence_length] 242 | 243 | x1.append(l[1]) 244 | x2.append(l[2]) 245 | x3.append(l[3]) 246 | x4.append(l[4]) 247 | y = self.add_y_helper(y_value, y, int(l[0]) == 1) 248 | y2 = self.add_y_helper(y_value, y2, int(l[0]) == 1) 249 | return np.asarray(x1), np.asarray(x2), np.asarray(x3), np.asarray(x4), 
np.asarray(y), np.asarray(y2) 250 | 251 | def getTsvTestData_Mul_Labels(self, filepath, sep, sequence_length, y_value=False): 252 | """ 253 | load the data(label, mention, entity) from labeled mutlti-task files 254 | :param filepath: 255 | :return: three lists(label_list, mention_list, entity_list) 256 | """ 257 | print("Loading testing/labelled data from " + filepath) 258 | x1, x2, x3, x4 = [], [], [], [] 259 | y = [] 260 | y2 = [] 261 | for line in codecs.open(filepath, "r", "utf-8"): 262 | l = line.strip().split(sep) 263 | if len(l) < 6: 264 | continue 265 | 266 | l[1] = preprocess_unit(l[1]) 267 | l[2] = preprocess_unit(l[2]) 268 | l[4] = preprocess_unit(l[4]) 269 | l[5] = preprocess_unit(l[5]) 270 | 271 | # truncate when length is bigger than the max_length 272 | if len(l[1]) > sequence_length or len(l[2]) > sequence_length or len(l[4]) > sequence_length or len( 273 | l[5]) > sequence_length: 274 | l[1] = l[1][:sequence_length] 275 | l[2] = l[2][:sequence_length] 276 | l[5] = l[5][:sequence_length] 277 | l[4] = l[4][:sequence_length] 278 | 279 | x1.append(l[1]) 280 | x2.append(l[2]) 281 | x3.append(l[4]) 282 | x4.append(l[5]) 283 | y = self.add_y_helper(y_value, y, int(l[0]) == 1) 284 | y2 = self.add_y_helper(y_value, y2, int(l[3]) == 1) 285 | 286 | return np.asarray(x1), np.asarray(x2), np.asarray(x3), np.asarray(x4), np.asarray(y), np.asarray(y2) 287 | 288 | def getTsvTestData_Mul_Labels_Dyna(self, filepath, sep, sequence_length, y_value=False): 289 | """ 290 | load the data(label, mention, entity) from labeled mutlti-task files 291 | :param filepath: 292 | :return: three lists(label_list, mention_list, entity_list) 293 | """ 294 | print("Loading testing/labelled data from " + filepath) 295 | x1, x2, x3, x4 = [], [], [], [] 296 | y = [] 297 | y2 = [] 298 | indicate = [] 299 | for line in codecs.open(filepath, "r", "utf-8"): 300 | l = line.strip().split(sep) 301 | l[1] = preprocess_unit(l[1]) 302 | l[2] = preprocess_unit(l[2]) 303 | if len(l[1]) > sequence_length or len(l[2]) > sequence_length: 304 | l[1] = l[1][:sequence_length] 305 | l[2] = l[2][:sequence_length] 306 | x1.append(l[1]) 307 | x2.append(l[2]) 308 | y = self.add_y_helper(y_value, y, int(l[0]) == 1) 309 | 310 | if len(l) == 3: # dynamic single task1 311 | x3.append("") 312 | x4.append("") 313 | y2 = self.add_y_helper(y_value, y2, False) 314 | indicate.append(1) 315 | else: 316 | l[4] = preprocess_unit(l[4]) 317 | l[5] = preprocess_unit(l[5]) 318 | # truncate when length is bigger than the max_length 319 | if len(l[4]) > sequence_length or len(l[5]) > sequence_length: 320 | l[5] = l[5][:sequence_length] 321 | l[4] = l[4][:sequence_length] 322 | x3.append(l[4]) 323 | x4.append(l[5]) 324 | indicate.append(0) 325 | y2 = self.add_y_helper(y_value, y2, int(l[3]) == 1) 326 | 327 | return indicate, np.asarray(x1), np.asarray(x2), np.asarray(x3), np.asarray(x4), np.asarray(y), np.asarray(y2) 328 | -------------------------------------------------------------------------------- /train_test_idf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding=utf-8 3 | 4 | import datetime 5 | import os 6 | import time 7 | import codecs 8 | 9 | import numpy as np 10 | import tensorflow as tf 11 | 12 | from util.util import write_evaluation_file 13 | from util.input_helpers import InputHelper 14 | from MultiGran_Model import MultiGranModel 15 | from tensor import Tensor 16 | 17 | # Parameters 18 | # ================================================== 19 | 20 | # Model 
Hyperparameters 21 | tf.flags.DEFINE_integer("embedding_dim", 100, "Dimensionality of character embedding (default: 100)") 22 | tf.flags.DEFINE_string("filter_sizes", "2, 3", "Comma-separated filter sizes (default: '2,3')") 23 | tf.flags.DEFINE_integer("num_filters", 16, "Number of filters per filter size (default: 64)") 24 | tf.flags.DEFINE_float("dropout_keep_prob", 0.5, "Dropout keep probability (default: 0.5)") 25 | tf.flags.DEFINE_float("l2_reg_lambda", 0.0, "L2 regularizaion lambda (default: 0.0)") 26 | 27 | # Data Parameter 28 | tf.flags.DEFINE_integer("max_sequence_len", 10, "max document length of input") 29 | tf.flags.DEFINE_integer("most_words", 300000, "Most number of words in vocab (default: 300000)") 30 | 31 | # Training parameters 32 | tf.flags.DEFINE_integer("seed", 123, "Random seed (default: 123)") 33 | tf.flags.DEFINE_string("train_dir", "./", "Training dir root") 34 | tf.flags.DEFINE_integer("batch_size", 128, "Batch Size (default: 64)") 35 | tf.flags.DEFINE_integer("num_epochs", 50, "Number of training epochs") 36 | tf.flags.DEFINE_float("eval_split", 0.1, "Use how much data for evaluating (default: 0.1)") 37 | tf.flags.DEFINE_integer("evaluate_every", 100, "Evaluate model on dev set after this many steps (default: 100)") 38 | tf.flags.DEFINE_integer("checkpoint_every", 100, "Save model after this many steps (default: 100)") 39 | 40 | FLAGS = tf.flags.FLAGS 41 | FLAGS._parse_flags() 42 | print("\nParameters:") 43 | for attr, value in sorted(FLAGS.__flags.items()): 44 | print("{}={}".format(attr.upper(), value)) 45 | print("") 46 | 47 | 48 | def main(): 49 | print("Loading data...") 50 | 51 | task_num = 1 52 | name = "des" if task_num == 1 else "opr" 53 | d_type = "Single_task1" if task_num == 1 else "Single_task2" 54 | 55 | inputH = InputHelper() 56 | max_document_length = FLAGS.max_sequence_len 57 | load_from_npy = False 58 | 59 | sep = "\t" 60 | data_file = "./runs/Exp/Single_task11501902502/lalaa.txt" 61 | 62 | def add_y_helper(y_value, y_arr, is_positive_label): 63 | """ 64 | add 1/0 or [0,1]/[1, 0] in y_arr which depends on y_value flag 65 | :param y_value: 66 | :param y_arr: 67 | :param is_positive_label: 68 | :return: 69 | """ 70 | if y_value == True: 71 | if is_positive_label: 72 | y_arr.append(1) 73 | else: 74 | y_arr.append(0) 75 | else: 76 | if is_positive_label: 77 | y_arr.append(np.array([0, 1])) 78 | else: 79 | y_arr.append(np.array([1, 0])) 80 | return y_arr 81 | 82 | def getTsvTestData(filepath, sep, sequence_length, y_value): 83 | """ 84 | load the data(label, mention, entity) from labeled file 85 | :param filepath: 86 | :return: three lists(label_list, mention_list, entity_list) 87 | """ 88 | print("Loading testing/labelled data from " + filepath) 89 | x1 = [] 90 | x2 = [] 91 | y = [] 92 | cnt = 0 93 | # positive samples from file 94 | for line in codecs.open(filepath, "r", "utf-8"): 95 | if cnt == 0: 96 | cnt = 1 97 | continue 98 | l = line.strip().split(sep) 99 | if len(l) < 4: 100 | continue 101 | 102 | # truncate when length is bigger than the max_length 103 | if len(l[1]) > sequence_length or len(l[0]) > sequence_length: 104 | l[1] = l[1][:sequence_length] 105 | l[0] = l[0][:sequence_length] 106 | 107 | x1.append(l[0]) 108 | x2.append(l[1]) 109 | y = add_y_helper(y_value, y, int(l[3]) == 1) 110 | return np.asarray(x1), np.asarray(x2), np.asarray(y) 111 | 112 | x_mention, x_entity, y = getTsvTestData(data_file, sep, max_document_length, y_value=False) 113 | # x_dev_mention, x_dev_entity, y_dev = inputH.getTsvTestData(data_file, sep, 
max_document_length, y_value=False) 114 | # x_test_mention, x_test_entity, y_test = inputH.getTsvTestData(data_file, sep, max_document_length, 115 | # y_value=False) 116 | r = len(x_mention) 117 | r1 = (int)(r * 0.8) 118 | r2 = (int)(r * 0.9) 119 | shuffle_indices = np.random.permutation(np.arange(r)) 120 | x_mention = x_mention[shuffle_indices] 121 | x_entity = x_entity[shuffle_indices] 122 | y = y[shuffle_indices] 123 | 124 | x_train_mention, x_train_entity, y_train = x_mention[:r1], x_entity[:r1], y[:r1] 125 | x_dev_mention, x_dev_entity, y_dev = x_mention[r1:r1], x_entity[r1:r2], y[r1:r2] 126 | x_test_mention, x_test_entity, y_test = x_mention[r2:], x_entity[r2:], y[r2:] 127 | 128 | if load_from_npy == False: 129 | # Constructing Tensor for train, dev, and test 130 | men_arr = np.concatenate((x_train_mention, x_dev_mention, x_test_mention)) 131 | entity_arr = np.concatenate((x_train_entity, x_dev_entity, x_test_entity)) 132 | 133 | lstm_dir = "Description1501554142" if task_num == 1 else "Operation1501209225" 134 | bilstm_dir = os.path.join("./Sentence_Modeling/runs", lstm_dir) 135 | 136 | tensor = Tensor(men_arr, entity_arr, len(men_arr), FLAGS.max_sequence_len, task_num, bilstm_dir).get_tensor() 137 | tensor = tensor.transpose((0, 2, 3, 1)) 138 | g1 = len(x_train_mention) 139 | g2 = len(np.concatenate((x_train_mention, x_dev_mention))) 140 | x_train_tensor, x_dev_tensor, x_test_tensor = tensor[:g1], tensor[g1:g2], tensor[g2:] 141 | 142 | else: 143 | tensor_dir = "IDF" 144 | mydir = "./" + tensor_dir + "/Length" + str(FLAGS.max_sequence_len) + "/" 145 | x_train_tensor = np.load(mydir + "train_" + name + ".npy") 146 | x_dev_tensor = np.load(mydir + "dev_" + name + ".npy") 147 | x_test_tensor = np.load(mydir + "test_" + name + ".npy") 148 | 149 | with tf.Graph().as_default(): 150 | 151 | sess = tf.Session() 152 | with sess.as_default(): 153 | cnn = MultiGranModel( 154 | max_len=FLAGS.max_sequence_len, 155 | filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))), 156 | num_filters=FLAGS.num_filters, 157 | l2_reg_lambda=FLAGS.l2_reg_lambda) 158 | 159 | # Define Training procedure 160 | global_step = tf.Variable(0, name="global_step", trainable=False) 161 | optimizer = tf.train.AdamOptimizer(1e-3) 162 | grads_and_vars = optimizer.compute_gradients(cnn.loss) 163 | train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step) 164 | saver = tf.train.Saver(tf.all_variables(), max_to_keep=20) 165 | 166 | # Keep track of gradient values and sparsity (optional) 167 | for g, v in grads_and_vars: 168 | if g is not None: 169 | tf.summary.histogram("grad_hist/{}".format(v.name), g) 170 | tf.summary.scalar("grad_sparsity/{}".format(v.name), tf.nn.zero_fraction(g)) 171 | tf.summary.histogram(v.name, v) 172 | 173 | # Output directory for models and summaries 174 | timestamp = str(int(time.time())) 175 | out_dir = os.path.abspath(os.path.join(FLAGS.train_dir, "runs", "Exp", d_type + timestamp)) 176 | if not os.path.exists(out_dir): 177 | os.makedirs(out_dir) 178 | print("Writing to {}\n".format(out_dir)) 179 | 180 | checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints")) 181 | checkpoint_prefix = os.path.join(checkpoint_dir, "model") 182 | if not os.path.exists(checkpoint_dir): 183 | os.makedirs(checkpoint_dir) 184 | 185 | # Summaries for loss and accuracy 186 | loss_summary = tf.summary.scalar("loss", cnn.loss) 187 | acc_summary = tf.summary.scalar("accuracy", cnn.accuracy) 188 | 189 | # Train Summaries 190 | train_summary_merged = tf.summary.merge_all() 191 | 
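            # merge_all() gathers every summary registered so far (the loss/accuracy scalars plus the
            # per-variable gradient histograms and sparsity scalars added above) into a single op that
            # train_step() evaluates and writes to the train summary directory.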
train_summary_dir = os.path.join(out_dir, "summaries", "train") 192 | train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph) 193 | 194 | # Dev summaries 195 | dev_summary_op = tf.summary.merge([loss_summary, acc_summary]) 196 | dev_summary_dir = os.path.join(out_dir, "summaries", "dev") 197 | dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph) 198 | 199 | # Initialize all variables 200 | sess.run(tf.initialize_all_variables()) 201 | 202 | def train_step(x_batch, y_batch): 203 | feed_dict = { 204 | cnn.input_tensor: x_batch, 205 | cnn.input_y: y_batch, 206 | cnn.dropout_keep_prob: FLAGS.dropout_keep_prob 207 | } 208 | _, step, summaries, loss, accuracy = sess.run( 209 | [train_op, global_step, train_summary_merged, cnn.loss, cnn.accuracy], 210 | feed_dict) 211 | time_str = datetime.datetime.now().isoformat() 212 | if step % 10 == 0: 213 | print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy)) 214 | train_summary_writer.add_summary(summaries, step) 215 | 216 | def dev_step(x_dev, y_batch_dev, writer=None): 217 | feed_dict = { 218 | cnn.input_tensor: x_dev, 219 | cnn.input_y: y_batch_dev, 220 | cnn.dropout_keep_prob: 1.0 221 | } 222 | step, summaries, loss, accuracy, pres = sess.run( 223 | [global_step, dev_summary_op, cnn.loss, cnn.accuracy, cnn.scores], 224 | feed_dict) 225 | if writer: 226 | writer.add_summary(summaries, step) 227 | 228 | return loss, accuracy 229 | 230 | def evaluate(x_dev, y_batch_dev): 231 | feed_dict = { 232 | cnn.input_tensor: x_dev, 233 | cnn.input_y: y_batch_dev, 234 | cnn.dropout_keep_prob: 1.0 235 | } 236 | step, loss, accuracy, pres = sess.run([global_step, cnn.loss, cnn.accuracy, cnn.scores], feed_dict) 237 | 238 | eval_file = open(out_dir + "/evaluation.txt", "w+") 239 | right_file = open(out_dir + "/right_cases.txt", "w+") 240 | wrong_file = open(out_dir + "/wrong_cases.txt", "w+") 241 | 242 | eval_file.write("Accu: " + str(accuracy) + "\n") 243 | predictions = np.argmax(pres, 1) 244 | labels = np.argmax(y_batch_dev, 1) 245 | 246 | write_evaluation_file(eval_file, right_file, wrong_file, labels, predictions, x_test_mention, 247 | x_test_entity) 248 | 249 | eval_file.write("Parameters:") 250 | for attr, value in sorted(FLAGS.__flags.items()): 251 | eval_file.write("{}={}".format(attr.upper(), value) + "\n") 252 | 253 | print(loss) 254 | print(accuracy) 255 | print(pres) 256 | print(y_batch_dev) 257 | return loss, accuracy 258 | 259 | def dev_whole(x_dev, y_dev, writer=None): 260 | batches_dev = inputH.batch_iter(list(zip(x_dev, y_dev)), FLAGS.batch_size, 1, shuffle=False) 261 | losses = [] 262 | accuracies = [] 263 | 264 | for idx, batch_dev in enumerate(batches_dev): 265 | x_batch, y_batch = zip(*batch_dev) 266 | loss, accurary = dev_step(x_batch, y_batch, writer) 267 | losses.append(loss) 268 | accuracies.append(accurary) 269 | return np.mean(np.array(losses)), np.mean(np.array(accuracies)) 270 | 271 | # def overfit(dev_loss): 272 | # n = len(dev_loss) 273 | # if n < 5: 274 | # return False 275 | # for i in xrange(n - 4, n): 276 | # if dev_loss[i] > dev_loss[i - 1]: 277 | # return False 278 | # return True 279 | 280 | # Generate batches 281 | batches = inputH.batch_iter(list(zip(x_train_tensor, y_train)), FLAGS.batch_size, FLAGS.num_epochs) 282 | 283 | # Training loop. For each batch... 
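            # Each batch yields (tensor, label) pairs: train_step() runs one optimizer step,
            # every FLAGS.evaluate_every steps dev_whole() reports the averaged dev loss/accuracy,
            # every FLAGS.checkpoint_every steps a checkpoint is saved, and evaluate() scores the
            # held-out test tensors once the loop finishes.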
284 | dev_loss = [] 285 | for batch in batches: 286 | x_batch, y_batch = zip(*batch) 287 | train_step(x_batch, y_batch) 288 | current_step = tf.train.global_step(sess, global_step) 289 | 290 | if current_step % FLAGS.evaluate_every == 0: 291 | print("\nEvaluation:") 292 | loss, accuracy = dev_whole(x_dev_tensor, y_dev, writer=dev_summary_writer) 293 | time_str = datetime.datetime.now().isoformat() 294 | print("{}: dev-aver, loss {:g}, acc {:g}".format(time_str, loss, accuracy)) 295 | dev_loss.append(accuracy) 296 | print("\nRecently accuracy:") 297 | print dev_loss[-10:] 298 | # if overfit(dev_loss): 299 | # print 'Overfit!!' 300 | # break 301 | print("") 302 | 303 | if current_step % FLAGS.checkpoint_every == 0: 304 | path = saver.save(sess, checkpoint_prefix, global_step=current_step) 305 | print("Saved model checkpoint to {}\n".format(path)) 306 | 307 | print("") 308 | evaluate(x_test_tensor, y_test) 309 | 310 | 311 | if __name__ == '__main__': 312 | main() 313 | -------------------------------------------------------------------------------- /Sentence_Modeling/Sentence_Model.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from Siamese_network import SiameseLSTM 3 | from DLDisambiguation.util.input_helpers import InputHelper 4 | from DLDisambiguation.util.util import getEmbedding 5 | from DLDisambiguation.util.preprocess import MyVocabularyProcessor 6 | import time 7 | import numpy as np 8 | import datetime 9 | import os 10 | 11 | from DLDisambiguation.util.util import write_evaluation_file 12 | 13 | # Parameters 14 | # ================================================== 15 | tf.flags.DEFINE_integer("embedding_dim", 100, "Dimensionality of character embedding") 16 | tf.flags.DEFINE_integer("hidden_units", 15, "unit numbers of hidden vectors in Bi-LSTM") 17 | tf.flags.DEFINE_float("l2_reg_lambda", 0.01, "L2 regularization lambda (default: 0.0)") 18 | 19 | tf.flags.DEFINE_string("train_dir", "../", "training dir") 20 | 21 | # Training parameters 22 | tf.flags.DEFINE_integer("batch_size", 128, "Batch Size") 23 | tf.flags.DEFINE_integer("num_epochs", 50, "Number of training epochs (default: 50)") 24 | tf.flags.DEFINE_integer("evaluate_every", 100, "Evaluate model on dev set after this many steps") 25 | tf.flags.DEFINE_integer("checkpoint_every", 100, "Save model after this many steps (default: 100)") 26 | 27 | FLAGS = tf.flags.FLAGS 28 | FLAGS._parse_flags() 29 | print("\nParameters:") 30 | for attr, value in sorted(FLAGS.__flags.items()): 31 | print("{}={}".format(attr.upper(), value)) 32 | print("") 33 | 34 | 35 | class SentenceModel: 36 | def getEmbeddingMatrix(self, embedding_dir, processer): 37 | vocab, vocab_size, embedding_dim, embedding = getEmbedding(embedding_dir) 38 | 39 | # Extract word:id mapping from the object. 
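        # Builds the initial value for the SiameseLSTM embedding matrix from the pretrained
        # character vectors in embedding_dir; characters the pretrained vocabulary does not
        # contain fall back to its last row. Note that rows are indexed with the vocabulary
        # processor ids (vocab_dict[i]), which assumes the two vocabularies are aligned.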
40 | vocab_dict = processer.vocabulary_._mapping 41 | words_s = set(vocab_dict.keys()) 42 | 43 | # words_index = [i for i,e in enumerate(vocab) if e.decode("utf-8") in words_s] 44 | words_index = [] 45 | vocab_set = set(vocab) 46 | last_index = len(vocab) - 1 47 | 48 | for i in words_s: 49 | character_u = i.encode("utf-8") 50 | if character_u in vocab_set: 51 | words_index.append(vocab_dict[i]) 52 | else: 53 | # for unkown word, give default value of key 54 | print(character_u) 55 | words_index.append(last_index) 56 | # print(embedding[last_index]) 57 | 58 | # words_index = [vocab_dict[i] for i in words_s if i.encode("utf-8") in set(vocab)] 59 | res = embedding[words_index] 60 | res = np.asarray(res, dtype="float") 61 | return res 62 | 63 | def __init__(self): 64 | # the max length of description/operation segment, padding if shorter, and ignore the pair if longer 65 | max_document_length = 20 66 | inpH = InputHelper() 67 | y_is_value = True # flag to indicate that y is value(0 / 1) or array[0,1] / [1, 0] 68 | # train_set, dev_set, vocab_processor, sum_no_of_batches = inpH.getDataSets_File(FLAGS.training_files, "\t", 69 | # max_document_length, 70 | # 10, # 10---percent_dev 71 | # FLAGS.batch_size, y_value=y_is_value) 72 | 73 | # test_x1, test_x2, test_y = inpH.getTestDataSet(FLAGS.test_file, "\t\t", vocab_processor, max_document_length, y_is_value) 74 | 75 | task_num = 2 76 | d_type = "Description" if task_num == 1 else "Operation" 77 | embedding_dir = "../data/word2vec/character_model.txt" if task_num == 1 \ 78 | else "../data/operation/character_model.txt" 79 | name = "des" if task_num == 1 else "opr" 80 | 81 | time_gen = "0823" 82 | data_file = os.path.join(FLAGS.train_dir, "data/train_data_" + time_gen + "_" + name + ".txt") 83 | data_file_test = os.path.join(FLAGS.train_dir, "data/test_data_" + time_gen + "_" + name + ".txt") 84 | # data_file_val = os.path.join(FLAGS.train_dir, "data/validation_data_" + time_gen + "_" + name + ".txt") 85 | 86 | sep = "\t" 87 | train_x1, train_x2, train_y = inpH.getTsvTestData(data_file, sep, max_document_length, y_is_value) 88 | test_x1, test_x2, test_y = inpH.getTsvTestData(data_file_test, sep, max_document_length, y_is_value) 89 | # dev_x1, dev_x2, dev_y = inpH.getTsvTestData(data_file_val, sep, max_document_length, y_is_value) 90 | dev_x1, dev_x2, dev_y = test_x1, test_x2, test_y 91 | sum_no_of_batches = len(train_y) // FLAGS.batch_size 92 | 93 | vocab_processor = MyVocabularyProcessor(max_document_length, min_frequency=0) 94 | vocab_processor.fit_transform(np.concatenate((train_x1, train_x2, dev_x1, dev_x2))) 95 | # vocab_processor.fit_transform(np.concatenate((train_x1, train_x2, test_x1, test_x2, dev_x1, dev_x2))) 96 | print("Length of loaded vocabulary ={}".format(len(vocab_processor.vocabulary_))) 97 | 98 | train_set = inpH.get_data(vocab_processor, train_x1, train_x2, train_y, max_document_length) 99 | dev_set = inpH.get_data(vocab_processor, dev_x1, dev_x2, dev_y, max_document_length) 100 | test_set = inpH.get_data(vocab_processor, test_x1, test_x2, test_y, max_document_length) 101 | 102 | # load in word2vec model 103 | embedding_matrix = self.getEmbeddingMatrix(embedding_dir, vocab_processor) 104 | 105 | sess = tf.Session() 106 | with sess.as_default(): 107 | siameseModel = SiameseLSTM(sequence_length=max_document_length, 108 | vocab_processer=vocab_processor, 109 | embedding_size=FLAGS.embedding_dim, 110 | hidden_unit_size=FLAGS.hidden_units, 111 | l2_reg_lambda=FLAGS.l2_reg_lambda, 112 | batch_size=FLAGS.batch_size, 113 | 
embedding_arr=embedding_matrix) 114 | 115 | # Define Training procedure 116 | global_step = tf.Variable(0, name="global_step", trainable=False) 117 | optimizer = tf.train.AdamOptimizer(1e-3) 118 | print("initialized siameseModel object") 119 | 120 | grads_and_vars = optimizer.compute_gradients(siameseModel.loss) 121 | tr_op_set = optimizer.apply_gradients(grads_and_vars, global_step=global_step) 122 | print("defined training_ops") 123 | 124 | # Keep track of variables, gradient values and sparsity 125 | for g, v in grads_and_vars: 126 | if g is not None: 127 | tf.summary.histogram("grad_hist/{}".format(v.name), g) 128 | tf.summary.histogram("grad_sparsity/{}".format(v.name), tf.nn.zero_fraction(g)) 129 | tf.summary.histogram(v.name, v) 130 | print("defined gradient summaries") 131 | 132 | # Output directory for models and summaries 133 | timestamp = str(int(time.time())) 134 | out_dir = os.path.abspath(os.path.join(os.path.curdir, "Exp" + time_gen, "runs", d_type + timestamp)) 135 | print("Writing to {}\n".format(out_dir)) 136 | 137 | # Summaries for loss and accuracy 138 | loss_summary = tf.summary.scalar("loss", siameseModel.loss) 139 | acc_summary = tf.summary.scalar("accuracy", siameseModel.accuracy) 140 | 141 | # Train Summaries 142 | train_summary_merged = tf.summary.merge_all() 143 | train_summary_dir = os.path.join(out_dir, "summaries", "train") 144 | train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph) 145 | 146 | # Dev summaries 147 | dev_summary_op = tf.summary.merge([loss_summary, acc_summary]) 148 | dev_summary_dir = os.path.join(out_dir, "summaries", "dev") 149 | dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph) 150 | 151 | # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it 152 | checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints")) 153 | checkpoint_prefix = os.path.join(checkpoint_dir, "model") 154 | if not os.path.exists(checkpoint_dir): 155 | os.makedirs(checkpoint_dir) 156 | saver = tf.train.Saver(tf.all_variables(), max_to_keep=100) 157 | 158 | # Write vocabulary 159 | vocab_processor.save(os.path.join(checkpoint_dir, "vocab")) 160 | 161 | # Initialize all variables 162 | sess.run(tf.initialize_all_variables()) 163 | print("init all variables") 164 | 165 | graph_def = tf.get_default_graph().as_graph_def() 166 | graphpb_txt = str(graph_def) 167 | with open(os.path.join(checkpoint_dir, "graphpb.txt"), 'w') as f: 168 | f.write(graphpb_txt) 169 | 170 | def train_step(x1_batch, x2_batch, x1_batch_m, x2_batch_m, y_batch): 171 | 172 | feed_dict = { 173 | siameseModel.input_x1: x1_batch, 174 | siameseModel.input_x2: x2_batch, 175 | siameseModel.mask_x1: x1_batch_m, 176 | siameseModel.mask_x2: x2_batch_m, 177 | siameseModel.input_y: y_batch, 178 | } 179 | 180 | _, step, summaries, loss, accuracy, dist = sess.run( 181 | [tr_op_set, global_step, train_summary_merged, siameseModel.loss, siameseModel.accuracy, 182 | siameseModel.distance], feed_dict) 183 | 184 | time_str = datetime.datetime.now().isoformat() 185 | d = np.copy(dist) 186 | d[d >= 0.5] = 999.0 187 | d[d < 0.5] = 1 188 | d[d > 1.0] = 0 189 | accuracy_t = np.mean(y_batch == d) 190 | print( 191 | "TRAIN {}: step {}, loss {:g}, acc {:g}, acc_t {:g}".format(time_str, step, loss, accuracy, accuracy_t)) 192 | print(y_batch) 193 | print(dist) 194 | print(d) 195 | train_summary_writer.add_summary(summaries, step) 196 | 197 | def dev_step(x1_batch, x2_batch, x1_batch_m, x2_batch_m, y_batch): 198 | 199 | feed_dict = { 200 | 
siameseModel.input_x1: x1_batch, 201 | siameseModel.input_x2: x2_batch, 202 | siameseModel.mask_x1: x1_batch_m, 203 | siameseModel.mask_x2: x2_batch_m, 204 | siameseModel.input_y: y_batch, 205 | } 206 | 207 | step, summaries, loss, accuracy, dist = sess.run( 208 | [global_step, dev_summary_op, siameseModel.loss, siameseModel.accuracy, siameseModel.distance], 209 | feed_dict) 210 | time_str = datetime.datetime.now().isoformat() 211 | d = np.copy(dist) 212 | d[d >= 0.5] = 999.0 213 | d[d < 0.5] = 1 214 | d[d > 1.0] = 0 215 | accuracy_t = np.mean(y_batch == d) 216 | print("DEV {}: step {}, loss {:g}, acc {:g}, acc_t {:g}".format(time_str, step, loss, accuracy, accuracy_t)) 217 | print(y_batch) 218 | print(dist) 219 | print(d) 220 | dev_summary_writer.add_summary(summaries, step) 221 | return accuracy 222 | 223 | def overfit(dev_loss, accu): 224 | num = 6 225 | n = len(dev_loss) 226 | if n < num: 227 | return False 228 | for i in xrange(n - num, n): 229 | if dev_loss[i] < accu: 230 | return False 231 | print(dev_loss) 232 | print(accu) 233 | return True 234 | 235 | def evaluate(x1_batch, x2_batch, x1_batch_m, x2_batch_m, y_batch, mention, entity): 236 | 237 | feed_dict = { 238 | siameseModel.input_x1: x1_batch, 239 | siameseModel.input_x2: x2_batch, 240 | siameseModel.mask_x1: x1_batch_m, 241 | siameseModel.mask_x2: x2_batch_m, 242 | siameseModel.input_y: y_batch, 243 | } 244 | 245 | loss, accuracy, dist = sess.run([siameseModel.loss, siameseModel.accuracy, siameseModel.distance], 246 | feed_dict) 247 | time_str = datetime.datetime.now().isoformat() 248 | print("Test {}: loss {:g}, acc {:g}".format(time_str, loss, accuracy)) 249 | print(dist) 250 | 251 | eval_file = open(out_dir + "/evaluation.txt", "w+") 252 | right_file = open(out_dir + "/right_cases.txt", "w+") 253 | wrong_file = open(out_dir + "/wrong_cases.txt", "w+") 254 | 255 | eval_file.write("Accu: " + str(accuracy) + "\n") 256 | eval_file.write("Dataset: " + data_file + "\n") 257 | eval_file.write("Early Stopped at: " + str(stop_p) + "\n") 258 | 259 | d = np.copy(dist) 260 | d[d >= 0.5] = 999.0 261 | d[d < 0.5] = 1 262 | d[d > 1.0] = 0 263 | 264 | predictions = d 265 | write_evaluation_file(eval_file, right_file, wrong_file, y_batch, predictions, mention, entity) 266 | return accuracy 267 | 268 | # Generate batches 269 | batches = inpH.batch_iter(list(zip(train_set[0], train_set[1], train_set[2], train_set[3], train_set[4])), 270 | FLAGS.batch_size, FLAGS.num_epochs) 271 | 272 | max_validation_acc = 0.0 273 | num_batches_per_epoch = int(len(train_set[0]) / FLAGS.batch_size) 274 | print num_batches_per_epoch 275 | max_accu = 0 276 | dev_accu = [] 277 | 278 | for nn in xrange(sum_no_of_batches * FLAGS.num_epochs): 279 | batch = batches.next() 280 | if len(batch) < 1: 281 | continue 282 | x1_batch, x2_batch, x1_batch_m, x2_match_m, y_batch = zip(*batch) 283 | if len(y_batch) < 1: 284 | continue 285 | 286 | train_step(x1_batch, x2_batch, x1_batch_m, x2_match_m, y_batch) 287 | 288 | current_step = tf.train.global_step(sess, global_step) # get the global step. 
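                # Once per epoch (every num_batches_per_epoch steps) the dev split is scored with
                # dev_step(); training stops early via overfit() when the latest dev accuracy no
                # longer improves on the recent epochs, and a checkpoint is written only when
                # acc_mean reaches a new maximum.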
289 | sum_acc = 0.0 290 | tmp = [] 291 | 292 | if current_step % num_batches_per_epoch == 0: 293 | print("\nEvaluation:") 294 | # dev_batches = inpH.batch_iter(list(zip(dev_set[0], dev_set[1], dev_set[2], dev_set[3], dev_set[4])), 295 | # FLAGS.batch_size, 1) 296 | # for db in dev_batches: 297 | # if len(db) < 1: 298 | # continue 299 | # x1_dev_b, x2_dev_b, x1_dev_m, x2_dev_m, y_dev_b = zip(*db) 300 | # if len(y_dev_b) < 1: 301 | # continue 302 | # acc = dev_step(x1_dev_b, x2_dev_b, x1_dev_m, x2_dev_m, y_dev_b) 303 | # sum_acc = sum_acc + acc 304 | # tmp.append(acc) 305 | # 306 | # acc_mean = np.mean(tmp) 307 | acc_mean = dev_step(dev_set[0], dev_set[1], dev_set[2], dev_set[3], dev_set[4]) 308 | dev_accu.append(acc_mean) 309 | 310 | if overfit(dev_accu, acc_mean): 311 | print 'Overfit!!' 312 | print("Optimum" + str(max_accu)) 313 | print(current_step) 314 | stop_p = current_step / num_batches_per_epoch 315 | print(stop_p) 316 | break 317 | 318 | if acc_mean >= max_accu: 319 | max_accu = acc_mean 320 | saver.save(sess, checkpoint_prefix, global_step=current_step) # save checkpoints 321 | tf.train.write_graph(sess.graph.as_graph_def(), checkpoint_prefix, "graph" + str(nn) + ".pb", 322 | as_text=False) # save graph_def 323 | print("Saved model {} with sum_accuracy={} checkpoint to {}\n".format(nn, max_validation_acc, 324 | checkpoint_prefix)) 325 | 326 | evaluate(test_set[0], test_set[1], test_set[2], test_set[3], test_set[4], test_x1, test_x2) 327 | 328 | 329 | s = SentenceModel() 330 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding=utf-8 3 | 4 | import datetime 5 | import os 6 | import time 7 | 8 | import numpy as np 9 | import tensorflow as tf 10 | 11 | from util.util import write_evaluation_file 12 | from util.input_helpers import InputHelper 13 | from MultiGran_Model import MultiGranModel 14 | from tensor import Tensor 15 | 16 | # Parameters 17 | # ================================================== 18 | 19 | # Model Hyperparameters 20 | tf.flags.DEFINE_integer("embedding_dim", 100, "Dimensionality of character embedding (default: 100)") 21 | tf.flags.DEFINE_string("filter_sizes", "3, 3, 3, 3", "Comma-separated filter sizes (default: '2,3')") 22 | tf.flags.DEFINE_string("pool_sizes", "2, 2, 2, 2", "Comma-separated filter sizes (default: '2,3')") 23 | tf.flags.DEFINE_integer("num_filters", 16, "Number of filters per filter size (default: 64)") 24 | tf.flags.DEFINE_float("dropout_keep_prob", 0.5, "Dropout keep probability (default: 0.5)") 25 | tf.flags.DEFINE_float("l2_reg_lambda", 0.001, "L2 regularizaion lambda (default: 0.0)") 26 | 27 | # Data Parameter 28 | tf.flags.DEFINE_integer("max_sequence_len", 20, "max document length of input") 29 | tf.flags.DEFINE_integer("most_words", 300000, "Most number of words in vocab (default: 300000)") 30 | 31 | # Training parameters 32 | tf.flags.DEFINE_integer("seed", 123, "Random seed (default: 123)") 33 | tf.flags.DEFINE_string("train_dir", "./", "Training dir root") 34 | tf.flags.DEFINE_integer("batch_size", 128, "Batch Size (default: 64)") 35 | tf.flags.DEFINE_integer("num_epochs", 50, "Number of training epochs") 36 | tf.flags.DEFINE_float("eval_split", 0.1, "Use how much data for evaluating (default: 0.1)") 37 | tf.flags.DEFINE_integer("evaluate_every", 100, "Evaluate model on dev set after this many steps (default: 100)") 38 | tf.flags.DEFINE_integer("checkpoint_every", 100, "Save 
model after this many steps (default: 100)") 39 | 40 | FLAGS = tf.flags.FLAGS 41 | FLAGS._parse_flags() 42 | print("\nParameters:") 43 | for attr, value in sorted(FLAGS.__flags.items()): 44 | print("{}={}".format(attr.upper(), value)) 45 | print("") 46 | 47 | 48 | def main(): 49 | print("Loading data...") 50 | 51 | type_CNN = 2 52 | run_type = "Single CNN" if type_CNN == 1 else "Multiview 4CNN" 53 | 54 | early_stop_num = 11 55 | task_num = 2 56 | name = "des" if task_num == 1 else "opr" 57 | d_type = "Single_task1" if task_num == 1 else "Single_task2" 58 | 59 | inputH = InputHelper() 60 | max_document_length = FLAGS.max_sequence_len 61 | load_from_npy = True 62 | 63 | sep = "\t" 64 | time_gen = "0823" 65 | data_file = os.path.join(FLAGS.train_dir, "data/train_data_" + time_gen + "_" + name + ".txt") 66 | data_file_test = os.path.join(FLAGS.train_dir, "data/test_data_" + time_gen + "_" + name + ".txt") 67 | # data_file_val = os.path.join(FLAGS.train_dir, "data/validation_data_" + time_gen + "_" + name + ".txt") 68 | data_file_val = data_file_test 69 | 70 | # data_file = os.path.join(FLAGS.train_dir, "data/training_dynamic_data_" + name + ".txt") 71 | # data_file_test = os.path.join(FLAGS.train_dir, "data/test_dynamic_data_" + name + ".txt") 72 | # data_file_val = os.path.join(FLAGS.train_dir, "data/validation_dynamic_data_" + name + ".txt") 73 | 74 | x_train_mention, x_train_entity, y_train = inputH.getTsvTestData(data_file, sep, max_document_length, y_value=False) 75 | x_dev_mention, x_dev_entity, y_dev = inputH.getTsvTestData(data_file_val, sep, max_document_length, y_value=False) 76 | x_test_mention, x_test_entity, y_test = inputH.getTsvTestData(data_file_test, sep, max_document_length, 77 | y_value=False) 78 | 79 | if load_from_npy == False: 80 | # Constructing Tensor for train, dev, and test 81 | men_arr = np.concatenate((x_train_mention, x_dev_mention, x_test_mention)) 82 | entity_arr = np.concatenate((x_train_entity, x_dev_entity, x_test_entity)) 83 | 84 | lstm_dir = "Description1501174251" if task_num == 1 else "Operation1501209225" 85 | bilstm_dir = os.path.join("./Sentence_Modeling/runs", lstm_dir) 86 | 87 | tensor = Tensor(men_arr, entity_arr, len(men_arr), FLAGS.max_sequence_len, task_num, bilstm_dir).get_tensor() 88 | tensor = tensor.transpose((0, 2, 3, 1)) 89 | g1 = len(x_train_mention) 90 | g2 = len(np.concatenate((x_train_mention, x_dev_mention))) 91 | x_train_tensor, x_dev_tensor, x_test_tensor = tensor[:g1], tensor[g1:g2], tensor[g2:] 92 | 93 | else: 94 | mydir = "./Tensor_files/0823/Length" + str(FLAGS.max_sequence_len) + "/" 95 | 96 | x_train_tensor = np.load(mydir + "train_" + name + ".npy") 97 | # x_dev_tensor = np.load(mydir + "dev_" + name + ".npy") 98 | x_test_tensor = np.load(mydir + "test_" + name + ".npy") 99 | x_dev_tensor = x_test_tensor 100 | 101 | # indi_train = np.load(mydir + "train_indi_" + name + ".npy") 102 | # indi_val = np.load(mydir + "dev_indi_" + name + ".npy") 103 | # indi_test = np.load(mydir + "test_indi_" + name + ".npy") 104 | 105 | # def process(indi, tensor): 106 | # tmp = [] 107 | # ll = len(indi) 108 | # for i in range(ll): 109 | # if indi[i] == 0: 110 | # tmp.append(tensor[i]) 111 | # return np.asarray(tmp) 112 | # 113 | # x_train_tensor = process(indi_train, x_train_tensor) 114 | # x_dev_tensor = process(indi_val, x_dev_tensor) 115 | # x_test_tensor = process(indi_test, x_test_tensor) 116 | 117 | with tf.Graph().as_default(): 118 | 119 | sess = tf.Session() 120 | with sess.as_default(): 121 | cnn = MultiGranModel( 122 | 
max_len=FLAGS.max_sequence_len, 123 | pool_sizes=list(map(int, FLAGS.pool_sizes.split(","))), 124 | filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))), 125 | num_filters=FLAGS.num_filters, 126 | l2_reg_lambda=FLAGS.l2_reg_lambda, 127 | type_CNN=type_CNN) # Define Training procedure 128 | global_step = tf.Variable(0, name="global_step", trainable=False) 129 | optimizer = tf.train.AdamOptimizer(1e-3) 130 | grads_and_vars = optimizer.compute_gradients(cnn.loss) 131 | train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step) 132 | saver = tf.train.Saver(tf.all_variables(), max_to_keep=20) 133 | 134 | # Keep track of gradient values and sparsity (optional) 135 | for g, v in grads_and_vars: 136 | if g is not None: 137 | tf.summary.histogram("grad_hist/{}".format(v.name), g) 138 | tf.summary.scalar("grad_sparsity/{}".format(v.name), tf.nn.zero_fraction(g)) 139 | tf.summary.histogram(v.name, v) 140 | 141 | # Output directory for models and summaries 142 | timestamp = str(int(time.time())) 143 | out_dir = os.path.abspath(os.path.join(FLAGS.train_dir, "runs", "NewExp", d_type + timestamp)) 144 | if not os.path.exists(out_dir): 145 | os.makedirs(out_dir) 146 | print("Writing to {}\n".format(out_dir)) 147 | 148 | checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints")) 149 | checkpoint_prefix = os.path.join(checkpoint_dir, "model") 150 | if not os.path.exists(checkpoint_dir): 151 | os.makedirs(checkpoint_dir) 152 | 153 | # Summaries for loss and accuracy 154 | loss_summary = tf.summary.scalar("loss", cnn.loss) 155 | acc_summary = tf.summary.scalar("accuracy", cnn.accuracy) 156 | 157 | # Train Summaries 158 | train_summary_merged = tf.summary.merge_all() 159 | train_summary_dir = os.path.join(out_dir, "summaries", "train") 160 | train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph) 161 | 162 | # Dev summaries 163 | dev_summary_op = tf.summary.merge([loss_summary, acc_summary]) 164 | dev_summary_dir = os.path.join(out_dir, "summaries", "dev") 165 | dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph) 166 | 167 | # Initialize all variables 168 | sess.run(tf.initialize_all_variables()) 169 | 170 | def train_step(x_batch, y_batch): 171 | feed_dict = { 172 | cnn.input_tensor: x_batch, 173 | cnn.input_y: y_batch, 174 | cnn.dropout_keep_prob: FLAGS.dropout_keep_prob 175 | } 176 | _, step, summaries, loss, accuracy = sess.run( 177 | [train_op, global_step, train_summary_merged, cnn.loss, cnn.accuracy], 178 | feed_dict) 179 | time_str = datetime.datetime.now().isoformat() 180 | if step % 10 == 0: 181 | print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy)) 182 | train_summary_writer.add_summary(summaries, step) 183 | 184 | def dev_step(x_dev, y_batch_dev, writer=None): 185 | feed_dict = { 186 | cnn.input_tensor: x_dev, 187 | cnn.input_y: y_batch_dev, 188 | cnn.dropout_keep_prob: 1.0 189 | } 190 | step, summaries, loss, accuracy, pres = sess.run( 191 | [global_step, dev_summary_op, cnn.loss, cnn.accuracy, cnn.scores], 192 | feed_dict) 193 | if writer: 194 | writer.add_summary(summaries, step) 195 | 196 | return loss, accuracy 197 | 198 | def evaluate(x_dev, y_batch_dev, m): 199 | feed_dict = { 200 | m.input_tensor: x_dev, 201 | m.input_y: y_batch_dev, 202 | m.dropout_keep_prob: 1.0 203 | } 204 | step, loss, accuracy, pres = sess.run([global_step, m.loss, m.accuracy, m.scores], 205 | feed_dict) 206 | 207 | eval_file = open(out_dir + "/evaluation.txt", "w+") 208 | right_file = open(out_dir + 
"/right_cases.txt", "w+") 209 | wrong_file = open(out_dir + "/wrong_cases.txt", "w+") 210 | 211 | eval_file.write("Accu: " + str(accuracy) + "\n") 212 | predictions = np.argmax(pres, 1) 213 | labels = np.argmax(y_batch_dev, 1) 214 | 215 | write_evaluation_file(eval_file, right_file, wrong_file, labels, predictions, 216 | x_test_mention, 217 | x_test_entity) 218 | 219 | eval_file.write("Parameters:") 220 | for attr, value in sorted(FLAGS.__flags.items()): 221 | eval_file.write("{}={}".format(attr.upper(), value) + "\n") 222 | fs1 = [str(i) for i in fs] 223 | ps1 = [str(i) for i in ps] 224 | eval_file.write("Conv" + " ".join(fs1) + "\n") 225 | eval_file.write("Pool_sizes" + " ".join(ps1) + "\n") 226 | 227 | print(loss) 228 | print(accuracy) 229 | print(pres) 230 | print(y_batch_dev) 231 | return loss, accuracy 232 | 233 | def dev_whole(x_dev, y_dev, writer=None): 234 | # batches_dev = inputH.batch_iter(list(zip(x_dev, y_dev)), FLAGS.batch_size, 1, 235 | # shuffle=False) 236 | # losses = [] 237 | # accuracies = [] 238 | # 239 | # for idx, batch_dev in enumerate(batches_dev): 240 | # x_batch, y_batch = zip(*batch_dev) 241 | # loss, accurary = dev_step(x_batch, y_batch, writer) 242 | # losses.append(loss) 243 | # accuracies.append(accurary) 244 | # return np.mean(np.array(losses)), np.mean(np.array(accuracies)) 245 | 246 | l, accu = dev_step(x_dev, y_dev, writer) 247 | return l, accu 248 | 249 | def overfit(dev_loss, accu): 250 | 251 | n = len(dev_loss) 252 | if n < early_stop_num: 253 | return False 254 | 255 | # mean_acc = np.mean(dev_loss[-15:]) 256 | # if mean_acc < accu: 257 | # return False 258 | for i in xrange(n - early_stop_num, n): 259 | if dev_loss[i] < accu: 260 | return False 261 | print(dev_loss) 262 | print(accu) 263 | return True 264 | 265 | # Generate batches 266 | batches = inputH.batch_iter(list(zip(x_train_tensor, y_train)), FLAGS.batch_size, FLAGS.num_epochs) 267 | 268 | data_num = len(y_train) 269 | num_batches_per_epoch = int(data_num / FLAGS.batch_size) 270 | # num_batches_per_epoch_2 = int(num_batches_per_epoch / 2) 271 | print num_batches_per_epoch 272 | 273 | # Training loop. For each batch... 274 | dev_loss = [] 275 | optimum_accu = 0 276 | 277 | for batch in batches: 278 | x_batch, y_batch = zip(*batch) 279 | train_step(x_batch, y_batch) 280 | current_step = tf.train.global_step(sess, global_step) 281 | 282 | if current_step % num_batches_per_epoch == 0: 283 | print("\nEvaluation:") 284 | loss, accuracy = dev_whole(x_dev_tensor, y_dev, writer=dev_summary_writer) 285 | time_str = datetime.datetime.now().isoformat() 286 | print("{}: dev-aver, loss {:g}, acc {:g}".format(time_str, loss, accuracy)) 287 | dev_loss.append(accuracy) 288 | 289 | print("\nRecently accuracy:") 290 | print dev_loss[-10:] 291 | if overfit(dev_loss, accuracy): 292 | print 'Overfit!!' 
293 | print(current_step) 294 | print(current_step / num_batches_per_epoch) 295 | break 296 | print("") 297 | 298 | if accuracy > optimum_accu: 299 | optimum_accu = accuracy 300 | path = saver.save(sess, checkpoint_prefix, global_step=current_step) 301 | print("Saved model checkpoint to {}\n".format(path)) 302 | print("Optimum_accu: " + str(optimum_accu)) 303 | 304 | print("") 305 | print("Optimum_accu: " + str(optimum_accu)) 306 | 307 | # evaluate the result with the best model 308 | ckpt = tf.train.get_checkpoint_state(checkpoint_dir) 309 | checkpoint_file = ckpt.model_checkpoint_path 310 | graph = tf.Graph() 311 | 312 | with graph.as_default(): 313 | sess = tf.Session() 314 | with sess.as_default(): 315 | # Load the saved meta graph and restore variables 316 | saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file)) 317 | sess.run(tf.initialize_all_variables()) 318 | saver.restore(sess, checkpoint_file) 319 | 320 | input_t = graph.get_operation_by_name("input_tensor").outputs[0] 321 | input_y = graph.get_operation_by_name("input_y").outputs[0] 322 | prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0] 323 | 324 | prediction = graph.get_operation_by_name("output/predictions").outputs[0] 325 | accu = graph.get_operation_by_name("accuracy/accuracy").outputs[0] 326 | pres, accuracy = sess.run([prediction, accu], {input_t: x_test_tensor, input_y: y_test, prob: 1}) 327 | 328 | labels = np.argmax(y_test, 1) 329 | eval_file = open(out_dir + "/evaluation.txt", "w+") 330 | right_file = open(out_dir + "/right_cases.txt", "w+") 331 | wrong_file = open(out_dir + "/wrong_cases.txt", "w+") 332 | 333 | eval_file.write("Dataset: " + data_file + "\n") 334 | eval_file.write(run_type + "\n") 335 | eval_file.write("Stopped at " + str(current_step / num_batches_per_epoch) + "\n") 336 | eval_file.write("Accu: " + str(accuracy) + "\n") 337 | write_evaluation_file(eval_file, right_file, wrong_file, labels, pres, x_test_mention, 338 | x_test_entity) 339 | 340 | eval_file.write("Parameters:") 341 | for attr, value in sorted(FLAGS.__flags.items()): 342 | eval_file.write("{}={}".format(attr.upper(), value) + "\n") 343 | 344 | print("loss:" + str(loss)) 345 | print("accuracy:" + str(accuracy)) 346 | 347 | 348 | if __name__ == '__main__': 349 | main() 350 | -------------------------------------------------------------------------------- /Dynamic/MT_Dynamic_Arch.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding=utf-8 3 | 4 | import datetime 5 | import os 6 | import time 7 | import copy 8 | 9 | import numpy as np 10 | import tensorflow as tf 11 | 12 | from util.util import write_evaluation_file 13 | from DLDisambiguation.util.input_helpers import InputHelper 14 | from MT_Dynamic_MultiGranModel import MT_Dynamic_MultiGranModel 15 | from tensor import Tensor 16 | 17 | # Parameters 18 | # ================================================== 19 | 20 | # Model Hyperparameters 21 | tf.flags.DEFINE_integer("embedding_dim", 100, "Dimensionality of character embedding (default: 100)") 22 | tf.flags.DEFINE_string("filter_sizes", "2,3", "Comma-separated filter sizes (default: '2,3')") 23 | tf.flags.DEFINE_integer("num_filters", 16, "Number of filters per filter size (default: 64)") 24 | tf.flags.DEFINE_float("dropout_keep_prob", 0.5, "Dropout keep probability (default: 0.5)") 25 | tf.flags.DEFINE_float("l2_reg_lambda", 0.0, "L2 regularizaion lambda (default: 0.0)") 26 | 27 | # Data Parameter 28 | 
tf.flags.DEFINE_integer("max_sequence_len", 10, "max document length of input") 29 | tf.flags.DEFINE_integer("max_sequence_len2", 10, "max document length of input") 30 | tf.flags.DEFINE_integer("most_words", 300000, "Most number of words in vocab (default: 300000)") 31 | 32 | # Training parameters 33 | tf.flags.DEFINE_integer("seed", 123, "Random seed (default: 123)") 34 | tf.flags.DEFINE_string("train_dir", "./", "Training dir root") 35 | tf.flags.DEFINE_integer("batch_size", 128, "Batch Size (default: 64)") 36 | tf.flags.DEFINE_integer("num_epochs", 5, "Number of training epochs") 37 | tf.flags.DEFINE_float("eval_split", 0.1, "Use how much data for evaluating (default: 0.1)") 38 | tf.flags.DEFINE_integer("evaluate_every", 100, "Evaluate model on dev set after this many steps (default: 100)") 39 | tf.flags.DEFINE_integer("checkpoint_every", 100, "Save model after this many steps (default: 100)") 40 | 41 | FLAGS = tf.flags.FLAGS 42 | FLAGS._parse_flags() 43 | print("\nParameters:") 44 | for attr, value in sorted(FLAGS.__flags.items()): 45 | print("{}={}".format(attr.upper(), value)) 46 | print("") 47 | 48 | 49 | def get_coocurrence(des_e_names, des_opr_map, x2_test, x4_test): 50 | res = [] 51 | length = len(x2_test) 52 | 53 | for i in range(length): 54 | des = x2_test[i] 55 | opr = x4_test[i] 56 | N = 0.1 57 | 58 | if des_e_names.__contains__(des): 59 | index = des_e_names.index(des) 60 | des_opr = des_opr_map[index] 61 | if opr in des_opr.keys(): 62 | N = des_opr[str(opr)] 63 | res.append(N) 64 | return np.expand_dims(np.asarray(res), axis=1) 65 | 66 | 67 | def load_coocurrence_matrix(filename): 68 | t = open(filename) 69 | line = t.readline() 70 | des_e_names = [] 71 | des_opr_map = [] 72 | 73 | while line != "": 74 | des, oprs = line.split("\t")[0], line.split("\t")[1] 75 | des_e_names.append(des) 76 | 77 | oprs_num = oprs.split("_")[:-1] 78 | tmp = {} 79 | for opr in oprs_num: 80 | opr_name, num = opr.split(":")[0], int(opr.split(":")[1]) 81 | tmp[opr_name] = num 82 | des_opr_map.append(copy.deepcopy(tmp)) 83 | line = t.readline() 84 | return des_e_names, des_opr_map 85 | 86 | 87 | def main(): 88 | # Load data 89 | print("Loading data...") 90 | inputH = InputHelper() 91 | 92 | train_f = os.path.join(FLAGS.train_dir, 'data/exp0803/training_dynamic_data.txt') 93 | dev_f = os.path.join(FLAGS.train_dir, 'data/exp0803/validation_dynamic_data.txt') 94 | test_f = os.path.join(FLAGS.train_dir, 'data/exp0803/test_dynamic_data.txt') 95 | 96 | our_dir = "./Tensor_files/0803_dynamic/" 97 | # our_dir = "./Length" + str(FLAGS.max_sequence_len) + "/" 98 | x_train_tensor = np.load(our_dir + "train_des.npy") 99 | x_dev_tensor = np.load(our_dir + "dev_des.npy") 100 | x_test_tensor = np.load(our_dir + "test_des.npy") 101 | 102 | # our_dir = "./Length" + str(FLAGS.max_sequence_len2) + "/" 103 | x_train_tensor_o = np.load(our_dir + "train_opr.npy") 104 | x_dev_tensor_o = np.load(our_dir + "dev_opr.npy") 105 | x_test_tensor_o = np.load(our_dir + "test_opr.npy") 106 | 107 | x_train_indi_o = 1 - np.load(our_dir + "train_indi_opr.npy") 108 | x_dev_indi_o = 1 - np.load(our_dir + "dev_indi_opr.npy") 109 | x_test_indi_o = 1 - np.load(our_dir + "test_indi_opr.npy") 110 | 111 | sep = "\t" 112 | i1, x1_train, x2_train, x3_train, x4_train, y_train, y2_train = inputH.getTsvTestData_Mul_Labels_Dyna(train_f, sep, 113 | FLAGS.max_sequence_len) 114 | i2, x1_dev, x2_dev, x3_dev, x4_dev, y_dev, y2_dev = inputH.getTsvTestData_Mul_Labels_Dyna(dev_f, sep, 115 | FLAGS.max_sequence_len) 116 | i3, x1_test, x2_test, x3_test, 
x4_test, y_test, y2_test = inputH.getTsvTestData_Mul_Labels_Dyna(test_f, sep, 117 | FLAGS.max_sequence_len) 118 | 119 | des_e_names, des_opr_map = load_coocurrence_matrix("result.txt") 120 | co_arr_test = get_coocurrence(des_e_names, des_opr_map, x2_test, x4_test) 121 | co_arr_train = get_coocurrence(des_e_names, des_opr_map, x2_train, x4_train) 122 | co_arr_val = get_coocurrence(des_e_names, des_opr_map, x2_dev, x4_dev) 123 | 124 | with tf.Graph().as_default(): 125 | 126 | sess = tf.Session() 127 | with sess.as_default(): 128 | cnn = MT_Dynamic_MultiGranModel( 129 | max_len1=FLAGS.max_sequence_len, 130 | max_len2=FLAGS.max_sequence_len2, 131 | filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))), 132 | num_filters=FLAGS.num_filters, 133 | l2_reg_lambda=FLAGS.l2_reg_lambda, 134 | ) 135 | 136 | # Define Training procedure 137 | global_step = tf.Variable(0, name="global_step", trainable=False) 138 | optimizer = tf.train.AdamOptimizer(1e-3) 139 | 140 | grads_and_vars = optimizer.compute_gradients(cnn.loss) 141 | train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step) 142 | 143 | saver = tf.train.Saver(tf.all_variables(), max_to_keep=20) 144 | 145 | # Keep track of gradient values and sparsity (optional) 146 | for g, v in grads_and_vars: 147 | if g is not None: 148 | tf.summary.histogram("grad_hist/{}".format(v.name), g) 149 | tf.summary.scalar("grad_sparsity/{}".format(v.name), tf.nn.zero_fraction(g)) 150 | tf.summary.histogram(v.name, v) 151 | 152 | # Output directory for models and summaries 153 | timestamp = str(int(time.time())) 154 | out_dir = os.path.abspath(os.path.join(FLAGS.train_dir, "runs", "multitask" + timestamp)) 155 | if not os.path.exists(out_dir): 156 | os.makedirs(out_dir) 157 | print("Writing to {}\n".format(out_dir)) 158 | 159 | checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints")) 160 | checkpoint_prefix = os.path.join(checkpoint_dir, "model") 161 | if not os.path.exists(checkpoint_dir): 162 | os.makedirs(checkpoint_dir) 163 | 164 | # Summaries for loss and accuracy 165 | loss_summary = tf.summary.scalar("loss", cnn.loss) 166 | acc_summary1 = tf.summary.scalar("accuracy1", cnn.accuracy_d) 167 | acc_summary2 = tf.summary.scalar("accuracy2", cnn.accuracy_o) 168 | 169 | # Train Summaries 170 | train_summary_op = tf.summary.merge_all() 171 | train_summary_dir = os.path.join(out_dir, "summaries", "train") 172 | train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph) 173 | 174 | # Dev summaries 175 | dev_summary_op = tf.summary.merge([loss_summary, acc_summary1, acc_summary2]) 176 | dev_summary_dir = os.path.join(out_dir, "summaries", "dev") 177 | dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph) 178 | 179 | # Initialize all variables 180 | sess.run(tf.initialize_all_variables()) 181 | 182 | def train_step(x_batch, y_batch, x_batch2, y_batch2, indi, co_arr): 183 | gamma = [0.5 if i == 1 else 1.0 for i in indi] 184 | gamma = np.asarray(gamma) 185 | 186 | feed_dict = { 187 | cnn.input_tensor: x_batch, 188 | cnn.input_y_description: y_batch, 189 | cnn.dropout_keep_prob: FLAGS.dropout_keep_prob, 190 | cnn.input_tensor_o: x_batch2, 191 | cnn.input_y_operation: y_batch2, 192 | cnn.mask_opr: np.asarray(indi, dtype=float), 193 | cnn.gamma: gamma, 194 | cnn.matrix: co_arr 195 | } 196 | _, step, summaries, loss, accuracy1, accuracy2 = sess.run( 197 | [train_op, global_step, train_summary_op, cnn.loss, cnn.accuracy_d, cnn.accuracy_o], 198 | feed_dict) 199 | time_str = datetime.datetime.now().isoformat() 
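load_coocurrence_matrix and get_coocurrence above read a tab-separated file whose second field packs operation:count pairs joined by underscores (with a trailing underscore), and then look a description up by its position in a parallel list. An equivalent dictionary-based reading, shown only as a sketch with illustrative names:

import numpy as np

def load_cooccurrence_as_dict(path):
    # Each line: "<description>\t<op1>:<count1>_<op2>:<count2>_" (note the trailing "_").
    table = {}
    with open(path) as f:
        for line in f:
            line = line.rstrip("\n")
            if not line:
                continue
            des, oprs = line.split("\t")[0], line.split("\t")[1]
            counts = {}
            for item in oprs.split("_")[:-1]:  # drop the empty piece left by the trailing "_"
                parts = item.split(":")
                counts[parts[0]] = int(parts[1])
            table[des] = counts
    return table

def cooccurrence_column(table, descriptions, operations, default=0.1):
    # Column vector of counts for (description, operation) pairs; unseen pairs
    # fall back to `default` (0.1 in this script).
    col = [table.get(d, {}).get(o, default) for d, o in zip(descriptions, operations)]
    return np.expand_dims(np.asarray(col, dtype=float), axis=1)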
200 | if step % 10 == 0: 201 | print( 202 | "{}: step {}, loss {:g}, acc1 {:g}, acc2 {:g}".format(time_str, step, loss, accuracy1, 203 | accuracy2)) 204 | train_summary_writer.add_summary(summaries, step) 205 | 206 | def dev_step(x_dev, y_batch_dev, x_dev2, y_batch_dev2, indi, co_arr, writer=None): 207 | gamma = [0.5 if i == 1 else 1.0 for i in indi] 208 | gamma = np.asarray(gamma) 209 | feed_dict = { 210 | cnn.input_tensor: x_dev, 211 | cnn.input_y_description: y_batch_dev, 212 | cnn.dropout_keep_prob: 1.0, 213 | cnn.input_y_operation: y_batch_dev2, 214 | cnn.input_tensor_o: x_dev2, 215 | cnn.mask_opr: np.asarray(indi, dtype=float), 216 | cnn.gamma: gamma, 217 | cnn.matrix: co_arr 218 | } 219 | step, summaries, loss, accuracy1, accuracy2, pres1, pres2 = sess.run( 220 | [global_step, dev_summary_op, cnn.loss, cnn.accuracy_d, cnn.accuracy_o, cnn.scores_d, cnn.scores_o], 221 | feed_dict) 222 | if writer: 223 | writer.add_summary(summaries, step) 224 | return loss, accuracy1, accuracy2 225 | 226 | def evaluate(x_dev, y_batch_dev, x_dev2, y_batch_dev2, indi, co_arr): 227 | gamma = [0.5 if i == 1 else 1.0 for i in indi] 228 | gamma = np.asarray(gamma) 229 | feed_dict = { 230 | cnn.input_tensor: x_dev, 231 | cnn.input_y_description: y_batch_dev, 232 | cnn.dropout_keep_prob: 1.0, 233 | cnn.input_y_operation: y_batch_dev2, 234 | cnn.input_tensor_o: x_dev2, 235 | cnn.mask_opr: np.asarray(indi, dtype=float), 236 | cnn.gamma: gamma, 237 | cnn.matrix: co_arr 238 | } 239 | 240 | step, summaries, loss, accuracy1, accuracy2, pres1, pres2 = sess.run( 241 | [global_step, dev_summary_op, cnn.loss, cnn.accuracy_d, cnn.accuracy_o, cnn.scores_d, cnn.scores_o], 242 | feed_dict) 243 | 244 | eval_file = open(out_dir + "/evaluation.txt", "w+") 245 | right_file = open(out_dir + "/right_cases.txt", "w+") 246 | right_file2 = open(out_dir + "/right_cases_operation.txt", "w+") 247 | wrong_file = open(out_dir + "/wrong_cases.txt", "w+") 248 | wrong_file2 = open(out_dir + "/wrong_cases_operation.txt", "w+") 249 | 250 | eval_file.write("Accu1: " + str(accuracy1) + "\n") 251 | eval_file.write("Accu2: " + str(accuracy2) + "\n") 252 | 253 | predictions1 = np.argmax(pres1, 1) 254 | predictions2 = np.argmax(pres2, 1) 255 | labels1 = np.argmax(y_batch_dev, 1) 256 | labels2 = np.argmax(y_batch_dev2, 1) 257 | 258 | def process(indi, tensor): 259 | tmp = [] 260 | ll = len(indi) 261 | for i in range(ll): 262 | if indi[i] == 0: 263 | tmp.append(tensor[i]) 264 | return np.asarray(tmp) 265 | 266 | write_evaluation_file(eval_file, right_file, wrong_file, labels1, predictions1, x1_test, x2_test) 267 | write_evaluation_file(eval_file, right_file2, wrong_file2, labels2, predictions2, x3_test, x4_test, 268 | indi) 269 | 270 | eval_file.write("Parameters:") 271 | for attr, value in sorted(FLAGS.__flags.items()): 272 | eval_file.write("{}={}".format(attr.upper(), value) + "\n") 273 | 274 | return loss, accuracy1, accuracy2 275 | 276 | def dev_whole(x_dev, y_dev, x_dev2, y_dev2, indi, co_dev_arr, writer=None): 277 | batches_dev = inputH.batch_iter(list(zip(x_dev, y_dev, co_dev_arr)), FLAGS.batch_size, 1, shuffle=False) 278 | batches_dev2 = inputH.batch_iter(list(zip(x_dev2, y_dev2, indi)), FLAGS.batch_size, 1, shuffle=False) 279 | losses = [] 280 | accuracies1 = [] 281 | accuracies2 = [] 282 | 283 | batches = zip(batches_dev, batches_dev2) 284 | 285 | for batches_dev, batches_dev2 in batches: 286 | x_batch, y_batch, co_arr = zip(*batches_dev) 287 | x_batch2, y_batch2, indi = zip(*batches_dev2) 288 | loss, accuracy1, accuracy2 = 
dev_step(x_batch, y_batch, x_batch2, y_batch2, indi, co_arr, writer) 289 | losses.append(loss) 290 | accuracies1.append(accuracy1) 291 | accuracies2.append(accuracy2) 292 | return np.mean(np.array(losses)), np.mean(np.array(accuracies1)), np.mean(np.array(accuracies2)) 293 | 294 | def overfit(dev_loss): 295 | n = len(dev_loss) 296 | if n < 5: 297 | return False 298 | for i in xrange(n - 4, n): 299 | if dev_loss[i] > dev_loss[i - 1]: 300 | return False 301 | return True 302 | 303 | # Generate batches 304 | batches = inputH.batch_iter( 305 | list(zip(x_train_tensor, y_train, x_train_tensor_o, y2_train, x_train_indi_o, co_arr_train)), 306 | FLAGS.batch_size, FLAGS.num_epochs) 307 | 308 | # Training loop. For each batch... 309 | dev_loss = [] 310 | dev_loss2 = [] 311 | # batch_d_o = zip(batches, batches2) 312 | for batch in batches: 313 | x_batch, y_batch, x_batch2, y_batch2, indi, co_arr = zip(*batch) 314 | 315 | train_step(x_batch, y_batch, x_batch2, y_batch2, indi, co_arr) 316 | current_step = tf.train.global_step(sess, global_step) 317 | 318 | if current_step % FLAGS.evaluate_every == 0: 319 | 320 | print("\nEvaluation:") 321 | loss, accuracy1, accuracy2 = dev_whole(x_dev_tensor, y_dev, x_dev_tensor_o, y2_dev, x_dev_indi_o, 322 | co_arr_val, writer=dev_summary_writer) 323 | 324 | time_str = datetime.datetime.now().isoformat() 325 | print("{}: dev-aver, loss {:g}, acc {:g}, acc2 {:g}".format(time_str, loss, accuracy1, accuracy2)) 326 | dev_loss.append(accuracy1) 327 | dev_loss2.append(accuracy2) 328 | 329 | print("\nRecently accuracy:") 330 | print dev_loss[-10:] 331 | print dev_loss2[-10:] 332 | 333 | if overfit(dev_loss): 334 | print 'Overfit!! in task1' 335 | break 336 | if overfit(dev_loss2): 337 | print 'Overfit!! in task2' 338 | break 339 | print("") 340 | 341 | if current_step % FLAGS.checkpoint_every == 0: 342 | path = saver.save(sess, checkpoint_prefix, global_step=current_step) 343 | print("Saved model checkpoint to {}\n".format(path)) 344 | 345 | loss, accuracy1, accuracy2 = evaluate(x_test_tensor, y_test, x_test_tensor_o, y2_test, x_test_indi_o, 346 | co_arr_test) 347 | print(loss) 348 | print(accuracy1) 349 | print(accuracy2) 350 | 351 | 352 | if __name__ == '__main__': 353 | main() 354 | -------------------------------------------------------------------------------- /MultiTask_MultiGranModel.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding=utf-8 3 | import numpy as np 4 | import tensorflow as tf 5 | 6 | 7 | class MultiTask_MultiGranModel(object): 8 | def _conv(self, name, in_, ksize, reuse=False): 9 | num_filters = ksize[3] 10 | 11 | with tf.variable_scope(name, reuse=reuse) as scope: 12 | # different CNN for different views 13 | W = tf.Variable(tf.truncated_normal(ksize, stddev=0.1), name="W") 14 | biases = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b") 15 | 16 | # same CNN for different views 17 | # W = tf.get_variable("weights", ksize, initializer=tf.contrib.layers.xavier_initializer()) 18 | # W = tf.get_variable("weights", ksize, initializer=tf.truncated_normal_initializer(stddev=0.1)) 19 | # biases = tf.get_variable("biases", [num_filters], initializer=tf.constant_initializer(0.1)) 20 | 21 | conv = tf.nn.conv2d(in_, W, strides=[1, 1, 1, 1], padding="VALID") 22 | h = tf.nn.relu(tf.nn.bias_add(conv, biases), name=scope.name) 23 | 24 | return h 25 | 26 | def _maxpool(self, name, in_, ksize, strides): 27 | pool = tf.nn.max_pool(in_, ksize=ksize, strides=strides, padding='VALID', 
name=name) 28 | print name, pool.get_shape().as_list() 29 | return pool 30 | 31 | def __init__(self, max_len1, max_len2, filter_sizes, pool_sizes, filter_sizes2, pool_sizes2, num_filters, 32 | l2_reg_lambda=0.0, constraint_lambda=0.0, alpha=0.5, type_CNN=2, view_num=0, view_nums=[]): 33 | channel_num = 4 34 | 35 | # Placeholders for input, output and dropout 36 | self.input_tensor = tf.placeholder(tf.float32, [None, max_len1, max_len1, channel_num], name="input_tensor_description") 37 | self.input_tensor_o = tf.placeholder(tf.float32, [None, max_len2, max_len2, channel_num], name="input_tensor_operation") 38 | 39 | self.input_y_description = tf.placeholder(tf.float32, [None, 2], name="input_y_description") 40 | self.input_y_operation = tf.placeholder(tf.float32, [None, 2], name="input_y_operation") 41 | 42 | self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob") 43 | self.matrix = tf.placeholder(tf.float32, [None, 1], name="cooccurence") 44 | self.constraint_lambda = constraint_lambda 45 | 46 | # Keeping track of l2 regularization loss (optional) 47 | l2_loss_d = tf.constant(0.0) 48 | l2_loss_operation = tf.constant(0.0) 49 | 50 | # Create a convolution + maxpool layer for each filter size 51 | pooled_outputs = [] 52 | pooled_outputs_operation = [] 53 | 54 | input_tensor = tf.expand_dims(self.input_tensor, 4) # N x W x H x V => N x W x H x V x C 55 | input_tensor = tf.transpose(input_tensor, 56 | perm=[3, 0, 1, 2, 4]) # N x W x H x V x C => V x N x W x H x C 57 | 58 | input_tensor_operation = tf.expand_dims(self.input_tensor_o, 4) # N x W x H x V => N x W x H x V x C 59 | input_tensor_operation = tf.transpose(input_tensor_operation, 60 | perm=[3, 0, 1, 2, 4]) # N x W x H x V x C => V x N x W x H x C 61 | 62 | if type_CNN == 2: # multi-view 63 | with tf.name_scope("CNN_Description"): 64 | view_c_num = 0 65 | for i in range(channel_num): 66 | # set reuse True for i > 0, for weight-sharing 67 | reuse_f = (i != 0) 68 | with tf.variable_scope("CNN_Description", reuse=reuse_f): 69 | if len(view_nums) != 0: 70 | if len(view_nums) <= view_c_num or view_nums[view_c_num] != i: 71 | continue 72 | else: 73 | view_c_num += 1 74 | print("AHAA" + str(i) + "\n") 75 | view = tf.gather(input_tensor, i) # N x W x H x C 76 | 77 | filter_shape1 = [filter_sizes[0], filter_sizes[0], 1, num_filters] 78 | filter_shape2 = [filter_sizes[1], filter_sizes[1], num_filters, num_filters * 2] 79 | p_size1 = [1, pool_sizes[0], pool_sizes[0], 1] 80 | p_size2 = [1, pool_sizes[1], pool_sizes[1], 1] 81 | 82 | conv1 = self._conv('conv1', view, filter_shape1, reuse=reuse_f) 83 | pool1 = self._maxpool('pool1', conv1, ksize=p_size1, strides=[1, 1, 1, 1]) 84 | 85 | conv2 = self._conv('conv2', pool1, filter_shape2, reuse=reuse_f) 86 | pool2 = self._maxpool('pool2', conv2, ksize=p_size2, strides=[1, 1, 1, 1]) 87 | 88 | dim1 = np.prod(pool2.get_shape().as_list()[1:]) 89 | reshape = tf.reshape(pool2, [-1, dim1]) 90 | 91 | pooled_outputs.append(reshape) 92 | 93 | with tf.name_scope("CNN_Operation"): 94 | view_c_num = 0 95 | for i in range(channel_num): 96 | # set reuse True for i > 0, for weight-sharing 97 | reuse_f = (i != 0) 98 | 99 | with tf.variable_scope("CNN_Operation", reuse=reuse_f): 100 | if len(view_nums) != 0: 101 | if len(view_nums) <= view_c_num or view_nums[view_c_num] != i: 102 | continue 103 | else: 104 | view_c_num += 1 105 | print("AHAA" + str(i) + "\n") 106 | view = tf.gather(input_tensor_operation, i) # N x W x H x C 107 | 108 | filter_shape1 = [filter_sizes2[0], filter_sizes2[0], 1, 
num_filters / 2] 109 | filter_shape2 = [filter_sizes2[1], filter_sizes2[1], num_filters / 2, num_filters] 110 | p_size1 = [1, pool_sizes2[0], pool_sizes2[0], 1] 111 | p_size2 = [1, pool_sizes2[1], pool_sizes2[1], 1] 112 | 113 | conv1 = self._conv('conv1', view, filter_shape1, reuse=reuse_f) 114 | pool1 = self._maxpool('pool1', conv1, ksize=p_size1, strides=[1, 1, 1, 1]) 115 | 116 | conv2 = self._conv('conv2', pool1, filter_shape2, reuse=reuse_f) 117 | pool2 = self._maxpool('pool2', conv2, ksize=p_size2, strides=[1, 1, 1, 1]) 118 | 119 | dim2 = np.prod(pool2.get_shape().as_list()[1:]) 120 | reshape = tf.reshape(pool2, [-1, dim2]) 121 | 122 | pooled_outputs_operation.append(reshape) 123 | 124 | 125 | view_num_len = len(pooled_outputs) 126 | print("LEN:" + str(view_num_len)) 127 | with tf.name_scope("Descriptipn_view_pooling"): 128 | x = tf.stack(pooled_outputs) # 4 * N * 7744 129 | x = tf.transpose(x, perm=[1, 2, 0]) # N * 7744 * 4 130 | 131 | reshape = tf.reshape(x, [-1, view_num_len]) 132 | print reshape.get_shape().as_list() 133 | 134 | Weights = tf.Variable(tf.random_uniform([view_num_len, 1], 0.0, 1.0), name="W") 135 | 136 | y_d = tf.matmul(reshape, Weights, name="view_pooling") 137 | y_d = tf.reshape(y_d, [-1, dim1]) 138 | print y_d.get_shape().as_list() 139 | 140 | with tf.name_scope("Operation_view_pooling"): 141 | x = tf.stack(pooled_outputs_operation) # 4 * N * 7744 142 | x = tf.transpose(x, perm=[1, 2, 0]) # N * 7744 * 4 143 | reshape = tf.reshape(x, [-1, view_num_len]) 144 | print reshape.get_shape().as_list() 145 | 146 | Weights = tf.Variable(tf.random_uniform([view_num_len, 1], 0.0, 1.0), name="W") 147 | 148 | y_o = tf.matmul(reshape, Weights, name="view_pooling") 149 | y_o = tf.reshape(y_o, [-1, dim2]) 150 | print y_o.get_shape().as_list() 151 | 152 | elif type_CNN == 3: # single view 153 | with tf.name_scope("CNN_Description"): 154 | view = tf.gather(input_tensor, view_num) # N x W x H x C 155 | filter_shape1 = [filter_sizes[0], filter_sizes[0], 1, num_filters] 156 | filter_shape2 = [filter_sizes[1], filter_sizes[1], num_filters, num_filters * 2] 157 | p_size1 = [1, pool_sizes[0], pool_sizes[0], 1] 158 | p_size2 = [1, pool_sizes[1], pool_sizes[1], 1] 159 | 160 | conv1 = self._conv("conv1", view, filter_shape1) 161 | pool1 = self._maxpool('pool1', conv1, ksize=p_size1, strides=[1, 1, 1, 1]) 162 | conv2 = self._conv('conv2', pool1, filter_shape2) 163 | pool2 = self._maxpool('pool2', conv2, ksize=p_size2, strides=[1, 1, 1, 1]) 164 | 165 | dim1 = np.prod(pool2.get_shape().as_list()[1:]) 166 | y_d = tf.reshape(pool2, [-1, dim1]) 167 | 168 | with tf.name_scope("CNN_Operation"): 169 | view = tf.gather(input_tensor_operation, view_num) # N x W x H x C 170 | filter_shape1 = [filter_sizes2[0], filter_sizes2[0], 1, num_filters / 2] 171 | filter_shape2 = [filter_sizes2[1], filter_sizes2[1], num_filters / 2, num_filters] 172 | p_size1 = [1, pool_sizes2[0], pool_sizes2[0], 1] 173 | p_size2 = [1, pool_sizes2[1], pool_sizes2[1], 1] 174 | 175 | conv1 = self._conv('conv1', view, filter_shape1) 176 | pool1 = self._maxpool('pool1', conv1, ksize=p_size1, strides=[1, 1, 1, 1]) 177 | 178 | conv2 = self._conv('conv2', pool1, filter_shape2) 179 | pool2 = self._maxpool('pool2', conv2, ksize=p_size2, strides=[1, 1, 1, 1]) 180 | 181 | dim2 = np.prod(pool2.get_shape().as_list()[1:]) 182 | y_o = tf.reshape(pool2, [-1, dim2]) 183 | 184 | else: # single CNN 185 | with tf.name_scope("CNN_Description"): 186 | filter_shape1 = [filter_sizes[0], filter_sizes[0], 4, num_filters] 187 | filter_shape2 = 
[filter_sizes[1], filter_sizes[1], num_filters, num_filters * 2] 188 | p_size1 = [1, pool_sizes[0], pool_sizes[0], 1] 189 | p_size2 = [1, pool_sizes[1], pool_sizes[1], 1] 190 | 191 | conv1 = self._conv("conv1", self.input_tensor, filter_shape1) 192 | pool1 = self._maxpool('pool1', conv1, ksize=p_size1, strides=[1, 1, 1, 1]) 193 | conv2 = self._conv('conv2', pool1, filter_shape2) 194 | pool2 = self._maxpool('pool2', conv2, ksize=p_size2, strides=[1, 1, 1, 1]) 195 | 196 | dim1 = np.prod(pool2.get_shape().as_list()[1:]) 197 | y_d = tf.reshape(pool2, [-1, dim1]) 198 | 199 | with tf.name_scope("CNN_Operation"): 200 | filter_shape1 = [filter_sizes2[0], filter_sizes2[0], 4, num_filters / 2] 201 | filter_shape2 = [filter_sizes2[1], filter_sizes2[1], num_filters / 2, num_filters] 202 | p_size1 = [1, pool_sizes2[0], pool_sizes2[0], 1] 203 | p_size2 = [1, pool_sizes2[1], pool_sizes2[1], 1] 204 | 205 | conv1 = self._conv('conv1', self.input_tensor_o, filter_shape1) 206 | pool1 = self._maxpool('pool1', conv1, ksize=p_size1, strides=[1, 1, 1, 1]) 207 | 208 | conv2 = self._conv('conv2', pool1, filter_shape2) 209 | pool2 = self._maxpool('pool2', conv2, ksize=p_size2, strides=[1, 1, 1, 1]) 210 | 211 | dim2 = np.prod(pool2.get_shape().as_list()[1:]) 212 | y_o = tf.reshape(pool2, [-1, dim2]) 213 | 214 | # Add dropout 215 | with tf.name_scope("dropout"): 216 | self.h_drop_d = tf.nn.dropout(y_d, self.dropout_keep_prob, name="hidden_output_description_drop") 217 | self.h_drop_o = tf.nn.dropout(y_o, self.dropout_keep_prob, name="hidden_output_operation_drop") 218 | print self.h_drop_d.get_shape().as_list() 219 | print self.h_drop_o.get_shape().as_list() 220 | 221 | with tf.name_scope("FC"): 222 | dim = min(int(dim1 / 2), int(dim2 / 2)) 223 | print("FC DIM:" + str(dim) + "\n") 224 | W1 = tf.Variable(name="W1", initial_value=tf.truncated_normal(shape=[dim1, dim], stddev=0.1)) 225 | b1 = tf.Variable(tf.constant(0.1, shape=[dim]), name="b1") 226 | 227 | self.fc_d = tf.nn.relu(tf.matmul(self.h_drop_d, W1) + b1) 228 | self.fc_drop_d = tf.nn.dropout(self.fc_d, self.dropout_keep_prob) 229 | 230 | W2 = tf.Variable(name="W2", initial_value=tf.truncated_normal(shape=[dim2, dim], stddev=0.1)) 231 | b2 = tf.Variable(tf.constant(0.1, shape=[dim]), name="b2") 232 | 233 | self.fc_o = tf.nn.relu(tf.matmul(self.h_drop_o, W2) + b2) 234 | self.fc_drop_o = tf.nn.dropout(self.fc_o, self.dropout_keep_prob) 235 | 236 | # Share Layer Construction 237 | with tf.name_scope("Multitask"): 238 | 239 | self.shared_layer = tf.add(alpha * self.fc_drop_d, (1 - alpha) * self.fc_drop_o, name="Shared_layer") 240 | # self.shared_layer = tf.div(tf.add(self.h_drop_d, self.h_drop_o), 2, name="Shared_layer") 241 | print self.shared_layer.get_shape().as_list() 242 | 243 | W1 = tf.get_variable(name="tt1_W", shape=[dim], 244 | initializer=tf.truncated_normal_initializer(stddev=0.1)) 245 | W2 = tf.get_variable(name="st1_W", shape=[dim], 246 | initializer=tf.truncated_normal_initializer(stddev=0.1)) 247 | W3 = tf.get_variable(name="st2_W", shape=[dim], 248 | initializer=tf.truncated_normal_initializer(stddev=0.1)) 249 | W4 = tf.get_variable(name="tt2_W", shape=[dim], 250 | initializer=tf.truncated_normal_initializer(stddev=0.1)) 251 | 252 | self.task1_r = tf.add(tf.multiply(self.shared_layer, W2), tf.multiply(self.fc_drop_d, W1), 253 | name="description_r") 254 | self.task2_r = tf.add(tf.multiply(self.shared_layer, W3), tf.multiply(self.fc_drop_o, W4), 255 | name="operation_r") 256 | print self.task1_r.get_shape().as_list() 257 | 258 | with 
tf.name_scope("FC2"): 259 | W1 = tf.Variable(name="W1", initial_value=tf.truncated_normal(shape=[dim, dim / 2], stddev=0.1)) 260 | b1 = tf.Variable(tf.constant(0.1, shape=[dim / 2]), name="b1") 261 | 262 | self.task1_representation = tf.nn.relu(tf.matmul(self.task1_r, W1) + b1) 263 | self.task1_representation = tf.nn.dropout(self.task1_representation, self.dropout_keep_prob) 264 | 265 | W2 = tf.Variable(name="W2", initial_value=tf.truncated_normal(shape=[dim, dim / 2], stddev=0.1)) 266 | b2 = tf.Variable(tf.constant(0.1, shape=[dim / 2]), name="b2") 267 | 268 | self.task2_representation = tf.nn.relu(tf.matmul(self.task2_r, W2) + b2) 269 | self.task2_representation = tf.nn.dropout(self.task2_representation, self.dropout_keep_prob) 270 | 271 | # Final (unnormalized) scores and predictions 272 | with tf.name_scope("output"): 273 | W_d = tf.get_variable(name="W_d", shape=[dim / 2, 2], 274 | initializer=tf.truncated_normal_initializer(stddev=0.1)) 275 | b_d = tf.Variable(tf.constant(0.1, shape=[2]), name="b_d") 276 | 277 | l2_loss_d += tf.nn.l2_loss(W_d) 278 | l2_loss_d += tf.nn.l2_loss(b_d) 279 | 280 | W_o = tf.get_variable(name="W_o", shape=[dim / 2, 2], 281 | initializer=tf.truncated_normal_initializer(stddev=0.1)) 282 | b_o = tf.Variable(tf.constant(0.1, shape=[2]), name="b_o") 283 | 284 | l2_loss_operation += tf.nn.l2_loss(W_o) 285 | l2_loss_operation += tf.nn.l2_loss(b_o) 286 | 287 | self.scores_d = tf.nn.xw_plus_b(self.task1_representation, W_d, b_d, name="scores1") 288 | self.scores_o = tf.nn.xw_plus_b(self.task2_representation, W_o, b_o, name="scores2") 289 | 290 | self.relation_d = tf.nn.softmax(self.scores_d, name="relation1") 291 | self.relation_o = tf.nn.softmax(self.scores_o, name="relation2") 292 | 293 | self.predictions_d = tf.argmax(self.scores_d, 1, name="predictions1") 294 | self.predictions_o = tf.argmax(self.scores_o, 1, name="predictions2") 295 | 296 | # Calculate Mean cross-entropy loss 297 | with tf.name_scope("loss"): 298 | losses1 = tf.nn.softmax_cross_entropy_with_logits(logits=self.scores_d, labels=self.input_y_description) 299 | losses2 = tf.nn.softmax_cross_entropy_with_logits(logits=self.scores_o, labels=self.input_y_operation) 300 | 301 | gap = tf.reduce_sum(tf.square(self.relation_d - self.relation_o), axis=1, keep_dims=True) 302 | constraints = tf.multiply(self.matrix, gap) 303 | self.constraints = tf.identity(tf.reduce_mean(constraints), name="constraints") 304 | 305 | self.loss = tf.reduce_mean(losses1) + tf.reduce_mean(losses2) + l2_reg_lambda * ( 306 | l2_loss_d + l2_loss_operation) + self.constraint_lambda * tf.reduce_mean(constraints) 307 | self.loss = tf.identity(self.loss, name="loss") 308 | 309 | # Accuracy 310 | with tf.name_scope("accuracy"): 311 | correct_predictions_d = tf.equal(self.predictions_d, tf.argmax(self.input_y_description, 1)) 312 | correct_predictions_o = tf.equal(self.predictions_o, tf.argmax(self.input_y_operation, 1)) 313 | self.accuracy_d = tf.reduce_mean(tf.cast(correct_predictions_d, "float"), name="accuracy_d") 314 | self.accuracy_o = tf.reduce_mean(tf.cast(correct_predictions_o, "float"), name="accuracy_o") 315 | -------------------------------------------------------------------------------- /Multi_task_Arch.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding=utf-8 3 | 4 | import datetime 5 | import os 6 | import time 7 | import copy 8 | 9 | import numpy as np 10 | import tensorflow as tf 11 | 12 | from util.util import write_evaluation_file 13 | from 
DLDisambiguation.util.input_helpers import InputHelper 14 | from MultiTask_MultiGranModel import MultiTask_MultiGranModel 15 | from tensor import Tensor 16 | 17 | # Parameters 18 | # ================================================== 19 | 20 | # Model Hyperparameters 21 | tf.flags.DEFINE_integer("embedding_dim", 100, "Dimensionality of character embedding (default: 100)") 22 | tf.flags.DEFINE_string("filter_sizes", "4,3", "Comma-separated filter sizes (default: '2,3')") 23 | tf.flags.DEFINE_string("filter_sizes2", "3,3", "Comma-separated filter sizes (default: '2,3')") 24 | tf.flags.DEFINE_string("pool_sizes", "2,2", "Comma-separated filter sizes (default: '2,3')") 25 | tf.flags.DEFINE_string("pool_sizes2", "3,3", "Comma-separated filter sizes (default: '2,3')") 26 | tf.flags.DEFINE_string("view_nums", "0,1,2,3", "view combination") 27 | 28 | tf.flags.DEFINE_integer("num_filters", 16, "Number of filters per filter size (default: 64)") 29 | tf.flags.DEFINE_float("dropout_keep_prob", 0.5, "Dropout keep probability (default: 0.5)") 30 | tf.flags.DEFINE_float("l2_reg_lambda", 100, "L2 regularizaion lambda (default: 0.0)") 31 | tf.flags.DEFINE_float("con_lambda", 0.1, "constraint regularizaion lambda (default: 0.0)") 32 | tf.flags.DEFINE_float("alpha", 0.5, "parameter for shared layer") 33 | tf.flags.DEFINE_integer("type_CNN", 2, "type of CNN") 34 | tf.flags.DEFINE_integer("view_num", 3, "type of CNN") 35 | tf.flags.DEFINE_boolean("early_stop", True, "whether early stopping is used") 36 | tf.flags.DEFINE_integer("early_stop_num", 11, "number of epoch in early stopping") 37 | 38 | # Data Parameter 39 | tf.flags.DEFINE_integer("max_sequence_len", 10, "max document length of input") 40 | tf.flags.DEFINE_integer("max_sequence_len2", 20, "max document length of input") 41 | tf.flags.DEFINE_integer("most_words", 300000, "Most number of words in vocab (default: 300000)") 42 | 43 | # Training parameters 44 | tf.flags.DEFINE_integer("seed", 123, "Random seed (default: 123)") 45 | tf.flags.DEFINE_string("train_dir", "./", "Training dir root") 46 | tf.flags.DEFINE_integer("batch_size", 128, "Batch Size (default: 64)") 47 | tf.flags.DEFINE_integer("num_epochs", 50, "Number of training epochs") 48 | tf.flags.DEFINE_float("eval_split", 0.1, "Use how much data for evaluating (default: 0.1)") 49 | tf.flags.DEFINE_integer("evaluate_every", 100, "Evaluate model on dev set after this many steps (default: 100)") 50 | tf.flags.DEFINE_integer("checkpoint_every", 100, "Save model after this many steps (default: 100)") 51 | 52 | FLAGS = tf.flags.FLAGS 53 | FLAGS._parse_flags() 54 | print("\nParameters:") 55 | for attr, value in sorted(FLAGS.__flags.items()): 56 | print("{}={}".format(attr.upper(), value)) 57 | print("") 58 | 59 | 60 | def generate_Tensor(mention, entity, mention2, entity2, mention3, entity3, max_len, task_n): 61 | lstm_dir = "Description1501058401" if task_n == 1 else "Operation1501000120" 62 | bilstm_dir = os.path.join("./Sentence_Modeling/runs", lstm_dir) 63 | 64 | tensor = Tensor(mention + mention2 + mention3, entity + entity2 + entity3, len(mention + mention2 + mention3), 65 | max_len, task_n, bilstm_dir).get_tensor() 66 | tensor = tensor.transpose((0, 2, 3, 1)) 67 | 68 | g1 = len(mention) 69 | g2 = len(mention + mention2) 70 | return tensor[:g1], tensor[g1:g2], tensor[g2:] 71 | 72 | 73 | def prepara_tensor_y(inputH, training_path, dev_path, test_path, max_len): 74 | sep = "\t" 75 | x1_train, x2_train, x3_train, x4_train, y_train, y2_train = inputH.getTsvTestData_Mul(training_path, sep, max_len) 
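The joint loss assembled in MultiTask_MultiGranModel.py above adds, on top of the two cross-entropy terms and the L2 penalties, a co-occurrence constraint: the squared gap between the two tasks' softmax outputs, weighted per sample by the description/operation co-occurrence value fed in through the `matrix` placeholder, so that frequently co-occurring pairs are pushed toward consistent decisions. A standalone restatement of that term (a sketch using the same TF 1.x-style calls as the repository; the function wrapper is illustrative):

import tensorflow as tf

def cooccurrence_constraint(scores_d, scores_o, cooccur, constraint_lambda):
    # scores_*: unnormalized [batch, 2] logits of the description and operation tasks;
    # cooccur:  [batch, 1] co-occurrence weights (the `matrix` placeholder).
    relation_d = tf.nn.softmax(scores_d)
    relation_o = tf.nn.softmax(scores_o)
    gap = tf.reduce_sum(tf.square(relation_d - relation_o), axis=1, keep_dims=True)
    return constraint_lambda * tf.reduce_mean(tf.multiply(cooccur, gap))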
76 | x1_dev, x2_dev, x3_dev, x4_dev, y_dev, y2_dev = inputH.getTsvTestData_Mul(dev_path, sep, max_len) 77 | x1_test, x2_test, x3_test, x4_test, y_test, y2_test = inputH.getTsvTestData_Mul(test_path, sep, max_len) 78 | 79 | x_train_tensor, x_dev_tensor, x_test_tensor = generate_Tensor(x1_train, x2_train, x1_dev, x2_dev, x1_test, x2_test, 80 | max_len, 1) 81 | 82 | x_train_tensor_o, x_dev_tensor_o, x_test_tensor_o = generate_Tensor(x3_train, x4_train, x3_dev, x4_dev, x3_test, 83 | x4_test, max_len, 2) 84 | 85 | np.save("train_des", x_train_tensor) 86 | np.save("dev_des", x_dev_tensor) 87 | np.save("test_des", x_test_tensor) 88 | 89 | np.save("train_opr", x_train_tensor_o) 90 | np.save("dev_opr", x_dev_tensor_o) 91 | np.save("test_opr", x_test_tensor_o) 92 | 93 | return x_train_tensor, y_train, x_dev_tensor, y_dev, x_test_tensor, y_test, \ 94 | x_train_tensor_o, y2_train, x_dev_tensor_o, y2_dev, x_test_tensor_o, y2_test 95 | 96 | 97 | def get_coocurrence(des_e_names, des_opr_map, x2_test, x4_test, N): 98 | res = [] 99 | length = len(x2_test) 100 | 101 | for i in range(length): 102 | des = x2_test[i] 103 | opr = x4_test[i] 104 | 105 | if des_e_names.__contains__(des): 106 | index = des_e_names.index(des) 107 | des_opr = des_opr_map[index] 108 | if opr in des_opr.keys(): 109 | N = des_opr[str(opr)] 110 | res.append(N) 111 | return np.expand_dims(np.asarray(res), axis=1) 112 | 113 | 114 | def load_coocurrence_matrix(filename): 115 | t = open(filename) 116 | line = t.readline() 117 | des_e_names = [] 118 | des_opr_map = [] 119 | 120 | while line != "": 121 | des, oprs = line.split("\t")[0], line.split("\t")[1] 122 | des_e_names.append(des) 123 | 124 | oprs_num = oprs.split("_")[:-1] 125 | tmp = {} 126 | for opr in oprs_num: 127 | opr_name, num = opr.split(":")[0], int(opr.split(":")[1]) 128 | tmp[opr_name] = num 129 | des_opr_map.append(copy.deepcopy(tmp)) 130 | line = t.readline() 131 | return des_e_names, des_opr_map 132 | 133 | 134 | def main(): 135 | # Load data 136 | print("Loading data...") 137 | inputH = InputHelper() 138 | 139 | date_f = "0823" 140 | train_f = "./data/exp" + date_f + "/data_augment_train.txt" 141 | test_f = "./data/exp" + date_f + "/data_augment_test.txt" 142 | dev_f = test_f 143 | 144 | our_dir = "./Tensor_files/" + date_f + "/Length" + str(FLAGS.max_sequence_len) + "/" 145 | x_train_tensor = np.load(our_dir + "train_des.npy") 146 | # x_dev_tensor = np.load(our_dir + "dev_des.npy") 147 | x_test_tensor = np.load(our_dir + "test_des.npy") 148 | x_dev_tensor = x_test_tensor 149 | 150 | our_dir = "./Tensor_files/" + date_f + "/Length" + str(FLAGS.max_sequence_len2) + "/" 151 | x_train_tensor_o = np.load(our_dir + "train_opr.npy") 152 | # x_dev_tensor_o = np.load(our_dir + "dev_opr.npy") 153 | x_test_tensor_o = np.load(our_dir + "test_opr.npy") 154 | x_dev_tensor_o = x_test_tensor_o 155 | 156 | def normalize(a): 157 | amin, amax = a.min(), a.max() # 求最大最小值 158 | a = (a - amin) / (amax - amin) # (矩阵元素-最小值)/(最大值-最小值) 159 | return a 160 | 161 | def normalize_tensor(t): 162 | t[:, :, :, 0] = normalize(t[:, :, :, 0]) 163 | t[:, :, :, 1] = normalize(t[:, :, :, 1]) 164 | t[:, :, :, 2] = normalize(t[:, :, :, 2]) 165 | t[:, :, :, 3] = normalize(t[:, :, :, 3]) 166 | return t 167 | 168 | x_test_tensor[:, :, :, 3] = normalize(x_test_tensor[:, :, :, 3]) 169 | x_train_tensor[:, :, :, 3] = normalize(x_train_tensor[:, :, :, 3]) 170 | x_test_tensor_o[:, :, :, 3] = normalize(x_test_tensor_o[:, :, :, 3]) 171 | x_train_tensor_o[:, :, :, 3] = normalize(x_train_tensor_o[:, :, :, 3]) 172 | # 
x_test_tensor = normalize_tensor(x_test_tensor) 173 | # x_test_tensor_o = normalize_tensor(x_test_tensor_o) 174 | # x_train_tensor = normalize_tensor(x_train_tensor) 175 | # x_train_tensor_o = normalize_tensor(x_train_tensor_o) 176 | 177 | sep = "\t" 178 | x1_train, x2_train, x3_train, x4_train, y_train, y2_train = inputH.getTsvTestData_Mul_Labels(train_f, sep, 179 | FLAGS.max_sequence_len) 180 | # x1_dev, x2_dev, x3_dev, x4_dev, y_dev, y2_dev = inputH.getTsvTestData_Mul_Labels(dev_f, sep, FLAGS.max_sequence_len) 181 | x1_test, x2_test, x3_test, x4_test, y_test, y2_test = inputH.getTsvTestData_Mul_Labels(test_f, sep, 182 | FLAGS.max_sequence_len) 183 | x1_dev, x2_dev, x3_dev, x4_dev, y_dev, y2_dev = x1_test, x2_test, x3_test, x4_test, y_test, y2_test 184 | 185 | des_e_names, des_opr_map = load_coocurrence_matrix("coorrence_file.txt") 186 | N_default = 0.01 187 | co_arr_test = get_coocurrence(des_e_names, des_opr_map, x2_test, x4_test, N_default) 188 | co_arr_train = get_coocurrence(des_e_names, des_opr_map, x2_train, x4_train, N_default) 189 | # co_arr_val = get_coocurrence(des_e_names, des_opr_map, x2_dev, x4_dev) 190 | co_arr_val = co_arr_test 191 | 192 | with tf.Graph().as_default(): 193 | 194 | sess = tf.Session() 195 | with sess.as_default(): 196 | cnn = MultiTask_MultiGranModel( 197 | max_len1=FLAGS.max_sequence_len, 198 | max_len2=FLAGS.max_sequence_len2, 199 | filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))), 200 | filter_sizes2=list(map(int, FLAGS.filter_sizes2.split(","))), 201 | pool_sizes=list(map(int, FLAGS.pool_sizes.split(","))), 202 | pool_sizes2=list(map(int, FLAGS.pool_sizes2.split(","))), 203 | num_filters=FLAGS.num_filters, 204 | l2_reg_lambda=FLAGS.l2_reg_lambda, 205 | constraint_lambda=FLAGS.con_lambda, 206 | alpha=FLAGS.alpha, 207 | type_CNN=FLAGS.type_CNN, 208 | view_num=FLAGS.view_num, 209 | view_nums=list(map(int, FLAGS.view_nums.split(","))) 210 | ) 211 | 212 | # Define Training procedure 213 | global_step = tf.Variable(0, name="global_step", trainable=False) 214 | optimizer = tf.train.AdamOptimizer(1e-3) 215 | 216 | grads_and_vars = optimizer.compute_gradients(cnn.loss) 217 | train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step) 218 | 219 | saver = tf.train.Saver(tf.all_variables(), max_to_keep=20) 220 | 221 | # Keep track of gradient values and sparsity (optional) 222 | for g, v in grads_and_vars: 223 | if g is not None: 224 | tf.summary.histogram("grad_hist/{}".format(v.name), g) 225 | tf.summary.scalar("grad_sparsity/{}".format(v.name), tf.nn.zero_fraction(g)) 226 | tf.summary.histogram(v.name, v) 227 | 228 | # Output directory for models and summaries 229 | timestamp = str(int(time.time())) 230 | out_dir = os.path.abspath(os.path.join(FLAGS.train_dir, "runs", "alpha_E", "multitask" + timestamp)) 231 | if not os.path.exists(out_dir): 232 | os.makedirs(out_dir) 233 | print("Writing to {}\n".format(out_dir)) 234 | 235 | checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints")) 236 | checkpoint_prefix = os.path.join(checkpoint_dir, "model") 237 | if not os.path.exists(checkpoint_dir): 238 | os.makedirs(checkpoint_dir) 239 | 240 | # Summaries for loss and accuracy 241 | loss_summary = tf.summary.scalar("loss", cnn.loss) 242 | constraint_summary = tf.summary.scalar("constraints", cnn.constraints) 243 | acc_summary1 = tf.summary.scalar("accuracy1", cnn.accuracy_d) 244 | acc_summary2 = tf.summary.scalar("accuracy2", cnn.accuracy_o) 245 | 246 | # Train Summaries 247 | train_summary_op = tf.summary.merge_all() 248 | 
train_summary_dir = os.path.join(out_dir, "summaries", "train") 249 | train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph) 250 | 251 | # Dev summaries 252 | dev_summary_op = tf.summary.merge([loss_summary, acc_summary1, acc_summary2, constraint_summary]) 253 | dev_summary_dir = os.path.join(out_dir, "summaries", "dev") 254 | dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph) 255 | 256 | # Initialize all variables 257 | sess.run(tf.initialize_all_variables()) 258 | 259 | def train_step(x_batch, y_batch, x_batch2, y_batch2, co_arr): 260 | feed_dict = { 261 | cnn.input_tensor: x_batch, 262 | cnn.input_y_description: y_batch, 263 | cnn.dropout_keep_prob: FLAGS.dropout_keep_prob, 264 | cnn.input_tensor_o: x_batch2, 265 | cnn.input_y_operation: y_batch2, 266 | cnn.matrix: co_arr, 267 | } 268 | _, step, summaries, loss, accuracy1, accuracy2 = sess.run( 269 | [train_op, global_step, train_summary_op, cnn.loss, cnn.accuracy_d, cnn.accuracy_o], 270 | feed_dict) 271 | time_str = datetime.datetime.now().isoformat() 272 | if step % 10 == 0: 273 | print( 274 | "{}: step {}, loss {:g}, acc1 {:g}, acc2 {:g}".format(time_str, step, loss, accuracy1, 275 | accuracy2)) 276 | train_summary_writer.add_summary(summaries, step) 277 | return accuracy1, accuracy2, loss 278 | 279 | def dev_step(x_dev, y_batch_dev, x_dev2, y_batch_dev2, co_arr, writer=None): 280 | feed_dict = { 281 | cnn.input_tensor: x_dev, 282 | cnn.input_y_description: y_batch_dev, 283 | cnn.dropout_keep_prob: 1.0, 284 | cnn.input_y_operation: y_batch_dev2, 285 | cnn.input_tensor_o: x_dev2, 286 | cnn.matrix: co_arr 287 | } 288 | step, summaries, loss, accuracy1, accuracy2, pres1, pres2 = sess.run( 289 | [global_step, dev_summary_op, cnn.loss, cnn.accuracy_d, cnn.accuracy_o, cnn.scores_d, cnn.scores_o], 290 | feed_dict) 291 | if writer: 292 | writer.add_summary(summaries, step) 293 | return loss, accuracy1, accuracy2 294 | 295 | def evaluate(x_dev, y_batch_dev, x_dev2, y_batch_dev2, co_arr): 296 | feed_dict = { 297 | cnn.input_tensor: x_dev, 298 | cnn.input_y_description: y_batch_dev, 299 | cnn.dropout_keep_prob: 1.0, 300 | cnn.input_y_operation: y_batch_dev2, 301 | cnn.input_tensor_o: x_dev2, 302 | cnn.matrix: co_arr 303 | } 304 | 305 | step, summaries, loss, accuracy1, accuracy2, pres1, pres2 = sess.run( 306 | [global_step, dev_summary_op, cnn.loss, cnn.accuracy_d, cnn.accuracy_o, cnn.scores_d, cnn.scores_o], 307 | feed_dict) 308 | 309 | eval_file = open(out_dir + "/evaluation.txt", "w+") 310 | right_file = open(out_dir + "/right_cases.txt", "w+") 311 | right_file2 = open(out_dir + "/right_cases_operation.txt", "w+") 312 | wrong_file = open(out_dir + "/wrong_cases.txt", "w+") 313 | wrong_file2 = open(out_dir + "/wrong_cases_operation.txt", "w+") 314 | 315 | eval_file.write("Accu1: " + str(accuracy1) + "\n") 316 | eval_file.write("Accu2: " + str(accuracy2) + "\n") 317 | 318 | predictions1 = np.argmax(pres1, 1) 319 | predictions2 = np.argmax(pres2, 1) 320 | labels1 = np.argmax(y_batch_dev, 1) 321 | labels2 = np.argmax(y_batch_dev2, 1) 322 | write_evaluation_file(eval_file, right_file, wrong_file, labels1, predictions1, x1_test, x2_test) 323 | write_evaluation_file(eval_file, right_file2, wrong_file2, labels2, predictions2, x3_test, x4_test) 324 | 325 | eval_file.write("Parameters:") 326 | for attr, value in sorted(FLAGS.__flags.items()): 327 | eval_file.write("{}={}".format(attr.upper(), value) + "\n") 328 | 329 | return loss, accuracy1, accuracy2 330 | 331 | def dev_whole(x_dev, y_dev, x_dev2, y_dev2, 
332 |                 batches_dev = inputH.batch_iter(list(zip(x_dev, y_dev, co_arr)), FLAGS.batch_size, 1, shuffle=False)
333 |                 batches_dev2 = inputH.batch_iter(list(zip(x_dev2, y_dev2)), FLAGS.batch_size, 1, shuffle=False)
334 |                 losses = []
335 |                 accuracies1 = []
336 |                 accuracies2 = []
337 |
338 |                 batches = zip(batches_dev, batches_dev2)
339 |
340 |                 for batches_dev, batches_dev2 in batches:
341 |                     x_batch, y_batch, co_arr_ = zip(*batches_dev)
342 |                     x_batch2, y_batch2 = zip(*batches_dev2)
343 |                     loss, accuracy1, accuracy2 = dev_step(x_batch, y_batch, x_batch2, y_batch2, co_arr_)
344 |                     losses.append(loss)
345 |                     accuracies1.append(accuracy1)
346 |                     accuracies2.append(accuracy2)
347 |
348 |                 return np.mean(np.array(losses)), np.mean(np.array(accuracies1)), np.mean(np.array(accuracies2))
349 |
350 |             def overfit(dev_loss, accu):  # True when accu is no higher than each of the previous FLAGS.early_stop_num - 1 recorded values
351 |                 num = FLAGS.early_stop_num
352 |                 n = len(dev_loss)
353 |                 if n < num:
354 |                     return False
355 |
356 |                 for i in range(n - num, n - 1):
357 |                     if dev_loss[i] < accu:
358 |                         return False
359 |                 print(dev_loss)
360 |                 print(accu)
361 |                 return True
362 |
363 |             # Generate batches
364 |             batches = inputH.batch_iter(list(zip(x_train_tensor, y_train, x_train_tensor_o, y2_train, co_arr_train)),
365 |                                         FLAGS.batch_size, FLAGS.num_epochs)
366 |
367 |             # Training loop. For each batch...
368 |             dev_loss = []
369 |             train_loss = []
370 |
371 |             train_accu = []
372 |             train_accu2 = []
373 |             dev_accu = []
374 |             dev_accu2 = []
375 |             # batch_d_o = zip(batches, batches2)
376 |             optimum_accu1 = 0
377 |             optimum_accu2 = 0
378 |             data_num = len(y_train)
379 |             num_batches_per_epoch = int(data_num / FLAGS.batch_size)
380 |             # t = num_batches_per_epoch / 2
381 |             optimum_loss = 1000
382 |
383 |             for batch in batches:
384 |                 x_batch, y_batch, x_batch2, y_batch2, co_arr_batch = zip(*batch)
385 |
386 |                 acc1, acc2, loss_train = train_step(x_batch, y_batch, x_batch2, y_batch2, co_arr_batch)
387 |                 train_accu.append(acc1)
388 |                 train_accu2.append(acc2)
389 |                 train_loss.append(loss_train)
390 |                 current_step = tf.train.global_step(sess, global_step)
391 |                 if current_step % num_batches_per_epoch == 0:
392 |
393 |                     print("\nEvaluation:")
394 |                     loss, accuracy1, accuracy2 = dev_whole(x_dev_tensor, y_dev, x_dev_tensor_o, y2_dev, co_arr_val,
395 |                                                            writer=dev_summary_writer)
396 |
397 |                     summary = tf.Summary()
398 |
399 |                     summary.value.add(tag="Accuracy_Dev", simple_value=accuracy1)
400 |                     summary.value.add(tag="Accuracy2_Dev", simple_value=accuracy2)
401 |                     summary.value.add(tag="Loss_Dev", simple_value=loss)
402 |                     dev_summary_writer.add_summary(summary, current_step)
403 |
404 |                     time_str = datetime.datetime.now().isoformat()
405 |                     print("{}: dev-aver, loss {:g}, acc {:g}, acc2 {:g}".format(time_str, loss, accuracy1, accuracy2))
406 |                     dev_accu.append(accuracy1)
407 |                     dev_accu2.append(accuracy2)
408 |                     dev_loss.append(loss)
409 |                     print("\nRecent accuracy:")
410 |                     print(dev_accu[-10:])
411 |                     print(dev_accu2[-10:])
412 |
413 |                     # if loss < optimum_loss:
414 |                     #     optimum_loss = loss
415 |                     #     stop_early = 0
416 |                     #     optimum_accu1 = accuracy1
417 |                     #     optimum_accu2 = accuracy2
418 |                     #     path = saver.save(sess, checkpoint_prefix, global_step=current_step)
419 |                     #     print("Saved model checkpoint to {}\n".format(path))
420 |                     # else:
421 |                     #     stop_early += 1
422 |                     #     if stop_early == 10:
423 |                     #         break
424 |                     if FLAGS.early_stop:
425 |                         if overfit(dev_accu, accuracy1) or overfit(dev_accu2, accuracy2):
426 |                             print('Overfit!!')
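                            # Dev accuracy appears not to have improved over the last
                            # FLAGS.early_stop_num epochs, so report where training
                            # stopped and leave the training loop.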
427 |                             print(current_step)
428 |                             print(current_step / num_batches_per_epoch)
429 |                             break
430 |                     print("")
431 |
432 |                     if accuracy1 > optimum_accu1 and accuracy2 > optimum_accu2:
433 |                         optimum_accu1 = accuracy1
434 |                         optimum_accu2 = accuracy2
435 |                         path = saver.save(sess, checkpoint_prefix, global_step=current_step)
436 |                         print("Saved model checkpoint to {}\n".format(path))
437 |
438 |                     print("Optimum_accu1: " + str(optimum_accu1))
439 |                     print("Optimum_accu2: " + str(optimum_accu2))
440 |
441 |             print("Optimum_accu1: " + str(optimum_accu1))
442 |             print("Optimum_accu2: " + str(optimum_accu2))
443 |
444 |             import matplotlib.pyplot as plt
445 |             # def plot_plots(y1, y2, name_task, type_eval):
446 |             #     x1 = np.arange(len(y1))
447 |             #     x2 = np.arange(len(y2))
448 |             #     p1, = plt.plot(x1, y1, 'b', label="Validation")
449 |             #     p2, = plt.plot(x2, y2, 'r', label="Train")
450 |             #     plt.legend(handles=[p1, p2], numpoints=1)  # make legend
451 |             #     plt.title(name_task + "_" + type_eval)
452 |             #     plt.savefig(os.path.join(out_dir, name_task + "_" + type_eval + ".png"))
453 |             #
454 |             # plot_plots(dev_accu, train_accu, "Disease", "Accu")
455 |             # plot_plots(dev_accu2, train_accu2, "Operation", "Accu")
456 |             # plot_plots(dev_loss, train_loss, "MTL", "Loss")
457 |
458 |             # evaluate the result with the best model
459 |             ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
460 |             checkpoint_file = ckpt.model_checkpoint_path
461 |             graph = tf.Graph()
462 |
463 |             with graph.as_default():
464 |                 sess = tf.Session()
465 |                 with sess.as_default():
466 |                     # Load the saved meta graph and restore variables
467 |                     saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
468 |                     sess.run(tf.global_variables_initializer())
469 |                     saver.restore(sess, checkpoint_file)
470 |
471 |                     cooccur = graph.get_operation_by_name("cooccurence").outputs[0]
472 |                     input_t1 = graph.get_operation_by_name("input_tensor_description").outputs[0]
473 |                     input_t2 = graph.get_operation_by_name("input_tensor_operation").outputs[0]
474 |                     input_y1 = graph.get_operation_by_name("input_y_description").outputs[0]
475 |                     input_y2 = graph.get_operation_by_name("input_y_operation").outputs[0]
476 |                     prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]
477 |
478 |                     loss_opr = graph.get_operation_by_name("loss/loss").outputs[0]
479 |                     prediction = graph.get_operation_by_name("output/predictions1").outputs[0]
480 |                     prediction2 = graph.get_operation_by_name("output/predictions2").outputs[0]
481 |                     accu = graph.get_operation_by_name("accuracy/accuracy_d").outputs[0]
482 |                     accu2 = graph.get_operation_by_name("accuracy/accuracy_o").outputs[0]
483 |
484 |                     loss, pres1, pres2, accuracy1, accuracy2 = sess.run(
485 |                         [loss_opr, prediction, prediction2, accu, accu2],
486 |                         {input_t1: x_test_tensor, input_y1: y_test, cooccur: co_arr_test,
487 |                          input_t2: x_test_tensor_o, input_y2: y2_test, prob: 1.0})
488 |
489 |                     eval_file = open(out_dir + "/evaluation.txt", "w+")
490 |                     right_file = open(out_dir + "/right_cases.txt", "w+")
491 |                     wrong_file = open(out_dir + "/wrong_cases.txt", "w+")
492 |                     right_file2 = open(out_dir + "/right_cases_opr.txt", "w+")
493 |                     wrong_file2 = open(out_dir + "/wrong_cases_opr.txt", "w+")
494 |
495 |                     eval_file.write("Accu1: " + str(accuracy1) + "\n")
496 |                     eval_file.write("Accu2: " + str(accuracy2) + "\n")
497 |                     # eval_file.write("Stopped at: " + str(int(current_step / num_batches_per_epoch)) + "\n")
498 |                     eval_file.write("Default: " + str(N_default) + "\n")
499 |
500 |                     labels1 = np.argmax(y_test, 1)
501 |                     labels2 = np.argmax(y2_test, 1)
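                    # write_evaluation_file is a project helper defined outside this file;
                    # judging from the call sites it compares the gold labels with the
                    # predictions, appends the resulting metrics to eval_file, and sorts
                    # the paired input texts into the right_/wrong_ case files.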
502 |                     write_evaluation_file(eval_file, right_file, wrong_file, labels1, pres1, x1_test, x2_test)
503 |                     write_evaluation_file(eval_file, right_file2, wrong_file2, labels2, pres2, x3_test, x4_test)
504 |
505 |                     eval_file.write("Parameters:")
506 |                     for attr, value in sorted(FLAGS.__flags.items()):
507 |                         eval_file.write("{}={}".format(attr.upper(), value) + "\n")
508 |
509 |                     print("loss:" + str(loss))
510 |                     print("accuracy1:" + str(accuracy1))
511 |                     print("accuracy2:" + str(accuracy2))
512 |
513 |
514 | if __name__ == '__main__':
515 |     main()
516 |
--------------------------------------------------------------------------------
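The co-occurrence prior fed into cnn.matrix above comes from load_coocurrence_matrix() and get_coocurrence(), which are defined elsewhere in this repository (see coorrence.py). A minimal sketch of the intended behaviour follows; the file format, exact signatures, and per-example shape are assumptions for illustration, not the project's actual implementation:

    # Assumed format of coorrence_file.txt: one tab-separated record per line,
    # "description_entity<TAB>operation_entity<TAB>count".
    def load_coocurrence_matrix(path):
        des_e_names = []   # description-side entity names, in file order
        des_opr_map = {}   # (description entity, operation entity) -> count
        for line in open(path):
            des, opr, count = line.rstrip("\n").split("\t")
            des_e_names.append(des)
            des_opr_map[(des, opr)] = float(count)
        return des_e_names, des_opr_map

    def get_coocurrence(des_e_names, des_opr_map, des_entities, opr_entities, n_default):
        # des_e_names is kept only to mirror the call signature used in the script.
        # One value per example: the co-occurrence statistic of the paired entities,
        # falling back to n_default (N_default = 0.01 above) for unseen pairs.
        return [des_opr_map.get((d, o), n_default) for d, o in zip(des_entities, opr_entities)]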