├── README.md
└── code
    ├── main_with_args.py
    ├── utils.py
    ├── generate_data.py
    ├── event_chain.py
    ├── evaluate.py
    └── gnn_with_args.py

/README.md:
--------------------------------------------------------------------------------
# ConstructingNEEG_IJCAI_2018

## Paper Data and Code
The data and code for our IJCAI-ECAI 2018 paper: [Constructing Narrative Event Evolutionary Graph for Script Event Prediction](https://arxiv.org/abs/1805.05081).

The data used in our paper can be found [here](https://drive.google.com/open?id=1WFBDL_zfNC1sSuz0dmaMux3w-OB_hUui). The code here includes PyTorch implementations of the PairLSTM baseline and of our SGNN model. Code for the EventComp model, and for extracting narrative event chains from the raw NYT news corpus, can be found [here](http://mark.granroth-wilding.co.uk/papers/what_happens_next/).

## How to run the code?

First download the data from [Google Drive](https://drive.google.com/open?id=1WFBDL_zfNC1sSuz0dmaMux3w-OB_hUui). You also need Python 3.5 or 3.6, PyTorch 0.3.0, and an Nvidia GPU (e.g. a Titan Xp or Tesla P100). Run `python3 evaluate.py` to reproduce the results reported in the paper, `python3 event_chain.py` to train a PairLSTM model, and `python3 main_with_args.py` to train an SGNN model. I have written annotations in the code; please read them before running.

Extracting the preprocessed data from the raw NYT/Gigaword corpus is a very time-consuming pipeline. The good news is that you do not need to download the Gigaword corpus yourself: all the data needed to run the code is provided.

**Original Data**:
`encoding_with_args.csv` and `data2.csv` are the constructed NEEG described in the paper. `corpus_index_train0.txt`, `corpus_index_dev.txt` and `corpus_index_test.txt` are the original training, development and test sets used to train the SGNN model. Use the `pickle` module to load them.


## Requirements
* Linux OS
* Python 3.5 or 3.6
* PyTorch 0.3.0
* GPU (Tesla P100 or others)

--------------------------------------------------------------------------------
/code/main_with_args.py:
--------------------------------------------------------------------------------
#coding:utf8
# Run this code to train our SGNN model.
# Generally a model can be trained in about 1400 seconds on one Tesla P100 GPU
# (training terminates automatically via early stopping).
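# Usage sketch (follows the sys.argv unpacking in main() below): hyperparameters either fall
# back to the defaults set in main(), or can be passed as eight positional command-line
# arguments when len(sys.argv)==9, in this order:
#   python3 main_with_args.py <L2_penalty> <MARGIN> <LR> <T> <BATCH_SIZE> <EPOCHES> <PATIENTS> <METRIC>
# e.g. the following reproduces the default configuration:
#   python3 main_with_args.py 0.00001 0.015 0.0001 2 1000 520 500 euclid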
from gnn_with_args import *

def main():
    dev_data=Data_data(pickle.load(open('../data/corpus_index_dev_with_args_all_chain.data','rb')))
    test_data=Data_data(pickle.load(open('../data/corpus_index_test_with_args_all_chain.data','rb')))
    train_data=Data_data(pickle.load(open('../data/corpus_index_train0_with_args_all_chain.data','rb')))
    ans=pickle.load(open('../data/dev.answer','rb'))
    dev_index=pickle.load(open('../data/dev_index.pickle','rb'))
    print('train data prepare done')
    word_id,id_vec,word_vec=get_hash_for_word('../data/deepwalk_128_unweighted_with_args.txt',verb_net3_mapping_with_args)
    print('word vector prepare done')

    HIDDEN_DIM=128*4  # set unconditionally so the command-line branch below also has a value
    if len(sys.argv)==9:
        L2_penalty,MARGIN,LR,T,BATCH_SIZE,EPOCHES,PATIENTS,METRIC=sys.argv[1:]
    else:
        L2_penalty=0.00001
        LR=0.0001
        T=2
        MARGIN=0.015
        BATCH_SIZE=1000
        EPOCHES=520
        PATIENTS=500
        METRIC='euclid'

    # Per-metric margins (and the tuned settings for the euclidean metric).
    if METRIC=='euclid':
        L2_penalty=0.00001
        LR=0.0001
        BATCH_SIZE=1000
        MARGIN=0.015
        PATIENTS=500
    if METRIC=='dot':
        # LR=0.004
        MARGIN=0.5
    if METRIC=='cosine':
        # LR=0.001
        MARGIN=0.05
    if METRIC=='norm_euclid':
        # LR=0.0011
        MARGIN=0.07
    if METRIC=='manhattan':
        # LR=0.0015
        MARGIN=4.5
    if METRIC=='multi':
        # LR=0.001
        MARGIN=0.015
    if METRIC=='nonlinear':
        # LR=0.001
        MARGIN=0.015

    start=time.time()
    best_acc,best_epoch=train(dev_index,word_vec,ans,train_data,dev_data,test_data,float(L2_penalty),float(MARGIN),float(LR),int(T),int(BATCH_SIZE),int(EPOCHES),int(PATIENTS),int(HIDDEN_DIM),METRIC)
    end=time.time()
    print("Run time: %f s" % (end-start))
    with open('best_result.txt','a') as f:
        f.write('Best Acc: %f, Epoch %d , L2_penalty=%s ,MARGIN=%s ,LR=%s ,T=%s ,BATCH_SIZE=%s ,EPOCHES=%s ,PATIENTS=%s, HIDDEN_DIM=%s, METRIC=%s\n' % (best_acc,best_epoch,L2_penalty,MARGIN,LR,T,BATCH_SIZE,EPOCHES,PATIENTS,HIDDEN_DIM,METRIC))


if __name__ == '__main__':
    main()

# Event representation: multi-dimensional distributed representations of event chains, with frequency and co-occurrence count information added
# Graph construction: count bigrams, filter low-frequency ones, remove self-loops, handle high-frequency events, build the graph, compute probabilities
# Context Extension By Ranking
# Highway Networks
# SRU
# Attention
# Subgraph Embedding
# Adam

--------------------------------------------------------------------------------
/code/utils.py:
--------------------------------------------------------------------------------
#coding:utf8
# This file defines some helper functions.
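# A quick map of the helpers defined below:
#   trans_to_cuda(variable)      - move a tensor/Variable to the GPU when CUDA is available
#   id_to_vec(emb_file)          - parse a text embedding file into an {id: vector} dict
#                                  (the two-token header line is skipped, id '0' is zeroed out)
#   word_to_id(voc_file)         - map each word to its id, read from the vocabulary file
#   get_word_vec(id_vec)         - stack the id->vector dict into an embedding matrix ordered by id
#   get_hash_for_word(emb, voc)  - convenience wrapper returning (word_id, id_vec, word_vec)
#   Data_data                    - batching wrapper over (A, input_data, targets) tensors, optionally
#                                  concatenating two datasets, with a next_batch(batch_size) method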

import torch
import pickle
import torch.nn as nn
from sklearn import preprocessing
from torch.nn import Parameter,Module
from torch.autograd import Variable
import torch.autograd as autograd
import torch.nn.functional as F
import torch.optim as optim
import pprint,copy,os,random,math,sys,pickle,time
import numpy as np
import networkx as nx
torch.manual_seed(1)
use_cuda = torch.cuda.is_available()
# from whim_common.utils.progress import get_progress_bar
# use_cuda = False
from multiprocessing import Process,Pool

verb_net3_mapping_with_args='../data/encoding_with_args.csv'


def trans_to_cuda(variable):
    if use_cuda:
        return variable.cuda()
    else:
        return variable

def id_to_vec(emb_file):
    dic={}
    for s in open(emb_file):
        s=s.strip().split()
        if len(s)==2:
            continue
        dic[s[0]]=np.array(s[1:],dtype=np.float32)
    dic['0']=np.zeros(len(dic['0']),dtype=np.float32)
    return dic

def word_to_id(voc_file):
    dic={}
    for s in open(voc_file):
        s=s.strip().split()
        dic[s[1]]=s[0]
    return dic

def get_word_vec(id_vec):
    word_vec=[]
    for i in range(len(id_vec)):
        word_vec.append(id_vec[str(i)])
    return np.array(word_vec,dtype=np.float32)

def get_hash_for_word(emb_file,voc_file):
    id_vec=id_to_vec(emb_file)
    return word_to_id(voc_file),id_vec,get_word_vec(id_vec)

class Data_data(object):
    def __init__(self, questions,questions2=None):
        super(Data_data, self).__init__()
        if questions2 is None:
            self.A,self.input_data,self.targets= questions[0],questions[1],questions[2]
        else:
            self.A = torch.cat((questions[0],questions2[0]))
            self.input_data = torch.cat((questions[1],questions2[1]))
            self.targets = torch.cat((questions[2],questions2[2]))
        self.corpus_length=len(self.targets)
        self.start=0
    def next_batch(self,batch_size):
        start=self.start
        end=(self.start+batch_size) if (self.start+batch_size)<=self.corpus_length else self.corpus_length
        self.start=(self.start+batch_size)
        if self.start