├── include
│   ├── __pycache__
│   │   ├── Init.cpython-35.pyc
│   │   ├── Init.cpython-36.pyc
│   │   ├── Load.cpython-35.pyc
│   │   ├── Test.cpython-35.pyc
│   │   ├── Test.cpython-36.pyc
│   │   ├── Config.cpython-35.pyc
│   │   ├── Config.cpython-36.pyc
│   │   ├── Model.cpython-35.pyc
│   │   ├── Model.cpython-36.pyc
│   │   ├── Model_o.cpython-35.pyc
│   │   ├── Model_o.cpython-36.pyc
│   │   └── Test_o.cpython-35.pyc
│   ├── Load.py
│   ├── Config.py
│   ├── Init.py
│   ├── Test.py
│   └── Model.py
├── data
│   └── README.md
├── main.py
└── README.md
/include/Load.py:
--------------------------------------------------------------------------------
import numpy as np


# load a file and return a list of tuples, each containing the first `num` integers of a line
def loadfile(fn, num=1):
    print('loading a file...' + fn)
    ret = []
    with open(fn, encoding='utf-8') as f:
        for line in f:
            th = line[:-1].split('\t')
            x = []
            for i in range(num):
                x.append(int(th[i]))
            ret.append(tuple(x))
    return ret
--------------------------------------------------------------------------------
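A minimal sketch of how `loadfile` is meant to be used: every dataset file is a plain-text table of tab-separated integer ids, one record per line. The toy file written below is purely illustrative (the real files come with the downloaded datasets):

```python
from include.Load import loadfile

# hypothetical toy triple file: each line is "head_id<TAB>relation_id<TAB>tail_id"
with open('toy_triples', 'w', encoding='utf-8') as f:
    f.write('0\t0\t1\n')
    f.write('1\t2\t3\n')

print(loadfile('toy_triples', 3))  # -> [(0, 0, 1), (1, 2, 3)]
```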
/include/Config.py:
--------------------------------------------------------------------------------
import tensorflow as tf


class Config:
    language = 'ja_en'  # zh_en | ja_en | fr_en
    e1 = 'data/' + language + '/ent_ids_1'
    e2 = 'data/' + language + '/ent_ids_2'
    ill = 'data/' + language + '/ref_ent_ids'
    kg1 = 'data/' + language + '/triples_1'
    kg2 = 'data/' + language + '/triples_2'
    epochs = 600
    dim = 300
    act_func = tf.nn.relu
    alpha = 0.1
    beta = 0.3
    gamma = 1.0  # margin in the margin-based loss
    k = 125  # number of negative samples for each positive one
    seed = 3  # use 30% of the seed alignments for training
--------------------------------------------------------------------------------
/data/README.md:
--------------------------------------------------------------------------------
# Description
> Please download the [datasets](https://drive.google.com/drive/folders/13u-4r4aJbjhUPRbDXrVFA3QfQS0y_8Ye?usp=sharing) in advance.

There are three cross-lingual datasets in this folder:
- fr-en
- ja-en
- zh-en

Taking the DBP15K (ZH-EN) dataset as an example, the folder "zh_en" contains:
* ent_ids_1: ids for entities in the source KG (ZH);
* ent_ids_2: ids for entities in the target KG (EN);
* ref_ent_ids: entity links encoded by ids;
* triples_1: relation triples encoded by ids in the source KG (ZH);
* triples_2: relation triples encoded by ids in the target KG (EN);
* zh_vectorList.json: the input entity feature matrix initialized by word vectors.
--------------------------------------------------------------------------------
/include/Init.py:
--------------------------------------------------------------------------------
import tensorflow as tf
import numpy as np

'''
Adapted from https://github.com/tkipf/gcn
'''

def uniform(shape, scale=0.05, name=None):
    """Uniform init."""
    initial = tf.random_uniform(shape, minval=-scale, maxval=scale, dtype=tf.float32)
    return tf.Variable(initial, name=name)


def glorot(shape, name=None):
    """Glorot & Bengio (AISTATS 2010) init."""
    init_range = np.sqrt(6.0 / (shape[0] + shape[1]))
    initial = tf.random_uniform(shape, minval=-init_range, maxval=init_range, dtype=tf.float32)
    return tf.Variable(initial, name=name)


def zeros(shape, name=None):
    """All zeros."""
    initial = tf.zeros(shape, dtype=tf.float32)
    return tf.Variable(initial, name=name)


def ones(shape, name=None):
    """All ones."""
    initial = tf.ones(shape, dtype=tf.float32)
    return tf.Variable(initial, name=name)
--------------------------------------------------------------------------------
/include/Test.py:
--------------------------------------------------------------------------------
import numpy as np
import scipy.spatial  # import the subpackage explicitly so that scipy.spatial.distance is available


def get_hits(vec, test_pair, top_k=(1, 10, 50, 100)):
    Lvec = np.array([vec[e1] for e1, e2 in test_pair])
    Rvec = np.array([vec[e2] for e1, e2 in test_pair])
    sim = scipy.spatial.distance.cdist(Lvec, Rvec, metric='cityblock')
    top_lr = [0] * len(top_k)
    for i in range(Lvec.shape[0]):
        rank = sim[i, :].argsort()
        rank_index = np.where(rank == i)[0][0]
        for j in range(len(top_k)):
            if rank_index < top_k[j]:
                top_lr[j] += 1
    top_rl = [0] * len(top_k)
    for i in range(Rvec.shape[0]):
        rank = sim[:, i].argsort()
        rank_index = np.where(rank == i)[0][0]
        for j in range(len(top_k)):
            if rank_index < top_k[j]:
                top_rl[j] += 1
    print('For each left:')
    for i in range(len(top_lr)):
        print('Hits@%d: %.2f%%' % (top_k[i], top_lr[i] / len(test_pair) * 100))
    print('For each right:')
    for i in range(len(top_rl)):
        print('Hits@%d: %.2f%%' % (top_k[i], top_rl[i] / len(test_pair) * 100))
--------------------------------------------------------------------------------
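A quick, self-contained smoke test for `get_hits`, useful for checking the evaluation output format before training. The embedding table and the id pairs below are synthetic and purely illustrative; run it from the repository root so that `include` is importable:

```python
import numpy as np
from include.Test import get_hits

rng = np.random.RandomState(0)
base = rng.rand(10, 4)
# 20 x 4 embedding table: left entities 0-9, right entities 10-19 are slightly perturbed copies
vec = np.concatenate([base, base + 0.01 * rng.rand(10, 4)], axis=0)
test_pair = [(i, 10 + i) for i in range(10)]
get_hits(vec, test_pair, top_k=(1, 5))  # prints Hits@1 and Hits@5 in both directions
```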
/main.py:
--------------------------------------------------------------------------------
import tensorflow as tf
from include.Config import Config
from include.Model import build, training
from include.Test import get_hits
from include.Load import *

import warnings
warnings.filterwarnings("ignore")

'''
Follow the code style of GCN-Align:
https://github.com/1049451037/GCN-Align
'''

seed = 12306
np.random.seed(seed)
tf.set_random_seed(seed)

if __name__ == '__main__':
    e = len(set(loadfile(Config.e1, 1)) | set(loadfile(Config.e2, 1)))

    ILL = loadfile(Config.ill, 2)
    illL = len(ILL)
    np.random.shuffle(ILL)
    train = np.array(ILL[:illL // 10 * Config.seed])
    test = ILL[illL // 10 * Config.seed:]

    KG1 = loadfile(Config.kg1, 3)
    KG2 = loadfile(Config.kg2, 3)

    output_layer, loss = build(
        Config.dim, Config.act_func, Config.alpha, Config.beta, Config.gamma, Config.k, Config.language[0:2], e, train, KG1 + KG2)
    vec, J = training(output_layer, loss, 0.001,
                      Config.epochs, train, e, Config.k, test)
    print('loss:', J)
    print('Result:')
    get_hits(vec, test)
--------------------------------------------------------------------------------
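With `Config.seed = 3`, the slicing above uses the first 30% of the shuffled reference links for training and holds out the remaining 70% for testing. A quick sanity check of the arithmetic (15,000 is the usual number of reference links in a DBP15K language pair, used here only for illustration):

```python
illL = 15000                      # number of reference entity links (illustrative)
seed = 3                          # Config.seed, i.e. 3/10 of the links
print(illL // 10 * seed)          # 4500 links for training
print(illL - illL // 10 * seed)   # 10500 links held out for testing
```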
/README.md:
--------------------------------------------------------------------------------
# RDGCN

Source code and datasets for the IJCAI 2019 paper: ***[Relation-Aware Entity Alignment for Heterogeneous Knowledge Graphs](https://arxiv.org/pdf/1908.08210.pdf)***.

Initial datasets are from [GCN-Align](https://github.com/1049451037/GCN-Align) and [JAPE](https://github.com/nju-websoft/JAPE).

## Dependencies

* Python>=3.5
* TensorFlow>=1.8.0
* Scipy>=1.1.0
* Numpy

> Due to limited GPU memory, we ran our code on CPUs (40 Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz).

## Datasets

Please first download the datasets [here](https://drive.google.com/drive/folders/13u-4r4aJbjhUPRbDXrVFA3QfQS0y_8Ye?usp=sharing) and extract them into the `data/` directory.

There are three cross-lingual datasets in this folder:
- fr-en
- ja-en
- zh-en

Taking the DBP15K (ZH-EN) dataset as an example, the folder "zh_en" contains:
* ent_ids_1: ids for entities in the source KG (ZH);
* ent_ids_2: ids for entities in the target KG (EN);
* ref_ent_ids: entity links encoded by ids;
* triples_1: relation triples encoded by ids in the source KG (ZH);
* triples_2: relation triples encoded by ids in the target KG (EN);
* zh_vectorList.json: the input entity feature matrix initialized by word vectors.

## Running

* Modify the language or other settings in *include/Config.py*
* cd to the directory containing *main.py*
* run *main.py*

> Due to the instability of embedding-based methods, it is acceptable for the results to fluctuate a little (±1%) across repeated runs.

> If you have any questions about reproduction, please feel free to email wyting@pku.edu.cn.

## Citation

If you use this model or code, please cite it as follows:

*Yuting Wu, Xiao Liu, Yansong Feng, Zheng Wang, Rui Yan, Dongyan Zhao. Relation-Aware Entity Alignment for Heterogeneous Knowledge Graphs. In Proceedings of the Twenty-Eighth International Joint Conference on Artificial Intelligence, IJCAI-19, pages 5278-5284, 2019.*

```
@inproceedings{ijcai2019-733,
  title={Relation-Aware Entity Alignment for Heterogeneous Knowledge Graphs},
  author={Wu, Yuting and Liu, Xiao and Feng, Yansong and Wang, Zheng and Yan, Rui and Zhao, Dongyan},
  booktitle={Proceedings of the Twenty-Eighth International Joint Conference on Artificial Intelligence, {IJCAI-19}},
  pages={5278--5284},
  year={2019},
}
```
--------------------------------------------------------------------------------
/include/Model.py:
--------------------------------------------------------------------------------
import math
from .Init import *
from include.Test import get_hits
import scipy.spatial  # import the subpackage explicitly so that scipy.spatial.distance is available
import json


def rfunc(KG, e):
    head = {}
    tail = {}
    cnt = {}
    for tri in KG:
        if tri[1] not in cnt:
            cnt[tri[1]] = 1
            head[tri[1]] = set([tri[0]])
            tail[tri[1]] = set([tri[2]])
        else:
            cnt[tri[1]] += 1
            head[tri[1]].add(tri[0])
            tail[tri[1]].add(tri[2])
    r_num = len(head)
    head_r = np.zeros((e, r_num))
    tail_r = np.zeros((e, r_num))
    r_mat_ind = []
    r_mat_val = []
    for tri in KG:
        head_r[tri[0]][tri[1]] = 1
        tail_r[tri[2]][tri[1]] = 1
        r_mat_ind.append([tri[0], tri[2]])
        r_mat_val.append(tri[1])
    r_mat = tf.SparseTensor(
        indices=r_mat_ind, values=r_mat_val, dense_shape=[e, e])

    return head, tail, head_r, tail_r, r_mat


def get_mat(e, KG):
    du = [{e_id} for e_id in range(e)]
    for tri in KG:
        if tri[0] != tri[2]:
            du[tri[0]].add(tri[2])
            du[tri[2]].add(tri[0])
    du = [len(d) for d in du]
    M = {}
    for tri in KG:
        if tri[0] == tri[2]:
            continue
        if (tri[0], tri[2]) not in M:
            M[(tri[0], tri[2])] = 1
        else:
            pass
        if (tri[2], tri[0]) not in M:
            M[(tri[2], tri[0])] = 1
        else:
            pass

    for i in range(e):
        M[(i, i)] = 1
    return M, du


# get a sparse tensor based on relational triples
def get_sparse_tensor(e, KG):
    print('getting a sparse tensor...')
    M, du = get_mat(e, KG)
    ind = []
    val = []
    M_arr = np.zeros((e, e))
    for fir, sec in M:
        ind.append((sec, fir))
        val.append(M[(fir, sec)] / math.sqrt(du[fir]) / math.sqrt(du[sec]))
        M_arr[fir][sec] = 1.0
    M = tf.SparseTensor(indices=ind, values=val, dense_shape=[e, e])

    return M, M_arr


# add a layer
def add_diag_layer(inlayer, dimension, M, act_func, dropout=0.0, init=ones):
    inlayer = tf.nn.dropout(inlayer, 1 - dropout)
    print('adding a diag layer...')
    w0 = init([1, dimension])
    tosum = tf.sparse_tensor_dense_matmul(M, tf.multiply(inlayer, w0))
    if act_func is None:
        return tosum
    else:
        return act_func(tosum)


def add_full_layer(inlayer, dimension_in, dimension_out, M, act_func, dropout=0.0, init=glorot):
    inlayer = tf.nn.dropout(inlayer, 1 - dropout)
    print('adding a full layer...')
    w0 = init([dimension_in, dimension_out])
    tosum = tf.sparse_tensor_dense_matmul(M, tf.matmul(inlayer, w0))
    if act_func is None:
        return tosum
    else:
        return act_func(tosum)

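# Note on the attention layers below: r_mat holds the relation id of every (head, tail) edge of
# the primal graph, so looking up r_mat.values in dual_transform gives one attention logit per
# edge, derived from the dual (relation) representations; a sparse softmax normalizes these
# logits row-wise before neighbour features are aggregated. add_self_att_layer and
# add_dual_att_layer run GAT-style attention over the dense dual relation graph, with bias_mat
# masking relation pairs that are not connected in dual_A.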
def add_sparse_att_layer(inlayer, dual_layer, r_mat, act_func, e):
    dual_transform = tf.reshape(tf.layers.conv1d(
        tf.expand_dims(dual_layer, 0), 1, 1), (-1, 1))
    logits = tf.reshape(tf.nn.embedding_lookup(
        dual_transform, r_mat.values), [-1])
    print('adding sparse attention layer...')
    lrelu = tf.SparseTensor(indices=r_mat.indices,
                            values=tf.nn.leaky_relu(logits),
                            dense_shape=(r_mat.dense_shape))
    coefs = tf.sparse_softmax(lrelu)
    vals = tf.sparse_tensor_dense_matmul(coefs, inlayer)
    if act_func is None:
        return vals
    else:
        return act_func(vals)


def add_dual_att_layer(inlayer, inlayer2, adj_mat, act_func, hid_dim):
    in_fts = tf.layers.conv1d(tf.expand_dims(inlayer2, 0), hid_dim, 1)
    f_1 = tf.reshape(tf.layers.conv1d(in_fts, 1, 1), (-1, 1))
    f_2 = tf.reshape(tf.layers.conv1d(in_fts, 1, 1), (-1, 1))
    logits = f_1 + tf.transpose(f_2)
    print('adding dual attention layer...')
    adj_tensor = tf.constant(adj_mat, dtype=tf.float32)
    bias_mat = -1e9 * (1.0 - (adj_mat > 0))
    logits = tf.multiply(adj_tensor, logits)
    coefs = tf.nn.softmax(tf.nn.leaky_relu(logits) + bias_mat)

    vals = tf.matmul(coefs, inlayer)
    if act_func is None:
        return vals
    else:
        return act_func(vals)


def add_self_att_layer(inlayer, adj_mat, act_func, hid_dim):
    in_fts = tf.layers.conv1d(tf.expand_dims(
        inlayer, 0), hid_dim, 1, use_bias=False)
    f_1 = tf.reshape(tf.layers.conv1d(in_fts, 1, 1), (-1, 1))
    f_2 = tf.reshape(tf.layers.conv1d(in_fts, 1, 1), (-1, 1))
    logits = f_1 + tf.transpose(f_2)
    print('adding self attention layer...')
    adj_tensor = tf.constant(adj_mat, dtype=tf.float32)
    logits = tf.multiply(adj_tensor, logits)
    bias_mat = -1e9 * (1.0 - (adj_mat > 0))
    coefs = tf.nn.softmax(tf.nn.leaky_relu(logits) + bias_mat)

    vals = tf.matmul(coefs, inlayer)
    if act_func is None:
        return vals
    else:
        return act_func(vals)


def highway(layer1, layer2, dimension):
    kernel_gate = glorot([dimension, dimension])
    bias_gate = zeros([dimension])
    transform_gate = tf.matmul(layer1, kernel_gate) + bias_gate
    transform_gate = tf.nn.sigmoid(transform_gate)
    carry_gate = 1.0 - transform_gate
    return transform_gate * layer2 + carry_gate * layer1


def compute_r(inlayer, head_r, tail_r, dimension):
    head_l = tf.transpose(tf.constant(head_r, dtype=tf.float32))
    tail_l = tf.transpose(tf.constant(tail_r, dtype=tf.float32))
    L = tf.matmul(head_l, inlayer) / \
        tf.expand_dims(tf.reduce_sum(head_l, axis=-1), -1)
    R = tf.matmul(tail_l, inlayer) / \
        tf.expand_dims(tf.reduce_sum(tail_l, axis=-1), -1)
    r_embeddings = tf.concat([L, R], axis=-1)
    return r_embeddings


def get_dual_input(inlayer, head, tail, head_r, tail_r, dimension):
    dual_X = compute_r(inlayer, head_r, tail_r, dimension)
    print('computing the dual input...')
    count_r = len(head)
    dual_A = np.zeros((count_r, count_r))
    for i in range(count_r):
        for j in range(count_r):
            a_h = len(head[i] & head[j]) / len(head[i] | head[j])
            a_t = len(tail[i] & tail[j]) / len(tail[i] | tail[j])
            dual_A[i][j] = a_h + a_t
    return dual_X, dual_A

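# Tiny worked example of the dual adjacency computed above (sets invented for illustration):
# if head[0] = {1, 2}, head[1] = {2, 3}, tail[0] = {4} and tail[1] = {4}, then
# a_h = |{2}| / |{1, 2, 3}| = 1/3 and a_t = |{4}| / |{4}| = 1.0, so dual_A[0][1] ~= 1.33.
# Relation pairs that share many head/tail entities therefore get strongly weighted edges
# in the dual (relation) graph.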
def get_input_layer(e, dimension, lang):
    print('adding the primal input layer...')
    with open(file='data/' + lang + '_en/' + lang + '_vectorList.json', mode='r', encoding='utf-8') as f:
        embedding_list = json.load(f)
        print(len(embedding_list), 'rows,', len(embedding_list[0]), 'columns.')
    input_embeddings = tf.convert_to_tensor(embedding_list)
    ent_embeddings = tf.Variable(input_embeddings)
    return tf.nn.l2_normalize(ent_embeddings, 1)


def get_loss(outlayer, ILL, gamma, k):
    print('getting loss...')
    left = ILL[:, 0]
    right = ILL[:, 1]
    t = len(ILL)
    left_x = tf.nn.embedding_lookup(outlayer, left)
    right_x = tf.nn.embedding_lookup(outlayer, right)
    A = tf.reduce_sum(tf.abs(left_x - right_x), 1)
    neg_left = tf.placeholder(tf.int32, [t * k], "neg_left")
    neg_right = tf.placeholder(tf.int32, [t * k], "neg_right")
    neg_l_x = tf.nn.embedding_lookup(outlayer, neg_left)
    neg_r_x = tf.nn.embedding_lookup(outlayer, neg_right)
    B = tf.reduce_sum(tf.abs(neg_l_x - neg_r_x), 1)
    C = - tf.reshape(B, [t, k])
    D = A + gamma
    L1 = tf.nn.relu(tf.add(C, tf.reshape(D, [t, 1])))
    neg_left = tf.placeholder(tf.int32, [t * k], "neg2_left")
    neg_right = tf.placeholder(tf.int32, [t * k], "neg2_right")
    neg_l_x = tf.nn.embedding_lookup(outlayer, neg_left)
    neg_r_x = tf.nn.embedding_lookup(outlayer, neg_right)
    B = tf.reduce_sum(tf.abs(neg_l_x - neg_r_x), 1)
    C = - tf.reshape(B, [t, k])
    L2 = tf.nn.relu(tf.add(C, tf.reshape(D, [t, 1])))
    return (tf.reduce_sum(L1) + tf.reduce_sum(L2)) / (2.0 * k * t)


def build(dimension, act_func, alpha, beta, gamma, k, lang, e, ILL, KG):
    tf.reset_default_graph()
    primal_X_0 = get_input_layer(e, dimension, lang)
    M, M_arr = get_sparse_tensor(e, KG)
    head, tail, head_r, tail_r, r_mat = rfunc(KG, e)

    print('first interaction...')
    dual_X_1, dual_A_1 = get_dual_input(
        primal_X_0, head, tail, head_r, tail_r, dimension)
    dual_H_1 = add_self_att_layer(dual_X_1, dual_A_1, tf.nn.relu, 600)
    primal_H_1 = add_sparse_att_layer(
        primal_X_0, dual_H_1, r_mat, tf.nn.relu, e)
    primal_X_1 = primal_X_0 + alpha * primal_H_1

    print('second interaction...')
    dual_X_2, dual_A_2 = get_dual_input(
        primal_X_1, head, tail, head_r, tail_r, dimension)
    dual_H_2 = add_dual_att_layer(
        dual_H_1, dual_X_2, dual_A_2, tf.nn.relu, 600)
    primal_H_2 = add_sparse_att_layer(
        primal_X_1, dual_H_2, r_mat, tf.nn.relu, e)
    primal_X_2 = primal_X_0 + beta * primal_H_2

    print('gcn layers...')
    gcn_layer_1 = add_diag_layer(
        primal_X_2, dimension, M, act_func, dropout=0.0)
    gcn_layer_1 = highway(primal_X_2, gcn_layer_1, dimension)
    gcn_layer_2 = add_diag_layer(
        gcn_layer_1, dimension, M, act_func, dropout=0.0)
    output_layer = highway(gcn_layer_1, gcn_layer_2, dimension)

    loss = get_loss(output_layer, ILL, gamma, k)
    return output_layer, loss


# get negative samples
def get_neg(ILL, output_layer, k):
    neg = []
    t = len(ILL)
    ILL_vec = np.array([output_layer[e1] for e1 in ILL])
    KG_vec = np.array(output_layer)
    sim = scipy.spatial.distance.cdist(ILL_vec, KG_vec, metric='cityblock')
    for i in range(t):
        rank = sim[i, :].argsort()
        neg.append(rank[0:k])

    neg = np.array(neg)
    neg = neg.reshape((t * k,))
    return neg

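# Training-loop note: every 10 epochs the current embeddings are passed to get_neg (above),
# which picks, for each left (resp. right) entity of a training pair, its k nearest entities
# under cityblock distance to serve as negative counterparts. The sampled ids are fed into the
# placeholders created in get_loss ("neg_left:0", "neg_right:0", "neg2_left:0", "neg2_right:0"),
# while neg_left / neg2_right simply repeat the positive left / right entity ids k times.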
def training(output_layer, loss, learning_rate, epochs, ILL, e, k, test):
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)
    print('initializing...')
    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)
    print('running...')
    J = []
    t = len(ILL)
    ILL = np.array(ILL)
    L = np.ones((t, k)) * (ILL[:, 0].reshape((t, 1)))
    neg_left = L.reshape((t * k,))
    L = np.ones((t, k)) * (ILL[:, 1].reshape((t, 1)))
    neg2_right = L.reshape((t * k,))
    for i in range(epochs):
        if i % 10 == 0:
            out = sess.run(output_layer)
            neg2_left = get_neg(ILL[:, 1], out, k)
            neg_right = get_neg(ILL[:, 0], out, k)
            feeddict = {"neg_left:0": neg_left,
                        "neg_right:0": neg_right,
                        "neg2_left:0": neg2_left,
                        "neg2_right:0": neg2_right}

        _, th = sess.run([train_step, loss], feed_dict=feeddict)
        if i % 10 == 0:
            th, outvec = sess.run([loss, output_layer], feed_dict=feeddict)
            J.append(th)
            get_hits(outvec, test)

        print('%d/%d' % (i + 1, epochs), 'epochs...', th)
    outvec = sess.run(output_layer)
    sess.close()
    return outvec, J
--------------------------------------------------------------------------------