├── .gitignore
├── KGEAttack
├── ConvE
│ ├── FB15k-237.tar.gz
│ ├── WN18.tar.gz
│ ├── WN18RR.tar.gz
│ ├── clustering_elbow.ipynb
│ ├── com_add_attack_1.py
│ ├── com_add_attack_2.py
│ ├── com_add_attack_3.py
│ ├── create_clusters.py
│ ├── criage_add_attack_1.py
│ ├── criage_inverter.py
│ ├── criage_model.py
│ ├── dataset.py
│ ├── decoy_test.py
│ ├── elbow_plots
│ │ ├── FB15k-237_complex.png
│ │ ├── FB15k-237_conve.png
│ │ ├── FB15k-237_distmult.png
│ │ ├── FB15k-237_transe.png
│ │ ├── WN18RR_complex.png
│ │ ├── WN18RR_conve.png
│ │ ├── WN18RR_distmult.png
│ │ └── WN18RR_transe.png
│ ├── evaluation.py
│ ├── grad_add_attack.py
│ ├── ijcai_add_attack_1.py
│ ├── inst_add_attack.py
│ ├── inv_add_attack_1.py
│ ├── inv_add_attack_2.py
│ ├── inv_add_attack_3.py
│ ├── main.py
│ ├── model.py
│ ├── preprocess.py
│ ├── rand_add_attack_1.py
│ ├── rand_add_attack_2.py
│ ├── select_examples.ipynb
│ ├── select_targets.py
│ ├── sym_add_attack_1.py
│ ├── sym_add_attack_2.py
│ ├── sym_add_attack_3.py
│ ├── utils.py
│ └── wrangle_KG.py
├── Readme.md
├── complex_FB15k-237.sh
├── complex_WN18.sh
├── complex_WN18RR.sh
├── compute_decoy_metrics_FB15k-237.sh
├── compute_decoy_metrics_WN18RR.sh
├── conve_FB15k-237.sh
├── conve_WN18.sh
├── conve_WN18RR.sh
├── distmult_FB15k-237.sh
├── distmult_WN18.sh
├── distmult_WN18RR.sh
├── grad_add_attack_FB15k-237.sh
├── grad_add_attack_WN18RR.sh
├── inst_add_attack_FB15k-237.sh
├── inst_add_attack_WN18RR.sh
├── preprocess.sh
├── transe_FB15k-237.sh
├── transe_WN18.sh
└── transe_WN18RR.sh
├── LICENSE
├── Readme.md
├── inference_attack.yml
└── overview.jpg
/.gitignore:
--------------------------------------------------------------------------------
1 | # Ignore Jupyter checkpoints
2 | .ipynb_checkpoints
3 |
4 | # Python
5 | __pycache__/
6 |
7 | # Ignore folders
8 | data/
9 | saved_models/
10 | results/
11 | losses/
12 | logs/
13 | clusters/
14 |
15 |
16 |
--------------------------------------------------------------------------------
/KGEAttack/ConvE/FB15k-237.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeruBhardwaj/InferenceAttack/ddfda138f7937a6313af5392ec401ae89900aaf8/KGEAttack/ConvE/FB15k-237.tar.gz
--------------------------------------------------------------------------------
/KGEAttack/ConvE/WN18.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeruBhardwaj/InferenceAttack/ddfda138f7937a6313af5392ec401ae89900aaf8/KGEAttack/ConvE/WN18.tar.gz
--------------------------------------------------------------------------------
/KGEAttack/ConvE/WN18RR.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeruBhardwaj/InferenceAttack/ddfda138f7937a6313af5392ec401ae89900aaf8/KGEAttack/ConvE/WN18RR.tar.gz
--------------------------------------------------------------------------------
/KGEAttack/ConvE/create_clusters.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 |
4 | # - In this notebook -
5 | # - generate clusters for model, data combinations
6 | # - save them
7 | #
8 | #
9 |
10 | # In[1]:
11 |
12 |
13 | import pickle
14 | from typing import Dict, Tuple, List
15 | import os
16 | import numpy as np
17 | import json
18 | import torch
19 | import logging
20 | import argparse
21 | import math
22 | from pprint import pprint
23 | import pandas as pd
24 | import errno
25 | from sklearn.cluster import MiniBatchKMeans, KMeans
26 |
27 | import torch
28 | from torch.utils.data import DataLoader
29 | import torch.backends.cudnn as cudnn
30 |
31 | from dataset import TrainDataset, BidirectionalOneShotIterator
32 | from evaluation import evaluation
33 | from model import Distmult, Complex, Conve, Transe
34 |
35 |
36 | # In[2]:
37 |
38 |
def add_arguments():
    """Build the command-line parser shared by the KGE scripts.

    Returns:
        argparse.ArgumentParser: parser holding the dataset, model,
        optimisation, ConvE and regularisation options. Callers may register
        further options on it before calling ``parse_args()``.
    """
    parser = argparse.ArgumentParser(description='Link prediction for knowledge graphs')

    parser.add_argument('--data', type=str, default='FB15k-237', help='Dataset to use: {FB15k-237, YAGO3-10, WN18RR, umls, nations, kinship}, default: FB15k-237')
    # 'transe' is handled by add_model() in this file, so list it here too.
    parser.add_argument('--model', type=str, default='conve', help='Choose from: {conve, distmult, complex, transe}')
    parser.add_argument('--add-reciprocals', action='store_true', help='Option to add reciprocal relations')

    parser.add_argument('--transe-margin', type=float, default=12.0, help='Margin value for TransE scoring function. Default:12.0')
    parser.add_argument('--transe-norm', type=int, default=2, help='P-norm value for TransE scoring function. Default:2')

    parser.add_argument('--epochs', type=int, default=400, help='Number of epochs to train (default: 400)')
    parser.add_argument('--lr', type=float, default=0.001, help='Learning rate (default: 0.001)')
    parser.add_argument('--lr-decay', type=float, default=0.0, help='Weight decay value to use in the optimizer. Default: 0.0')
    parser.add_argument('--max-norm', action='store_true', help='Option to add unit max norm constraint to entity embeddings')

    parser.add_argument('--num-batches', type=int, default=400, help='Number of batches for training (default: 400)')
    parser.add_argument('--test-batch-size', type=int, default=128, help='Batch size for test split (default: 128)')
    parser.add_argument('--valid-batch-size', type=int, default=128, help='Batch size for valid split (default: 128)')
    parser.add_argument('--num-workers', type=int, default=4, help='Number of workers to use for the batch loaders on GPU. Default: 4')

    parser.add_argument('--embedding-dim', type=int, default=200, help='The embedding dimension (1D). Default: 200')

    parser.add_argument('--stack_width', type=int, default=20, help='The first dimension of the reshaped/stacked 2D embedding. Second dimension is inferred. Default: 20')
    parser.add_argument('--hidden-drop', type=float, default=0.3, help='Dropout for the hidden layer. Default: 0.3.')
    parser.add_argument('--input-drop', type=float, default=0.2, help='Dropout for the input embeddings. Default: 0.2.')
    # Help text used to advertise 0.2 although the default is 0.3.
    parser.add_argument('--feat-drop', type=float, default=0.3, help='Dropout for the convolutional features. Default: 0.3.')
    # The historical single-dash spellings stay valid; the double-dash aliases
    # make these two options consistent with every other flag. The destination
    # names (num_filters / kernel_size) are unchanged.
    parser.add_argument('-num-filters', '--num-filters', default=32, type=int, help='Number of filters for convolution')
    parser.add_argument('-kernel-size', '--kernel-size', default=3, type=int, help='Kernel Size for convolution')

    # store_true flags default to False; the help text used to claim True.
    parser.add_argument('--use-bias', action='store_true', help='Use a bias in the convolutional layer. Default: False')
    parser.add_argument('--label-smoothing', type=float, default=0.1, help='Label smoothing value to use. Default: 0.1')

    parser.add_argument('--reg-weight', type=float, default=5e-12, help='Weight for regularization. Default: 5e-12')
    # Help text used to advertise 3 although the default is 2.
    parser.add_argument('--reg-norm', type=int, default=2, help='Norm for regularization. Default: 2')

    parser.add_argument('--resume', action='store_true', help='Restore a saved model.')
    parser.add_argument('--resume-split', type=str, default='test', help='Split to evaluate a restored model')
    parser.add_argument('--seed', type=int, default=17, metavar='S', help='Random seed (default: 17)')

    return parser
82 |
83 |
def generate_dicts(data_path):
    """Load the entity and relation vocabularies stored under ``data_path``.

    Args:
        data_path: directory containing ``entities_dict.json`` and
            ``relations_dict.json``.

    Returns:
        tuple: ``(n_ent, n_rel, ent_to_id, rel_to_id)`` where the two counts
        are the number of keys in the respective JSON objects.
    """
    vocabularies = []
    for file_name in ('entities_dict.json', 'relations_dict.json'):
        with open(os.path.join(data_path, file_name), 'r') as handle:
            vocabularies.append(json.load(handle))

    ent_to_id, rel_to_id = vocabularies
    n_ent = len(ent_to_id.keys())
    n_rel = len(rel_to_id.keys())
    return n_ent, n_rel, ent_to_id, rel_to_id
93 |
94 |
def load_data(data_path):
    """Read the train/valid/test splits from tab-separated text files.

    Args:
        data_path: directory containing ``train.txt``, ``valid.txt`` and
            ``test.txt`` (integer columns, no header row).

    Returns:
        dict: split name -> array of the de-duplicated rows of that file.
    """
    def _read_split(split_name):
        # Parse one split as integers and drop repeated rows.
        frame = pd.read_csv(os.path.join(data_path, split_name + '.txt'),
                            sep='\t', header=None, names=None, dtype=int)
        return frame.drop_duplicates().values

    return {split_name: _read_split(split_name)
            for split_name in ['train', 'valid', 'test']}
103 |
def add_model(args, n_ent, n_rel):
    """Instantiate the knowledge-graph embedding model named by ``args.model``.

    Args:
        args: parsed options; reads ``args.model`` and ``args.add_reciprocals``
            and is forwarded unchanged to the model constructor.
        n_ent: number of entities.
        n_rel: number of relations. Doubled when ``args.add_reciprocals`` is
            set, to make room for the reciprocal relations.

    Returns:
        The constructed model. It is not moved to any device here.

    Raises:
        Exception: if ``args.model`` is not one of conve, distmult, complex,
            transe (``None`` falls back to ConvE).
    """
    num_rel = 2 * n_rel if args.add_reciprocals else n_rel

    if args.model is None or args.model == 'conve':
        model_cls = Conve
    elif args.model == 'distmult':
        model_cls = Distmult
    elif args.model == 'complex':
        model_cls = Complex
    elif args.model == 'transe':
        model_cls = Transe
    else:
        # logging interpolates with %-style arguments; the previous '{0}'
        # placeholder made the record fail to format instead of being logged.
        logging.getLogger(__name__).info('Unknown model: %s', args.model)
        raise Exception("Unknown model!")

    return model_cls(args, n_ent, num_rel)
136 |
137 |
138 |
139 | # In[3]:
140 |
141 |
142 | # In[4]:
143 |
144 |
# Extend the shared parser with the options specific to cluster generation,
# then parse the command line and pick the compute device.
parser = add_arguments()
_cluster_options = (
    ('--target-split', 'Ranks to use for target set. Values are 1 for ranks <=10; 2 for ranks>10 and ranks<=100. Default: 1', 1),
    ('--budget', 'Budget for each target triple for each corruption side', 1),
    ('--rand-run', 'A number assigned to the random run of experiment', 1),
    ('--num-clusters', 'Number of clusters to be generated', 100),
)
for _flag, _help, _default in _cluster_options:
    parser.add_argument(_flag, type=int, default=_default, help=_help)

args = parser.parse_args()

# Use the GPU when one is visible to torch, otherwise stay on the CPU.
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)
157 |
158 |
159 | # In[6]:
160 |
161 |
162 | #args.target_split = 1 # which target split to use
163 | #Values are 1 for ranks <=10; 2 for ranks>10 and ranks<=100.
164 | #args.budget = 1 #indicates the num of adversarial edits for each target triple for each corruption side
165 | #args.rand_run = 1 # a number assigned to the random run of the experiment
166 |
# Each random run shifts the base seed by (rand_run - 1); default seed is 17.
args.seed = args.seed + (args.rand_run - 1)

# Below is based on hyperparams for original model:
# dataset -> model -> option overrides applied on top of the parsed args.
_transe_common = {'lr': 0.001, 'input_drop': 0.0, 'transe_margin': 9.0,
                  'epochs': 100, 'reg_weight': 1e-10}
_hyperparam_overrides = {
    'WN18RR': {
        'distmult': {'lr': 0.01, 'num_batches': 50},
        'complex': {'lr': 0.01},
        'conve': {'lr': 0.001},
        'transe': dict(_transe_common, num_batches=1000),
    },
    'FB15k-237': {
        'distmult': {'lr': 0.005, 'input_drop': 0.5},
        'complex': {'lr': 0.005, 'input_drop': 0.5},
        'conve': {'lr': 0.001, 'hidden_drop': 0.5},
        'transe': dict(_transe_common, num_batches=800),
    },
}

_per_dataset = _hyperparam_overrides.get(args.data)
if _per_dataset is None:
    print("New dataset:{0}. Set hyperparams".format(args.data))
else:
    _overrides = _per_dataset.get(args.model)
    if _overrides is None:
        print("New model:{0},{1}. Set hyperparams".format(args.data, args.model))
    else:
        for _option, _value in _overrides.items():
            setattr(args, _option, _value)
210 |
211 |
212 |
213 |
214 |
215 | # In[7]:
216 |
217 |
# Fixing random seeds for reproducibility -https://pytorch.org/docs/stable/notes/randomness.html
_seed = args.seed
torch.manual_seed(_seed)
np.random.seed(_seed)
rng = np.random.default_rng(seed=_seed)
cudnn.deterministic, cudnn.benchmark = True, False

args.epochs = -1  # no training here

# The checkpoint name is built from the model type, embedding size and the
# three dropout rates.
model_name = '_'.join(str(part) for part in (args.model,
                                            args.embedding_dim,
                                            args.input_drop,
                                            args.hidden_drop,
                                            args.feat_drop))
model_path = f'saved_models/{args.data}_{model_name}.model'

logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
    datefmt='%m/%d/%Y %H:%M:%S',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Vocabulary sizes come from the target-split data directory.
data_path = f'data/target_{args.model}_{args.data}_{args.target_split}'
n_ent, n_rel, ent_to_id, rel_to_id = generate_dicts(data_path)
242 |
243 |
244 | # In[8]:
245 |
246 |
# add a model and load the pre-trained params
model = add_model(args, n_ent, n_rel)
model.to(device)
logger.info('Loading saved model from {0}'.format(model_path))
# map_location remaps the stored tensors onto the device selected above, so a
# checkpoint written on a GPU machine can still be restored on a CPU-only one.
state = torch.load(model_path, map_location=device)
model_params = state['state_dict']
# Log the name, shape and element count of every stored tensor.
for key, value in model_params.items():
    logger.info('Key:{0}, Size:{1}, Count:{2}'.format(key, value.size(), value.numel()))

model.load_state_dict(model_params)

# Inference only: switch the model to evaluation mode.
model.eval()
260 |
261 |
262 | # In[ ]:
263 |
264 |
logger.info("Starting the clustering algorithm")

# Perform clustering of entity embeddings.
# ComplEx keeps real and imaginary parts in two tables; they are concatenated
# along the last dimension so each entity is one vector.
if args.model != 'complex':
    ent_emb = model.emb_e.weight.data
else:
    ent_emb = torch.cat((model.emb_e_real.weight.data,
                         model.emb_e_img.weight.data), dim=-1)

ent_emb = ent_emb.cpu().numpy()

# random_state is fixed at 0 here, independent of args.seed / args.rand_run.
_kmeans_options = dict(
    n_clusters=args.num_clusters,
    n_init=100,
    max_iter=500,
    random_state=0,
    init='k-means++',
)
km = KMeans(**_kmeans_options)
km.fit(ent_emb)
285 |
286 |
287 | # In[ ]:
288 |
289 |
logger.info("Finished clustering... saving centres, labels, inertia, n_iter")

save_path = 'clusters/{0}_{1}_{2}_{3}'.format(args.model, args.data, args.num_clusters, args.rand_run)

# The output directory is not guaranteed to exist; create it up front instead
# of failing on the first open().
os.makedirs(os.path.dirname(save_path), exist_ok=True)

# NOTE(review): the suffix is appended directly to save_path with no separator.
# The file names are kept exactly as before because any reader of these
# pickles presumably builds the name the same way - confirm before changing.
for _suffix, _value in (
        ('cluster_centers.pickle', km.cluster_centers_),
        ('labels.pickle', km.labels_),
        ('inertia.pickle', km.inertia_),
        ('n_iter.pickle', km.n_iter_),
):
    # The context manager closes the file even if pickling raises.
    with open(save_path + _suffix, 'wb') as out:
        pickle.dump(_value, out)
329 |
330 |
331 | # In[35]:
332 |
333 |
334 | #inp_f = open(save_path + 'cluster_centers.pickle', 'rb')
335 | #centres = np.array(pickle.load(inp_f))
336 | #inp_f.close()
337 |
338 |
339 | # In[ ]:
340 |
341 |
342 |
343 |
344 |
--------------------------------------------------------------------------------
/KGEAttack/ConvE/criage_inverter.py:
--------------------------------------------------------------------------------
1 |
2 | import pickle
3 | from typing import Dict, Tuple, List
4 | import os
5 | import numpy as np
6 | import json
7 | import torch
8 | import logging
9 | import argparse
10 | import math
11 | from pprint import pprint
12 | import pandas as pd
13 | import errno
14 | from sklearn.cluster import MiniBatchKMeans, KMeans
15 |
16 | import torch
17 | from torch.utils.data import DataLoader
18 | import torch.backends.cudnn as cudnn
19 |
20 | from dataset import TrainDataset, BidirectionalOneShotIterator
21 | from evaluation import evaluation
22 | from criage_model import Distmult, Conve
23 |
24 |
25 | # In[2]:
26 |
27 |
def add_arguments():
    """Build the command-line parser for the CRIAGE inverter script.

    Returns:
        argparse.ArgumentParser: parser holding the dataset, model,
        optimisation, ConvE and regularisation options. Callers may register
        further options on it before calling ``parse_args()``.
    """
    parser = argparse.ArgumentParser(description='Link prediction for knowledge graphs')

    parser.add_argument('--data', type=str, default='FB15k-237', help='Dataset to use: {FB15k-237, YAGO3-10, WN18RR, umls, nations, kinship}, default: FB15k-237')
    parser.add_argument('--model', type=str, default='conve', help='Choose from: {conve, distmult, complex, transe}')
    parser.add_argument('--add-reciprocals', action='store_true', help='Option to add reciprocal relations')

    parser.add_argument('--transe-margin', type=float, default=12.0, help='Margin value for TransE scoring function. Default:12.0')
    parser.add_argument('--transe-norm', type=int, default=2, help='P-norm value for TransE scoring function. Default:2')

    parser.add_argument('--epochs', type=int, default=200, help='Number of epochs to train (default: 200)')
    parser.add_argument('--lr', type=float, default=0.001, help='Learning rate (default: 0.001)')
    parser.add_argument('--lr-decay', type=float, default=0.0, help='Weight decay value to use in the optimizer. Default: 0.0')
    parser.add_argument('--max-norm', action='store_true', help='Option to add unit max norm constraint to entity embeddings')

    # Help text used to advertise 400 although the default here is 100.
    parser.add_argument('--num-batches', type=int, default=100, help='Number of batches for training (default: 100)')
    parser.add_argument('--test-batch-size', type=int, default=128, help='Batch size for test split (default: 128)')
    parser.add_argument('--valid-batch-size', type=int, default=128, help='Batch size for valid split (default: 128)')
    parser.add_argument('--num-workers', type=int, default=4, help='Number of workers to use for the batch loaders on GPU. Default: 4')

    parser.add_argument('--embedding-dim', type=int, default=200, help='The embedding dimension (1D). Default: 200')

    parser.add_argument('--stack_width', type=int, default=20, help='The first dimension of the reshaped/stacked 2D embedding. Second dimension is inferred. Default: 20')
    parser.add_argument('--hidden-drop', type=float, default=0.3, help='Dropout for the hidden layer. Default: 0.3.')
    parser.add_argument('--input-drop', type=float, default=0.2, help='Dropout for the input embeddings. Default: 0.2.')
    # Help text used to advertise 0.2 although the default is 0.3.
    parser.add_argument('--feat-drop', type=float, default=0.3, help='Dropout for the convolutional features. Default: 0.3.')
    # The historical single-dash spellings stay valid; the double-dash aliases
    # make these two options consistent with every other flag. The destination
    # names (num_filters / kernel_size) are unchanged.
    parser.add_argument('-num-filters', '--num-filters', default=32, type=int, help='Number of filters for convolution')
    parser.add_argument('-kernel-size', '--kernel-size', default=3, type=int, help='Kernel Size for convolution')

    # store_true flags default to False; the help text used to claim True.
    parser.add_argument('--use-bias', action='store_true', help='Use a bias in the convolutional layer. Default: False')
    parser.add_argument('--label-smoothing', type=float, default=0.1, help='Label smoothing value to use. Default: 0.1')

    # Help text used to advertise 5e-12 although the default here is 0.0.
    parser.add_argument('--reg-weight', type=float, default=0.0, help='Weight for regularization. Default: 0.0')
    # Help text used to advertise 3 although the default is 2.
    parser.add_argument('--reg-norm', type=int, default=2, help='Norm for regularization. Default: 2')

    parser.add_argument('--resume', action='store_true', help='Restore a saved model.')
    parser.add_argument('--resume-split', type=str, default='test', help='Split to evaluate a restored model')
    parser.add_argument('--seed', type=int, default=17, metavar='S', help='Random seed (default: 17)')

    return parser
71 |
72 |
def generate_dicts(data_path):
    """Return vocabulary sizes and id maps read from ``data_path``.

    Args:
        data_path: directory holding ``entities_dict.json`` and
            ``relations_dict.json``.

    Returns:
        tuple: ``(n_ent, n_rel, ent_to_id, rel_to_id)``; the counts are the
        number of keys in each loaded JSON object.
    """
    def _load_json(file_name):
        # Parse one JSON file located inside data_path.
        with open(os.path.join(data_path, file_name), 'r') as handle:
            return json.load(handle)

    ent_to_id = _load_json('entities_dict.json')
    rel_to_id = _load_json('relations_dict.json')
    return len(ent_to_id.keys()), len(rel_to_id.keys()), ent_to_id, rel_to_id
82 |
83 |
def load_data(data_path):
    """Load the de-duplicated train, valid and test splits.

    Args:
        data_path: directory containing ``train.txt``, ``valid.txt`` and
            ``test.txt`` as tab-separated integer rows without a header.

    Returns:
        dict: maps each split name to an array of its unique rows.
    """
    splits = {}
    for split_name in ['train', 'valid', 'test']:
        split_file = os.path.join(data_path, split_name + '.txt')
        rows = pd.read_csv(split_file, sep='\t', header=None, names=None, dtype=int)
        splits[split_name] = rows.drop_duplicates().values
    return splits
92 |
def load_train_data(data_path, args, n_rel):
    """Load the pickled training lookup tables and normalise their keys.

    Args:
        data_path: directory containing ``sr2o_train.pickle`` and
            ``or2s_train.pickle``.
        args: parsed options; only ``args.add_reciprocals`` is read.
        n_rel: number of relations; added to the relation id of every
            ``or2s`` key when reciprocal relations are enabled.

    Returns:
        tuple: ``(sr2o_train, or2s_train)``. Both are dicts whose keys are
        pairs of plain ``int`` built from the first two elements of the
        stored keys; values are passed through unchanged.
    """
    # Context managers close the files even when unpickling fails.
    # NOTE: pickle must only be used on trusted, locally generated files.
    with open(os.path.join(data_path, 'sr2o_train.pickle'), 'rb') as inp_f:
        sr2o_train: Dict[Tuple[int, int], List[int]] = pickle.load(inp_f)

    with open(os.path.join(data_path, 'or2s_train.pickle'), 'rb') as inp_f:
        or2s_train: Dict[Tuple[int, int], List[int]] = pickle.load(inp_f)

    # With reciprocals the relation ids of the or2s table are shifted by n_rel.
    rel_offset = n_rel if args.add_reciprocals else 0
    or2s_train = {(int(k[0]), int(k[1]) + rel_offset): v for k, v in or2s_train.items()}
    sr2o_train = {(int(k[0]), int(k[1])): v for k, v in sr2o_train.items()}

    return sr2o_train, or2s_train
111 |
def add_model(args, n_ent, n_rel):
    """Instantiate the CRIAGE inverter model named by ``args.model``.

    Only the classes this script imports from ``criage_model`` (``Conve`` and
    ``Distmult``) can be built. ``complex`` and ``transe`` previously fell
    through to undefined names and crashed with ``NameError``; they now raise
    an explicit exception instead.

    Args:
        args: parsed options; reads ``args.model`` and ``args.add_reciprocals``
            and is forwarded unchanged to the model constructor.
        n_ent: number of entities.
        n_rel: number of relations. Doubled when ``args.add_reciprocals`` is
            set, to make room for the reciprocal relations.

    Returns:
        The constructed model. It is not moved to any device here.

    Raises:
        Exception: if ``args.model`` is unknown, or is a model for which no
            inverter class is imported in this script.
    """
    num_rel = 2 * n_rel if args.add_reciprocals else n_rel

    if args.model is None or args.model == 'conve':
        model_cls = Conve
    elif args.model == 'distmult':
        model_cls = Distmult
    elif args.model in ('complex', 'transe'):
        logging.getLogger(__name__).info('No inverter model available for: %s', args.model)
        raise Exception("Model not supported by the inverter: {0}".format(args.model))
    else:
        # logging interpolates with %-style arguments; the previous '{0}'
        # placeholder made the record fail to format instead of being logged.
        logging.getLogger(__name__).info('Unknown model: %s', args.model)
        raise Exception("Unknown model!")

    return model_cls(args, n_ent, num_rel)
144 |
145 |
146 |
147 | # In[3]:
148 |
149 | # In[4]:
150 |
151 |
# Extend the shared parser with the attack-specific options, then parse the
# command line and pick the compute device.
parser = add_arguments()
_attack_options = (
    ('--target-split', 'Ranks to use for target set. Values are 1 for ranks <=10; 2 for ranks>10 and ranks<=100. Default: 1'),
    ('--budget', 'Budget for each target triple for each corruption side'),
    ('--rand-run', 'A number assigned to the random run of experiment'),
)
for _flag, _help in _attack_options:
    parser.add_argument(_flag, type=int, default=1, help=_help)

args = parser.parse_args()

# Use the GPU when one is visible to torch, otherwise stay on the CPU.
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)
163 |
164 |
165 | # In[6]:
166 |
167 |
168 | #args.target_split = 1 # which target split to use
169 | #Values are 1 for ranks <=10; 2 for ranks>10 and ranks<=100.
170 | #args.budget = 1 #indicates the num of adversarial edits for each target triple for each corruption side
171 | #args.rand_run = 1 # a number assigned to the random run of the experiment
172 |
# Each random run shifts the base seed by (rand_run - 1); default seed is 17.
args.seed = args.seed + (args.rand_run - 1)

# Fixing random seeds for reproducibility -https://pytorch.org/docs/stable/notes/randomness.html
torch.manual_seed(args.seed)
cudnn.deterministic = True
cudnn.benchmark = False
np.random.seed(args.seed)
rng = np.random.default_rng(seed=args.seed)

# The checkpoint name is built from the model type, embedding size and the
# three dropout rates.
model_name = '{0}_{1}_{2}_{3}_{4}'.format(args.model, args.embedding_dim, args.input_drop, args.hidden_drop, args.feat_drop)
model_path = 'saved_models/{0}_{1}.model'.format(args.data, model_name)
log_path = save_path = 'logs/attack_logs/criage_inverter/{0}_{1}_{2}_{3}'.format(args.data,
                                                                                 model_name,
                                                                                 args.num_batches,
                                                                                 args.epochs)

# logging.basicConfig(filename=...) opens the file immediately and fails when
# the parent directory is missing, so create it first.
os.makedirs(os.path.dirname(log_path), exist_ok=True)

logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s',
                    datefmt = '%m/%d/%Y %H:%M:%S',
                    level = logging.INFO,
                    filename = log_path
                    )
logger = logging.getLogger(__name__)
logger.info(args)
logger.info('-------------------- Running Criage Inverter ----------------------')
208 |
209 |
data_path = 'data/target_{0}_{1}_{2}'.format(args.model, args.data, args.target_split)

n_ent, n_rel, ent_to_id, rel_to_id = generate_dicts(data_path)

##### load data####
data = load_data(data_path)
train_data, valid_data, test_data = data['train'], data['valid'], data['test']

# The context manager closes the file even when unpickling fails.
with open(os.path.join(data_path, 'to_skip_eval.pickle'), 'rb') as inp_f:
    to_skip_eval: Dict[str, Dict[Tuple[int, int], List[int]]] = pickle.load(inp_f)
# Rebuild both tables with plain-int key pairs.
for _side in ('lhs', 'rhs'):
    to_skip_eval[_side] = {(int(k[0]), int(k[1])): v for k, v in to_skip_eval[_side].items()}

logger.info('Loading training data')
sr2o_train, or2s_train = load_train_data(data_path, args, n_rel)

# lhs denotes subject side corruptions and rhs denotes object side corruptions.
# Batch sizes are chosen so that each side is covered in args.num_batches steps.
batch_size_lhs = math.ceil(len(or2s_train) / args.num_batches)
batch_size_rhs = math.ceil(len(sr2o_train) / args.num_batches)

logger.info("Dict size or2s:{0}".format(len(or2s_train)))
logger.info('Batch_size_lhs: {0}'.format(batch_size_lhs))
logger.info("Dict size sr2o:{0}".format(len(sr2o_train)))
logger.info('Batch_size_rhs: {0}'.format(batch_size_rhs))

# Both loaders run in the main process (num_workers=0).
train_dataloader_lhs = DataLoader(
    TrainDataset(args, n_ent, or2s_train, mode='lhs'),
    batch_size = batch_size_lhs,
    shuffle = True,
    num_workers = 0,
    collate_fn = TrainDataset.collate_fn
)

train_dataloader_rhs = DataLoader(
    TrainDataset(args, n_ent, sr2o_train, mode='rhs'),
    batch_size = batch_size_rhs,
    shuffle = True,
    num_workers = 0,
    collate_fn = TrainDataset.collate_fn
)
252 |
253 |
254 | # In[8]:
255 |
logger.info('Loading pre-trained model params')
# add a model and load the pre-trained params
model = add_model(args, n_ent, n_rel)
model.to(device)
logger.info('Loading saved model from {0}'.format(model_path))
model_state = model.state_dict()
# map_location remaps the stored tensors onto the device selected above, so a
# checkpoint written on a GPU machine can still be restored on a CPU-only one.
pre_state = torch.load(model_path, map_location=device)
pretrained = pre_state['state_dict']
# Copy only the entries whose names also exist in the checkpoint; every other
# entry of the inverter keeps its fresh initialisation.
for name in model_state:
    if name in pretrained:
        model_state[name].copy_(pretrained[name])

optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.lr_decay)
272 |
logger.info('----- Training -----')
# Every epoch draws 2 * num_batches batches from the bidirectional iterator.
_steps_per_epoch = 2 * args.num_batches
for epoch in range(args.epochs):
    model.train()
    train_iterator = BidirectionalOneShotIterator(train_dataloader_lhs, train_dataloader_rhs)
    losses = []
    for b in range(_steps_per_epoch):
        optimizer.zero_grad()
        e1, rel, label, mode = next(train_iterator)
        e1 = e1.to(device)
        rel = rel.to(device)

        # The model returns one score tensor scored against the entity ids
        # and one scored against the relation ids; the two losses are summed.
        E1, R = model.forward(e1, rel)
        loss = model.loss(E1, e1) + model.loss(R, rel)

        loss.backward()
        optimizer.step()
        losses.append(loss.item())

        # Report the running mean every 100 steps and on the final step.
        _is_last_step = (b == _steps_per_epoch - 1)
        if _is_last_step or b % 100 == 0:
            logger.info('[E:{} | {}]: Train Loss:{:.4}'.format(epoch, b, np.mean(losses)))

    loss = np.mean(losses)
    logger.info('[Epoch:{}]: Training Loss:{:.4}\n'.format(epoch, loss))
296 |
297 |
logger.info('Saving trained inverter model')
save_path = 'saved_models/criage_inverter/{0}_{1}.model'.format(args.data, model_name)
# torch.save does not create missing parent directories; make sure the target
# folder exists so the trained weights are not lost at the very last step.
os.makedirs(os.path.dirname(save_path), exist_ok=True)
# The checkpoint bundles the weights, the optimizer state and the run options.
state = {
    'state_dict': model.state_dict(),
    'optimizer': optimizer.state_dict(),
    'args': vars(args)
}
torch.save(state, save_path)
logger.info('Saving model to {0}'.format(save_path))
307 |
308 |
309 |
310 |
311 |
312 |
313 |
314 |
--------------------------------------------------------------------------------
/KGEAttack/ConvE/criage_model.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.nn import functional as F, Parameter
3 | from torch.autograd import Variable
4 |
5 |
6 | from torch.nn.init import xavier_normal_, xavier_uniform_
7 | from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
8 |
9 |
10 |
class Distmult(torch.nn.Module):
    """Inverter network on top of frozen DistMult embeddings.

    The entity and relation embedding tables are excluded from gradient
    updates. The encoder multiplies the looked-up embeddings elementwise, and
    the decoder maps that product to one score vector over entities and one
    over relations.
    """

    def __init__(self, args, num_entities, num_relations):
        """Create the embedding tables and decoder layers.

        Args:
            args: options object; reads ``embedding_dim``, ``hidden_drop`` and
                ``input_drop`` and is stored as ``self.args``.
            num_entities: number of rows of the entity embedding table and
                output size of the entity decoder.
            num_relations: number of rows of the relation embedding table and
                output size of the relation decoder.
        """
        super(Distmult, self).__init__()
        self.emb_e = torch.nn.Embedding(num_entities, args.embedding_dim, padding_idx=None)
        self.emb_rel = torch.nn.Embedding(num_relations, args.embedding_dim, padding_idx=None)
        # Embeddings are frozen: only the decoder layers below are trained.
        self.emb_e.weight.requires_grad = False
        self.emb_rel.weight.requires_grad = False

        self.linear_t = torch.nn.Linear(args.embedding_dim, args.embedding_dim)
        self.linear_rel = torch.nn.Linear(args.embedding_dim, num_relations)
        self.linear_e1 = torch.nn.Linear(args.embedding_dim, num_entities)
        self.linear_t.weight.requires_grad = True
        self.linear_e1.weight.requires_grad = True
        self.linear_rel.weight.requires_grad = True

        # The dropout modules are created but not applied by the methods of
        # this class.
        self.hidden_drop = torch.nn.Dropout(args.hidden_drop)
        self.inp_drop = torch.nn.Dropout(args.input_drop)
        self.loss = torch.nn.CrossEntropyLoss()

        self.args = args

    def init(self):
        """Re-initialise both embedding tables with Xavier-normal values."""
        # Uses the in-place initialiser this module imports (xavier_normal_);
        # the previous call to the un-imported `xavier_normal` raised NameError.
        xavier_normal_(self.emb_e.weight.data)
        xavier_normal_(self.emb_rel.weight.data)

    def forward(self, e1, rel):
        """Encode ``(e1, rel)`` and decode it into ``(E1, R)`` scores."""
        pred = self.encoder(e1, rel)
        return self.decoder(pred)

    def encoder(self, e1, rel):
        """Return the elementwise product of the entity and relation embeddings.

        Dimension 1 of each looked-up embedding is squeezed when it has size 1
        (presumably the inputs are ``(batch, 1)`` index tensors - confirm
        against the data loader).
        """
        e1_embedded = self.emb_e(e1).squeeze(dim=1)
        rel_embedded = self.emb_rel(rel).squeeze(dim=1)
        return e1_embedded * rel_embedded

    def encoder_2(self, e1):
        """Return the raw entity embeddings for ``e1``."""
        return self.emb_e(e1)

    def decoder(self, pred):
        """Map an encoded vector to entity scores ``E1`` and relation scores ``R``."""
        hidden = F.relu(self.linear_t(pred))
        E1 = self.linear_e1(hidden)
        R = self.linear_rel(hidden)
        return E1, R
67 |
68 |
class Conve(torch.nn.Module):
    """Convolutional encoder with a transposed-convolution decoder.

    The encoder reshapes the entity and relation embeddings into 2-D maps,
    stacks them, and runs batch-norm -> conv -> batch-norm -> ReLU -> linear
    -> batch-norm -> ReLU.  The decoder projects the encoding back to the
    convolution feature-map shape, applies a transposed convolution and maps
    the flattened result to entity and relation outputs.

    All intermediate shapes are derived from ``args`` so the decoder stays
    consistent with the encoder for any ``num_filters``, ``kernel_size``,
    ``stack_width`` and ``embedding_dim`` (``embedding_dim`` must be divisible
    by ``stack_width`` for the reshapes to be valid).

    Args:
        args: namespace providing ``embedding_dim``, ``num_filters``,
            ``kernel_size``, ``stack_width``, ``input_drop``, ``hidden_drop``,
            ``feat_drop`` and ``use_bias``.
        num_entities: size of the entity vocabulary.
        num_relations: size of the relation vocabulary.
    """

    def __init__(self, args, num_entities, num_relations):
        super(Conve, self).__init__()
        self.emb_e = torch.nn.Embedding(num_entities, args.embedding_dim, padding_idx=None)
        self.emb_rel = torch.nn.Embedding(num_relations, args.embedding_dim, padding_idx=None)
        # The embedding tables are frozen.
        self.emb_e.weight.requires_grad = False
        self.emb_rel.weight.requires_grad = False

        self.embedding_dim = args.embedding_dim  # author's note: default is 200
        self.num_filters = args.num_filters  # author's note: default is 32
        self.kernel_size = args.kernel_size  # author's note: default is 3
        self.stack_width = args.stack_width  # author's note: default is 20
        self.stack_height = args.embedding_dim // self.stack_width

        # Spatial size of the feature map produced by conv1 (stride 1, no
        # padding) on the stacked (2*stack_width, stack_height) input.
        self.flat_sz_h = int(2*self.stack_width) - self.kernel_size + 1
        self.flat_sz_w = self.stack_height - self.kernel_size + 1
        self.flat_sz = self.flat_sz_h*self.flat_sz_w*self.num_filters

        self.linear_t = torch.nn.Linear(args.embedding_dim, self.flat_sz)
        # 2* is needed because the decoder reconstructs the stacked
        # (entity + relation) map.
        self.linear_rel = torch.nn.Linear(2*args.embedding_dim, num_relations)
        self.linear_e1 = torch.nn.Linear(2*args.embedding_dim, num_entities)

        # Mirror of conv1: same channel count and kernel size, so it restores
        # the (2*stack_width, stack_height) map from the conv feature map.
        self.deconv1 = torch.nn.ConvTranspose2d(in_channels=self.num_filters,
                                                out_channels=1,
                                                kernel_size=self.kernel_size)

        self.linear_t.weight.requires_grad = True
        self.linear_rel.weight.requires_grad = True
        self.linear_e1.weight.requires_grad = True
        self.deconv1.weight.requires_grad = True

        self.inp_drop = torch.nn.Dropout(args.input_drop)
        self.hidden_drop = torch.nn.Dropout(args.hidden_drop)
        self.feature_map_drop = torch.nn.Dropout2d(args.feat_drop)
        self.loss = torch.nn.CrossEntropyLoss()

        self.conv1 = torch.nn.Conv2d(1, out_channels=self.num_filters,
                                     kernel_size=(self.kernel_size, self.kernel_size),
                                     stride=1, padding=0, bias=args.use_bias)
        self.bn0 = torch.nn.BatchNorm2d(1)
        self.bn1 = torch.nn.BatchNorm2d(self.num_filters)
        self.bn2 = torch.nn.BatchNorm1d(args.embedding_dim)

        self.register_parameter('b', Parameter(torch.zeros(num_entities)))
        self.fc = torch.nn.Linear(self.flat_sz, args.embedding_dim)
        # The encoder's conv and fc weights are frozen.
        self.conv1.weight.requires_grad = False
        self.fc.weight.requires_grad = False
        self.args = args

    def init(self):
        """Re-initialise both embedding tables with Xavier-normal values."""
        xavier_normal_(self.emb_e.weight.data)
        xavier_normal_(self.emb_rel.weight.data)

    def forward(self, e1, rel):
        """Encode the (entity, relation) pair and decode it to ``(E1, R)``."""
        x = self.encoder(e1, rel)

        return self.decoder(x)

    def encoder(self, e1, rel):
        """Return the convolutional encoding of an (entity, relation) pair.

        The dropout layers created in ``__init__`` are not applied here.
        The result has ``args.embedding_dim`` features per example.
        """
        e1_embedded = self.emb_e(e1).view(-1, 1, self.stack_width, self.stack_height)
        rel_embedded = self.emb_rel(rel).view(-1, 1, self.stack_width, self.stack_height)

        # Stack along the height axis -> (batch, 1, 2*stack_width, stack_height)
        stacked_inputs = torch.cat([e1_embedded, rel_embedded], 2)

        x = self.bn0(stacked_inputs)
        x = self.conv1(x)
        x = self.bn1(x)
        x = F.relu(x)
        x = x.view(x.shape[0], -1)
        x = self.fc(x)
        x = self.bn2(x)
        x = F.relu(x)

        return x

    def encoder_2(self, e1):
        """Return the raw entity embedding for ``e1``."""
        e1_embedded = self.emb_e(e1)
        return e1_embedded

    def decoder(self, pred):
        """Decode an encoding into entity and relation outputs.

        Returns:
            Tuple ``(E1, R)`` with one column per entity and per relation
            respectively.
        """
        # Reshape to the conv feature-map shape computed in __init__ (this
        # equals the previously hard-coded (32, 38, 8) / (32, 38, 48) for the
        # default settings with embedding_dim 200 / 1000).
        pred = self.linear_t(pred).view(-1, self.num_filters, self.flat_sz_h, self.flat_sz_w)
        pred = self.deconv1(pred)

        pred = F.relu(pred.view(-1, 2*self.args.embedding_dim))
        E1 = self.linear_e1(pred)
        R = self.linear_rel(pred)
        return E1, R
175 |
176 |
177 |
178 |
179 |
--------------------------------------------------------------------------------
/KGEAttack/ConvE/dataset.py:
--------------------------------------------------------------------------------
1 | '''
2 | 1. Dataset structure -- s,r,one-hot-labels, mode
3 | 2. Dataset class takes in (s,r) pairs,their labels and mode and returns one-hot encoded vectors for labels and s,r as two separate vectors
4 | '''
5 |
6 | from torch.utils.data import Dataset
7 | from typing import Dict, Tuple, List
8 | import numpy as np
9 | import torch
10 |
class TrainDataset(Dataset):
    '''
    Dataset over (s, r) keys that yields multi-hot labels over all entities.

    Each item is ``(s, r, label, mode)`` where ``s`` and ``r`` are scalar long
    tensors, ``label`` is a float vector of length ``num_ent`` and ``mode`` is
    the string given at construction.
    '''

    def __init__(self, args, num_ent, sr2o:Dict[Tuple[int, int], List[int]], mode:str):
        '''
        Input can be sr2o or or2s
        Mode is 'lhs' for or2s and 'rhs' for sr2o
        '''
        self.sr2o = sr2o
        self.sr = list(self.sr2o.keys())
        self.args = args
        self.n_ent = num_ent
        self.entities = np.arange(self.n_ent, dtype=np.int32)
        # mode is not needed for generating data, but needed in the data
        # iterator to decide the direction for model.forward()
        self.mode = mode

    def __len__(self):
        '''Number of distinct (s, r) keys.'''
        return len(self.sr)

    def __getitem__(self, idx):
        '''
        Return ``(s, r, label, mode)`` for the idx-th (s, r) key.

        When ``args.label_smoothing`` is non-zero the label becomes
        ``(1 - label_smoothing) * label + 1 / num_ent``.
        '''
        sample_key = self.sr[idx]
        s_idx, r_idx = int(sample_key[0]), int(sample_key[1])
        index_target = np.array(self.sr2o[(s_idx, r_idx)], dtype=np.int32)
        sample_label = self.get_label(index_target)
        s = torch.tensor(sample_key[0], dtype=torch.long)
        r = torch.tensor(sample_key[1], dtype=torch.long)
        # label smoothing
        if self.args.label_smoothing != 0.0:
            sample_label = (1.0 - self.args.label_smoothing)*sample_label + (1.0/self.n_ent)

        return s, r, sample_label, self.mode

    @staticmethod
    def collate_fn(data):
        '''
        Stack a list of items into batched ``(s, r, label, mode)``.

        ``mode`` is taken from the first item of the batch.
        '''
        s = torch.stack([item[0] for item in data], dim=0)
        r = torch.stack([item[1] for item in data], dim=0)
        label = torch.stack([item[2] for item in data], dim=0)
        mode = data[0][3]

        return s, r, label, mode

    def get_label(self, index_target:List[int]):
        '''
        Build the multi-hot label vector from target entity indices.

        Every occurrence of an index adds 1.0 at that position.
        '''
        one_hot = np.zeros(self.n_ent, dtype=np.float32)
        np.add.at(one_hot, index_target, 1.0)
        return torch.FloatTensor(one_hot)
57 |
58 |
class BidirectionalOneShotIterator(object):
    '''
    Endless iterator that alternates between two dataloaders.

    Even steps (starting at 0) draw from the lhs dataloader, odd steps from
    the rhs dataloader. Each dataloader is restarted when it is exhausted.
    '''

    def __init__(self, dataloader_lhs, dataloader_rhs):
        self.iterator_lhs = self.one_shot_iterator(dataloader_lhs)
        self.iterator_rhs = self.one_shot_iterator(dataloader_rhs)
        self.step = 0

    def __next__(self):
        source = self.iterator_lhs if self.step % 2 == 0 else self.iterator_rhs
        data = next(source)
        # Only advance once a batch was actually produced.
        self.step += 1
        return data

    def __iter__(self):
        return self

    @staticmethod
    def one_shot_iterator(dataloader):
        '''
        Transform a PyTorch Dataloader into an endless python iterator.

        Raises:
            ValueError: if a full pass over ``dataloader`` yields nothing
                (otherwise the generator would spin forever without yielding).
        '''
        while True:
            produced = False
            for data in dataloader:
                produced = True
                yield data
            if not produced:
                raise ValueError('dataloader produced no batches')
87 |
88 |
89 |
--------------------------------------------------------------------------------
/KGEAttack/ConvE/elbow_plots/FB15k-237_complex.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeruBhardwaj/InferenceAttack/ddfda138f7937a6313af5392ec401ae89900aaf8/KGEAttack/ConvE/elbow_plots/FB15k-237_complex.png
--------------------------------------------------------------------------------
/KGEAttack/ConvE/elbow_plots/FB15k-237_conve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeruBhardwaj/InferenceAttack/ddfda138f7937a6313af5392ec401ae89900aaf8/KGEAttack/ConvE/elbow_plots/FB15k-237_conve.png
--------------------------------------------------------------------------------
/KGEAttack/ConvE/elbow_plots/FB15k-237_distmult.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeruBhardwaj/InferenceAttack/ddfda138f7937a6313af5392ec401ae89900aaf8/KGEAttack/ConvE/elbow_plots/FB15k-237_distmult.png
--------------------------------------------------------------------------------
/KGEAttack/ConvE/elbow_plots/FB15k-237_transe.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeruBhardwaj/InferenceAttack/ddfda138f7937a6313af5392ec401ae89900aaf8/KGEAttack/ConvE/elbow_plots/FB15k-237_transe.png
--------------------------------------------------------------------------------
/KGEAttack/ConvE/elbow_plots/WN18RR_complex.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeruBhardwaj/InferenceAttack/ddfda138f7937a6313af5392ec401ae89900aaf8/KGEAttack/ConvE/elbow_plots/WN18RR_complex.png
--------------------------------------------------------------------------------
/KGEAttack/ConvE/elbow_plots/WN18RR_conve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeruBhardwaj/InferenceAttack/ddfda138f7937a6313af5392ec401ae89900aaf8/KGEAttack/ConvE/elbow_plots/WN18RR_conve.png
--------------------------------------------------------------------------------
/KGEAttack/ConvE/elbow_plots/WN18RR_distmult.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeruBhardwaj/InferenceAttack/ddfda138f7937a6313af5392ec401ae89900aaf8/KGEAttack/ConvE/elbow_plots/WN18RR_distmult.png
--------------------------------------------------------------------------------
/KGEAttack/ConvE/elbow_plots/WN18RR_transe.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeruBhardwaj/InferenceAttack/ddfda138f7937a6313af5392ec401ae89900aaf8/KGEAttack/ConvE/elbow_plots/WN18RR_transe.png
--------------------------------------------------------------------------------
/KGEAttack/ConvE/evaluation.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | from torch.autograd import Variable
4 | from sklearn import metrics
5 |
6 | import datetime
7 | from typing import Dict, Tuple, List
8 | import logging
9 | import os
10 | import pickle
11 |
12 | logger = logging.getLogger(__name__) #config already set in main.py
13 |
def get_ranking(model, queries:torch.Tensor, num_rel:int,
                filters:Dict[str, Dict[Tuple[int, int], List[int]]],
                device: str,
                batch_size: int = 500
                ):
    """Compute filtered ranks of every query on the subject and object side.

    For each batch the model is queried twice: ``(o, r + num_rel)`` in
    ``'lhs'`` mode and ``(s, r)`` in ``'rhs'`` mode, both with
    ``sigmoid=False``.  Entities listed in ``filters`` get score ``-1e6``,
    except the true target whose score is restored.  The rank is the
    1-based position of the target in the descending sort.

    Args:
        model: object whose ``forward(e, r, mode=..., sigmoid=...)`` returns a
            score matrix with one row per query and one column per entity.
        queries: integer tensor with columns (s, r, o).
        num_rel: offset added to ``r`` for the lhs (reverse) query.
        filters: ``filters['lhs'][(o, r)]`` and ``filters['rhs'][(s, r)]``
            give the entity ids to mask out.
        device: unused here; kept for interface compatibility.
        batch_size: number of queries scored per model call.

    Returns:
        Tuple ``(ranks_lhs, ranks_rhs)`` of lists with one rank per query.
    """
    ranks_lhs = []
    ranks_rhs = []

    # Ranking never needs gradients; disabling autograd avoids building a
    # graph for every scored batch.
    with torch.no_grad():
        b_begin = 0
        while b_begin < len(queries):
            b_queries = queries[b_begin : b_begin+batch_size]
            s, r, o = b_queries[:,0], b_queries[:,1], b_queries[:,2]
            r_rev = r+num_rel
            lhs_score = model.forward(o, r_rev, mode='lhs', sigmoid=False)
            rhs_score = model.forward(s, r, mode='rhs', sigmoid=False)

            for i, query in enumerate(b_queries):
                subj, rel, obj = query[0].item(), query[1].item(), query[2].item()
                filter_lhs = filters['lhs'][(obj, rel)]
                filter_rhs = filters['rhs'][(subj, rel)]

                # save the scores of the true targets
                target_value1 = rhs_score[i, obj].item()
                target_value2 = lhs_score[i, subj].item()
                # mask all known cases (filtered setting)
                lhs_score[i][filter_lhs] = -1e6
                rhs_score[i][filter_rhs] = -1e6
                # write back the saved values
                rhs_score[i][obj] = target_value1
                lhs_score[i][subj] = target_value2

            # sort so that high scores get low rank numbers
            _, lhs_sort = torch.sort(lhs_score, dim=1, descending=True)
            _, rhs_sort = torch.sort(rhs_score, dim=1, descending=True)

            lhs_sort = lhs_sort.cpu().numpy()
            rhs_sort = rhs_sort.cpu().numpy()

            for i, query in enumerate(b_queries):
                # position of the target entity in the sorted order
                lhs_rank = np.where(lhs_sort[i]==query[0].item())[0][0]
                rhs_rank = np.where(rhs_sort[i]==query[2].item())[0][0]

                # rank+1, since the best rank is 1 not 0
                ranks_lhs.append(lhs_rank + 1)
                ranks_rhs.append(rhs_rank + 1)

            b_begin += batch_size

    return ranks_lhs, ranks_rhs
66 |
67 |
68 |
def evaluation(model, queries, to_skip_eval:Dict[str, Dict[Tuple[int, int], List[int]]],
               save_name:str, num_rel:int=0, split:str ='test', batch_size:int=500, epoch:int=-1, device:str="cpu"):
    """Rank all queries, log Hits@1..10 / MR / MRR and append them to a file.

    The report is appended to ``results/<split>_<save_name>.txt``; the
    ``results`` directory is created if it does not exist so that the
    ranking work is not lost to a missing folder.

    Args:
        model: model passed through to ``get_ranking``.
        queries: array of (s, r, o) triples; converted to an int64 tensor.
        to_skip_eval: filter dictionaries passed through to ``get_ranking``.
        save_name: suffix used for the log header and the result file name.
        num_rel: relation offset passed through to ``get_ranking``.
        split: prefix used for the log header and the result file name.
        batch_size: batch size passed through to ``get_ranking``.
        epoch: value written on the ``Epoch:`` line of the report.
        device: device the query tensor is moved to.

    Returns:
        Dict with ``hits_lhs@k`` / ``hits_rhs@k`` for k in 1..10 and
        ``mrr_lhs``, ``mrr_rhs``, ``mr_lhs``, ``mr_rhs``.
    """
    examples = torch.from_numpy(queries.astype('int64')).to(device)

    # get ranking
    ranks_lhs, ranks_rhs = get_ranking(model, examples, num_rel, to_skip_eval, device, batch_size)
    ranks_lhs, ranks_rhs = np.array(ranks_lhs), np.array(ranks_rhs)

    hits_at = np.arange(1,11)
    hits_at_lhs = [np.mean((ranks_lhs <= k), dtype=np.float64).item() for k in hits_at]
    hits_at_rhs = [np.mean((ranks_rhs <= k), dtype=np.float64).item() for k in hits_at]
    mr_lhs = np.mean(ranks_lhs, dtype=np.float64).item()
    mr_rhs = np.mean(ranks_rhs, dtype=np.float64).item()

    mrr_lhs = np.mean(1. / ranks_lhs, dtype=np.float64).item()
    mrr_rhs = np.mean(1. / ranks_rhs, dtype=np.float64).item()

    # Lines that are identical in the log and in the result file.
    hit_lines = []
    for i in hits_at:
        hit_lines.append('Hits left @{0}: {1}'.format(i, hits_at_lhs[i-1]))
        hit_lines.append('Hits right @{0}: {1}'.format(i, hits_at_rhs[i-1]))
        hit_lines.append('Hits @{0}: {1}'.format(i, np.mean([hits_at_lhs[i-1],hits_at_rhs[i-1]]).item()))
    rank_lines = [
        'Mean rank lhs: {0}'.format( mr_lhs),
        'Mean rank rhs: {0}'.format(mr_rhs),
        'Mean rank: {0}'.format( np.mean([mr_lhs, mr_rhs])),
    ]

    logger.info('')
    logger.info('-'*50)
    logger.info(split+'_'+save_name)
    logger.info('-'*50)
    logger.info('')
    for line in hit_lines + rank_lines:
        logger.info(line)
    logger.info('Mean reciprocal rank lhs: {0}'.format( mrr_lhs))
    logger.info('Mean reciprocal rank rhs: {0}'.format( mrr_rhs))
    logger.info('Mean reciprocal rank: {0}'.format(np.mean([mrr_rhs, mrr_lhs])))

    os.makedirs('results', exist_ok=True)
    with open(os.path.join('results', split + '_' + save_name + '.txt'), 'a') as text_file:
        text_file.write('Epoch: {0}\n'.format(epoch))
        text_file.write('Lhs denotes ranking by subject corruptions \n')
        text_file.write('Rhs denotes ranking by object corruptions \n')
        for line in hit_lines + rank_lines:
            text_file.write(line + '\n')
        text_file.write('MRR lhs: {0}\n'.format( mrr_lhs))
        text_file.write('MRR rhs: {0}\n'.format(mrr_rhs))
        text_file.write('MRR: {0}\n'.format(np.mean([mrr_rhs, mrr_lhs])))
        text_file.write('-------------------------------------------------\n')

    results = {}
    for i in hits_at:
        results['hits_lhs@{}'.format(i)] = hits_at_lhs[i-1]
        results['hits_rhs@{}'.format(i)] = hits_at_rhs[i-1]
    results['mrr_lhs'] = mrr_lhs
    results['mrr_rhs'] = mrr_rhs
    results['mr_lhs'] = mr_lhs
    results['mr_rhs'] = mr_rhs

    return results
135 |
136 |
137 |
138 |
139 |
--------------------------------------------------------------------------------
/KGEAttack/ConvE/inst_add_attack.py:
--------------------------------------------------------------------------------
1 | ### Add triples based on IF triple, chosen as instance similarity - cos, dot, l2
2 | ### In inference attacks, 2 additions are selected to decrease the target triple's ranks on s-side and o-side
3 | ### But in attribution attack, target triple's rank is reduced (on both s-side and o-side) by selecting the IF triple and adding its corrupted version
4 | ### Thus, to integrate Attribution attacks here, I am selecting two IF triples in the neighbourhood and adding their corrupted versions as 2 adversarial additions. Perhaps another version to experiment with would be to select the IF triples for s-side and o-side ranks separately and then add their corrupted versions as adversarial additions - the final edits would then be of the form (test_s, test_r', test_o') for o-side, and (test_s', test_r', test_o) for s-side ranks.
5 |
6 | import pickle
7 | from typing import Dict, Tuple, List
8 | import os
9 | import numpy as np
10 | import pandas as pd
11 | from collections import defaultdict
12 | import operator
13 |
14 | import json
15 | import logging
16 | import argparse
17 | import math
18 | from pprint import pprint
19 | import errno
20 | import time
21 |
22 | import torch
23 | from torch.utils.data import DataLoader
24 | import torch.backends.cudnn as cudnn
25 | from torch import nn
26 | from torch.nn import CrossEntropyLoss
27 | from torch.nn import functional as F
28 | import torch.autograd as autograd
29 |
30 | from evaluation import evaluation
31 | from model import Distmult, Complex, Conve, Transe
32 | import utils
33 |
def get_if_triple(test_trip, nghbr_trip, model, attack_batch_size, simmetric):
    """Return the indices of the two neighbours most similar to the target.

    Similarity is measured between ``model.score_triples_vec`` of the target
    triple and of every neighbour triple.

    Args:
        test_trip: 1-D numpy array (s, r, o) of the target triple.
        nghbr_trip: 2-D numpy array of neighbour triples, one (s, r, o) row
            each; at least two rows are required because two indices are
            returned.
        model: object providing ``score_triples_vec(s, r, o)``.
        attack_batch_size: neighbours scored per model call; ``-1`` scores
            all neighbours in one call.
        simmetric: ``'l2'`` (negative L2 distance), ``'dot'`` (dot product);
            any other value selects cosine similarity.

    Returns:
        Tuple ``(idx_1, idx_2)`` of 0-dim index tensors into ``nghbr_trip``
        for the highest and second-highest similarity.

    Note:
        Relies on the module-level ``device``.
    """
    test_trip = test_trip[None, :] # add a batch dimension
    test_trip = torch.from_numpy(test_trip).to(device)
    test_s, test_r, test_o = test_trip[:,0], test_trip[:,1], test_trip[:,2]
    test_vec = model.score_triples_vec(test_s, test_r, test_o)

    # Use the argument (not the global args) so the function honours what
    # the caller passed.
    if attack_batch_size == -1:
        nghbr_batch = nghbr_trip.shape[0]
    else:
        nghbr_batch = attack_batch_size

    b_begin = 0
    nghbr_sim = []
    while b_begin < nghbr_trip.shape[0]:
        b_nghbr_trip = nghbr_trip[b_begin : b_begin+nghbr_batch]
        b_nghbr_trip = torch.from_numpy(b_nghbr_trip).to(device)
        b_nghbr_s, b_nghbr_r, b_nghbr_o = b_nghbr_trip[:,0], b_nghbr_trip[:,1], b_nghbr_trip[:,2]
        b_nghbr_vec = model.score_triples_vec(b_nghbr_s, b_nghbr_r, b_nghbr_o)
        if simmetric == 'l2':
            b_sim = -torch.norm((b_nghbr_vec-test_vec), p=2, dim=-1)
        elif simmetric == 'dot':
            b_sim = torch.matmul(b_nghbr_vec, test_vec.t())
        else: ##cos
            b_sim = F.cosine_similarity(test_vec, b_nghbr_vec) #default dim=1

        # The matmul above yields one column per neighbour row; flatten so
        # every metric contributes one scalar per neighbour. Without this
        # the sort below would run over a size-1 axis and always return 0.
        b_sim = b_sim.detach().reshape(-1).cpu().numpy().tolist()
        nghbr_sim += b_sim
        b_begin += nghbr_batch

    nghbr_sim = np.array(nghbr_sim)
    nghbr_sim = torch.from_numpy(nghbr_sim).to(device)
    # the most similar neighbours come first
    _, argsort = torch.sort(nghbr_sim, -1, descending=True)
    del_idx_1, del_idx_2 = argsort[0], argsort[1]

    return del_idx_1, del_idx_2
72 |
73 |
74 |
def _least_similar_entity(model, ent_emb, entity, known):
    """Return the id of the entity least cosine-similar to ``entity``.

    Ids in ``known`` are pushed to the end of the ranking (similarity 1e6) so
    that an already existing triple is not proposed again.
    """
    if args.model == 'complex':
        emb = torch.cat((model.emb_e_real(entity), model.emb_e_img(entity)), dim=-1).squeeze(dim=1)
    else:
        emb = model.emb_e(entity).squeeze(dim=1)
    cos_sim = F.cosine_similarity(emb, ent_emb)
    cos_sim[known] = 1e6
    # smallest cosine similarity means largest cosine distance
    _, order = torch.sort(cos_sim, -1, descending=False)
    return order[0].item()


def get_additions(train_data, test_data, neighbours, model, attack_batch_size, simmetric):
    """Generate two adversarial additions per target triple.

    For every target triple the two most similar neighbour triples (the
    influential, "IF" triples) are picked with ``get_if_triple``.  Each IF
    triple is corrupted on the side that is *not* shared with the target:
    the replaced entity is the one with the smallest cosine similarity to
    the original entity, skipping entities that already form a training
    triple with the kept (relation, entity) pair.

    Args:
        train_data: numpy array of training triples (s, r, o).
        test_data: numpy array of target triples (s, r, o).
        neighbours: indexable by target position; each entry indexes the
            rows of ``train_data`` that are neighbours of that target.
        model: trained model exposing entity embedding tables and
            ``score_triples_vec``.
        attack_batch_size: forwarded to ``get_if_triple``.
        simmetric: similarity metric forwarded to ``get_if_triple``.

    Returns:
        Tuple ``(if_triples, triples_to_add, summary_dict)``:
        ``if_triples`` and ``triples_to_add`` are parallel lists of
        ``[s, r, o]`` integer lists; ``summary_dict`` maps the target index
        to ``[target, addition_1, addition_2]``.  An IF triple that shares no
        entity with the target is logged and skipped.

    Note:
        Relies on the module-level ``args``, ``device`` and ``logger``.
    """
    logger.info('------ Generating edits per target triple ------')
    start_time = time.time()
    logger.info('Start time: {0}'.format(str(start_time)))

    if args.model == 'complex':
        ent_emb = torch.cat((model.emb_e_real.weight, model.emb_e_img.weight), dim=-1)
    else:
        ent_emb = model.emb_e.weight

    triples_to_delete = []
    triples_to_add = []
    summary_dict = {}
    for test_idx, test_trip in enumerate(test_data):
        test_nghbrs = neighbours[test_idx]
        nghbr_trip = train_data[test_nghbrs]
        del_idx_1, del_idx_2 = get_if_triple(test_trip, nghbr_trip, model, attack_batch_size, simmetric)
        if_trips = [nghbr_trip[del_idx_1], nghbr_trip[del_idx_2]]

        test_trip = torch.from_numpy(test_trip).to(device)[None,:]
        test_s, test_r, test_o = test_trip[:,0], test_trip[:,1], test_trip[:,2]

        summary_list = []
        summary_list.append(list(map(int, [test_s.item(),test_r.item(),test_o.item()])))

        for if_trip in if_trips:
            if_trip = torch.from_numpy(if_trip).to(device)[None,:]
            if_s, if_r, if_o = if_trip[:,0], if_trip[:,1], if_trip[:,2]

            if (if_o == test_s or if_o == test_o):
                # object of IF triple is neighbour - edit will be [s_dash, if_r, if_o]
                # filter for (s_dash, r, o), i.e. ignore s_dash that already exist
                filter_s = train_data[np.where((train_data[:,2] == if_o.item())
                                               & (train_data[:,1] == if_r.item())), 0].squeeze()
                s_dash = _least_similar_entity(model, ent_emb, if_s, filter_s)
                add_trip = [s_dash, if_r.item(), if_o.item()]

            elif (if_s == test_s or if_s == test_o):
                # subject of IF triple is neighbour - edit will be [if_s, if_r, o_dash]
                # filter for (s, r, o_dash), i.e. ignore o_dash that already exist
                filter_o = train_data[np.where((train_data[:,0] == if_s.item())
                                               & (train_data[:,1] == if_r.item())), 2].squeeze()
                o_dash = _least_similar_entity(model, ent_emb, if_o, filter_o)
                add_trip = [if_s.item(), if_r.item(), o_dash]

            else:
                # No edit can be derived; skip instead of re-using a stale
                # (or undefined) add_trip.
                logger.info('Unexpected behaviour')
                continue

            # Store plain integers so the result converts cleanly to a
            # (num_triples, 3) numpy array regardless of the device.
            triples_to_delete.append(list(map(int, [if_s.item(), if_r.item(), if_o.item()])))
            triples_to_add.append(add_trip)
            summary_list.append(list(map(int, add_trip)))

        summary_dict[test_idx] = summary_list
        if test_idx%100 == 0 or test_idx == test_data.shape[0]-1:
            logger.info('Processed test triple {0}'.format(str(test_idx)))
            logger.info('Time taken: {0}'.format(str(time.time() - start_time)))
    logger.info('Time taken to generate edits: {0}'.format(str(time.time() - start_time)))

    return triples_to_delete, triples_to_add, summary_dict
175 |
if __name__ == '__main__':
    # Script entry point: load the target split and trained model, generate
    # the instance-attribution additions and write the poisoned dataset.
    # `args`, `device` and `logger` are module-level names on purpose: the
    # functions above read them as globals.

    parser = utils.get_argument_parser()
    parser.add_argument('--target-split', type=int, default=1, help='Ranks to use for target set. Values are 1 for ranks <=10; 2 for ranks>10 and ranks<=100. Default: 1')
    parser.add_argument('--budget', type=int, default=1, help='Budget for each target triple for each corruption side')
    parser.add_argument('--rand-run', type=int, default=1, help='A number assigned to the random run of experiment')
    parser.add_argument('--attack-batch-size', type=int, default=-1, help='Batch size for processing neighbours of target')

    parser.add_argument('--sim-metric', type=str, default='cos', help='Similarity metric for the attribution attack - cos, dot, l2')

    args = parser.parse_args()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    args.device = device

    # Each random run shifts the seed by one (author's note: default seed is 17).
    args.seed = args.seed + (args.rand_run - 1)

    if args.reproduce_results:
        args = utils.set_hyperparams(args)

    # Fixing random seeds for reproducibility - https://pytorch.org/docs/stable/notes/randomness.html
    torch.manual_seed(args.seed)
    cudnn.deterministic = True
    cudnn.benchmark = False
    np.random.seed(args.seed)
    rng = np.random.default_rng(seed=args.seed)

    args.epochs = -1 # no training here
    model_name = '{0}_{1}_{2}_{3}_{4}'.format(args.model, args.embedding_dim, args.input_drop, args.hidden_drop, args.feat_drop)
    model_path = 'saved_models/{0}_{1}.model'.format(args.data, model_name)
    log_path = 'logs/attack_logs/inst_add_{5}/{0}_{1}_{2}_{3}_{4}'.format( args.model, args.data,
                                                                             args.target_split, args.budget, args.rand_run, args.sim_metric)

    # basicConfig cannot create missing parent folders for the log file.
    os.makedirs(os.path.dirname(log_path), exist_ok=True)
    logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s',
                        datefmt = '%m/%d/%Y %H:%M:%S',
                        level = logging.INFO,
                        filename = log_path
                       )
    logger = logging.getLogger(__name__)

    data_path = 'data/target_{0}_{1}_{2}'.format(args.model, args.data, args.target_split)

    n_ent, n_rel, ent_to_id, rel_to_id = utils.generate_dicts(data_path)

    ##### load data ####
    data = utils.load_data(data_path)
    train_data, valid_data, test_data = data['train'], data['valid'], data['test']

    # NOTE: pickle is only safe for files produced by this project.
    with open(os.path.join(data_path, 'to_skip_eval.pickle'), 'rb') as inp_f:
        to_skip_eval: Dict[str, Dict[Tuple[int, int], List[int]]] = pickle.load(inp_f)
    to_skip_eval['lhs'] = {(int(k[0]), int(k[1])): v for k,v in to_skip_eval['lhs'].items()}
    to_skip_eval['rhs'] = {(int(k[0]), int(k[1])): v for k,v in to_skip_eval['rhs'].items()}

    model = utils.load_model(model_path, args, n_ent, n_rel, device)

    # test set is the target set because we loaded data from target_...
    neighbours = utils.generate_nghbrs(test_data, train_data)

    if_triples, triples_to_add, summary_dict = get_additions(train_data, test_data, neighbours, model, args.attack_batch_size, args.sim_metric)

    triples_to_add = np.asarray(triples_to_add)
    if_triples = np.asarray(if_triples)

    new_train_1 = np.concatenate((triples_to_add, train_data))

    logger.info ('Length of original training set: ' + str(train_data.shape[0]))
    logger.info ('Length of new poisoned training set: ' + str(new_train_1.shape[0]))

    # Additions may coincide with each other; keep each triple once.
    new_train = pd.DataFrame(new_train_1).drop_duplicates().values

    logger.info ('Length of original training set: ' + str(train_data.shape[0]))
    logger.info ('Length of new poisoned training set: ' + str(new_train.shape[0]))

    num_en_or = np.unique(np.concatenate((train_data[:,0], train_data[:,2]))).shape[0]
    num_en_pos = np.unique(np.concatenate((new_train[:,0], new_train[:,2]))).shape[0]

    save_path = 'data/inst_add_{5}_{0}_{1}_{2}_{3}_{4}'.format( args.model, args.data, args.target_split, args.budget, args.rand_run,
                                                                args.sim_metric
                                                              )
    try :
        os.makedirs(save_path)
    except OSError as e:
        if e.errno == errno.EEXIST:
            logger.info(e)
            logger.info('Using the existing folder {0} for processed data'.format(save_path))
        else:
            raise

    with open(os.path.join(save_path, 'train.txt'), 'w') as out:
        for item in new_train:
            out.write("%s\n" % "\t".join(map(str, item)))

    with open(os.path.join(save_path, 'train.pickle'), 'wb') as out:
        pickle.dump(new_train.astype('uint64'), out)

    with open(os.path.join(save_path, 'entities_dict.json'), 'w') as f:
        f.write(json.dumps(ent_to_id) + '\n')

    with open(os.path.join(save_path, 'relations_dict.json'), 'w') as f:
        f.write(json.dumps(rel_to_id) + '\n')

    with open(os.path.join(save_path, 'valid.txt'), 'w') as out:
        for item in valid_data:
            out.write("%s\n" % "\t".join(map(str, item)))

    with open(os.path.join(save_path, 'valid.pickle'), 'wb') as out:
        pickle.dump(valid_data.astype('uint64'), out)

    with open(os.path.join(save_path, 'test.txt'), 'w') as out:
        for item in test_data:
            out.write("%s\n" % "\t".join(map(str, item)))

    with open(os.path.join(save_path, 'test.pickle'), 'wb') as out:
        pickle.dump(test_data.astype('uint64'), out)

    with open(os.path.join(save_path, 'influential_triples.txt'), 'w') as out:
        for item in if_triples:
            out.write("%s\n" % "\t".join(map(str, item)))

    with open(os.path.join(save_path, 'adversarial_additions.txt'), 'w') as out:
        for item in triples_to_add:
            out.write("%s\n" % "\t".join(map(str, item)))

    with open(os.path.join(save_path, 'summary_edits.json'), 'w') as out:
        out.write(json.dumps(summary_dict) + '\n')

    with open(os.path.join(save_path, 'stats.txt'), 'w') as f:
        f.write('Length of original training set: {0} \n'. format(train_data.shape[0]))
        f.write('Length of new poisoned training set: {0} \n'. format(new_train.shape[0]))
        f.write('Length of new poisoned training set including duplicates: {0} \n'. format(new_train_1.shape[0]))
        f.write('Number of entities in original training set: {0} \n'. format(num_en_or))
        f.write('Number of entities in poisoned training set: {0} \n'. format(num_en_pos))
        f.write('Length of original test set: {0} \n'. format(test_data.shape[0]))
        f.write('Number of triples addded : {0}\n'.format(triples_to_add.shape[0]))
        f.write('Instance Attribution Attacks - This attack version is generated uses similarity metric: {0} \n'.format(args.sim_metric))
        f.write('---------------------------------------------------------------------- \n')
337 |
338 |
339 |
--------------------------------------------------------------------------------
/KGEAttack/ConvE/preprocess.py:
--------------------------------------------------------------------------------
1 | '''
2 | 1. Read the string data and generate dictionaries
3 | 2. Convert string data to processed data and save it along with dictionaries
4 | '''
5 |
6 | import numpy as np
7 | import sys
8 | import os
9 | import errno
10 | import json
11 | import pandas as pd
12 | import pickle
13 |
14 |
# Dataset name: first command-line argument, falling back to WN18RR.
# Other names listed as alternatives by the author:
# FB15k-237, YAGO3-10, WN18, FB15k.
dataset_name = sys.argv[1] if len(sys.argv) > 1 else 'WN18RR'

# Input (raw) and output (processed) locations, plus the splits to handle.
files = ['train', 'valid', 'test']
base_path = 'data/{0}_original/'.format(dataset_name)
processed_path = 'data/{0}'.format(dataset_name)

# One fixed seed for the global numpy RNG and both explicit generators.
seed = 345345
np.random.seed(seed)
rdm = np.random.RandomState(seed)
rng = np.random.default_rng(seed)
33 |
def _load_data(file_path):
    """Read a header-less tab-separated file as strings and return its unique rows.

    Returns:
        2-D numpy array of the de-duplicated rows, in file order.
    """
    frame = pd.read_csv(file_path, sep='\t', header=None, names=None, dtype=str)
    return frame.drop_duplicates().values
38 |
def generate_ids():
    """Build entity and relation id maps from every split listed in ``files``.

    Returns:
        (unique_ent, unique_rel, entities_to_id, rel_to_id) where the two
        dicts map each sorted unique name to its position.
    """
    splits = [_load_data(os.path.join(base_path, name + '.txt')) for name in files]
    triples = np.concatenate(splits)

    # Entities are whatever appears in the first or third column.
    unique_ent = np.unique(np.concatenate((triples[:, 0], triples[:, 2])))
    unique_rel = np.unique(triples[:, 1])

    entities_to_id = dict(zip(sorted(unique_ent), range(len(unique_ent))))
    rel_to_id = dict(zip(sorted(unique_rel), range(len(unique_rel))))

    print("{}: {} entities and {} relations".format(dataset_name, len(unique_ent), len(unique_rel)))

    return unique_ent, unique_rel, entities_to_id, rel_to_id
55 |
def generate_ids_from_train():
    """Build entity and relation id maps from the training split only.

    Returns:
        (unique_ent, unique_rel, entities_to_id, rel_to_id) where the two
        dicts map each sorted unique name to its position.
    """
    train_triples = _load_data(os.path.join(base_path, 'train.txt'))

    # Entities are whatever appears in the first or third column.
    subjects, objects = train_triples[:, 0], train_triples[:, 2]
    unique_ent = np.unique(np.concatenate((subjects, objects)))
    unique_rel = np.unique(train_triples[:, 1])

    entities_to_id = dict(zip(sorted(unique_ent), range(len(unique_ent))))
    rel_to_id = dict(zip(sorted(unique_rel), range(len(unique_rel))))

    print("{}: {} entities and {} relations".format(dataset_name, len(unique_ent), len(unique_rel)))

    return unique_ent, unique_rel, entities_to_id, rel_to_id
69 |
70 |
def process_and_save(entities_to_id, relations_to_id, unique_ent):
    """Convert every split from string triples to integer ids and write it out.

    For each name in the module-level ``files`` list, the raw split is read
    from ``base_path``, triples whose subject or object is not in
    ``unique_ent`` are dropped, and the remaining triples are written to
    ``processed_path`` both as a tab-separated ``.txt`` file and as a pickled
    ``uint64`` array. A summary is written to ``dataset_stats.txt``.

    Args:
        entities_to_id: mapping from entity name to integer id.
        relations_to_id: mapping from relation name to integer id.
        unique_ent: collection of entity names considered "seen".

    Raises:
        OSError: if the output folder cannot be created for a reason other
            than already existing.
        KeyError: if a kept triple uses a relation missing from
            ``relations_to_id``.
    """
    try:
        os.makedirs(processed_path)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise
        print(e)
        print('Using the existing folder {0} for processed data'.format(processed_path))

    stats_path = os.path.join(processed_path, 'dataset_stats.txt')
    with open(stats_path, 'w') as stats:
        # Count relations from the argument rather than a module-level global.
        stats.write("{}: {} entities and {} relations \n".format(
            dataset_name, len(unique_ent), len(relations_to_id)))

    # function to filter out triples with unseen entities
    def _filter_unseen_entities(x):
        ent_seen = unique_ent
        df = pd.DataFrame(x, columns=['s', 'p', 'o'])
        filtered_df = df[df.s.isin(ent_seen) & df.o.isin(ent_seen)]
        n_removed_ents = df.shape[0] - filtered_df.shape[0]
        return filtered_df.values, n_removed_ents

    for f in files:
        file_path = os.path.join(base_path, f + '.txt')
        x = _load_data(file_path)
        x, n_removed_ents = _filter_unseen_entities(x)  # filter unseen entities if any
        if n_removed_ents > 0:
            msg = '{0}: {1} split: Removed {2} triples containing unseen entities. \n'.format(dataset_name, f, n_removed_ents)
            with open(stats_path, 'a') as stats:
                stats.write(msg)
            print(msg)

        # Plain lookups instead of np.vectorize, which cannot be called on an
        # empty split; reshape keeps the (n, 3) layout even when n == 0.
        x = np.array(
            [[entities_to_id[s], relations_to_id[p], entities_to_id[o]] for s, p, o in x],
            dtype=np.int64,
        ).reshape((-1, 3))

        with open(os.path.join(processed_path, f + '.txt'), 'w') as out:
            for item in x:
                out.write("%s\n" % "\t".join(map(str, item)))

        with open(os.path.join(processed_path, f + '.pickle'), 'wb') as out:
            pickle.dump(x.astype('uint64'), out)

    return
117 |
118 |
# When True, ids come from the training split only, so triples with entities
# unseen in training get filtered out of every split during processing.
filter_unseen = True  # if needed, pass as cmd argument
id_builder = generate_ids_from_train if filter_unseen else generate_ids
unique_ent, unique_rel, entities_to_id, rel_to_id = id_builder()

n_relations = len(unique_rel)
n_entities = len(unique_ent)

process_and_save(entities_to_id, rel_to_id, unique_ent)

# Save both lookup tables next to the processed splits, one JSON object per file.
for dict_file, mapping in (('entities_dict.json', entities_to_id),
                           ('relations_dict.json', rel_to_id)):
    with open(os.path.join(processed_path, dict_file), 'w') as f:
        f.write(json.dumps(mapping) + '\n')

print("{}: {} entities and {} relations".format(dataset_name, len(unique_ent), len(unique_rel)))
139 |
140 |
141 |
142 |
--------------------------------------------------------------------------------
/KGEAttack/ConvE/select_targets.py:
--------------------------------------------------------------------------------
1 | import pickle
2 | from typing import Dict, Tuple, List
3 | import os
4 | import numpy as np
5 | import json
6 | import torch
7 | import logging
8 | import argparse
9 | import math
10 | from pprint import pprint
11 |
12 | import torch
13 | from torch.utils.data import DataLoader
14 | import torch.backends.cudnn as cudnn
15 |
16 | from dataset import TrainDataset, BidirectionalOneShotIterator
17 | from evaluation import evaluation
18 | from model import Distmult, Complex, Conve, Transe
19 |
20 |
21 |
def add_arguments():
    """Build the command-line parser shared by the link-prediction scripts.

    The filter-count and kernel-size options keep their historical
    single-dash spelling (``-num-filters``, ``-kernel-size``) and also accept
    the conventional double-dash spelling. Help texts state the defaults that
    are actually configured.

    Returns:
        argparse.ArgumentParser: the configured parser (not yet parsed).
    """
    parser = argparse.ArgumentParser(description='Link prediction for knowledge graphs')

    parser.add_argument('--data', type=str, default='FB15k-237', help='Dataset to use: {FB15k-237, YAGO3-10, WN18RR, umls, nations, kinship}, default: FB15k-237')
    parser.add_argument('--model', type=str, default='conve', help='Choose from: {conve, distmult, complex, transe}')
    parser.add_argument('--add-reciprocals', action='store_true', help='Option to add reciprocal relations')

    parser.add_argument('--transe-margin', type=float, default=12.0, help='Margin value for TransE scoring function. Default:12.0')
    parser.add_argument('--transe-norm', type=int, default=2, help='P-norm value for TransE scoring function. Default:2')

    parser.add_argument('--epochs', type=int, default=400, help='Number of epochs to train (default: 400)')
    parser.add_argument('--lr', type=float, default=0.001, help='Learning rate (default: 0.001)')
    parser.add_argument('--lr-decay', type=float, default=0.0, help='Weight decay value to use in the optimizer. Default: 0.0')
    parser.add_argument('--max-norm', action='store_true', help='Option to add unit max norm constraint to entity embeddings')

    parser.add_argument('--num-batches', type=int, default=400, help='Number of batches for training (default: 400)')
    parser.add_argument('--test-batch-size', type=int, default=128, help='Batch size for test split (default: 128)')
    parser.add_argument('--valid-batch-size', type=int, default=128, help='Batch size for valid split (default: 128)')
    parser.add_argument('--num-workers', type=int, default=4, help='Number of workers to use for the batch loaders on GPU. Default: 4')

    parser.add_argument('--embedding-dim', type=int, default=200, help='The embedding dimension (1D). Default: 200')

    parser.add_argument('--stack_width', type=int, default=20, help='The first dimension of the reshaped/stacked 2D embedding. Second dimension is inferred. Default: 20')
    parser.add_argument('--hidden-drop', type=float, default=0.3, help='Dropout for the hidden layer. Default: 0.3.')
    parser.add_argument('--input-drop', type=float, default=0.2, help='Dropout for the input embeddings. Default: 0.2.')
    parser.add_argument('--feat-drop', type=float, default=0.3, help='Dropout for the convolutional features. Default: 0.3.')
    # Single-dash spellings are kept so existing launch scripts keep working.
    parser.add_argument('-num-filters', '--num-filters', default=32, type=int, help='Number of filters for convolution')
    parser.add_argument('-kernel-size', '--kernel-size', default=3, type=int, help='Kernel Size for convolution')

    parser.add_argument('--use-bias', action='store_true', help='Use a bias in the convolutional layer. Default: False')
    parser.add_argument('--label-smoothing', type=float, default=0.1, help='Label smoothing value to use. Default: 0.1')

    parser.add_argument('--reg-weight', type=float, default=5e-12, help='Weight for regularization. Default: 5e-12')
    parser.add_argument('--reg-norm', type=int, default=2, help='Norm for regularization. Default: 2')

    parser.add_argument('--resume', action='store_true', help='Restore a saved model.')
    parser.add_argument('--resume-split', type=str, default='test', help='Split to evaluate a restored model')
    parser.add_argument('--seed', type=int, default=17, metavar='S', help='Random seed (default: 17)')

    return parser
65 |
def set_paths(args):
    """Derive the model name and the model/eval/log paths from parsed arguments.

    Returns:
        (model_name, model_path, eval_name, log_path), all strings. The log
        path includes ``args.target_split``, so that attribute must be set.
    """
    model_name = '_'.join(str(part) for part in (
        args.model, args.embedding_dim, args.input_drop, args.hidden_drop, args.feat_drop))
    model_path = f'saved_models/{args.data}_{model_name}.model'
    eval_name = f'{args.data}_{model_name}_{args.num_batches}_{args.epochs}_{args.valid_batch_size}_{args.test_batch_size}'
    log_path = f'logs/select_target_{args.data}_{args.target_split}_{model_name}_{args.num_batches}_{args.epochs}.log'

    return model_name, model_path, eval_name, log_path
73 |
def generate_dicts(data_path):
    """Load the entity and relation id dictionaries stored as JSON in ``data_path``.

    Returns:
        (n_ent, n_rel, ent_to_id, rel_to_id): the two dictionary sizes
        followed by the dictionaries themselves.
    """
    loaded = []
    for file_name in ('entities_dict.json', 'relations_dict.json'):
        with open(os.path.join(data_path, file_name), 'r') as f:
            loaded.append(json.load(f))
    ent_to_id, rel_to_id = loaded

    return len(ent_to_id), len(rel_to_id), ent_to_id, rel_to_id
83 |
def load_data(data_path):
    """Load the pickled train/valid/test splits found in ``data_path``.

    Each ``<split>.pickle`` file is unpickled and wrapped in a NumPy array.
    Files are opened in a ``with`` block so the handle is released even when
    unpickling fails.

    NOTE: pickle executes code from the file being loaded; only use this on
    data files produced locally by the preprocessing scripts.

    Returns:
        dict mapping 'train', 'valid' and 'test' to their arrays.
    """
    data = {}
    for split in ['train', 'valid', 'test']:
        with open(os.path.join(data_path, split + '.pickle'), 'rb') as inp_f:
            data[split] = np.array(pickle.load(inp_f))

    return data
92 |
def add_model(args, n_ent, n_rel):
    """Instantiate the scoring model selected by ``args.model``.

    When ``args.add_reciprocals`` is set, the model is built with twice the
    number of relations. A model name of ``None`` falls back to ConvE.

    Args:
        args: parsed arguments; ``model`` and ``add_reciprocals`` are read
            here and the whole object is passed to the model constructor.
        n_ent: number of entities.
        n_rel: number of relations, before any doubling for reciprocals.

    Returns:
        The constructed model; it is not moved to any device here.

    Raises:
        Exception: if ``args.model`` is not one of the known model names.
    """
    model_classes = {
        'conve': Conve,
        'distmult': Distmult,
        'complex': Complex,
        'transe': Transe,
    }
    model_key = 'conve' if args.model is None else args.model
    if model_key not in model_classes:
        # logging uses %-style placeholders; a '{0}' placeholder is never filled in.
        logger.info('Unknown model: %s', args.model)
        raise Exception("Unknown model!")

    num_rel = 2 * n_rel if args.add_reciprocals else n_rel
    model = model_classes[model_key](args, n_ent, num_rel)

    #model.to(self.device)
    return model
125 |
def get_ranking(model, queries:torch.Tensor, num_rel:int,
                filters:Dict[str, Dict[Tuple[int, int], List[int]]],
                device: str,
                batch_size: int = 500
                ):
    """Compute filtered ranks of the true subject and object for each query.

    Queries are processed in batches of ``batch_size``. For every triple
    (s, r, o) the model scores all candidates for the object side (from s and
    r) and for the subject side (from o and ``r + num_rel``). Candidates
    listed in ``filters`` get a score of -1e6, except the true entity, whose
    original score is restored. Ranks are 1-based and a higher score ranks
    better.

    Args:
        model: object exposing ``forward(entities, relations, mode=...,
            sigmoid=...)`` that returns one row of candidate scores per query.
        queries: tensor of (subject, relation, object) id triples.
        num_rel: offset added to the relation ids for subject-side scoring.
        filters: ``filters['lhs'][(o, r)]`` and ``filters['rhs'][(s, r)]`` give
            the candidate ids to mask out.
        device: accepted for interface compatibility; not used here.
        batch_size: number of queries scored at once.

    Returns:
        (ranks_lhs, ranks_rhs): subject-side and object-side ranks, in query
        order.
    """
    ranks_lhs, ranks_rhs = [], []
    offset = 0
    while offset < len(queries):
        batch = queries[offset:offset + batch_size]
        subs, rels, objs = batch[:, 0], batch[:, 1], batch[:, 2]
        # raw scores, not probabilities
        lhs_score = model.forward(objs, rels + num_rel, mode='lhs', sigmoid=False)
        rhs_score = model.forward(subs, rels, mode='rhs', sigmoid=False)

        triples = [(q[0].item(), q[1].item(), q[2].item()) for q in batch]

        for row, (s, r, o) in enumerate(triples):
            known_subs = filters['lhs'][(o, r)]
            known_objs = filters['rhs'][(s, r)]

            # remember the scores of the true entities before masking
            true_obj_score = rhs_score[row, o].item()
            true_sub_score = lhs_score[row, s].item()
            # filtered setting: push every known answer to the bottom
            lhs_score[row][known_subs] = -1e6
            rhs_score[row][known_objs] = -1e6
            # put the true entities' scores back
            rhs_score[row][o] = true_obj_score
            lhs_score[row][s] = true_sub_score

        # descending sort: position 0 holds the best-scored candidate
        _, lhs_order = torch.sort(lhs_score, dim=1, descending=True)
        _, rhs_order = torch.sort(rhs_score, dim=1, descending=True)
        lhs_order = lhs_order.cpu().numpy()
        rhs_order = rhs_order.cpu().numpy()

        for row, (s, _r, o) in enumerate(triples):
            # position of the true entity in the ordering, shifted to be 1-based
            ranks_lhs.append(np.flatnonzero(lhs_order[row] == s)[0] + 1)
            ranks_rhs.append(np.flatnonzero(rhs_order[row] == o)[0] + 1)

        offset += batch_size

    return ranks_lhs, ranks_rhs
178 |
179 |
180 |
181 |
if __name__ == '__main__':
    # Script entry point: load a trained model, rank the test and valid
    # triples, keep those whose subject-side AND object-side ranks fall in the
    # window chosen by --target-split, and write the resulting dataset to
    # data/target_<model>_<data>_<split>/.
    parser = add_arguments()
    parser.add_argument('--target-split', type=int, default=1, help='Ranks to use for target set. Values are 1 for ranks <=10; 2 for ranks>10 and ranks<=100. Default: 1')
    args = parser.parse_args()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Fixing random seeds for reproducibility -https://pytorch.org/docs/stable/notes/randomness.html
    torch.manual_seed(args.seed)
    cudnn.deterministic = True
    cudnn.benchmark = False
    np.random.seed(args.seed)
    rng = np.random.default_rng(seed=args.seed)

    args.epochs = -1 #no training here
    model_name, model_path, eval_name, log_path = set_paths(args)
    # The log folder must exist before logging can open a file inside it.
    os.makedirs(os.path.dirname(log_path), exist_ok=True)
    logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s',
                        datefmt = '%m/%d/%Y %H:%M:%S',
                        level = logging.INFO,
                        filename = log_path
                        )
    logger = logging.getLogger(__name__)

    data_path = 'data/{0}'.format(args.data)
    n_ent, n_rel, ent_to_id, rel_to_id = generate_dicts(data_path)

    ##### load data####
    data = load_data(data_path)
    train_data, valid_data, test_data = data['train'], data['valid'], data['test']

    with open(os.path.join(data_path, 'to_skip_eval.pickle'), 'rb') as inp_f:
        to_skip_eval: Dict[str, Dict[Tuple[int, int], List[int]]] = pickle.load(inp_f)
    # Filter keys are converted to int pairs so they match the ids used in queries.
    to_skip_eval['lhs'] = {(int(k[0]), int(k[1])): v for k,v in to_skip_eval['lhs'].items()}
    to_skip_eval['rhs'] = {(int(k[0]), int(k[1])): v for k,v in to_skip_eval['rhs'].items()}

    # add a model and load the pre-trained params
    model = add_model(args, n_ent, n_rel)
    model.to(device)
    logger.info('Loading saved model from {0}'.format(model_path))
    # map_location lets a checkpoint saved on GPU be restored on a CPU-only host.
    state = torch.load(model_path, map_location=device)
    model_params = state['state_dict']
    params = [(key, value.size(), value.numel()) for key, value in model_params.items()]
    for key, size, count in params:
        logger.info('Key:{0}, Size:{1}, Count:{2}'.format(key, size, count))

    model.load_state_dict(model_params)
    model.eval()

    with torch.no_grad():
        target_path = 'data/target_{0}_{1}_{2}'.format(args.model, args.data, args.target_split)
        # All outputs below are written inside this folder, so create it first.
        os.makedirs(target_path, exist_ok=True)

        # generate ranks for test set
        logger.info('Generating target set from test set')
        test_data = torch.from_numpy(test_data.astype('int64')).to(device)
        if args.add_reciprocals:
            num_rel= n_rel
        else:
            num_rel = 0
        ranks_lhs, ranks_rhs = get_ranking(model, test_data, num_rel, to_skip_eval, device, args.test_batch_size)
        ranks_lhs, ranks_rhs = np.array(ranks_lhs), np.array(ranks_rhs)
        if args.target_split == 2:
            indices = np.asarray(((ranks_lhs <= 100) & (ranks_lhs >10)) & ((ranks_rhs <= 100)&(ranks_rhs > 10))).nonzero()
        elif args.target_split ==1 :
            indices = np.asarray((ranks_lhs <= 10) & (ranks_rhs <= 10)).nonzero()
        else:
            logger.info('Unknown Target Split: %s', args.target_split)
            raise Exception("Unknown target split!")

        test_data = test_data.cpu().numpy()
        targets = test_data[indices]
        logger.info('Number of targets generated: {0}'.format(targets.shape[0]))
        #save eval for selected targets
        split = 'target_{0}'.format(args.target_split)

        results_target = evaluation(model, targets, to_skip_eval, eval_name, num_rel, split, args.test_batch_size, -1, device)

        # save target set; the same triples are also written as the new test split
        with open(os.path.join(target_path, 'target.txt'), 'w') as out:
            for item in targets:
                out.write("%s\n" % "\t".join(map(str, item)))
        with open(os.path.join(target_path, 'test.txt'), 'w') as out:
            for item in targets:
                out.write("%s\n" % "\t".join(map(str, item)))

        # use the valid set to generate non-target set
        logger.info('Generating non target set from valid set')
        valid_data = torch.from_numpy(valid_data.astype('int64')).to(device)
        if args.add_reciprocals:
            num_rel= n_rel
        else:
            num_rel = 0
        ranks_lhs, ranks_rhs = get_ranking(model, valid_data, num_rel, to_skip_eval, device, args.valid_batch_size)
        ranks_lhs, ranks_rhs = np.array(ranks_lhs), np.array(ranks_rhs)
        if args.target_split == 2:
            indices = np.asarray(((ranks_lhs <= 100) & (ranks_lhs >10)) & ((ranks_rhs <= 100)&(ranks_rhs > 10))).nonzero()
        elif args.target_split == 1:
            indices = np.asarray((ranks_lhs <= 10) & (ranks_rhs <= 10)).nonzero()
        else:
            logger.info('Unknown Target Split: %s', args.target_split)
            raise Exception("Unknown target split!")

        valid_data = valid_data.cpu().numpy()
        non_targets = valid_data[indices]
        logger.info('Number of non targets generated: {0}'.format(non_targets.shape[0]))
        #save eval for selected non targets
        split = 'non_target_{0}'.format(args.target_split)

        results_ntarget = evaluation(model, non_targets, to_skip_eval, eval_name, num_rel, split, args.valid_batch_size, -1, device)
        # save non target set and valid set both - eval needed for both
        with open(os.path.join(target_path, 'non_target.txt'), 'w') as out:
            for item in non_targets:
                out.write("%s\n" % "\t".join(map(str, item)))
        with open(os.path.join(target_path, 'valid.txt'), 'w') as out:
            for item in valid_data:
                out.write("%s\n" % "\t".join(map(str, item)))

        # saving dicts to avoid searching later
        with open(os.path.join(target_path, 'entities_dict.json'), 'w') as f:
            f.write(json.dumps(ent_to_id) + '\n')

        with open(os.path.join(target_path, 'relations_dict.json'), 'w') as f:
            f.write(json.dumps(rel_to_id) + '\n')

        with open(os.path.join(target_path, 'train.txt'), 'w') as out:
            for item in train_data:
                out.write("%s\n" % "\t".join(map(str, item)))

        with open(os.path.join(target_path, 'to_skip_eval.pickle'), 'wb') as out:
            pickle.dump(to_skip_eval, out)

        # write down the stats for targets generated
        with open(os.path.join(target_path, 'stats.txt'), 'w') as out:
            out.write('Number of train set triples: {0}\n'.format(train_data.shape[0]))
            out.write('Number of test set triples: {0}\n'.format(test_data.shape[0]))
            out.write('Number of valid set triples: {0}\n'.format(valid_data.shape[0]))
            out.write('Number of target triples: {0}\n'.format(targets.shape[0]))
            out.write('Number of non target triples: {0}\n'.format(non_targets.shape[0]))
            if args.target_split ==2:
                out.write('Target triples are ranked >10 and <=100 and test set is the target triples \n')
                out.write('Non target triples are ranked >10 and <=100 but valid triples is original valid set \n')
                out.write('Non target triples with ranks >10 and <=100 are in non_target.txt \n')
            else:
                out.write('Target triples are ranked <=10 and test set is the target triples \n')
                out.write('Non target triples are ranked <=10 but valid triples is original valid set \n')
                out.write('Non target triples with ranks <=10 are in non_target.txt \n')
            out.write('------------------------------------------- \n')
333 |
334 |
335 |
336 |
337 |
338 |
--------------------------------------------------------------------------------
/KGEAttack/ConvE/utils.py:
--------------------------------------------------------------------------------
1 | '''
2 | This file contains functions that are used repeatedly across different attacks
3 | '''
4 | import logging
5 | import time
6 | from tqdm import tqdm
7 | import io
8 | import pandas as pd
9 | import numpy as np
10 | import os
11 | import json
12 |
13 | import argparse
14 | import torch
15 |
16 | from model import Distmult, Complex, Conve, Transe
17 |
18 |
19 | logger = logging.getLogger(__name__) #config already set in main.py
20 |
def generate_dicts(data_path):
    """Read the entity and relation id dictionaries (JSON) stored in ``data_path``.

    Returns:
        (n_ent, n_rel, ent_to_id, rel_to_id): the two dictionary sizes
        followed by the dictionaries themselves.
    """
    def _read_json(file_name):
        with open(os.path.join(data_path, file_name), 'r') as f:
            return json.load(f)

    ent_to_id = _read_json('entities_dict.json')
    rel_to_id = _read_json('relations_dict.json')

    return len(ent_to_id), len(rel_to_id), ent_to_id, rel_to_id
30 |
def load_data(data_path):
    """Load the integer-coded train/valid/test splits from tab-separated text files.

    Each ``<split>.txt`` is read without a header as integers, duplicate rows
    are dropped, and the result is returned as a NumPy array.

    Returns:
        dict mapping 'train', 'valid' and 'test' to their arrays.
    """
    def _read_split(split):
        frame = pd.read_csv(os.path.join(data_path, split + '.txt'),
                            sep='\t', header=None, names=None, dtype=int)
        return frame.drop_duplicates().values

    return {split: _read_split(split) for split in ('train', 'valid', 'test')}
39 |
def add_model(args, n_ent, n_rel):
    """Instantiate the scoring model selected by ``args.model``.

    When ``args.add_reciprocals`` is set, the model is built with twice the
    number of relations. A model name of ``None`` falls back to ConvE.

    Args:
        args: parsed arguments; ``model`` and ``add_reciprocals`` are read
            here and the whole object is passed to the model constructor.
        n_ent: number of entities.
        n_rel: number of relations, before any doubling for reciprocals.

    Returns:
        The constructed model; it is not moved to any device here.

    Raises:
        Exception: if ``args.model`` is not one of the known model names.
    """
    model_classes = {
        'conve': Conve,
        'distmult': Distmult,
        'complex': Complex,
        'transe': Transe,
    }
    model_key = 'conve' if args.model is None else args.model
    if model_key not in model_classes:
        # logging uses %-style placeholders; a '{0}' placeholder is never filled in.
        logger.info('Unknown model: %s', args.model)
        raise Exception("Unknown model!")

    num_rel = 2 * n_rel if args.add_reciprocals else n_rel
    model = model_classes[model_key](args, n_ent, num_rel)

    #model.to(self.device)
    return model
72 |
def load_model(model_path, args, n_ent, n_rel, device):
    """Build the model described by ``args`` and restore its saved parameters.

    The checkpoint at ``model_path`` is expected to hold the parameters under
    the ``'state_dict'`` key. Every parameter's name, size and element count
    is logged, and the model is returned in eval mode.

    Args:
        model_path: path of the saved checkpoint.
        args: parsed arguments, forwarded to ``add_model``.
        n_ent: number of entities.
        n_rel: number of relations.
        device: device the model is moved to and the checkpoint is mapped to.

    Returns:
        The restored model, on ``device`` and in eval mode.
    """
    # add a model and load the pre-trained params
    model = add_model(args, n_ent, n_rel)
    model.to(device)
    logger.info('Loading saved model from {0}'.format(model_path))
    # map_location lets a checkpoint saved on GPU be restored on a CPU-only host.
    state = torch.load(model_path, map_location=device)
    model_params = state['state_dict']
    for key, value in model_params.items():
        logger.info('Key:{0}, Size:{1}, Count:{2}'.format(key, value.size(), value.numel()))

    model.load_state_dict(model_params)
    model.eval()
    logger.info(model)

    return model
89 |
def generate_nghbrs(test_set, train_set):
    '''
    Map the position of every test triple to the indices of its
    neighbouring training triples.

    A training triple is a neighbour when its subject or object equals the
    subject or object of the test triple. The returned indices refer to rows
    of train_set.
    '''
    train_ends = train_set[:, [0, 2]]  # subject and object columns

    def _neighbour_rows(triple):
        endpoints = [triple[0], triple[2]]
        return np.flatnonzero(np.isin(train_ends, endpoints).any(axis=1))

    return {t: _neighbour_rows(triple) for t, triple in enumerate(test_set)}
106 |
107 |
def perturb_data(train_data, trips_to_delete):
    """Remove the selected triples from a copy of the training data.

    A triple is removed only when both its subject and its object still occur
    somewhere in the remaining data afterwards; otherwise the edit is ignored
    and counted. Processing stops at the first triple that is not found.

    Returns:
        (perturbed_data, n_ignored_edits)
    """
    logger.info('----- Generating perturbed dataset ------')
    remaining = np.array(train_data, copy=True)

    n_ignored_edits = 0
    for idx, trip in enumerate(trips_to_delete):
        sub, rel, obj = trip[0], trip[1], trip[2]
        # rows matching the triple to delete
        hit = (remaining[:, 0] == sub) & (remaining[:, 1] == rel) & (remaining[:, 2] == obj)
        if not np.any(hit):
            logger.info("Can't delete the selected triple. Something is wrong in the code")
            logger.info(trip)
            break

        candidate = remaining[~hit]
        # does each endpoint still appear after the deletion?
        obj_survives = np.any(candidate[:, 0] == obj) | np.any(candidate[:, 2] == obj)
        sub_survives = np.any(candidate[:, 0] == sub) | np.any(candidate[:, 2] == sub)
        if obj_survives & sub_survives:
            remaining = candidate.copy()
        else:
            n_ignored_edits += 1
            logger.info('Ignoring edit number {0}: {1} because it deletes entities'.format(idx, trip))

    return remaining, n_ignored_edits
138 |
139 |
def set_hyperparams(args):
    '''
    Overwrite the hyperparameters on args with the fixed per-dataset,
    per-model values used for reproducibility, and return args.

    If args.data is one of the known datasets, args.original_data is set to
    it. The dataset is then matched against args.data or args.original_data.
    An unrecognised model for a matched dataset raises an Exception.
    '''
    transe_common = {'input_drop': 0.0, 'transe_margin': 9.0, 'epochs': 100}
    overrides_by_dataset = {
        'WN18RR': {
            'distmult': {'lr': 0.01, 'num_batches': 50},
            'complex': {'lr': 0.01},
            'conve': {'lr': 0.001},
            'transe': dict(transe_common, lr=0.005, num_batches=1000, reg_weight=1e-12),
        },
        'FB15k-237': {
            'distmult': {'lr': 0.005, 'input_drop': 0.5},
            'complex': {'lr': 0.005, 'input_drop': 0.5},
            'conve': {'lr': 0.001, 'hidden_drop': 0.5},
            'transe': dict(transe_common, lr=0.001, num_batches=800, reg_weight=1e-10),
        },
        'WN18': {
            'distmult': {'lr': 0.01, 'num_batches': 50},
            'complex': {'lr': 0.01},
            'conve': {'lr': 0.005},
            'transe': dict(transe_common, lr=0.01, num_batches=1500, reg_weight=1e-12),
        },
    }

    # A known dataset name is its own "original" dataset.
    if args.data in ('WN18RR', 'WN18', 'FB15k-237'):
        args.original_data = args.data

    for dataset in ('WN18RR', 'FB15k-237', 'WN18'):
        if (args.data == dataset or args.original_data == dataset):
            overrides = overrides_by_dataset[dataset].get(args.model)
            if overrides is None:
                raise Exception("New model:{0},{1}. Set hyperparams".format(args.data, args.model))
            for name, value in overrides.items():
                setattr(args, name, value)

    return args
211 |
212 |
213 |
214 |
215 |
def get_argument_parser():
    '''Generate an argument parser

    The filter-count and kernel-size options keep their historical
    single-dash spelling (-num-filters, -kernel-size) and also accept the
    conventional double-dash spelling. Help texts state the defaults that are
    actually configured.

    Returns the configured argparse.ArgumentParser (not yet parsed).
    '''
    parser = argparse.ArgumentParser(description='Link prediction for knowledge graphs')

    parser.add_argument('--data', type=str, default='FB15k-237', help='Dataset to use: {FB15k-237, YAGO3-10, WN18RR, umls, nations, kinship}, default: FB15k-237')
    parser.add_argument('--model', type=str, default='conve', help='Choose from: {conve, distmult, complex, transe}')
    parser.add_argument('--add-reciprocals', action='store_true', help='Option to add reciprocal relations')

    parser.add_argument('--transe-margin', type=float, default=12.0, help='Margin value for TransE scoring function. Default:12.0')
    parser.add_argument('--transe-norm', type=int, default=2, help='P-norm value for TransE scoring function. Default:2')

    parser.add_argument('--epochs', type=int, default=400, help='Number of epochs to train (default: 400)')
    parser.add_argument('--lr', type=float, default=0.001, help='Learning rate (default: 0.001)')
    parser.add_argument('--lr-decay', type=float, default=0.0, help='Weight decay value to use in the optimizer. Default: 0.0')
    parser.add_argument('--max-norm', action='store_true', help='Option to add unit max norm constraint to entity embeddings')

    parser.add_argument('--num-batches', type=int, default=400, help='Number of batches for training (default: 400)')
    parser.add_argument('--test-batch-size', type=int, default=128, help='Batch size for test split (default: 128)')
    parser.add_argument('--valid-batch-size', type=int, default=128, help='Batch size for valid split (default: 128)')
    parser.add_argument('--num-workers', type=int, default=4, help='Number of workers to use for the batch loaders on GPU. Default: 4')

    parser.add_argument('--embedding-dim', type=int, default=200, help='The embedding dimension (1D). Default: 200')

    parser.add_argument('--stack_width', type=int, default=20, help='The first dimension of the reshaped/stacked 2D embedding. Second dimension is inferred. Default: 20')
    parser.add_argument('--hidden-drop', type=float, default=0.3, help='Dropout for the hidden layer. Default: 0.3.')
    parser.add_argument('--input-drop', type=float, default=0.2, help='Dropout for the input embeddings. Default: 0.2.')
    parser.add_argument('--feat-drop', type=float, default=0.3, help='Dropout for the convolutional features. Default: 0.3.')
    # Single-dash spellings are kept so existing launch scripts keep working.
    parser.add_argument('-num-filters', '--num-filters', default=32, type=int, help='Number of filters for convolution')
    parser.add_argument('-kernel-size', '--kernel-size', default=3, type=int, help='Kernel Size for convolution')

    parser.add_argument('--use-bias', action='store_true', help='Use a bias in the convolutional layer. Default: False')
    parser.add_argument('--label-smoothing', type=float, default=0.1, help='Label smoothing value to use. Default: 0.1')

    parser.add_argument('--reg-weight', type=float, default=5e-12, help='Weight for regularization. Default: 5e-12')
    parser.add_argument('--reg-norm', type=int, default=2, help='Norm for regularization. Default: 2')

    parser.add_argument('--resume', action='store_true', help='Restore a saved model.')
    parser.add_argument('--resume-split', type=str, default='test', help='Split to evaluate a restored model')
    parser.add_argument('--seed', type=int, default=17, metavar='S', help='Random seed (default: 17)')

    parser.add_argument('--reproduce-results', action='store_true', help='Use the hyperparameters to reproduce the results.')
    parser.add_argument('--original-data', type=str, default='FB15k-237', help='Dataset to use; this option is needed to set the hyperparams to reproduce the results for training after attack, default: FB15k-237')

    return parser
264 |
265 |
class TqdmToLogger(io.StringIO):
    """
        File-like stream for TQDM that forwards progress text to a logger
        instead of the StdOut.

        ``write`` only stores the latest text (stripped of surrounding
        whitespace); ``flush`` emits the stored text at the configured level.
        See https://github.com/tqdm/tqdm/issues/313
    """
    logger = None
    level = None
    buf = ''

    def __init__(self, logger, level=None):
        super().__init__()
        self.logger = logger
        # A falsy level (None or 0) falls back to INFO.
        self.level = level if level else logging.INFO

    def write(self, buf):
        # Keep only the most recent text, without carriage returns/newlines/tabs/spaces.
        self.buf = buf.strip('\r\n\t ')

    def flush(self):
        self.logger.log(self.level, self.buf)
--------------------------------------------------------------------------------
/KGEAttack/ConvE/wrangle_KG.py:
--------------------------------------------------------------------------------
1 | '''
2 | 1. Read the processed data (int IDs) and generate sr2o and or2s data from training file
3 | 2. Use the train, valid and test file to generate filter lists for evaluation
4 | '''
5 | import numpy as np
6 | import sys
7 | import os
8 | import errno
9 | import json
10 | import pandas as pd
11 | import pickle
12 | from collections import defaultdict
13 |
14 |
def _load_data(file_path):
    """Read a tab-separated file and return its unique rows.

    The file is parsed without a header row and every field is kept as a
    string (``dtype=str``). Duplicate rows are removed before the rows are
    returned as a 2-D array (``DataFrame.values``).
    """
    frame = pd.read_csv(file_path, sep='\t', header=None, names=None, dtype=str)
    unique_rows = frame.drop_duplicates()
    return unique_rows.values
19 |
20 |
def generate_eval_filter(dataset_name):
    """Build and pickle the filter lists used during evaluation.

    Reads ``train.txt``, ``valid.txt`` and ``test.txt`` from
    ``data/<dataset_name>`` and, for every triple ``(lhs, rel, rhs)``, records

    * under ``'lhs'``: key ``(rhs, rel)`` -> every ``lhs`` seen with that pair
    * under ``'rhs'``: key ``(lhs, rel)`` -> every ``rhs`` seen with that pair

    Keys keep the raw values returned by ``_load_data``; the collected
    entities are converted with ``int`` and stored as sorted lists. The
    resulting dictionary is written to
    ``data/<dataset_name>/to_skip_eval.pickle``.

    Args:
        dataset_name: name of the dataset directory under ``data/``.

    Returns:
        None. The result is only written to disk.
    """
    processed_path = 'data/{0}'.format(dataset_name)
    to_skip = {'lhs': defaultdict(set), 'rhs': defaultdict(set)}
    for split in ('train', 'valid', 'test'):
        file_path = os.path.join(processed_path, split + '.txt')
        for lhs, rel, rhs in _load_data(file_path):
            # Reciprocal relations are not used, so the relation is stored
            # as-is for both directions (no offset by the relation count).
            to_skip['lhs'][(rhs, rel)].add(int(lhs))
            to_skip['rhs'][(lhs, rel)].add(int(rhs))

    to_skip_final = {
        side: {key: sorted(entities) for key, entities in skip.items()}
        for side, skip in to_skip.items()
    }

    # The context manager closes the file even if pickling raises.
    with open(os.path.join(processed_path, 'to_skip_eval.pickle'), 'wb') as out:
        pickle.dump(to_skip_final, out)
48 |
def generate_train_data(dataset_name):
    """Build and pickle the subject/object lookup tables for training.

    Reads ``data/<dataset_name>/train.txt`` and groups its triples
    ``(s, r, o)`` into two dictionaries:

    * ``sr2o``: key ``(s, r)`` -> sorted list of every ``o`` seen with it
    * ``or2s``: key ``(o, r)`` -> sorted list of every ``s`` seen with it

    Values are kept exactly as returned by ``_load_data`` (no ``int``
    conversion), so the lists are sorted on those raw values. The tables
    are written to ``sr2o_train.pickle`` and ``or2s_train.pickle`` in the
    same directory.

    Args:
        dataset_name: name of the dataset directory under ``data/``.

    Returns:
        None. The results are only written to disk.
    """
    processed_path = 'data/{0}'.format(dataset_name)
    train_examples = _load_data(os.path.join(processed_path, 'train.txt'))

    sr2o = defaultdict(set)
    or2s = defaultdict(set)
    for s, r, o in train_examples:
        sr2o[(s, r)].add(o)
        or2s[(o, r)].add(s)

    sr2o = {key: sorted(objects) for key, objects in sr2o.items()}
    or2s = {key: sorted(subjects) for key, subjects in or2s.items()}

    # Context managers close each file even if pickling raises.
    with open(os.path.join(processed_path, 'sr2o_train.pickle'), 'wb') as out:
        pickle.dump(sr2o, out)

    with open(os.path.join(processed_path, 'or2s_train.pickle'), 'wb') as out:
        pickle.dump(or2s, out)
78 |
79 |
if __name__ == '__main__':
    # The dataset name is the first command-line argument; 'WN18RR' is the
    # fallback. Other names previously used here: FB15k-237, YAGO3-10,
    # WN18, FB15k.
    dataset_name = sys.argv[1] if len(sys.argv) > 1 else 'WN18RR'

    # Fixed seed applied to numpy's global state and to two generators.
    seed = 345345
    np.random.seed(seed)
    rdm = np.random.RandomState(seed)
    rng = np.random.default_rng(seed)

    print('{}: Generating filter lists for evaluation'.format(dataset_name))
    generate_eval_filter(dataset_name)

    print('{}: Generating train data'.format(dataset_name))
    generate_train_data(dataset_name)
100 |
101 |
102 |
103 |
--------------------------------------------------------------------------------
/KGEAttack/Readme.md:
--------------------------------------------------------------------------------
1 |
2 | Code Structure
3 |
4 | This file describes the structure of the code
5 |
6 | Commandline instructions for all experiments are available in bash scripts at this level
7 |
8 | The main codebase is in `ConvE`
9 | - script to preprocess data (generate dictionaries) is `preprocess.py`
10 | - script to generate evaluation filters and training tuples is `wrangle_KG.py`
11 | - script to train a KGE model is `main.py`
12 | - script to select target triples from the test set is `select_targets.py`
13 | - Random neighbourhood baseline is in `rand_add_attack_1.py`
14 | - Random global baseline is in `rand_add_attack_2.py`
- Zhang et al. baseline is implemented in `ijcai_add_attack_1.py`
16 | - CRIAGE baseline is in `criage_add_attack_1.py`
17 | - Proposed symmetric attacks in `sym_add_attack_{1,2,3}`
18 | - 1 for soft truth score
19 | - 2 for KGE ranks
20 | - 3 for cosine distance
21 | - Proposed inversion attacks in `inv_add_attack_{1,2,3}`
22 | - 1 for soft truth score
23 | - 2 for KGE ranks
24 | - 3 for cosine distance
25 | - Proposed composition attacks in `com_add_attack_{1,2,3}`
26 | - 1 for soft truth score
27 | - 2 for KGE ranks
28 | - 3 for cosine distance
29 | - Elbow method to select clusters is in `clustering_elbow.ipynb`
30 | - Script to generate clusters is `create_clusters.py`
31 | - Script to compute metrics on decoy set in `decoy_test.py`
32 | - Folder `elbow_plots` contains the elbow plots
33 | - Folder `data` will contain datasets generated from running the experiments.
34 | - These are named as `attack_model_dataset_split_budget_run`
35 | - here `split=1` for target split, `budget=1` for most attacks except random global with 2 edits, and `run` is the number for a random run
36 | - For Zhang et al. attacks, an additional argument is down sampling percent
- Folders `saved_models`, `clusters`, `logs`, `results` and `losses` are initially empty as well; they are populated when the corresponding scripts are run
38 |
--------------------------------------------------------------------------------
/KGEAttack/complex_FB15k-237.sh:
--------------------------------------------------------------------------------
#!/bin/sh
# Full experiment pipeline for ComplEx on FB15k-237: train the original
# model, select target triples, then for each attack generate the poisoned
# dataset and retrain on it.

# Stop immediately if the code directory is missing; otherwise every command
# below would run from the wrong working directory.
cd ConvE || exit 1

# retrain DATASET
# Regenerate the evaluation filters and training dictionaries for DATASET
# (wrangle_KG.py), then train a ComplEx model on it.
retrain() {
    python -u wrangle_KG.py "$1"
    CUDA_VISIBLE_DEVICES=0 python -u main.py --model complex --data "$1" --lr 0.005 --input-drop 0.5
}

# train the original model
echo 'Training original model'

CUDA_VISIBLE_DEVICES=0 python -u main.py --model complex --data FB15k-237 --lr 0.005 --input-drop 0.5

echo 'Selecting target triples'
# -p keeps re-runs from failing when the directory already exists.
mkdir -p data/target_complex_FB15k-237_1

CUDA_VISIBLE_DEVICES=0 python -u select_targets.py --model complex --data FB15k-237 --lr 0.005 --input-drop 0.5


echo 'Generating random edits for the neighbourhood'
CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_1.py --model complex --data FB15k-237 --budget 1 --rand-run 1
retrain rand_add_n_complex_FB15k-237_1_1_1

echo 'Generating global random edits'
CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_2.py --model complex --data FB15k-237 --budget 1 --rand-run 1
retrain rand_add_g_complex_FB15k-237_1_1_1

CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_2.py --model complex --data FB15k-237 --budget 2 --rand-run 1
retrain rand_add_g_complex_FB15k-237_1_2_1


echo 'Generating symmetry edits with ground truth minimum'
CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_1.py --model complex --data FB15k-237 --budget 1
retrain sym_add_1_complex_FB15k-237_1_1_1

echo 'Generating symmetry edits with worse ranks'
CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_2.py --model complex --data FB15k-237 --budget 1
retrain sym_add_2_complex_FB15k-237_1_1_1

echo 'Generating symmetry edits with cosine distance'
CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_3.py --model complex --data FB15k-237 --budget 1
retrain sym_add_3_complex_FB15k-237_1_1_1


echo 'Generating inversion edits with ground truth minimum'
CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_1.py --model complex --data FB15k-237 --budget 1
retrain inv_add_1_complex_FB15k-237_1_1_1

echo 'Generating inversion edits with worse ranks'
CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_2.py --model complex --data FB15k-237 --budget 1
retrain inv_add_2_complex_FB15k-237_1_1_1

echo 'Generating inversion edits with cosine distance'
CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_3.py --model complex --data FB15k-237 --budget 1
retrain inv_add_3_complex_FB15k-237_1_1_1


echo 'Generating composition edits with ground truth values'
python -u create_clusters.py --model complex --data FB15k-237 --num-clusters 300
CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_1.py --model complex --data FB15k-237 --budget 1 --num-clusters 300 --rand-run 1
retrain com_add_1_complex_FB15k-237_1_1_1

echo 'Generating composition attack with just worse ranks '
CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_2.py --model complex --data FB15k-237 --budget 1
retrain com_add_2_complex_FB15k-237_1_1_1

echo 'Generating composition attack with cosine distance '
CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_3.py --model complex --data FB15k-237 --budget 1
retrain com_add_3_complex_FB15k-237_1_1_1


echo 'Generating edits from IJCAI-19 baseline '
CUDA_VISIBLE_DEVICES=0 python -u ijcai_add_attack_1.py --model complex --data FB15k-237 --budget 1 --corruption-factor 15 --rand-run 1 --use-gpu
retrain ijcai_add_1_complex_FB15k-237_1_1_1_15.0

CUDA_VISIBLE_DEVICES=0 python -u ijcai_add_attack_1.py --model complex --data FB15k-237 --budget 1 --corruption-factor 5 --rand-run 1 --use-gpu
retrain ijcai_add_1_complex_FB15k-237_1_1_1_5.0
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
--------------------------------------------------------------------------------
/KGEAttack/complex_WN18.sh:
--------------------------------------------------------------------------------
#!/bin/sh
# Full experiment pipeline for ComplEx on WN18: train the original model,
# select target triples, then for each attack generate the poisoned dataset
# and retrain on it.

# Stop immediately if the code directory is missing; otherwise every command
# below would run from the wrong working directory.
cd ConvE || exit 1

# retrain DATASET
# Regenerate the evaluation filters and training dictionaries for DATASET
# (wrangle_KG.py), then train a ComplEx model on it.
retrain() {
    python -u wrangle_KG.py "$1"
    CUDA_VISIBLE_DEVICES=0 python -u main.py --model complex --data "$1" --lr 0.01
}

# train the original model
echo 'Training original model'

CUDA_VISIBLE_DEVICES=0 python -u main.py --model complex --data WN18 --lr 0.01

echo 'Selecting target triples'
# -p keeps re-runs from failing when the directory already exists.
mkdir -p data/target_complex_WN18_1

CUDA_VISIBLE_DEVICES=0 python -u select_targets.py --model complex --data WN18 --lr 0.01


echo 'Generating random edits for the neighbourhood'
CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_1.py --model complex --data WN18 --budget 1 --rand-run 1
retrain rand_add_n_complex_WN18_1_1_1

echo 'Generating global random edits'
CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_2.py --model complex --data WN18 --budget 1 --rand-run 1
retrain rand_add_g_complex_WN18_1_1_1

CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_2.py --model complex --data WN18 --budget 2 --rand-run 1
retrain rand_add_g_complex_WN18_1_2_1


echo 'Generating symmetry edits with ground truth minimum'
CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_1.py --model complex --data WN18 --budget 1
retrain sym_add_1_complex_WN18_1_1_1

echo 'Generating symmetry edits with worse ranks'
CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_2.py --model complex --data WN18 --budget 1
retrain sym_add_2_complex_WN18_1_1_1

echo 'Generating symmetry edits with cosine distance'
CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_3.py --model complex --data WN18 --budget 1
retrain sym_add_3_complex_WN18_1_1_1


echo 'Generating inversion edits with ground truth minimum'
CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_1.py --model complex --data WN18 --budget 1
retrain inv_add_1_complex_WN18_1_1_1

echo 'Generating inversion edits with worse ranks'
CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_2.py --model complex --data WN18 --budget 1
retrain inv_add_2_complex_WN18_1_1_1

echo 'Generating inversion edits with cosine distance'
CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_3.py --model complex --data WN18 --budget 1
retrain inv_add_3_complex_WN18_1_1_1


echo 'Generating composition edits with ground truth values'
python -u create_clusters.py --model complex --data WN18 --num-clusters 100
CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_1.py --model complex --data WN18 --budget 1 --num-clusters 100 --rand-run 1
retrain com_add_1_complex_WN18_1_1_1

echo 'Generating composition edits with just worse ranks '
CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_2.py --model complex --data WN18 --budget 1
retrain com_add_2_complex_WN18_1_1_1

echo 'Generating composition edits with cosine distance '
CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_3.py --model complex --data WN18 --budget 1
retrain com_add_3_complex_WN18_1_1_1


echo 'Generating edits from IJCAI-19 baseline '
CUDA_VISIBLE_DEVICES=0 python -u ijcai_add_attack_1.py --model complex --data WN18 --budget 1 --corruption-factor 20 --rand-run 1 --use-gpu
retrain ijcai_add_1_complex_WN18_1_1_1_20.0

# The corruption-factor 5 variant was already disabled in this script.
# CUDA_VISIBLE_DEVICES=0 python -u ijcai_add_attack_1.py --model complex --data WN18 --budget 1 --corruption-factor 5 --rand-run 1 --use-gpu
# retrain ijcai_add_1_complex_WN18_1_1_1_5.0
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
--------------------------------------------------------------------------------
/KGEAttack/complex_WN18RR.sh:
--------------------------------------------------------------------------------
#!/bin/sh
# Full experiment pipeline for ComplEx on WN18RR: train the original model,
# select target triples, then for each attack generate the poisoned dataset
# and retrain on it.

# Stop immediately if the code directory is missing; otherwise every command
# below would run from the wrong working directory.
cd ConvE || exit 1

# retrain DATASET
# Regenerate the evaluation filters and training dictionaries for DATASET
# (wrangle_KG.py), then train a ComplEx model on it.
retrain() {
    python -u wrangle_KG.py "$1"
    CUDA_VISIBLE_DEVICES=0 python -u main.py --model complex --data "$1" --lr 0.01
}

# train the original model
echo 'Training original model'

CUDA_VISIBLE_DEVICES=0 python -u main.py --model complex --data WN18RR --lr 0.01

echo 'Selecting target triples'
# -p keeps re-runs from failing when the directory already exists.
mkdir -p data/target_complex_WN18RR_1

CUDA_VISIBLE_DEVICES=0 python -u select_targets.py --model complex --data WN18RR --lr 0.01


echo 'Generating random edits for the neighbourhood'
CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_1.py --model complex --data WN18RR --budget 1 --rand-run 1
retrain rand_add_n_complex_WN18RR_1_1_1

echo 'Generating global random edits'
CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_2.py --model complex --data WN18RR --budget 1 --rand-run 1
retrain rand_add_g_complex_WN18RR_1_1_1

CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_2.py --model complex --data WN18RR --budget 2 --rand-run 1
retrain rand_add_g_complex_WN18RR_1_2_1


echo 'Generating symmetry edits with ground truth minimum'
CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_1.py --model complex --data WN18RR --budget 1
retrain sym_add_1_complex_WN18RR_1_1_1

echo 'Generating symmetry edits with worse ranks'
CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_2.py --model complex --data WN18RR --budget 1
retrain sym_add_2_complex_WN18RR_1_1_1

echo 'Generating symmetry edits with cosine distance'
CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_3.py --model complex --data WN18RR --budget 1
retrain sym_add_3_complex_WN18RR_1_1_1


echo 'Generating inversion edits with ground truth minimum'
CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_1.py --model complex --data WN18RR --budget 1
retrain inv_add_1_complex_WN18RR_1_1_1

echo 'Generating inversion edits with worse ranks'
CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_2.py --model complex --data WN18RR --budget 1
retrain inv_add_2_complex_WN18RR_1_1_1

echo 'Generating inversion edits with cosine distance'
CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_3.py --model complex --data WN18RR --budget 1
retrain inv_add_3_complex_WN18RR_1_1_1


echo 'Generating composition edits with ground truth values'
# NOTE(review): cluster creation is disabled here, unlike in the sibling
# complex_* scripts. com_add_attack_1.py is still called with --num-clusters
# 100 and presumably needs the clusters to exist already -- confirm before a
# fresh run.
#python -u create_clusters.py --model complex --data WN18RR --num-clusters 100
CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_1.py --model complex --data WN18RR --budget 1 --num-clusters 100 --rand-run 1
retrain com_add_1_complex_WN18RR_1_1_1

echo 'Generating composition edits with just worse ranks '
CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_2.py --model complex --data WN18RR --budget 1
retrain com_add_2_complex_WN18RR_1_1_1

echo 'Generating composition edits with cosine distance '
CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_3.py --model complex --data WN18RR --budget 1
retrain com_add_3_complex_WN18RR_1_1_1


echo 'Generating edits from IJCAI-19 baseline '
CUDA_VISIBLE_DEVICES=0 python -u ijcai_add_attack_1.py --model complex --data WN18RR --budget 1 --corruption-factor 20 --rand-run 1 --use-gpu
retrain ijcai_add_1_complex_WN18RR_1_1_1_20.0

CUDA_VISIBLE_DEVICES=0 python -u ijcai_add_attack_1.py --model complex --data WN18RR --budget 1 --corruption-factor 5 --rand-run 1 --use-gpu
retrain ijcai_add_1_complex_WN18RR_1_1_1_5.0
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
--------------------------------------------------------------------------------
/KGEAttack/compute_decoy_metrics_FB15k-237.sh:
--------------------------------------------------------------------------------
#!/bin/sh
# Run decoy_test.py for every model / attack combination on FB15k-237.

# Stop immediately if the code directory is missing; otherwise every command
# below would run from the wrong working directory.
cd ConvE || exit 1

for model in distmult complex transe conve; do
    # Banner text is kept exactly as in the original per-model sections.
    case "$model" in
        distmult) echo 'Computing metrics for decoy in FB15k-237 DistMult ' ;;
        complex)  echo 'Computing metrics for decoy in FB15k-237 Complex' ;;
        transe)   echo 'Computing metrics for decoy in FB15k-237 Transe ' ;;
        conve)    echo 'Computing metrics for decoy in FB15k-237 ConvE' ;;
    esac

    for attack in sym_add_1 sym_add_2 sym_add_3 \
                  inv_add_1 inv_add_2 inv_add_3 \
                  com_add_1 com_add_2 com_add_3; do
        if [ "$attack" = com_add_1 ]; then
            # com_add_1 is the only attack invoked with an explicit random run.
            CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model "$model" --data FB15k-237 --attack "$attack" --budget 1 --rand-run 1
        else
            CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model "$model" --data FB15k-237 --attack "$attack" --budget 1
        fi
    done
done
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
--------------------------------------------------------------------------------
/KGEAttack/compute_decoy_metrics_WN18RR.sh:
--------------------------------------------------------------------------------
#!/bin/sh
# Run decoy_test.py for every model / attack combination on WN18RR.

# Stop immediately if the code directory is missing; otherwise every command
# below would run from the wrong working directory.
cd ConvE || exit 1

for model in distmult complex transe conve; do
    # Banner text is kept exactly as in the original per-model sections.
    case "$model" in
        distmult) echo 'Computing metrics for decoy in WN18RR DistMult ' ;;
        complex)  echo 'Computing metrics for decoy in WN18RR Complex' ;;
        transe)   echo 'Computing metrics for decoy in WN18RR Transe ' ;;
        conve)    echo 'Computing metrics for decoy in WN18RR ConvE' ;;
    esac

    for attack in sym_add_1 sym_add_2 sym_add_3 \
                  inv_add_1 inv_add_2 inv_add_3 \
                  com_add_1 com_add_2 com_add_3; do
        if [ "$attack" = com_add_1 ]; then
            # com_add_1 is the only attack invoked with an explicit random run.
            CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model "$model" --data WN18RR --attack "$attack" --budget 1 --rand-run 1
        else
            CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model "$model" --data WN18RR --attack "$attack" --budget 1
        fi
    done
done
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
--------------------------------------------------------------------------------
/KGEAttack/conve_FB15k-237.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # Abort when ConvE/ is missing: every script and data path below is relative to it.
3 | cd ConvE || exit 1
4 |
5 | # train the original model
6 | echo 'Training original model'
7 |
8 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data FB15k-237 --lr 0.001 --hidden-drop 0.5
9 |
10 | # -p: no error when the target directory is left over from an earlier run
11 | echo 'Selecting target triples'
12 | mkdir -p data/target_conve_FB15k-237_1
13 |
14 | CUDA_VISIBLE_DEVICES=0 python -u select_targets.py --model conve --data FB15k-237 --lr 0.001 --hidden-drop 0.5
15 |
16 |
17 | # Random baselines. Each step: run the attack script, run wrangle_KG.py on the attack's dataset name, retrain main.py with that dataset as --data.
18 | echo 'Generating random edits for the neighbourhood'
19 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_1.py --model conve --data FB15k-237 --budget 1 --rand-run 1
20 | python -u wrangle_KG.py rand_add_n_conve_FB15k-237_1_1_1
21 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data rand_add_n_conve_FB15k-237_1_1_1 --lr 0.001 --hidden-drop 0.5
22 |
23 | echo 'Generating global random edits'
24 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_2.py --model conve --data FB15k-237 --budget 1 --rand-run 1
25 | python -u wrangle_KG.py rand_add_g_conve_FB15k-237_1_1_1
26 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data rand_add_g_conve_FB15k-237_1_1_1 --lr 0.001 --hidden-drop 0.5
27 |
28 | # global random edits again with --budget 2; the dataset name ends in _1_2_1 instead of _1_1_1
29 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_2.py --model conve --data FB15k-237 --budget 2 --rand-run 1
30 | python -u wrangle_KG.py rand_add_g_conve_FB15k-237_1_2_1
31 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data rand_add_g_conve_FB15k-237_1_2_1 --lr 0.001 --hidden-drop 0.5
32 |
33 |
34 | # Symmetry attacks, variants 1-3: attack script, then wrangle_KG.py on the named dataset, then retraining with the original hyperparameters.
35 | echo 'Generating symmetry edits with ground truth minimum'
36 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_1.py --model conve --data FB15k-237 --budget 1
37 | python -u wrangle_KG.py sym_add_1_conve_FB15k-237_1_1_1
38 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data sym_add_1_conve_FB15k-237_1_1_1 --lr 0.001 --hidden-drop 0.5
39 |
40 | echo 'Generating symmetry edits with worse ranks'
41 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_2.py --model conve --data FB15k-237 --budget 1
42 | python -u wrangle_KG.py sym_add_2_conve_FB15k-237_1_1_1
43 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data sym_add_2_conve_FB15k-237_1_1_1 --lr 0.001 --hidden-drop 0.5
44 |
45 | echo 'Generating symmetry edits with cosine distance'
46 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_3.py --model conve --data FB15k-237 --budget 1
47 | python -u wrangle_KG.py sym_add_3_conve_FB15k-237_1_1_1
48 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data sym_add_3_conve_FB15k-237_1_1_1 --lr 0.001 --hidden-drop 0.5
49 |
50 |
51 |
52 |
53 | # Inversion attacks, variants 1-3: attack script, then wrangle_KG.py on the named dataset, then retraining with the original hyperparameters.
54 | echo 'Generating inversion edits with ground truth minimum'
55 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_1.py --model conve --data FB15k-237 --budget 1
56 | python -u wrangle_KG.py inv_add_1_conve_FB15k-237_1_1_1
57 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data inv_add_1_conve_FB15k-237_1_1_1 --lr 0.001 --hidden-drop 0.5
58 |
59 | echo 'Generating inversion edits with worse ranks'
60 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_2.py --model conve --data FB15k-237 --budget 1
61 | python -u wrangle_KG.py inv_add_2_conve_FB15k-237_1_1_1
62 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data inv_add_2_conve_FB15k-237_1_1_1 --lr 0.001 --hidden-drop 0.5
63 |
64 | echo 'Generating inversion edits with cosine distance'
65 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_3.py --model conve --data FB15k-237 --budget 1
66 | python -u wrangle_KG.py inv_add_3_conve_FB15k-237_1_1_1
67 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data inv_add_3_conve_FB15k-237_1_1_1 --lr 0.001 --hidden-drop 0.5
68 |
69 |
70 |
71 | # Composition attacks. create_clusters.py runs first with the same --num-clusters value that com_add_attack_1.py receives; variants 2 and 3 take no cluster argument.
72 | echo 'Generating composition edits with ground truth values'
73 | python -u create_clusters.py --model conve --data FB15k-237 --num-clusters 300
74 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_1.py --model conve --data FB15k-237 --budget 1 --num-clusters 300 --rand-run 1
75 | python -u wrangle_KG.py com_add_1_conve_FB15k-237_1_1_1
76 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data com_add_1_conve_FB15k-237_1_1_1 --lr 0.001 --hidden-drop 0.5
77 |
78 |
79 | echo 'Generating composition attack with just worse ranks '
80 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_2.py --model conve --data FB15k-237 --budget 1
81 | python -u wrangle_KG.py com_add_2_conve_FB15k-237_1_1_1
82 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data com_add_2_conve_FB15k-237_1_1_1 --lr 0.001 --hidden-drop 0.5
83 |
84 | echo 'Generating composition attack with cosine distance '
85 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_3.py --model conve --data FB15k-237 --budget 1
86 | python -u wrangle_KG.py com_add_3_conve_FB15k-237_1_1_1
87 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data com_add_3_conve_FB15k-237_1_1_1 --lr 0.001 --hidden-drop 0.5
88 |
89 |
90 | # IJCAI-19 baseline at --corruption-factor 0.1 and 0.3; the factor is the last suffix of the dataset name passed to wrangle_KG.py and main.py.
91 | echo 'Generating edits from IJCAI-19 baseline '
92 | CUDA_VISIBLE_DEVICES=0 python -u ijcai_add_attack_1.py --model conve --data FB15k-237 --budget 1 --corruption-factor 0.1 --rand-run 1 --use-gpu
93 | python -u wrangle_KG.py ijcai_add_1_conve_FB15k-237_1_1_1_0.1
94 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data ijcai_add_1_conve_FB15k-237_1_1_1_0.1 --lr 0.001 --hidden-drop 0.5
95 |
96 |
97 | CUDA_VISIBLE_DEVICES=0 python -u ijcai_add_attack_1.py --model conve --data FB15k-237 --budget 1 --corruption-factor 0.3 --rand-run 1 --use-gpu
98 | python -u wrangle_KG.py ijcai_add_1_conve_FB15k-237_1_1_1_0.3
99 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data ijcai_add_1_conve_FB15k-237_1_1_1_0.3 --lr 0.001 --hidden-drop 0.5
100 |
101 |
102 | # CRIAGE baseline. wrangle_KG.py runs on the target dataset before criage_inverter.py; NOTE(review): presumably the inverter reads that output - confirm.
103 | echo 'Generating edits from criage baseline '
104 | python -u wrangle_KG.py target_conve_FB15k-237_1
105 | CUDA_VISIBLE_DEVICES=0 python -u criage_inverter.py --model conve --data FB15k-237 --lr 0.001 --hidden-drop 0.5
106 | CUDA_VISIBLE_DEVICES=0 python -u criage_add_attack_1.py --model conve --data FB15k-237
107 | python -u wrangle_KG.py criage_add_1_conve_FB15k-237_1_1_1
108 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data criage_add_1_conve_FB15k-237_1_1_1 --lr 0.001 --hidden-drop 0.5
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
--------------------------------------------------------------------------------
/KGEAttack/conve_WN18.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # Abort when ConvE/ is missing: every script and data path below is relative to it.
3 | cd ConvE || exit 1
4 |
5 | # train the original model
6 | echo 'Training original model'
7 |
8 | # CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data WN18 --lr 0.001
9 | # CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data WN18 --lr 0.001 --hidden-drop 0.1
10 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data WN18 --lr 0.005
11 | # CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data WN18 --lr 0.01
12 | # -p: no error when the target directory is left over from an earlier run
13 | echo 'Selecting target triples'
14 | mkdir -p data/target_conve_WN18_1
15 |
16 | CUDA_VISIBLE_DEVICES=0 python -u select_targets.py --model conve --data WN18 --lr 0.005
17 | # Random baselines. Each step: run the attack script, run wrangle_KG.py on the attack's dataset name, retrain main.py with that dataset as --data.
18 | echo 'Generating random edits for the neighbourhood'
19 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_1.py --model conve --data WN18 --budget 1 --rand-run 1
20 | python -u wrangle_KG.py rand_add_n_conve_WN18_1_1_1
21 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data rand_add_n_conve_WN18_1_1_1 --lr 0.005
22 |
23 | echo 'Generating global random edits'
24 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_2.py --model conve --data WN18 --budget 1 --rand-run 1
25 | python -u wrangle_KG.py rand_add_g_conve_WN18_1_1_1
26 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data rand_add_g_conve_WN18_1_1_1 --lr 0.005
27 | # global random edits again with --budget 2; the dataset name ends in _1_2_1 instead of _1_1_1
28 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_2.py --model conve --data WN18 --budget 2 --rand-run 1
29 | python -u wrangle_KG.py rand_add_g_conve_WN18_1_2_1
30 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data rand_add_g_conve_WN18_1_2_1 --lr 0.005
31 |
32 |
33 | # Symmetry attacks, variants 1-3: attack script, then wrangle_KG.py on the named dataset, then retraining with the original learning rate.
34 | echo 'Generating symmetry edits with ground truth minimum'
35 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_1.py --model conve --data WN18 --budget 1
36 | python -u wrangle_KG.py sym_add_1_conve_WN18_1_1_1
37 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data sym_add_1_conve_WN18_1_1_1 --lr 0.005
38 |
39 | echo 'Generating symmetry edits with worse ranks'
40 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_2.py --model conve --data WN18 --budget 1
41 | python -u wrangle_KG.py sym_add_2_conve_WN18_1_1_1
42 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data sym_add_2_conve_WN18_1_1_1 --lr 0.005
43 |
44 | echo 'Generating symmetry edits with cosine distance'
45 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_3.py --model conve --data WN18 --budget 1
46 | python -u wrangle_KG.py sym_add_3_conve_WN18_1_1_1
47 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data sym_add_3_conve_WN18_1_1_1 --lr 0.005
48 |
49 |
50 |
51 | # Inversion attacks, variants 1-3: attack script, then wrangle_KG.py on the named dataset, then retraining with the original learning rate.
52 | echo 'Generating inversion edits with ground truth minimum'
53 |
54 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_1.py --model conve --data WN18 --budget 1
55 | python -u wrangle_KG.py inv_add_1_conve_WN18_1_1_1
56 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data inv_add_1_conve_WN18_1_1_1 --lr 0.005
57 |
58 | echo 'Generating inversion edits with worse ranks'
59 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_2.py --model conve --data WN18 --budget 1
60 | python -u wrangle_KG.py inv_add_2_conve_WN18_1_1_1
61 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data inv_add_2_conve_WN18_1_1_1 --lr 0.005
62 |
63 | echo 'Generating inversion edits with cosine distance'
64 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_3.py --model conve --data WN18 --budget 1
65 | python -u wrangle_KG.py inv_add_3_conve_WN18_1_1_1
66 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data inv_add_3_conve_WN18_1_1_1 --lr 0.005
67 |
68 |
69 |
70 | # Composition attacks. create_clusters.py runs first with the same --num-clusters value that com_add_attack_1.py receives; variants 2 and 3 take no cluster argument.
71 | echo 'Generating composition edits with ground truth values'
72 | python -u create_clusters.py --model conve --data WN18 --num-clusters 300
73 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_1.py --model conve --data WN18 --budget 1 --num-clusters 300 --rand-run 1
74 | python -u wrangle_KG.py com_add_1_conve_WN18_1_1_1
75 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data com_add_1_conve_WN18_1_1_1 --lr 0.005
76 |
77 |
78 | echo 'Generating composition edits with just worse ranks '
79 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_2.py --model conve --data WN18 --budget 1
80 | python -u wrangle_KG.py com_add_2_conve_WN18_1_1_1
81 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data com_add_2_conve_WN18_1_1_1 --lr 0.005
82 |
83 |
84 | echo 'Generating composition edits with cosine distance '
85 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_3.py --model conve --data WN18 --budget 1
86 | python -u wrangle_KG.py com_add_3_conve_WN18_1_1_1
87 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data com_add_3_conve_WN18_1_1_1 --lr 0.005
88 |
89 |
90 |
91 | # IJCAI-19 baseline. The 0.1 run is disabled; only --corruption-factor 2 runs, and it shows up as the 2.0 suffix of the dataset name.
92 | echo 'Generating edits from IJCAI-19 baseline '
93 | # CUDA_VISIBLE_DEVICES=0 python -u ijcai_add_attack_1.py --model conve --data WN18 --budget 1 --corruption-factor 0.1 --rand-run 1 --use-gpu
94 | # python -u wrangle_KG.py ijcai_add_1_conve_WN18_1_1_1_0.1
95 | # CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data ijcai_add_1_conve_WN18_1_1_1_0.1 --lr 0.005
96 |
97 |
98 | CUDA_VISIBLE_DEVICES=0 python -u ijcai_add_attack_1.py --model conve --data WN18 --budget 1 --corruption-factor 2 --rand-run 1 --use-gpu
99 | python -u wrangle_KG.py ijcai_add_1_conve_WN18_1_1_1_2.0
100 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data ijcai_add_1_conve_WN18_1_1_1_2.0 --lr 0.005
101 |
102 |
103 |
104 |
105 | # CRIAGE baseline. wrangle_KG.py runs on the target dataset before criage_inverter.py; NOTE(review): presumably the inverter reads that output - confirm.
106 | echo 'Generating edits from Criage baseline '
107 | python -u wrangle_KG.py target_conve_WN18_1
108 | CUDA_VISIBLE_DEVICES=0 python -u criage_inverter.py --model conve --data WN18 --lr 0.005
109 | CUDA_VISIBLE_DEVICES=0 python -u criage_add_attack_1.py --model conve --data WN18
110 | python -u wrangle_KG.py criage_add_1_conve_WN18_1_1_1
111 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data criage_add_1_conve_WN18_1_1_1 --lr 0.005
112 |
113 |
114 |
115 |
116 |
117 |
--------------------------------------------------------------------------------
/KGEAttack/conve_WN18RR.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # Abort when ConvE/ is missing: every script and data path below is relative to it.
3 | cd ConvE || exit 1
4 |
5 | # train the original model
6 | echo 'Training original model'
7 |
8 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data WN18RR --lr 0.001
9 | # -p: no error when the target directory is left over from an earlier run
10 | echo 'Selecting target triples'
11 | mkdir -p data/target_conve_WN18RR_1
12 |
13 | CUDA_VISIBLE_DEVICES=0 python -u select_targets.py --model conve --data WN18RR --lr 0.001
14 |
15 |
16 | # Random baselines. Each step: run the attack script, run wrangle_KG.py on the attack's dataset name, retrain main.py with that dataset as --data.
17 | echo 'Generating random edits for the neighbourhood'
18 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_1.py --model conve --data WN18RR --budget 1 --rand-run 1
19 | python -u wrangle_KG.py rand_add_n_conve_WN18RR_1_1_1
20 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data rand_add_n_conve_WN18RR_1_1_1 --lr 0.001
21 |
22 | echo 'Generating global random edits'
23 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_2.py --model conve --data WN18RR --budget 1 --rand-run 1
24 | python -u wrangle_KG.py rand_add_g_conve_WN18RR_1_1_1
25 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data rand_add_g_conve_WN18RR_1_1_1 --lr 0.001
26 | # global random edits again with --budget 2; the dataset name ends in _1_2_1 instead of _1_1_1
27 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_2.py --model conve --data WN18RR --budget 2 --rand-run 1
28 | python -u wrangle_KG.py rand_add_g_conve_WN18RR_1_2_1
29 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data rand_add_g_conve_WN18RR_1_2_1 --lr 0.001
30 |
31 |
32 | # Symmetry attacks, variants 1-3: attack script, then wrangle_KG.py on the named dataset, then retraining with the original learning rate.
33 | echo 'Generating symmetry edits with ground truth minimum'
34 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_1.py --model conve --data WN18RR --budget 1
35 | python -u wrangle_KG.py sym_add_1_conve_WN18RR_1_1_1
36 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data sym_add_1_conve_WN18RR_1_1_1 --lr 0.001
37 |
38 | echo 'Generating symmetry edits with worse ranks'
39 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_2.py --model conve --data WN18RR --budget 1
40 | python -u wrangle_KG.py sym_add_2_conve_WN18RR_1_1_1
41 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data sym_add_2_conve_WN18RR_1_1_1 --lr 0.001
42 |
43 | echo 'Generating symmetry edits with cosine distance'
44 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_3.py --model conve --data WN18RR --budget 1
45 | python -u wrangle_KG.py sym_add_3_conve_WN18RR_1_1_1
46 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data sym_add_3_conve_WN18RR_1_1_1 --lr 0.001
47 |
48 |
49 |
50 | # Inversion attacks, variants 1-3: attack script, then wrangle_KG.py on the named dataset, then retraining with the original learning rate.
51 | echo 'Generating inversion edits with ground truth minimum'
52 |
53 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_1.py --model conve --data WN18RR --budget 1
54 | python -u wrangle_KG.py inv_add_1_conve_WN18RR_1_1_1
55 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data inv_add_1_conve_WN18RR_1_1_1 --lr 0.001
56 |
57 | echo 'Generating inversion edits with worse ranks'
58 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_2.py --model conve --data WN18RR --budget 1
59 | python -u wrangle_KG.py inv_add_2_conve_WN18RR_1_1_1
60 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data inv_add_2_conve_WN18RR_1_1_1 --lr 0.001
61 |
62 | echo 'Generating inversion edits with cosine distance'
63 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_3.py --model conve --data WN18RR --budget 1
64 | python -u wrangle_KG.py inv_add_3_conve_WN18RR_1_1_1
65 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data inv_add_3_conve_WN18RR_1_1_1 --lr 0.001
66 |
67 |
68 |
69 | # Composition attacks. create_clusters.py runs first with the same --num-clusters value that com_add_attack_1.py receives; variants 2 and 3 take no cluster argument.
70 | echo 'Generating composition edits with ground truth values'
71 | python -u create_clusters.py --model conve --data WN18RR --num-clusters 300
72 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_1.py --model conve --data WN18RR --budget 1 --num-clusters 300 --rand-run 1
73 | python -u wrangle_KG.py com_add_1_conve_WN18RR_1_1_1
74 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data com_add_1_conve_WN18RR_1_1_1 --lr 0.001
75 |
76 |
77 | echo 'Generating composition edits with just worse ranks '
78 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_2.py --model conve --data WN18RR --budget 1
79 | python -u wrangle_KG.py com_add_2_conve_WN18RR_1_1_1
80 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data com_add_2_conve_WN18RR_1_1_1 --lr 0.001
81 |
82 |
83 | echo 'Generating composition edits with cosine distance '
84 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_3.py --model conve --data WN18RR --budget 1
85 | python -u wrangle_KG.py com_add_3_conve_WN18RR_1_1_1
86 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data com_add_3_conve_WN18RR_1_1_1 --lr 0.001
87 |
88 |
89 |
90 | # IJCAI-19 baseline at --corruption-factor 0.1 and 2; the factor is the last suffix of the dataset name, written as a float (2 becomes 2.0).
91 | echo 'Generating edits from IJCAI-19 baseline '
92 | CUDA_VISIBLE_DEVICES=0 python -u ijcai_add_attack_1.py --model conve --data WN18RR --budget 1 --corruption-factor 0.1 --rand-run 1 --use-gpu
93 | python -u wrangle_KG.py ijcai_add_1_conve_WN18RR_1_1_1_0.1
94 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data ijcai_add_1_conve_WN18RR_1_1_1_0.1 --lr 0.001
95 |
96 |
97 | CUDA_VISIBLE_DEVICES=0 python -u ijcai_add_attack_1.py --model conve --data WN18RR --budget 1 --corruption-factor 2 --rand-run 1 --use-gpu
98 | python -u wrangle_KG.py ijcai_add_1_conve_WN18RR_1_1_1_2.0
99 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data ijcai_add_1_conve_WN18RR_1_1_1_2.0 --lr 0.001
100 |
101 |
102 |
103 |
104 | # CRIAGE baseline. wrangle_KG.py runs on the target dataset before criage_inverter.py; NOTE(review): presumably the inverter reads that output - confirm.
105 | echo 'Generating edits from Criage baseline '
106 | python -u wrangle_KG.py target_conve_WN18RR_1
107 | CUDA_VISIBLE_DEVICES=0 python -u criage_inverter.py --model conve --data WN18RR --lr 0.001
108 | CUDA_VISIBLE_DEVICES=0 python -u criage_add_attack_1.py --model conve --data WN18RR
109 | python -u wrangle_KG.py criage_add_1_conve_WN18RR_1_1_1
110 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data criage_add_1_conve_WN18RR_1_1_1 --lr 0.001
111 |
112 |
113 |
114 |
115 |
--------------------------------------------------------------------------------
/KGEAttack/distmult_FB15k-237.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # Abort when ConvE/ is missing: every script and data path below is relative to it.
3 | cd ConvE || exit 1
4 |
5 | # train the original model
6 | echo 'Training original model'
7 |
8 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data FB15k-237 --lr 0.005 --input-drop 0.5
9 | # -p: no error when the target directory is left over from an earlier run
10 | echo 'Selecting target triples'
11 | mkdir -p data/target_distmult_FB15k-237_1
12 |
13 | CUDA_VISIBLE_DEVICES=0 python -u select_targets.py --model distmult --data FB15k-237 --lr 0.005 --input-drop 0.5
14 | # Baseline retraining on the target set is switched off, so its announcement is disabled too and the log no longer reports a step that never ran.
15 | # echo 'Re-training the model to compute baseline change in metrics for target set'
16 | #python -u wrangle_KG.py target_distmult_FB15k-237_1
17 | #CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data target_distmult_FB15k-237_1 --lr 0.005 --input-drop 0.5
18 |
19 | # Random baselines. Each step: run the attack script, run wrangle_KG.py on the attack's dataset name, retrain main.py with that dataset as --data.
20 | echo 'Generating random edits for the neighbourhood'
21 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_1.py --model distmult --data FB15k-237 --budget 1 --rand-run 1
22 | python -u wrangle_KG.py rand_add_n_distmult_FB15k-237_1_1_1
23 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data rand_add_n_distmult_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5
24 |
25 |
26 | echo 'Generating global random edits with 1 edit'
27 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_2.py --model distmult --data FB15k-237 --budget 1 --rand-run 1
28 | python -u wrangle_KG.py rand_add_g_distmult_FB15k-237_1_1_1
29 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data rand_add_g_distmult_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5
30 | # with --budget 2 the dataset name ends in _1_2_1 instead of _1_1_1
31 | echo 'Generating global random edits with 2 edits'
32 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_2.py --model distmult --data FB15k-237 --budget 2 --rand-run 1
33 | python -u wrangle_KG.py rand_add_g_distmult_FB15k-237_1_2_1
34 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data rand_add_g_distmult_FB15k-237_1_2_1 --lr 0.005 --input-drop 0.5
35 |
36 | # Symmetry attacks, variants 1-3: attack script, then wrangle_KG.py on the named dataset, then retraining with the original hyperparameters.
37 | echo 'Generating symmetry edits with ground truth minimum'
38 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_1.py --model distmult --data FB15k-237 --budget 1
39 | python -u wrangle_KG.py sym_add_1_distmult_FB15k-237_1_1_1
40 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data sym_add_1_distmult_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5
41 |
42 | echo 'Generating symmetry edits with worse ranks'
43 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_2.py --model distmult --data FB15k-237 --budget 1
44 | python -u wrangle_KG.py sym_add_2_distmult_FB15k-237_1_1_1
45 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data sym_add_2_distmult_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5
46 |
47 | echo 'Generating symmetry edits with cosine distance'
48 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_3.py --model distmult --data FB15k-237 --budget 1
49 | python -u wrangle_KG.py sym_add_3_distmult_FB15k-237_1_1_1
50 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data sym_add_3_distmult_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5
51 |
52 |
53 | # Inversion attacks, variants 1-3: attack script, then wrangle_KG.py on the named dataset, then retraining with the original hyperparameters.
54 | echo 'Generating inversion edits with ground truth minimum'
55 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_1.py --model distmult --data FB15k-237 --budget 1
56 | python -u wrangle_KG.py inv_add_1_distmult_FB15k-237_1_1_1
57 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data inv_add_1_distmult_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5
58 |
59 | echo 'Generating inversion edits with worse ranks'
60 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_2.py --model distmult --data FB15k-237 --budget 1
61 | python -u wrangle_KG.py inv_add_2_distmult_FB15k-237_1_1_1
62 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data inv_add_2_distmult_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5
63 |
64 | echo 'Generating inversion edits with cosine distance'
65 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_3.py --model distmult --data FB15k-237 --budget 1
66 | python -u wrangle_KG.py inv_add_3_distmult_FB15k-237_1_1_1
67 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data inv_add_3_distmult_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5
68 |
69 |
70 | # Composition attacks. create_clusters.py runs first with the same --num-clusters value (200 here) that com_add_attack_1.py receives; variants 2 and 3 take no cluster argument.
71 | echo 'Generating composition edits with ground truth values'
72 | python -u create_clusters.py --model distmult --data FB15k-237 --num-clusters 200
73 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_1.py --model distmult --data FB15k-237 --budget 1 --num-clusters 200 --rand-run 1
74 | python -u wrangle_KG.py com_add_1_distmult_FB15k-237_1_1_1
75 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data com_add_1_distmult_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5
76 |
77 |
78 | echo 'Generating composition edits with just worse ranks'
79 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_2.py --model distmult --data FB15k-237 --budget 1
80 | python -u wrangle_KG.py com_add_2_distmult_FB15k-237_1_1_1
81 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data com_add_2_distmult_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5
82 |
83 |
84 | echo 'Generating composition edits with cosine distance'
85 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_3.py --model distmult --data FB15k-237 --budget 1
86 | python -u wrangle_KG.py com_add_3_distmult_FB15k-237_1_1_1
87 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data com_add_3_distmult_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5
88 |
89 |
90 |
91 | # IJCAI-19 baseline at --corruption-factor 20 and 5; the factor is the last suffix of the dataset name, written as a float (20.0, 5.0).
92 | echo 'Generating edits from IJCAI-19 baseline '
93 | CUDA_VISIBLE_DEVICES=0 python -u ijcai_add_attack_1.py --model distmult --data FB15k-237 --budget 1 --corruption-factor 20 --rand-run 1 --use-gpu
94 | python -u wrangle_KG.py ijcai_add_1_distmult_FB15k-237_1_1_1_20.0
95 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data ijcai_add_1_distmult_FB15k-237_1_1_1_20.0 --lr 0.005 --input-drop 0.5
96 |
97 | CUDA_VISIBLE_DEVICES=0 python -u ijcai_add_attack_1.py --model distmult --data FB15k-237 --budget 1 --corruption-factor 5 --rand-run 1 --use-gpu
98 | python -u wrangle_KG.py ijcai_add_1_distmult_FB15k-237_1_1_1_5.0
99 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data ijcai_add_1_distmult_FB15k-237_1_1_1_5.0 --lr 0.005 --input-drop 0.5
100 |
101 |
102 | # CRIAGE baseline. wrangle_KG.py runs on the target dataset before criage_inverter.py; NOTE(review): presumably the inverter reads that output - confirm.
103 | echo 'Generating edits from CRIAGE baseline'
104 | python -u wrangle_KG.py target_distmult_FB15k-237_1
105 | CUDA_VISIBLE_DEVICES=0 python -u criage_inverter.py --model distmult --data FB15k-237 --lr 0.005 --input-drop 0.5
106 | CUDA_VISIBLE_DEVICES=0 python -u criage_add_attack_1.py --model distmult --data FB15k-237
107 | python -u wrangle_KG.py criage_add_1_distmult_FB15k-237_1_1_1
108 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data criage_add_1_distmult_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
--------------------------------------------------------------------------------
/KGEAttack/distmult_WN18.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # Abort when ConvE/ is missing: every script and data path below is relative to it.
3 | cd ConvE || exit 1
4 |
5 | # train the original model
6 | echo 'Training original model'
7 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data WN18 --lr 0.01 --num-batches 50 #this can be used
8 | # CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data WN18 --lr 0.01 --num-batches 50 --input-drop 0.0
9 | # -p: no error when the target directory is left over from an earlier run
10 | echo 'Selecting target triples'
11 | mkdir -p data/target_distmult_WN18_1
12 |
13 | CUDA_VISIBLE_DEVICES=0 python -u select_targets.py --model distmult --data WN18 --lr 0.01 --num-batches 50
14 | # Baseline retraining is disabled, but wrangle_KG.py still runs on the target set; NOTE(review): presumably the CRIAGE step at the end of the script relies on it - confirm.
15 | # echo 'Re-training the model to compute baseline change in metrics for target set'
16 | python -u wrangle_KG.py target_distmult_WN18_1
17 | # CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data target_distmult_WN18_1 --lr 0.01 --num-batches 50
18 |
19 | # Random baselines. Each step: run the attack script, run wrangle_KG.py on the attack's dataset name, retrain main.py with that dataset as --data.
20 | echo 'Generating random edits for the neighbourhood'
21 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_1.py --model distmult --data WN18 --budget 1 --rand-run 1
22 | python -u wrangle_KG.py rand_add_n_distmult_WN18_1_1_1
23 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data rand_add_n_distmult_WN18_1_1_1 --lr 0.01 --num-batches 50
24 |
25 |
26 | echo 'Generating global random edits with 1 edit'
27 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_2.py --model distmult --data WN18 --budget 1 --rand-run 1
28 | python -u wrangle_KG.py rand_add_g_distmult_WN18_1_1_1
29 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data rand_add_g_distmult_WN18_1_1_1 --lr 0.01 --num-batches 50
30 | # with --budget 2 the dataset name ends in _1_2_1 instead of _1_1_1
31 | echo 'Generating global random edits with 2 edits'
32 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_2.py --model distmult --data WN18 --budget 2 --rand-run 1
33 | python -u wrangle_KG.py rand_add_g_distmult_WN18_1_2_1
34 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data rand_add_g_distmult_WN18_1_2_1 --lr 0.01 --num-batches 50
35 |
36 | # Symmetry attacks, variants 1-3: attack script, then wrangle_KG.py on the named dataset, then retraining with the original hyperparameters.
37 | echo 'Generating symmetry edits with ground truth minimum'
38 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_1.py --model distmult --data WN18 --budget 1
39 | python -u wrangle_KG.py sym_add_1_distmult_WN18_1_1_1
40 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data sym_add_1_distmult_WN18_1_1_1 --lr 0.01 --num-batches 50
41 |
42 | echo 'Generating symmetry edits with worse ranks'
43 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_2.py --model distmult --data WN18 --budget 1
44 | python -u wrangle_KG.py sym_add_2_distmult_WN18_1_1_1
45 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data sym_add_2_distmult_WN18_1_1_1 --lr 0.01 --num-batches 50
46 |
47 |
48 | echo 'Generating symmetry edits with cosine distance'
49 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_3.py --model distmult --data WN18 --budget 1
50 | python -u wrangle_KG.py sym_add_3_distmult_WN18_1_1_1
51 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data sym_add_3_distmult_WN18_1_1_1 --lr 0.01 --num-batches 50
52 |
53 | # Inversion attacks, variants 1-3: attack script, then wrangle_KG.py on the named dataset, then retraining with the original hyperparameters.
54 | echo 'Generating inversion edits with ground truth minimum'
55 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_1.py --model distmult --data WN18 --budget 1
56 | python -u wrangle_KG.py inv_add_1_distmult_WN18_1_1_1
57 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data inv_add_1_distmult_WN18_1_1_1 --lr 0.01 --num-batches 50
58 |
59 |
60 | echo 'Generating inversion edits with worse ranks'
61 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_2.py --model distmult --data WN18 --budget 1
62 | python -u wrangle_KG.py inv_add_2_distmult_WN18_1_1_1
63 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data inv_add_2_distmult_WN18_1_1_1 --lr 0.01 --num-batches 50
64 |
65 |
66 | echo 'Generating inversion edits with cosine distance'
67 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_3.py --model distmult --data WN18 --budget 1
68 | python -u wrangle_KG.py inv_add_3_distmult_WN18_1_1_1
69 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data inv_add_3_distmult_WN18_1_1_1 --lr 0.01 --num-batches 50
70 |
71 |
72 | # Composition attacks. create_clusters.py runs first with the same --num-clusters value that com_add_attack_1.py receives; variants 2 and 3 take no cluster argument.
73 | echo 'Generating composition edits with ground truth values'
74 | python -u create_clusters.py --model distmult --data WN18 --num-clusters 300
75 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_1.py --model distmult --data WN18 --budget 1 --num-clusters 300 --rand-run 1
76 | python -u wrangle_KG.py com_add_1_distmult_WN18_1_1_1
77 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data com_add_1_distmult_WN18_1_1_1 --lr 0.01 --num-batches 50
78 |
79 |
80 | echo 'Generating composition edits with just worse ranks'
81 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_2.py --model distmult --data WN18 --budget 1
82 | python -u wrangle_KG.py com_add_2_distmult_WN18_1_1_1
83 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data com_add_2_distmult_WN18_1_1_1 --lr 0.01 --num-batches 50
84 |
85 | echo 'Generating composition edits with cosine distance'
86 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_3.py --model distmult --data WN18 --budget 1
87 | python -u wrangle_KG.py com_add_3_distmult_WN18_1_1_1
88 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data com_add_3_distmult_WN18_1_1_1 --lr 0.01 --num-batches 50
89 |
90 |
91 | # IJCAI-19 baseline. Only --corruption-factor 20 runs (suffix 20.0 in the dataset name); the factor-5 run below is disabled.
92 | echo 'Generating edits from IJCAI-19 baseline '
93 | CUDA_VISIBLE_DEVICES=0 python -u ijcai_add_attack_1.py --model distmult --data WN18 --budget 1 --corruption-factor 20 --rand-run 1 --use-gpu
94 | python -u wrangle_KG.py ijcai_add_1_distmult_WN18_1_1_1_20.0
95 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data ijcai_add_1_distmult_WN18_1_1_1_20.0 --lr 0.01 --num-batches 50
96 |
97 |
98 | # CUDA_VISIBLE_DEVICES=0 python -u ijcai_add_attack_1.py --model distmult --data WN18 --budget 1 --corruption-factor 5 --rand-run 1 --use-gpu
99 | # python -u wrangle_KG.py ijcai_add_1_distmult_WN18_1_1_1_5.0
100 | # CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data ijcai_add_1_distmult_WN18_1_1_1_5.0 --lr 0.01 --num-batches 50
101 |
102 | # CRIAGE baseline. No wrangle_KG.py call on target_distmult_WN18_1 here: this script already ran it right after target selection.
103 | echo 'Generating edits from criage baseline '
104 | CUDA_VISIBLE_DEVICES=0 python -u criage_inverter.py --model distmult --data WN18 --lr 0.01 --num-batches 50
105 | CUDA_VISIBLE_DEVICES=0 python -u criage_add_attack_1.py --model distmult --data WN18
106 | python -u wrangle_KG.py criage_add_1_distmult_WN18_1_1_1
107 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data criage_add_1_distmult_WN18_1_1_1 --lr 0.01 --num-batches 50
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
--------------------------------------------------------------------------------
/KGEAttack/distmult_WN18RR.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # DistMult on WN18RR: train the original model, select target triples, then for each attack generate edits, wrangle the edited dataset and retrain.
3 | cd ConvE || exit 1  # abort instead of running the whole pipeline from the wrong directory
4 |
5 | # train the original model
6 | echo 'Training original model'
7 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data WN18RR --lr 0.01 --num-batches 50
8 |
9 | echo 'Selecting target triples'
10 | mkdir -p data/target_distmult_WN18RR_1
11 | # -p above keeps re-runs quiet when the target directory already exists
12 | CUDA_VISIBLE_DEVICES=0 python -u select_targets.py --model distmult --data WN18RR --lr 0.01 --num-batches 50
13 |
14 | echo 'Re-training the model to compute baseline change in metrics for target set'
15 | python -u wrangle_KG.py target_distmult_WN18RR_1
16 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data target_distmult_WN18RR_1 --lr 0.01 --num-batches 50
17 |
18 | # --- random baselines ---
19 | echo 'Generating random edits for the neighbourhood'
20 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_1.py --model distmult --data WN18RR --budget 1 --rand-run 1
21 | python -u wrangle_KG.py rand_add_n_distmult_WN18RR_1_1_1
22 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data rand_add_n_distmult_WN18RR_1_1_1 --lr 0.01 --num-batches 50
23 |
24 |
25 | echo 'Generating global random edits with 1 edit'
26 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_2.py --model distmult --data WN18RR --budget 1 --rand-run 1
27 | python -u wrangle_KG.py rand_add_g_distmult_WN18RR_1_1_1
28 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data rand_add_g_distmult_WN18RR_1_1_1 --lr 0.01 --num-batches 50
29 |
30 | echo 'Generating global random edits with 2 edits'
31 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_2.py --model distmult --data WN18RR --budget 2 --rand-run 1
32 | python -u wrangle_KG.py rand_add_g_distmult_WN18RR_1_2_1
33 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data rand_add_g_distmult_WN18RR_1_2_1 --lr 0.01 --num-batches 50
34 |
35 | # --- symmetry attacks ---
36 | echo 'Generating symmetry edits with ground truth minimum'
37 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_1.py --model distmult --data WN18RR --budget 1
38 | python -u wrangle_KG.py sym_add_1_distmult_WN18RR_1_1_1
39 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data sym_add_1_distmult_WN18RR_1_1_1 --lr 0.01 --num-batches 50
40 |
41 | echo 'Generating symmetry edits with worse ranks'
42 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_2.py --model distmult --data WN18RR --budget 1
43 | python -u wrangle_KG.py sym_add_2_distmult_WN18RR_1_1_1
44 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data sym_add_2_distmult_WN18RR_1_1_1 --lr 0.01 --num-batches 50
45 |
46 |
47 | echo 'Generating symmetry edits with cosine distance'
48 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_3.py --model distmult --data WN18RR --budget 1
49 | python -u wrangle_KG.py sym_add_3_distmult_WN18RR_1_1_1
50 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data sym_add_3_distmult_WN18RR_1_1_1 --lr 0.01 --num-batches 50
51 |
52 | # --- inversion attacks ---
53 | echo 'Generating inversion edits with ground truth minimum'
54 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_1.py --model distmult --data WN18RR --budget 1
55 | python -u wrangle_KG.py inv_add_1_distmult_WN18RR_1_1_1
56 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data inv_add_1_distmult_WN18RR_1_1_1 --lr 0.01 --num-batches 50
57 |
58 |
59 | echo 'Generating inversion edits with worse ranks'
60 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_2.py --model distmult --data WN18RR --budget 1
61 | python -u wrangle_KG.py inv_add_2_distmult_WN18RR_1_1_1
62 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data inv_add_2_distmult_WN18RR_1_1_1 --lr 0.01 --num-batches 50
63 |
64 |
65 | echo 'Generating inversion edits with cosine distance'
66 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_3.py --model distmult --data WN18RR --budget 1
67 | python -u wrangle_KG.py inv_add_3_distmult_WN18RR_1_1_1
68 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data inv_add_3_distmult_WN18RR_1_1_1 --lr 0.01 --num-batches 50
69 |
70 |
71 | # --- composition attacks (com_add_attack_1 is preceded by create_clusters.py with the same --num-clusters) ---
72 | echo 'Generating composition edits with ground truth values'
73 | python -u create_clusters.py --model distmult --data WN18RR --num-clusters 300
74 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_1.py --model distmult --data WN18RR --budget 1 --num-clusters 300 --rand-run 1
75 | python -u wrangle_KG.py com_add_1_distmult_WN18RR_1_1_1
76 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data com_add_1_distmult_WN18RR_1_1_1 --lr 0.01 --num-batches 50
77 |
78 |
79 | echo 'Generating composition edits with just worse ranks'
80 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_2.py --model distmult --data WN18RR --budget 1
81 | python -u wrangle_KG.py com_add_2_distmult_WN18RR_1_1_1
82 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data com_add_2_distmult_WN18RR_1_1_1 --lr 0.01 --num-batches 50
83 |
84 | echo 'Generating composition edits with cosine distance'
85 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_3.py --model distmult --data WN18RR --budget 1
86 | python -u wrangle_KG.py com_add_3_distmult_WN18RR_1_1_1
87 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data com_add_3_distmult_WN18RR_1_1_1 --lr 0.01 --num-batches 50
88 |
89 |
90 | # --- IJCAI-19 baseline, corruption factor 20 ---
91 | echo 'Generating edits from IJCAI-19 baseline '
92 | CUDA_VISIBLE_DEVICES=0 python -u ijcai_add_attack_1.py --model distmult --data WN18RR --budget 1 --corruption-factor 20 --rand-run 1 --use-gpu
93 | python -u wrangle_KG.py ijcai_add_1_distmult_WN18RR_1_1_1_20.0
94 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data ijcai_add_1_distmult_WN18RR_1_1_1_20.0 --lr 0.01 --num-batches 50
95 |
96 | # Same IJCAI-19 baseline repeated with --corruption-factor 5
97 | CUDA_VISIBLE_DEVICES=0 python -u ijcai_add_attack_1.py --model distmult --data WN18RR --budget 1 --corruption-factor 5 --rand-run 1 --use-gpu
98 | python -u wrangle_KG.py ijcai_add_1_distmult_WN18RR_1_1_1_5.0
99 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data ijcai_add_1_distmult_WN18RR_1_1_1_5.0 --lr 0.01 --num-batches 50
100 |
101 | # NOTE(review): target_distmult_WN18RR_1 is wrangled a second time below (first time at the baseline step) - confirm the CRIAGE scripts need this
102 | echo 'Generating edits from criage baseline '
103 | python -u wrangle_KG.py target_distmult_WN18RR_1
104 | CUDA_VISIBLE_DEVICES=0 python -u criage_inverter.py --model distmult --data WN18RR --lr 0.01 --num-batches 50
105 | CUDA_VISIBLE_DEVICES=0 python -u criage_add_attack_1.py --model distmult --data WN18RR
106 | python -u wrangle_KG.py criage_add_1_distmult_WN18RR_1_1_1
107 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data criage_add_1_distmult_WN18RR_1_1_1 --lr 0.01 --num-batches 50
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
--------------------------------------------------------------------------------
/KGEAttack/grad_add_attack_FB15k-237.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # Gradient-attribution attacks on FB15k-237 for DistMult, ComplEx, ConvE and TransE with dot / cos / l2 similarity: generate edits, run wrangle_KG.py on the edited dataset, retrain with main.py.
3 | cd ConvE || exit 1  # abort instead of running every command from the wrong directory
4 |
5 | echo 'Generating gradient attribution edits with dot similarity : FB15k-237 DistMult'
6 | python -u grad_add_attack.py --model distmult --data FB15k-237 --reproduce-results --sim-metric dot
7 | python -u wrangle_KG.py grad_add_dot_distmult_FB15k-237_1_1_1
8 | python -u main.py --model distmult --data grad_add_dot_distmult_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5
9 |
10 | echo 'Generating gradient attribution edits with cosine similarity : FB15k-237 DistMult'
11 | python -u grad_add_attack.py --model distmult --data FB15k-237 --reproduce-results --sim-metric cos
12 | python -u wrangle_KG.py grad_add_cos_distmult_FB15k-237_1_1_1
13 | python -u main.py --model distmult --data grad_add_cos_distmult_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5
14 |
15 | echo 'Generating gradient attribution edits with l2 similarity : FB15k-237 DistMult'
16 | python -u grad_add_attack.py --model distmult --data FB15k-237 --reproduce-results --sim-metric l2
17 | python -u wrangle_KG.py grad_add_l2_distmult_FB15k-237_1_1_1
18 | python -u main.py --model distmult --data grad_add_l2_distmult_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5
19 |
20 | # ####################################################################################################################################
21 |
22 | echo 'Generating gradient attribution edits with dot similarity : FB15k-237 ComplEx'
23 | python -u grad_add_attack.py --model complex --data FB15k-237 --reproduce-results --sim-metric dot
24 | python -u wrangle_KG.py grad_add_dot_complex_FB15k-237_1_1_1
25 | python -u main.py --model complex --data grad_add_dot_complex_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5
26 |
27 | echo 'Generating gradient attribution edits with cosine similarity : FB15k-237 ComplEx'
28 | python -u grad_add_attack.py --model complex --data FB15k-237 --reproduce-results --sim-metric cos
29 | python -u wrangle_KG.py grad_add_cos_complex_FB15k-237_1_1_1
30 | python -u main.py --model complex --data grad_add_cos_complex_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5
31 |
32 | echo 'Generating gradient attribution edits with l2 similarity : FB15k-237 ComplEx'
33 | python -u grad_add_attack.py --model complex --data FB15k-237 --reproduce-results --sim-metric l2
34 | python -u wrangle_KG.py grad_add_l2_complex_FB15k-237_1_1_1
35 | python -u main.py --model complex --data grad_add_l2_complex_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5
36 |
37 | # ####################################################################################################################################
38 |
39 | echo 'Generating gradient attribution edits with dot similarity : FB15k-237 ConvE'
40 | python -u grad_add_attack.py --model conve --data FB15k-237 --reproduce-results --sim-metric dot
41 | python -u wrangle_KG.py grad_add_dot_conve_FB15k-237_1_1_1
42 | python -u main.py --model conve --data grad_add_dot_conve_FB15k-237_1_1_1 --lr 0.001 --hidden-drop 0.5
43 |
44 | echo 'Generating gradient attribution edits with cosine similarity : FB15k-237 ConvE'
45 | python -u grad_add_attack.py --model conve --data FB15k-237 --reproduce-results --sim-metric cos
46 | python -u wrangle_KG.py grad_add_cos_conve_FB15k-237_1_1_1
47 | python -u main.py --model conve --data grad_add_cos_conve_FB15k-237_1_1_1 --lr 0.001 --hidden-drop 0.5
48 |
49 | echo 'Generating gradient attribution edits with l2 similarity : FB15k-237 ConvE'
50 | python -u grad_add_attack.py --model conve --data FB15k-237 --reproduce-results --sim-metric l2
51 | python -u wrangle_KG.py grad_add_l2_conve_FB15k-237_1_1_1
52 | python -u main.py --model conve --data grad_add_l2_conve_FB15k-237_1_1_1 --lr 0.001 --hidden-drop 0.5
53 |
54 | # ####################################################################################################################################
55 |
56 | echo 'Generating gradient attribution edits with dot similarity : FB15k-237 TransE'
57 | python -u grad_add_attack.py --model transe --data FB15k-237 --reproduce-results --sim-metric dot
58 | python -u wrangle_KG.py grad_add_dot_transe_FB15k-237_1_1_1
59 | python -u main.py --model transe --data grad_add_dot_transe_FB15k-237_1_1_1 --lr 0.001 --input-drop 0.0 --transe-margin 9.0 --num-batches 800 --epochs 100 --reg-weight 1e-10
60 |
61 | echo 'Generating gradient attribution edits with cosine similarity : FB15k-237 TransE'
62 | python -u grad_add_attack.py --model transe --data FB15k-237 --reproduce-results --sim-metric cos
63 | python -u wrangle_KG.py grad_add_cos_transe_FB15k-237_1_1_1
64 | python -u main.py --model transe --data grad_add_cos_transe_FB15k-237_1_1_1 --lr 0.001 --input-drop 0.0 --transe-margin 9.0 --num-batches 800 --epochs 100 --reg-weight 1e-10
65 |
66 | echo 'Generating gradient attribution edits with l2 similarity : FB15k-237 TransE'
67 | python -u grad_add_attack.py --model transe --data FB15k-237 --reproduce-results --sim-metric l2
68 | python -u wrangle_KG.py grad_add_l2_transe_FB15k-237_1_1_1
69 | python -u main.py --model transe --data grad_add_l2_transe_FB15k-237_1_1_1 --lr 0.001 --input-drop 0.0 --transe-margin 9.0 --num-batches 800 --epochs 100 --reg-weight 1e-10
70 |
71 |
72 |
73 |
--------------------------------------------------------------------------------
/KGEAttack/grad_add_attack_WN18RR.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # Gradient-attribution attacks on WN18RR for DistMult, ComplEx, ConvE and TransE with dot / cos / l2 similarity: generate edits, run wrangle_KG.py on the edited dataset, retrain with main.py.
3 | cd ConvE || exit 1  # abort instead of running every command from the wrong directory
4 |
5 | echo 'Generating gradient attribution edits with dot similarity : WN18RR DistMult'
6 | python -u grad_add_attack.py --model distmult --data WN18RR --reproduce-results --sim-metric dot
7 | python -u wrangle_KG.py grad_add_dot_distmult_WN18RR_1_1_1
8 | python -u main.py --model distmult --data grad_add_dot_distmult_WN18RR_1_1_1 --lr 0.01 --num-batches 50
9 |
10 | echo 'Generating gradient attribution edits with cosine similarity : WN18RR DistMult'
11 | python -u grad_add_attack.py --model distmult --data WN18RR --reproduce-results --sim-metric cos
12 | python -u wrangle_KG.py grad_add_cos_distmult_WN18RR_1_1_1
13 | python -u main.py --model distmult --data grad_add_cos_distmult_WN18RR_1_1_1 --lr 0.01 --num-batches 50
14 |
15 | echo 'Generating gradient attribution edits with l2 similarity : WN18RR DistMult'
16 | python -u grad_add_attack.py --model distmult --data WN18RR --reproduce-results --sim-metric l2
17 | python -u wrangle_KG.py grad_add_l2_distmult_WN18RR_1_1_1
18 | python -u main.py --model distmult --data grad_add_l2_distmult_WN18RR_1_1_1 --lr 0.01 --num-batches 50
19 |
20 | # ####################################################################################################################################
21 |
22 | echo 'Generating gradient attribution edits with dot similarity : WN18RR ComplEx'
23 | python -u grad_add_attack.py --model complex --data WN18RR --reproduce-results --sim-metric dot
24 | python -u wrangle_KG.py grad_add_dot_complex_WN18RR_1_1_1
25 | python -u main.py --model complex --data grad_add_dot_complex_WN18RR_1_1_1 --lr 0.01
26 |
27 | echo 'Generating gradient attribution edits with cosine similarity : WN18RR ComplEx'
28 | python -u grad_add_attack.py --model complex --data WN18RR --reproduce-results --sim-metric cos
29 | python -u wrangle_KG.py grad_add_cos_complex_WN18RR_1_1_1
30 | python -u main.py --model complex --data grad_add_cos_complex_WN18RR_1_1_1 --lr 0.01
31 |
32 | echo 'Generating gradient attribution edits with l2 similarity : WN18RR ComplEx'
33 | python -u grad_add_attack.py --model complex --data WN18RR --reproduce-results --sim-metric l2
34 | python -u wrangle_KG.py grad_add_l2_complex_WN18RR_1_1_1
35 | python -u main.py --model complex --data grad_add_l2_complex_WN18RR_1_1_1 --lr 0.01
36 |
37 | # ####################################################################################################################################
38 |
39 | echo 'Generating gradient attribution edits with dot similarity : WN18RR ConvE'
40 | python -u grad_add_attack.py --model conve --data WN18RR --reproduce-results --sim-metric dot
41 | python -u wrangle_KG.py grad_add_dot_conve_WN18RR_1_1_1
42 | python -u main.py --model conve --data grad_add_dot_conve_WN18RR_1_1_1 --lr 0.001
43 |
44 | echo 'Generating gradient attribution edits with cosine similarity : WN18RR ConvE'
45 | python -u grad_add_attack.py --model conve --data WN18RR --reproduce-results --sim-metric cos
46 | python -u wrangle_KG.py grad_add_cos_conve_WN18RR_1_1_1
47 | python -u main.py --model conve --data grad_add_cos_conve_WN18RR_1_1_1 --lr 0.001
48 |
49 | echo 'Generating gradient attribution edits with l2 similarity : WN18RR ConvE'
50 | python -u grad_add_attack.py --model conve --data WN18RR --reproduce-results --sim-metric l2
51 | python -u wrangle_KG.py grad_add_l2_conve_WN18RR_1_1_1
52 | python -u main.py --model conve --data grad_add_l2_conve_WN18RR_1_1_1 --lr 0.001
53 |
54 | # ####################################################################################################################################
55 |
56 | echo 'Generating gradient attribution edits with dot similarity : WN18RR TransE'
57 | python -u grad_add_attack.py --model transe --data WN18RR --reproduce-results --sim-metric dot
58 | python -u wrangle_KG.py grad_add_dot_transe_WN18RR_1_1_1
59 | python -u main.py --model transe --data grad_add_dot_transe_WN18RR_1_1_1 --lr 0.005 --input-drop 0.0 --transe-margin 9.0 --num-batches 1000 --epochs 100 --reg-weight 1e-12
60 |
61 | echo 'Generating gradient attribution edits with cosine similarity : WN18RR TransE'
62 | python -u grad_add_attack.py --model transe --data WN18RR --reproduce-results --sim-metric cos
63 | python -u wrangle_KG.py grad_add_cos_transe_WN18RR_1_1_1
64 | python -u main.py --model transe --data grad_add_cos_transe_WN18RR_1_1_1 --lr 0.005 --input-drop 0.0 --transe-margin 9.0 --num-batches 1000 --epochs 100 --reg-weight 1e-12
65 |
66 | echo 'Generating gradient attribution edits with l2 similarity : WN18RR TransE'
67 | python -u grad_add_attack.py --model transe --data WN18RR --reproduce-results --sim-metric l2
68 | python -u wrangle_KG.py grad_add_l2_transe_WN18RR_1_1_1
69 | python -u main.py --model transe --data grad_add_l2_transe_WN18RR_1_1_1 --lr 0.005 --input-drop 0.0 --transe-margin 9.0 --num-batches 1000 --epochs 100 --reg-weight 1e-12
70 |
71 |
72 |
73 |
--------------------------------------------------------------------------------
/KGEAttack/inst_add_attack_FB15k-237.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # Instance-attribution attacks on FB15k-237 for DistMult, ComplEx, ConvE and TransE with dot / cos / l2 similarity: generate edits, run wrangle_KG.py on the edited dataset, retrain with main.py.
3 | cd ConvE || exit 1  # abort instead of running every command from the wrong directory
4 |
5 | echo 'Generating instance attribution edits with dot similarity : FB15k-237 DistMult'
6 | python -u inst_add_attack.py --model distmult --data FB15k-237 --reproduce-results --sim-metric dot
7 | python -u wrangle_KG.py inst_add_dot_distmult_FB15k-237_1_1_1
8 | python -u main.py --model distmult --data inst_add_dot_distmult_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5
9 |
10 | echo 'Generating instance attribution edits with cosine similarity : FB15k-237 DistMult'
11 | python -u inst_add_attack.py --model distmult --data FB15k-237 --reproduce-results --sim-metric cos
12 | python -u wrangle_KG.py inst_add_cos_distmult_FB15k-237_1_1_1
13 | python -u main.py --model distmult --data inst_add_cos_distmult_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5
14 |
15 | echo 'Generating instance attribution edits with l2 similarity : FB15k-237 DistMult'
16 | python -u inst_add_attack.py --model distmult --data FB15k-237 --reproduce-results --sim-metric l2
17 | python -u wrangle_KG.py inst_add_l2_distmult_FB15k-237_1_1_1
18 | python -u main.py --model distmult --data inst_add_l2_distmult_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5
19 |
20 | # ####################################################################################################################################
21 |
22 | echo 'Generating instance attribution edits with dot similarity : FB15k-237 ComplEx'
23 | python -u inst_add_attack.py --model complex --data FB15k-237 --reproduce-results --sim-metric dot
24 | python -u wrangle_KG.py inst_add_dot_complex_FB15k-237_1_1_1
25 | python -u main.py --model complex --data inst_add_dot_complex_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5
26 |
27 | echo 'Generating instance attribution edits with cosine similarity : FB15k-237 ComplEx'
28 | python -u inst_add_attack.py --model complex --data FB15k-237 --reproduce-results --sim-metric cos
29 | python -u wrangle_KG.py inst_add_cos_complex_FB15k-237_1_1_1
30 | python -u main.py --model complex --data inst_add_cos_complex_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5
31 |
32 | echo 'Generating instance attribution edits with l2 similarity : FB15k-237 ComplEx'
33 | python -u inst_add_attack.py --model complex --data FB15k-237 --reproduce-results --sim-metric l2
34 | python -u wrangle_KG.py inst_add_l2_complex_FB15k-237_1_1_1
35 | python -u main.py --model complex --data inst_add_l2_complex_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5
36 |
37 | # ####################################################################################################################################
38 |
39 | echo 'Generating instance attribution edits with dot similarity : FB15k-237 ConvE'
40 | python -u inst_add_attack.py --model conve --data FB15k-237 --reproduce-results --sim-metric dot
41 | python -u wrangle_KG.py inst_add_dot_conve_FB15k-237_1_1_1
42 | python -u main.py --model conve --data inst_add_dot_conve_FB15k-237_1_1_1 --lr 0.001 --hidden-drop 0.5
43 |
44 | echo 'Generating instance attribution edits with cosine similarity : FB15k-237 ConvE'
45 | python -u inst_add_attack.py --model conve --data FB15k-237 --reproduce-results --sim-metric cos
46 | python -u wrangle_KG.py inst_add_cos_conve_FB15k-237_1_1_1
47 | python -u main.py --model conve --data inst_add_cos_conve_FB15k-237_1_1_1 --lr 0.001 --hidden-drop 0.5
48 |
49 | echo 'Generating instance attribution edits with l2 similarity : FB15k-237 ConvE'
50 | python -u inst_add_attack.py --model conve --data FB15k-237 --reproduce-results --sim-metric l2
51 | python -u wrangle_KG.py inst_add_l2_conve_FB15k-237_1_1_1
52 | python -u main.py --model conve --data inst_add_l2_conve_FB15k-237_1_1_1 --lr 0.001 --hidden-drop 0.5
53 |
54 | # ####################################################################################################################################
55 |
56 | echo 'Generating instance attribution edits with dot similarity : FB15k-237 TransE'
57 | python -u inst_add_attack.py --model transe --data FB15k-237 --reproduce-results --sim-metric dot
58 | python -u wrangle_KG.py inst_add_dot_transe_FB15k-237_1_1_1
59 | python -u main.py --model transe --data inst_add_dot_transe_FB15k-237_1_1_1 --lr 0.001 --input-drop 0.0 --transe-margin 9.0 --num-batches 800 --epochs 100 --reg-weight 1e-10
60 |
61 | echo 'Generating instance attribution edits with cosine similarity : FB15k-237 TransE'
62 | python -u inst_add_attack.py --model transe --data FB15k-237 --reproduce-results --sim-metric cos
63 | python -u wrangle_KG.py inst_add_cos_transe_FB15k-237_1_1_1
64 | python -u main.py --model transe --data inst_add_cos_transe_FB15k-237_1_1_1 --lr 0.001 --input-drop 0.0 --transe-margin 9.0 --num-batches 800 --epochs 100 --reg-weight 1e-10
65 |
66 | echo 'Generating instance attribution edits with l2 similarity : FB15k-237 TransE'
67 | python -u inst_add_attack.py --model transe --data FB15k-237 --reproduce-results --sim-metric l2
68 | python -u wrangle_KG.py inst_add_l2_transe_FB15k-237_1_1_1
69 | python -u main.py --model transe --data inst_add_l2_transe_FB15k-237_1_1_1 --lr 0.001 --input-drop 0.0 --transe-margin 9.0 --num-batches 800 --epochs 100 --reg-weight 1e-10
70 |
71 |
72 |
73 |
--------------------------------------------------------------------------------
/KGEAttack/inst_add_attack_WN18RR.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # Instance-attribution attacks on WN18RR for DistMult, ComplEx, ConvE and TransE with dot / cos / l2 similarity: generate edits, run wrangle_KG.py on the edited dataset, retrain with main.py.
3 | cd ConvE || exit 1  # abort instead of running every command from the wrong directory
4 |
5 | echo 'Generating instance attribution edits with dot similarity : WN18RR DistMult'
6 | python -u inst_add_attack.py --model distmult --data WN18RR --reproduce-results --sim-metric dot
7 | python -u wrangle_KG.py inst_add_dot_distmult_WN18RR_1_1_1
8 | python -u main.py --model distmult --data inst_add_dot_distmult_WN18RR_1_1_1 --lr 0.01 --num-batches 50
9 |
10 | echo 'Generating instance attribution edits with cosine similarity : WN18RR DistMult'
11 | python -u inst_add_attack.py --model distmult --data WN18RR --reproduce-results --sim-metric cos
12 | python -u wrangle_KG.py inst_add_cos_distmult_WN18RR_1_1_1
13 | python -u main.py --model distmult --data inst_add_cos_distmult_WN18RR_1_1_1 --lr 0.01 --num-batches 50
14 |
15 | echo 'Generating instance attribution edits with l2 similarity : WN18RR DistMult'
16 | python -u inst_add_attack.py --model distmult --data WN18RR --reproduce-results --sim-metric l2
17 | python -u wrangle_KG.py inst_add_l2_distmult_WN18RR_1_1_1
18 | python -u main.py --model distmult --data inst_add_l2_distmult_WN18RR_1_1_1 --lr 0.01 --num-batches 50
19 |
20 | # ####################################################################################################################################
21 |
22 | echo 'Generating instance attribution edits with dot similarity : WN18RR ComplEx'
23 | python -u inst_add_attack.py --model complex --data WN18RR --reproduce-results --sim-metric dot
24 | python -u wrangle_KG.py inst_add_dot_complex_WN18RR_1_1_1
25 | python -u main.py --model complex --data inst_add_dot_complex_WN18RR_1_1_1 --lr 0.01
26 |
27 | echo 'Generating instance attribution edits with cosine similarity : WN18RR ComplEx'
28 | python -u inst_add_attack.py --model complex --data WN18RR --reproduce-results --sim-metric cos
29 | python -u wrangle_KG.py inst_add_cos_complex_WN18RR_1_1_1
30 | python -u main.py --model complex --data inst_add_cos_complex_WN18RR_1_1_1 --lr 0.01
31 |
32 | echo 'Generating instance attribution edits with l2 similarity : WN18RR ComplEx'
33 | python -u inst_add_attack.py --model complex --data WN18RR --reproduce-results --sim-metric l2
34 | python -u wrangle_KG.py inst_add_l2_complex_WN18RR_1_1_1
35 | python -u main.py --model complex --data inst_add_l2_complex_WN18RR_1_1_1 --lr 0.01
36 |
37 | # ####################################################################################################################################
38 |
39 | echo 'Generating instance attribution edits with dot similarity : WN18RR ConvE'
40 | python -u inst_add_attack.py --model conve --data WN18RR --reproduce-results --sim-metric dot
41 | python -u wrangle_KG.py inst_add_dot_conve_WN18RR_1_1_1
42 | python -u main.py --model conve --data inst_add_dot_conve_WN18RR_1_1_1 --lr 0.001
43 |
44 | echo 'Generating instance attribution edits with cosine similarity : WN18RR ConvE'
45 | python -u inst_add_attack.py --model conve --data WN18RR --reproduce-results --sim-metric cos
46 | python -u wrangle_KG.py inst_add_cos_conve_WN18RR_1_1_1
47 | python -u main.py --model conve --data inst_add_cos_conve_WN18RR_1_1_1 --lr 0.001
48 |
49 | echo 'Generating instance attribution edits with l2 similarity : WN18RR ConvE'
50 | python -u inst_add_attack.py --model conve --data WN18RR --reproduce-results --sim-metric l2
51 | python -u wrangle_KG.py inst_add_l2_conve_WN18RR_1_1_1
52 | python -u main.py --model conve --data inst_add_l2_conve_WN18RR_1_1_1 --lr 0.001
53 |
54 | # ####################################################################################################################################
55 |
56 | echo 'Generating instance attribution edits with dot similarity : WN18RR TransE'
57 | python -u inst_add_attack.py --model transe --data WN18RR --reproduce-results --sim-metric dot
58 | python -u wrangle_KG.py inst_add_dot_transe_WN18RR_1_1_1
59 | python -u main.py --model transe --data inst_add_dot_transe_WN18RR_1_1_1 --lr 0.005 --input-drop 0.0 --transe-margin 9.0 --num-batches 1000 --epochs 100 --reg-weight 1e-12
60 |
61 | echo 'Generating instance attribution edits with cosine similarity : WN18RR TransE'
62 | python -u inst_add_attack.py --model transe --data WN18RR --reproduce-results --sim-metric cos
63 | python -u wrangle_KG.py inst_add_cos_transe_WN18RR_1_1_1
64 | python -u main.py --model transe --data inst_add_cos_transe_WN18RR_1_1_1 --lr 0.005 --input-drop 0.0 --transe-margin 9.0 --num-batches 1000 --epochs 100 --reg-weight 1e-12
65 |
66 | echo 'Generating instance attribution edits with l2 similarity : WN18RR TransE'
67 | python -u inst_add_attack.py --model transe --data WN18RR --reproduce-results --sim-metric l2
68 | python -u wrangle_KG.py inst_add_l2_transe_WN18RR_1_1_1
69 | python -u main.py --model transe --data inst_add_l2_transe_WN18RR_1_1_1 --lr 0.005 --input-drop 0.0 --transe-margin 9.0 --num-batches 1000 --epochs 100 --reg-weight 1e-12
70 |
71 |
72 |
73 |
--------------------------------------------------------------------------------
/KGEAttack/preprocess.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | cd ConvE
4 |
5 | echo "Adding necessary directories"
6 | mkdir saved_models results losses logs clusters
7 | mkdir saved_models/criage_inverter
8 | mkdir logs/attack_logs
9 | mkdir logs/attack_logs/criage_add_1 logs/attack_logs/ijcai_add_1 logs/attack_logs/criage_inverter
10 | mkdir logs/attack_logs/rand_add_{n,g} logs/attack_logs/sym_add_{1,2,3} logs/attack_logs/inv_add_{1,2,3} logs/attack_logs/com_add_{1,2,3}
11 | mkdir logs/attack_logs/inst_add_{cos,dot,l2} logs/attack_logs/grad_add_{cos,dot,l2}
12 |
13 |
14 | echo "Extracting original data.... "
15 | mkdir data/WN18RR_original
16 | mkdir data/FB15k-237_original
17 |
18 | tar -xvf WN18RR.tar.gz -C data/WN18RR_original
19 | tar -xvf FB15k-237.tar.gz -C data/FB15k-237_original
20 |
21 | echo "Preprocessing... "
22 | python -u preprocess.py WN18RR
23 | python -u preprocess.py FB15k-237
24 |
25 | echo "Wrangling to generate training set and eval filters... "
26 | python -u wrangle_KG.py WN18RR
27 | python -u wrangle_KG.py FB15k-237
--------------------------------------------------------------------------------
/KGEAttack/transe_FB15k-237.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | cd ConvE
4 |
5 | # train the original model
6 | echo 'Training original model'
7 |
8 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data FB15k-237 --lr 0.001 --input-drop 0.0 --transe-margin 9.0 --num-batches 800 --epochs 100 --reg-weight 1e-10
9 |
10 | echo 'Selecting target triples'
11 | mkdir data/target_transe_FB15k-237_1
12 |
13 | CUDA_VISIBLE_DEVICES=0 python -u select_targets.py --model transe --data FB15k-237 --lr 0.001 --input-drop 0.0 --transe-margin 9.0 --num-batches 800 --epochs 100 --reg-weight 1e-10
14 |
15 |
16 |
17 | echo 'Generating random edits for the neighbourhood'
18 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_1.py --model transe --data FB15k-237 --budget 1 --rand-run 1
19 | python -u wrangle_KG.py rand_add_n_transe_FB15k-237_1_1_1
20 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data rand_add_n_transe_FB15k-237_1_1_1 --lr 0.001 --input-drop 0.0 --transe-margin 9.0 --num-batches 800 --epochs 100 --reg-weight 1e-10
21 |
22 | echo 'Generating global random edits with 1 edit'
23 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_2.py --model transe --data FB15k-237 --budget 1 --rand-run 1
24 | python -u wrangle_KG.py rand_add_g_transe_FB15k-237_1_1_1
25 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data rand_add_g_transe_FB15k-237_1_1_1 --lr 0.001 --input-drop 0.0 --transe-margin 9.0 --num-batches 800 --epochs 100 --reg-weight 1e-10
26 |
27 | echo 'Generating global random edits with 2 edits'
28 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_2.py --model transe --data FB15k-237 --budget 2 --rand-run 1
29 | python -u wrangle_KG.py rand_add_g_transe_FB15k-237_1_2_1
30 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data rand_add_g_transe_FB15k-237_1_2_1 --lr 0.001 --input-drop 0.0 --transe-margin 9.0 --num-batches 800 --epochs 100 --reg-weight 1e-10
31 |
32 |
33 |
34 |
35 | echo 'Generating symmetry edits with ground truth minimum'
36 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_1.py --model transe --data FB15k-237 --budget 1
37 | python -u wrangle_KG.py sym_add_1_transe_FB15k-237_1_1_1
38 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data sym_add_1_transe_FB15k-237_1_1_1 --lr 0.001 --input-drop 0.0 --transe-margin 9.0 --num-batches 800 --epochs 100 --reg-weight 1e-10
39 |
40 |
41 | echo 'Generating symmetry edits with cosine distance'
42 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_2.py --model transe --data FB15k-237 --budget 1
43 | python -u wrangle_KG.py sym_add_2_transe_FB15k-237_1_1_1
44 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data sym_add_2_transe_FB15k-237_1_1_1 --lr 0.001 --input-drop 0.0 --transe-margin 9.0 --num-batches 800 --epochs 100 --reg-weight 1e-10
45 |
46 |
47 | echo 'Generating symmetry edits with worse ranks'
48 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_3.py --model transe --data FB15k-237 --budget 1
49 | python -u wrangle_KG.py sym_add_3_transe_FB15k-237_1_1_1
50 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data sym_add_3_transe_FB15k-237_1_1_1 --lr 0.001 --input-drop 0.0 --transe-margin 9.0 --num-batches 800 --epochs 100 --reg-weight 1e-10
51 |
52 |
53 |
54 |
55 |
56 | echo 'Generating inversion edits with ground truth minimum'
57 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_1.py --model transe --data FB15k-237 --budget 1
58 | python -u wrangle_KG.py inv_add_1_transe_FB15k-237_1_1_1
59 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data inv_add_1_transe_FB15k-237_1_1_1 --lr 0.001 --input-drop 0.0 --transe-margin 9.0 --num-batches 800 --epochs 100 --reg-weight 1e-10
60 |
61 |
62 | echo 'Generating inversion edits with cosine distance'
63 |
64 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_2.py --model transe --data FB15k-237 --budget 1
65 | python -u wrangle_KG.py inv_add_2_transe_FB15k-237_1_1_1
66 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data inv_add_2_transe_FB15k-237_1_1_1 --lr 0.001 --input-drop 0.0 --transe-margin 9.0 --num-batches 800 --epochs 100 --reg-weight 1e-10
67 |
68 |
69 | echo 'Generating inversion edits with worse ranks'
70 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_3.py --model transe --data FB15k-237 --budget 1
71 | python -u wrangle_KG.py inv_add_3_transe_FB15k-237_1_1_1
72 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data inv_add_3_transe_FB15k-237_1_1_1 --lr 0.001 --input-drop 0.0 --transe-margin 9.0 --num-batches 800 --epochs 100 --reg-weight 1e-10
73 |
74 |
75 |
76 |
77 |
78 | echo 'Generating composition edits with ground truth values'
79 | python -u create_clusters.py --model transe --data FB15k-237 --num-clusters 100
80 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_1.py --model transe --data FB15k-237 --budget 1 --num-clusters 100 --rand-run 1
81 | python -u wrangle_KG.py com_add_1_transe_FB15k-237_1_1_1
82 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data com_add_1_transe_FB15k-237_1_1_1 --lr 0.001 --input-drop 0.0 --transe-margin 9.0 --num-batches 800 --epochs 100 --reg-weight 1e-10
83 |
84 |
85 | echo 'Generating composition attack with just worse ranks '
86 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_2.py --model transe --data FB15k-237 --budget 1
87 | python -u wrangle_KG.py com_add_2_transe_FB15k-237_1_1_1
88 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data com_add_2_transe_FB15k-237_1_1_1 --lr 0.001 --input-drop 0.0 --transe-margin 9.0 --num-batches 800 --epochs 100 --reg-weight 1e-10
89 |
90 | echo 'Generating composition attack with cosine distance '
91 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_3.py --model transe --data FB15k-237 --budget 1
92 | python -u wrangle_KG.py com_add_3_transe_FB15k-237_1_1_1
93 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data com_add_3_transe_FB15k-237_1_1_1 --lr 0.001 --input-drop 0.0 --transe-margin 9.0 --num-batches 800 --epochs 100 --reg-weight 1e-10
94 |
95 |
96 |
97 | echo 'Generating edits from IJCAI-19 baseline '
98 | CUDA_VISIBLE_DEVICES=0 python -u ijcai_add_attack_1.py --model transe --data FB15k-237 --budget 1 --corruption-factor 20 --rand-run 1 --use-gpu
99 | python -u wrangle_KG.py ijcai_add_1_transe_FB15k-237_1_1_1_20.0
100 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data ijcai_add_1_transe_FB15k-237_1_1_1_20.0 --lr 0.001 --input-drop 0.0 --transe-margin 9.0 --num-batches 800 --epochs 100 --reg-weight 1e-10
101 |
102 |
103 | CUDA_VISIBLE_DEVICES=0 python -u ijcai_add_attack_1.py --model transe --data FB15k-237 --budget 1 --corruption-factor 5 --rand-run 1 --use-gpu
104 | python -u wrangle_KG.py ijcai_add_1_transe_FB15k-237_1_1_1_5.0
105 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data ijcai_add_1_transe_FB15k-237_1_1_1_5.0 --lr 0.001 --input-drop 0.0 --transe-margin 9.0 --num-batches 800 --epochs 100 --reg-weight 1e-10
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
--------------------------------------------------------------------------------
/KGEAttack/transe_WN18.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | cd ConvE || exit 1  # abort if the code directory is missing; otherwise every later step would run from the wrong place
4 |
5 | # train the original model
6 | echo 'Training original model'
7 |
8 |
9 | # CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data WN18 --lr 0.005 --input-drop 0.0 --transe-margin 9.0 --num-batches 1000 --epochs 100 --reg-weight 1e-12
10 | # CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data WN18 --lr 0.0005 --input-drop 0.0 --transe-margin 0.0 --transe-norm 1 --num-batches 1000 --epochs 200 --reg-weight 1e-6 --embedding-dim 150
11 | # CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data WN18 --lr 0.01 --input-drop 0.0 --transe-margin 9.0 --num-batches 1000 --epochs 100 --reg-weight 1e-12
12 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data WN18 --lr 0.01 --input-drop 0.0 --transe-margin 9.0 --num-batches 1500 --epochs 100 --reg-weight 1e-12
13 |
14 | echo 'Selecting target triples'
15 | mkdir -p data/target_transe_WN18_1  # -p: do not fail when the script is re-run and the directory already exists
16 |
17 | CUDA_VISIBLE_DEVICES=0 python -u select_targets.py --model transe --data WN18 --lr 0.01 --input-drop 0.0 --transe-margin 9.0 --num-batches 1500 --epochs 100 --reg-weight 1e-12
18 |
19 | # echo 'Re-training the model to compute baseline change in metrics for target set'
20 | # python -u wrangle_KG.py target_transe_WN18_1
21 | # CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data target_transe_WN18_1 --lr 0.01 --input-drop 0.0 --transe-margin 9.0 --num-batches 1500 --epochs 100 --reg-weight 1e-12
22 |
23 |
24 | echo 'Generating random edits for the neighbourhood'
25 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_1.py --model transe --data WN18 --budget 1 --rand-run 1
26 | python -u wrangle_KG.py rand_add_n_transe_WN18_1_1_1
27 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data rand_add_n_transe_WN18_1_1_1 --lr 0.01 --input-drop 0.0 --transe-margin 9.0 --num-batches 1500 --epochs 100 --reg-weight 1e-12
28 |
29 | echo 'Generating global random edits with 1 edit'
30 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_2.py --model transe --data WN18 --budget 1 --rand-run 1
31 | python -u wrangle_KG.py rand_add_g_transe_WN18_1_1_1
32 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data rand_add_g_transe_WN18_1_1_1 --lr 0.01 --input-drop 0.0 --transe-margin 9.0 --num-batches 1500 --epochs 100 --reg-weight 1e-12
33 |
34 | echo 'Generating global random edits with 2 edits'
35 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_2.py --model transe --data WN18 --budget 2 --rand-run 1
36 | python -u wrangle_KG.py rand_add_g_transe_WN18_1_2_1
37 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data rand_add_g_transe_WN18_1_2_1 --lr 0.01 --input-drop 0.0 --transe-margin 9.0 --num-batches 1500 --epochs 100 --reg-weight 1e-12
38 |
39 |
40 |
41 | echo 'Generating symmetry edits with ground truth minimum'
42 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_1.py --model transe --data WN18 --budget 1
43 | python -u wrangle_KG.py sym_add_1_transe_WN18_1_1_1
44 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data sym_add_1_transe_WN18_1_1_1 --lr 0.01 --input-drop 0.0 --transe-margin 9.0 --num-batches 1500 --epochs 100 --reg-weight 1e-12
45 |
46 | echo 'Generating symmetry edits with worse ranks'
47 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_2.py --model transe --data WN18 --budget 1
48 | python -u wrangle_KG.py sym_add_2_transe_WN18_1_1_1
49 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data sym_add_2_transe_WN18_1_1_1 --lr 0.01 --input-drop 0.0 --transe-margin 9.0 --num-batches 1500 --epochs 100 --reg-weight 1e-12
50 |
51 | echo 'Generating symmetry edits with cosine distance'  # NOTE(review): label assumed from the _2/_3 ordering of the inversion and composition sections in this script - confirm against sym_add_attack_3.py
52 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_3.py --model transe --data WN18 --budget 1
53 | python -u wrangle_KG.py sym_add_3_transe_WN18_1_1_1
54 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data sym_add_3_transe_WN18_1_1_1 --lr 0.01 --input-drop 0.0 --transe-margin 9.0 --num-batches 1500 --epochs 100 --reg-weight 1e-12
55 |
56 |
57 |
58 |
59 |
60 | echo 'Generating inversion edits with ground truth minimum'
61 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_1.py --model transe --data WN18 --budget 1
62 | python -u wrangle_KG.py inv_add_1_transe_WN18_1_1_1
63 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data inv_add_1_transe_WN18_1_1_1 --lr 0.01 --input-drop 0.0 --transe-margin 9.0 --num-batches 1500 --epochs 100 --reg-weight 1e-12
64 |
65 | echo 'Generating inversion edits with worse ranks'
66 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_2.py --model transe --data WN18 --budget 1
67 | python -u wrangle_KG.py inv_add_2_transe_WN18_1_1_1
68 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data inv_add_2_transe_WN18_1_1_1 --lr 0.01 --input-drop 0.0 --transe-margin 9.0 --num-batches 1500 --epochs 100 --reg-weight 1e-12
69 |
70 | echo 'Generating inversion edits with cosine distance'
71 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_3.py --model transe --data WN18 --budget 1
72 | python -u wrangle_KG.py inv_add_3_transe_WN18_1_1_1
73 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data inv_add_3_transe_WN18_1_1_1 --lr 0.01 --input-drop 0.0 --transe-margin 9.0 --num-batches 1500 --epochs 100 --reg-weight 1e-12
74 |
75 |
76 |
77 |
78 |
79 |
80 | echo 'Generating composition edits with ground truth values'
81 | python -u create_clusters.py --model transe --data WN18 --num-clusters 50
82 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_1.py --model transe --data WN18 --budget 1 --num-clusters 50 --rand-run 1
83 | python -u wrangle_KG.py com_add_1_transe_WN18_1_1_1
84 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data com_add_1_transe_WN18_1_1_1 --lr 0.01 --input-drop 0.0 --transe-margin 9.0 --num-batches 1500 --epochs 100 --reg-weight 1e-12
85 |
86 | echo 'Generating composition edits with just worse ranks '
87 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_2.py --model transe --data WN18 --budget 1
88 | python -u wrangle_KG.py com_add_2_transe_WN18_1_1_1
89 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data com_add_2_transe_WN18_1_1_1 --lr 0.01 --input-drop 0.0 --transe-margin 9.0 --num-batches 1500 --epochs 100 --reg-weight 1e-12
90 |
91 | echo 'Generating composition edits with cosine distance '
92 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_3.py --model transe --data WN18 --budget 1
93 | python -u wrangle_KG.py com_add_3_transe_WN18_1_1_1
94 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data com_add_3_transe_WN18_1_1_1 --lr 0.01 --input-drop 0.0 --transe-margin 9.0 --num-batches 1500 --epochs 100 --reg-weight 1e-12
95 |
96 |
97 |
98 |
99 | echo 'Generating edits from IJCAI-19 baseline '
100 | CUDA_VISIBLE_DEVICES=0 python -u ijcai_add_attack_1.py --model transe --data WN18 --budget 1 --corruption-factor 20 --rand-run 1 --use-gpu
101 | python -u wrangle_KG.py ijcai_add_1_transe_WN18_1_1_1_20.0
102 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data ijcai_add_1_transe_WN18_1_1_1_20.0 --lr 0.01 --input-drop 0.0 --transe-margin 9.0 --num-batches 1500 --epochs 100 --reg-weight 1e-12
103 |
104 |
105 | # CUDA_VISIBLE_DEVICES=0 python -u ijcai_add_attack_1.py --model transe --data WN18 --budget 1 --corruption-factor 5 --rand-run 1 --use-gpu
106 | # python -u wrangle_KG.py ijcai_add_1_transe_WN18_1_1_1_5.0
107 | # CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data ijcai_add_1_transe_WN18_1_1_1_5.0 --lr 0.01 --input-drop 0.0 --transe-margin 9.0 --num-batches 1500 --epochs 100 --reg-weight 1e-12
108 |
109 |
110 |
111 |
112 |
113 |
--------------------------------------------------------------------------------
/KGEAttack/transe_WN18RR.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | cd ConvE || exit 1  # abort if the code directory is missing; otherwise every later step would run from the wrong place
4 |
5 | # train the original model
6 | echo 'Training original model'
7 |
8 |
9 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data WN18RR --lr 0.005 --input-drop 0.0 --transe-margin 9.0 --num-batches 1000 --epochs 100 --reg-weight 1e-12
10 |
11 |
12 | echo 'Selecting target triples'
13 | mkdir -p data/target_transe_WN18RR_1  # -p: do not fail when the script is re-run and the directory already exists
14 |
15 | CUDA_VISIBLE_DEVICES=0 python -u select_targets.py --model transe --data WN18RR --lr 0.005 --input-drop 0.0 --transe-margin 9.0 --num-batches 1000 --epochs 100 --reg-weight 1e-12
16 |
17 | echo 'Re-training the model to compute baseline change in metrics for target set'
18 | python -u wrangle_KG.py target_transe_WN18RR_1
19 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data target_transe_WN18RR_1 --lr 0.005 --input-drop 0.0 --transe-margin 9.0 --num-batches 1000 --epochs 100 --reg-weight 1e-12
20 |
21 |
22 | echo 'Generating random edits for the neighbourhood'
23 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_1.py --model transe --data WN18RR --budget 1 --rand-run 1
24 | python -u wrangle_KG.py rand_add_n_transe_WN18RR_1_1_1
25 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data rand_add_n_transe_WN18RR_1_1_1 --lr 0.005 --input-drop 0.0 --transe-margin 9.0 --num-batches 1000 --epochs 100 --reg-weight 1e-12
26 |
27 | echo 'Generating global random edits with 1 edit'
28 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_2.py --model transe --data WN18RR --budget 1 --rand-run 1
29 | python -u wrangle_KG.py rand_add_g_transe_WN18RR_1_1_1
30 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data rand_add_g_transe_WN18RR_1_1_1 --lr 0.005 --input-drop 0.0 --transe-margin 9.0 --num-batches 1000 --epochs 100 --reg-weight 1e-12
31 |
32 | echo 'Generating global random edits with 2 edits'
33 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_2.py --model transe --data WN18RR --budget 2 --rand-run 1
34 | python -u wrangle_KG.py rand_add_g_transe_WN18RR_1_2_1
35 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data rand_add_g_transe_WN18RR_1_2_1 --lr 0.005 --input-drop 0.0 --transe-margin 9.0 --num-batches 1000 --epochs 100 --reg-weight 1e-12
36 |
37 |
38 |
39 | echo 'Generating symmetry edits with ground truth minimum'
40 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_1.py --model transe --data WN18RR --budget 1
41 | python -u wrangle_KG.py sym_add_1_transe_WN18RR_1_1_1
42 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data sym_add_1_transe_WN18RR_1_1_1 --lr 0.005 --input-drop 0.0 --transe-margin 9.0 --num-batches 1000 --epochs 100 --reg-weight 1e-12
43 |
44 | echo 'Generating symmetry edits with worse ranks'
45 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_2.py --model transe --data WN18RR --budget 1
46 | python -u wrangle_KG.py sym_add_2_transe_WN18RR_1_1_1
47 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data sym_add_2_transe_WN18RR_1_1_1 --lr 0.005 --input-drop 0.0 --transe-margin 9.0 --num-batches 1000 --epochs 100 --reg-weight 1e-12
48 |
49 | echo 'Generating symmetry edits with cosine distance'  # NOTE(review): label assumed from the _2/_3 ordering of the inversion and composition sections in this script - confirm against sym_add_attack_3.py
50 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_3.py --model transe --data WN18RR --budget 1
51 | python -u wrangle_KG.py sym_add_3_transe_WN18RR_1_1_1
52 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data sym_add_3_transe_WN18RR_1_1_1 --lr 0.005 --input-drop 0.0 --transe-margin 9.0 --num-batches 1000 --epochs 100 --reg-weight 1e-12
53 |
54 |
55 |
56 |
57 |
58 | echo 'Generating inversion edits with ground truth minimum'
59 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_1.py --model transe --data WN18RR --budget 1
60 | python -u wrangle_KG.py inv_add_1_transe_WN18RR_1_1_1
61 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data inv_add_1_transe_WN18RR_1_1_1 --lr 0.005 --input-drop 0.0 --transe-margin 9.0 --num-batches 1000 --epochs 100 --reg-weight 1e-12
62 |
63 | echo 'Generating inversion edits with worse ranks'
64 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_2.py --model transe --data WN18RR --budget 1
65 | python -u wrangle_KG.py inv_add_2_transe_WN18RR_1_1_1
66 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data inv_add_2_transe_WN18RR_1_1_1 --lr 0.005 --input-drop 0.0 --transe-margin 9.0 --num-batches 1000 --epochs 100 --reg-weight 1e-12
67 |
68 | echo 'Generating inversion edits with cosine distance'
69 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_3.py --model transe --data WN18RR --budget 1
70 | python -u wrangle_KG.py inv_add_3_transe_WN18RR_1_1_1
71 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data inv_add_3_transe_WN18RR_1_1_1 --lr 0.005 --input-drop 0.0 --transe-margin 9.0 --num-batches 1000 --epochs 100 --reg-weight 1e-12
72 |
73 |
74 |
75 |
76 |
77 |
78 | echo 'Generating composition edits with ground truth values'
79 | python -u create_clusters.py --model transe --data WN18RR --num-clusters 50
80 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_1.py --model transe --data WN18RR --budget 1 --num-clusters 50 --rand-run 1
81 | python -u wrangle_KG.py com_add_1_transe_WN18RR_1_1_1
82 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data com_add_1_transe_WN18RR_1_1_1 --lr 0.005 --input-drop 0.0 --transe-margin 9.0 --num-batches 1000 --epochs 100 --reg-weight 1e-12
83 |
84 | echo 'Generating composition edits with just worse ranks '
85 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_2.py --model transe --data WN18RR --budget 1
86 | python -u wrangle_KG.py com_add_2_transe_WN18RR_1_1_1
87 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data com_add_2_transe_WN18RR_1_1_1 --lr 0.005 --input-drop 0.0 --transe-margin 9.0 --num-batches 1000 --epochs 100 --reg-weight 1e-12
88 |
89 | echo 'Generating composition edits with cosine distance '
90 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_3.py --model transe --data WN18RR --budget 1
91 | python -u wrangle_KG.py com_add_3_transe_WN18RR_1_1_1
92 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data com_add_3_transe_WN18RR_1_1_1 --lr 0.005 --input-drop 0.0 --transe-margin 9.0 --num-batches 1000 --epochs 100 --reg-weight 1e-12
93 |
94 |
95 |
96 |
97 | echo 'Generating edits from IJCAI-19 baseline '
98 | CUDA_VISIBLE_DEVICES=0 python -u ijcai_add_attack_1.py --model transe --data WN18RR --budget 1 --corruption-factor 20 --rand-run 1 --use-gpu
99 | python -u wrangle_KG.py ijcai_add_1_transe_WN18RR_1_1_1_20.0
100 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data ijcai_add_1_transe_WN18RR_1_1_1_20.0 --lr 0.005 --input-drop 0.0 --transe-margin 9.0 --num-batches 1000 --epochs 100 --reg-weight 1e-12
101 |
102 |
103 | CUDA_VISIBLE_DEVICES=0 python -u ijcai_add_attack_1.py --model transe --data WN18RR --budget 1 --corruption-factor 5 --rand-run 1 --use-gpu
104 | python -u wrangle_KG.py ijcai_add_1_transe_WN18RR_1_1_1_5.0
105 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data ijcai_add_1_transe_WN18RR_1_1_1_5.0 --lr 0.005 --input-drop 0.0 --transe-margin 9.0 --num-batches 1000 --epochs 100 --reg-weight 1e-12
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 Peru Bhardwaj
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Readme.md:
--------------------------------------------------------------------------------
1 |
2 | Poisoning Knowledge Graph Embeddings
via Relation Inference Patterns
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 | This is the code repository to accompany the ACL 2021 paper on poisoning attacks on KGE models.
19 | The work is a part of my PhD study at Trinity College Dublin and is funded by Accenture Labs and ADAPT Centre.
20 | For any questions or feedback, add an issue or email me at: bhardwap at tcd dot ie
21 |
22 | ### Related Publications
23 | 1. Adversarial Attacks on Knowledge Graph Embeddings via Instance Attribution Methods (EMNLP 2021) - Paper, Codebase
24 | 2. Adversarial Robustness of Representation Learning for Knowledge Graphs (PhD Thesis) - Link
25 |
26 | ## Overview
27 | 
28 | The figure illustrates the composition-based adversarial attack on missing link prediction for fraud detection. The knowledge graph consists of two types of entities - `Person` and `BankAccount`. The target triple to predict is `(Karl, affiliated with, Joe the mobster)`. The original KGE model predicts this triple as True, i.e. assigns it a higher rank than the synthetic negative triples. But a malicious attacker can add adversarial triples (in purple) that connect `Karl` with a non-suspicious person `Bob` through a composition pattern. Now, the KGE model predicts the target triple as False.
29 |
30 | Thus, the proposed adversarial attacks are based on a reformulation of the problem of poisoning attacks on KGE models for missing link prediction. Instead of degrading the rank of a target triple directly, the attacker aims to improve the rank of a *decoy triple*. To do so, they exploit the inductive abilities of KGE models which are expressed through connectivity patterns like symmetry, inversion or composition. This problem reformulation for poisoning attacks also helps to understand the behaviour of KGE models because the extent of effectiveness of the attack relying on an inference pattern indicates the KGE model's sensitivity to that inference pattern.
31 |
32 | ## Reproducing the results
33 |
34 | ### Setup
35 | - python = 3.8.5
36 | - pytorch = 1.4.0
37 | - numpy = 1.19.1
38 | - jupyter = 1.0.0
39 | - pandas = 1.1.0
40 | - matplotlib = 3.2.2
41 | - scikit-learn = 0.23.2
42 | - seaborn = 0.11.0
43 |
44 | Experiments reported in the paper were run in the conda environment `inference_attack.yml`
45 |
46 |
47 | ### Usage
48 | - The codebase and the bash scripts used for experiments are in `KGEAttack`
49 | - To add the necessary directories and preprocess the original datasets, use the bash script `preprocess.sh`
50 | - For each model-dataset combination, there is a bash script that trains the original model, generates attacks from the baselines and the proposed attacks, and trains the poisoned model. These scripts are named `<model>_<dataset>.sh` (e.g. `transe_WN18RR.sh`).
51 | - The instructions in these scripts are grouped together under the echo statements which indicate what they do.
52 | - The hyperparameters in bash scripts are the ones used for the experiments reported in the paper.
53 | - The metrics on decoy triples can be computed by the script `compute_decoy_metrics_WN18RR.sh` or `compute_decoy_metrics_FB15k-237.sh`
54 | - To reproduce the results, specific instructions from the bash scripts can be run individually on the command line or the full script can be run.
55 | - All experiments in the paper were run on a shared HPC cluster that had Nvidia RTX 2080ti, Tesla K40 and V100 GPUs.
56 |
57 |
58 | ## References
59 | Parts of this codebase are based on the code from the following repositories:
60 | - [ConvE](https://github.com/TimDettmers/ConvE)
61 | - [CRIAGE](https://github.com/pouyapez/criage)
62 | - [KGC Re-evaluation](https://github.com/svjan5/kg-reeval)
63 | - [ComplEx-N3](https://github.com/facebookresearch/kbc)
64 |
65 |
66 | ## Citation
67 |
68 | ```bibtex
69 | @inproceedings{bhardwaj-etal-2021-poisoning,
70 | title = "Poisoning Knowledge Graph Embeddings via Relation Inference Patterns",
71 | author = "Bhardwaj, Peru and
72 | Kelleher, John and
73 | Costabello, Luca and
74 | O{'}Sullivan, Declan",
75 | booktitle = "Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)",
76 | month = aug,
77 | year = "2021",
78 | address = "Online",
79 | publisher = "Association for Computational Linguistics",
80 | url = "https://aclanthology.org/2021.acl-long.147",
81 | pages = "1875--1888"
82 | }
83 | ```
84 |
--------------------------------------------------------------------------------
/inference_attack.yml:
--------------------------------------------------------------------------------
1 | name: inference_attack
2 | channels:
3 | - defaults
4 | dependencies:
5 | - _libgcc_mutex=0.1=main
6 | - _pytorch_select=0.2=gpu_0
7 | - argon2-cffi=20.1.0=py38h7b6447c_1
8 | - async_generator=1.10=pyhd3eb1b0_0
9 | - attrs=20.3.0=pyhd3eb1b0_0
10 | - backcall=0.2.0=pyhd3eb1b0_0
11 | - blas=1.0=mkl
12 | - bleach=3.2.3=pyhd3eb1b0_0
13 | - ca-certificates=2021.1.19=h06a4308_0
14 | - certifi=2020.12.5=py38h06a4308_0
15 | - cffi=1.14.4=py38h261ae71_0
16 | - cudatoolkit=10.1.243=h6bb024c_0
17 | - cudnn=7.6.5=cuda10.1_0
18 | - cycler=0.10.0=py38_0
19 | - dbus=1.13.18=hb2f20db_0
20 | - decorator=4.4.2=pyhd3eb1b0_0
21 | - defusedxml=0.6.0=py_0
22 | - entrypoints=0.3=py38_0
23 | - expat=2.2.10=he6710b0_2
24 | - fontconfig=2.13.0=h9420a91_0
25 | - freetype=2.10.4=h5ab3b9f_0
26 | - glib=2.66.1=h92f7085_0
27 | - gst-plugins-base=1.14.0=h8213a91_2
28 | - gstreamer=1.14.0=h28cd5cc_2
29 | - icu=58.2=he6710b0_3
30 | - importlib-metadata=2.0.0=py_1
31 | - importlib_metadata=2.0.0=1
32 | - intel-openmp=2020.2=254
33 | - ipykernel=5.3.4=py38h5ca1d4c_0
34 | - ipython=7.19.0=py38hb070fc8_1
35 | - ipython_genutils=0.2.0=pyhd3eb1b0_1
36 | - ipywidgets=7.6.3=pyhd3eb1b0_1
37 | - jedi=0.17.0=py38_0
38 | - jinja2=2.11.2=pyhd3eb1b0_0
39 | - joblib=1.0.0=pyhd3eb1b0_0
40 | - jpeg=9b=h024ee3a_2
41 | - jsonschema=3.2.0=py_2
42 | - jupyter=1.0.0=py38_7
43 | - jupyter_client=6.1.7=py_0
44 | - jupyter_console=6.2.0=py_0
45 | - jupyter_core=4.7.0=py38h06a4308_0
46 | - jupyterlab_pygments=0.1.2=py_0
47 | - jupyterlab_widgets=1.0.0=pyhd3eb1b0_1
48 | - kiwisolver=1.3.0=py38h2531618_0
49 | - ld_impl_linux-64=2.33.1=h53a641e_7
50 | - libedit=3.1.20191231=h14c3975_1
51 | - libffi=3.3=he6710b0_2
52 | - libgcc-ng=9.1.0=hdf63c60_0
53 | - libgfortran-ng=7.3.0=hdf63c60_0
54 | - libpng=1.6.37=hbc83047_0
55 | - libsodium=1.0.18=h7b6447c_0
56 | - libstdcxx-ng=9.1.0=hdf63c60_0
57 | - libuuid=1.0.3=h1bed415_2
58 | - libxcb=1.14=h7b6447c_0
59 | - libxml2=2.9.10=hb55368b_3
60 | - markupsafe=1.1.1=py38h7b6447c_0
61 | - matplotlib=3.2.2=0
62 | - matplotlib-base=3.2.2=py38hef1b27d_0
63 | - mistune=0.8.4=py38h7b6447c_1000
64 | - mkl=2020.2=256
65 | - mkl-service=2.3.0=py38he904b0f_0
66 | - mkl_fft=1.2.0=py38h23d657b_0
67 | - mkl_random=1.1.1=py38h0573a6f_0
68 | - nbclient=0.5.1=py_0
69 | - nbconvert=6.0.7=py38_0
70 | - nbformat=5.1.2=pyhd3eb1b0_1
71 | - ncurses=6.2=he6710b0_1
72 | - nest-asyncio=1.4.3=pyhd3eb1b0_0
73 | - ninja=1.10.2=py38hff7bd54_0
74 | - notebook=6.2.0=py38h06a4308_0
75 | - numpy=1.19.1=py38hbc911f0_0
76 | - numpy-base=1.19.1=py38hfa32c7d_0
77 | - openssl=1.1.1i=h27cfd23_0
78 | - packaging=20.9=pyhd3eb1b0_0
79 | - pandas=1.1.0=py38he6710b0_0
80 | - pandoc=2.11=hb0f4dca_0
81 | - pandocfilters=1.4.3=py38h06a4308_1
82 | - parso=0.8.1=pyhd3eb1b0_0
83 | - pcre=8.44=he6710b0_0
84 | - pexpect=4.8.0=pyhd3eb1b0_3
85 | - pickleshare=0.7.5=pyhd3eb1b0_1003
86 | - pip=20.3.3=py38h06a4308_0
87 | - prometheus_client=0.9.0=pyhd3eb1b0_0
88 | - prompt-toolkit=3.0.8=py_0
89 | - prompt_toolkit=3.0.8=0
90 | - ptyprocess=0.7.0=pyhd3eb1b0_2
91 | - pycparser=2.20=py_2
92 | - pygments=2.7.4=pyhd3eb1b0_0
93 | - pyparsing=2.4.7=pyhd3eb1b0_0
94 | - pyqt=5.9.2=py38h05f1152_4
95 | - pyrsistent=0.17.3=py38h7b6447c_0
96 | - python=3.8.5=h7579374_1
97 | - python-dateutil=2.8.1=pyhd3eb1b0_0
98 | - pytorch=1.4.0=cuda101py38h02f0884_0
99 | - pytz=2020.5=pyhd3eb1b0_0
100 | - pyzmq=20.0.0=py38h2531618_1
101 | - qt=5.9.7=h5867ecd_1
102 | - qtconsole=4.7.7=py_0
103 | - qtpy=1.9.0=py_0
104 | - readline=8.1=h27cfd23_0
105 | - scikit-learn=0.23.2=py38h0573a6f_0
106 | - scipy=1.5.2=py38h0b6359f_0
107 | - seaborn=0.11.0=py_0
108 | - send2trash=1.5.0=pyhd3eb1b0_1
109 | - setuptools=52.0.0=py38h06a4308_0
110 | - sip=4.19.13=py38he6710b0_0
111 | - six=1.15.0=py38h06a4308_0
112 | - sqlite=3.33.0=h62c20be_0
113 | - terminado=0.9.2=py38h06a4308_0
114 | - testpath=0.4.4=pyhd3eb1b0_0
115 | - threadpoolctl=2.1.0=pyh5ca1d4c_0
116 | - tk=8.6.10=hbc83047_0
117 | - tornado=6.1=py38h27cfd23_0
118 | - traitlets=5.0.5=pyhd3eb1b0_0
119 | - wcwidth=0.2.5=py_0
120 | - webencodings=0.5.1=py38_1
121 | - wheel=0.36.2=pyhd3eb1b0_0
122 | - widgetsnbextension=3.5.1=py38_0
123 | - xz=5.2.5=h7b6447c_0
124 | - zeromq=4.3.3=he6710b0_3
125 | - zipp=3.4.0=pyhd3eb1b0_0
126 | - zlib=1.2.11=h7b6447c_3
127 |
128 |
129 |
--------------------------------------------------------------------------------
/overview.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeruBhardwaj/InferenceAttack/ddfda138f7937a6313af5392ec401ae89900aaf8/overview.jpg
--------------------------------------------------------------------------------