├── .gitignore
├── KGEAttack
│   ├── ConvE
│   │   ├── FB15k-237.tar.gz
│   │   ├── WN18.tar.gz
│   │   ├── WN18RR.tar.gz
│   │   ├── clustering_elbow.ipynb
│   │   ├── com_add_attack_1.py
│   │   ├── com_add_attack_2.py
│   │   ├── com_add_attack_3.py
│   │   ├── create_clusters.py
│   │   ├── criage_add_attack_1.py
│   │   ├── criage_inverter.py
│   │   ├── criage_model.py
│   │   ├── dataset.py
│   │   ├── decoy_test.py
│   │   ├── elbow_plots
│   │   │   ├── FB15k-237_complex.png
│   │   │   ├── FB15k-237_conve.png
│   │   │   ├── FB15k-237_distmult.png
│   │   │   ├── FB15k-237_transe.png
│   │   │   ├── WN18RR_complex.png
│   │   │   ├── WN18RR_conve.png
│   │   │   ├── WN18RR_distmult.png
│   │   │   └── WN18RR_transe.png
│   │   ├── evaluation.py
│   │   ├── grad_add_attack.py
│   │   ├── ijcai_add_attack_1.py
│   │   ├── inst_add_attack.py
│   │   ├── inv_add_attack_1.py
│   │   ├── inv_add_attack_2.py
│   │   ├── inv_add_attack_3.py
│   │   ├── main.py
│   │   ├── model.py
│   │   ├── preprocess.py
│   │   ├── rand_add_attack_1.py
│   │   ├── rand_add_attack_2.py
│   │   ├── select_examples.ipynb
│   │   ├── select_targets.py
│   │   ├── sym_add_attack_1.py
│   │   ├── sym_add_attack_2.py
│   │   ├── sym_add_attack_3.py
│   │   ├── utils.py
│   │   └── wrangle_KG.py
│   ├── Readme.md
│   ├── complex_FB15k-237.sh
│   ├── complex_WN18.sh
│   ├── complex_WN18RR.sh
│   ├── compute_decoy_metrics_FB15k-237.sh
│   ├── compute_decoy_metrics_WN18RR.sh
│   ├── conve_FB15k-237.sh
│   ├── conve_WN18.sh
│   ├── conve_WN18RR.sh
│   ├── distmult_FB15k-237.sh
│   ├── distmult_WN18.sh
│   ├── distmult_WN18RR.sh
│   ├── grad_add_attack_FB15k-237.sh
│   ├── grad_add_attack_WN18RR.sh
│   ├── inst_add_attack_FB15k-237.sh
│   ├── inst_add_attack_WN18RR.sh
│   ├── preprocess.sh
│   ├── transe_FB15k-237.sh
│   ├── transe_WN18.sh
│   └── transe_WN18RR.sh
├── LICENSE
├── Readme.md
├── inference_attack.yml
└── overview.jpg

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Ignore Jupyter checkpoints
.ipynb_checkpoints

# Python
__pycache__/

# Ignore folders
data/
saved_models/
results/
losses/
logs/
clusters/

--------------------------------------------------------------------------------
/KGEAttack/ConvE/FB15k-237.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeruBhardwaj/InferenceAttack/ddfda138f7937a6313af5392ec401ae89900aaf8/KGEAttack/ConvE/FB15k-237.tar.gz

--------------------------------------------------------------------------------
/KGEAttack/ConvE/WN18.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeruBhardwaj/InferenceAttack/ddfda138f7937a6313af5392ec401ae89900aaf8/KGEAttack/ConvE/WN18.tar.gz

--------------------------------------------------------------------------------
/KGEAttack/ConvE/WN18RR.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeruBhardwaj/InferenceAttack/ddfda138f7937a6313af5392ec401ae89900aaf8/KGEAttack/ConvE/WN18RR.tar.gz
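The three .tar.gz archives above bundle the raw dataset splits; the repository's preprocess.sh presumably unpacks them before preprocess.py runs. Purely as an illustrative sketch (paths assumed from the tree above), the same can be done with the Python standard library:

import tarfile

# Illustrative only: unpack the bundled datasets next to the scripts.
for name in ['FB15k-237', 'WN18', 'WN18RR']:
    with tarfile.open('KGEAttack/ConvE/{0}.tar.gz'.format(name)) as tar:
        tar.extractall(path='KGEAttack/ConvE')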
--------------------------------------------------------------------------------
/KGEAttack/ConvE/create_clusters.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# coding: utf-8

# - In this notebook -
# - generate clusters for model, data combinations
# - save them
#
#

# In[1]:


import pickle
from typing import Dict, Tuple, List
import os
import numpy as np
import json
import torch
import logging
import argparse
import math
from pprint import pprint
import pandas as pd
import errno
from sklearn.cluster import MiniBatchKMeans, KMeans

import torch
from torch.utils.data import DataLoader
import torch.backends.cudnn as cudnn

from dataset import TrainDataset, BidirectionalOneShotIterator
from evaluation import evaluation
from model import Distmult, Complex, Conve, Transe


# In[2]:


def add_arguments():
    parser = argparse.ArgumentParser(description='Link prediction for knowledge graphs')

    parser.add_argument('--data', type=str, default='FB15k-237', help='Dataset to use: {FB15k-237, YAGO3-10, WN18RR, umls, nations, kinship}, default: FB15k-237')
    parser.add_argument('--model', type=str, default='conve', help='Choose from: {conve, distmult, complex}')
    parser.add_argument('--add-reciprocals', action='store_true', help='Option to add reciprocal relations')

    parser.add_argument('--transe-margin', type=float, default=12.0, help='Margin value for TransE scoring function. Default: 12.0')
    parser.add_argument('--transe-norm', type=int, default=2, help='P-norm value for TransE scoring function. Default: 2')

    parser.add_argument('--epochs', type=int, default=400, help='Number of epochs to train (default: 400)')
    parser.add_argument('--lr', type=float, default=0.001, help='Learning rate (default: 0.001)') #maybe 0.1
    parser.add_argument('--lr-decay', type=float, default=0.0, help='Weight decay value to use in the optimizer. Default: 0.0')
    parser.add_argument('--max-norm', action='store_true', help='Option to add unit max norm constraint to entity embeddings')

    parser.add_argument('--num-batches', type=int, default=400, help='Number of batches for training (default: 400)') #maybe 200?
    parser.add_argument('--test-batch-size', type=int, default=128, help='Batch size for test split (default: 128)')
    parser.add_argument('--valid-batch-size', type=int, default=128, help='Batch size for valid split (default: 128)')
    parser.add_argument('--num-workers', type=int, default=4, help='Number of workers to use for the batch loaders on GPU. Default: 4')

    parser.add_argument('--embedding-dim', type=int, default=200, help='The embedding dimension (1D). Default: 200')

    parser.add_argument('--stack_width', type=int, default=20, help='The first dimension of the reshaped/stacked 2D embedding. Second dimension is inferred. Default: 20')
    #parser.add_argument('--stack_height', type=int, default=10, help='The second dimension of the reshaped/stacked 2D embedding. Default: 10')
    parser.add_argument('--hidden-drop', type=float, default=0.3, help='Dropout for the hidden layer. Default: 0.3.')
    parser.add_argument('--input-drop', type=float, default=0.2, help='Dropout for the input embeddings. Default: 0.2.')
    parser.add_argument('--feat-drop', type=float, default=0.3, help='Dropout for the convolutional features. Default: 0.3.')
    parser.add_argument('-num-filters', default=32, type=int, help='Number of filters for convolution')
    parser.add_argument('-kernel-size', default=3, type=int, help='Kernel Size for convolution')

    parser.add_argument('--use-bias', action='store_true', help='Use a bias in the convolutional layer. Default: False')
    parser.add_argument('--label-smoothing', type=float, default=0.1, help='Label smoothing value to use. Default: 0.1')

    parser.add_argument('--reg-weight', type=float, default=5e-12, help='Weight for regularization. Default: 5e-12') #maybe 5e-2?
    parser.add_argument('--reg-norm', type=int, default=2, help='Norm for regularization. Default: 2')

    parser.add_argument('--resume', action='store_true', help='Restore a saved model.')
    parser.add_argument('--resume-split', type=str, default='test', help='Split to evaluate a restored model')
    parser.add_argument('--seed', type=int, default=17, metavar='S', help='Random seed (default: 17)')

    return parser


def generate_dicts(data_path):
    with open(os.path.join(data_path, 'entities_dict.json'), 'r') as f:
        ent_to_id = json.load(f)
    with open(os.path.join(data_path, 'relations_dict.json'), 'r') as f:
        rel_to_id = json.load(f)
    n_ent = len(list(ent_to_id.keys()))
    n_rel = len(list(rel_to_id.keys()))

    return n_ent, n_rel, ent_to_id, rel_to_id


def load_data(data_path):
    data = {}
    for split in ['train', 'valid', 'test']:
        df = pd.read_csv(os.path.join(data_path, split+'.txt'), sep='\t', header=None, names=None, dtype=int)
        df = df.drop_duplicates()
        data[split] = df.values

    return data

def add_model(args, n_ent, n_rel):
    if args.add_reciprocals:
        if args.model is None:
            model = Conve(args, n_ent, 2*n_rel)
        elif args.model == 'conve':
            model = Conve(args, n_ent, 2*n_rel)
        elif args.model == 'distmult':
            model = Distmult(args, n_ent, 2*n_rel)
        elif args.model == 'complex':
            model = Complex(args, n_ent, 2*n_rel)
        elif args.model == 'transe':
            model = Transe(args, n_ent, 2*n_rel)
        else:
            logger.info('Unknown model: {0}'.format(args.model))
            raise Exception("Unknown model!")
    else:
        if args.model is None:
            model = Conve(args, n_ent, n_rel)
        elif args.model == 'conve':
            model = Conve(args, n_ent, n_rel)
        elif args.model == 'distmult':
            model = Distmult(args, n_ent, n_rel)
        elif args.model == 'complex':
            model = Complex(args, n_ent, n_rel)
        elif args.model == 'transe':
            model = Transe(args, n_ent, n_rel)
        else:
            logger.info('Unknown model: {0}'.format(args.model))
            raise Exception("Unknown model!")

    #model.to(self.device)
    return model



# In[3]:


# In[4]:


parser = add_arguments()
parser.add_argument('--target-split', type=int, default=1, help='Ranks to use for target set. Values are 1 for ranks <=10; 2 for ranks>10 and ranks<=100. Default: 1')
parser.add_argument('--budget', type=int, default=1, help='Budget for each target triple for each corruption side')
parser.add_argument('--rand-run', type=int, default=1, help='A number assigned to the random run of experiment')
parser.add_argument('--num-clusters', type=int, default=100, help='Number of clusters to be generated')


# In[5]:


args = parser.parse_args()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


# In[6]:


#args.target_split = 1 # which target split to use
#Values are 1 for ranks <=10; 2 for ranks>10 and ranks<=100.
#args.budget = 1 # indicates the num of adversarial edits for each target triple for each corruption side
#args.rand_run = 1 # a number assigned to the random run of the experiment

args.seed = args.seed + (args.rand_run - 1) # default seed is 17

#args.model = 'distmult'
#args.data = 'FB15k-237'
# Below is based on hyperparams for original model
if args.data == 'WN18RR':
    if args.model == 'distmult':
        args.lr = 0.01
        args.num_batches = 50
    elif args.model == 'complex':
        args.lr = 0.01
    elif args.model == 'conve':
        args.lr = 0.001
    elif args.model == 'transe':
        args.lr = 0.001
        args.input_drop = 0.0
        args.transe_margin = 9.0
        args.num_batches = 1000
        args.epochs = 100
        args.reg_weight = 1e-10
    else:
        print("New model:{0},{1}. Set hyperparams".format(args.data, args.model))
elif args.data == 'FB15k-237':
    if args.model == 'distmult':
        args.lr = 0.005
        args.input_drop = 0.5
    elif args.model == 'complex':
        args.lr = 0.005
        args.input_drop = 0.5
    elif args.model == 'conve':
        args.lr = 0.001
        args.hidden_drop = 0.5
    elif args.model == 'transe':
        args.lr = 0.001
        args.input_drop = 0.0
        args.transe_margin = 9.0
        args.num_batches = 800
        args.epochs = 100
        args.reg_weight = 1e-10
    else:
        print("New model:{0},{1}. Set hyperparams".format(args.data, args.model))
else:
    print("New dataset:{0}. Set hyperparams".format(args.data))


# In[7]:


# Fixing random seeds for reproducibility - https://pytorch.org/docs/stable/notes/randomness.html
torch.manual_seed(args.seed)
cudnn.deterministic = True
cudnn.benchmark = False
np.random.seed(args.seed)
rng = np.random.default_rng(seed=args.seed)


args.epochs = -1 # no training here
model_name = '{0}_{1}_{2}_{3}_{4}'.format(args.model, args.embedding_dim, args.input_drop, args.hidden_drop, args.feat_drop)
model_path = 'saved_models/{0}_{1}.model'.format(args.data, model_name)
#log_path = 'logs/inv_add_1_{0}_{1}_{2}_{3}.log'.format(args.data, model_name, args.num_batches, args.epochs)


logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s',
                    datefmt = '%m/%d/%Y %H:%M:%S',
                    level = logging.INFO
                    )
logger = logging.getLogger(__name__)


data_path = 'data/target_{0}_{1}_{2}'.format(args.model, args.data, args.target_split)

n_ent, n_rel, ent_to_id, rel_to_id = generate_dicts(data_path)


# In[8]:


# add a model and load the pre-trained params
model = add_model(args, n_ent, n_rel)
model.to(device)
logger.info('Loading saved model from {0}'.format(model_path))
state = torch.load(model_path)
model_params = state['state_dict']
params = [(key, value.size(), value.numel()) for key, value in model_params.items()]
for key, size, count in params:
    logger.info('Key:{0}, Size:{1}, Count:{2}'.format(key, size, count))

model.load_state_dict(model_params)

model.eval()


# In[ ]:


logger.info("Starting the clustering algorithm")


# In[9]:
# Perform clustering of entity embeddings
if args.model == 'complex':
    ent_emb = torch.cat((model.emb_e_real.weight.data, model.emb_e_img.weight.data), dim=-1)
else:
    ent_emb = model.emb_e.weight.data

ent_emb = ent_emb.cpu().numpy()

km = KMeans(n_clusters=args.num_clusters, n_init=100, max_iter=500,
            random_state=0, #batch_size = 100,
            init='k-means++' #, verbose=1
            #max_no_improvement=20
            )
km.fit(ent_emb)


# In[ ]:


logger.info("Finished clustering... saving centres, labels, inertia, n_iter")


# In[21]:


save_path = 'clusters/{0}_{1}_{2}_{3}'.format(args.model, args.data, args.num_clusters, args.rand_run)


# In[24]:


out = open(save_path + 'cluster_centers.pickle', 'wb')
pickle.dump(km.cluster_centers_, out)
out.close()


# In[28]:


out = open(save_path + 'labels.pickle', 'wb')
pickle.dump(km.labels_, out)
out.close()


# In[31]:


out = open(save_path + 'inertia.pickle', 'wb')
pickle.dump(km.inertia_, out)
out.close()


# In[34]:


out = open(save_path + 'n_iter.pickle', 'wb')
pickle.dump(km.n_iter_, out)
out.close()


# In[35]:


#inp_f = open(save_path + 'cluster_centers.pickle', 'rb')
#centres = np.array(pickle.load(inp_f))
#inp_f.close()


# In[ ]:
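For reference, a small sketch of reading these artifacts back, mirroring the commented-out loader above. Note that save_path carries no trailing separator, so the pickle names are concatenated directly onto the run prefix:

import pickle
import numpy as np

# save_path as constructed above, e.g. 'clusters/distmult_FB15k-237_100_1'
with open(save_path + 'cluster_centers.pickle', 'rb') as inp_f:
    centres = np.array(pickle.load(inp_f))   # (num_clusters, emb_dim)
with open(save_path + 'labels.pickle', 'rb') as inp_f:
    labels = np.array(pickle.load(inp_f))    # cluster id per entity, indexed by entity id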
--------------------------------------------------------------------------------
/KGEAttack/ConvE/criage_inverter.py:
--------------------------------------------------------------------------------

import pickle
from typing import Dict, Tuple, List
import os
import numpy as np
import json
import torch
import logging
import argparse
import math
from pprint import pprint
import pandas as pd
import errno
from sklearn.cluster import MiniBatchKMeans, KMeans

import torch
from torch.utils.data import DataLoader
import torch.backends.cudnn as cudnn

from dataset import TrainDataset, BidirectionalOneShotIterator
from evaluation import evaluation
from criage_model import Distmult, Conve


# In[2]:


def add_arguments():
    parser = argparse.ArgumentParser(description='Link prediction for knowledge graphs')

    parser.add_argument('--data', type=str, default='FB15k-237', help='Dataset to use: {FB15k-237, YAGO3-10, WN18RR, umls, nations, kinship}, default: FB15k-237')
    parser.add_argument('--model', type=str, default='conve', help='Choose from: {conve, distmult, complex, transe}')
    parser.add_argument('--add-reciprocals', action='store_true', help='Option to add reciprocal relations')

    parser.add_argument('--transe-margin', type=float, default=12.0, help='Margin value for TransE scoring function. Default: 12.0')
    parser.add_argument('--transe-norm', type=int, default=2, help='P-norm value for TransE scoring function. Default: 2')

    parser.add_argument('--epochs', type=int, default=200, help='Number of epochs to train (default: 200)')
    parser.add_argument('--lr', type=float, default=0.001, help='Learning rate (default: 0.001)') #maybe 0.1
    parser.add_argument('--lr-decay', type=float, default=0.0, help='Weight decay value to use in the optimizer. Default: 0.0')
    parser.add_argument('--max-norm', action='store_true', help='Option to add unit max norm constraint to entity embeddings')

    parser.add_argument('--num-batches', type=int, default=100, help='Number of batches for training (default: 100)') #maybe 200?
    parser.add_argument('--test-batch-size', type=int, default=128, help='Batch size for test split (default: 128)')
    parser.add_argument('--valid-batch-size', type=int, default=128, help='Batch size for valid split (default: 128)')
    parser.add_argument('--num-workers', type=int, default=4, help='Number of workers to use for the batch loaders on GPU. Default: 4')

    parser.add_argument('--embedding-dim', type=int, default=200, help='The embedding dimension (1D). Default: 200')

    parser.add_argument('--stack_width', type=int, default=20, help='The first dimension of the reshaped/stacked 2D embedding. Second dimension is inferred. Default: 20')
    #parser.add_argument('--stack_height', type=int, default=10, help='The second dimension of the reshaped/stacked 2D embedding. Default: 10')
    parser.add_argument('--hidden-drop', type=float, default=0.3, help='Dropout for the hidden layer. Default: 0.3.')
    parser.add_argument('--input-drop', type=float, default=0.2, help='Dropout for the input embeddings. Default: 0.2.')
    parser.add_argument('--feat-drop', type=float, default=0.3, help='Dropout for the convolutional features. Default: 0.3.')
    parser.add_argument('-num-filters', default=32, type=int, help='Number of filters for convolution')
    parser.add_argument('-kernel-size', default=3, type=int, help='Kernel Size for convolution')

    parser.add_argument('--use-bias', action='store_true', help='Use a bias in the convolutional layer. Default: False')
    parser.add_argument('--label-smoothing', type=float, default=0.1, help='Label smoothing value to use. Default: 0.1')

    parser.add_argument('--reg-weight', type=float, default=0.0, help='Weight for regularization. Default: 0.0') #maybe 5e-2?
    parser.add_argument('--reg-norm', type=int, default=2, help='Norm for regularization. Default: 2')

    parser.add_argument('--resume', action='store_true', help='Restore a saved model.')
    parser.add_argument('--resume-split', type=str, default='test', help='Split to evaluate a restored model')
    parser.add_argument('--seed', type=int, default=17, metavar='S', help='Random seed (default: 17)')

    return parser

def generate_dicts(data_path):
    with open(os.path.join(data_path, 'entities_dict.json'), 'r') as f:
        ent_to_id = json.load(f)
    with open(os.path.join(data_path, 'relations_dict.json'), 'r') as f:
        rel_to_id = json.load(f)
    n_ent = len(list(ent_to_id.keys()))
    n_rel = len(list(rel_to_id.keys()))

    return n_ent, n_rel, ent_to_id, rel_to_id


def load_data(data_path):
    data = {}
    for split in ['train', 'valid', 'test']:
        df = pd.read_csv(os.path.join(data_path, split+'.txt'), sep='\t', header=None, names=None, dtype=int)
        df = df.drop_duplicates()
        data[split] = df.values

    return data

def load_train_data(data_path, args, n_rel):
    ##### train #####
    inp_f = open(os.path.join(data_path, 'sr2o_train.pickle'), 'rb')
    sr2o_train: Dict[Tuple[int, int], List[int]] = pickle.load(inp_f)
    inp_f.close()

    inp_f = open(os.path.join(data_path, 'or2s_train.pickle'), 'rb')
    or2s_train: Dict[Tuple[int, int], List[int]] = pickle.load(inp_f)
    inp_f.close()

    if args.add_reciprocals:
        # adding reciprocals
        or2s_train = {(int(k[0]), int(k[1])+n_rel): v for k,v in or2s_train.items()}
    else:
        or2s_train = {(int(k[0]), int(k[1])): v for k,v in or2s_train.items()}
    sr2o_train = {(int(k[0]), int(k[1])): v for k,v in sr2o_train.items()}

    return sr2o_train, or2s_train

def add_model(args, n_ent, n_rel):
    # NOTE: criage_model defines only Distmult and Conve, so the complex/transe
    # branches below would raise a NameError if selected.
    if args.add_reciprocals:
        if args.model is None:
            model = Conve(args, n_ent, 2*n_rel)
        elif args.model == 'conve':
            model = Conve(args, n_ent, 2*n_rel)
        elif args.model == 'distmult':
            model = Distmult(args, n_ent, 2*n_rel)
        elif args.model == 'complex':
            model = Complex(args, n_ent, 2*n_rel)
        elif args.model == 'transe':
            model = Transe(args, n_ent, 2*n_rel)
        else:
            logger.info('Unknown model: {0}'.format(args.model))
            raise Exception("Unknown model!")
    else:
        if args.model is None:
            model = Conve(args, n_ent, n_rel)
        elif args.model == 'conve':
            model = Conve(args, n_ent, n_rel)
        elif args.model == 'distmult':
            model = Distmult(args, n_ent, n_rel)
        elif args.model == 'complex':
            model = Complex(args, n_ent, n_rel)
        elif args.model == 'transe':
            model = Transe(args, n_ent, n_rel)
        else:
            logger.info('Unknown model: {0}'.format(args.model))
            raise Exception("Unknown model!")

    #model.to(self.device)
    return model



# In[3]:

# In[4]:


parser = add_arguments()
parser.add_argument('--target-split', type=int, default=1, help='Ranks to use for target set. Values are 1 for ranks <=10; 2 for ranks>10 and ranks<=100. Default: 1')
parser.add_argument('--budget', type=int, default=1, help='Budget for each target triple for each corruption side')
parser.add_argument('--rand-run', type=int, default=1, help='A number assigned to the random run of experiment')


# In[5]:


args = parser.parse_args()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


# In[6]:


#args.target_split = 1 # which target split to use
#Values are 1 for ranks <=10; 2 for ranks>10 and ranks<=100.
#args.budget = 1 # indicates the num of adversarial edits for each target triple for each corruption side
#args.rand_run = 1 # a number assigned to the random run of the experiment

args.seed = args.seed + (args.rand_run - 1) # default seed is 17

#args.model = 'distmult'
#args.data = 'FB15k-237'


# In[7]:


# Fixing random seeds for reproducibility - https://pytorch.org/docs/stable/notes/randomness.html
torch.manual_seed(args.seed)
cudnn.deterministic = True
cudnn.benchmark = False
np.random.seed(args.seed)
rng = np.random.default_rng(seed=args.seed)


#args.epochs = -1 # no training here
model_name = '{0}_{1}_{2}_{3}_{4}'.format(args.model, args.embedding_dim, args.input_drop, args.hidden_drop, args.feat_drop)
model_path = 'saved_models/{0}_{1}.model'.format(args.data, model_name)
#log_path = 'logs/inv_add_1_{0}_{1}_{2}_{3}.log'.format(args.data, model_name, args.num_batches, args.epochs)
log_path = save_path = 'logs/attack_logs/criage_inverter/{0}_{1}_{2}_{3}'.format(args.data,
                                                                                 model_name,
                                                                                 args.num_batches,
                                                                                 args.epochs
                                                                                 )

logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s',
                    datefmt = '%m/%d/%Y %H:%M:%S',
                    level = logging.INFO,
                    filename = log_path
                    )
logger = logging.getLogger(__name__)
logger.info(args)
logger.info('-------------------- Running Criage Inverter ----------------------')


data_path = 'data/target_{0}_{1}_{2}'.format(args.model, args.data, args.target_split)

n_ent, n_rel, ent_to_id, rel_to_id = generate_dicts(data_path)

##### load data ####
data = load_data(data_path)
train_data, valid_data, test_data = data['train'], data['valid'], data['test']

inp_f = open(os.path.join(data_path, 'to_skip_eval.pickle'), 'rb')
to_skip_eval: Dict[str, Dict[Tuple[int, int], List[int]]] = pickle.load(inp_f)
inp_f.close()
to_skip_eval['lhs'] = {(int(k[0]), int(k[1])): v for k,v in to_skip_eval['lhs'].items()}
to_skip_eval['rhs'] = {(int(k[0]), int(k[1])): v for k,v in to_skip_eval['rhs'].items()}


logger.info('Loading training data')
sr2o_train, or2s_train = load_train_data(data_path, args, n_rel)

# lhs denotes subject side corruptions and rhs denotes object side corruptions
batch_size_lhs = math.ceil(len(list(or2s_train.keys()))/args.num_batches)
batch_size_rhs = math.ceil(len(list(sr2o_train.keys()))/args.num_batches)

logger.info("Dict size or2s: {0}".format(len(list(or2s_train.keys()))))
logger.info('Batch_size_lhs: {0}'.format(batch_size_lhs))
logger.info("Dict size sr2o: {0}".format(len(list(sr2o_train.keys()))))
logger.info('Batch_size_rhs: {0}'.format(batch_size_rhs))
train_dataloader_lhs = DataLoader(
    TrainDataset(args, n_ent, or2s_train, mode='lhs'),
    batch_size = batch_size_lhs,
    shuffle = True,
    num_workers = 0, #max(0, args.num_workers),
    collate_fn = TrainDataset.collate_fn
)

train_dataloader_rhs = DataLoader(
    TrainDataset(args, n_ent, sr2o_train, mode='rhs'),
    batch_size = batch_size_rhs,
    shuffle = True,
    num_workers = 0, #max(0, self.args.num_workers),
    collate_fn = TrainDataset.collate_fn
)


# In[8]:

logger.info('Loading pre-trained model params')
# add a model and load the pre-trained params
model = add_model(args, n_ent, n_rel)
model.to(device)
logger.info('Loading saved model from {0}'.format(model_path))
model_state = model.state_dict()
pre_state = torch.load(model_path)
pretrained = pre_state['state_dict']
for name in model_state:
    if name in pretrained:
        model_state[name].copy_(pretrained[name])


#model.eval()

optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.lr_decay)

logger.info('----- Training -----')
for epoch in range(args.epochs):
    model.train()
    train_iterator = BidirectionalOneShotIterator(train_dataloader_lhs, train_dataloader_rhs)
    losses = []
    for b in range(2*args.num_batches):
        optimizer.zero_grad()
        batch = next(train_iterator)
        e1, rel, label, mode = batch
        e1, rel = e1.to(device), rel.to(device)
        E1, R = model.forward(e1, rel)
        loss_E1 = model.loss(E1, e1) #e1.squeeze(1))
        loss_R = model.loss(R, rel) #rel.squeeze(1))
        loss = loss_E1 + loss_R

        loss.backward()
        optimizer.step()
        losses.append(loss.item())
        if (b%100 == 0) or (b == (2*args.num_batches-1)):
            logger.info('[E:{} | {}]: Train Loss:{:.4}'.format(epoch, b, np.mean(losses)))

    loss = np.mean(losses)
    logger.info('[Epoch:{}]: Training Loss:{:.4}\n'.format(epoch, loss))


logger.info('Saving trained inverter model')
save_path = 'saved_models/criage_inverter/{0}_{1}.model'.format(args.data, model_name)
state = {
    'state_dict': model.state_dict(),
    'optimizer': optimizer.state_dict(),
    'args': vars(args)
}
torch.save(state, save_path)
logger.info('Saving model to {0}'.format(save_path))
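How the trained inverter is consumed belongs to criage_add_attack_1.py, which is not shown in this listing. Purely as a hypothetical illustration of the two decoder heads defined in criage_model.py: E1 and R are logits over all entities and all relations, so a reconstruction for an encoded (e1, rel) pair can be read off with argmax:

# Hypothetical sketch, not repository code.
model.eval()
with torch.no_grad():
    E1, R = model.forward(e1, rel)
    recon_e1 = E1.argmax(dim=-1)    # most likely entity under the inverter
    recon_rel = R.argmax(dim=-1)    # most likely relation under the inverter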
--------------------------------------------------------------------------------
/KGEAttack/ConvE/criage_model.py:
--------------------------------------------------------------------------------
import torch
from torch.nn import functional as F, Parameter
from torch.autograd import Variable


from torch.nn.init import xavier_normal_, xavier_uniform_
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence



class Distmult(torch.nn.Module):
    def __init__(self, args, num_entities, num_relations):
        super(Distmult, self).__init__()
        self.emb_e = torch.nn.Embedding(num_entities, args.embedding_dim, padding_idx=None)
        self.emb_rel = torch.nn.Embedding(num_relations, args.embedding_dim, padding_idx=None)
        self.emb_e.weight.requires_grad = False
        self.emb_rel.weight.requires_grad = False

        self.linear_t = torch.nn.Linear(args.embedding_dim, args.embedding_dim)
        self.linear_rel = torch.nn.Linear(args.embedding_dim, num_relations)
        self.linear_e1 = torch.nn.Linear(args.embedding_dim, num_entities)
        self.linear_t.weight.requires_grad = True
        self.linear_e1.weight.requires_grad = True
        self.linear_rel.weight.requires_grad = True
        self.hidden_drop = torch.nn.Dropout(args.hidden_drop)

        self.inp_drop = torch.nn.Dropout(args.input_drop)
        self.loss = torch.nn.CrossEntropyLoss()

        self.args = args

    def init(self):
        xavier_normal_(self.emb_e.weight.data)
        xavier_normal_(self.emb_rel.weight.data)

    def forward(self, e1, rel):
        #e1_embedded = self.emb_e(e1)
        #rel_embedded = self.emb_rel(rel)
        #e1_embedded = e1_embedded.view(-1, args.embedding_dim)
        #rel_embedded = rel_embedded.view(-1, args.embedding_dim)

        #pred = e1_embedded*rel_embedded
        pred = self.encoder(e1, rel)
        return self.decoder(pred)


    def encoder(self, e1, rel):
        e1_embedded = self.emb_e(e1)
        rel_embedded = self.emb_rel(rel)
        e1_embedded = e1_embedded.squeeze(dim=1)
        rel_embedded = rel_embedded.squeeze(dim=1)

        pred = e1_embedded*rel_embedded

        return pred

    def encoder_2(self, e1):
        e1_embedded = self.emb_e(e1)
        return e1_embedded

    def decoder(self, pred):
        pred = self.linear_t(pred)
        pred = F.relu(pred)
        E1 = self.linear_e1(pred)
        R = self.linear_rel(pred)
        return E1, R


class Conve(torch.nn.Module):
    def __init__(self, args, num_entities, num_relations):
        super(Conve, self).__init__()
        self.emb_e = torch.nn.Embedding(num_entities, args.embedding_dim, padding_idx=None)
        self.emb_rel = torch.nn.Embedding(num_relations, args.embedding_dim, padding_idx=None)
        self.emb_e.weight.requires_grad = False
        self.emb_rel.weight.requires_grad = False

        self.embedding_dim = args.embedding_dim # default is 200
        self.num_filters = args.num_filters # default is 32
        self.kernel_size = args.kernel_size # default is 3
        self.stack_width = args.stack_width # default is 20
        self.stack_height = args.embedding_dim // self.stack_width


        flat_sz_h = int(2*self.stack_width) - self.kernel_size + 1
        flat_sz_w = self.stack_height - self.kernel_size + 1
        self.flat_sz = flat_sz_h*flat_sz_w*self.num_filters

        self.linear_t = torch.nn.Linear(args.embedding_dim, self.flat_sz)
        self.linear_rel = torch.nn.Linear(2*args.embedding_dim, num_relations) # 2* is needed because the encoder stacks the embeddings
        self.linear_e1 = torch.nn.Linear(2*args.embedding_dim, num_entities)

        self.deconv1 = torch.nn.ConvTranspose2d(in_channels=32, out_channels=1, kernel_size=3)

        self.linear_t.weight.requires_grad = True
        self.linear_rel.weight.requires_grad = True
        self.linear_e1.weight.requires_grad = True
        self.deconv1.weight.requires_grad = True

        self.inp_drop = torch.nn.Dropout(args.input_drop)
        self.hidden_drop = torch.nn.Dropout(args.hidden_drop)
        self.feature_map_drop = torch.nn.Dropout2d(args.feat_drop)
        self.loss = torch.nn.CrossEntropyLoss()
        #self.loss = torch.nn.BCELoss()
        #self.emb_dim1 = args.embedding_shape1
        #self.emb_dim2 = args.embedding_dim // self.emb_dim1

        self.conv1 = torch.nn.Conv2d(1, out_channels=self.num_filters,
                                     kernel_size=(self.kernel_size, self.kernel_size),
                                     stride=1, padding=0, bias=args.use_bias)
        #self.conv1 = torch.nn.Conv2d(1, 32, (3, 3), 1, 0, bias=args.use_bias)
        self.bn0 = torch.nn.BatchNorm2d(1)
        self.bn1 = torch.nn.BatchNorm2d(self.num_filters)
        self.bn2 = torch.nn.BatchNorm1d(args.embedding_dim)
        self.register_parameter('b', Parameter(torch.zeros(num_entities)))
        self.fc = torch.nn.Linear(self.flat_sz, args.embedding_dim)
        self.conv1.weight.requires_grad = False
        self.fc.weight.requires_grad = False
        self.args = args

    def init(self):
        xavier_normal_(self.emb_e.weight.data)
        xavier_normal_(self.emb_rel.weight.data)

    def forward(self, e1, rel):
        x = self.encoder(e1, rel)

        return self.decoder(x)

    def encoder(self, e1, rel):
        #e1_embedded = self.emb_e(e1).view(-1, 1, self.emb_dim1, self.emb_dim2)
        e1_embedded = self.emb_e(e1).view(-1, 1, self.stack_width, self.stack_height)
        #rel_embedded = self.emb_rel(rel).view(-1, 1, self.emb_dim1, self.emb_dim2)
        rel_embedded = self.emb_rel(rel).view(-1, 1, self.stack_width, self.stack_height)

        stacked_inputs = torch.cat([e1_embedded, rel_embedded], 2)

        stacked_inputs = self.bn0(stacked_inputs)
        #x = self.inp_drop(stacked_inputs)
        x = stacked_inputs
        #print(x.shape)
        x = self.conv1(x)
        #print(x.shape)
        x = self.bn1(x)
        x = F.relu(x)
        #x = self.feature_map_drop(x)
        x = x.view(x.shape[0], -1)
        x = self.fc(x)
        #x = self.hidden_drop(x)
        x = self.bn2(x)
        x = F.relu(x)
        #x = torch.mm(x, self.emb_e.weight.transpose(1,0))
        #x += self.b.expand_as(x)
        #pred = torch.sigmoid(x)

        return x

    def encoder_2(self, e1):
        e1_embedded = self.emb_e(e1)
        return e1_embedded

    def decoder(self, pred):
        if self.args.embedding_dim == 1000:
            pred = self.linear_t(pred).view(-1, 32, 38, 48) # I got these reshape values by printing the shape after conv in the encoder
        else:
            pred = self.linear_t(pred).view(-1, 32, 38, 8) # I got these reshape values by printing the shape after conv in the encoder
        #print(pred.shape)
        pred = self.deconv1(pred)
        #print(pred.shape)

        pred = F.relu(pred.view(-1, 2*self.args.embedding_dim))
        E1 = self.linear_e1(pred)
        R = self.linear_rel(pred)
        return E1, R
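The hard-coded reshape targets in decoder follow from the encoder arithmetic: with embedding_dim = 200 and stack_width = 20, stack_height = 10, the stacked input is 40 x 10 and a 3x3 convolution with 32 filters leaves 38 x 8 feature maps; with embedding_dim = 1000 the maps are 38 x 48. A quick illustrative check:

# Sanity check on the decoder reshape values (illustrative, not repository code).
def conv_output_shape(stack_width, embedding_dim, kernel_size=3, num_filters=32):
    stack_height = embedding_dim // stack_width
    h = 2*stack_width - kernel_size + 1   # entity and relation embeddings are stacked
    w = stack_height - kernel_size + 1
    return (num_filters, h, w)

print(conv_output_shape(20, 200))    # (32, 38, 8)  -> view(-1, 32, 38, 8)
print(conv_output_shape(20, 1000))   # (32, 38, 48) -> view(-1, 32, 38, 48)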
--------------------------------------------------------------------------------
/KGEAttack/ConvE/dataset.py:
--------------------------------------------------------------------------------
'''
1. Dataset structure -- s, r, one-hot-labels, mode
2. Dataset class takes in (s,r) pairs, their labels and mode, and returns one-hot encoded vectors for labels and s,r as two separate vectors
'''

from torch.utils.data import Dataset
from typing import Dict, Tuple, List
import numpy as np
import torch

class TrainDataset(Dataset):
    def __init__(self, args, num_ent, sr2o: Dict[Tuple[int, int], List[int]], mode: str):
        '''
        Input can be sr2o or or2s
        Mode is 'lhs' for or2s and 'rhs' for sr2o
        '''
        self.sr2o = sr2o
        self.sr = list(self.sr2o.keys())
        self.args = args
        self.n_ent = num_ent
        self.entities = np.arange(self.n_ent, dtype=np.int32)
        self.mode = mode
        # mode is not needed for generating data, but is needed in the data iterator
        # to decide the direction for model.forward()

    def __len__(self):
        return len(self.sr)

    def __getitem__(self, idx):
        sample_key = self.sr[idx]
        s, r = int(sample_key[0]), int(sample_key[1])
        index_target = np.array(self.sr2o[(s,r)], dtype=np.int32)
        sample_label = self.get_label(index_target)
        s, r = torch.tensor(sample_key[0], dtype=torch.long), torch.tensor(sample_key[1], dtype=torch.long)
        index_target = torch.tensor(index_target, dtype=torch.long)
        # label smoothing
        if self.args.label_smoothing != 0.0:
            sample_label = (1.0 - self.args.label_smoothing)*sample_label + (1.0/self.n_ent)

        return s, r, sample_label, self.mode


    @staticmethod
    def collate_fn(data):
        s = torch.stack([_[0] for _ in data], dim=0)
        r = torch.stack([_[1] for _ in data], dim=0)
        #index_target = torch.stack([_[2] for _ in data], dim=0) # this gives an error
        label = torch.stack([_[2] for _ in data], dim=0)
        mode = data[0][3]

        return s, r, label, mode

    def get_label(self, index_target: List[int]):
        # get the multi-one-hot labels from indices
        one_hot = np.zeros(self.n_ent, dtype=np.float32)
        np.add.at(one_hot, index_target, 1.0)
        return torch.FloatTensor(one_hot)


class BidirectionalOneShotIterator(object):
    def __init__(self, dataloader_lhs, dataloader_rhs):
        #self.iterator_lhs = iter(dataloader_lhs)
        #self.iterator_rhs = iter(dataloader_rhs)
        self.iterator_lhs = self.one_shot_iterator(dataloader_lhs)
        self.iterator_rhs = self.one_shot_iterator(dataloader_rhs)
        self.step = 0

    def __next__(self):
        if self.step % 2 == 0:
            data = next(self.iterator_lhs)
        else:
            data = next(self.iterator_rhs)

        self.step += 1
        return data

    def __iter__(self):
        return self

    @staticmethod
    def one_shot_iterator(dataloader):
        '''
        Transform a PyTorch Dataloader into a python iterator
        '''
        while True:
            for data in dataloader:
                yield data
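Taken together with criage_inverter.py above, a minimal sketch of how these classes are wired up (args, n_ent and the sr2o/or2s dictionaries assumed to exist):

from torch.utils.data import DataLoader

# 'rhs' batches predict objects from (s, r); 'lhs' batches predict subjects from (o, r).
loader_lhs = DataLoader(TrainDataset(args, n_ent, or2s_train, mode='lhs'),
                        batch_size=64, shuffle=True, collate_fn=TrainDataset.collate_fn)
loader_rhs = DataLoader(TrainDataset(args, n_ent, sr2o_train, mode='rhs'),
                        batch_size=64, shuffle=True, collate_fn=TrainDataset.collate_fn)

# The iterator alternates sides, so one epoch is 2*num_batches steps.
train_iterator = BidirectionalOneShotIterator(loader_lhs, loader_rhs)
s, r, label, mode = next(train_iterator)

With label_smoothing = 0.1 and N entities, each one-hot target y becomes 0.9*y + 1/N, exactly as in __getitem__ above.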
--------------------------------------------------------------------------------
/KGEAttack/ConvE/elbow_plots/FB15k-237_complex.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeruBhardwaj/InferenceAttack/ddfda138f7937a6313af5392ec401ae89900aaf8/KGEAttack/ConvE/elbow_plots/FB15k-237_complex.png

--------------------------------------------------------------------------------
/KGEAttack/ConvE/elbow_plots/FB15k-237_conve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeruBhardwaj/InferenceAttack/ddfda138f7937a6313af5392ec401ae89900aaf8/KGEAttack/ConvE/elbow_plots/FB15k-237_conve.png

--------------------------------------------------------------------------------
/KGEAttack/ConvE/elbow_plots/FB15k-237_distmult.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeruBhardwaj/InferenceAttack/ddfda138f7937a6313af5392ec401ae89900aaf8/KGEAttack/ConvE/elbow_plots/FB15k-237_distmult.png

--------------------------------------------------------------------------------
/KGEAttack/ConvE/elbow_plots/FB15k-237_transe.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeruBhardwaj/InferenceAttack/ddfda138f7937a6313af5392ec401ae89900aaf8/KGEAttack/ConvE/elbow_plots/FB15k-237_transe.png

--------------------------------------------------------------------------------
/KGEAttack/ConvE/elbow_plots/WN18RR_complex.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeruBhardwaj/InferenceAttack/ddfda138f7937a6313af5392ec401ae89900aaf8/KGEAttack/ConvE/elbow_plots/WN18RR_complex.png

--------------------------------------------------------------------------------
/KGEAttack/ConvE/elbow_plots/WN18RR_conve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeruBhardwaj/InferenceAttack/ddfda138f7937a6313af5392ec401ae89900aaf8/KGEAttack/ConvE/elbow_plots/WN18RR_conve.png

--------------------------------------------------------------------------------
/KGEAttack/ConvE/elbow_plots/WN18RR_distmult.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeruBhardwaj/InferenceAttack/ddfda138f7937a6313af5392ec401ae89900aaf8/KGEAttack/ConvE/elbow_plots/WN18RR_distmult.png

--------------------------------------------------------------------------------
/KGEAttack/ConvE/elbow_plots/WN18RR_transe.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeruBhardwaj/InferenceAttack/ddfda138f7937a6313af5392ec401ae89900aaf8/KGEAttack/ConvE/elbow_plots/WN18RR_transe.png

--------------------------------------------------------------------------------
/KGEAttack/ConvE/evaluation.py:
--------------------------------------------------------------------------------
import torch
import numpy as np
from torch.autograd import Variable
from sklearn import metrics

import datetime
from typing import Dict, Tuple, List
import logging
import os
import pickle

logger = logging.getLogger(__name__) # config already set in main.py

def get_ranking(model, queries: torch.Tensor, num_rel: int,
                filters: Dict[str, Dict[Tuple[int, int], List[int]]],
                device: str,
                batch_size: int = 500
                ):
    ranks = []
    ranks_lhs = []
    ranks_rhs = []
    b_begin = 0
    #logger.info('Computing ranks for all queries')
    while b_begin < len(queries):
        b_queries = queries[b_begin : b_begin+batch_size]
        s, r, o = b_queries[:,0], b_queries[:,1], b_queries[:,2]
        r_rev = r + num_rel
        lhs_score = model.forward(o, r_rev, mode='lhs', sigmoid=False) # this gives scores, not probabilities
        rhs_score = model.forward(s, r, mode='rhs', sigmoid=False) # this gives scores, not probabilities
        for i, query in enumerate(b_queries):
            filter_lhs = filters['lhs'][(query[2].item(), query[1].item())]
            filter_rhs = filters['rhs'][(query[0].item(), query[1].item())]

            # save the prediction that is relevant
            target_value1 = rhs_score[i, query[2].item()].item()
            target_value2 = lhs_score[i, query[0].item()].item()
            # zero all known cases (these are not interesting)
            # this corresponds to the filtered setting
            lhs_score[i][filter_lhs] = -1e6
            rhs_score[i][filter_rhs] = -1e6
            # write back the saved values
            rhs_score[i][query[2].item()] = target_value1
            lhs_score[i][query[0].item()] = target_value2

        # sort and rank
        max_values, lhs_sort = torch.sort(lhs_score, dim=1, descending=True) # high scores get low number ranks
        max_values, rhs_sort = torch.sort(rhs_score, dim=1, descending=True)

        lhs_sort = lhs_sort.cpu().numpy()
        rhs_sort = rhs_sort.cpu().numpy()

        for i, query in enumerate(b_queries):
            # find the rank of the target entities
            lhs_rank = np.where(lhs_sort[i]==query[0].item())[0][0]
            rhs_rank = np.where(rhs_sort[i]==query[2].item())[0][0]

            # rank+1, since the lowest rank is rank 1 not rank 0
            ranks_lhs.append(lhs_rank + 1)
            ranks_rhs.append(rhs_rank + 1)

        b_begin += batch_size

    #logger.info('Ranking done for all queries')
    return ranks_lhs, ranks_rhs



def evaluation(model, queries, to_skip_eval: Dict[str, Dict[Tuple[int, int], List[int]]],
               save_name: str, num_rel: int = 0, split: str = 'test', batch_size: int = 500, epoch: int = -1, device: str = "cpu"):


    examples = torch.from_numpy(queries.astype('int64')).to(device)

    # get ranking
    ranks_lhs, ranks_rhs = get_ranking(model, examples, num_rel, to_skip_eval, device, batch_size)
    ranks_lhs, ranks_rhs = np.array(ranks_lhs), np.array(ranks_rhs)

    # final logging
    hits_at = np.arange(1, 11)
    hits_at_lhs = list(map(lambda x: np.mean((ranks_lhs <= x), dtype=np.float64).item(),
                           hits_at))
    hits_at_rhs = list(map(lambda x: np.mean((ranks_rhs <= x), dtype=np.float64).item(),
                           hits_at))
    mr_lhs = np.mean(ranks_lhs, dtype=np.float64).item()
    mr_rhs = np.mean(ranks_rhs, dtype=np.float64).item()

    mrr_lhs = np.mean(1. / ranks_lhs, dtype=np.float64).item()
    mrr_rhs = np.mean(1. / ranks_rhs, dtype=np.float64).item()


    logger.info('')
    logger.info('-'*50)
    logger.info(split+'_'+save_name)
    logger.info('-'*50)
    logger.info('')
    for i in hits_at:
        logger.info('Hits left @{0}: {1}'.format(i, hits_at_lhs[i-1]))
        logger.info('Hits right @{0}: {1}'.format(i, hits_at_rhs[i-1]))
        logger.info('Hits @{0}: {1}'.format(i, np.mean([hits_at_lhs[i-1], hits_at_rhs[i-1]]).item()))
    logger.info('Mean rank lhs: {0}'.format(mr_lhs))
    logger.info('Mean rank rhs: {0}'.format(mr_rhs))
    logger.info('Mean rank: {0}'.format(np.mean([mr_lhs, mr_rhs])))
    logger.info('Mean reciprocal rank lhs: {0}'.format(mrr_lhs))
    logger.info('Mean reciprocal rank rhs: {0}'.format(mrr_rhs))
    logger.info('Mean reciprocal rank: {0}'.format(np.mean([mrr_rhs, mrr_lhs])))

    with open(os.path.join('results', split + '_' + save_name + '.txt'), 'a') as text_file:
        text_file.write('Epoch: {0}\n'.format(epoch))
        text_file.write('Lhs denotes ranking by subject corruptions \n')
        text_file.write('Rhs denotes ranking by object corruptions \n')
        for i in hits_at:
            text_file.write('Hits left @{0}: {1}\n'.format(i, hits_at_lhs[i-1]))
            text_file.write('Hits right @{0}: {1}\n'.format(i, hits_at_rhs[i-1]))
            text_file.write('Hits @{0}: {1}\n'.format(i, np.mean([hits_at_lhs[i-1], hits_at_rhs[i-1]]).item()))
        text_file.write('Mean rank lhs: {0}\n'.format(mr_lhs))
        text_file.write('Mean rank rhs: {0}\n'.format(mr_rhs))
        text_file.write('Mean rank: {0}\n'.format(np.mean([mr_lhs, mr_rhs])))
        text_file.write('MRR lhs: {0}\n'.format(mrr_lhs))
        text_file.write('MRR rhs: {0}\n'.format(mrr_rhs))
        text_file.write('MRR: {0}\n'.format(np.mean([mrr_rhs, mrr_lhs])))
        text_file.write('-------------------------------------------------\n')


    results = {}
    for i in hits_at:
        results['hits_lhs@{}'.format(i)] = hits_at_lhs[i-1]
        results['hits_rhs@{}'.format(i)] = hits_at_rhs[i-1]
    results['mrr_lhs'] = mrr_lhs
    results['mrr_rhs'] = mrr_rhs
    results['mr_lhs'] = mr_lhs
    results['mr_rhs'] = mr_rhs

    return results
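A toy trace of the filtered-setting block in get_ranking above: the target's own score is saved, every other known-true candidate is pushed down to -1e6, the target's score is written back, and the rank is read off the sorted order (values illustrative):

import numpy as np
import torch

scores = torch.tensor([[0.9, 0.8, 0.7, 0.6]])  # scores over 4 candidate entities
target, known = 2, [0]                         # entity 2 is the answer; entity 0 is another known answer
saved = scores[0, target].item()
scores[0][known] = -1e6                        # filter out the other known answers
scores[0][target] = saved                      # write back the target's own score
order = torch.sort(scores, dim=1, descending=True)[1].numpy()
rank = np.where(order[0] == target)[0][0] + 1  # rank 2: only entity 1 still outranks the target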
--------------------------------------------------------------------------------
/KGEAttack/ConvE/inst_add_attack.py:
--------------------------------------------------------------------------------
### Add triples based on the IF (influential) triple, chosen by instance similarity - cos, dot, l2
### In inference attacks, 2 additions are selected to decrease the target triple's ranks on the s-side and the o-side
### But in the attribution attack, the target triple's rank is reduced (on both the s-side and the o-side) by selecting the IF triple and adding its corrupted version
### Thus, to integrate attribution attacks here, I select two IF triples in the neighbourhood and add their corrupted versions as the 2 adversarial additions. Another version to experiment with would be to select the IF triples for the s-side and o-side ranks separately and then add their corrupted versions as adversarial additions - the final edits would then be of the form (test_s, test_r', test_o') for the o-side, and (test_s', test_r', test_o) for the s-side ranks.

import pickle
from typing import Dict, Tuple, List
import os
import numpy as np
import pandas as pd
from collections import defaultdict
import operator

import json
import logging
import argparse
import math
from pprint import pprint
import errno
import time

import torch
from torch.utils.data import DataLoader
import torch.backends.cudnn as cudnn
from torch import nn
from torch.nn import CrossEntropyLoss
from torch.nn import functional as F
import torch.autograd as autograd

from evaluation import evaluation
from model import Distmult, Complex, Conve, Transe
import utils

def get_if_triple(test_trip, nghbr_trip, model, attack_batch_size, simmetric):
    test_trip = test_trip[None, :] # add a batch dimension
    test_trip = torch.from_numpy(test_trip).to(device)
    test_s, test_r, test_o = test_trip[:,0], test_trip[:,1], test_trip[:,2]
    test_vec = model.score_triples_vec(test_s, test_r, test_o)

    b_begin = 0
    nghbr_sim = []
    if attack_batch_size == -1:
        nghbr_batch = nghbr_trip.shape[0]
    else:
        nghbr_batch = attack_batch_size

    while b_begin < nghbr_trip.shape[0]:
        b_nghbr_trip = nghbr_trip[b_begin : b_begin+nghbr_batch]
        b_nghbr_trip = torch.from_numpy(b_nghbr_trip).to(device)
        b_nghbr_s, b_nghbr_r, b_nghbr_o = b_nghbr_trip[:,0], b_nghbr_trip[:,1], b_nghbr_trip[:,2]
        b_nghbr_vec = model.score_triples_vec(b_nghbr_s, b_nghbr_r, b_nghbr_o)
        # shape of nghbr_vec is (num_nghbrs x emb_dim), e.g. (459 x 100)
        # shape of test_vec is (1 x emb_dim)
        if simmetric == 'l2':
            b_sim = -torch.norm((b_nghbr_vec - test_vec), p=2, dim=-1)
        elif simmetric == 'dot':
            b_sim = torch.matmul(b_nghbr_vec, test_vec.t())
        else: ## cos
            b_sim = F.cosine_similarity(test_vec, b_nghbr_vec) # default dim=1

        b_sim = b_sim.detach().cpu().numpy().tolist()
        nghbr_sim += b_sim
        b_begin += nghbr_batch

    nghbr_sim = np.array(nghbr_sim)
    nghbr_sim = torch.from_numpy(nghbr_sim).to(device)
    # we want the two neighbours with maximum similarity, i.e. the most influential triples
    max_values, argsort = torch.sort(nghbr_sim, -1, descending=True)
    del_idx_1, del_idx_2 = argsort[0], argsort[1]

    return del_idx_1, del_idx_2



def get_additions(train_data, test_data, neighbours, model, attack_batch_size, simmetric):
    logger.info('------ Generating edits per target triple ------')
    start_time = time.time()
    logger.info('Start time: {0}'.format(str(start_time)))

    if args.model == 'complex':
        ent_emb = torch.cat((model.emb_e_real.weight, model.emb_e_img.weight), dim=-1)
        rel_emb = torch.cat((model.emb_rel_real.weight, model.emb_rel_img.weight), dim=-1)
    else:
        ent_emb = model.emb_e.weight
        rel_emb = model.emb_rel.weight


    triples_to_delete = []
    triples_to_add = []
    summary_dict = {}
    for test_idx, test_trip in enumerate(test_data):
        test_nghbrs = neighbours[test_idx]
        nghbr_trip = train_data[test_nghbrs]
        del_idx_1, del_idx_2 = get_if_triple(test_trip, nghbr_trip, model, attack_batch_size, simmetric)
        if_trips = [nghbr_trip[del_idx_1], nghbr_trip[del_idx_2]]

        test_trip = torch.from_numpy(test_trip).to(device)[None,:]
        test_s, test_r, test_o = test_trip[:,0], test_trip[:,1], test_trip[:,2]

        summary_list = []
        summary_list.append(list(map(int, [test_s.item(), test_r.item(), test_o.item()])))

        for if_trip in if_trips:
            if_trip = torch.from_numpy(if_trip).to(device)[None,:]
            if_s, if_r, if_o = if_trip[:,0], if_trip[:,1], if_trip[:,2]

            if (if_o == test_s or if_o == test_o):
                # object of IF triple is neighbour - edit will be [s_dash, if_r, if_o]
                if args.model == 'complex':
                    if_s_emb = torch.cat((model.emb_e_real(if_s), model.emb_e_img(if_s)), dim=-1).squeeze(dim=1)
                else:
                    if_s_emb = model.emb_e(if_s).squeeze(dim=1)
                cos_sim_s = F.cosine_similarity(if_s_emb, ent_emb)
                #cos_sim_r = F.cosine_similarity(if_r_emb, rel_emb)

                # filter for (s_dash, r, o), i.e. ignore s_dash that already exist
                filter_s = train_data[np.where((train_data[:,2] == if_o.item())
                                               & (train_data[:,1] == if_r.item())), 0].squeeze()
                #filter_r = train_data[np.where((train_data[:,0] == if_s.item())
                #                               & (train_data[:,2] == if_o.item())), 1].squeeze()
                cos_sim_s[filter_s] = 1e6
                #cos_sim_r[filter_r] = 1e6

                # sort and rank - smallest cosine similarity means largest cosine distance
                # Hence, corrupted entity = one with smallest cos similarity
                min_values_s, argsort_s = torch.sort(cos_sim_s, -1, descending=False)
                #min_values_r, argsort_r = torch.sort(cos_sim_r, -1, descending=False)
                s_dash = argsort_s[0][None, None]
                #r_dash = argsort_r[0][None, None]

                add_trip = [s_dash.item(), if_r.item(), if_o.item()]

            elif (if_s == test_s or if_s == test_o):
                #print('s is neighbour')
                # subject of IF triple is neighbour - edit will be [if_s, if_r, o_dash]
                if args.model == 'complex':
                    if_o_emb = torch.cat((model.emb_e_real(if_o), model.emb_e_img(if_o)), dim=-1).squeeze(dim=1)
                else:
                    if_o_emb = model.emb_e(if_o).squeeze(dim=1)
                #if_r_emb = model.emb_rel(if_r).squeeze(dim=1)
                cos_sim_o = F.cosine_similarity(if_o_emb, ent_emb)
                #cos_sim_r = F.cosine_similarity(if_r_emb, rel_emb)
                # filter for (s, r, o_dash), i.e. ignore o_dash that already exist
                filter_o = train_data[np.where((train_data[:,0] == if_s.item())
                                               & (train_data[:,1] == if_r.item())), 2].squeeze()
                #filter_r = train_data[np.where((train_data[:,0] == if_s.item())
                #                               & (train_data[:,2] == if_o.item())), 1].squeeze()
                cos_sim_o[filter_o] = 1e6
                #cos_sim_r[filter_r] = 1e6

                # sort and rank - smallest cosine similarity means largest cosine distance
                # Hence, corrupted entity = one with smallest cos similarity
                min_values_o, argsort_o = torch.sort(cos_sim_o, -1, descending=False)
                #min_values_r, argsort_r = torch.sort(cos_sim_r, -1, descending=False)
                o_dash = argsort_o[0][None, None]
                #r_dash = argsort_r[0][None, None]

                add_trip = [if_s.item(), if_r.item(), o_dash.item()]

            else:
                logger.info('Unexpected behaviour')

            triples_to_delete.append(if_trip)
            triples_to_add.append(add_trip)
            summary_list.append(list(map(int, add_trip)))

        summary_dict[test_idx] = summary_list
        if test_idx%100 == 0 or test_idx == test_data.shape[0]-1:
            logger.info('Processed test triple {0}'.format(str(test_idx)))
            logger.info('Time taken: {0}'.format(str(time.time() - start_time)))
    logger.info('Time taken to generate edits: {0}'.format(str(time.time() - start_time)))

    return triples_to_delete, triples_to_add, summary_dict

if __name__ == '__main__':


    parser = utils.get_argument_parser()
    parser.add_argument('--target-split', type=int, default=1, help='Ranks to use for target set. Values are 1 for ranks <=10; 2 for ranks>10 and ranks<=100. Default: 1')
    parser.add_argument('--budget', type=int, default=1, help='Budget for each target triple for each corruption side')
    parser.add_argument('--rand-run', type=int, default=1, help='A number assigned to the random run of experiment')
    parser.add_argument('--attack-batch-size', type=int, default=-1, help='Batch size for processing neighbours of target')

    parser.add_argument('--sim-metric', type=str, default='cos', help='Similarity metric for the attribution attack - cos, dot, l2')

    args = parser.parse_args()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    args.device = device


    #args.target_split = 1 # which target split to use
    #Values are 1 for ranks <=10; 2 for ranks>10 and ranks<=100.
    #args.budget = 1 # indicates the num of adversarial edits for each target triple for each corruption side
    #args.rand_run = 1 # a number assigned to the random run of the experiment
    args.seed = args.seed + (args.rand_run - 1) # default seed is 17

    if args.reproduce_results:
        args = utils.set_hyperparams(args)


    # Fixing random seeds for reproducibility - https://pytorch.org/docs/stable/notes/randomness.html
    torch.manual_seed(args.seed)
    cudnn.deterministic = True
    cudnn.benchmark = False
    np.random.seed(args.seed)
    rng = np.random.default_rng(seed=args.seed)


    args.epochs = -1 # no training here
    model_name = '{0}_{1}_{2}_{3}_{4}'.format(args.model, args.embedding_dim, args.input_drop, args.hidden_drop, args.feat_drop)
    model_path = 'saved_models/{0}_{1}.model'.format(args.data, model_name)
    log_path = 'logs/attack_logs/inst_add_{5}/{0}_{1}_{2}_{3}_{4}'.format(args.model, args.data,
                                                                          args.target_split, args.budget, args.rand_run, args.sim_metric)


    logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s',
                        datefmt = '%m/%d/%Y %H:%M:%S',
                        level = logging.INFO,
                        filename = log_path
                        )
    logger = logging.getLogger(__name__)


    data_path = 'data/target_{0}_{1}_{2}'.format(args.model, args.data, args.target_split)

    n_ent, n_rel, ent_to_id, rel_to_id = utils.generate_dicts(data_path)

    ##### load data ####
    data = utils.load_data(data_path)
    train_data, valid_data, test_data = data['train'], data['valid'], data['test']

    inp_f = open(os.path.join(data_path, 'to_skip_eval.pickle'), 'rb')
    to_skip_eval: Dict[str, Dict[Tuple[int, int], List[int]]] = pickle.load(inp_f)
    inp_f.close()
    to_skip_eval['lhs'] = {(int(k[0]), int(k[1])): v for k,v in to_skip_eval['lhs'].items()}
    to_skip_eval['rhs'] = {(int(k[0]), int(k[1])): v for k,v in to_skip_eval['rhs'].items()}


    model = utils.load_model(model_path, args, n_ent, n_rel, device)

    neighbours = utils.generate_nghbrs(test_data, train_data)
    # test set is the target set because we loaded data from target_...
244 |
245 | if_triples, triples_to_add, summary_dict = get_additions(train_data, test_data, neighbours, model, args.attack_batch_size, args.sim_metric)
246 |
247 | triples_to_add = np.asarray(triples_to_add)
248 | if_triples = np.asarray(if_triples)
249 |
250 | new_train_1 = np.concatenate((triples_to_add, train_data))
251 |
252 | logger.info('Length of original training set: ' + str(train_data.shape[0]))
253 | logger.info('Length of new poisoned training set (including duplicates): ' + str(new_train_1.shape[0]))
254 |
255 | df = pd.DataFrame(new_train_1)
256 | df = df.drop_duplicates()
257 | new_train = df.values
258 | #new_train = new_train_1
259 |
260 |
261 | logger.info('Length of original training set: ' + str(train_data.shape[0]))
262 | logger.info('Length of new poisoned training set (after dropping duplicates): ' + str(new_train.shape[0]))
263 |
264 | num_en_or = np.unique(np.concatenate((train_data[:,0], train_data[:,2]))).shape[0]
265 | num_en_pos = np.unique(np.concatenate((new_train[:,0], new_train[:,2]))).shape[0]
266 |
267 | save_path = 'data/inst_add_{5}_{0}_{1}_{2}_{3}_{4}'.format(args.model, args.data, args.target_split, args.budget, args.rand_run,
268 | args.sim_metric
269 | )
270 | try :
271 | os.makedirs(save_path)
272 | except OSError as e:
273 | if e.errno == errno.EEXIST:
274 | logger.info(e)
275 | logger.info('Using the existing folder {0} for processed data'.format(save_path))
276 | else:
277 | raise
278 |
279 |
280 | with open(os.path.join(save_path, 'train.txt'), 'w') as out:
281 | for item in new_train:
282 | out.write("%s\n" % "\t".join(map(str, item)))
283 |
284 | out = open(os.path.join(save_path, 'train.pickle'), 'wb')
285 | pickle.dump(new_train.astype('uint64'), out)
286 | out.close()
287 |
288 |
289 | with open(os.path.join(save_path, 'entities_dict.json'), 'w') as f:
290 | f.write(json.dumps(ent_to_id) + '\n')
291 |
292 | with open(os.path.join(save_path, 'relations_dict.json'), 'w') as f:
293 | f.write(json.dumps(rel_to_id) + '\n')
294 |
295 | with open(os.path.join(save_path, 'valid.txt'), 'w') as out:
296 | for item in valid_data:
297 | out.write("%s\n" % "\t".join(map(str, item)))
298 |
299 | out = open(os.path.join(save_path, 'valid.pickle'), 'wb')
300 | pickle.dump(valid_data.astype('uint64'), out)
301 | out.close()
302 |
303 | with open(os.path.join(save_path, 'test.txt'), 'w') as out:
304 | for item in test_data:
305 | out.write("%s\n" % "\t".join(map(str, item)))
306 |
307 | out = open(os.path.join(save_path, 'test.pickle'), 'wb')
308 | pickle.dump(test_data.astype('uint64'), out)
309 | out.close()
310 |
311 | with open(os.path.join(save_path, 'influential_triples.txt'), 'w') as out:
312 | for item in if_triples:
313 | out.write("%s\n" % "\t".join(map(str, item)))
314 |
315 | with open(os.path.join(save_path, 'adversarial_additions.txt'), 'w') as out:
316 | for item in triples_to_add:
317 | out.write("%s\n" % "\t".join(map(str, item)))
318 |
319 | with open(os.path.join(save_path, 'summary_edits.json'), 'w') as out:
320 | out.write(json.dumps(summary_dict) + '\n')
321 |
322 | with open(os.path.join(save_path, 'stats.txt'), 'w') as f:
323 | f.write('Length of original training set: {0} \n'.format(train_data.shape[0]))
324 | f.write('Length of new poisoned training set: {0} \n'.format(new_train.shape[0]))
325 | f.write('Length of new poisoned training set including duplicates: {0} \n'.format(new_train_1.shape[0]))
326 | f.write('Number of entities in original training set: {0} \n'.format(num_en_or))
327 | f.write('Number of entities in poisoned training set: {0} \n'.format(num_en_pos))
328 | f.write('Length of original test set: {0} \n'.format(test_data.shape[0]))
329 | f.write('Number of triples added: {0}\n'.format(triples_to_add.shape[0]))
330 | #f.write('Number of triples added from corrupting o_side: {0} (o_dash, r, s)\n'.format(trips_to_add_o.shape[0]))
331 | #f.write('Number of triples added from corrupting s_side: {0} (o, r, s_dash)\n'.format(trips_to_add_s.shape[0]))
332 | #f.write('In this version, I use reciprocal embedding and its inverse to select (o, r, s_dash)\n')
333 | f.write('Instance Attribution Attacks - This attack version was generated using the similarity metric: {0} \n'.format(args.sim_metric))
334 | #f.write('Flag value for maximizing soft truth (If False, minimize): {0}\n'.format(maximize))
335 | f.write('---------------------------------------------------------------------- \n')
336 |
337 |
338 |
339 |
--------------------------------------------------------------------------------
/KGEAttack/ConvE/preprocess.py:
--------------------------------------------------------------------------------
1 | '''
2 | 1. Read the string data and generate dictionaries
3 | 2. Convert string data to processed data and save it along with dictionaries
4 | '''
5 |
6 | import numpy as np
7 | import sys
8 | import os
9 | import errno
10 | import json
11 | import pandas as pd
12 | import pickle
13 |
14 |
15 | if len(sys.argv) > 1:
16 | dataset_name = sys.argv[1] # name of dataset
17 | else:
18 | #dataset_name = 'FB15k-237'
19 | #dataset_name = 'YAGO3-10'
20 | #dataset_name = 'WN18'
21 | #dataset_name = 'FB15k'
22 | dataset_name = 'WN18RR'
23 |
24 | seed = 345345
25 | np.random.seed(seed)
26 | rdm = np.random.RandomState(seed)
27 | rng = np.random.default_rng(seed)
28 |
29 | base_path = 'data/{0}_original/'.format(dataset_name)
30 | #processed_path = 'data/processed_{0}'.format(dataset_name)
31 | processed_path = 'data/{0}'.format(dataset_name)
32 | files = ['train', 'valid', 'test']
33 |
34 | def _load_data(file_path):
35 | df = pd.read_csv(file_path, sep='\t', header=None, names=None, dtype=str)
36 | df = df.drop_duplicates()
37 | return df.values
38 |
39 | def generate_ids():
40 | complete_data = []
41 | for file in files:
42 | file_path = os.path.join(base_path, file+'.txt')
43 | complete_data.append(_load_data(file_path))
44 |
45 | complete_data = np.concatenate(complete_data)
46 | unique_ent = np.unique(np.concatenate((complete_data[:,0], complete_data[:,2])))
47 | unique_rel = np.unique(complete_data[:,1])
48 |
49 | entities_to_id = {x:i for (i,x) in enumerate(sorted(unique_ent))}
50 | rel_to_id = {x:i for (i,x) in enumerate(sorted(unique_rel))}
51 |
52 | print("{}: {} entities and {} relations".format(dataset_name, len(unique_ent), len(unique_rel)))
53 |
54 | return unique_ent, unique_rel, entities_to_id, rel_to_id
55 |
56 | def generate_ids_from_train():
57 | file_path = os.path.join(base_path, 'train.txt')
58 | X_train = _load_data(file_path)
59 | #complete_dataset = np.concatenate(complete_dataset)
60 | unique_ent = np.unique(np.concatenate((X_train[:, 0], X_train[:, 2])))
61 | unique_rel = np.unique(X_train[:, 1])
62 |
63 | entities_to_id = {x: i for (i, x) in enumerate(sorted(unique_ent))}
64 | rel_to_id = {x: i for (i, x) in enumerate(sorted(unique_rel))}
65 |
66 | print("{}: {} entities and {} relations".format(dataset_name, len(unique_ent), len(unique_rel)))
67 |
68 | return unique_ent, unique_rel, entities_to_id, rel_to_id
69 |
70 |
71 | def process_and_save(entities_to_id, relations_to_id, unique_ent):
72 | try :
os.makedirs(processed_path) 74 | except OSError as e: 75 | if e.errno == errno.EEXIST: 76 | print(e) 77 | print('Using the existing folder {0} for processed data'.format(processed_path)) 78 | else: 79 | raise 80 | 81 | with open(os.path.join(processed_path, 'dataset_stats.txt'), 'w') as file: 82 | file.write("{}: {} entities and {} relations \n".format(dataset_name, len(unique_ent), len(unique_rel))) 83 | 84 | # function to filter out triples with unseen entities 85 | def _filter_unseen_entities(x): 86 | ent_seen = unique_ent 87 | df = pd.DataFrame(x, columns=['s', 'p', 'o']) 88 | filtered_df = df[df.s.isin(ent_seen) & df.o.isin(ent_seen)] 89 | n_removed_ents = df.shape[0] - filtered_df.shape[0] 90 | return filtered_df.values, n_removed_ents 91 | 92 | 93 | for f in files: 94 | file_path = os.path.join(base_path, f+'.txt') 95 | x = _load_data(file_path) 96 | x, n_removed_ents = _filter_unseen_entities(x) # filter unseen entities if any 97 | if n_removed_ents > 0: 98 | msg = '{0}: {1} split: Removed {2} triples containing unseen entities. \n'.format(dataset_name, f, n_removed_ents) 99 | with open(os.path.join(processed_path, 'dataset_stats.txt'), 'a') as file: 100 | file.write(msg) 101 | print(msg) 102 | x_idx_s = np.vectorize(entities_to_id.get)(x[:, 0]) 103 | x_idx_p = np.vectorize(relations_to_id.get)(x[:, 1]) 104 | x_idx_o = np.vectorize(entities_to_id.get)(x[:, 2]) 105 | 106 | x = np.dstack([x_idx_s, x_idx_p, x_idx_o]).reshape((-1, 3)) 107 | 108 | with open(os.path.join(processed_path, f+'.txt'), 'w') as out: 109 | for item in x: 110 | out.write("%s\n" % "\t".join(map(str, item))) 111 | 112 | out = open(os.path.join(processed_path, f+'.pickle'), 'wb') 113 | pickle.dump(x.astype('uint64'), out) 114 | out.close() 115 | 116 | return 117 | 118 | 119 | filter_unseen = True # if needed, pass as cmd argument 120 | if filter_unseen: 121 | unique_ent, unique_rel, entities_to_id, rel_to_id = generate_ids_from_train() 122 | else: 123 | unique_ent, unique_rel, entities_to_id, rel_to_id = generate_ids() 124 | 125 | n_relations = len(unique_rel) 126 | n_entities = len(unique_ent) 127 | 128 | 129 | process_and_save(entities_to_id, rel_to_id, unique_ent) 130 | 131 | 132 | with open(os.path.join(processed_path, 'entities_dict.json'), 'w') as f: 133 | f.write(json.dumps(entities_to_id) + '\n') 134 | 135 | with open(os.path.join(processed_path, 'relations_dict.json'), 'w') as f: 136 | f.write(json.dumps(rel_to_id) + '\n') 137 | 138 | print("{}: {} entities and {} relations".format(dataset_name, len(unique_ent), len(unique_rel))) 139 | 140 | 141 | 142 | -------------------------------------------------------------------------------- /KGEAttack/ConvE/select_targets.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | from typing import Dict, Tuple, List 3 | import os 4 | import numpy as np 5 | import json 6 | import torch 7 | import logging 8 | import argparse 9 | import math 10 | from pprint import pprint 11 | 12 | import torch 13 | from torch.utils.data import DataLoader 14 | import torch.backends.cudnn as cudnn 15 | 16 | from dataset import TrainDataset, BidirectionalOneShotIterator 17 | from evaluation import evaluation 18 | from model import Distmult, Complex, Conve, Transe 19 | 20 | 21 | 22 | def add_arguments(): 23 | parser = argparse.ArgumentParser(description='Link prediction for knowledge graphs') 24 | 25 | parser.add_argument('--data', type=str, default='FB15k-237', help='Dataset to use: {FB15k-237, YAGO3-10, WN18RR, umls, nations, kinship}, 
default: FB15k-237')
26 | parser.add_argument('--model', type=str, default='conve', help='Choose from: {conve, distmult, complex}')
27 | parser.add_argument('--add-reciprocals', action='store_true', help='Option to add reciprocal relations')
28 |
29 |
30 | parser.add_argument('--transe-margin', type=float, default=12.0, help='Margin value for TransE scoring function. Default:12.0')
31 | parser.add_argument('--transe-norm', type=int, default=2, help='P-norm value for TransE scoring function. Default:2')
32 |
33 | parser.add_argument('--epochs', type=int, default=400, help='Number of epochs to train (default: 400)')
34 | parser.add_argument('--lr', type=float, default=0.001, help='Learning rate (default: 0.001)')#maybe 0.1
35 | parser.add_argument('--lr-decay', type=float, default=0.0, help='Weight decay value to use in the optimizer. Default: 0.0')
36 | parser.add_argument('--max-norm', action='store_true', help='Option to add unit max norm constraint to entity embeddings')
37 |
38 | parser.add_argument('--num-batches', type=int, default=400, help='Number of batches for training (default: 400)') #maybe 200?
39 | parser.add_argument('--test-batch-size', type=int, default=128, help='Batch size for test split (default: 128)')
40 | parser.add_argument('--valid-batch-size', type=int, default=128, help='Batch size for valid split (default: 128)')
41 | parser.add_argument('--num-workers', type=int, default=4, help='Number of workers to use for the batch loaders on GPU. Default: 4')
42 |
43 | parser.add_argument('--embedding-dim', type=int, default=200, help='The embedding dimension (1D). Default: 200')
44 |
45 | parser.add_argument('--stack_width', type=int, default=20, help='The first dimension of the reshaped/stacked 2D embedding. Second dimension is inferred. Default: 20')
46 | #parser.add_argument('--stack_height', type=int, default=10, help='The second dimension of the reshaped/stacked 2D embedding. Default: 10')
47 | parser.add_argument('--hidden-drop', type=float, default=0.3, help='Dropout for the hidden layer. Default: 0.3.')
48 | parser.add_argument('--input-drop', type=float, default=0.2, help='Dropout for the input embeddings. Default: 0.2.')
49 | parser.add_argument('--feat-drop', type=float, default=0.3, help='Dropout for the convolutional features. Default: 0.3.')
50 | parser.add_argument('-num-filters', default=32, type=int, help='Number of filters for convolution')
51 | parser.add_argument('-kernel-size', default=3, type=int, help='Kernel Size for convolution')
52 |
53 | parser.add_argument('--use-bias', action='store_true', help='Use a bias in the convolutional layer (off unless this flag is set)')
54 | parser.add_argument('--label-smoothing', type=float, default=0.1, help='Label smoothing value to use. Default: 0.1')
55 |
56 |
57 | parser.add_argument('--reg-weight', type=float, default=5e-12, help='Weight for regularization. Default: 5e-12')#maybe 5e-2?
58 | parser.add_argument('--reg-norm', type=int, default=2, help='Norm for regularization. 
Default: 3') 59 | 60 | parser.add_argument('--resume', action='store_true', help='Restore a saved model.') 61 | parser.add_argument('--resume-split', type=str, default='test', help='Split to evaluate a restored model') 62 | parser.add_argument('--seed', type=int, default=17, metavar='S', help='Random seed (default: 17)') 63 | 64 | return parser 65 | 66 | def set_paths(args): 67 | model_name = '{0}_{1}_{2}_{3}_{4}'.format(args.model, args.embedding_dim, args.input_drop, args.hidden_drop, args.feat_drop) 68 | model_path = 'saved_models/{0}_{1}.model'.format(args.data, model_name) 69 | eval_name = '{0}_{1}_{2}_{3}_{4}_{5}'.format(args.data, model_name, args.num_batches, args.epochs, args.valid_batch_size, args.test_batch_size) 70 | log_path = 'logs/select_target_{0}_{1}_{2}_{3}_{4}.log'.format(args.data, args.target_split, model_name, args.num_batches, args.epochs) 71 | 72 | return model_name, model_path, eval_name, log_path 73 | 74 | def generate_dicts(data_path): 75 | with open (os.path.join(data_path, 'entities_dict.json'), 'r') as f: 76 | ent_to_id = json.load(f) 77 | with open (os.path.join(data_path, 'relations_dict.json'), 'r') as f: 78 | rel_to_id = json.load(f) 79 | n_ent = len(list(ent_to_id.keys())) 80 | n_rel = len(list(rel_to_id.keys())) 81 | 82 | return n_ent, n_rel, ent_to_id, rel_to_id 83 | 84 | def load_data(data_path): 85 | data = {} 86 | for split in ['train', 'valid', 'test']: 87 | inp_f = open(os.path.join(data_path, split+'.pickle'), 'rb') 88 | data[split] = np.array(pickle.load(inp_f)) 89 | inp_f.close() 90 | 91 | return data 92 | 93 | def add_model(args, n_ent, n_rel): 94 | if args.add_reciprocals: 95 | if args.model is None: 96 | model = Conve(args, n_ent, 2*n_rel) 97 | elif args.model == 'conve': 98 | model = Conve(args, n_ent, 2*n_rel) 99 | elif args.model == 'distmult': 100 | model = Distmult(args, n_ent, 2*n_rel) 101 | elif args.model == 'complex': 102 | model = Complex(args, n_ent, 2*n_rel) 103 | elif args.model == 'transe': 104 | model = Transe(args, n_ent, 2*n_rel) 105 | else: 106 | logger.info('Unknown model: {0}', args.model) 107 | raise Exception("Unknown model!") 108 | else: 109 | if args.model is None: 110 | model = Conve(args, n_ent, n_rel) 111 | elif args.model == 'conve': 112 | model = Conve(args, n_ent, n_rel) 113 | elif args.model == 'distmult': 114 | model = Distmult(args, n_ent, n_rel) 115 | elif args.model == 'complex': 116 | model = Complex(args, n_ent, n_rel) 117 | elif args.model == 'transe': 118 | model = Transe(args, n_ent, n_rel) 119 | else: 120 | logger.info('Unknown model: {0}', args.model) 121 | raise Exception("Unknown model!") 122 | 123 | #model.to(self.device) 124 | return model 125 | 126 | def get_ranking(model, queries:torch.Tensor, num_rel:int, 127 | filters:Dict[str, Dict[Tuple[int, int], List[int]]], 128 | device: str, 129 | batch_size: int = 500 130 | ): 131 | ranks = [] 132 | ranks_lhs = [] 133 | ranks_rhs = [] 134 | b_begin = 0 135 | #logger.info('Computing ranks for all queries') 136 | while b_begin < len(queries): 137 | b_queries = queries[b_begin : b_begin+batch_size] 138 | s,r,o = b_queries[:,0], b_queries[:,1], b_queries[:,2] 139 | r_rev = r+num_rel 140 | lhs_score = model.forward(o,r_rev, mode='lhs', sigmoid=False) #this gives scores not probabilities 141 | rhs_score = model.forward(s,r, mode='rhs', sigmoid=False) # this gives scores not probabilities 142 | 143 | for i, query in enumerate(b_queries): 144 | filter_lhs = filters['lhs'][(query[2].item(), query[1].item())] 145 | filter_rhs = filters['rhs'][(query[0].item(), 
query[1].item())]
146 |
147 | # save the prediction that is relevant
148 | target_value1 = rhs_score[i, query[2].item()].item()
149 | target_value2 = lhs_score[i, query[0].item()].item()
150 | # zero all known cases (these are not interesting)
151 | # this corresponds to the filtered setting
152 | lhs_score[i][filter_lhs] = -1e6
153 | rhs_score[i][filter_rhs] = -1e6
154 | # write back the saved values
155 | rhs_score[i][query[2].item()] = target_value1
156 | lhs_score[i][query[0].item()] = target_value2
157 |
158 | # sort and rank
159 | max_values, lhs_sort = torch.sort(lhs_score, dim=1, descending=True) #high scores get low number ranks
160 | max_values, rhs_sort = torch.sort(rhs_score, dim=1, descending=True)
161 |
162 | lhs_sort = lhs_sort.cpu().numpy()
163 | rhs_sort = rhs_sort.cpu().numpy()
164 |
165 | for i, query in enumerate(b_queries):
166 | # find the rank of the target entities
167 | lhs_rank = np.where(lhs_sort[i]==query[0].item())[0][0]
168 | rhs_rank = np.where(rhs_sort[i]==query[2].item())[0][0]
169 |
170 | # rank+1, since the lowest rank is rank 1 not rank 0
171 | ranks_lhs.append(lhs_rank + 1)
172 | ranks_rhs.append(rhs_rank + 1)
173 |
174 | b_begin += batch_size
175 |
176 | #logger.info('Ranking done for all queries')
177 | return ranks_lhs, ranks_rhs
178 |
179 |
180 |
181 |
182 | if __name__ == '__main__':
183 | parser = add_arguments()
184 | parser.add_argument('--target-split', type=int, default=1, help='Ranks to use for target set. Values are 1 for ranks <=10; 2 for ranks>10 and ranks<=100. Default: 1')
185 | args = parser.parse_args()
186 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
187 |
188 | # Fixing random seeds for reproducibility - https://pytorch.org/docs/stable/notes/randomness.html
189 | torch.manual_seed(args.seed)
190 | cudnn.deterministic = True
191 | cudnn.benchmark = False
192 | np.random.seed(args.seed)
193 | rng = np.random.default_rng(seed=args.seed)
194 |
195 | args.epochs = -1 #no training here
196 | model_name, model_path, eval_name, log_path = set_paths(args)
197 | logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s',
198 | datefmt = '%m/%d/%Y %H:%M:%S',
199 | level = logging.INFO,
200 | filename = log_path
201 | )
202 | logger = logging.getLogger(__name__)
203 |
204 |
205 | data_path = 'data/{0}'.format(args.data)
206 | n_ent, n_rel, ent_to_id, rel_to_id = generate_dicts(data_path)
207 |
208 | ##### load data ####
209 | data = load_data(data_path)
210 | train_data, valid_data, test_data = data['train'], data['valid'], data['test']
211 |
212 | inp_f = open(os.path.join(data_path, 'to_skip_eval.pickle'), 'rb')
213 | to_skip_eval: Dict[str, Dict[Tuple[int, int], List[int]]] = pickle.load(inp_f)
214 | inp_f.close()
215 | to_skip_eval['lhs'] = {(int(k[0]), int(k[1])): v for k,v in to_skip_eval['lhs'].items()}
216 | to_skip_eval['rhs'] = {(int(k[0]), int(k[1])): v for k,v in to_skip_eval['rhs'].items()}
217 |
218 | # add a model and load the pre-trained params
219 | model = add_model(args, n_ent, n_rel)
220 | model.to(device)
221 | logger.info('Loading saved model from {0}'.format(model_path))
222 | state = torch.load(model_path)
223 | model_params = state['state_dict']
224 | params = [(key, value.size(), value.numel()) for key, value in model_params.items()]
225 | for key, size, count in params:
226 | logger.info('Key:{0}, Size:{1}, Count:{2}'.format(key, size, count))
227 |
228 | model.load_state_dict(model_params)
229 | model.eval()
230 |
231 | with torch.no_grad():
232 | target_path = 'data/target_{0}_{1}_{2}'.format(args.model, args.data, args.target_split)
233 |
234 | # generate ranks for test set
235 | logger.info('Generating target set from test set')
236 | test_data = torch.from_numpy(test_data.astype('int64')).to(device)
237 | if args.add_reciprocals:
238 | num_rel = n_rel
239 | else:
240 | num_rel = 0
241 | ranks_lhs, ranks_rhs = get_ranking(model, test_data, num_rel, to_skip_eval, device, args.test_batch_size)
242 | ranks_lhs, ranks_rhs = np.array(ranks_lhs), np.array(ranks_rhs)
243 | #indices_lhs, indices_rhs = np.asarray(ranks_lhs <= 10).nonzero(), np.asarray(ranks_rhs <= 10).nonzero()
244 | if args.target_split == 2:
245 | indices = np.asarray(((ranks_lhs <= 100) & (ranks_lhs > 10)) & ((ranks_rhs <= 100) & (ranks_rhs > 10))).nonzero()
246 | elif args.target_split == 1:
247 | indices = np.asarray((ranks_lhs <= 10) & (ranks_rhs <= 10)).nonzero()
248 | else:
249 | logger.info('Unknown Target Split: {0}', args.target_split)
250 | raise Exception("Unknown target split!")
251 |
252 | test_data = test_data.cpu().numpy()
253 | #targets_lhs, targets_rhs = test_data[indices_lhs], test_data[indices_rhs]
254 | targets = test_data[indices]
255 | logger.info('Number of targets generated: {0}'.format(targets.shape[0]))
256 | #save eval for selected targets
257 | split = 'target_{0}'.format(args.target_split)
258 |
259 | results_target = evaluation(model, targets, to_skip_eval, eval_name, num_rel, split, args.test_batch_size, -1, device)
260 | # save target set
261 |
262 | with open(os.path.join(target_path, 'target.txt'), 'w') as out:
263 | for item in targets:
264 | out.write("%s\n" % "\t".join(map(str, item)))
265 | with open(os.path.join(target_path, 'test.txt'), 'w') as out:
266 | for item in targets:
267 | out.write("%s\n" % "\t".join(map(str, item)))
268 |
269 | # use the valid set to generate non-target set
270 | logger.info('Generating non target set from valid set')
271 | valid_data = torch.from_numpy(valid_data.astype('int64')).to(device)
272 | if args.add_reciprocals:
273 | num_rel = n_rel
274 | else:
275 | num_rel = 0
276 | ranks_lhs, ranks_rhs = get_ranking(model, valid_data, num_rel, to_skip_eval, device, args.valid_batch_size)
277 | ranks_lhs, ranks_rhs = np.array(ranks_lhs), np.array(ranks_rhs)
278 | if args.target_split == 2:
279 | indices = np.asarray(((ranks_lhs <= 100) & (ranks_lhs > 10)) & ((ranks_rhs <= 100) & (ranks_rhs > 10))).nonzero()
280 | elif args.target_split == 1:
281 | indices = np.asarray((ranks_lhs <= 10) & (ranks_rhs <= 10)).nonzero()
282 | else:
283 | logger.info('Unknown Target Split: {0}', args.target_split)
284 | raise Exception("Unknown target split!")
285 |
286 | valid_data = valid_data.cpu().numpy()
287 | non_targets = valid_data[indices]
288 | logger.info('Number of non targets generated: {0}'.format(non_targets.shape[0]))
289 | #save eval for selected non targets
290 | split = 'non_target_{0}'.format(args.target_split)
291 |
292 | results_ntarget = evaluation(model, non_targets, to_skip_eval, eval_name, num_rel, split, args.valid_batch_size, -1, device)
293 | # save non target set and valid set both - eval needed for both
294 | with open(os.path.join(target_path, 'non_target.txt'), 'w') as out:
295 | for item in non_targets:
296 | out.write("%s\n" % "\t".join(map(str, item)))
297 | with open(os.path.join(target_path, 'valid.txt'), 'w') as out:
298 | for item in valid_data:
299 | out.write("%s\n" % "\t".join(map(str, item)))
300 |
301 |
302 | # saving dicts to avoid searching later
303 | with open(os.path.join(target_path, 
'entities_dict.json'), 'w') as f: 304 | f.write(json.dumps(ent_to_id) + '\n') 305 | 306 | with open(os.path.join(target_path, 'relations_dict.json'), 'w') as f: 307 | f.write(json.dumps(rel_to_id) + '\n') 308 | 309 | with open(os.path.join(target_path, 'train.txt'), 'w') as out: 310 | for item in train_data: 311 | out.write("%s\n" % "\t".join(map(str, item))) 312 | 313 | out = open(os.path.join(target_path, 'to_skip_eval.pickle'), 'wb') 314 | pickle.dump(to_skip_eval, out) 315 | out.close() 316 | 317 | # write down the stats for targets generated 318 | with open(os.path.join(target_path, 'stats.txt'), 'w') as out: 319 | out.write('Number of train set triples: {0}\n'.format(train_data.shape[0])) 320 | out.write('Number of test set triples: {0}\n'.format(test_data.shape[0])) 321 | out.write('Number of valid set triples: {0}\n'.format(valid_data.shape[0])) 322 | out.write('Number of target triples: {0}\n'.format(targets.shape[0])) 323 | out.write('Number of non target triples: {0}\n'.format(non_targets.shape[0])) 324 | if args.target_split ==2: 325 | out.write('Target triples are ranked >10 and <=100 and test set is the target triples \n') 326 | out.write('Non target triples are ranked >10 and <=100 but valid triples is original valid set \n') 327 | out.write('Non target triples with ranks >10 and <=100 are in non_target.txt \n') 328 | else: 329 | out.write('Target triples are ranked <=10 and test set is the target triples \n') 330 | out.write('Non target triples are ranked <=10 but valid triples is original valid set \n') 331 | out.write('Non target triples with ranks <=10 are in non_target.txt \n') 332 | out.write('------------------------------------------- \n') 333 | 334 | 335 | 336 | 337 | 338 | -------------------------------------------------------------------------------- /KGEAttack/ConvE/utils.py: -------------------------------------------------------------------------------- 1 | ''' 2 | This file contains functions that are used repeatedly across different attacks 3 | ''' 4 | import logging 5 | import time 6 | from tqdm import tqdm 7 | import io 8 | import pandas as pd 9 | import numpy as np 10 | import os 11 | import json 12 | 13 | import argparse 14 | import torch 15 | 16 | from model import Distmult, Complex, Conve, Transe 17 | 18 | 19 | logger = logging.getLogger(__name__) #config already set in main.py 20 | 21 | def generate_dicts(data_path): 22 | with open (os.path.join(data_path, 'entities_dict.json'), 'r') as f: 23 | ent_to_id = json.load(f) 24 | with open (os.path.join(data_path, 'relations_dict.json'), 'r') as f: 25 | rel_to_id = json.load(f) 26 | n_ent = len(list(ent_to_id.keys())) 27 | n_rel = len(list(rel_to_id.keys())) 28 | 29 | return n_ent, n_rel, ent_to_id, rel_to_id 30 | 31 | def load_data(data_path): 32 | data = {} 33 | for split in ['train', 'valid', 'test']: 34 | df = pd.read_csv(os.path.join(data_path, split+'.txt'), sep='\t', header=None, names=None, dtype=int) 35 | df = df.drop_duplicates() 36 | data[split] = df.values 37 | 38 | return data 39 | 40 | def add_model(args, n_ent, n_rel): 41 | if args.add_reciprocals: 42 | if args.model is None: 43 | model = Conve(args, n_ent, 2*n_rel) 44 | elif args.model == 'conve': 45 | model = Conve(args, n_ent, 2*n_rel) 46 | elif args.model == 'distmult': 47 | model = Distmult(args, n_ent, 2*n_rel) 48 | elif args.model == 'complex': 49 | model = Complex(args, n_ent, 2*n_rel) 50 | elif args.model == 'transe': 51 | model = Transe(args, n_ent, 2*n_rel) 52 | else: 53 | logger.info('Unknown model: {0}', args.model) 54 | raise 
Exception("Unknown model!") 55 | else: 56 | if args.model is None: 57 | model = Conve(args, n_ent, n_rel) 58 | elif args.model == 'conve': 59 | model = Conve(args, n_ent, n_rel) 60 | elif args.model == 'distmult': 61 | model = Distmult(args, n_ent, n_rel) 62 | elif args.model == 'complex': 63 | model = Complex(args, n_ent, n_rel) 64 | elif args.model == 'transe': 65 | model = Transe(args, n_ent, n_rel) 66 | else: 67 | logger.info('Unknown model: {0}', args.model) 68 | raise Exception("Unknown model!") 69 | 70 | #model.to(self.device) 71 | return model 72 | 73 | def load_model(model_path, args, n_ent, n_rel, device): 74 | # add a model and load the pre-trained params 75 | model = add_model(args, n_ent, n_rel) 76 | model.to(device) 77 | logger.info('Loading saved model from {0}'.format(model_path)) 78 | state = torch.load(model_path) 79 | model_params = state['state_dict'] 80 | params = [(key, value.size(), value.numel()) for key, value in model_params.items()] 81 | for key, size, count in params: 82 | logger.info('Key:{0}, Size:{1}, Count:{2}'.format(key, size, count)) 83 | 84 | model.load_state_dict(model_params) 85 | model.eval() 86 | logger.info(model) 87 | 88 | return model 89 | 90 | def generate_nghbrs(test_set, train_set): 91 | ''' 92 | For every triple in test set, return the index of 93 | neighbouring triple in training set, 94 | i.e. indices in training set are returned 95 | ''' 96 | n_dict = {} 97 | for t, triple in enumerate(test_set): 98 | sub = triple[0] 99 | obj = triple[2] 100 | mask = (np.isin(train_set[:,0], [sub, obj]) | np.isin(train_set[:,2], [sub, obj])) 101 | #nghbrs_dict[t] = pro_train[mask] 102 | mask_idx = np.where(mask)[0] 103 | n_dict[t] = mask_idx 104 | 105 | return n_dict 106 | 107 | 108 | def perturb_data(train_data, trips_to_delete): 109 | logger.info('----- Generating perturbed dataset ------') 110 | per_tr_1 = np.empty_like(train_data) 111 | per_tr_1[:] = train_data 112 | 113 | n_ignored_edits = 0 114 | for idx, trip in enumerate(trips_to_delete): 115 | i = trip[0] 116 | j = trip[1] 117 | k = trip[2] 118 | # mask for triple in training set 119 | m = (np.isin(per_tr_1[:,0], [i]) & np.isin(per_tr_1[:,1], [j]) & np.isin(per_tr_1[:,2], [k])) 120 | if np.any(m): 121 | temp_tr = per_tr_1[~m] 122 | # mask to check if deleting triple also deletes entity 123 | m2 = (((np.any(temp_tr[:,0] ==k)) | (np.any(temp_tr[:,2] == k))) 124 | & ((np.any(temp_tr[:,0] == i)) | (np.any(temp_tr[:,2] == i)))) 125 | if np.any(m2): 126 | #np.copyto(per_tr, temp_tr) 127 | per_tr_1 = np.empty_like(temp_tr) 128 | per_tr_1[:] = temp_tr 129 | else: 130 | n_ignored_edits += 1 131 | logger.info('Ignoring edit number {0}: {1} because it deletes entities'.format(idx, trip)) 132 | else: 133 | logger.info('Can\'t delete the selected triple. 
Something is wrong in the code') 134 | logger.info(trip) 135 | break 136 | 137 | return per_tr_1, n_ignored_edits 138 | 139 | 140 | def set_hyperparams(args): 141 | ''' 142 | Given the args, return with updated hyperparams for reproducibility 143 | ''' 144 | if args.data == 'WN18RR': 145 | args.original_data = 'WN18RR' 146 | 147 | if args.data == 'WN18': 148 | args.original_data = 'WN18' 149 | 150 | if args.data == 'FB15k-237': 151 | args.original_data = 'FB15k-237' 152 | 153 | if (args.data == 'WN18RR' or args.original_data == 'WN18RR'): 154 | if args.model == 'distmult': 155 | args.lr = 0.01 156 | args.num_batches = 50 157 | elif args.model == 'complex': 158 | args.lr = 0.01 159 | elif args.model == 'conve': 160 | args.lr = 0.001 161 | elif args.model == 'transe': 162 | args.lr = 0.005 163 | args.input_drop = 0.0 164 | args.transe_margin = 9.0 165 | args.num_batches = 1000 166 | args.epochs = 100 167 | args.reg_weight = 1e-12 168 | else: 169 | raise Exception("New model:{0},{1}. Set hyperparams".format(args.data, args.model)) 170 | 171 | if (args.data == 'FB15k-237' or args.original_data == 'FB15k-237'): 172 | if args.model == 'distmult': 173 | args.lr = 0.005 174 | args.input_drop = 0.5 175 | elif args.model == 'complex': 176 | args.lr = 0.005 177 | args.input_drop = 0.5 178 | elif args.model == 'conve': 179 | args.lr = 0.001 180 | args.hidden_drop = 0.5 181 | elif args.model == 'transe': 182 | args.lr = 0.001 183 | args.input_drop = 0.0 184 | args.transe_margin = 9.0 185 | args.num_batches = 800 186 | args.epochs = 100 187 | args.reg_weight = 1e-10 188 | else: 189 | raise Exception("New model:{0},{1}. Set hyperparams".format(args.data, args.model)) 190 | 191 | if (args.data == 'WN18' or args.original_data == 'WN18'): 192 | if args.model == 'distmult': 193 | args.lr = 0.01 194 | args.num_batches = 50 195 | elif args.model == 'complex': 196 | args.lr = 0.01 197 | elif args.model == 'conve': 198 | args.lr = 0.005 199 | elif args.model == 'transe': 200 | args.lr = 0.01 201 | args.input_drop = 0.0 202 | args.transe_margin = 9.0 203 | args.num_batches = 1500 204 | args.epochs = 100 205 | args.reg_weight = 1e-12 206 | else: 207 | raise Exception("New model:{0},{1}. Set hyperparams".format(args.data, args.model)) 208 | 209 | 210 | return args 211 | 212 | 213 | 214 | 215 | 216 | def get_argument_parser(): 217 | '''Generate an argument parser 218 | ''' 219 | parser = argparse.ArgumentParser(description='Link prediction for knowledge graphs') 220 | 221 | parser.add_argument('--data', type=str, default='FB15k-237', help='Dataset to use: {FB15k-237, YAGO3-10, WN18RR, umls, nations, kinship}, default: FB15k-237') 222 | parser.add_argument('--model', type=str, default='conve', help='Choose from: {conve, distmult, complex}') 223 | parser.add_argument('--add-reciprocals', action='store_true', help='Option to add reciprocal relations') 224 | 225 | 226 | parser.add_argument('--transe-margin', type=float, default=12.0, help='Margin value for TransE scoring function. Default:12.0') 227 | parser.add_argument('--transe-norm', type=int, default=2, help='P-norm value for TransE scoring function. Default:2') 228 | 229 | parser.add_argument('--epochs', type=int, default=400, help='Number of epochs to train (default: 400)') 230 | parser.add_argument('--lr', type=float, default=0.001, help='Learning rate (default: 0.001)')#maybe 0.1 231 | parser.add_argument('--lr-decay', type=float, default=0.0, help='Weight decay value to use in the optimizer. 
Default: 0.0')
232 | parser.add_argument('--max-norm', action='store_true', help='Option to add unit max norm constraint to entity embeddings')
233 |
234 | parser.add_argument('--num-batches', type=int, default=400, help='Number of batches for training (default: 400)') #maybe 200?
235 | parser.add_argument('--test-batch-size', type=int, default=128, help='Batch size for test split (default: 128)')
236 | parser.add_argument('--valid-batch-size', type=int, default=128, help='Batch size for valid split (default: 128)')
237 | parser.add_argument('--num-workers', type=int, default=4, help='Number of workers to use for the batch loaders on GPU. Default: 4')
238 |
239 | parser.add_argument('--embedding-dim', type=int, default=200, help='The embedding dimension (1D). Default: 200')
240 |
241 | parser.add_argument('--stack_width', type=int, default=20, help='The first dimension of the reshaped/stacked 2D embedding. Second dimension is inferred. Default: 20')
242 | #parser.add_argument('--stack_height', type=int, default=10, help='The second dimension of the reshaped/stacked 2D embedding. Default: 10')
243 | parser.add_argument('--hidden-drop', type=float, default=0.3, help='Dropout for the hidden layer. Default: 0.3.')
244 | parser.add_argument('--input-drop', type=float, default=0.2, help='Dropout for the input embeddings. Default: 0.2.')
245 | parser.add_argument('--feat-drop', type=float, default=0.3, help='Dropout for the convolutional features. Default: 0.3.')
246 | parser.add_argument('-num-filters', default=32, type=int, help='Number of filters for convolution')
247 | parser.add_argument('-kernel-size', default=3, type=int, help='Kernel Size for convolution')
248 |
249 | parser.add_argument('--use-bias', action='store_true', help='Use a bias in the convolutional layer (off unless this flag is set)')
250 | parser.add_argument('--label-smoothing', type=float, default=0.1, help='Label smoothing value to use. Default: 0.1')
251 |
252 |
253 | parser.add_argument('--reg-weight', type=float, default=5e-12, help='Weight for regularization. Default: 5e-12')#maybe 5e-2?
254 | parser.add_argument('--reg-norm', type=int, default=2, help='Norm for regularization. Default: 2')
255 |
256 | parser.add_argument('--resume', action='store_true', help='Restore a saved model.')
257 | parser.add_argument('--resume-split', type=str, default='test', help='Split to evaluate a restored model')
258 | parser.add_argument('--seed', type=int, default=17, metavar='S', help='Random seed (default: 17)')
259 |
260 | parser.add_argument('--reproduce-results', action='store_true', help='Use the hyperparameters to reproduce the results.')
261 | parser.add_argument('--original-data', type=str, default='FB15k-237', help='Dataset to use; this option is needed to set the hyperparams to reproduce the results for training after attack, default: FB15k-237')
262 |
263 | return parser
264 |
265 |
266 | class TqdmToLogger(io.StringIO):
267 | #https://github.com/tqdm/tqdm/issues/313
268 | """
269 | Output stream for TQDM which will output to logger module instead of
270 | the StdOut. 
271 | """ 272 | logger = None 273 | level = None 274 | buf = '' 275 | def __init__(self,logger,level=None): 276 | super(TqdmToLogger, self).__init__() 277 | self.logger = logger 278 | self.level = level or logging.INFO 279 | def write(self,buf): 280 | self.buf = buf.strip('\r\n\t ') 281 | def flush(self): 282 | self.logger.log(self.level, self.buf) -------------------------------------------------------------------------------- /KGEAttack/ConvE/wrangle_KG.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 1. Read the processed data (int IDs) and generate sr2o and or2s data from training file 3 | 2. Use the train, valid and test file to generate filter lists for evaluation 4 | ''' 5 | import numpy as np 6 | import sys 7 | import os 8 | import errno 9 | import json 10 | import pandas as pd 11 | import pickle 12 | from collections import defaultdict 13 | 14 | 15 | def _load_data(file_path): 16 | df = pd.read_csv(file_path, sep='\t', header=None, names=None, dtype=str) 17 | df = df.drop_duplicates() 18 | return df.values 19 | 20 | 21 | def generate_eval_filter(dataset_name): 22 | #processed_path = 'data/processed_{0}'.format(dataset_name) 23 | processed_path = 'data/{0}'.format(dataset_name) 24 | files = ['train', 'valid', 'test'] 25 | to_skip = {'lhs': defaultdict(set), 'rhs': defaultdict(set)} 26 | for file in files: 27 | file_path = os.path.join(processed_path, file+'.txt') 28 | examples = _load_data(file_path) 29 | for lhs, rel, rhs in examples: 30 | #to_skip['lhs'][(rhs, rel + n_relations)].add(lhs) # reciprocals 31 | to_skip['lhs'][(rhs, rel)].add(int(lhs)) # we don't need reciprocal training 32 | to_skip['rhs'][(lhs, rel)].add(int(rhs)) 33 | 34 | to_skip_final = {'lhs': {}, 'rhs': {}} 35 | for kk, skip in to_skip.items(): 36 | for k, v in skip.items(): 37 | to_skip_final[kk][k] = sorted(list(v)) 38 | #to_skip_final[kk][(int(k[0]), int(k[1]))] = sorted(list(v)) 39 | 40 | out = open(os.path.join(processed_path, 'to_skip_eval.pickle'), 'wb') 41 | pickle.dump(to_skip_final, out) 42 | out.close() 43 | 44 | #with open(os.path.join(processed_path, 'to_skip_eval.json'), 'w') as f: 45 | # f.write(json.dumps(to_skip_final) + '\n') 46 | 47 | return 48 | 49 | def generate_train_data(dataset_name): 50 | #processed_path = 'data/processed_{0}'.format(dataset_name) 51 | processed_path = 'data/{0}'.format(dataset_name) 52 | file_path = os.path.join(processed_path, 'train.txt') 53 | train_examples = _load_data(file_path) 54 | sr2o = defaultdict(set) 55 | or2s = defaultdict(set) 56 | for s,r,o in train_examples: 57 | sr2o[(s,r)].add(o) 58 | or2s[(o,r)].add(s) 59 | 60 | sr2o = {k: sorted(list(v)) for k, v in sr2o.items()} 61 | or2s = {k: sorted(list(v)) for k, v in or2s.items()} 62 | 63 | out = open(os.path.join(processed_path, 'sr2o_train.pickle'), 'wb') 64 | pickle.dump(sr2o, out) 65 | out.close() 66 | 67 | out = open(os.path.join(processed_path, 'or2s_train.pickle'), 'wb') 68 | pickle.dump(or2s, out) 69 | out.close() 70 | 71 | #with open(os.path.join(processed_path, 'sr2o_train.json'), 'w') as f: 72 | # f.write(json.dumps(sr2o) + '\n') 73 | 74 | #with open(os.path.join(processed_path, 'or2s_train.json'), 'w') as f: 75 | # f.write(json.dumps(or2s) + '\n') 76 | 77 | return 78 | 79 | 80 | if __name__ == '__main__': 81 | 82 | if len(sys.argv) > 1: 83 | dataset_name = sys.argv[1] # name of dataset 84 | else: 85 | #dataset_name = 'FB15k-237' 86 | #dataset_name = 'YAGO3-10' 87 | #dataset_name = 'WN18' 88 | #dataset_name = 'FB15k' 89 | dataset_name = 'WN18RR' 90 | 
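# For intuition, a toy illustration (hypothetical integer IDs) of what
# generate_train_data(), defined above and called below, builds from the
# train triples (0,3,7), (0,3,9), (2,3,7):
#   sr2o = {(0, 3): [7, 9], (2, 3): [7]}   # all correct objects per (subject, relation)
#   or2s = {(7, 3): [0, 2], (9, 3): [0]}   # all correct subjects per (object, relation)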
91 | seed = 345345 92 | np.random.seed(seed) 93 | rdm = np.random.RandomState(seed) 94 | rng = np.random.default_rng(seed) 95 | 96 | print('{}: Generating filter lists for evaluation'.format(dataset_name)) 97 | generate_eval_filter(dataset_name) 98 | print('{}: Generating train data'.format(dataset_name)) 99 | generate_train_data(dataset_name) 100 | 101 | 102 | 103 | -------------------------------------------------------------------------------- /KGEAttack/Readme.md: -------------------------------------------------------------------------------- 1 |

2 | # Code Structure
3 |
4 | This file describes the structure of the code.
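
To make the file-by-file list below concrete, here is one end-to-end attack pipeline, with commands taken verbatim from `complex_WN18RR.sh` (the dataset is assumed to have been preprocessed first, e.g. via `preprocess.sh`):

```sh
cd ConvE
# train the original KGE model
CUDA_VISIBLE_DEVICES=0 python -u main.py --model complex --data WN18RR --lr 0.01
# select target triples from the test set
mkdir data/target_complex_WN18RR_1
CUDA_VISIBLE_DEVICES=0 python -u select_targets.py --model complex --data WN18RR --lr 0.01
# generate symmetry edits (soft truth score), rebuild the evaluation filters, retrain on poisoned data
CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_1.py --model complex --data WN18RR --budget 1
python -u wrangle_KG.py sym_add_1_complex_WN18RR_1_1_1
CUDA_VISIBLE_DEVICES=0 python -u main.py --model complex --data sym_add_1_complex_WN18RR_1_1_1 --lr 0.01
```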

5 |
6 | Commandline instructions for all experiments are available in bash scripts at this level
7 |
8 | The main codebase is in `ConvE`
9 | - script to preprocess data (generate dictionaries) is `preprocess.py`
10 | - script to generate evaluation filters and training tuples is `wrangle_KG.py`
11 | - script to train a KGE model is `main.py`
12 | - script to select target triples from the test set is `select_targets.py`
13 | - Random neighbourhood baseline is in `rand_add_attack_1.py`
14 | - Random global baseline is in `rand_add_attack_2.py`
15 | - Zhang et al. baseline is implemented in `ijcai_add_attack_1.py`
16 | - CRIAGE baseline is in `criage_add_attack_1.py`
17 | - Proposed symmetric attacks in `sym_add_attack_{1,2,3}`
18 |   - 1 for soft truth score
19 |   - 2 for KGE ranks
20 |   - 3 for cosine distance
21 | - Proposed inversion attacks in `inv_add_attack_{1,2,3}`
22 |   - 1 for soft truth score
23 |   - 2 for KGE ranks
24 |   - 3 for cosine distance
25 | - Proposed composition attacks in `com_add_attack_{1,2,3}`
26 |   - 1 for soft truth score
27 |   - 2 for KGE ranks
28 |   - 3 for cosine distance
29 | - Elbow method to select clusters is in `clustering_elbow.ipynb`
30 | - Script to generate clusters is `create_clusters.py`
31 | - Script to compute metrics on decoy set is in `decoy_test.py`
32 | - Folder `elbow_plots` contains the elbow plots
33 | - Folder `data` will contain datasets generated from running the experiments.
34 |   - These are named as `attack_model_dataset_split_budget_run`
35 |   - here `split=1` for target split, `budget=1` for most attacks except random global with 2 edits, and `run` is the number for a random run
36 |   - For Zhang et al. attacks, an additional argument is the down-sampling percent
37 | - Folders `saved_models`, `clusters`, `logs`, `results` and `losses` are also empty but will be used if a script is run
38 |
--------------------------------------------------------------------------------
/KGEAttack/complex_FB15k-237.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | cd ConvE
4 |
5 | # train the original model
6 | echo 'Training original model'
7 |
8 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model complex --data FB15k-237 --lr 0.005 --input-drop 0.5
9 |
10 | echo 'Selecting target triples'
11 | mkdir data/target_complex_FB15k-237_1
12 |
13 | CUDA_VISIBLE_DEVICES=0 python -u select_targets.py --model complex --data FB15k-237 --lr 0.005 --input-drop 0.5
14 |
15 |
16 | echo 'Generating random edits for the neighbourhood'
17 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_1.py --model complex --data FB15k-237 --budget 1 --rand-run 1
18 | python -u wrangle_KG.py rand_add_n_complex_FB15k-237_1_1_1
19 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model complex --data rand_add_n_complex_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5
20 |
21 | echo 'Generating global random edits'
22 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_2.py --model complex --data FB15k-237 --budget 1 --rand-run 1
23 | python -u wrangle_KG.py rand_add_g_complex_FB15k-237_1_1_1
24 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model complex --data rand_add_g_complex_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5
25 |
26 |
27 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_2.py --model complex --data FB15k-237 --budget 2 --rand-run 1
28 | python -u wrangle_KG.py rand_add_g_complex_FB15k-237_1_2_1
29 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model complex --data rand_add_g_complex_FB15k-237_1_2_1 --lr 0.005 --input-drop 0.5
30 |
31 |
32 | echo 
'Generating symmetry edits with ground truth minimum' 33 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_1.py --model complex --data FB15k-237 --budget 1 34 | python -u wrangle_KG.py sym_add_1_complex_FB15k-237_1_1_1 35 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model complex --data sym_add_1_complex_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5 36 | 37 | echo 'Generating symmetry edits with worse ranks' 38 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_2.py --model complex --data FB15k-237 --budget 1 39 | python -u wrangle_KG.py sym_add_2_complex_FB15k-237_1_1_1 40 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model complex --data sym_add_2_complex_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5 41 | 42 | echo 'Generating symmetry edits with cosine distance' 43 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_3.py --model complex --data FB15k-237 --budget 1 44 | python -u wrangle_KG.py sym_add_3_complex_FB15k-237_1_1_1 45 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model complex --data sym_add_3_complex_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5 46 | 47 | 48 | 49 | 50 | echo 'Generating inversion edits with ground truth minimum' 51 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_1.py --model complex --data FB15k-237 --budget 1 52 | python -u wrangle_KG.py inv_add_1_complex_FB15k-237_1_1_1 53 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model complex --data inv_add_1_complex_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5 54 | 55 | echo 'Generating inversion edits with worse ranks' 56 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_2.py --model complex --data FB15k-237 --budget 1 57 | python -u wrangle_KG.py inv_add_2_complex_FB15k-237_1_1_1 58 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model complex --data inv_add_2_complex_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5 59 | 60 | echo 'Generating inversion edits with cosine distance' 61 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_3.py --model complex --data FB15k-237 --budget 1 62 | python -u wrangle_KG.py inv_add_3_complex_FB15k-237_1_1_1 63 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model complex --data inv_add_3_complex_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5 64 | 65 | 66 | 67 | 68 | echo 'Generating composition edits with ground truth values' 69 | python -u create_clusters.py --model complex --data FB15k-237 --num-clusters 300 70 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_1.py --model complex --data FB15k-237 --budget 1 --num-clusters 300 --rand-run 1 71 | python -u wrangle_KG.py com_add_1_complex_FB15k-237_1_1_1 72 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model complex --data com_add_1_complex_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5 73 | 74 | echo 'Generating composition attack with just worse ranks ' 75 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_2.py --model complex --data FB15k-237 --budget 1 76 | python -u wrangle_KG.py com_add_2_complex_FB15k-237_1_1_1 77 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model complex --data com_add_2_complex_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5 78 | 79 | echo 'Generating composition attack with cosine distance ' 80 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_3.py --model complex --data FB15k-237 --budget 1 81 | python -u wrangle_KG.py com_add_3_complex_FB15k-237_1_1_1 82 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model complex --data com_add_3_complex_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5 83 | 84 | 85 | 86 | 87 | echo 'Generating edits from IJCAI-19 baseline ' 88 | CUDA_VISIBLE_DEVICES=0 python -u ijcai_add_attack_1.py --model complex --data 
FB15k-237 --budget 1 --corruption-factor 15 --rand-run 1 --use-gpu 89 | python -u wrangle_KG.py ijcai_add_1_complex_FB15k-237_1_1_1_15.0 90 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model complex --data ijcai_add_1_complex_FB15k-237_1_1_1_15.0 --lr 0.005 --input-drop 0.5 91 | 92 | CUDA_VISIBLE_DEVICES=0 python -u ijcai_add_attack_1.py --model complex --data FB15k-237 --budget 1 --corruption-factor 5 --rand-run 1 --use-gpu 93 | python -u wrangle_KG.py ijcai_add_1_complex_FB15k-237_1_1_1_5.0 94 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model complex --data ijcai_add_1_complex_FB15k-237_1_1_1_5.0 --lr 0.005 --input-drop 0.5 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | -------------------------------------------------------------------------------- /KGEAttack/complex_WN18.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | cd ConvE 4 | 5 | # train the original model 6 | echo 'Training original model' 7 | 8 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model complex --data WN18 --lr 0.01 9 | 10 | echo 'Selecting target triples' 11 | mkdir data/target_complex_WN18_1 12 | 13 | CUDA_VISIBLE_DEVICES=0 python -u select_targets.py --model complex --data WN18 --lr 0.01 14 | 15 | 16 | echo 'Generating random edits for the neighbourhood' 17 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_1.py --model complex --data WN18 --budget 1 --rand-run 1 18 | python -u wrangle_KG.py rand_add_n_complex_WN18_1_1_1 19 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model complex --data rand_add_n_complex_WN18_1_1_1 --lr 0.01 20 | 21 | echo 'Generating global random edits' 22 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_2.py --model complex --data WN18 --budget 1 --rand-run 1 23 | python -u wrangle_KG.py rand_add_g_complex_WN18_1_1_1 24 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model complex --data rand_add_g_complex_WN18_1_1_1 --lr 0.01 25 | 26 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_2.py --model complex --data WN18 --budget 2 --rand-run 1 27 | python -u wrangle_KG.py rand_add_g_complex_WN18_1_2_1 28 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model complex --data rand_add_g_complex_WN18_1_2_1 --lr 0.01 29 | 30 | 31 | 32 | echo 'Generating symmetry edits with ground truth minimum' 33 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_1.py --model complex --data WN18 --budget 1 34 | python -u wrangle_KG.py sym_add_1_complex_WN18_1_1_1 35 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model complex --data sym_add_1_complex_WN18_1_1_1 --lr 0.01 36 | 37 | 38 | echo 'Generating symmetry edits with worse ranks' 39 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_2.py --model complex --data WN18 --budget 1 40 | python -u wrangle_KG.py sym_add_2_complex_WN18_1_1_1 41 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model complex --data sym_add_2_complex_WN18_1_1_1 --lr 0.01 42 | 43 | echo 'Generating symmetry edits with cosine distance' 44 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_3.py --model complex --data WN18 --budget 1 45 | python -u wrangle_KG.py sym_add_3_complex_WN18_1_1_1 46 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model complex --data sym_add_3_complex_WN18_1_1_1 --lr 0.01 47 | 48 | 49 | 50 | 51 | echo 'Generating inversion edits with ground truth minimum' 52 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_1.py --model complex --data WN18 --budget 1 53 | python -u wrangle_KG.py inv_add_1_complex_WN18_1_1_1 54 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model complex --data 
inv_add_1_complex_WN18_1_1_1 --lr 0.01 55 | 56 | echo 'Generating inversion edits with worse ranks' 57 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_2.py --model complex --data WN18 --budget 1 58 | python -u wrangle_KG.py inv_add_2_complex_WN18_1_1_1 59 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model complex --data inv_add_2_complex_WN18_1_1_1 --lr 0.01 60 | 61 | echo 'Generating inversion edits with cosine distance' 62 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_3.py --model complex --data WN18 --budget 1 63 | python -u wrangle_KG.py inv_add_3_complex_WN18_1_1_1 64 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model complex --data inv_add_3_complex_WN18_1_1_1 --lr 0.01 65 | 66 | 67 | 68 | echo 'Generating composition edits with ground truth values' 69 | python -u create_clusters.py --model complex --data WN18 --num-clusters 100 70 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_1.py --model complex --data WN18 --budget 1 --num-clusters 100 --rand-run 1 71 | python -u wrangle_KG.py com_add_1_complex_WN18_1_1_1 72 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model complex --data com_add_1_complex_WN18_1_1_1 --lr 0.01 73 | 74 | echo 'Generating composition edits with just worse ranks ' 75 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_2.py --model complex --data WN18 --budget 1 76 | python -u wrangle_KG.py com_add_2_complex_WN18_1_1_1 77 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model complex --data com_add_2_complex_WN18_1_1_1 --lr 0.01 78 | 79 | echo 'Generating composition edits with cosine distance ' 80 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_3.py --model complex --data WN18 --budget 1 81 | python -u wrangle_KG.py com_add_3_complex_WN18_1_1_1 82 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model complex --data com_add_3_complex_WN18_1_1_1 --lr 0.01 83 | 84 | 85 | 86 | 87 | echo 'Generating edits from IJCAI-19 baseline ' 88 | CUDA_VISIBLE_DEVICES=0 python -u ijcai_add_attack_1.py --model complex --data WN18 --budget 1 --corruption-factor 20 --rand-run 1 --use-gpu 89 | python -u wrangle_KG.py ijcai_add_1_complex_WN18_1_1_1_20.0 90 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model complex --data ijcai_add_1_complex_WN18_1_1_1_20.0 --lr 0.01 91 | 92 | 93 | # CUDA_VISIBLE_DEVICES=0 python -u ijcai_add_attack_1.py --model complex --data WN18 --budget 1 --corruption-factor 5 --rand-run 1 --use-gpu 94 | # python -u wrangle_KG.py ijcai_add_1_complex_WN18_1_1_1_5.0 95 | # CUDA_VISIBLE_DEVICES=0 python -u main.py --model complex --data ijcai_add_1_complex_WN18_1_1_1_5.0 --lr 0.01 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | -------------------------------------------------------------------------------- /KGEAttack/complex_WN18RR.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | cd ConvE 4 | 5 | # train the original model 6 | echo 'Training original model' 7 | 8 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model complex --data WN18RR --lr 0.01 9 | 10 | echo 'Selecting target triples' 11 | mkdir data/target_complex_WN18RR_1 12 | 13 | CUDA_VISIBLE_DEVICES=0 python -u select_targets.py --model complex --data WN18RR --lr 0.01 14 | 15 | 16 | echo 'Generating random edits for the neighbourhood' 17 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_1.py --model complex --data WN18RR --budget 1 --rand-run 1 18 | python -u wrangle_KG.py rand_add_n_complex_WN18RR_1_1_1 19 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model complex --data rand_add_n_complex_WN18RR_1_1_1 --lr 0.01 20 | 21 | echo 'Generating global random 
edits' 22 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_2.py --model complex --data WN18RR --budget 1 --rand-run 1 23 | python -u wrangle_KG.py rand_add_g_complex_WN18RR_1_1_1 24 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model complex --data rand_add_g_complex_WN18RR_1_1_1 --lr 0.01 25 | 26 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_2.py --model complex --data WN18RR --budget 2 --rand-run 1 27 | python -u wrangle_KG.py rand_add_g_complex_WN18RR_1_2_1 28 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model complex --data rand_add_g_complex_WN18RR_1_2_1 --lr 0.01 29 | 30 | 31 | 32 | echo 'Generating symmetry edits with ground truth minimum' 33 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_1.py --model complex --data WN18RR --budget 1 34 | python -u wrangle_KG.py sym_add_1_complex_WN18RR_1_1_1 35 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model complex --data sym_add_1_complex_WN18RR_1_1_1 --lr 0.01 36 | 37 | 38 | echo 'Generating symmetry edits with worse ranks' 39 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_2.py --model complex --data WN18RR --budget 1 40 | python -u wrangle_KG.py sym_add_2_complex_WN18RR_1_1_1 41 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model complex --data sym_add_2_complex_WN18RR_1_1_1 --lr 0.01 42 | 43 | echo 'Generating symmetry edits with cosine distance' 44 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_3.py --model complex --data WN18RR --budget 1 45 | python -u wrangle_KG.py sym_add_3_complex_WN18RR_1_1_1 46 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model complex --data sym_add_3_complex_WN18RR_1_1_1 --lr 0.01 47 | 48 | 49 | 50 | 51 | echo 'Generating inversion edits with ground truth minimum' 52 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_1.py --model complex --data WN18RR --budget 1 53 | python -u wrangle_KG.py inv_add_1_complex_WN18RR_1_1_1 54 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model complex --data inv_add_1_complex_WN18RR_1_1_1 --lr 0.01 55 | 56 | echo 'Generating inversion edits with worse ranks' 57 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_2.py --model complex --data WN18RR --budget 1 58 | python -u wrangle_KG.py inv_add_2_complex_WN18RR_1_1_1 59 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model complex --data inv_add_2_complex_WN18RR_1_1_1 --lr 0.01 60 | 61 | echo 'Generating inversion edits with cosine distance' 62 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_3.py --model complex --data WN18RR --budget 1 63 | python -u wrangle_KG.py inv_add_3_complex_WN18RR_1_1_1 64 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model complex --data inv_add_3_complex_WN18RR_1_1_1 --lr 0.01 65 | 66 | 67 | 68 | echo 'Generating composition edits with ground truth values' 69 | #python -u create_clusters.py --model complex --data WN18RR --num-clusters 100 70 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_1.py --model complex --data WN18RR --budget 1 --num-clusters 100 --rand-run 1 71 | python -u wrangle_KG.py com_add_1_complex_WN18RR_1_1_1 72 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model complex --data com_add_1_complex_WN18RR_1_1_1 --lr 0.01 73 | 74 | echo 'Generating composition edits with just worse ranks ' 75 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_2.py --model complex --data WN18RR --budget 1 76 | python -u wrangle_KG.py com_add_2_complex_WN18RR_1_1_1 77 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model complex --data com_add_2_complex_WN18RR_1_1_1 --lr 0.01 78 | 79 | echo 'Generating composition edits with cosine distance ' 80 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_3.py --model 
complex --data WN18RR --budget 1 81 | python -u wrangle_KG.py com_add_3_complex_WN18RR_1_1_1 82 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model complex --data com_add_3_complex_WN18RR_1_1_1 --lr 0.01 83 | 84 | 85 | 86 | 87 | echo 'Generating edits from IJCAI-19 baseline ' 88 | CUDA_VISIBLE_DEVICES=0 python -u ijcai_add_attack_1.py --model complex --data WN18RR --budget 1 --corruption-factor 20 --rand-run 1 --use-gpu 89 | python -u wrangle_KG.py ijcai_add_1_complex_WN18RR_1_1_1_20.0 90 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model complex --data ijcai_add_1_complex_WN18RR_1_1_1_20.0 --lr 0.01 91 | 92 | 93 | CUDA_VISIBLE_DEVICES=0 python -u ijcai_add_attack_1.py --model complex --data WN18RR --budget 1 --corruption-factor 5 --rand-run 1 --use-gpu 94 | python -u wrangle_KG.py ijcai_add_1_complex_WN18RR_1_1_1_5.0 95 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model complex --data ijcai_add_1_complex_WN18RR_1_1_1_5.0 --lr 0.01 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | -------------------------------------------------------------------------------- /KGEAttack/compute_decoy_metrics_FB15k-237.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | cd ConvE 4 | 5 | echo 'Computing metrics for decoy in FB15k-237 DistMult ' 6 | 7 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model distmult --data FB15k-237 --attack 'sym_add_1' --budget 1 8 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model distmult --data FB15k-237 --attack 'sym_add_2' --budget 1 9 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model distmult --data FB15k-237 --attack 'sym_add_3' --budget 1 10 | 11 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model distmult --data FB15k-237 --attack 'inv_add_1' --budget 1 12 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model distmult --data FB15k-237 --attack 'inv_add_2' --budget 1 13 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model distmult --data FB15k-237 --attack 'inv_add_3' --budget 1 14 | 15 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model distmult --data FB15k-237 --attack 'com_add_1' --budget 1 --rand-run 1 16 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model distmult --data FB15k-237 --attack 'com_add_2' --budget 1 17 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model distmult --data FB15k-237 --attack 'com_add_3' --budget 1 18 | 19 | 20 | echo 'Computing metrics for decoy in FB15k-237 Complex' 21 | 22 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model complex --data FB15k-237 --attack 'sym_add_1' --budget 1 23 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model complex --data FB15k-237 --attack 'sym_add_2' --budget 1 24 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model complex --data FB15k-237 --attack 'sym_add_3' --budget 1 25 | 26 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model complex --data FB15k-237 --attack 'inv_add_1' --budget 1 27 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model complex --data FB15k-237 --attack 'inv_add_2' --budget 1 28 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model complex --data FB15k-237 --attack 'inv_add_3' --budget 1 29 | 30 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model complex --data FB15k-237 --attack 'com_add_1' --budget 1 --rand-run 1 31 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model complex --data FB15k-237 --attack 'com_add_2' --budget 1 32 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model complex 
--data FB15k-237 --attack 'com_add_3' --budget 1 33 | 34 | 35 | echo 'Computing metrics for decoy in FB15k-237 Transe ' 36 | 37 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model transe --data FB15k-237 --attack 'sym_add_1' --budget 1 38 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model transe --data FB15k-237 --attack 'sym_add_2' --budget 1 39 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model transe --data FB15k-237 --attack 'sym_add_3' --budget 1 40 | 41 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model transe --data FB15k-237 --attack 'inv_add_1' --budget 1 42 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model transe --data FB15k-237 --attack 'inv_add_2' --budget 1 43 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model transe --data FB15k-237 --attack 'inv_add_3' --budget 1 44 | 45 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model transe --data FB15k-237 --attack 'com_add_1' --budget 1 --rand-run 1 46 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model transe --data FB15k-237 --attack 'com_add_2' --budget 1 47 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model transe --data FB15k-237 --attack 'com_add_3' --budget 1 48 | 49 | 50 | 51 | echo 'Computing metrics for decoy in FB15k-237 ConvE' 52 | 53 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model conve --data FB15k-237 --attack 'sym_add_1' --budget 1 54 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model conve --data FB15k-237 --attack 'sym_add_2' --budget 1 55 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model conve --data FB15k-237 --attack 'sym_add_3' --budget 1 56 | 57 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model conve --data FB15k-237 --attack 'inv_add_1' --budget 1 58 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model conve --data FB15k-237 --attack 'inv_add_2' --budget 1 59 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model conve --data FB15k-237 --attack 'inv_add_3' --budget 1 60 | 61 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model conve --data FB15k-237 --attack 'com_add_1' --budget 1 --rand-run 1 62 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model conve --data FB15k-237 --attack 'com_add_2' --budget 1 63 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model conve --data FB15k-237 --attack 'com_add_3' --budget 1 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | -------------------------------------------------------------------------------- /KGEAttack/compute_decoy_metrics_WN18RR.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | cd ConvE 4 | 5 | echo 'Computing metrics for decoy in WN18RR DistMult ' 6 | 7 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model distmult --data WN18RR --attack 'sym_add_1' --budget 1 8 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model distmult --data WN18RR --attack 'sym_add_2' --budget 1 9 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model distmult --data WN18RR --attack 'sym_add_3' --budget 1 10 | 11 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model distmult --data WN18RR --attack 'inv_add_1' --budget 1 12 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model distmult --data WN18RR --attack 'inv_add_2' --budget 1 13 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model distmult --data WN18RR --attack 'inv_add_3' --budget 1 14 | 15 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model distmult --data WN18RR --attack 'com_add_1' --budget 1 --rand-run 1 
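# Note: 'com_add_1' is the only randomised attack in this sweep, so it alone takes --rand-run (presumably the index of the random seed used when its cluster-based edits were generated); the other decoy checks are deterministic. A compact equivalent of the remaining composition checks, assuming decoy_test.py accepts identical flags for every attack name:
# for atk in com_add_2 com_add_3; do
#     CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model distmult --data WN18RR --attack "$atk" --budget 1
# done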
16 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model distmult --data WN18RR --attack 'com_add_2' --budget 1 17 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model distmult --data WN18RR --attack 'com_add_3' --budget 1 18 | 19 | 20 | echo 'Computing metrics for decoy in WN18RR Complex' 21 | 22 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model complex --data WN18RR --attack 'sym_add_1' --budget 1 23 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model complex --data WN18RR --attack 'sym_add_2' --budget 1 24 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model complex --data WN18RR --attack 'sym_add_3' --budget 1 25 | 26 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model complex --data WN18RR --attack 'inv_add_1' --budget 1 27 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model complex --data WN18RR --attack 'inv_add_2' --budget 1 28 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model complex --data WN18RR --attack 'inv_add_3' --budget 1 29 | 30 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model complex --data WN18RR --attack 'com_add_1' --budget 1 --rand-run 1 31 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model complex --data WN18RR --attack 'com_add_2' --budget 1 32 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model complex --data WN18RR --attack 'com_add_3' --budget 1 33 | 34 | 35 | echo 'Computing metrics for decoy in WN18RR Transe ' 36 | 37 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model transe --data WN18RR --attack 'sym_add_1' --budget 1 38 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model transe --data WN18RR --attack 'sym_add_2' --budget 1 39 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model transe --data WN18RR --attack 'sym_add_3' --budget 1 40 | 41 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model transe --data WN18RR --attack 'inv_add_1' --budget 1 42 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model transe --data WN18RR --attack 'inv_add_2' --budget 1 43 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model transe --data WN18RR --attack 'inv_add_3' --budget 1 44 | 45 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model transe --data WN18RR --attack 'com_add_1' --budget 1 --rand-run 1 46 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model transe --data WN18RR --attack 'com_add_2' --budget 1 47 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model transe --data WN18RR --attack 'com_add_3' --budget 1 48 | 49 | 50 | 51 | echo 'Computing metrics for decoy in WN18RR ConvE' 52 | 53 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model conve --data WN18RR --attack 'sym_add_1' --budget 1 54 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model conve --data WN18RR --attack 'sym_add_2' --budget 1 55 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model conve --data WN18RR --attack 'sym_add_3' --budget 1 56 | 57 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model conve --data WN18RR --attack 'inv_add_1' --budget 1 58 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model conve --data WN18RR --attack 'inv_add_2' --budget 1 59 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model conve --data WN18RR --attack 'inv_add_3' --budget 1 60 | 61 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model conve --data WN18RR --attack 'com_add_1' --budget 1 --rand-run 1 62 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model conve --data WN18RR --attack 'com_add_2' --budget 1 63 | CUDA_VISIBLE_DEVICES=0 python -u decoy_test.py --model conve --data WN18RR --attack 
'com_add_3' --budget 1 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | -------------------------------------------------------------------------------- /KGEAttack/conve_FB15k-237.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | cd ConvE 4 | 5 | # train the original model 6 | echo 'Training original model' 7 | 8 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data FB15k-237 --lr 0.001 --hidden-drop 0.5 9 | 10 | 11 | echo 'Selecting target triples' 12 | mkdir data/target_conve_FB15k-237_1 13 | 14 | CUDA_VISIBLE_DEVICES=0 python -u select_targets.py --model conve --data FB15k-237 --lr 0.001 --hidden-drop 0.5 15 | 16 | 17 | 18 | echo 'Generating random edits for the neighbourhood' 19 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_1.py --model conve --data FB15k-237 --budget 1 --rand-run 1 20 | python -u wrangle_KG.py rand_add_n_conve_FB15k-237_1_1_1 21 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data rand_add_n_conve_FB15k-237_1_1_1 --lr 0.001 --hidden-drop 0.5 22 | 23 | echo 'Generating global random edits' 24 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_2.py --model conve --data FB15k-237 --budget 1 --rand-run 1 25 | python -u wrangle_KG.py rand_add_g_conve_FB15k-237_1_1_1 26 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data rand_add_g_conve_FB15k-237_1_1_1 --lr 0.001 --hidden-drop 0.5 27 | 28 | 29 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_2.py --model conve --data FB15k-237 --budget 2 --rand-run 1 30 | python -u wrangle_KG.py rand_add_g_conve_FB15k-237_1_2_1 31 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data rand_add_g_conve_FB15k-237_1_2_1 --lr 0.001 --hidden-drop 0.5 32 | 33 | 34 | 35 | echo 'Generating symmetry edits with ground truth minimum' 36 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_1.py --model conve --data FB15k-237 --budget 1 37 | python -u wrangle_KG.py sym_add_1_conve_FB15k-237_1_1_1 38 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data sym_add_1_conve_FB15k-237_1_1_1 --lr 0.001 --hidden-drop 0.5 39 | 40 | echo 'Generating symmetry edits with worse ranks' 41 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_2.py --model conve --data FB15k-237 --budget 1 42 | python -u wrangle_KG.py sym_add_2_conve_FB15k-237_1_1_1 43 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data sym_add_2_conve_FB15k-237_1_1_1 --lr 0.001 --hidden-drop 0.5 44 | 45 | echo 'Generating symmetry edits with cosine distance' 46 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_3.py --model conve --data FB15k-237 --budget 1 47 | python -u wrangle_KG.py sym_add_3_conve_FB15k-237_1_1_1 48 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data sym_add_3_conve_FB15k-237_1_1_1 --lr 0.001 --hidden-drop 0.5 49 | 50 | 51 | 52 | 53 | 54 | echo 'Generating inversion edits with ground truth minimum' 55 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_1.py --model conve --data FB15k-237 --budget 1 56 | python -u wrangle_KG.py inv_add_1_conve_FB15k-237_1_1_1 57 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data inv_add_1_conve_FB15k-237_1_1_1 --lr 0.001 --hidden-drop 0.5 58 | 59 | echo 'Generating inversion edits with worse ranks' 60 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_2.py --model conve --data FB15k-237 --budget 1 61 | python -u wrangle_KG.py inv_add_2_conve_FB15k-237_1_1_1 62 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data inv_add_2_conve_FB15k-237_1_1_1 --lr 0.001 
--hidden-drop 0.5 63 | 64 | echo 'Generating inversion edits with cosine distance' 65 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_3.py --model conve --data FB15k-237 --budget 1 66 | python -u wrangle_KG.py inv_add_3_conve_FB15k-237_1_1_1 67 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data inv_add_3_conve_FB15k-237_1_1_1 --lr 0.001 --hidden-drop 0.5 68 | 69 | 70 | 71 | 72 | echo 'Generating composition edits with ground truth values' 73 | python -u create_clusters.py --model conve --data FB15k-237 --num-clusters 300 74 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_1.py --model conve --data FB15k-237 --budget 1 --num-clusters 300 --rand-run 1 75 | python -u wrangle_KG.py com_add_1_conve_FB15k-237_1_1_1 76 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data com_add_1_conve_FB15k-237_1_1_1 --lr 0.001 --hidden-drop 0.5 77 | 78 | 79 | echo 'Generating composition attack with just worse ranks ' 80 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_2.py --model conve --data FB15k-237 --budget 1 81 | python -u wrangle_KG.py com_add_2_conve_FB15k-237_1_1_1 82 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data com_add_2_conve_FB15k-237_1_1_1 --lr 0.001 --hidden-drop 0.5 83 | 84 | echo 'Generating composition attack with cosine distance ' 85 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_3.py --model conve --data FB15k-237 --budget 1 86 | python -u wrangle_KG.py com_add_3_conve_FB15k-237_1_1_1 87 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data com_add_3_conve_FB15k-237_1_1_1 --lr 0.001 --hidden-drop 0.5 88 | 89 | 90 | 91 | echo 'Generating edits from IJCAI-19 baseline ' 92 | CUDA_VISIBLE_DEVICES=0 python -u ijcai_add_attack_1.py --model conve --data FB15k-237 --budget 1 --corruption-factor 0.1 --rand-run 1 --use-gpu 93 | python -u wrangle_KG.py ijcai_add_1_conve_FB15k-237_1_1_1_0.1 94 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data ijcai_add_1_conve_FB15k-237_1_1_1_0.1 --lr 0.001 --hidden-drop 0.5 95 | 96 | 97 | CUDA_VISIBLE_DEVICES=0 python -u ijcai_add_attack_1.py --model conve --data FB15k-237 --budget 1 --corruption-factor 0.3 --rand-run 1 --use-gpu 98 | python -u wrangle_KG.py ijcai_add_1_conve_FB15k-237_1_1_1_0.3 99 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data ijcai_add_1_conve_FB15k-237_1_1_1_0.3 --lr 0.001 --hidden-drop 0.5 100 | 101 | 102 | 103 | echo 'Generating edits from criage baseline ' 104 | python -u wrangle_KG.py target_conve_FB15k-237_1 105 | CUDA_VISIBLE_DEVICES=0 python -u criage_inverter.py --model conve --data FB15k-237 --lr 0.001 --hidden-drop 0.5 106 | CUDA_VISIBLE_DEVICES=0 python -u criage_add_attack_1.py --model conve --data FB15k-237 107 | python -u wrangle_KG.py criage_add_1_conve_FB15k-237_1_1_1 108 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data criage_add_1_conve_FB15k-237_1_1_1 --lr 0.001 --hidden-drop 0.5 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | -------------------------------------------------------------------------------- /KGEAttack/conve_WN18.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | cd ConvE 4 | 5 | # train the original model 6 | echo 'Training original model' 7 | 8 | # CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data WN18 --lr 0.001 9 | # CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data WN18 --lr 0.001 --hidden-drop 0.1 10 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data WN18 --lr 0.005 11 | # 
CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data WN18 --lr 0.01 12 | 13 | echo 'Selecting target triples' 14 | mkdir data/target_conve_WN18_1 15 | 16 | CUDA_VISIBLE_DEVICES=0 python -u select_targets.py --model conve --data WN18 --lr 0.005 17 | 18 | echo 'Generating random edits for the neighbourhood' 19 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_1.py --model conve --data WN18 --budget 1 --rand-run 1 20 | python -u wrangle_KG.py rand_add_n_conve_WN18_1_1_1 21 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data rand_add_n_conve_WN18_1_1_1 --lr 0.005 22 | 23 | echo 'Generating global random edits' 24 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_2.py --model conve --data WN18 --budget 1 --rand-run 1 25 | python -u wrangle_KG.py rand_add_g_conve_WN18_1_1_1 26 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data rand_add_g_conve_WN18_1_1_1 --lr 0.005 27 | 28 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_2.py --model conve --data WN18 --budget 2 --rand-run 1 29 | python -u wrangle_KG.py rand_add_g_conve_WN18_1_2_1 30 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data rand_add_g_conve_WN18_1_2_1 --lr 0.005 31 | 32 | 33 | 34 | echo 'Generating symmetry edits with ground truth minimum' 35 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_1.py --model conve --data WN18 --budget 1 36 | python -u wrangle_KG.py sym_add_1_conve_WN18_1_1_1 37 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data sym_add_1_conve_WN18_1_1_1 --lr 0.005 38 | 39 | echo 'Generating symmetry edits with worse ranks' 40 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_2.py --model conve --data WN18 --budget 1 41 | python -u wrangle_KG.py sym_add_2_conve_WN18_1_1_1 42 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data sym_add_2_conve_WN18_1_1_1 --lr 0.005 43 | 44 | echo 'Generating symmetry edits with cosine distance' 45 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_3.py --model conve --data WN18 --budget 1 46 | python -u wrangle_KG.py sym_add_3_conve_WN18_1_1_1 47 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data sym_add_3_conve_WN18_1_1_1 --lr 0.005 48 | 49 | 50 | 51 | 52 | echo 'Generating inversion edits with ground truth minimum' 53 | 54 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_1.py --model conve --data WN18 --budget 1 55 | python -u wrangle_KG.py inv_add_1_conve_WN18_1_1_1 56 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data inv_add_1_conve_WN18_1_1_1 --lr 0.005 57 | 58 | echo 'Generating inversion edits with worse ranks' 59 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_2.py --model conve --data WN18 --budget 1 60 | python -u wrangle_KG.py inv_add_2_conve_WN18_1_1_1 61 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data inv_add_2_conve_WN18_1_1_1 --lr 0.005 62 | 63 | echo 'Generating inversion edits with cosine distance' 64 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_3.py --model conve --data WN18 --budget 1 65 | python -u wrangle_KG.py inv_add_3_conve_WN18_1_1_1 66 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data inv_add_3_conve_WN18_1_1_1 --lr 0.005 67 | 68 | 69 | 70 | 71 | echo 'Generating composition edits with ground truth values' 72 | python -u create_clusters.py --model conve --data WN18 --num-clusters 300 73 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_1.py --model conve --data WN18 --budget 1 --num-clusters 300 --rand-run 1 74 | python -u wrangle_KG.py com_add_1_conve_WN18_1_1_1 75 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data 
com_add_1_conve_WN18_1_1_1 --lr 0.005 76 | 77 | 78 | echo 'Generating composition edits with just worse ranks ' 79 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_2.py --model conve --data WN18 --budget 1 80 | python -u wrangle_KG.py com_add_2_conve_WN18_1_1_1 81 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data com_add_2_conve_WN18_1_1_1 --lr 0.005 82 | 83 | 84 | echo 'Generating composition edits with cosine distance ' 85 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_3.py --model conve --data WN18 --budget 1 86 | python -u wrangle_KG.py com_add_3_conve_WN18_1_1_1 87 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data com_add_3_conve_WN18_1_1_1 --lr 0.005 88 | 89 | 90 | 91 | 92 | echo 'Generating edits from IJCAI-19 baseline ' 93 | # CUDA_VISIBLE_DEVICES=0 python -u ijcai_add_attack_1.py --model conve --data WN18 --budget 1 --corruption-factor 0.1 --rand-run 1 --use-gpu 94 | # python -u wrangle_KG.py ijcai_add_1_conve_WN18_1_1_1_0.1 95 | # CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data ijcai_add_1_conve_WN18_1_1_1_0.1 --lr 0.005 96 | 97 | 98 | CUDA_VISIBLE_DEVICES=0 python -u ijcai_add_attack_1.py --model conve --data WN18 --budget 1 --corruption-factor 2 --rand-run 1 --use-gpu 99 | python -u wrangle_KG.py ijcai_add_1_conve_WN18_1_1_1_2.0 100 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data ijcai_add_1_conve_WN18_1_1_1_2.0 --lr 0.005 101 | 102 | 103 | 104 | 105 | 106 | echo 'Generating edits from Criage baseline ' 107 | python -u wrangle_KG.py target_conve_WN18_1 108 | CUDA_VISIBLE_DEVICES=0 python -u criage_inverter.py --model conve --data WN18 --lr 0.005 109 | CUDA_VISIBLE_DEVICES=0 python -u criage_add_attack_1.py --model conve --data WN18 110 | python -u wrangle_KG.py criage_add_1_conve_WN18_1_1_1 111 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data criage_add_1_conve_WN18_1_1_1 --lr 0.005 112 | 113 | 114 | 115 | 116 | 117 | -------------------------------------------------------------------------------- /KGEAttack/conve_WN18RR.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | cd ConvE 4 | 5 | # train the original model 6 | echo 'Training original model' 7 | 8 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data WN18RR --lr 0.001 9 | 10 | echo 'Selecting target triples' 11 | mkdir data/target_conve_WN18RR_1 12 | 13 | CUDA_VISIBLE_DEVICES=0 python -u select_targets.py --model conve --data WN18RR --lr 0.001 14 | 15 | 16 | 17 | echo 'Generating random edits for the neighbourhood' 18 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_1.py --model conve --data WN18RR --budget 1 --rand-run 1 19 | python -u wrangle_KG.py rand_add_n_conve_WN18RR_1_1_1 20 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data rand_add_n_conve_WN18RR_1_1_1 --lr 0.001 21 | 22 | echo 'Generating global random edits' 23 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_2.py --model conve --data WN18RR --budget 1 --rand-run 1 24 | python -u wrangle_KG.py rand_add_g_conve_WN18RR_1_1_1 25 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data rand_add_g_conve_WN18RR_1_1_1 --lr 0.001 26 | 27 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_2.py --model conve --data WN18RR --budget 2 --rand-run 1 28 | python -u wrangle_KG.py rand_add_g_conve_WN18RR_1_2_1 29 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data rand_add_g_conve_WN18RR_1_2_1 --lr 0.001 30 | 31 | 32 | 33 | echo 'Generating symmetry edits with ground truth minimum' 34 | 
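# The _1/_2/_3 suffixes on the attack scripts match the three heuristics announced by the echo lines: ground-truth minimum, worse ranks, and cosine distance. Every block below repeats the same three steps: generate the adversarial additions, rebuild the dataset files with wrangle_KG.py, then retrain from scratch with main.py. A hypothetical helper sketching that pattern (assuming the poisoned dataset name always follows <attack>_<model>_<data>_<target-set>_<budget>_<rand-run>):
# run_attack() {  # usage: run_attack sym_add_attack_1 sym_add_1
#     CUDA_VISIBLE_DEVICES=0 python -u "$1.py" --model conve --data WN18RR --budget 1
#     python -u wrangle_KG.py "${2}_conve_WN18RR_1_1_1"
#     CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data "${2}_conve_WN18RR_1_1_1" --lr 0.001
# }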
CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_1.py --model conve --data WN18RR --budget 1 35 | python -u wrangle_KG.py sym_add_1_conve_WN18RR_1_1_1 36 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data sym_add_1_conve_WN18RR_1_1_1 --lr 0.001 37 | 38 | echo 'Generating symmetry edits with worse ranks' 39 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_2.py --model conve --data WN18RR --budget 1 40 | python -u wrangle_KG.py sym_add_2_conve_WN18RR_1_1_1 41 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data sym_add_2_conve_WN18RR_1_1_1 --lr 0.001 42 | 43 | echo 'Generating symmetry edits with cosine distance' 44 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_3.py --model conve --data WN18RR --budget 1 45 | python -u wrangle_KG.py sym_add_3_conve_WN18RR_1_1_1 46 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data sym_add_3_conve_WN18RR_1_1_1 --lr 0.001 47 | 48 | 49 | 50 | 51 | echo 'Generating inversion edits with ground truth minimum' 52 | 53 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_1.py --model conve --data WN18RR --budget 1 54 | python -u wrangle_KG.py inv_add_1_conve_WN18RR_1_1_1 55 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data inv_add_1_conve_WN18RR_1_1_1 --lr 0.001 56 | 57 | echo 'Generating inversion edits with worse ranks' 58 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_2.py --model conve --data WN18RR --budget 1 59 | python -u wrangle_KG.py inv_add_2_conve_WN18RR_1_1_1 60 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data inv_add_2_conve_WN18RR_1_1_1 --lr 0.001 61 | 62 | echo 'Generating inversion edits with cosine distance' 63 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_3.py --model conve --data WN18RR --budget 1 64 | python -u wrangle_KG.py inv_add_3_conve_WN18RR_1_1_1 65 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data inv_add_3_conve_WN18RR_1_1_1 --lr 0.001 66 | 67 | 68 | 69 | 70 | echo 'Generating composition edits with ground truth values' 71 | python -u create_clusters.py --model conve --data WN18RR --num-clusters 300 72 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_1.py --model conve --data WN18RR --budget 1 --num-clusters 300 --rand-run 1 73 | python -u wrangle_KG.py com_add_1_conve_WN18RR_1_1_1 74 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data com_add_1_conve_WN18RR_1_1_1 --lr 0.001 75 | 76 | 77 | echo 'Generating composition edits with just worse ranks ' 78 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_2.py --model conve --data WN18RR --budget 1 79 | python -u wrangle_KG.py com_add_2_conve_WN18RR_1_1_1 80 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data com_add_2_conve_WN18RR_1_1_1 --lr 0.001 81 | 82 | 83 | echo 'Generating composition edits with cosine distance ' 84 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_3.py --model conve --data WN18RR --budget 1 85 | python -u wrangle_KG.py com_add_3_conve_WN18RR_1_1_1 86 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data com_add_3_conve_WN18RR_1_1_1 --lr 0.001 87 | 88 | 89 | 90 | 91 | echo 'Generating edits from IJCAI-19 baseline ' 92 | CUDA_VISIBLE_DEVICES=0 python -u ijcai_add_attack_1.py --model conve --data WN18RR --budget 1 --corruption-factor 0.1 --rand-run 1 --use-gpu 93 | python -u wrangle_KG.py ijcai_add_1_conve_WN18RR_1_1_1_0.1 94 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data ijcai_add_1_conve_WN18RR_1_1_1_0.1 --lr 0.001 95 | 96 | 97 | CUDA_VISIBLE_DEVICES=0 python -u ijcai_add_attack_1.py --model conve --data WN18RR --budget 1 
--corruption-factor 2 --rand-run 1 --use-gpu 98 | python -u wrangle_KG.py ijcai_add_1_conve_WN18RR_1_1_1_2.0 99 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data ijcai_add_1_conve_WN18RR_1_1_1_2.0 --lr 0.001 100 | 101 | 102 | 103 | 104 | 105 | echo 'Generating edits from Criage baseline ' 106 | python -u wrangle_KG.py target_conve_WN18RR_1 107 | CUDA_VISIBLE_DEVICES=0 python -u criage_inverter.py --model conve --data WN18RR --lr 0.001 108 | CUDA_VISIBLE_DEVICES=0 python -u criage_add_attack_1.py --model conve --data WN18RR 109 | python -u wrangle_KG.py criage_add_1_conve_WN18RR_1_1_1 110 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model conve --data criage_add_1_conve_WN18RR_1_1_1 --lr 0.001 111 | 112 | 113 | 114 | 115 | -------------------------------------------------------------------------------- /KGEAttack/distmult_FB15k-237.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | cd ConvE 4 | 5 | # train the original model 6 | echo 'Training original model' 7 | 8 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data FB15k-237 --lr 0.005 --input-drop 0.5 9 | 10 | echo 'Selecting target triples' 11 | mkdir data/target_distmult_FB15k-237_1 12 | 13 | CUDA_VISIBLE_DEVICES=0 python -u select_targets.py --model distmult --data FB15k-237 --lr 0.005 --input-drop 0.5 14 | 15 | # echo 'Re-training the model to compute baseline change in metrics for target set' 16 | #python -u wrangle_KG.py target_distmult_FB15k-237_1 17 | #CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data target_distmult_FB15k-237_1 --lr 0.005 --input-drop 0.5 18 | 19 | 20 | echo 'Generating random edits for the neighbourhood' 21 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_1.py --model distmult --data FB15k-237 --budget 1 --rand-run 1 22 | python -u wrangle_KG.py rand_add_n_distmult_FB15k-237_1_1_1 23 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data rand_add_n_distmult_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5 24 | 25 | 26 | echo 'Generating global random edits with 1 edit' 27 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_2.py --model distmult --data FB15k-237 --budget 1 --rand-run 1 28 | python -u wrangle_KG.py rand_add_g_distmult_FB15k-237_1_1_1 29 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data rand_add_g_distmult_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5 30 | 31 | echo 'Generating global random edits with 2 edits' 32 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_2.py --model distmult --data FB15k-237 --budget 2 --rand-run 1 33 | python -u wrangle_KG.py rand_add_g_distmult_FB15k-237_1_2_1 34 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data rand_add_g_distmult_FB15k-237_1_2_1 --lr 0.005 --input-drop 0.5 35 | 36 | 37 | echo 'Generating symmetry edits with ground truth minimum' 38 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_1.py --model distmult --data FB15k-237 --budget 1 39 | python -u wrangle_KG.py sym_add_1_distmult_FB15k-237_1_1_1 40 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data sym_add_1_distmult_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5 41 | 42 | echo 'Generating symmetry edits with worse ranks' 43 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_2.py --model distmult --data FB15k-237 --budget 1 44 | python -u wrangle_KG.py sym_add_2_distmult_FB15k-237_1_1_1 45 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data sym_add_2_distmult_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5 46 | 47 | echo 'Generating symmetry
edits with cosine distance' 48 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_3.py --model distmult --data FB15k-237 --budget 1 49 | python -u wrangle_KG.py sym_add_3_distmult_FB15k-237_1_1_1 50 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data sym_add_3_distmult_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5 51 | 52 | 53 | 54 | echo 'Generating inversion edits with ground truth minimum' 55 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_1.py --model distmult --data FB15k-237 --budget 1 56 | python -u wrangle_KG.py inv_add_1_distmult_FB15k-237_1_1_1 57 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data inv_add_1_distmult_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5 58 | 59 | echo 'Generating inversion edits with worse ranks' 60 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_2.py --model distmult --data FB15k-237 --budget 1 61 | python -u wrangle_KG.py inv_add_2_distmult_FB15k-237_1_1_1 62 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data inv_add_2_distmult_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5 63 | 64 | echo 'Generating inversion edits with cosine distance' 65 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_3.py --model distmult --data FB15k-237 --budget 1 66 | python -u wrangle_KG.py inv_add_3_distmult_FB15k-237_1_1_1 67 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data inv_add_3_distmult_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5 68 | 69 | 70 | 71 | echo 'Generating composition edits with ground truth values' 72 | python -u create_clusters.py --model distmult --data FB15k-237 --num-clusters 200 73 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_1.py --model distmult --data FB15k-237 --budget 1 --num-clusters 200 --rand-run 1 74 | python -u wrangle_KG.py com_add_1_distmult_FB15k-237_1_1_1 75 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data com_add_1_distmult_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5 76 | 77 | 78 | echo 'Generating composition edits with just worse ranks' 79 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_2.py --model distmult --data FB15k-237 --budget 1 80 | python -u wrangle_KG.py com_add_2_distmult_FB15k-237_1_1_1 81 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data com_add_2_distmult_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5 82 | 83 | 84 | echo 'Generating composition edits with cosine distance' 85 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_3.py --model distmult --data FB15k-237 --budget 1 86 | python -u wrangle_KG.py com_add_3_distmult_FB15k-237_1_1_1 87 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data com_add_3_distmult_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5 88 | 89 | 90 | 91 | 92 | echo 'Generating edits from IJCAI-19 baseline ' 93 | CUDA_VISIBLE_DEVICES=0 python -u ijcai_add_attack_1.py --model distmult --data FB15k-237 --budget 1 --corruption-factor 20 --rand-run 1 --use-gpu 94 | python -u wrangle_KG.py ijcai_add_1_distmult_FB15k-237_1_1_1_20.0 95 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data ijcai_add_1_distmult_FB15k-237_1_1_1_20.0 --lr 0.005 --input-drop 0.5 96 | 97 | CUDA_VISIBLE_DEVICES=0 python -u ijcai_add_attack_1.py --model distmult --data FB15k-237 --budget 1 --corruption-factor 5 --rand-run 1 --use-gpu 98 | python -u wrangle_KG.py ijcai_add_1_distmult_FB15k-237_1_1_1_5.0 99 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data ijcai_add_1_distmult_FB15k-237_1_1_1_5.0 --lr 0.005 --input-drop 0.5 100 | 101 | 102 | 103 | echo 'Generating edits from CRIAGE baseline' 104 | 
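# CRIAGE is the only baseline that needs an extra training stage: the target split is wrangled first so that criage_inverter.py can fit its inverter network on the trained embeddings, and criage_add_attack_1.py then uses that inverter to propose the adversarial additions before the usual wrangle-and-retrain steps (flow inferred from the commands below).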
python -u wrangle_KG.py target_distmult_FB15k-237_1 105 | CUDA_VISIBLE_DEVICES=0 python -u criage_inverter.py --model distmult --data FB15k-237 --lr 0.005 --input-drop 0.5 106 | CUDA_VISIBLE_DEVICES=0 python -u criage_add_attack_1.py --model distmult --data FB15k-237 107 | python -u wrangle_KG.py criage_add_1_distmult_FB15k-237_1_1_1 108 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data criage_add_1_distmult_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | -------------------------------------------------------------------------------- /KGEAttack/distmult_WN18.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | cd ConvE 4 | 5 | # train the original model 6 | echo 'Training original model' 7 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data WN18 --lr 0.01 --num-batches 50 #this can be used 8 | # CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data WN18 --lr 0.01 --num-batches 50 --input-drop 0.0 9 | 10 | echo 'Selecting target triples' 11 | mkdir data/target_distmult_WN18_1 12 | 13 | CUDA_VISIBLE_DEVICES=0 python -u select_targets.py --model distmult --data WN18 --lr 0.01 --num-batches 50 14 | 15 | # echo 'Re-training the model to compute baseline change in metrics for target set' 16 | python -u wrangle_KG.py target_distmult_WN18_1 17 | # CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data target_distmult_WN18_1 --lr 0.01 --num-batches 50 18 | 19 | 20 | echo 'Generating random edits for the neighbourhood' 21 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_1.py --model distmult --data WN18 --budget 1 --rand-run 1 22 | python -u wrangle_KG.py rand_add_n_distmult_WN18_1_1_1 23 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data rand_add_n_distmult_WN18_1_1_1 --lr 0.01 --num-batches 50 24 | 25 | 26 | echo 'Generating global random edits with 1 edit' 27 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_2.py --model distmult --data WN18 --budget 1 --rand-run 1 28 | python -u wrangle_KG.py rand_add_g_distmult_WN18_1_1_1 29 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data rand_add_g_distmult_WN18_1_1_1 --lr 0.01 --num-batches 50 30 | 31 | echo 'Generating global random edits with 2 edits' 32 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_2.py --model distmult --data WN18 --budget 2 --rand-run 1 33 | python -u wrangle_KG.py rand_add_g_distmult_WN18_1_2_1 34 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data rand_add_g_distmult_WN18_1_2_1 --lr 0.01 --num-batches 50 35 | 36 | 37 | echo 'Generating symmetry edits with ground truth minimum' 38 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_1.py --model distmult --data WN18 --budget 1 39 | python -u wrangle_KG.py sym_add_1_distmult_WN18_1_1_1 40 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data sym_add_1_distmult_WN18_1_1_1 --lr 0.01 --num-batches 50 41 | 42 | echo 'Generating symmetry edits with worse ranks' 43 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_2.py --model distmult --data WN18 --budget 1 44 | python -u wrangle_KG.py sym_add_2_distmult_WN18_1_1_1 45 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data sym_add_2_distmult_WN18_1_1_1 --lr 0.01 --num-batches 50 46 | 47 | 48 | echo 'Generating symmetry edits with cosine distance' 49 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_3.py --model distmult --data WN18 --budget 1 50 | python -u wrangle_KG.py 
sym_add_3_distmult_WN18_1_1_1 51 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data sym_add_3_distmult_WN18_1_1_1 --lr 0.01 --num-batches 50 52 | 53 | 54 | echo 'Generating inversion edits with ground truth minimum' 55 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_1.py --model distmult --data WN18 --budget 1 56 | python -u wrangle_KG.py inv_add_1_distmult_WN18_1_1_1 57 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data inv_add_1_distmult_WN18_1_1_1 --lr 0.01 --num-batches 50 58 | 59 | 60 | echo 'Generating inversion edits with worse ranks' 61 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_2.py --model distmult --data WN18 --budget 1 62 | python -u wrangle_KG.py inv_add_2_distmult_WN18_1_1_1 63 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data inv_add_2_distmult_WN18_1_1_1 --lr 0.01 --num-batches 50 64 | 65 | 66 | echo 'Generating inversion edits with cosine distance' 67 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_3.py --model distmult --data WN18 --budget 1 68 | python -u wrangle_KG.py inv_add_3_distmult_WN18_1_1_1 69 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data inv_add_3_distmult_WN18_1_1_1 --lr 0.01 --num-batches 50 70 | 71 | 72 | 73 | echo 'Generating composition edits with ground truth values' 74 | python -u create_clusters.py --model distmult --data WN18 --num-clusters 300 75 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_1.py --model distmult --data WN18 --budget 1 --num-clusters 300 --rand-run 1 76 | python -u wrangle_KG.py com_add_1_distmult_WN18_1_1_1 77 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data com_add_1_distmult_WN18_1_1_1 --lr 0.01 --num-batches 50 78 | 79 | 80 | echo 'Generating composition edits with just worse ranks' 81 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_2.py --model distmult --data WN18 --budget 1 82 | python -u wrangle_KG.py com_add_2_distmult_WN18_1_1_1 83 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data com_add_2_distmult_WN18_1_1_1 --lr 0.01 --num-batches 50 84 | 85 | echo 'Generating composition edits with cosine distance' 86 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_3.py --model distmult --data WN18 --budget 1 87 | python -u wrangle_KG.py com_add_3_distmult_WN18_1_1_1 88 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data com_add_3_distmult_WN18_1_1_1 --lr 0.01 --num-batches 50 89 | 90 | 91 | 92 | echo 'Generating edits from IJCAI-19 baseline ' 93 | CUDA_VISIBLE_DEVICES=0 python -u ijcai_add_attack_1.py --model distmult --data WN18 --budget 1 --corruption-factor 20 --rand-run 1 --use-gpu 94 | python -u wrangle_KG.py ijcai_add_1_distmult_WN18_1_1_1_20.0 95 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data ijcai_add_1_distmult_WN18_1_1_1_20.0 --lr 0.01 --num-batches 50 96 | 97 | 98 | # CUDA_VISIBLE_DEVICES=0 python -u ijcai_add_attack_1.py --model distmult --data WN18 --budget 1 --corruption-factor 5 --rand-run 1 --use-gpu 99 | # python -u wrangle_KG.py ijcai_add_1_distmult_WN18_1_1_1_5.0 100 | # CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data ijcai_add_1_distmult_WN18_1_1_1_5.0 --lr 0.01 --num-batches 50 101 | 102 | 103 | echo 'Generating edits from criage baseline ' 104 | CUDA_VISIBLE_DEVICES=0 python -u criage_inverter.py --model distmult --data WN18 --lr 0.01 --num-batches 50 105 | CUDA_VISIBLE_DEVICES=0 python -u criage_add_attack_1.py --model distmult --data WN18 106 | python -u wrangle_KG.py criage_add_1_distmult_WN18_1_1_1 107 | CUDA_VISIBLE_DEVICES=0 python -u 
main.py --model distmult --data criage_add_1_distmult_WN18_1_1_1 --lr 0.01 --num-batches 50 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | -------------------------------------------------------------------------------- /KGEAttack/distmult_WN18RR.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | cd ConvE 4 | 5 | # train the original model 6 | echo 'Training original model' 7 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data WN18RR --lr 0.01 --num-batches 50 8 | 9 | echo 'Selecting target triples' 10 | mkdir data/target_distmult_WN18RR_1 11 | 12 | CUDA_VISIBLE_DEVICES=0 python -u select_targets.py --model distmult --data WN18RR --lr 0.01 --num-batches 50 13 | 14 | echo 'Re-training the model to compute baseline change in metrics for target set' 15 | python -u wrangle_KG.py target_distmult_WN18RR_1 16 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data target_distmult_WN18RR_1 --lr 0.01 --num-batches 50 17 | 18 | 19 | echo 'Generating random edits for the neighbourhood' 20 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_1.py --model distmult --data WN18RR --budget 1 --rand-run 1 21 | python -u wrangle_KG.py rand_add_n_distmult_WN18RR_1_1_1 22 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data rand_add_n_distmult_WN18RR_1_1_1 --lr 0.01 --num-batches 50 23 | 24 | 25 | echo 'Generating global random edits with 1 edit' 26 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_2.py --model distmult --data WN18RR --budget 1 --rand-run 1 27 | python -u wrangle_KG.py rand_add_g_distmult_WN18RR_1_1_1 28 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data rand_add_g_distmult_WN18RR_1_1_1 --lr 0.01 --num-batches 50 29 | 30 | echo 'Generating global random edits with 2 edits' 31 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_2.py --model distmult --data WN18RR --budget 2 --rand-run 1 32 | python -u wrangle_KG.py rand_add_g_distmult_WN18RR_1_2_1 33 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data rand_add_g_distmult_WN18RR_1_2_1 --lr 0.01 --num-batches 50 34 | 35 | 36 | echo 'Generating symmetry edits with ground truth minimum' 37 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_1.py --model distmult --data WN18RR --budget 1 38 | python -u wrangle_KG.py sym_add_1_distmult_WN18RR_1_1_1 39 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data sym_add_1_distmult_WN18RR_1_1_1 --lr 0.01 --num-batches 50 40 | 41 | echo 'Generating symmetry edits with worse ranks' 42 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_2.py --model distmult --data WN18RR --budget 1 43 | python -u wrangle_KG.py sym_add_2_distmult_WN18RR_1_1_1 44 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data sym_add_2_distmult_WN18RR_1_1_1 --lr 0.01 --num-batches 50 45 | 46 | 47 | echo 'Generating symmetry edits with cosine distance' 48 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_3.py --model distmult --data WN18RR --budget 1 49 | python -u wrangle_KG.py sym_add_3_distmult_WN18RR_1_1_1 50 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data sym_add_3_distmult_WN18RR_1_1_1 --lr 0.01 --num-batches 50 51 | 52 | 53 | echo 'Generating inversion edits with ground truth minimum' 54 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_1.py --model distmult --data WN18RR --budget 1 55 | python -u wrangle_KG.py inv_add_1_distmult_WN18RR_1_1_1 56 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data inv_add_1_distmult_WN18RR_1_1_1 --lr 
0.01 --num-batches 50 57 | 58 | 59 | echo 'Generating inversion edits with worse ranks' 60 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_2.py --model distmult --data WN18RR --budget 1 61 | python -u wrangle_KG.py inv_add_2_distmult_WN18RR_1_1_1 62 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data inv_add_2_distmult_WN18RR_1_1_1 --lr 0.01 --num-batches 50 63 | 64 | 65 | echo 'Generating inversion edits with cosine distance' 66 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_3.py --model distmult --data WN18RR --budget 1 67 | python -u wrangle_KG.py inv_add_3_distmult_WN18RR_1_1_1 68 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data inv_add_3_distmult_WN18RR_1_1_1 --lr 0.01 --num-batches 50 69 | 70 | 71 | 72 | echo 'Generating composition edits with ground truth values' 73 | python -u create_clusters.py --model distmult --data WN18RR --num-clusters 300 74 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_1.py --model distmult --data WN18RR --budget 1 --num-clusters 300 --rand-run 1 75 | python -u wrangle_KG.py com_add_1_distmult_WN18RR_1_1_1 76 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data com_add_1_distmult_WN18RR_1_1_1 --lr 0.01 --num-batches 50 77 | 78 | 79 | echo 'Generating composition edits with just worse ranks' 80 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_2.py --model distmult --data WN18RR --budget 1 81 | python -u wrangle_KG.py com_add_2_distmult_WN18RR_1_1_1 82 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data com_add_2_distmult_WN18RR_1_1_1 --lr 0.01 --num-batches 50 83 | 84 | echo 'Generating composition edits with cosine distance' 85 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_3.py --model distmult --data WN18RR --budget 1 86 | python -u wrangle_KG.py com_add_3_distmult_WN18RR_1_1_1 87 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data com_add_3_distmult_WN18RR_1_1_1 --lr 0.01 --num-batches 50 88 | 89 | 90 | 91 | echo 'Generating edits from IJCAI-19 baseline ' 92 | CUDA_VISIBLE_DEVICES=0 python -u ijcai_add_attack_1.py --model distmult --data WN18RR --budget 1 --corruption-factor 20 --rand-run 1 --use-gpu 93 | python -u wrangle_KG.py ijcai_add_1_distmult_WN18RR_1_1_1_20.0 94 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data ijcai_add_1_distmult_WN18RR_1_1_1_20.0 --lr 0.01 --num-batches 50 95 | 96 | 97 | CUDA_VISIBLE_DEVICES=0 python -u ijcai_add_attack_1.py --model distmult --data WN18RR --budget 1 --corruption-factor 5 --rand-run 1 --use-gpu 98 | python -u wrangle_KG.py ijcai_add_1_distmult_WN18RR_1_1_1_5.0 99 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data ijcai_add_1_distmult_WN18RR_1_1_1_5.0 --lr 0.01 --num-batches 50 100 | 101 | 102 | echo 'Generating edits from criage baseline ' 103 | python -u wrangle_KG.py target_distmult_WN18RR_1 104 | CUDA_VISIBLE_DEVICES=0 python -u criage_inverter.py --model distmult --data WN18RR --lr 0.01 --num-batches 50 105 | CUDA_VISIBLE_DEVICES=0 python -u criage_add_attack_1.py --model distmult --data WN18RR 106 | python -u wrangle_KG.py criage_add_1_distmult_WN18RR_1_1_1 107 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model distmult --data criage_add_1_distmult_WN18RR_1_1_1 --lr 0.01 --num-batches 50 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | -------------------------------------------------------------------------------- /KGEAttack/grad_add_attack_FB15k-237.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | cd ConvE 4 | 5 | 
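# Gradient attribution appears to score candidate additions by the similarity between the loss gradient of the target triple and the gradients of its neighbouring training triples; the three runs per model below only swap that similarity via --sim-metric (dot, cos or l2). --reproduce-results presumably reloads the saved original model and target split produced by the per-model scripts above (an assumption; the flag is not documented here). Each retrain reuses the hyperparameters the original model was trained with, so the three DistMult blocks below are equivalent to:
# for sim in dot cos l2; do
#     python -u grad_add_attack.py --model distmult --data FB15k-237 --reproduce-results --sim-metric "$sim"
#     python -u wrangle_KG.py "grad_add_${sim}_distmult_FB15k-237_1_1_1"
#     python -u main.py --model distmult --data "grad_add_${sim}_distmult_FB15k-237_1_1_1" --lr 0.005 --input-drop 0.5
# done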
echo 'Generating gradient attribution edits with dot similarity : FB15k-237 DistMult' 6 | python -u grad_add_attack.py --model distmult --data FB15k-237 --reproduce-results --sim-metric dot 7 | python -u wrangle_KG.py grad_add_dot_distmult_FB15k-237_1_1_1 8 | python -u main.py --model distmult --data grad_add_dot_distmult_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5 9 | 10 | echo 'Generating gradient attribution edits with cosine similarity : FB15k-237 DistMult' 11 | python -u grad_add_attack.py --model distmult --data FB15k-237 --reproduce-results --sim-metric cos 12 | python -u wrangle_KG.py grad_add_cos_distmult_FB15k-237_1_1_1 13 | python -u main.py --model distmult --data grad_add_cos_distmult_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5 14 | 15 | echo 'Generating gradient attribution edits with l2 similarity : FB15k-237 DistMult' 16 | python -u grad_add_attack.py --model distmult --data FB15k-237 --reproduce-results --sim-metric l2 17 | python -u wrangle_KG.py grad_add_l2_distmult_FB15k-237_1_1_1 18 | python -u main.py --model distmult --data grad_add_l2_distmult_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5 19 | 20 | # #################################################################################################################################### 21 | 22 | echo 'Generating gradient attribution edits with dot similarity : FB15k-237 ComplEx' 23 | python -u grad_add_attack.py --model complex --data FB15k-237 --reproduce-results --sim-metric dot 24 | python -u wrangle_KG.py grad_add_dot_complex_FB15k-237_1_1_1 25 | python -u main.py --model complex --data grad_add_dot_complex_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5 26 | 27 | echo 'Generating gradient attribution edits with cosine similarity : FB15k-237 ComplEx' 28 | python -u grad_add_attack.py --model complex --data FB15k-237 --reproduce-results --sim-metric cos 29 | python -u wrangle_KG.py grad_add_cos_complex_FB15k-237_1_1_1 30 | python -u main.py --model complex --data grad_add_cos_complex_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5 31 | 32 | echo 'Generating gradient attribution edits with l2 similarity : FB15k-237 ComplEx' 33 | python -u grad_add_attack.py --model complex --data FB15k-237 --reproduce-results --sim-metric l2 34 | python -u wrangle_KG.py grad_add_l2_complex_FB15k-237_1_1_1 35 | python -u main.py --model complex --data grad_add_l2_complex_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5 36 | 37 | # #################################################################################################################################### 38 | 39 | echo 'Generating gradient attribution edits with dot similarity : FB15k-237 ConvE' 40 | python -u grad_add_attack.py --model conve --data FB15k-237 --reproduce-results --sim-metric dot 41 | python -u wrangle_KG.py grad_add_dot_conve_FB15k-237_1_1_1 42 | python -u main.py --model conve --data grad_add_dot_conve_FB15k-237_1_1_1 --lr 0.001 --hidden-drop 0.5 43 | 44 | echo 'Generating gradient attribution edits with cosine similarity : FB15k-237 ConvE' 45 | python -u grad_add_attack.py --model conve --data FB15k-237 --reproduce-results --sim-metric cos 46 | python -u wrangle_KG.py grad_add_cos_conve_FB15k-237_1_1_1 47 | python -u main.py --model conve --data grad_add_cos_conve_FB15k-237_1_1_1 --lr 0.001 --hidden-drop 0.5 48 | 49 | echo 'Generating gradient attribution edits with l2 similarity : FB15k-237 ConvE' 50 | python -u grad_add_attack.py --model conve --data FB15k-237 --reproduce-results --sim-metric l2 51 | python -u wrangle_KG.py grad_add_l2_conve_FB15k-237_1_1_1 52 | python -u 
main.py --model conve --data grad_add_l2_conve_FB15k-237_1_1_1 --lr 0.001 --hidden-drop 0.5 53 | 54 | # #################################################################################################################################### 55 | 56 | echo 'Generating gradient attribution edits with dot similarity : FB15k-237 TransE' 57 | python -u grad_add_attack.py --model transe --data FB15k-237 --reproduce-results --sim-metric dot 58 | python -u wrangle_KG.py grad_add_dot_transe_FB15k-237_1_1_1 59 | python -u main.py --model transe --data grad_add_dot_transe_FB15k-237_1_1_1 --lr 0.001 --input-drop 0.0 --transe-margin 9.0 --num-batches 800 --epochs 100 --reg-weight 1e-10 60 | 61 | echo 'Generating gradient attribution edits with cosine similarity : FB15k-237 TransE' 62 | python -u grad_add_attack.py --model transe --data FB15k-237 --reproduce-results --sim-metric cos 63 | python -u wrangle_KG.py grad_add_cos_transe_FB15k-237_1_1_1 64 | python -u main.py --model transe --data grad_add_cos_transe_FB15k-237_1_1_1 --lr 0.001 --input-drop 0.0 --transe-margin 9.0 --num-batches 800 --epochs 100 --reg-weight 1e-10 65 | 66 | echo 'Generating gradient attribution edits with l2 similarity : FB15k-237 TransE' 67 | python -u grad_add_attack.py --model transe --data FB15k-237 --reproduce-results --sim-metric l2 68 | python -u wrangle_KG.py grad_add_l2_transe_FB15k-237_1_1_1 69 | python -u main.py --model transe --data grad_add_l2_transe_FB15k-237_1_1_1 --lr 0.001 --input-drop 0.0 --transe-margin 9.0 --num-batches 800 --epochs 100 --reg-weight 1e-10 70 | 71 | 72 | 73 | -------------------------------------------------------------------------------- /KGEAttack/grad_add_attack_WN18RR.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | cd ConvE 4 | 5 | echo 'Generating gradient attribution edits with dot similarity : WN18RR DistMult' 6 | python -u grad_add_attack.py --model distmult --data WN18RR --reproduce-results --sim-metric dot 7 | python -u wrangle_KG.py grad_add_dot_distmult_WN18RR_1_1_1 8 | python -u main.py --model distmult --data grad_add_dot_distmult_WN18RR_1_1_1 --lr 0.01 --num-batches 50 9 | 10 | echo 'Generating gradient attribution edits with cosine similarity : WN18RR DistMult' 11 | python -u grad_add_attack.py --model distmult --data WN18RR --reproduce-results --sim-metric cos 12 | python -u wrangle_KG.py grad_add_cos_distmult_WN18RR_1_1_1 13 | python -u main.py --model distmult --data grad_add_cos_distmult_WN18RR_1_1_1 --lr 0.01 --num-batches 50 14 | 15 | echo 'Generating gradient attribution edits with l2 similarity : WN18RR DistMult' 16 | python -u grad_add_attack.py --model distmult --data WN18RR --reproduce-results --sim-metric l2 17 | python -u wrangle_KG.py grad_add_l2_distmult_WN18RR_1_1_1 18 | python -u main.py --model distmult --data grad_add_l2_distmult_WN18RR_1_1_1 --lr 0.01 --num-batches 50 19 | 20 | # #################################################################################################################################### 21 | 22 | echo 'Generating gradient attribution edits with dot similarity : WN18RR ComplEx' 23 | python -u grad_add_attack.py --model complex --data WN18RR --reproduce-results --sim-metric dot 24 | python -u wrangle_KG.py grad_add_dot_complex_WN18RR_1_1_1 25 | python -u main.py --model complex --data grad_add_dot_complex_WN18RR_1_1_1 --lr 0.01 26 | 27 | echo 'Generating gradient attribution edits with cosine similarity : WN18RR ComplEx' 28 | python -u grad_add_attack.py --model complex --data 
WN18RR --reproduce-results --sim-metric cos 29 | python -u wrangle_KG.py grad_add_cos_complex_WN18RR_1_1_1 30 | python -u main.py --model complex --data grad_add_cos_complex_WN18RR_1_1_1 --lr 0.01 31 | 32 | echo 'Generating gradient attribution edits with l2 similarity : WN18RR ComplEx' 33 | python -u grad_add_attack.py --model complex --data WN18RR --reproduce-results --sim-metric l2 34 | python -u wrangle_KG.py grad_add_l2_complex_WN18RR_1_1_1 35 | python -u main.py --model complex --data grad_add_l2_complex_WN18RR_1_1_1 --lr 0.01 36 | 37 | # #################################################################################################################################### 38 | 39 | echo 'Generating gradient attribution edits with dot similarity : WN18RR ConvE' 40 | python -u grad_add_attack.py --model conve --data WN18RR --reproduce-results --sim-metric dot 41 | python -u wrangle_KG.py grad_add_dot_conve_WN18RR_1_1_1 42 | python -u main.py --model conve --data grad_add_dot_conve_WN18RR_1_1_1 --lr 0.001 43 | 44 | echo 'Generating gradient attribution edits with cosine similarity : WN18RR ConvE' 45 | python -u grad_add_attack.py --model conve --data WN18RR --reproduce-results --sim-metric cos 46 | python -u wrangle_KG.py grad_add_cos_conve_WN18RR_1_1_1 47 | python -u main.py --model conve --data grad_add_cos_conve_WN18RR_1_1_1 --lr 0.001 48 | 49 | echo 'Generating gradient attribution edits with l2 similarity : WN18RR ConvE' 50 | python -u grad_add_attack.py --model conve --data WN18RR --reproduce-results --sim-metric l2 51 | python -u wrangle_KG.py grad_add_l2_conve_WN18RR_1_1_1 52 | python -u main.py --model conve --data grad_add_l2_conve_WN18RR_1_1_1 --lr 0.001 53 | 54 | # #################################################################################################################################### 55 | 56 | echo 'Generating gradient attribution edits with dot similarity : WN18RR TransE' 57 | python -u grad_add_attack.py --model transe --data WN18RR --reproduce-results --sim-metric dot 58 | python -u wrangle_KG.py grad_add_dot_transe_WN18RR_1_1_1 59 | python -u main.py --model transe --data grad_add_dot_transe_WN18RR_1_1_1 --lr 0.005 --input-drop 0.0 --transe-margin 9.0 --num-batches 1000 --epochs 100 --reg-weight 1e-12 60 | 61 | echo 'Generating gradient attribution edits with cosine similarity : WN18RR TransE' 62 | python -u grad_add_attack.py --model transe --data WN18RR --reproduce-results --sim-metric cos 63 | python -u wrangle_KG.py grad_add_cos_transe_WN18RR_1_1_1 64 | python -u main.py --model transe --data grad_add_cos_transe_WN18RR_1_1_1 --lr 0.005 --input-drop 0.0 --transe-margin 9.0 --num-batches 1000 --epochs 100 --reg-weight 1e-12 65 | 66 | echo 'Generating gradient attribution edits with l2 similarity : WN18RR TransE' 67 | python -u grad_add_attack.py --model transe --data WN18RR --reproduce-results --sim-metric l2 68 | python -u wrangle_KG.py grad_add_l2_transe_WN18RR_1_1_1 69 | python -u main.py --model transe --data grad_add_l2_transe_WN18RR_1_1_1 --lr 0.005 --input-drop 0.0 --transe-margin 9.0 --num-batches 1000 --epochs 100 --reg-weight 1e-12 70 | 71 | 72 | 73 | -------------------------------------------------------------------------------- /KGEAttack/inst_add_attack_FB15k-237.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | cd ConvE 4 | 5 | echo 'Generating instance attribution edits with dot similarity : FB15k-237 DistMult' 6 | python -u inst_add_attack.py --model distmult --data FB15k-237 
--reproduce-results --sim-metric dot 7 | python -u wrangle_KG.py inst_add_dot_distmult_FB15k-237_1_1_1 8 | python -u main.py --model distmult --data inst_add_dot_distmult_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5 9 | 10 | echo 'Generating instance attribution edits with cosine similarity : FB15k-237 DistMult' 11 | python -u inst_add_attack.py --model distmult --data FB15k-237 --reproduce-results --sim-metric cos 12 | python -u wrangle_KG.py inst_add_cos_distmult_FB15k-237_1_1_1 13 | python -u main.py --model distmult --data inst_add_cos_distmult_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5 14 | 15 | echo 'Generating instance attribution edits with l2 similarity : FB15k-237 DistMult' 16 | python -u inst_add_attack.py --model distmult --data FB15k-237 --reproduce-results --sim-metric l2 17 | python -u wrangle_KG.py inst_add_l2_distmult_FB15k-237_1_1_1 18 | python -u main.py --model distmult --data inst_add_l2_distmult_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5 19 | 20 | # #################################################################################################################################### 21 | 22 | echo 'Generating instance attribution edits with dot similarity : FB15k-237 ComplEx' 23 | python -u inst_add_attack.py --model complex --data FB15k-237 --reproduce-results --sim-metric dot 24 | python -u wrangle_KG.py inst_add_dot_complex_FB15k-237_1_1_1 25 | python -u main.py --model complex --data inst_add_dot_complex_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5 26 | 27 | echo 'Generating instance attribution edits with cosine similarity : FB15k-237 ComplEx' 28 | python -u inst_add_attack.py --model complex --data FB15k-237 --reproduce-results --sim-metric cos 29 | python -u wrangle_KG.py inst_add_cos_complex_FB15k-237_1_1_1 30 | python -u main.py --model complex --data inst_add_cos_complex_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5 31 | 32 | echo 'Generating instance attribution edits with l2 similarity : FB15k-237 ComplEx' 33 | python -u inst_add_attack.py --model complex --data FB15k-237 --reproduce-results --sim-metric l2 34 | python -u wrangle_KG.py inst_add_l2_complex_FB15k-237_1_1_1 35 | python -u main.py --model complex --data inst_add_l2_complex_FB15k-237_1_1_1 --lr 0.005 --input-drop 0.5 36 | 37 | # #################################################################################################################################### 38 | 39 | echo 'Generating instance attribution edits with dot similarity : FB15k-237 ConvE' 40 | python -u inst_add_attack.py --model conve --data FB15k-237 --reproduce-results --sim-metric dot 41 | python -u wrangle_KG.py inst_add_dot_conve_FB15k-237_1_1_1 42 | python -u main.py --model conve --data inst_add_dot_conve_FB15k-237_1_1_1 --lr 0.001 --hidden-drop 0.5 43 | 44 | echo 'Generating instance attribution edits with cosine similarity : FB15k-237 ConvE' 45 | python -u inst_add_attack.py --model conve --data FB15k-237 --reproduce-results --sim-metric cos 46 | python -u wrangle_KG.py inst_add_cos_conve_FB15k-237_1_1_1 47 | python -u main.py --model conve --data inst_add_cos_conve_FB15k-237_1_1_1 --lr 0.001 --hidden-drop 0.5 48 | 49 | echo 'Generating instance attribution edits with l2 similarity : FB15k-237 ConvE' 50 | python -u inst_add_attack.py --model conve --data FB15k-237 --reproduce-results --sim-metric l2 51 | python -u wrangle_KG.py inst_add_l2_conve_FB15k-237_1_1_1 52 | python -u main.py --model conve --data inst_add_l2_conve_FB15k-237_1_1_1 --lr 0.001 --hidden-drop 0.5 53 | 54 | # 
#################################################################################################################################### 55 | 56 | echo 'Generating instance attribution edits with dot similarity : FB15k-237 TransE' 57 | python -u inst_add_attack.py --model transe --data FB15k-237 --reproduce-results --sim-metric dot 58 | python -u wrangle_KG.py inst_add_dot_transe_FB15k-237_1_1_1 59 | python -u main.py --model transe --data inst_add_dot_transe_FB15k-237_1_1_1 --lr 0.001 --input-drop 0.0 --transe-margin 9.0 --num-batches 800 --epochs 100 --reg-weight 1e-10 60 | 61 | echo 'Generating instance attribution edits with cosine similarity : FB15k-237 TransE' 62 | python -u inst_add_attack.py --model transe --data FB15k-237 --reproduce-results --sim-metric cos 63 | python -u wrangle_KG.py inst_add_cos_transe_FB15k-237_1_1_1 64 | python -u main.py --model transe --data inst_add_cos_transe_FB15k-237_1_1_1 --lr 0.001 --input-drop 0.0 --transe-margin 9.0 --num-batches 800 --epochs 100 --reg-weight 1e-10 65 | 66 | echo 'Generating instance attribution edits with l2 similarity : FB15k-237 TransE' 67 | python -u inst_add_attack.py --model transe --data FB15k-237 --reproduce-results --sim-metric l2 68 | python -u wrangle_KG.py inst_add_l2_transe_FB15k-237_1_1_1 69 | python -u main.py --model transe --data inst_add_l2_transe_FB15k-237_1_1_1 --lr 0.001 --input-drop 0.0 --transe-margin 9.0 --num-batches 800 --epochs 100 --reg-weight 1e-10 70 | 71 | 72 | 73 | -------------------------------------------------------------------------------- /KGEAttack/inst_add_attack_WN18RR.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | cd ConvE 4 | 5 | echo 'Generating instance attribution edits with dot similarity : WN18RR DistMult' 6 | python -u inst_add_attack.py --model distmult --data WN18RR --reproduce-results --sim-metric dot 7 | python -u wrangle_KG.py inst_add_dot_distmult_WN18RR_1_1_1 8 | python -u main.py --model distmult --data inst_add_dot_distmult_WN18RR_1_1_1 --lr 0.01 --num-batches 50 9 | 10 | echo 'Generating instance attribution edits with cosine similarity : WN18RR DistMult' 11 | python -u inst_add_attack.py --model distmult --data WN18RR --reproduce-results --sim-metric cos 12 | python -u wrangle_KG.py inst_add_cos_distmult_WN18RR_1_1_1 13 | python -u main.py --model distmult --data inst_add_cos_distmult_WN18RR_1_1_1 --lr 0.01 --num-batches 50 14 | 15 | echo 'Generating instance attribution edits with l2 similarity : WN18RR DistMult' 16 | python -u inst_add_attack.py --model distmult --data WN18RR --reproduce-results --sim-metric l2 17 | python -u wrangle_KG.py inst_add_l2_distmult_WN18RR_1_1_1 18 | python -u main.py --model distmult --data inst_add_l2_distmult_WN18RR_1_1_1 --lr 0.01 --num-batches 50 19 | 20 | # #################################################################################################################################### 21 | 22 | echo 'Generating instance attribution edits with dot similarity : WN18RR ComplEx' 23 | python -u inst_add_attack.py --model complex --data WN18RR --reproduce-results --sim-metric dot 24 | python -u wrangle_KG.py inst_add_dot_complex_WN18RR_1_1_1 25 | python -u main.py --model complex --data inst_add_dot_complex_WN18RR_1_1_1 --lr 0.01 26 | 27 | echo 'Generating instance attribution edits with cosine similarity : WN18RR ComplEx' 28 | python -u inst_add_attack.py --model complex --data WN18RR --reproduce-results --sim-metric cos 29 | python -u wrangle_KG.py inst_add_cos_complex_WN18RR_1_1_1 
30 | python -u main.py --model complex --data inst_add_cos_complex_WN18RR_1_1_1 --lr 0.01 31 | 32 | echo 'Generating instance attribution edits with l2 similarity : WN18RR ComplEx' 33 | python -u inst_add_attack.py --model complex --data WN18RR --reproduce-results --sim-metric l2 34 | python -u wrangle_KG.py inst_add_l2_complex_WN18RR_1_1_1 35 | python -u main.py --model complex --data inst_add_l2_complex_WN18RR_1_1_1 --lr 0.01 36 | 37 | # #################################################################################################################################### 38 | 39 | echo 'Generating instance attribution edits with dot similarity : WN18RR ConvE' 40 | python -u inst_add_attack.py --model conve --data WN18RR --reproduce-results --sim-metric dot 41 | python -u wrangle_KG.py inst_add_dot_conve_WN18RR_1_1_1 42 | python -u main.py --model conve --data inst_add_dot_conve_WN18RR_1_1_1 --lr 0.001 43 | 44 | echo 'Generating instance attribution edits with cosine similarity : WN18RR ConvE' 45 | python -u inst_add_attack.py --model conve --data WN18RR --reproduce-results --sim-metric cos 46 | python -u wrangle_KG.py inst_add_cos_conve_WN18RR_1_1_1 47 | python -u main.py --model conve --data inst_add_cos_conve_WN18RR_1_1_1 --lr 0.001 48 | 49 | echo 'Generating instance attribution edits with l2 similarity : WN18RR ConvE' 50 | python -u inst_add_attack.py --model conve --data WN18RR --reproduce-results --sim-metric l2 51 | python -u wrangle_KG.py inst_add_l2_conve_WN18RR_1_1_1 52 | python -u main.py --model conve --data inst_add_l2_conve_WN18RR_1_1_1 --lr 0.001 53 | 54 | # #################################################################################################################################### 55 | 56 | echo 'Generating instance attribution edits with dot similarity : WN18RR TransE' 57 | python -u inst_add_attack.py --model transe --data WN18RR --reproduce-results --sim-metric dot 58 | python -u wrangle_KG.py inst_add_dot_transe_WN18RR_1_1_1 59 | python -u main.py --model transe --data inst_add_dot_transe_WN18RR_1_1_1 --lr 0.005 --input-drop 0.0 --transe-margin 9.0 --num-batches 1000 --epochs 100 --reg-weight 1e-12 60 | 61 | echo 'Generating instance attribution edits with cosine similarity : WN18RR TransE' 62 | python -u inst_add_attack.py --model transe --data WN18RR --reproduce-results --sim-metric cos 63 | python -u wrangle_KG.py inst_add_cos_transe_WN18RR_1_1_1 64 | python -u main.py --model transe --data inst_add_cos_transe_WN18RR_1_1_1 --lr 0.005 --input-drop 0.0 --transe-margin 9.0 --num-batches 1000 --epochs 100 --reg-weight 1e-12 65 | 66 | echo 'Generating instance attribution edits with l2 similarity : WN18RR TransE' 67 | python -u inst_add_attack.py --model transe --data WN18RR --reproduce-results --sim-metric l2 68 | python -u wrangle_KG.py inst_add_l2_transe_WN18RR_1_1_1 69 | python -u main.py --model transe --data inst_add_l2_transe_WN18RR_1_1_1 --lr 0.005 --input-drop 0.0 --transe-margin 9.0 --num-batches 1000 --epochs 100 --reg-weight 1e-12 70 | 71 | 72 | 73 | -------------------------------------------------------------------------------- /KGEAttack/preprocess.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | cd ConvE 4 | 5 | echo "Adding necessary directories" 6 | mkdir saved_models results losses logs clusters 7 | mkdir saved_models/criage_inverter 8 | mkdir logs/attack_logs 9 | mkdir logs/attack_logs/criage_add_1 logs/attack_logs/ijcai_add_1 logs/attack_logs/criage_inverter 10 | mkdir 
logs/attack_logs/rand_add_{n,g} logs/attack_logs/sym_add_{1,2,3} logs/attack_logs/inv_add_{1,2,3} logs/attack_logs/com_add_{1,2,3} 11 | mkdir logs/attack_logs/inst_add_{cos,dot,l2} logs/attack_logs/grad_add_{cos,dot,l2} 12 | 13 | 14 | echo "Extracting original data.... " 15 | mkdir -p data/WN18RR_original 16 | mkdir -p data/FB15k-237_original 17 | 18 | tar -xvf WN18RR.tar.gz -C data/WN18RR_original 19 | tar -xvf FB15k-237.tar.gz -C data/FB15k-237_original 20 | 21 | echo "Preprocessing... " 22 | python -u preprocess.py WN18RR 23 | python -u preprocess.py FB15k-237 24 | 25 | echo "Wrangling to generate training set and eval filters... " 26 | python -u wrangle_KG.py WN18RR 27 | python -u wrangle_KG.py FB15k-237 -------------------------------------------------------------------------------- /KGEAttack/transe_FB15k-237.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | cd ConvE 4 | 5 | # train the original model 6 | echo 'Training original model' 7 | 8 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data FB15k-237 --lr 0.001 --input-drop 0.0 --transe-margin 9.0 --num-batches 800 --epochs 100 --reg-weight 1e-10 9 | 10 | echo 'Selecting target triples' 11 | mkdir data/target_transe_FB15k-237_1 12 | 13 | CUDA_VISIBLE_DEVICES=0 python -u select_targets.py --model transe --data FB15k-237 --lr 0.001 --input-drop 0.0 --transe-margin 9.0 --num-batches 800 --epochs 100 --reg-weight 1e-10 14 | 15 | 16 | 17 | echo 'Generating random edits for the neighbourhood' 18 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_1.py --model transe --data FB15k-237 --budget 1 --rand-run 1 19 | python -u wrangle_KG.py rand_add_n_transe_FB15k-237_1_1_1 20 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data rand_add_n_transe_FB15k-237_1_1_1 --lr 0.001 --input-drop 0.0 --transe-margin 9.0 --num-batches 800 --epochs 100 --reg-weight 1e-10 21 | 22 | echo 'Generating global random edits with 1 edit' 23 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_2.py --model transe --data FB15k-237 --budget 1 --rand-run 1 24 | python -u wrangle_KG.py rand_add_g_transe_FB15k-237_1_1_1 25 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data rand_add_g_transe_FB15k-237_1_1_1 --lr 0.001 --input-drop 0.0 --transe-margin 9.0 --num-batches 800 --epochs 100 --reg-weight 1e-10 26 | 27 | echo 'Generating global random edits with 2 edits' 28 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_2.py --model transe --data FB15k-237 --budget 2 --rand-run 1 29 | python -u wrangle_KG.py rand_add_g_transe_FB15k-237_1_2_1 30 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data rand_add_g_transe_FB15k-237_1_2_1 --lr 0.001 --input-drop 0.0 --transe-margin 9.0 --num-batches 800 --epochs 100 --reg-weight 1e-10 31 | 32 | 33 | 34 | 35 | echo 'Generating symmetry edits with ground truth minimum' 36 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_1.py --model transe --data FB15k-237 --budget 1 37 | python -u wrangle_KG.py sym_add_1_transe_FB15k-237_1_1_1 38 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data sym_add_1_transe_FB15k-237_1_1_1 --lr 0.001 --input-drop 0.0 --transe-margin 9.0 --num-batches 800 --epochs 100 --reg-weight 1e-10 39 | 40 | 41 | echo 'Generating symmetry edits with cosine distance' 42 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_2.py --model transe --data FB15k-237 --budget 1 43 | python -u wrangle_KG.py sym_add_2_transe_FB15k-237_1_1_1 44 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data
sym_add_2_transe_FB15k-237_1_1_1 --lr 0.001 --input-drop 0.0 --transe-margin 9.0 --num-batches 800 --epochs 100 --reg-weight 1e-10 45 | 46 | 47 | echo 'Generating symmetry edits with worse ranks' 48 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_3.py --model transe --data FB15k-237 --budget 1 49 | python -u wrangle_KG.py sym_add_3_transe_FB15k-237_1_1_1 50 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data sym_add_3_transe_FB15k-237_1_1_1 --lr 0.001 --input-drop 0.0 --transe-margin 9.0 --num-batches 800 --epochs 100 --reg-weight 1e-10 51 | 52 | 53 | 54 | 55 | 56 | echo 'Generating inversion edits with ground truth minimum' 57 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_1.py --model transe --data FB15k-237 --budget 1 58 | python -u wrangle_KG.py inv_add_1_transe_FB15k-237_1_1_1 59 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data inv_add_1_transe_FB15k-237_1_1_1 --lr 0.001 --input-drop 0.0 --transe-margin 9.0 --num-batches 800 --epochs 100 --reg-weight 1e-10 60 | 61 | 62 | echo 'Generating inversion edits with cosine distance' 63 | 64 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_2.py --model transe --data FB15k-237 --budget 1 65 | python -u wrangle_KG.py inv_add_2_transe_FB15k-237_1_1_1 66 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data inv_add_2_transe_FB15k-237_1_1_1 --lr 0.001 --input-drop 0.0 --transe-margin 9.0 --num-batches 800 --epochs 100 --reg-weight 1e-10 67 | 68 | 69 | echo 'Generating inversion edits with worse ranks' 70 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_3.py --model transe --data FB15k-237 --budget 1 71 | python -u wrangle_KG.py inv_add_3_transe_FB15k-237_1_1_1 72 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data inv_add_3_transe_FB15k-237_1_1_1 --lr 0.001 --input-drop 0.0 --transe-margin 9.0 --num-batches 800 --epochs 100 --reg-weight 1e-10 73 | 74 | 75 | 76 | 77 | 78 | echo 'Generating composition edits with ground truth values' 79 | python -u create_clusters.py --model transe --data FB15k-237 --num-clusters 100 80 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_1.py --model transe --data FB15k-237 --budget 1 --num-clusters 100 --rand-run 1 81 | python -u wrangle_KG.py com_add_1_transe_FB15k-237_1_1_1 82 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data com_add_1_transe_FB15k-237_1_1_1 --lr 0.001 --input-drop 0.0 --transe-margin 9.0 --num-batches 800 --epochs 100 --reg-weight 1e-10 83 | 84 | 85 | echo 'Generating composition attack with just worse ranks ' 86 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_2.py --model transe --data FB15k-237 --budget 1 87 | python -u wrangle_KG.py com_add_2_transe_FB15k-237_1_1_1 88 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data com_add_2_transe_FB15k-237_1_1_1 --lr 0.001 --input-drop 0.0 --transe-margin 9.0 --num-batches 800 --epochs 100 --reg-weight 1e-10 89 | 90 | echo 'Generating composition attack with cosine distance ' 91 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_3.py --model transe --data FB15k-237 --budget 1 92 | python -u wrangle_KG.py com_add_3_transe_FB15k-237_1_1_1 93 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data com_add_3_transe_FB15k-237_1_1_1 --lr 0.001 --input-drop 0.0 --transe-margin 9.0 --num-batches 800 --epochs 100 --reg-weight 1e-10 94 | 95 | 96 | 97 | echo 'Generating edits from IJCAI-19 baseline ' 98 | CUDA_VISIBLE_DEVICES=0 python -u ijcai_add_attack_1.py --model transe --data FB15k-237 --budget 1 --corruption-factor 20 --rand-run 1 --use-gpu 99 | python -u 
wrangle_KG.py ijcai_add_1_transe_FB15k-237_1_1_1_20.0 100 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data ijcai_add_1_transe_FB15k-237_1_1_1_20.0 --lr 0.001 --input-drop 0.0 --transe-margin 9.0 --num-batches 800 --epochs 100 --reg-weight 1e-10 101 | 102 | 103 | CUDA_VISIBLE_DEVICES=0 python -u ijcai_add_attack_1.py --model transe --data FB15k-237 --budget 1 --corruption-factor 5 --rand-run 1 --use-gpu 104 | python -u wrangle_KG.py ijcai_add_1_transe_FB15k-237_1_1_1_5.0 105 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data ijcai_add_1_transe_FB15k-237_1_1_1_5.0 --lr 0.001 --input-drop 0.0 --transe-margin 9.0 --num-batches 800 --epochs 100 --reg-weight 1e-10 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | -------------------------------------------------------------------------------- /KGEAttack/transe_WN18.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | cd ConvE 4 | 5 | # train the original model 6 | echo 'Training original model' 7 | 8 | 9 | # CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data WN18 --lr 0.005 --input-drop 0.0 --transe-margin 9.0 --num-batches 1000 --epochs 100 --reg-weight 1e-12 10 | # CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data WN18 --lr 0.0005 --input-drop 0.0 --transe-margin 0.0 --transe-norm 1 --num-batches 1000 --epochs 200 --reg-weight 1e-6 --embedding-dim 150 11 | # CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data WN18 --lr 0.01 --input-drop 0.0 --transe-margin 9.0 --num-batches 1000 --epochs 100 --reg-weight 1e-12 12 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data WN18 --lr 0.01 --input-drop 0.0 --transe-margin 9.0 --num-batches 1500 --epochs 100 --reg-weight 1e-12 13 | 14 | echo 'Selecting target triples' 15 | mkdir data/target_transe_WN18_1 16 | 17 | CUDA_VISIBLE_DEVICES=0 python -u select_targets.py --model transe --data WN18 --lr 0.01 --input-drop 0.0 --transe-margin 9.0 --num-batches 1500 --epochs 100 --reg-weight 1e-12 18 | 19 | # echo 'Re-training the model to compute baseline change in metrics for target set' 20 | # python -u wrangle_KG.py target_transe_WN18_1 21 | # CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data target_transe_WN18_1 --lr 0.01 --input-drop 0.0 --transe-margin 9.0 --num-batches 1500 --epochs 100 --reg-weight 1e-12 22 | 23 | 24 | echo 'Generating random edits for the neighbourhood' 25 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_1.py --model transe --data WN18 --budget 1 --rand-run 1 26 | python -u wrangle_KG.py rand_add_n_transe_WN18_1_1_1 27 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data rand_add_n_transe_WN18_1_1_1 --lr 0.01 --input-drop 0.0 --transe-margin 9.0 --num-batches 1500 --epochs 100 --reg-weight 1e-12 28 | 29 | echo 'Generating global random edits with 1 edit' 30 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_2.py --model transe --data WN18 --budget 1 --rand-run 1 31 | python -u wrangle_KG.py rand_add_g_transe_WN18_1_1_1 32 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data rand_add_g_transe_WN18_1_1_1 --lr 0.01 --input-drop 0.0 --transe-margin 9.0 --num-batches 1500 --epochs 100 --reg-weight 1e-12 33 | 34 | echo 'Generating global random edits with 2 edits' 35 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_2.py --model transe --data WN18 --budget 2 --rand-run 1 36 | python -u wrangle_KG.py rand_add_g_transe_WN18_1_2_1 37 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data rand_add_g_transe_WN18_1_2_1 --lr 
0.01 --input-drop 0.0 --transe-margin 9.0 --num-batches 1500 --epochs 100 --reg-weight 1e-12 38 | 39 | 40 | 41 | echo 'Generating symmetry edits with ground truth minimum' 42 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_1.py --model transe --data WN18 --budget 1 43 | python -u wrangle_KG.py sym_add_1_transe_WN18_1_1_1 44 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data sym_add_1_transe_WN18_1_1_1 --lr 0.01 --input-drop 0.0 --transe-margin 9.0 --num-batches 1500 --epochs 100 --reg-weight 1e-12 45 | 46 | echo 'Generating symmetry edits with cosine distance' 47 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_2.py --model transe --data WN18 --budget 1 48 | python -u wrangle_KG.py sym_add_2_transe_WN18_1_1_1 49 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data sym_add_2_transe_WN18_1_1_1 --lr 0.01 --input-drop 0.0 --transe-margin 9.0 --num-batches 1500 --epochs 100 --reg-weight 1e-12 50 | 51 | echo 'Generating symmetry edits with worse ranks' 52 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_3.py --model transe --data WN18 --budget 1 53 | python -u wrangle_KG.py sym_add_3_transe_WN18_1_1_1 54 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data sym_add_3_transe_WN18_1_1_1 --lr 0.01 --input-drop 0.0 --transe-margin 9.0 --num-batches 1500 --epochs 100 --reg-weight 1e-12 55 | 56 | 57 | 58 | 59 | 60 | echo 'Generating inversion edits with ground truth minimum' 61 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_1.py --model transe --data WN18 --budget 1 62 | python -u wrangle_KG.py inv_add_1_transe_WN18_1_1_1 63 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data inv_add_1_transe_WN18_1_1_1 --lr 0.01 --input-drop 0.0 --transe-margin 9.0 --num-batches 1500 --epochs 100 --reg-weight 1e-12 64 | 65 | echo 'Generating inversion edits with worse ranks' 66 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_2.py --model transe --data WN18 --budget 1 67 | python -u wrangle_KG.py inv_add_2_transe_WN18_1_1_1 68 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data inv_add_2_transe_WN18_1_1_1 --lr 0.01 --input-drop 0.0 --transe-margin 9.0 --num-batches 1500 --epochs 100 --reg-weight 1e-12 69 | 70 | echo 'Generating inversion edits with cosine distance' 71 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_3.py --model transe --data WN18 --budget 1 72 | python -u wrangle_KG.py inv_add_3_transe_WN18_1_1_1 73 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data inv_add_3_transe_WN18_1_1_1 --lr 0.01 --input-drop 0.0 --transe-margin 9.0 --num-batches 1500 --epochs 100 --reg-weight 1e-12 74 | 75 | 76 | 77 | 78 | 79 | 80 | echo 'Generating composition edits with ground truth values' 81 | python -u create_clusters.py --model transe --data WN18 --num-clusters 50 82 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_1.py --model transe --data WN18 --budget 1 --num-clusters 50 --rand-run 1 83 | python -u wrangle_KG.py com_add_1_transe_WN18_1_1_1 84 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data com_add_1_transe_WN18_1_1_1 --lr 0.01 --input-drop 0.0 --transe-margin 9.0 --num-batches 1500 --epochs 100 --reg-weight 1e-12 85 | 86 | echo 'Generating composition edits with just worse ranks ' 87 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_2.py --model transe --data WN18 --budget 1 88 | python -u wrangle_KG.py com_add_2_transe_WN18_1_1_1 89 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data com_add_2_transe_WN18_1_1_1 --lr 0.01 --input-drop 0.0 --transe-margin 9.0 --num-batches 1500 --epochs 100 --reg-weight
1e-12 90 | 91 | echo 'Generating composition edits with cosine distance ' 92 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_3.py --model transe --data WN18 --budget 1 93 | python -u wrangle_KG.py com_add_3_transe_WN18_1_1_1 94 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data com_add_3_transe_WN18_1_1_1 --lr 0.01 --input-drop 0.0 --transe-margin 9.0 --num-batches 1500 --epochs 100 --reg-weight 1e-12 95 | 96 | 97 | 98 | 99 | echo 'Generating edits from IJCAI-19 baseline ' 100 | CUDA_VISIBLE_DEVICES=0 python -u ijcai_add_attack_1.py --model transe --data WN18 --budget 1 --corruption-factor 20 --rand-run 1 --use-gpu 101 | python -u wrangle_KG.py ijcai_add_1_transe_WN18_1_1_1_20.0 102 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data ijcai_add_1_transe_WN18_1_1_1_20.0 --lr 0.01 --input-drop 0.0 --transe-margin 9.0 --num-batches 1500 --epochs 100 --reg-weight 1e-12 103 | 104 | 105 | # CUDA_VISIBLE_DEVICES=0 python -u ijcai_add_attack_1.py --model transe --data WN18 --budget 1 --corruption-factor 5 --rand-run 1 --use-gpu 106 | # python -u wrangle_KG.py ijcai_add_1_transe_WN18_1_1_1_5.0 107 | # CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data ijcai_add_1_transe_WN18_1_1_1_5.0 --lr 0.01 --input-drop 0.0 --transe-margin 9.0 --num-batches 1500 --epochs 100 --reg-weight 1e-12 108 | 109 | 110 | 111 | 112 | 113 | -------------------------------------------------------------------------------- /KGEAttack/transe_WN18RR.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | cd ConvE 4 | 5 | # train the original model 6 | echo 'Training original model' 7 | 8 | 9 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data WN18RR --lr 0.005 --input-drop 0.0 --transe-margin 9.0 --num-batches 1000 --epochs 100 --reg-weight 1e-12 10 | 11 | 12 | echo 'Selecting target triples' 13 | mkdir data/target_transe_WN18RR_1 14 | 15 | CUDA_VISIBLE_DEVICES=0 python -u select_targets.py --model transe --data WN18RR --lr 0.005 --input-drop 0.0 --transe-margin 9.0 --num-batches 1000 --epochs 100 --reg-weight 1e-12 16 | 17 | echo 'Re-training the model to compute baseline change in metrics for target set' 18 | python -u wrangle_KG.py target_transe_WN18RR_1 19 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data target_transe_WN18RR_1 --lr 0.005 --input-drop 0.0 --transe-margin 9.0 --num-batches 1000 --epochs 100 --reg-weight 1e-12 20 | 21 | 22 | echo 'Generating random edits for the neighbourhood' 23 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_1.py --model transe --data WN18RR --budget 1 --rand-run 1 24 | python -u wrangle_KG.py rand_add_n_transe_WN18RR_1_1_1 25 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data rand_add_n_transe_WN18RR_1_1_1 --lr 0.005 --input-drop 0.0 --transe-margin 9.0 --num-batches 1000 --epochs 100 --reg-weight 1e-12 26 | 27 | echo 'Generating global random edits with 1 edit' 28 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_2.py --model transe --data WN18RR --budget 1 --rand-run 1 29 | python -u wrangle_KG.py rand_add_g_transe_WN18RR_1_1_1 30 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data rand_add_g_transe_WN18RR_1_1_1 --lr 0.005 --input-drop 0.0 --transe-margin 9.0 --num-batches 1000 --epochs 100 --reg-weight 1e-12 31 | 32 | echo 'Generating global random edits with 2 edits' 33 | CUDA_VISIBLE_DEVICES=0 python -u rand_add_attack_2.py --model transe --data WN18RR --budget 2 --rand-run 1 34 | python -u wrangle_KG.py rand_add_g_transe_WN18RR_1_2_1 35 
| CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data rand_add_g_transe_WN18RR_1_2_1 --lr 0.005 --input-drop 0.0 --transe-margin 9.0 --num-batches 1000 --epochs 100 --reg-weight 1e-12 36 | 37 | 38 | 39 | echo 'Generating symmetry edits with ground truth minimum' 40 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_1.py --model transe --data WN18RR --budget 1 41 | python -u wrangle_KG.py sym_add_1_transe_WN18RR_1_1_1 42 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data sym_add_1_transe_WN18RR_1_1_1 --lr 0.005 --input-drop 0.0 --transe-margin 9.0 --num-batches 1000 --epochs 100 --reg-weight 1e-12 43 | 44 | echo 'Generating symmetry edits with cosine distance' 45 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_2.py --model transe --data WN18RR --budget 1 46 | python -u wrangle_KG.py sym_add_2_transe_WN18RR_1_1_1 47 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data sym_add_2_transe_WN18RR_1_1_1 --lr 0.005 --input-drop 0.0 --transe-margin 9.0 --num-batches 1000 --epochs 100 --reg-weight 1e-12 48 | 49 | echo 'Generating symmetry edits with worse ranks' 50 | CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_3.py --model transe --data WN18RR --budget 1 51 | python -u wrangle_KG.py sym_add_3_transe_WN18RR_1_1_1 52 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data sym_add_3_transe_WN18RR_1_1_1 --lr 0.005 --input-drop 0.0 --transe-margin 9.0 --num-batches 1000 --epochs 100 --reg-weight 1e-12 53 | 54 | 55 | 56 | 57 | 58 | echo 'Generating inversion edits with ground truth minimum' 59 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_1.py --model transe --data WN18RR --budget 1 60 | python -u wrangle_KG.py inv_add_1_transe_WN18RR_1_1_1 61 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data inv_add_1_transe_WN18RR_1_1_1 --lr 0.005 --input-drop 0.0 --transe-margin 9.0 --num-batches 1000 --epochs 100 --reg-weight 1e-12 62 | 63 | echo 'Generating inversion edits with worse ranks' 64 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_2.py --model transe --data WN18RR --budget 1 65 | python -u wrangle_KG.py inv_add_2_transe_WN18RR_1_1_1 66 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data inv_add_2_transe_WN18RR_1_1_1 --lr 0.005 --input-drop 0.0 --transe-margin 9.0 --num-batches 1000 --epochs 100 --reg-weight 1e-12 67 | 68 | echo 'Generating inversion edits with cosine distance' 69 | CUDA_VISIBLE_DEVICES=0 python -u inv_add_attack_3.py --model transe --data WN18RR --budget 1 70 | python -u wrangle_KG.py inv_add_3_transe_WN18RR_1_1_1 71 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data inv_add_3_transe_WN18RR_1_1_1 --lr 0.005 --input-drop 0.0 --transe-margin 9.0 --num-batches 1000 --epochs 100 --reg-weight 1e-12 72 | 73 | 74 | 75 | 76 | 77 | 78 | echo 'Generating composition edits with ground truth values' 79 | python -u create_clusters.py --model transe --data WN18RR --num-clusters 50 80 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_1.py --model transe --data WN18RR --budget 1 --num-clusters 50 --rand-run 1 81 | python -u wrangle_KG.py com_add_1_transe_WN18RR_1_1_1 82 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data com_add_1_transe_WN18RR_1_1_1 --lr 0.005 --input-drop 0.0 --transe-margin 9.0 --num-batches 1000 --epochs 100 --reg-weight 1e-12 83 | 84 | echo 'Generating composition edits with just worse ranks ' 85 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_2.py --model transe --data WN18RR --budget 1 86 | python -u wrangle_KG.py com_add_2_transe_WN18RR_1_1_1 87 | CUDA_VISIBLE_DEVICES=0
python -u main.py --model transe --data com_add_2_transe_WN18RR_1_1_1 --lr 0.005 --input-drop 0.0 --transe-margin 9.0 --num-batches 1000 --epochs 100 --reg-weight 1e-12 88 | 89 | echo 'Generating composition edits with cosine distance ' 90 | CUDA_VISIBLE_DEVICES=0 python -u com_add_attack_3.py --model transe --data WN18RR --budget 1 91 | python -u wrangle_KG.py com_add_3_transe_WN18RR_1_1_1 92 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data com_add_3_transe_WN18RR_1_1_1 --lr 0.005 --input-drop 0.0 --transe-margin 9.0 --num-batches 1000 --epochs 100 --reg-weight 1e-12 93 | 94 | 95 | 96 | 97 | echo 'Generating edits from IJCAI-19 baseline ' 98 | CUDA_VISIBLE_DEVICES=0 python -u ijcai_add_attack_1.py --model transe --data WN18RR --budget 1 --corruption-factor 20 --rand-run 1 --use-gpu 99 | python -u wrangle_KG.py ijcai_add_1_transe_WN18RR_1_1_1_20.0 100 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data ijcai_add_1_transe_WN18RR_1_1_1_20.0 --lr 0.005 --input-drop 0.0 --transe-margin 9.0 --num-batches 1000 --epochs 100 --reg-weight 1e-12 101 | 102 | 103 | CUDA_VISIBLE_DEVICES=0 python -u ijcai_add_attack_1.py --model transe --data WN18RR --budget 1 --corruption-factor 5 --rand-run 1 --use-gpu 104 | python -u wrangle_KG.py ijcai_add_1_transe_WN18RR_1_1_1_5.0 105 | CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data ijcai_add_1_transe_WN18RR_1_1_1_5.0 --lr 0.005 --input-drop 0.0 --transe-margin 9.0 --num-batches 1000 --epochs 100 --reg-weight 1e-12 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Peru Bhardwaj 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 |

2 | # Poisoning Knowledge Graph Embeddings via Relation Inference Patterns
3 |
18 |
This is the code repository to accompany the ACL 2021 paper on poisoning attacks on KGE models.
19 | This work is part of my PhD study at Trinity College Dublin and is funded by Accenture Labs and the ADAPT Centre.
20 | For any questions or feedback, please open an issue or email me at: bhardwap at tcd dot ie

21 | 22 | ### Related Publications
23 | 1. Adversarial Attacks on Knowledge Graph Embeddings via Instance Attribution Methods (EMNLP 2021) - Paper, Codebase
24 | 2. Adversarial Robustness of Representation Learning for Knowledge Graphs (PhD Thesis) - Link
25 | 26 | ## Overview
27 | ![](overview.jpg)
28 | The figure illustrates the composition-based adversarial attack on missing link prediction for fraud detection. The knowledge graph consists of two types of entities: `Person` and `BankAccount`. The target triple to predict is `(Karl, affiliated with, Joe the mobster)`. The original KGE model predicts this triple as True, i.e. it ranks the triple higher than the synthetic negative triples. But a malicious attacker can add adversarial triples (in purple) that connect `Karl` with a non-suspicious person `Bob` through a composition pattern. Now, the KGE model predicts the target triple as False.
29 | 30 | Thus, the proposed adversarial attacks are based on a reformulation of the problem of poisoning attacks on KGE models for missing link prediction. Instead of degrading the rank of a target triple directly, the attacker aims to improve the rank of a *decoy triple*. To do so, they exploit the inductive abilities of KGE models, which are expressed through connectivity patterns like symmetry, inversion or composition. This problem reformulation also helps to understand the behaviour of KGE models, because the effectiveness of an attack that relies on a particular inference pattern indicates the KGE model's sensitivity to that pattern.
31 | 32 | ## Reproducing the results
33 | 34 | ### Setup
35 | - python = 3.8.5
36 | - pytorch = 1.4.0
37 | - numpy = 1.19.1
38 | - jupyter = 1.0.0
39 | - pandas = 1.1.0
40 | - matplotlib = 3.2.2
41 | - scikit-learn = 0.23.2
42 | - seaborn = 0.11.0
43 | 44 | Experiments reported in the paper were run in the conda environment specified by `inference_attack.yml`.
45 | 46 | 47 | ### Usage
48 | - The codebase and the bash scripts used for experiments are in `KGEAttack`
49 | - To add the necessary directories and preprocess the original datasets, use the bash script `preprocess.sh`
50 | - For each model-dataset combination, there is a bash script that trains the original model, generates adversarial edits from the baseline and the proposed attacks, and trains the poisoned models. These scripts are named `model-dataset.sh`
51 | - The instructions in these scripts are grouped under echo statements that indicate what they do.
52 | - The hyperparameters in the bash scripts are the ones used for the experiments reported in the paper.
53 | - The metrics on decoy triples can be computed with the script `compute_decoy_metrics_WN18RR.sh` or `compute_decoy_metrics_FB15k-237.sh`
54 | - To reproduce the results, specific instructions from the bash scripts can be run individually on the command line, or the full script can be run end-to-end (see the sketches below).
55 | - All experiments in the paper were run on a shared HPC cluster with Nvidia RTX 2080Ti, Tesla K40 and V100 GPUs.
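For example, an end-to-end run for TransE on WN18RR would look like the following. This is a minimal sketch: it assumes a working conda installation, and the scripts are launched from inside `KGEAttack` because each script does `cd ConvE` internally.

```sh
# Create and activate the conda environment used for the experiments
conda env create -f inference_attack.yml
conda activate inference_attack

# Add the necessary directories, then extract and preprocess the original datasets
cd KGEAttack
sh preprocess.sh

# Train the original model, generate attack edits and train the poisoned models
sh transe_WN18RR.sh

# Compute the metrics on decoy triples
sh compute_decoy_metrics_WN18RR.sh
```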
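Within each script, every attack follows the same three-step pattern: generate the adversarial edits, wrangle the poisoned graph into a training file and evaluation filters, and re-train the victim model on the poisoned data. Below is a sketch of one such step, with the commands and hyperparameters copied verbatim from the symmetry attack in `transe_WN18RR.sh`:

```sh
# 1. Generate symmetry attack edits against the original TransE model (attack budget of 1)
CUDA_VISIBLE_DEVICES=0 python -u sym_add_attack_1.py --model transe --data WN18RR --budget 1

# 2. Generate the training set and eval filters for the poisoned dataset
python -u wrangle_KG.py sym_add_1_transe_WN18RR_1_1_1

# 3. Re-train the KGE model on the poisoned dataset and evaluate it
CUDA_VISIBLE_DEVICES=0 python -u main.py --model transe --data sym_add_1_transe_WN18RR_1_1_1 \
    --lr 0.005 --input-drop 0.0 --transe-margin 9.0 --num-batches 1000 --epochs 100 --reg-weight 1e-12
```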
56 | 57 | 58 | ## References
59 | Parts of this codebase are based on code from the following repositories:
60 | - [ConvE](https://github.com/TimDettmers/ConvE)
61 | - [CRIAGE](https://github.com/pouyapez/criage)
62 | - [KGC Re-evaluation](https://github.com/svjan5/kg-reeval)
63 | - [ComplEx-N3](https://github.com/facebookresearch/kbc)
64 | 65 | 66 | ## Citation
67 | 68 | ```bibtex
69 | @inproceedings{bhardwaj-etal-2021-poisoning,
70 | title = "Poisoning Knowledge Graph Embeddings via Relation Inference Patterns",
71 | author = "Bhardwaj, Peru and
72 | Kelleher, John and
73 | Costabello, Luca and
74 | O{'}Sullivan, Declan",
75 | booktitle = "Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)",
76 | month = aug,
77 | year = "2021",
78 | address = "Online",
79 | publisher = "Association for Computational Linguistics",
80 | url = "https://aclanthology.org/2021.acl-long.147",
81 | pages = "1875--1888"
82 | }
83 | ```
84 | -------------------------------------------------------------------------------- /inference_attack.yml: --------------------------------------------------------------------------------
1 | name: inference_attack
2 | channels:
3 | - defaults
4 | dependencies:
5 | - _libgcc_mutex=0.1=main
6 | - _pytorch_select=0.2=gpu_0
7 | - argon2-cffi=20.1.0=py38h7b6447c_1
8 | - async_generator=1.10=pyhd3eb1b0_0
9 | - attrs=20.3.0=pyhd3eb1b0_0
10 | - backcall=0.2.0=pyhd3eb1b0_0
11 | - blas=1.0=mkl
12 | - bleach=3.2.3=pyhd3eb1b0_0
13 | - ca-certificates=2021.1.19=h06a4308_0
14 | - certifi=2020.12.5=py38h06a4308_0
15 | - cffi=1.14.4=py38h261ae71_0
16 | - cudatoolkit=10.1.243=h6bb024c_0
17 | - cudnn=7.6.5=cuda10.1_0
18 | - cycler=0.10.0=py38_0
19 | - dbus=1.13.18=hb2f20db_0
20 | - decorator=4.4.2=pyhd3eb1b0_0
21 | - defusedxml=0.6.0=py_0
22 | - entrypoints=0.3=py38_0
23 | - expat=2.2.10=he6710b0_2
24 | - fontconfig=2.13.0=h9420a91_0
25 | - freetype=2.10.4=h5ab3b9f_0
26 | - glib=2.66.1=h92f7085_0
27 | - gst-plugins-base=1.14.0=h8213a91_2
28 | - gstreamer=1.14.0=h28cd5cc_2
29 | - icu=58.2=he6710b0_3
30 | - importlib-metadata=2.0.0=py_1
31 | - importlib_metadata=2.0.0=1
32 | - intel-openmp=2020.2=254
33 | - ipykernel=5.3.4=py38h5ca1d4c_0
34 | - ipython=7.19.0=py38hb070fc8_1
35 | - ipython_genutils=0.2.0=pyhd3eb1b0_1
36 | - ipywidgets=7.6.3=pyhd3eb1b0_1
37 | - jedi=0.17.0=py38_0
38 | - jinja2=2.11.2=pyhd3eb1b0_0
39 | - joblib=1.0.0=pyhd3eb1b0_0
40 | - jpeg=9b=h024ee3a_2
41 | - jsonschema=3.2.0=py_2
42 | - jupyter=1.0.0=py38_7
43 | - jupyter_client=6.1.7=py_0
44 | - jupyter_console=6.2.0=py_0
45 | - jupyter_core=4.7.0=py38h06a4308_0
46 | - jupyterlab_pygments=0.1.2=py_0
47 | - jupyterlab_widgets=1.0.0=pyhd3eb1b0_1
48 | - kiwisolver=1.3.0=py38h2531618_0
49 | - ld_impl_linux-64=2.33.1=h53a641e_7
50 | - libedit=3.1.20191231=h14c3975_1
51 | - libffi=3.3=he6710b0_2
52 | - libgcc-ng=9.1.0=hdf63c60_0
53 | - libgfortran-ng=7.3.0=hdf63c60_0
54 | - libpng=1.6.37=hbc83047_0
55 | - libsodium=1.0.18=h7b6447c_0
56 | - libstdcxx-ng=9.1.0=hdf63c60_0
57 | - libuuid=1.0.3=h1bed415_2
58 | - libxcb=1.14=h7b6447c_0
59 | - libxml2=2.9.10=hb55368b_3
60 | - markupsafe=1.1.1=py38h7b6447c_0
61 | - matplotlib=3.2.2=0
62 | - matplotlib-base=3.2.2=py38hef1b27d_0
63 | - mistune=0.8.4=py38h7b6447c_1000
64 | - mkl=2020.2=256
65 | - mkl-service=2.3.0=py38he904b0f_0
66 | - mkl_fft=1.2.0=py38h23d657b_0
67 | - mkl_random=1.1.1=py38h0573a6f_0
68 | - nbclient=0.5.1=py_0
69 | - nbconvert=6.0.7=py38_0
70 |
- nbformat=5.1.2=pyhd3eb1b0_1 71 | - ncurses=6.2=he6710b0_1 72 | - nest-asyncio=1.4.3=pyhd3eb1b0_0 73 | - ninja=1.10.2=py38hff7bd54_0 74 | - notebook=6.2.0=py38h06a4308_0 75 | - numpy=1.19.1=py38hbc911f0_0 76 | - numpy-base=1.19.1=py38hfa32c7d_0 77 | - openssl=1.1.1i=h27cfd23_0 78 | - packaging=20.9=pyhd3eb1b0_0 79 | - pandas=1.1.0=py38he6710b0_0 80 | - pandoc=2.11=hb0f4dca_0 81 | - pandocfilters=1.4.3=py38h06a4308_1 82 | - parso=0.8.1=pyhd3eb1b0_0 83 | - pcre=8.44=he6710b0_0 84 | - pexpect=4.8.0=pyhd3eb1b0_3 85 | - pickleshare=0.7.5=pyhd3eb1b0_1003 86 | - pip=20.3.3=py38h06a4308_0 87 | - prometheus_client=0.9.0=pyhd3eb1b0_0 88 | - prompt-toolkit=3.0.8=py_0 89 | - prompt_toolkit=3.0.8=0 90 | - ptyprocess=0.7.0=pyhd3eb1b0_2 91 | - pycparser=2.20=py_2 92 | - pygments=2.7.4=pyhd3eb1b0_0 93 | - pyparsing=2.4.7=pyhd3eb1b0_0 94 | - pyqt=5.9.2=py38h05f1152_4 95 | - pyrsistent=0.17.3=py38h7b6447c_0 96 | - python=3.8.5=h7579374_1 97 | - python-dateutil=2.8.1=pyhd3eb1b0_0 98 | - pytorch=1.4.0=cuda101py38h02f0884_0 99 | - pytz=2020.5=pyhd3eb1b0_0 100 | - pyzmq=20.0.0=py38h2531618_1 101 | - qt=5.9.7=h5867ecd_1 102 | - qtconsole=4.7.7=py_0 103 | - qtpy=1.9.0=py_0 104 | - readline=8.1=h27cfd23_0 105 | - scikit-learn=0.23.2=py38h0573a6f_0 106 | - scipy=1.5.2=py38h0b6359f_0 107 | - seaborn=0.11.0=py_0 108 | - send2trash=1.5.0=pyhd3eb1b0_1 109 | - setuptools=52.0.0=py38h06a4308_0 110 | - sip=4.19.13=py38he6710b0_0 111 | - six=1.15.0=py38h06a4308_0 112 | - sqlite=3.33.0=h62c20be_0 113 | - terminado=0.9.2=py38h06a4308_0 114 | - testpath=0.4.4=pyhd3eb1b0_0 115 | - threadpoolctl=2.1.0=pyh5ca1d4c_0 116 | - tk=8.6.10=hbc83047_0 117 | - tornado=6.1=py38h27cfd23_0 118 | - traitlets=5.0.5=pyhd3eb1b0_0 119 | - wcwidth=0.2.5=py_0 120 | - webencodings=0.5.1=py38_1 121 | - wheel=0.36.2=pyhd3eb1b0_0 122 | - widgetsnbextension=3.5.1=py38_0 123 | - xz=5.2.5=h7b6447c_0 124 | - zeromq=4.3.3=he6710b0_3 125 | - zipp=3.4.0=pyhd3eb1b0_0 126 | - zlib=1.2.11=h7b6447c_3 127 | 128 | 129 | -------------------------------------------------------------------------------- /overview.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeruBhardwaj/InferenceAttack/ddfda138f7937a6313af5392ec401ae89900aaf8/overview.jpg --------------------------------------------------------------------------------