├── CARCA.py ├── README.md └── RawData └── DataProcessing.py /CARCA.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """#UTILS""" 4 | 5 | import sys 6 | import copy 7 | import random 8 | import numpy as np 9 | from collections import defaultdict 10 | import pandas as pd 11 | import pickle 12 | import os 13 | os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true' 14 | def load_data(filename): 15 | try: 16 | with open(filename, "rb") as f: 17 | x= pickle.load(f) 18 | except: 19 | x = [] 20 | return x 21 | 22 | def save_data(data,filename): 23 | with open(filename, "wb") as f: 24 | pickle.dump(data, f) 25 | 26 | 27 | 28 | def data_partition(fname): 29 | usernum = 0 30 | itemnum = 0 31 | User = defaultdict(list) 32 | user_train = {} 33 | user_valid = {} 34 | user_test = {} 35 | # assume user/item index starting from 1 36 | f = open('./Data/%s.txt' % fname, 'r') 37 | for line in f: 38 | u, i = line.rstrip().split(' ') 39 | u = int(u) 40 | i = int(i) 41 | usernum = max(u, usernum) 42 | itemnum = max(i, itemnum) 43 | User[u].append(i) 44 | 45 | for user in User: 46 | nfeedback = len(User[user]) 47 | if nfeedback < 3: 48 | user_train[user] = User[user] 49 | user_valid[user] = [] 50 | user_test[user] = [] 51 | else: 52 | user_train[user] = User[user][:-2] 53 | user_valid[user] = [] 54 | user_valid[user].append(User[user][-2]) 55 | user_test[user] = [] 56 | user_test[user].append(User[user][-1]) 57 | return [user_train, user_valid, user_test, usernum, itemnum] 58 | 59 | 60 | def evaluate(model, dataset, args, sess, cxtdict, cxtsize, negnum=100): 61 | [train, valid, test, usernum, itemnum] = copy.deepcopy(dataset) 62 | 63 | NDCG = 0.0 64 | HT = 0.0 65 | valid_user = 0.0 66 | Auc = 0.0 67 | if usernum>10000: 68 | users = random.sample(range(1, usernum + 1), 10000) 69 | else: 70 | users = range(1, usernum + 1) 71 | for u in users: 72 | 73 | if len(train[u]) < 1 or len(test[u]) < 1: continue 74 | 75 | seq = np.zeros([args.maxlen], dtype=np.int32) 76 | seqcxt = np.zeros([args.maxlen,cxtsize], dtype=np.int32) 77 | testitemscxt = list() 78 | idx = args.maxlen - 1 79 | seq[idx] = valid[u][0] 80 | #Cxt 81 | seqcxt[idx] = cxtdict[(u,valid[u][0])] 82 | 83 | idx -= 1 84 | for i in reversed(train[u]): 85 | seq[idx] = i 86 | #Cxt 87 | seqcxt[idx] = cxtdict[(u,i)] 88 | 89 | idx -= 1 90 | if idx == -1: break 91 | 92 | rated = set(train[u]) 93 | rated.add(0) 94 | item_idx = [test[u][0]] 95 | testitemscxt.append(cxtdict[(u,test[u][0])]) 96 | for _ in range(negnum): 97 | t = np.random.randint(1, itemnum + 1) 98 | while t in rated: t = np.random.randint(1, itemnum + 1) 99 | item_idx.append(t) 100 | testitemscxt.append(cxtdict[(u,test[u][0])]) 101 | 102 | 103 | predictions = -model.predict(sess, np.ones(args.maxlen)*u, [seq], item_idx, [seqcxt], testitemscxt) 104 | predictions = predictions[0] 105 | score = -predictions.copy() 106 | rank = predictions.argsort().argsort()[0] 107 | 108 | valid_user += 1 109 | 110 | if rank < 10: 111 | NDCG += 1 / np.log2(rank + 2) 112 | HT += 1 113 | #if valid_user % 100 == 0: 114 | # print ('.',sys.stdout.flush()) 115 | 116 | tmpans=0 117 | count=0 118 | for j in range(1,len(score)): #sample 119 | if score[0]>score[j]: 120 | tmpans+=1 121 | count+=1 122 | tmpans/=float(count) 123 | Auc+=tmpans 124 | 125 | return NDCG / valid_user, HT / valid_user, Auc / valid_user 126 | 127 | 128 | def evaluate_valid(model, dataset, args, sess, cxtdict, cxtsize, negnum=100): 129 | [train, valid, test, usernum, itemnum] = 
copy.deepcopy(dataset) 130 | 131 | NDCG = 0.0 132 | valid_user = 0.0 133 | HT = 0.0 134 | Auc = 0.0 135 | if usernum>10000: 136 | users = random.sample(range(1, usernum + 1), 10000) 137 | else: 138 | users = range(1, usernum + 1) 139 | for u in users: 140 | if len(train[u]) < 1 or len(valid[u]) < 1: continue 141 | 142 | seq = np.zeros([args.maxlen], dtype=np.int32) 143 | seqcxt = np.zeros([args.maxlen,cxtsize], dtype=np.int32) 144 | testitemscxt = list() 145 | idx = args.maxlen - 1 146 | for i in reversed(train[u]): 147 | seq[idx] = i 148 | #cxt 149 | seqcxt[idx] = cxtdict[(u,i)] 150 | idx -= 1 151 | if idx == -1: break 152 | 153 | rated = set(train[u]) 154 | rated.add(0) 155 | item_idx = [valid[u][0]] 156 | testitemscxt.append(cxtdict[(u,valid[u][0])]) 157 | for _ in range(negnum): 158 | t = np.random.randint(1, itemnum + 1) 159 | while t in rated: t = np.random.randint(1, itemnum + 1) 160 | item_idx.append(t) 161 | testitemscxt.append(cxtdict[(u,valid[u][0])]) 162 | 163 | predictions = -model.predict(sess, np.ones(args.maxlen)*u, [seq], item_idx, [seqcxt], testitemscxt) 164 | predictions = predictions[0] 165 | score = -predictions.copy() 166 | 167 | rank = predictions.argsort().argsort()[0] 168 | 169 | valid_user += 1 170 | 171 | if rank < 10: 172 | NDCG += 1 / np.log2(rank + 2) 173 | HT += 1 174 | 175 | tmpans=0 176 | count=0 177 | for j in range(1,len(score)): #sample 178 | if score[0]>score[j]: 179 | tmpans+=1 180 | count+=1 181 | tmpans/=float(count) 182 | Auc+=tmpans 183 | #if valid_user % 100 == 0: 184 | # print ('.',sys.stdout.flush()) 185 | 186 | 187 | return NDCG / valid_user, HT / valid_user, Auc / valid_user 188 | 189 | 190 | 191 | def PreprocessData(filname, DatasetName, sep="\t"): 192 | col_names = ["user", "item", "rate", "st"] 193 | df = pd.read_csv(filname, sep=sep, header=None, names=col_names, engine='python') 194 | for col in ("user", "item"): 195 | df[col] = df[col].astype(np.int32) 196 | df["rate"] = df["rate"].astype(np.float32) 197 | df['ts'] = pd.to_datetime(df['st'],unit='s') 198 | df = df.sort_values(by=['ts']) 199 | df['year'], df['month'], df['day'], df['dayofweek'], df['dayofyear'] , df['week'] = zip(*df['ts'].map(lambda x: [x.year,x.month,x.day,x.dayofweek,x.dayofyear,x.week])) 200 | df['year']-=df['year'].min() 201 | df['year']/=df['year'].max() 202 | df['month']/=12 203 | df['day']/=31 204 | df['dayofweek']/=7 205 | df['dayofyear']/=365 206 | df['week']/=4 207 | 208 | DATEINFO = {} 209 | UsersDict = {} 210 | for index, row in df.iterrows() : 211 | userid = int(row['user']) 212 | itemid = int(row['item']) 213 | 214 | if userid in UsersDict.keys() : 215 | UsersDict[userid].append(itemid) 216 | else : 217 | UsersDict[userid] = list() 218 | UsersDict[userid].append(itemid) 219 | 220 | 221 | year = row['year'] 222 | month = row['month'] 223 | day = row['day'] 224 | dayofweek = row['dayofweek'] 225 | dayofyear = row['dayofyear'] 226 | week = row['week'] 227 | DATEINFO[(userid,itemid)] = [year, month, day, dayofweek, dayofyear, week] 228 | ''' 229 | f = open('./Data/%s_pre.txt' % DatasetName, 'w') 230 | for user in UsersDict.keys(): 231 | for i in UsersDict[user]: 232 | f.write('%d %d\n' % (user, i)) 233 | f.close() 234 | ''' 235 | 236 | return df, DATEINFO 237 | 238 | 239 | def get_ItemDataBeauty(itemnum): 240 | #ItemFeatures = load_data('./Data/Beauty_feat_1.dat') 241 | ItemFeatures = load_data('./Data/Beauty_feat_cat.dat') 242 | ItemFeatures = np.vstack((np.zeros(ItemFeatures.shape[1]), ItemFeatures)) 243 | return ItemFeatures 244 | 245 | def 
get_UserDataBeauty(usernum): 246 | UserFeatures = np.identity(usernum,dtype=np.int8) 247 | UserFeatures = np.vstack((np.zeros(UserFeatures.shape[1],dtype=np.int8), UserFeatures)) 248 | return UserFeatures 249 | 250 | def PreprocessData_Beauty(filname, DatasetName, sep="\t"): 251 | col_names = ["user", "item", "ts"] 252 | df = pd.read_csv(filname, sep=sep, header=None, names=col_names, engine='python') 253 | for col in ("user", "item"): 254 | df[col] = df[col].astype(np.int32) 255 | 256 | df['ts'] = pd.to_datetime(df['ts'],unit='s') 257 | df = df.sort_values(by=['ts']) 258 | df['year'], df['month'], df['day'], df['dayofweek'], df['dayofyear'] , df['week'] = zip(*df['ts'].map(lambda x: [x.year,x.month,x.day,x.dayofweek,x.dayofyear,x.week])) 259 | df['year']-=df['year'].min() 260 | df['year']/=df['year'].max() 261 | df['month']/=12 262 | df['day']/=31 263 | df['dayofweek']/=7 264 | df['dayofyear']/=365 265 | df['week']/=4 266 | 267 | DATEINFO = {} 268 | UsersDict = {} 269 | for index, row in df.iterrows() : 270 | userid = int(row['user']) 271 | itemid = int(row['item']) 272 | 273 | year = row['year'] 274 | month = row['month'] 275 | day = row['day'] 276 | dayofweek = row['dayofweek'] 277 | dayofyear = row['dayofyear'] 278 | week = row['week'] 279 | DATEINFO[(userid,itemid)] = [year, month, day, dayofweek, dayofyear, week] 280 | 281 | return df, DATEINFO 282 | 283 | def get_ItemDataMen(itemnum): 284 | ItemFeatures = load_data('./Data/Men_imgs.dat') 285 | ItemFeatures = np.vstack((np.zeros(ItemFeatures.shape[1]), ItemFeatures)) 286 | return ItemFeatures 287 | 288 | def get_UserDataMen(usernum): 289 | UserFeatures = np.identity(usernum,dtype=np.int8) 290 | UserFeatures = np.vstack((np.zeros(UserFeatures.shape[1],dtype=np.int8), UserFeatures)) 291 | return UserFeatures 292 | 293 | def PreprocessData_Men(filname, DatasetName, sep="\t"): 294 | col_names = ["user", "item", "ts"] 295 | df = pd.read_csv(filname, sep=sep, header=None, names=col_names, engine='python') 296 | for col in ("user", "item"): 297 | df[col] = df[col].astype(np.int32) 298 | 299 | df['ts'] = pd.to_datetime(df['ts'],unit='s') 300 | df = df.sort_values(by=['ts']) 301 | df['year'], df['month'], df['day'], df['dayofweek'], df['dayofyear'] , df['week'] = zip(*df['ts'].map(lambda x: [x.year,x.month,x.day,x.dayofweek,x.dayofyear,x.week])) 302 | df['year']-=df['year'].min() 303 | df['year']/=df['year'].max() 304 | df['month']/=12 305 | df['day']/=31 306 | df['dayofweek']/=7 307 | df['dayofyear']/=365 308 | df['week']/=4 309 | 310 | DATEINFO = {} 311 | UsersDict = {} 312 | for index, row in df.iterrows() : 313 | userid = int(row['user']) 314 | itemid = int(row['item']) 315 | 316 | year = row['year'] 317 | month = row['month'] 318 | day = row['day'] 319 | dayofweek = row['dayofweek'] 320 | dayofyear = row['dayofyear'] 321 | week = row['week'] 322 | DATEINFO[(userid,itemid)] = [year, month, day, dayofweek, dayofyear, week] 323 | 324 | return df, DATEINFO 325 | 326 | 327 | def PreprocessData_Fashion(filname, DatasetName, sep="\t"): 328 | col_names = ["user", "item", "ts"] 329 | df = pd.read_csv(filname, sep=sep, header=None, names=col_names, engine='python') 330 | for col in ("user", "item"): 331 | df[col] = df[col].astype(np.int32) 332 | 333 | df['ts'] = pd.to_datetime(df['ts'],unit='s') 334 | df = df.sort_values(by=['ts']) 335 | df['year'], df['month'], df['day'], df['dayofweek'], df['dayofyear'] , df['week'] = zip(*df['ts'].map(lambda x: [x.year,x.month,x.day,x.dayofweek,x.dayofyear,x.week])) 336 | df['year']-=df['year'].min() 337 | 
df['year']/=df['year'].max() 338 | df['month']/=12 339 | df['day']/=31 340 | df['dayofweek']/=7 341 | df['dayofyear']/=365 342 | df['week']/=4 343 | 344 | DATEINFO = {} 345 | UsersDict = {} 346 | for index, row in df.iterrows() : 347 | userid = int(row['user']) 348 | itemid = int(row['item']) 349 | 350 | year = row['year'] 351 | month = row['month'] 352 | day = row['day'] 353 | dayofweek = row['dayofweek'] 354 | dayofyear = row['dayofyear'] 355 | week = row['week'] 356 | DATEINFO[(userid,itemid)] = [year, month, day, dayofweek, dayofyear, week] 357 | 358 | return df, DATEINFO 359 | 360 | def get_ItemDataFashion(itemnum): 361 | ItemFeatures = load_data('./Data/Fashion_imgs.dat') 362 | ItemFeatures = np.vstack((np.zeros(ItemFeatures.shape[1]), ItemFeatures)) 363 | return ItemFeatures 364 | 365 | def get_UserDataFashion(usernum): 366 | UserFeatures = np.identity(usernum,dtype=np.int8) 367 | UserFeatures = np.vstack((np.zeros(UserFeatures.shape[1],dtype=np.int8), UserFeatures)) 368 | return UserFeatures 369 | 370 | def PreprocessData_Games(filname, DatasetName, sep="\t"): 371 | col_names = ["user", "item", "ts"] 372 | df = pd.read_csv(filname, sep=sep, header=None, names=col_names, engine='python') 373 | for col in ("user", "item"): 374 | df[col] = df[col].astype(np.int32) 375 | 376 | df['ts'] = pd.to_datetime(df['ts'],unit='s') 377 | df = df.sort_values(by=['ts']) 378 | df['year'], df['month'], df['day'], df['dayofweek'], df['dayofyear'] , df['week'] = zip(*df['ts'].map(lambda x: [x.year,x.month,x.day,x.dayofweek,x.dayofyear,x.week])) 379 | df['year']-=df['year'].min() 380 | df['year']/=df['year'].max() 381 | df['month']/=12 382 | df['day']/=31 383 | df['dayofweek']/=7 384 | df['dayofyear']/=365 385 | df['week']/=4 386 | 387 | DATEINFO = {} 388 | UsersDict = {} 389 | for index, row in df.iterrows() : 390 | userid = int(row['user']) 391 | itemid = int(row['item']) 392 | 393 | year = row['year'] 394 | month = row['month'] 395 | day = row['day'] 396 | dayofweek = row['dayofweek'] 397 | dayofyear = row['dayofyear'] 398 | week = row['week'] 399 | DATEINFO[(userid,itemid)] = [year, month, day, dayofweek, dayofyear, week] 400 | 401 | return df, DATEINFO 402 | 403 | def get_ItemDataGames(itemnum): 404 | ItemFeatures = load_data('./Data/Video_Games_feat.dat') 405 | ItemFeatures = np.vstack((np.zeros(ItemFeatures.shape[1]), ItemFeatures)) 406 | return ItemFeatures 407 | 408 | """#Sampler""" 409 | 410 | import numpy as np 411 | from multiprocessing import Process, Queue 412 | 413 | 414 | def random_neq(l, r, s): 415 | t = np.random.randint(l, r) 416 | while t in s: 417 | t = np.random.randint(l, r) 418 | return t 419 | 420 | 421 | def sample_function(user_train, usernum, itemnum, cxtdict, cxtsize, batch_size, maxlen, result_queue, SEED): 422 | def sample(): 423 | 424 | user = np.random.randint(1, usernum + 1) 425 | while len(user_train[user]) <= 1: user = np.random.randint(1, usernum + 1) 426 | 427 | seq = np.zeros([maxlen], dtype=np.int32) 428 | pos = np.zeros([maxlen], dtype=np.int32) 429 | neg = np.zeros([maxlen], dtype=np.int32) 430 | ###CXT 431 | seqcxt = np.zeros([maxlen,cxtsize], dtype=np.float32) 432 | poscxt = np.zeros([maxlen,cxtsize], dtype=np.float32) 433 | negcxt = np.zeros([maxlen,cxtsize], dtype=np.float32) 434 | ### 435 | 436 | 437 | nxt = user_train[user][-1] 438 | idx = maxlen - 1 439 | 440 | ts = set(user_train[user]) 441 | for i in reversed(user_train[user][:-1]): 442 | 443 | seq[idx] = i 444 | pos[idx] = nxt 445 | neg_i = 0 446 | if nxt != 0: 447 | neg_i = random_neq(1, itemnum + 1, 
ts) 448 | neg[idx] = neg_i 449 | ###CXT 450 | seqcxt[idx] = cxtdict[(user,i)] 451 | poscxt[idx] = cxtdict[(user,nxt)] 452 | negcxt[idx] = cxtdict[(user,nxt)] 453 | ### 454 | 455 | nxt = i 456 | idx -= 1 457 | if idx == -1: break 458 | 459 | return (np.ones(maxlen)*user, seq, pos, neg, seqcxt, poscxt, negcxt) 460 | 461 | np.random.seed(SEED) 462 | while True: 463 | one_batch = [] 464 | for i in range(batch_size): 465 | one_batch.append(sample()) 466 | 467 | result_queue.put(zip(*one_batch)) 468 | 469 | 470 | class WarpSampler(object): 471 | def __init__(self, User, usernum, itemnum, cxtdict, cxtsize, batch_size=64, maxlen=10, n_workers=1): 472 | self.result_queue = Queue(maxsize=n_workers * 10) 473 | self.processors = [] 474 | for i in range(n_workers): 475 | self.processors.append( 476 | Process(target=sample_function, args=(User, 477 | usernum, 478 | itemnum, 479 | cxtdict, 480 | cxtsize, 481 | batch_size, 482 | maxlen, 483 | self.result_queue, 484 | np.random.randint(2e9) 485 | ))) 486 | self.processors[-1].daemon = True 487 | self.processors[-1].start() 488 | 489 | def next_batch(self): 490 | return self.result_queue.get() 491 | 492 | def close(self): 493 | for p in self.processors: 494 | p.terminate() 495 | p.join() 496 | 497 | """#Modules""" 498 | 499 | ''' 500 | Modified version of the original code by kyubyong park. 501 | kbpark.linguist@gmail.com. 502 | https://www.github.com/kyubyong/transformer 503 | ''' 504 | 505 | 506 | import tensorflow as tf 507 | import numpy as np 508 | 509 | 510 | def positional_encoding(dim, sentence_length, dtype=tf.float32): 511 | 512 | encoded_vec = np.array([pos/np.power(10000, 2*i/dim) for pos in range(sentence_length) for i in range(dim)]) 513 | encoded_vec[::2] = np.sin(encoded_vec[::2]) 514 | encoded_vec[1::2] = np.cos(encoded_vec[1::2]) 515 | 516 | return tf.convert_to_tensor(encoded_vec.reshape([sentence_length, dim]), dtype=dtype) 517 | 518 | def normalize(inputs, 519 | epsilon = 1e-8, 520 | scope="ln", 521 | reuse=None): 522 | '''Applies layer normalization. 523 | 524 | Args: 525 | inputs: A tensor with 2 or more dimensions, where the first dimension has 526 | `batch_size`. 527 | epsilon: A floating number. A very small number for preventing ZeroDivision Error. 528 | scope: Optional scope for `variable_scope`. 529 | reuse: Boolean, whether to reuse the weights of a previous layer 530 | by the same name. 531 | 532 | Returns: 533 | A tensor with the same shape and data dtype as `inputs`. 534 | ''' 535 | with tf.variable_scope(scope, reuse=reuse): 536 | inputs_shape = inputs.get_shape() 537 | params_shape = inputs_shape[-1:] 538 | 539 | mean, variance = tf.nn.moments(inputs, [-1], keep_dims=True) 540 | beta= tf.Variable(tf.zeros(params_shape)) 541 | gamma = tf.Variable(tf.ones(params_shape)) 542 | normalized = (inputs - mean) / ( (variance + epsilon) ** (.5) ) 543 | outputs = gamma * normalized + beta 544 | 545 | return outputs 546 | 547 | def embedding(inputs, 548 | vocab_size, 549 | num_units, 550 | zero_pad=True, 551 | scale=True, 552 | l2_reg=0.0, 553 | scope="embedding", 554 | with_t=False, 555 | reuse=None): 556 | '''Embeds a given tensor. 557 | 558 | Args: 559 | inputs: A `Tensor` with type `int32` or `int64` containing the ids 560 | to be looked up in `lookup table`. 561 | vocab_size: An int. Vocabulary size. 562 | num_units: An int. Number of embedding hidden units. 563 | zero_pad: A boolean. If True, all the values of the fist row (id 0) 564 | should be constant zeros. 565 | scale: A boolean. If True. 
the outputs is multiplied by sqrt num_units. 566 | scope: Optional scope for `variable_scope`. 567 | reuse: Boolean, whether to reuse the weights of a previous layer 568 | by the same name. 569 | 570 | Returns: 571 | A `Tensor` with one more rank than inputs's. The last dimensionality 572 | should be `num_units`. 573 | 574 | For example, 575 | 576 | ``` 577 | import tensorflow as tf 578 | 579 | inputs = tf.to_int32(tf.reshape(tf.range(2*3), (2, 3))) 580 | outputs = embedding(inputs, 6, 2, zero_pad=True) 581 | with tf.Session() as sess: 582 | sess.run(tf.global_variables_initializer()) 583 | print sess.run(outputs) 584 | >> 585 | [[[ 0. 0. ] 586 | [ 0.09754146 0.67385566] 587 | [ 0.37864095 -0.35689294]] 588 | 589 | [[-1.01329422 -1.09939694] 590 | [ 0.7521342 0.38203377] 591 | [-0.04973143 -0.06210355]]] 592 | ``` 593 | 594 | ``` 595 | import tensorflow as tf 596 | 597 | inputs = tf.to_int32(tf.reshape(tf.range(2*3), (2, 3))) 598 | outputs = embedding(inputs, 6, 2, zero_pad=False) 599 | with tf.Session() as sess: 600 | sess.run(tf.global_variables_initializer()) 601 | print sess.run(outputs) 602 | >> 603 | [[[-0.19172323 -0.39159766] 604 | [-0.43212751 -0.66207761] 605 | [ 1.03452027 -0.26704335]] 606 | 607 | [[-0.11634696 -0.35983452] 608 | [ 0.50208133 0.53509563] 609 | [ 1.22204471 -0.96587461]]] 610 | ``` 611 | ''' 612 | with tf.variable_scope(scope, reuse=reuse): 613 | lookup_table = tf.get_variable('lookup_table', 614 | dtype=tf.float32, 615 | shape=[vocab_size, num_units], 616 | #initializer=tf.contrib.layers.xavier_initializer(), 617 | regularizer=tf.contrib.layers.l2_regularizer(l2_reg)) 618 | if zero_pad: 619 | lookup_table = tf.concat((tf.zeros(shape=[1, num_units]), 620 | lookup_table[1:, :]), 0) 621 | outputs = tf.nn.embedding_lookup(lookup_table, inputs) 622 | 623 | if scale: 624 | outputs = outputs * (num_units ** 0.5) 625 | if with_t: return outputs,lookup_table 626 | else: return outputs 627 | 628 | 629 | def multihead_attention(queries, 630 | keys, 631 | num_units=None, 632 | num_heads=8, 633 | dropout_rate=0, 634 | is_training=True, 635 | causality=False, 636 | scope="multihead_attention", 637 | reuse=None, 638 | res=True, 639 | with_qk=False): 640 | '''Applies multihead attention. 641 | 642 | Args: 643 | queries: A 3d tensor with shape of [N, T_q, C_q]. 644 | keys: A 3d tensor with shape of [N, T_k, C_k]. 645 | num_units: A scalar. Attention size. 646 | dropout_rate: A floating point number. 647 | is_training: Boolean. Controller of mechanism for dropout. 648 | causality: Boolean. If true, units that reference the future are masked. 649 | num_heads: An int. Number of heads. 650 | scope: Optional scope for `variable_scope`. 651 | reuse: Boolean, whether to reuse the weights of a previous layer 652 | by the same name. 
653 | 654 | Returns 655 | A 3d tensor with shape of (N, T_q, C) 656 | ''' 657 | with tf.variable_scope(scope, reuse=reuse): 658 | # Set the fall back option for num_units 659 | if num_units is None: 660 | num_units = queries.get_shape().as_list[-1] 661 | 662 | # Linear projections 663 | Q = tf.layers.dense(queries, num_units, activation=tf.nn.leaky_relu) # (N, T_q, C) 664 | K = tf.layers.dense(keys, num_units, activation=tf.nn.leaky_relu) # (N, T_k, C) 665 | V = tf.layers.dense(keys, num_units, activation=tf.nn.leaky_relu) # (N, T_k, C) 666 | #Q = tf.layers.dense(queries, num_units, activation=None) # (N, T_q, C) 667 | #K = tf.layers.dense(keys, num_units, activation=None) # (N, T_k, C) 668 | #V = tf.layers.dense(keys, num_units, activation=None) # (N, T_k, C) 669 | 670 | # Split and concat 671 | Q_ = tf.concat(tf.split(Q, num_heads, axis=2), axis=0) # (h*N, T_q, C/h) 672 | K_ = tf.concat(tf.split(K, num_heads, axis=2), axis=0) # (h*N, T_k, C/h) 673 | V_ = tf.concat(tf.split(V, num_heads, axis=2), axis=0) # (h*N, T_k, C/h) 674 | 675 | # Multiplication 676 | outputs = tf.matmul(Q_, tf.transpose(K_, [0, 2, 1])) # (h*N, T_q, T_k) 677 | 678 | # Scale 679 | outputs = outputs / (K_.get_shape().as_list()[-1] ** 0.5) 680 | 681 | # Key Masking 682 | key_masks = tf.sign(tf.reduce_sum(tf.abs(keys), axis=-1)) # (N, T_k) 683 | key_masks = tf.tile(key_masks, [num_heads, 1]) # (h*N, T_k) 684 | key_masks = tf.tile(tf.expand_dims(key_masks, 1), [1, tf.shape(queries)[1], 1]) # (h*N, T_q, T_k) 685 | 686 | paddings = tf.ones_like(outputs)*(-2**32+1) 687 | outputs = tf.where(tf.equal(key_masks, 0), paddings, outputs) # (h*N, T_q, T_k) 688 | 689 | # Causality = Future blinding 690 | if causality: 691 | diag_vals = tf.ones_like(outputs[0, :, :]) # (T_q, T_k) 692 | tril = tf.linalg.LinearOperatorLowerTriangular(diag_vals).to_dense() # (T_q, T_k) 693 | masks = tf.tile(tf.expand_dims(tril, 0), [tf.shape(outputs)[0], 1, 1]) # (h*N, T_q, T_k) 694 | 695 | paddings = tf.ones_like(masks)*(-2**32+1) 696 | outputs = tf.where(tf.equal(masks, 0), paddings, outputs) # (h*N, T_q, T_k) 697 | 698 | # Activation 699 | outputs = tf.nn.softmax(outputs) # (h*N, T_q, T_k) 700 | 701 | # Query Masking 702 | query_masks = tf.sign(tf.reduce_sum(tf.abs(queries), axis=-1)) # (N, T_q) 703 | query_masks = tf.tile(query_masks, [num_heads, 1]) # (h*N, T_q) 704 | query_masks = tf.tile(tf.expand_dims(query_masks, -1), [1, 1, tf.shape(keys)[1]]) # (h*N, T_q, T_k) 705 | outputs *= query_masks # broadcasting. (N, T_q, C) 706 | 707 | # Dropouts 708 | outputs = tf.layers.dropout(outputs, rate=dropout_rate, training=tf.convert_to_tensor(is_training)) 709 | 710 | # Weighted sum 711 | outputs = tf.matmul(outputs, V_) # ( h*N, T_q, C/h) 712 | 713 | # Restore shape 714 | outputs = tf.concat(tf.split(outputs, num_heads, axis=0), axis=2 ) # (N, T_q, C) 715 | 716 | # Residual connection 717 | if res: 718 | outputs *= queries 719 | 720 | # Normalize 721 | #outputs = normalize(outputs) # (N, T_q, C) 722 | 723 | if with_qk: return Q,K 724 | else: return outputs 725 | 726 | def multihead_attention2(queries, 727 | keys, 728 | num_units=None, 729 | num_heads=8, 730 | dropout_rate=0, 731 | is_training=True, 732 | causality=False, 733 | scope="multihead_attention", 734 | reuse=None, 735 | res=True, 736 | with_qk=False): 737 | '''Applies multihead attention. 738 | 739 | Args: 740 | queries: A 3d tensor with shape of [N, T_q, C_q]. 741 | keys: A 3d tensor with shape of [N, T_k, C_k]. 742 | num_units: A scalar. Attention size. 
743 | dropout_rate: A floating point number. 744 | is_training: Boolean. Controller of mechanism for dropout. 745 | causality: Boolean. If true, units that reference the future are masked. 746 | num_heads: An int. Number of heads. 747 | scope: Optional scope for `variable_scope`. 748 | reuse: Boolean, whether to reuse the weights of a previous layer 749 | by the same name. 750 | 751 | Returns 752 | A 3d tensor with shape of (N, T_q, C) 753 | ''' 754 | with tf.variable_scope(scope, reuse=reuse): 755 | # Set the fall back option for num_units 756 | if num_units is None: 757 | num_units = queries.get_shape().as_list[-1] 758 | 759 | # Linear projections 760 | Q = tf.layers.dense(queries, num_units, activation=tf.nn.leaky_relu) # (N, T_q, C) 761 | K = tf.layers.dense(keys, num_units, activation=tf.nn.leaky_relu) # (N, T_k, C) 762 | V = tf.layers.dense(keys, num_units, activation=tf.nn.leaky_relu) # (N, T_k, C) 763 | #Q = tf.layers.dense(queries, num_units, activation=None) # (N, T_q, C) 764 | #K = tf.layers.dense(keys, num_units, activation=None) # (N, T_k, C) 765 | #V = tf.layers.dense(keys, num_units, activation=None) # (N, T_k, C) 766 | 767 | # Split and concat 768 | Q_ = tf.concat(tf.split(Q, num_heads, axis=2), axis=0) # (h*N, T_q, C/h) 769 | K_ = tf.concat(tf.split(K, num_heads, axis=2), axis=0) # (h*N, T_k, C/h) 770 | V_ = tf.concat(tf.split(V, num_heads, axis=2), axis=0) # (h*N, T_k, C/h) 771 | 772 | # Multiplication 773 | outputs = tf.matmul(Q_, tf.transpose(K_, [0, 2, 1])) # (h*N, T_q, T_k) 774 | 775 | # Scale 776 | outputs = outputs / (K_.get_shape().as_list()[-1] ** 0.5) 777 | 778 | # Key Masking 779 | key_masks = tf.sign(tf.reduce_sum(tf.abs(keys), axis=-1)) # (N, T_k) 780 | key_masks = tf.tile(key_masks, [num_heads, 1]) # (h*N, T_k) 781 | key_masks = tf.tile(tf.expand_dims(key_masks, 1), [1, tf.shape(queries)[1], 1]) # (h*N, T_q, T_k) 782 | 783 | paddings = tf.ones_like(outputs)*(-2**32+1) 784 | outputs = tf.where(tf.equal(key_masks, 0), paddings, outputs) # (h*N, T_q, T_k) 785 | 786 | # Causality = Future blinding 787 | if causality: 788 | diag_vals = tf.ones_like(outputs[0, :, :]) # (T_q, T_k) 789 | tril = tf.linalg.LinearOperatorLowerTriangular(diag_vals).to_dense() # (T_q, T_k) 790 | masks = tf.tile(tf.expand_dims(tril, 0), [tf.shape(outputs)[0], 1, 1]) # (h*N, T_q, T_k) 791 | 792 | paddings = tf.ones_like(masks)*(-2**32+1) 793 | outputs = tf.where(tf.equal(masks, 0), paddings, outputs) # (h*N, T_q, T_k) 794 | 795 | # Activation 796 | outputs = tf.nn.softmax(outputs) # (h*N, T_q, T_k) 797 | 798 | # Query Masking 799 | query_masks = tf.sign(tf.reduce_sum(tf.abs(queries), axis=-1)) # (N, T_q) 800 | query_masks = tf.tile(query_masks, [num_heads, 1]) # (h*N, T_q) 801 | query_masks = tf.tile(tf.expand_dims(query_masks, -1), [1, 1, tf.shape(keys)[1]]) # (h*N, T_q, T_k) 802 | outputs *= query_masks # broadcasting. 
(N, T_q, C) 803 | 804 | # Dropouts 805 | outputs = tf.layers.dropout(outputs, rate=dropout_rate, training=tf.convert_to_tensor(is_training)) 806 | 807 | # Weighted sum 808 | outputs = tf.matmul(outputs, V_) # ( h*N, T_q, C/h) 809 | 810 | # Restore shape 811 | outputs = tf.concat(tf.split(outputs, num_heads, axis=0), axis=2 ) # (N, T_q, C) 812 | 813 | # Residual connection 814 | if res: 815 | outputs *= queries 816 | 817 | # Normalize 818 | #outputs = normalize(outputs) # (N, T_q, C) 819 | 820 | if with_qk: return Q,K 821 | else: return outputs 822 | 823 | def feedforward(inputs, 824 | num_units=[2048, 512], 825 | scope="multihead_attention", 826 | dropout_rate=0.2, 827 | is_training=True, 828 | reuse=None): 829 | '''Point-wise feed forward net. 830 | 831 | Args: 832 | inputs: A 3d tensor with shape of [N, T, C]. 833 | num_units: A list of two integers. 834 | scope: Optional scope for `variable_scope`. 835 | reuse: Boolean, whether to reuse the weights of a previous layer 836 | by the same name. 837 | 838 | Returns: 839 | A 3d tensor with the same shape and dtype as inputs 840 | ''' 841 | with tf.variable_scope(scope, reuse=reuse): 842 | # Inner layer 843 | params = {"inputs": inputs, "filters": num_units[0], "kernel_size": 1, 844 | "activation": tf.nn.leaky_relu, "use_bias": True} 845 | outputs = tf.layers.conv1d(**params) 846 | outputs = tf.layers.dropout(outputs, rate=dropout_rate, training=tf.convert_to_tensor(is_training)) 847 | # Readout layer 848 | params = {"inputs": outputs, "filters": num_units[1], "kernel_size": 1, 849 | "activation": None, "use_bias": True} 850 | outputs = tf.layers.conv1d(**params) 851 | outputs = tf.layers.dropout(outputs, rate=dropout_rate, training=tf.convert_to_tensor(is_training)) 852 | 853 | # Residual connection 854 | outputs += inputs 855 | 856 | # Normalize 857 | #outputs = normalize(outputs) 858 | 859 | return outputs 860 | 861 | """#Model""" 862 | 863 | class Model(): 864 | def __init__(self, usernum, itemnum, args, ItemFeatures=None, UserFeatures=None, cxt_size=None, reuse=None , use_res=False): 865 | self.is_training = tf.placeholder(tf.bool, shape=()) 866 | self.u = tf.placeholder(tf.int32, shape=(None, args.maxlen)) 867 | self.input_seq = tf.placeholder(tf.int32, shape=(None, args.maxlen)) 868 | self.pos = tf.placeholder(tf.int32, shape=(None, args.maxlen)) 869 | self.neg = tf.placeholder(tf.int32, shape=(None, args.maxlen)) 870 | self.seq_cxt = tf.placeholder(tf.float32, shape=(None, args.maxlen, cxt_size)) 871 | self.pos_cxt = tf.placeholder(tf.float32, shape=(None, args.maxlen, cxt_size)) 872 | self.neg_cxt = tf.placeholder(tf.float32, shape=(None, args.maxlen, cxt_size)) 873 | 874 | self.ItemFeats = tf.constant(ItemFeatures,name="ItemFeats", shape=[itemnum + 1, ItemFeatures.shape[1]],dtype=tf.float32) 875 | #self.UserFeats = tf.constant(UserFeatures,name="UserFeats", shape=[usernum + 1, UserFeatures.shape[1]],dtype=tf.float32) 876 | 877 | pos = self.pos 878 | neg = self.neg 879 | mask = tf.expand_dims(tf.to_float(tf.not_equal(self.input_seq, 0)), -1) 880 | 881 | # sequence embedding, item embedding table 882 | self.seq_in, item_emb_table = embedding(self.input_seq, 883 | vocab_size=itemnum + 1, 884 | num_units=args.hidden_units, 885 | zero_pad=True, 886 | scale=True, 887 | l2_reg=args.l2_emb, 888 | scope="input_embeddings", 889 | with_t=True, 890 | reuse=reuse 891 | ) 892 | 893 | # sequence features and their embeddings 894 | self.seq_feat = tf.nn.embedding_lookup(self.ItemFeats, self.input_seq, name="seq_feat") 895 | #Cxt 896 | 
self.seq_feat_in = tf.concat([self.seq_feat , self.seq_cxt], -1) 897 | #cxt 898 | self.seq_feat_emb = tf.layers.dense(inputs=self.seq_feat_in, units=args.hidden_units*5,activation=None, kernel_initializer=tf.random_normal_initializer(stddev=0.01) , name="feat_emb") 899 | #### Features Part 900 | 901 | 902 | # Positional Encoding 903 | t, pos_emb_table = embedding( 904 | tf.tile(tf.expand_dims(tf.range(tf.shape(self.input_seq)[1]), 0), [tf.shape(self.input_seq)[0], 1]), 905 | vocab_size=args.maxlen, 906 | num_units=args.hidden_units, 907 | zero_pad=False, 908 | scale=False, 909 | l2_reg=args.l2_emb, 910 | scope="dec_pos", 911 | reuse=reuse, 912 | with_t=True 913 | ) 914 | 915 | 916 | #### Features Part 917 | self.seq_concat = tf.concat([self.seq_in , self.seq_feat_emb], 2) 918 | self.seq = tf.layers.dense(inputs=self.seq_concat, units=args.hidden_units,activation=None, kernel_initializer=tf.random_normal_initializer(stddev=0.01), name='embComp') 919 | #### Features Part 920 | #### Cxt part 921 | 922 | #### 923 | #self.seq += t 924 | 925 | # Dropout 926 | self.seq = tf.layers.dropout(self.seq, 927 | rate=args.dropout_rate, 928 | training=tf.convert_to_tensor(self.is_training)) 929 | self.seq *= mask 930 | 931 | # Build blocks 932 | 933 | for i in range(args.num_blocks): 934 | with tf.variable_scope("num_blocks_%d" % i): 935 | 936 | # Self-attention 937 | self.seq = multihead_attention(queries=normalize(self.seq), 938 | keys=self.seq, 939 | num_units=args.hidden_units, 940 | num_heads=args.num_heads, 941 | dropout_rate=args.dropout_rate, 942 | is_training=self.is_training, 943 | causality=False, 944 | scope="self_attention") 945 | 946 | # Feed forward 947 | self.seq = feedforward(normalize(self.seq), num_units=[args.hidden_units, args.hidden_units], 948 | dropout_rate=args.dropout_rate, is_training=self.is_training) 949 | self.seq *= mask 950 | 951 | self.seq = normalize(self.seq) 952 | 953 | 954 | 955 | #pos = tf.reshape(pos, [tf.shape(self.input_seq)[0] * args.maxlen]) #(128 x 200) x 1 956 | #neg = tf.reshape(neg, [tf.shape(self.input_seq)[0] * args.maxlen]) #(128 x 200) x 1 957 | 958 | ##cxt 959 | #pos_cxt_resh = tf.reshape(self.pos_cxt, [tf.shape(self.input_seq)[0] * args.maxlen, cxt_size]) #(128 x 200) x 6 960 | #neg_cxt_resh = tf.reshape(self.neg_cxt, [tf.shape(self.input_seq)[0] * args.maxlen, cxt_size]) #(128 x 200) x 6 961 | ## 962 | #usr = tf.reshape(self.u, [tf.shape(self.input_seq)[0] * args.maxlen]) #(128 x 200) x 1 963 | 964 | 965 | pos_emb_in = tf.nn.embedding_lookup(item_emb_table, pos) #(128 x 200) x h 966 | neg_emb_in = tf.nn.embedding_lookup(item_emb_table, neg) #(128 x 200) x h 967 | 968 | #seq_emb = tf.reshape(self.seq, [tf.shape(self.input_seq)[0] * args.maxlen, args.hidden_units]) # 128 x 200 x h=> (128 x 200) x h 969 | 970 | #seq_emb_train = tf.reshape(self.seq, [tf.shape(self.input_seq)[0] * args.maxlen, args.hidden_units]) # 128 x 200 x h=> (128 x 200) x h 971 | #seq_emb_test = tf.reshape(self.seq, [tf.shape(self.input_seq)[0] * args.maxlen, args.hidden_units]) # 1 x 200 x h=> (1 x 200) x h 972 | seq_emb_train = self.seq #128 x 200 x h 973 | seq_emb_test = self.seq #128 x 200 x h 974 | 975 | 976 | 977 | #############User Embedding 978 | #user_emb = tf.one_hot(usr , usernum+1) 979 | #user_emb = tf.concat([tf.nn.embedding_lookup(self.UserFeats, usr, name="user_feat") ,user_emb], -1) 980 | #user_emb = tf.layers.dense(inputs=user_emb, units=args.hidden_units,activation=None, kernel_initializer=tf.random_normal_initializer(stddev=0.01) , name="user_emb") 981 | ## 982 | 
#seq_emb_train = tf.concat([seq_emb_train, user_emb], -1) 983 | #seq_emb_train = tf.layers.dense(inputs=seq_emb_train, units=args.hidden_units,activation=None, kernel_initializer=tf.random_normal_initializer(stddev=0.01) , name="seq_user_emb") 984 | 985 | ############# 986 | 987 | #### Features Part 988 | pos_feat_in = tf.nn.embedding_lookup(self.ItemFeats, pos, name="seq_feat") #(128 x 200) x h 989 | ##cxt 990 | pos_feat = tf.concat([pos_feat_in , self.pos_cxt], -1) #(128 x 200) x h 991 | ## 992 | pos_feat_emb = tf.layers.dense(inputs=pos_feat, reuse=True, units=args.hidden_units*5,activation=None, kernel_initializer=tf.random_normal_initializer(stddev=0.01) , name="feat_emb") 993 | pos_emb_con = tf.concat([pos_emb_in, pos_feat_emb], -1) 994 | pos_emb = tf.layers.dense(inputs=pos_emb_con, reuse=True, units=args.hidden_units,activation=None, kernel_initializer=tf.random_normal_initializer(stddev=0.01), name='embComp') # 128 x 200 x h 995 | 996 | 997 | #pos_emb = tf.multiply(pos_emb,user_emb) 998 | 999 | 1000 | neg_feat_in = tf.nn.embedding_lookup(self.ItemFeats, neg, name="seq_feat") 1001 | ##cxt 1002 | neg_feat = tf.concat([neg_feat_in , self.neg_cxt], -1) 1003 | ## 1004 | neg_feat_emb = tf.layers.dense(inputs=neg_feat, reuse=True, units=args.hidden_units*5,activation=None, kernel_initializer=tf.random_normal_initializer(stddev=0.01) , name="feat_emb") 1005 | neg_emb_con = tf.concat([neg_emb_in, neg_feat_emb], -1) 1006 | neg_emb = tf.layers.dense(inputs=neg_emb_con, reuse=True, units=args.hidden_units,activation=None, kernel_initializer=tf.random_normal_initializer(stddev=0.01), name='embComp') # 128 x 200 x h 1007 | 1008 | 1009 | #neg_emb = tf.multiply(neg_emb,user_emb) 1010 | #### Features Part 1011 | 1012 | 1013 | 1014 | 1015 | self.test_item = tf.placeholder(tf.int32, shape=(101)) 1016 | self.test_item_cxt = tf.placeholder(tf.float32, shape=(101, cxt_size)) 1017 | 1018 | test_item_resh = tf.reshape(self.test_item, [1,101]) 1019 | test_item_cxt_resh = tf.reshape(self.test_item_cxt, [1,101,cxt_size]) #1 x 101 x 6 1020 | 1021 | test_item_emb_in = tf.nn.embedding_lookup(item_emb_table, test_item_resh) #1 x 101 x h 1022 | 1023 | ########### Test user 1024 | self.test_user = tf.placeholder(tf.int32, shape=(args.maxlen)) 1025 | #test_user_emb = tf.one_hot(self.test_user , usernum+1) 1026 | #test_user_emb = tf.nn.embedding_lookup(self.UserFeats, self.test_user, name="Test_user_feat") 1027 | #test_user_emb = tf.concat([tf.nn.embedding_lookup(self.UserFeats, self.test_user, name="Test_user_feat") ,test_user_emb], -1) 1028 | #test_user_emb = tf.layers.dense(inputs=test_user_emb, reuse=True, units=args.hidden_units,activation=tf.nn.leaky_relu, kernel_initializer=tf.random_normal_initializer(stddev=0.01) , name="user_emb") 1029 | 1030 | 1031 | #### Features Part 1032 | test_feat_in = tf.nn.embedding_lookup(self.ItemFeats, test_item_resh, name="seq_feat") #1 x 101 x f 1033 | ##cxt 1034 | test_feat_con = tf.concat([test_feat_in , test_item_cxt_resh], -1) #1 x 101 x f + 6 1035 | ## 1036 | test_feat_emb = tf.layers.dense(inputs=test_feat_con, reuse=True, units=args.hidden_units*5,activation=None, kernel_initializer=tf.random_normal_initializer(stddev=0.01), name="feat_emb") #1 x 101 x h 1037 | test_item_emb_con = tf.concat([test_item_emb_in, test_feat_emb], -1) #1 x 101 x 2h 1038 | test_item_emb = tf.layers.dense(inputs=test_item_emb_con, reuse=True, units=args.hidden_units,activation=None, kernel_initializer=tf.random_normal_initializer(stddev=0.01), name='embComp') # 1 x 101 x h 1039 | 1040 | 1041 | 
############################################################################ 1042 | 1043 | #test_item_emb = tf.multiply(test_item_emb, test_user_emb) 1044 | #### Features Part 1045 | mask_pos = tf.expand_dims(tf.to_float(tf.not_equal(self.pos, 0)), -1) 1046 | mask_neg = tf.expand_dims(tf.to_float(tf.not_equal(self.neg, 0)), -1) 1047 | 1048 | 1049 | self.test_logits = None 1050 | for i in range(1): 1051 | with tf.variable_scope("num_blocks_p_%d" % i): 1052 | 1053 | # Self-attentions, # 1 x 200 x h 1054 | # Self-attentions, # 1 x 101 x h 1055 | self.test_logits = multihead_attention2(queries=test_item_emb, 1056 | keys=seq_emb_test, 1057 | num_units=args.hidden_units, 1058 | num_heads=args.num_heads, 1059 | dropout_rate=args.dropout_rate, 1060 | is_training=self.is_training, 1061 | causality=False, 1062 | res = use_res, 1063 | scope="self_attention") 1064 | 1065 | # Feed forward , # 1 x 101 x h 1066 | #self.test_logits = feedforward(self.test_logits, num_units=[args.hidden_units, args.hidden_units], dropout_rate=args.dropout_rate, is_training=self.is_training) 1067 | 1068 | 1069 | 1070 | ##Without User 1071 | self.test_logits = tf.layers.dense(inputs=self.test_logits, units=1,activation=None, kernel_initializer=tf.random_normal_initializer(stddev=0.01), name='logit') # 1 x 101 x 1 1072 | self.test_logits = tf.reshape(self.test_logits, [1, 101], name="Reshape_pos") # 101 x 1 1073 | 1074 | 1075 | 1076 | 1077 | ## prediction layer 1078 | ############################################################################ 1079 | self.pos_logits = None 1080 | self.neg_logits = None 1081 | for i in range(1): 1082 | with tf.variable_scope("num_blocks_p_%d" % i): 1083 | 1084 | # Self-attentions, # 128 x 200 x 1 1085 | self.pos_logits = multihead_attention2(queries=pos_emb, 1086 | keys=seq_emb_train, 1087 | num_units=args.hidden_units, 1088 | num_heads=args.num_heads, 1089 | dropout_rate=args.dropout_rate, 1090 | is_training=self.is_training, 1091 | causality=False, 1092 | reuse=True, 1093 | res = use_res, 1094 | scope="self_attention") 1095 | 1096 | # Feed forward , # 128 x 200 x 1 1097 | #self.pos_logits = feedforward(normalize(self.pos_logits), num_units=[args.hidden_units, args.hidden_units], dropout_rate=args.dropout_rate, is_training=self.is_training,reuse=True) 1098 | self.pos_logits *= mask_pos 1099 | 1100 | for i in range(1): 1101 | with tf.variable_scope("num_blocks_p_%d" % i): 1102 | 1103 | # Self-attentions 1104 | self.neg_logits = multihead_attention2(queries=neg_emb, 1105 | keys=seq_emb_train, 1106 | num_units=args.hidden_units, 1107 | num_heads=args.num_heads, 1108 | dropout_rate=args.dropout_rate, 1109 | is_training=self.is_training, 1110 | causality=False, 1111 | reuse=True, 1112 | res = use_res, 1113 | scope="self_attention") 1114 | 1115 | # Feed forward # 128 x 200 x 1 1116 | #self.neg_logits = feedforward(normalize(self.neg_logits), num_units=[args.hidden_units, args.hidden_units], dropout_rate=args.dropout_rate, is_training=self.is_training,reuse=True) 1117 | self.neg_logits *= mask_neg 1118 | 1119 | 1120 | 1121 | 1122 | self.pos_logits = tf.layers.dense(inputs=self.pos_logits, reuse=True, units=1,activation=None, kernel_initializer=tf.random_normal_initializer(stddev=0.01), name='logit') 1123 | self.neg_logits = tf.layers.dense(inputs=self.neg_logits, reuse=True, units=1,activation=None, kernel_initializer=tf.random_normal_initializer(stddev=0.01), name='logit') 1124 | #tf.reduce_sum(pos_emb * seq_emb_train, -1) 1125 | 1126 | self.pos_logits = tf.reshape(self.pos_logits, 
[tf.shape(self.input_seq)[0] * args.maxlen], name="Reshape_pos") # 128 x 200 x 1=> (128 x 200) x 1 1127 | self.neg_logits = tf.reshape(self.neg_logits, [tf.shape(self.input_seq)[0] * args.maxlen], name="Reshape_neg") # 128 x 200 x 1=> (128 x 200) x 1 1128 | ########################################################################### 1129 | 1130 | 1131 | 1132 | 1133 | # ignore padding items (0) 1134 | istarget = tf.reshape(tf.to_float(tf.not_equal(pos, 0)), [tf.shape(self.input_seq)[0] * args.maxlen]) 1135 | self.loss = tf.reduce_sum( 1136 | - tf.log(tf.sigmoid(self.pos_logits) + 1e-24) * istarget - 1137 | tf.log(1 - tf.sigmoid(self.neg_logits) + 1e-24) * istarget 1138 | ) / tf.reduce_sum(istarget) 1139 | reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES) 1140 | self.loss += sum(reg_losses) 1141 | 1142 | tf.summary.scalar('loss', self.loss) 1143 | self.auc = tf.reduce_sum( 1144 | ((tf.sign(self.pos_logits - self.neg_logits) + 1) / 2) * istarget 1145 | ) / tf.reduce_sum(istarget) 1146 | 1147 | if reuse is None: 1148 | tf.summary.scalar('auc', self.auc) 1149 | self.global_step = tf.Variable(0, name='global_step', trainable=False) 1150 | self.optimizer = tf.train.AdamOptimizer(learning_rate=args.lr, beta2=0.98) 1151 | self.train_op = self.optimizer.minimize(self.loss, global_step=self.global_step) 1152 | else: 1153 | tf.summary.scalar('test_auc', self.auc) 1154 | 1155 | self.merged = tf.summary.merge_all() 1156 | 1157 | def predict(self, sess, u, seq, item_idx, seqcxt, testitemcxt): 1158 | return sess.run(self.test_logits, 1159 | {self.test_user: u, self.input_seq: seq, self.test_item: item_idx, self.is_training: False, self.seq_cxt:seqcxt, self.test_item_cxt:testitemcxt}) 1160 | 1161 | """#Main""" 1162 | 1163 | import os 1164 | import time 1165 | import argparse 1166 | import tensorflow as tf 1167 | from tqdm import tqdm 1168 | 1169 | def str2bool(s): 1170 | if s not in {'False', 'True'}: 1171 | raise ValueError('Not a valid boolean string') 1172 | return s == 'True' 1173 | 1174 | dataset_name = sys.argv[1] 1175 | args = None 1176 | 1177 | if dataset_name == 'Beauty': 1178 | class Args: 1179 | dataset = 'Beauty' 1180 | train_dir = 'default' 1181 | batch_size = 128 1182 | lr = 0.0001 1183 | maxlen = 75 1184 | hidden_units = 90 1185 | num_blocks = 3 1186 | num_epochs = 1801 1187 | num_heads = 1 # 1188 | dropout_rate = 0.5 #2 1189 | l2_emb = 0.0001 1190 | cxt_size = 6 1191 | use_res = True 1192 | args = Args() 1193 | 1194 | 1195 | if dataset_name == 'Fashion': 1196 | class Args: 1197 | dataset = 'Fashion' 1198 | train_dir = 'default' 1199 | batch_size = 100 1200 | lr = 0.00001 1201 | maxlen = 35 1202 | hidden_units = 390 1203 | num_blocks = 3 1204 | num_epochs = 801 1205 | num_heads = 3 # 1206 | dropout_rate = 0.3 #2 1207 | l2_emb = 0.0001 1208 | cxt_size = 6 1209 | use_res = False 1210 | args = Args() 1211 | 1212 | 1213 | if dataset_name == 'Men' : 1214 | class Args: 1215 | dataset = 'Men' 1216 | train_dir = 'default' 1217 | batch_size = 128 1218 | lr = 0.000006 1219 | maxlen = 35 1220 | hidden_units = 390 1221 | num_blocks = 3 1222 | num_epochs = 801 1223 | num_heads = 3 # 1224 | dropout_rate = 0.3 #2 1225 | l2_emb = 0.0001 1226 | cxt_size = 6 1227 | use_res = False 1228 | args = Args() 1229 | 1230 | 1231 | if dataset_name == 'Video_Games': 1232 | class Args: 1233 | dataset = 'Video_Games' 1234 | train_dir = 'default' 1235 | batch_size = 128 1236 | lr = 0.0001 1237 | maxlen = 50 1238 | hidden_units = 90 1239 | num_blocks = 3 1240 | num_epochs = 801 1241 | num_heads = 3 # 1242 | 
dropout_rate = 0.5 #2 1243 | l2_emb = 0.0 1244 | cxt_size = 6 1245 | use_res = True 1246 | args = Args() 1247 | 1248 | 1249 | 1250 | # Uncomment these sections to generate the context dictionaries for each dataset if the datasets were preprocessed from scratch. 1251 | 1252 | #if dataset_name == 'Beauty' : 1253 | # TrainDf, CXTDict = PreprocessData_Beauty("./Data/Beauty_cxt.txt", args.dataset, sep=" ") 1254 | # save_data(CXTDict,'./Data/CXTDictSasRec_Beauty.dat') 1255 | 1256 | #if dataset_name == 'Video_Games' : 1257 | # TrainDf, CXTDict = PreprocessData_Games("./Data/Video_Games_cxt.txt", args.dataset, sep=" ") 1258 | # save_data(CXTDict,'./Data/CXTDictSasRec_Games.dat') 1259 | 1260 | #if dataset_name == 'Men' : 1261 | # TrainDf, CXTDict = PreprocessData_Men("./Data/Men_cxt.txt", args.dataset, sep=" ") 1262 | # save_data(CXTDict,'./Data/CXTDictSasRec_Men.dat') 1263 | 1264 | #if dataset_name == 'Fashion' : 1265 | # TrainDf, CXTDict = PreprocessData_Fashion("./Data/Fashion_cxt.txt", args.dataset, sep=" ") 1266 | # save_data(CXTDict,'./Data/CXTDictSasRec_Fashion.dat') 1267 | 1268 | ## 1269 | 1270 | dataset = data_partition(args.dataset) 1271 | [user_train, user_valid, user_test, usernum, itemnum] = dataset 1272 | num_batch = len(user_train) / args.batch_size 1273 | print(usernum,'--',itemnum) 1274 | ItemFeatures = None 1275 | UserFeatures = None 1276 | 1277 | if args.dataset == 'Beauty' : 1278 | ItemFeatures = get_ItemDataBeauty(itemnum) 1279 | #UserFeatures = get_UserDataBeauty(usernum) 1280 | UserFeatures = [] 1281 | CXTDict = load_data('./Data/CXTDictSasRec_Beauty.dat') 1282 | 1283 | if args.dataset == 'Men' : 1284 | ItemFeatures = get_ItemDataMen(itemnum) 1285 | #UserFeatures = get_UserDataMen(usernum) 1286 | UserFeatures = [] 1287 | CXTDict = load_data('./Data/CXTDictSasRec_Men.dat') 1288 | 1289 | if args.dataset == 'Fashion' : 1290 | ItemFeatures = get_ItemDataFashion(itemnum) 1291 | #UserFeatures = get_UserDataFashion(usernum) 1292 | UserFeatures = [] 1293 | CXTDict = load_data('./Data/CXTDictSasRec_Fashion.dat') 1294 | 1295 | if args.dataset == 'Video_Games' : 1296 | ItemFeatures = get_ItemDataGames(itemnum) 1297 | #UserFeatures = get_UserDataFashion(usernum) 1298 | UserFeatures = [] 1299 | CXTDict = load_data('./Data/CXTDictSasRec_Games.dat') 1300 | 1301 | 1302 | print(ItemFeatures.shape) 1303 | #print(UserFeatures) 1304 | #print(abc) 1305 | cc = 0.0 1306 | for u in user_train: 1307 | cc += len(user_train[u]) 1308 | print ('average sequence length: %.2f' % (cc / len(user_train))) 1309 | 1310 | #config = tf.ConfigProto() 1311 | #config.gpu_options.allow_growth = True 1312 | #config.allow_soft_placement = True 1313 | #sess = tf.Session(config=config) 1314 | sess = tf.Session() 1315 | sampler = WarpSampler(user_train, usernum, itemnum, CXTDict, args.cxt_size, batch_size=args.batch_size, maxlen=args.maxlen, n_workers=3) 1316 | model = Model(usernum, itemnum, args, ItemFeatures, UserFeatures, args.cxt_size,use_res = args.use_res) 1317 | sess.run(tf.initialize_all_variables()) 1318 | T = 0.0 1319 | t0 = time.time() 1320 | 1321 | 1322 | for epoch in range(1, args.num_epochs + 1): 1323 | for step in tqdm(range(int(num_batch)), total=int(num_batch), ncols=70, leave=False, unit='b'): 1324 | #for step in range(int(num_batch)): 1325 | u, seq, pos, neg, seqcxt, poscxt, negcxt = sampler.next_batch() 1326 | 1327 | auc, loss, _ = sess.run([model.auc, model.loss, model.train_op], 1328 | {model.u: u, model.input_seq: seq, model.pos: pos, model.neg: neg, 1329 | model.is_training: True, 
model.seq_cxt:seqcxt, model.pos_cxt:poscxt, model.neg_cxt:negcxt}) 1330 | 1331 | if epoch % 20 == 0: #20 1332 | t1 = time.time() - t0 1333 | T += t1 1334 | print ('Evaluating') 1335 | t_test = evaluate(model, dataset, args, sess, CXTDict, args.cxt_size) 1336 | t_valid = evaluate_valid(model, dataset, args, sess, CXTDict, args.cxt_size) 1337 | #print(t_test) 1338 | print ('epoch:%d, time: %f(s), valid (NDCG@10: %.4f, HR@10: %.4f, AUC: %.4f), test (NDCG@10: %.4f, HR@10: %.4f, AUC: %.4f)' % (epoch, T, t_valid[0], t_valid[1], t_valid[2], t_test[0], t_test[1], t_test[2])) 1339 | t0 = time.time() 1340 | 1341 | 1342 | sampler.close() 1343 | print("Done") 1344 | 1345 | 1346 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/carca-context-and-attribute-aware-next-item/sequential-recommendation-on-amazon-men)](https://paperswithcode.com/sota/sequential-recommendation-on-amazon-men?p=carca-context-and-attribute-aware-next-item) 3 | [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/carca-context-and-attribute-aware-next-item/recommendation-systems-on-amazon-games)](https://paperswithcode.com/sota/recommendation-systems-on-amazon-games?p=carca-context-and-attribute-aware-next-item) 4 | [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/carca-context-and-attribute-aware-next-item/recommendation-systems-on-amazon-fashion)](https://paperswithcode.com/sota/recommendation-systems-on-amazon-fashion?p=carca-context-and-attribute-aware-next-item) 5 | [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/carca-context-and-attribute-aware-next-item/recommendation-systems-on-amazon-beauty)](https://paperswithcode.com/sota/recommendation-systems-on-amazon-beauty?p=carca-context-and-attribute-aware-next-item) 6 | 7 | # CARCA 8 | 9 | This is our implementation of the CARCA paper accepted at RecSys 2022 10 | https://dl.acm.org/doi/10.1145/3523227.3546777: 11 | 12 | Rashed, Ahmed, et al. "Context and Attribute-Aware Sequential Recommendation via Cross-Attention" 13 | 14 | Please cite our paper if you use the code or datasets. 15 | 16 | ## Environment 17 | * pandas==1.0.3 18 | * tensorflow==1.14.0 19 | * matplotlib==3.1.3 20 | * numpy==1.18.1 21 | * six==1.14.0 22 | * scikit_learn==0.23.1 23 | 24 | ## Steps 25 | 1) Download the preprocessed data from "https://drive.google.com/drive/folders/1a_u52mIEUA-1WrwsNZZa-aoGJcMmVugs?usp=sharing" or the raw data from "https://jmcauley.ucsd.edu/data/amazon/" 26 | 27 | 2) Add the data files inside the "Data/" folder 28 | 29 | 3) To run the model on the respective dataset, please use one of the commands below 30 | - python CARCA.py 'Video_Games' 31 | - python CARCA.py 'Men' 32 | - python CARCA.py 'Beauty' 33 | - python CARCA.py 'Fashion' 34 | 35 | 4) To preprocess the raw Amazon reviews data, please use DataProcessing.py and put the reviews and metadata files in the RawData folder. Also, generate the context dictionaries using the commented-out section in CARCA.py 36 | 37 | 5) To preprocess the Men and Fashion image features from scratch, you will need to download all product images and pass them through a pre-trained ResNet-50 model, then match them to the reviews data using their ASIN codes (a sketch is given below).
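
For step 5, the following is a minimal sketch of that feature-extraction pass, not the exact pipeline used to produce the released `Men_imgs.dat` / `Fashion_imgs.dat` files: the image folder, the per-ASIN `.jpg` file naming, and the saved `Men_itemmap.dat` mapping are assumptions (the ASIN-to-item-id dictionary `itemmap` is built in `RawData/DataProcessing.py` but is not persisted by the released script).

```python
# Hypothetical paths and file names -- adjust to your local setup.
import os
import pickle
import numpy as np
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.preprocessing import image

IMG_DIR = './RawData/Men_images'   # assumed layout: one <ASIN>.jpg per product
OUT_FILE = './Data/Men_imgs.dat'   # file name expected by get_ItemDataMen() in CARCA.py

# itemmap (ASIN -> 1-based item id) is assumed to have been saved from DataProcessing.py,
# e.g. with save_data(itemmap, 'Men_itemmap.dat').
with open('./RawData/Men_itemmap.dat', 'rb') as f:
    itemmap = pickle.load(f)

# Pre-trained ResNet-50 without the classification head; global average pooling gives 2048-d features.
model = ResNet50(weights='imagenet', include_top=False, pooling='avg')

itemnum = max(itemmap.values())
# Row item_id-1 holds the features of item_id; get_ItemDataMen() later prepends the zero padding row.
feats = np.zeros((itemnum, 2048), dtype=np.float32)

for asin, item_id in itemmap.items():
    path = os.path.join(IMG_DIR, asin + '.jpg')
    if not os.path.exists(path):
        continue  # items without an image keep all-zero features
    img = image.load_img(path, target_size=(224, 224))
    x = preprocess_input(np.expand_dims(image.img_to_array(img), axis=0))
    feats[item_id - 1] = model.predict(x)[0]

with open(OUT_FILE, 'wb') as f:
    pickle.dump(feats, f)
```

Batching the images through `model.predict` would be considerably faster; the per-image loop is kept only for readability.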
38 | 39 | 40 | 41 | ## Important Note 42 | If you are planning to apply CARCA on datasets without attributes or context, it is advisable to use rolling window protocol for training the model as the current training protocol (right shifted input) might not be stable in those scenarios. 43 | -------------------------------------------------------------------------------- /RawData/DataProcessing.py: -------------------------------------------------------------------------------- 1 | import gzip 2 | from collections import defaultdict 3 | from datetime import datetime 4 | import array 5 | import numpy as np 6 | import pickle 7 | import pandas as pd 8 | 9 | 10 | def parse(path): 11 | g = gzip.open(path, 'r') 12 | for l in g: 13 | yield eval(l) 14 | 15 | def load_data(filename): 16 | try: 17 | with open(filename, "rb") as f: 18 | x= pickle.load(f) 19 | except: 20 | x = [] 21 | return x 22 | 23 | def save_data(data,filename): 24 | with open(filename, "wb") as f: 25 | pickle.dump(data, f) 26 | 27 | countU = defaultdict(lambda: 0) 28 | countP = defaultdict(lambda: 0) 29 | line = 0 30 | 31 | dataset_name = 'Video_Games' 32 | f = open('reviews_' + dataset_name + '.txt', 'w') 33 | for l in parse('reviews_' + dataset_name + '.json.gz'): 34 | line += 1 35 | f.write(" ".join([l['reviewerID'], l['asin'], str(l['overall']), str(l['unixReviewTime'])]) + ' \n') 36 | asin = l['asin'] 37 | rev = l['reviewerID'] 38 | time = l['unixReviewTime'] 39 | countU[rev] += 1 40 | countP[asin] += 1 41 | f.close() 42 | 43 | 44 | usermap = dict() 45 | usernum = 0 46 | itemmap = dict() 47 | itemnum = 0 48 | User = dict() 49 | for l in parse('reviews_' + dataset_name + '.json.gz'): 50 | line += 1 51 | asin = l['asin'] 52 | rev = l['reviewerID'] 53 | time = l['unixReviewTime'] 54 | if countU[rev] < 5 or countP[asin] < 5: 55 | continue 56 | 57 | if rev in usermap: 58 | userid = usermap[rev] 59 | else: 60 | usernum += 1 61 | userid = usernum 62 | usermap[rev] = userid 63 | User[userid] = [] 64 | if asin in itemmap: 65 | itemid = itemmap[asin] 66 | else: 67 | itemnum += 1 68 | itemid = itemnum 69 | itemmap[asin] = itemid 70 | User[userid].append([time, itemid]) 71 | # sort reviews in User according to time 72 | 73 | for userid in User.keys(): 74 | User[userid].sort(key=lambda x: x[0]) 75 | 76 | print (usernum, itemnum) 77 | 78 | f = open(dataset_name + '_cxt.txt', 'w') 79 | for user in User.keys(): 80 | for i in User[user]: 81 | f.write('%d %d %s\n' % (user, i[1], i[0])) 82 | f.close() 83 | 84 | f = open(dataset_name + '.txt', 'w') 85 | for user in User.keys(): 86 | for i in User[user]: 87 | f.write('%d %d\n' % (user, i[1])) 88 | f.close() 89 | 90 | 91 | #### Reading and writing features 92 | itemfeat_dict = {} 93 | counter = 0 94 | for l in parse('meta_' + dataset_name + '.json.gz'): 95 | line += 1 96 | asin = l['asin'] 97 | 98 | title = "" 99 | if 'description' in l.keys(): 100 | title = l['description'] 101 | 102 | price = 0.0 103 | if 'price' in l.keys(): 104 | price = float(l['price']) 105 | 106 | brand = "" 107 | if 'brand' in l.keys(): 108 | brand = l['brand'] 109 | 110 | categories = l['categories'][0] 111 | #print(price , "-",brand , "-",categories , "-" ) 112 | if asin in itemmap.keys(): 113 | itemid = itemmap[asin] 114 | itemfeat_dict[itemid] = [title,price,brand,categories] 115 | counter = counter + 1 116 | 117 | features_list = list() 118 | templist = ["",0.0,"",[]] 119 | for item_id in range(1,itemnum+1): 120 | if item_id in itemfeat_dict.keys(): 121 | features_list.append(itemfeat_dict[item_id]) 122 | else: 123 | 
features_list.append(templist) 124 | 125 | 126 | df = pd.DataFrame(features_list, columns=['title','price','brand','categories']) 127 | 128 | del df['title'] 129 | df['categoriesstring'] = [' '.join(map(str, l)) for l in df['categories']] 130 | df=pd.concat([df,df['categoriesstring'].str.get_dummies(sep=' ').add_prefix('cat_').astype('int8')],axis=1) 131 | del df['categories'] 132 | del df['categoriesstring'] 133 | df=pd.get_dummies(df,dummy_na=True) 134 | 135 | print(df.head()) 136 | print(df.dtypes) 137 | save_data(df.values,dataset_name+'_feat.dat') 138 | 139 | 140 | ### 141 | --------------------------------------------------------------------------------