├── .gitignore ├── LICENSE ├── README.md ├── data_loader.py ├── data_preprocess.py ├── dataset └── download.sh ├── fig ├── fig0.png ├── fig_1.png ├── fig_2.png ├── fig_3.png └── fig_4.png ├── hyper_params.py ├── model.py ├── model_ckpt └── README.md ├── modules.py ├── tensorboard └── README.md ├── test.py ├── train.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *.cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # Jupyter Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # SageMath parsed files 79 | *.sage.py 80 | 81 | # Environments 82 | .env 83 | .venv 84 | env/ 85 | venv/ 86 | ENV/ 87 | 88 | # Spyder project settings 89 | .spyderproject 90 | .spyproject 91 | 92 | # Rope project settings 93 | .ropeproject 94 | 95 | # mkdocs documentation 96 | /site 97 | 98 | # mypy 99 | .mypy_cache/ 100 | 101 | corpora 102 | logdir 103 | preprocessed -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Xu Jiajian 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
 22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # An Implementation of NLR: [Neural Collaborative Reasoning](https://arxiv.org/abs/2005.08129)
2 | ----------
3 | 
4 | This is not the official code; it is my implementation of the paper as I understood it.
5 | 
6 | ## The basic idea of the paper
7 | 
8 | The paper proposes a modularized Logical Neural Network architecture, which learns basic logical operations
9 | such as AND, OR, and NOT as neural modules constrained by logical regularizers, and learns logic variables
10 | as vector embeddings. In this way, each logic expression can be equivalently organized as a
11 | neural network, so that logical reasoning and prediction can be conducted in a continuous space (a short sketch is given in the appendix at the end of this README).
12 | 
13 | ![figure0](./fig/fig0.png)
14 | 
15 | ## Requirements
16 | 
17 | - python3
18 | - numpy
19 | - tqdm
20 | - tensorflow==1.11.0
21 | 
22 | > The TensorFlow version probably does not matter much, but I have not tested other versions.
23 | 
24 | ## Training
25 | 
26 | - First, run the command below to download the MovieLens-100K dataset.
27 | ```shell script
28 | cd dataset
29 | bash download.sh
30 | ```
31 | 
32 | - Second, run the command below to preprocess the data.
33 | ```shell script
34 | python data_preprocess.py
35 | ```
36 | 
37 | - Then, run the following command to train the model.
38 | ```shell script
39 | python train.py
40 | ```
41 | You can change the default hyper-parameters in two ways:
42 | either modify the `hyper_params.py` file,
43 | or pass them on the command line as follows.
44 | ```shell script
45 | python train.py --user_emb_dim 128 \
46 | --item_emb_dim 128 \
47 | --hidden1_dim 256 \
48 | --hidden2_dim 128 \
49 | --batch_size 64 \
50 | --lr 0.001 \
51 | --num_epochs 20
52 | ```
53 | 
54 | After that, training logs are printed to the console and TensorBoard summaries are written to the `tensorboard` directory.
55 | 
56 | - TensorBoard curves
57 | 
58 | ![lr noam](fig/fig_1.png) ![logical loss](fig/fig_2.png)
59 | ![l2 loss](fig/fig_3.png) ![loss curve](fig/fig_4.png)
60 | 
61 | ## Test
62 | - Run
63 | ```shell script
64 | # python test.py --ckpt model_ckpt/test1/{your_model_name}
65 | # example
66 | python test.py --ckpt model_ckpt/test1/nlr-13500
67 | ```
68 | 
69 | Then you should see output like the following.
70 | ```shell script
71 | HR@10: 0.081654
72 | NDCG@10: 0.037584
73 | ```
74 | 
75 | ***Some hyper-parameters are not mentioned in the paper and I am still tuning them,
76 | so the results may not be satisfying yet.***
77 | 
78 | ## Note
79 | 
80 | - Adding a fast search strategy to make inference faster.
81 | 
82 | - Some inconspicuous details.
83 | 
84 | > Please share your questions and suggestions by opening issues or sending email to 406493851@qq.com, so that I can improve this project.
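## Appendix: a minimal sketch of the logical modules

The snippet below is only an illustrative sketch of the idea described above, not the code used in this repository (the real, trainable TensorFlow modules live in `modules.py` and `model.py`). Events are vectors, `NOT(*)` and `OR(*, *)` are small two-layer MLPs (untrained here, with made-up dimensions), and an expression counts as "true" when its vector is close to an anchor truth vector `T` in cosine similarity.

```python
import numpy as np

np.random.seed(0)
DIM, HID = 8, 16

# Anchor "truth" vector: expressions whose vectors point towards T are treated as true.
T = np.random.randn(DIM)


def mlp(x, w1, w2):
    # two dense layers with a ReLU in between, like the modules in modules.py
    return np.maximum(x @ w1, 0.0) @ w2


# Randomly initialised NOT(*) and OR(*, *) modules. In the real model these weights
# are trained jointly with the ranking loss and the logical regularizers.
w1_not, w2_not = np.random.randn(DIM, HID), np.random.randn(HID, DIM)
w1_or, w2_or = np.random.randn(DIM, HID), np.random.randn(HID, DIM)


def NOT(e):
    return mlp(e, w1_not, w2_not)


def OR(a, b):
    return mlp(a + b, w1_or, w2_or)  # 'sum' interaction between the two operands


def truth_score(e):
    return float(e @ T / (np.linalg.norm(e) * np.linalg.norm(T)))


# Each event e_i stands for the encoding of one (user, item) interaction.
e1, e2, e_target = np.random.randn(DIM), np.random.randn(DIM), np.random.randn(DIM)

# "(e1 AND e2) implies e_target" is rewritten as "NOT(e1) OR NOT(e2) OR e_target",
# assembled as a network of modules and scored against T.
expression = OR(OR(NOT(e1), NOT(e2)), e_target)
print('truth score of the expression:', truth_score(expression))
```

During training, the pair-wise ranking loss pushes expressions built from observed interactions towards `T`, while the logical regularizers in `model.py` keep `NOT` and `OR` behaving like their logical counterparts (e.g. `NOT(NOT(x)) ≈ x`, `x OR T ≈ T`).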
85 | 
--------------------------------------------------------------------------------
/data_loader.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 | 
4 | UNKNOWN_TAG = '<UNK>'
5 | PADDING_TAG = '<PAD>'
6 | TAGS = [UNKNOWN_TAG, PADDING_TAG]
7 | 
8 | 
9 | def load_train_datas(data_path, feed_back=True):
10 |     with open(data_path, 'r') as f:
11 |         raw_lines = f.readlines()
12 | 
13 |     users, hist_items, scores, labels = [], [], [], []
14 |     all_items = []
15 |     for line in raw_lines:
16 |         user, history_seq, label = line.strip().split('\t', 2)
17 |         history_seq = history_seq.split('|')
18 | 
19 |         users.append(user)
20 |         labels.append(label)
21 | 
22 |         all_items.append(label)
23 | 
24 |         if feed_back:
25 |             history_seq = [hist.split(',') for hist in history_seq]
26 |             hist_ = [hist[0] for hist in history_seq]
27 |             hist_items.append(hist_)
28 |             all_items.extend(hist_)
29 |             scores.append([int(hist[1]) for hist in history_seq])
30 |         else:
31 |             history_seq = [hist.split(',')[0] for hist in history_seq]
32 |             hist_items.append(history_seq)
33 |             all_items.extend(history_seq)
34 |             scores.append([1] * len(history_seq))
35 | 
36 |     user_2_id = {user: i + len(TAGS) for i, user in enumerate(set(users))}
37 |     item_2_id = {item: i + len(TAGS) for i, item in enumerate(set(all_items))}
38 |     for i, tag in enumerate(TAGS):
39 |         user_2_id[tag] = i
40 |         item_2_id[tag] = i
41 | 
42 |     return users, hist_items, scores, labels, user_2_id, item_2_id
43 | 
44 | 
45 | def load_test_datas(data_path, feed_back=True):
46 |     with open(data_path, 'r') as f:
47 |         raw_lines = f.readlines()
48 | 
49 |     users, hist_items, scores, labels = [], [], [], []
50 |     for line in raw_lines:
51 |         user, history_seq, label = line.strip().split('\t', 2)
52 |         history_seq = history_seq.split('|')
53 | 
54 |         users.append(user)
55 |         labels.append(label)
56 | 
57 |         if feed_back:
58 |             history_seq = [hist.split(',') for hist in history_seq]
59 |             hist_ = [hist[0] for hist in history_seq]
60 |             hist_items.append(hist_)
61 |             scores.append([int(hist[1]) for hist in history_seq])
62 |         else:
63 |             history_seq = [hist.split(',')[0] for hist in history_seq]
64 |             hist_items.append(history_seq)
65 |             scores.append([1] * len(history_seq))
66 | 
67 |     return users, hist_items, scores, labels
68 | 
69 | 
70 | def batch_iterator(users, hist_items, feedback_scores, labels, user_2_id, item_2_id, history_len=5,
71 |                    batch_size=128, shuffle=True):
72 |     # string to index_id
73 |     user_data = [[user_2_id.get(user, user_2_id[UNKNOWN_TAG])] for user in users]
74 |     label_data = [[item_2_id.get(label, item_2_id[UNKNOWN_TAG])] for label in labels]
75 |     hist_data = []
76 |     for hist in hist_items:
77 |         hist_ = [item_2_id.get(h, item_2_id[UNKNOWN_TAG]) for h in hist]
78 |         hist_data.append(hist_)
79 | 
80 |     # padding
81 |     hist_data = tf.keras.preprocessing.sequence.pad_sequences(hist_data, maxlen=history_len,
82 |                                                               value=item_2_id[PADDING_TAG])
83 |     feedback_data = tf.keras.preprocessing.sequence.pad_sequences(feedback_scores,
84 |                                                                   maxlen=history_len, value=0)
85 |     user_data = np.array(user_data)
86 |     label_data = np.array(label_data)
87 | 
88 |     # shuffle datas
89 |     if shuffle:
90 |         indices = np.random.permutation(range(len(labels)))
91 |         hist_data = hist_data[indices]
92 |         feedback_data = feedback_data[indices]
93 |         user_data = user_data[indices]
94 |         label_data = label_data[indices]
95 | 
96 |     # negative sampling
97 |     negative_samples = []
98 |     for label in label_data:
99 |         neg_sample = np.random.randint(0, len(item_2_id), size=[1])
100 | while label == neg_sample: 101 | neg_sample = np.random.randint(0, len(item_2_id), size=[1]) 102 | negative_samples.append(neg_sample) 103 | negative_samples = np.array(negative_samples) 104 | 105 | num_batch = int((len(labels) - 1) / batch_size + 1) 106 | for i in range(num_batch): 107 | start = i * batch_size 108 | end = min((i + 1) * batch_size, len(labels)) 109 | 110 | # shuffle history items 111 | hist_rand_ind = np.random.permutation(range(history_len)) 112 | hist_data_batch = hist_data[start: end][:, hist_rand_ind] 113 | feedback_data_batch = feedback_data[start: end][:, hist_rand_ind] 114 | 115 | yield user_data[start: end], hist_data_batch, feedback_data_batch, \ 116 | label_data[start: end], negative_samples[start: end] 117 | 118 | 119 | def test_batch(user, hist_items, feedback_score, user_2_id, item_2_id): 120 | item_num = len(item_2_id) 121 | user_data = np.array([[user_2_id.get(user, user_2_id[UNKNOWN_TAG])]] * item_num) 122 | items_data = np.array( 123 | [[item_2_id.get(item, item_2_id[UNKNOWN_TAG]) for item in hist_items]] * item_num) 124 | feedback_data = np.array([feedback_score] * item_num) 125 | return user_data, items_data, feedback_data 126 | -------------------------------------------------------------------------------- /data_preprocess.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | 4 | current_dir = os.path.split(os.path.realpath(__file__))[0] 5 | 6 | 7 | def preprocess_movieslens(history_items_num=5): 8 | r_cols = ['user_id', 'movie_id', 'rating', 'unix_timestamp'] 9 | ratings = pd.read_csv('dataset/ml-100k/u.data', sep='\t', names=r_cols, encoding='latin-1') 10 | ratings = ratings.sort_values('unix_timestamp') 11 | print(ratings) 12 | 13 | user_items_dict = {} 14 | for rating in ratings.values: 15 | user_id, movie_id, rating, timestamp = rating[0], rating[1], rating[2], rating[3] 16 | if user_id not in user_items_dict: 17 | user_items_dict[user_id] = [] 18 | feedback = -1 if rating <= 3 else 1 19 | user_items_dict[user_id].append((movie_id, feedback)) 20 | 21 | user_samples = {} 22 | for user, histories in user_items_dict.items(): 23 | samples = [] 24 | if len(histories) > history_items_num: 25 | for i in range(history_items_num, len(histories)): 26 | sample = ['{},{}'.format(hist[0], hist[1]) for hist in 27 | histories[i - history_items_num: i]] 28 | label = histories[i][0] 29 | samples.append('{}\t{}\t{}'.format(user, '|'.join(sample), label)) 30 | elif len(histories) > 1: 31 | sample = ['{},{}'.format(hist[0], hist[1]) for hist in histories[:-1]] 32 | label = histories[-1][0] 33 | samples.append('{}\t{}\t{}'.format(user, '|'.join(sample), label)) 34 | else: 35 | samples.append('{}\t{}\t{}'.format(user, ',1', histories[0][0])) 36 | user_samples[user] = samples 37 | 38 | train_datas, eval_datas, test_datas = [], [], [] 39 | for user, samples in user_samples.items(): 40 | if len(samples) > 2: 41 | train_datas.extend(samples[:-2]) 42 | eval_datas.append(samples[-2]) 43 | test_datas.append(samples[-1]) 44 | elif len(samples) == 2: 45 | train_datas.append(samples[0]) 46 | test_datas.append(samples[1]) 47 | else: 48 | test_datas.append(samples[0]) 49 | 50 | with open('dataset/ml-100k/train.data', 'w') as f: 51 | _ = [print(sample, file=f) for sample in train_datas] 52 | with open('dataset/ml-100k/eval.data', 'w') as f: 53 | _ = [print(sample, file=f) for sample in eval_datas] 54 | with open('dataset/ml-100k/test.data', 'w') as f: 55 | _ = [print(sample, file=f) for sample in 
test_datas]
56 | 
57 | 
58 | if __name__ == '__main__':
59 |     preprocess_movieslens()
60 | 
--------------------------------------------------------------------------------
/dataset/download.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | DATA_ROOT=./
4 | 
5 | function download () {
6 |     fileurl=${1}
7 |     filename=${fileurl##*/}
8 |     if [ ! -f ${filename} ]; then
9 |         echo ">>> Download '${filename}' from '${fileurl}'."
10 |         wget ${fileurl}
11 |     else
12 |         echo "*** File '${filename}' exists. Skip."
13 |     fi
14 | }
15 | 
16 | download http://files.grouplens.org/datasets/movielens/ml-100k.zip
17 | 
18 | unzip ml-100k.zip
19 | 
20 | 
21 | 
--------------------------------------------------------------------------------
/fig/fig0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Scagin/NeuralLogicReasoning/2128352ebc98bea60f640e7420d80f276b07bb94/fig/fig0.png
--------------------------------------------------------------------------------
/fig/fig_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Scagin/NeuralLogicReasoning/2128352ebc98bea60f640e7420d80f276b07bb94/fig/fig_1.png
--------------------------------------------------------------------------------
/fig/fig_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Scagin/NeuralLogicReasoning/2128352ebc98bea60f640e7420d80f276b07bb94/fig/fig_2.png
--------------------------------------------------------------------------------
/fig/fig_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Scagin/NeuralLogicReasoning/2128352ebc98bea60f640e7420d80f276b07bb94/fig/fig_3.png
--------------------------------------------------------------------------------
/fig/fig_4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Scagin/NeuralLogicReasoning/2128352ebc98bea60f640e7420d80f276b07bb94/fig/fig_4.png
--------------------------------------------------------------------------------
/hyper_params.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | 
3 | 
4 | class HyperParams:
5 |     parser = argparse.ArgumentParser()
6 | 
7 |     # train
8 |     ## files
9 |     parser.add_argument('--train_datas', default='dataset/ml-100k/train.data',
10 |                         help='training data.')
11 |     parser.add_argument('--eval_datas', default='dataset/ml-100k/eval.data',
12 |                         help='evaluating data.')
13 |     parser.add_argument('--is_with_feedback', default=True,
14 |                         help='whether the history carries explicit feedback or not.')
15 |     parser.add_argument('--checkpoint_dir', default='model_ckpt/test1',
16 |                         help='model save directory.')
17 |     parser.add_argument('--ckpt_name', default='nlr', help='name of the saved model.')
18 |     parser.add_argument('--tensorboard_dir', default='tensorboard/test1',
19 |                         help='tensorboard log directory.')
20 | 
21 |     # training scheme
22 |     parser.add_argument('--batch_size', default=128, type=int)
23 |     parser.add_argument('--eval_batch_size', default=256, type=int)
24 |     parser.add_argument('--l2_weight', default=1e-4, type=float, help='the weight of the L2 loss.')
25 |     parser.add_argument('--logical_weight', default=0.1, type=float,
26 |                         help='the weight of the logical regularizer loss.')
27 |     parser.add_argument('--history_len', default=5, type=int, help='length of historical items.')
28 |     parser.add_argument('--warmup_steps', default=10000, type=int,
29 |                         help='warm-up steps for the Adam optimizer.')
30 |     parser.add_argument('--lr', default=1e-3, type=float, help='learning rate')
31 |     parser.add_argument('--num_epochs', default=200, type=int)
32 |     parser.add_argument('--eval_per_steps', default=1000, type=int, help='run evaluation every this many steps.')
33 | 
34 |     # model
35 |     parser.add_argument('--user_emb_dim', default=64, type=int,
36 |                         help='embedding dimension of user.')
37 |     parser.add_argument('--item_emb_dim', default=64, type=int,
38 |                         help='embedding dimension of item.')
39 |     parser.add_argument('--hidden1_dim', default=128, type=int,
40 |                         help='dimension of the first hidden layer in the logical modules.')
41 |     parser.add_argument('--hidden2_dim', default=64, type=int,
42 |                         help='dimension of the second hidden layer in the logical modules.')
43 |     parser.add_argument('--interact_type', default='concat', type=str,
44 |                         help='interaction type between the user embedding and the item embedding.')
45 | 
46 |     # test
47 |     parser.add_argument('--test_datas', default='dataset/ml-100k/test.data', help='test data')
48 |     parser.add_argument('--ckpt', default='model_ckpt/my_model/nlr-437000', help='checkpoint file path')
49 |     parser.add_argument('--topk', default=5, type=int, help='how many items to return from the sorted result list.')
50 | 
--------------------------------------------------------------------------------
/model.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from modules import interact_encoder, not_modules, cosine_probability, noam_scheme, OrMoudleCell
3 | 
4 | 
5 | class NLR_model:
6 | 
7 |     def __init__(self, user_embedding_dim=256, item_embedding_dim=256,
8 |                  hidden1_dim=512, hidden2_dim=256, num_users=100,
9 |                  num_items=1000, learning_rate=1e-3, warmup_steps=4000.,
10 |                  l2_weight=1e-4, logical_weight=0.1, interact_type='sum'):
11 |         # hyper param
12 |         self.user_embedding_dim = user_embedding_dim
13 |         self.item_embedding_dim = item_embedding_dim
14 |         self.hidden1_dim = hidden1_dim
15 |         self.hidden2_dim = hidden2_dim
16 |         self.num_users = num_users
17 |         self.num_items = num_items
18 |         self.learning_rate = learning_rate
19 |         self.l2_weight = l2_weight
20 |         self.logical_weight = logical_weight
21 |         self.warmup_steps = warmup_steps
22 |         self.interact_type = interact_type
23 |         self.activation = tf.nn.relu
24 | 
25 |         # input
26 |         with tf.name_scope('input_tensor'):
27 |             self.input_user = tf.placeholder(dtype=tf.int32, shape=[None, 1], name='input_user')
28 |             self.input_items = tf.placeholder(dtype=tf.int32, shape=[None, None],
29 |                                               name='input_items')
30 |             self.input_feedback_score = tf.placeholder(dtype=tf.float32, shape=[None, None],
31 |                                                        name='input_feedback_score')
32 |             self.input_target = tf.placeholder(dtype=tf.int32, shape=[None, 1], name='input_target')
33 |             self.input_negative_sample = tf.placeholder(dtype=tf.int32, shape=[None, 1],
34 |                                                         name='input_negative_sample')
35 | 
36 |         # model structure
37 |         self.build()
38 | 
39 |     def build(self):
40 |         # truth vector
41 |         self.T = tf.get_variable('truth_vector', shape=[1, 1, self.item_embedding_dim],
42 |                                  dtype=tf.float32, trainable=False)
43 |         # embedding matrix
44 |         self.user_embedding_layer = tf.get_variable(name='user_embedding_layer',
45 |                                                     shape=[self.num_users, self.user_embedding_dim],
46 |                                                     dtype=tf.float32)
47 |         self.item_embedding_layer = tf.get_variable(name='item_embedding_layer',
48 |                                                     shape=[self.num_items, self.item_embedding_dim],
49 |                                                     dtype=tf.float32)
50 |         # embedding
51 |
self.user_emb_vec = tf.nn.embedding_lookup(self.user_embedding_layer, self.input_user) 52 | self.item_emb_vec = tf.nn.embedding_lookup(self.item_embedding_layer, self.input_items) 53 | self.target_emb_vec = tf.nn.embedding_lookup(self.item_embedding_layer, 54 | self.input_target) 55 | self.negative_sample_emb_vec = tf.nn.embedding_lookup(self.item_embedding_layer, 56 | self.input_negative_sample) 57 | 58 | # interaction 59 | self.encoder = interact_encoder(self.user_emb_vec, self.item_emb_vec, self.hidden1_dim, 60 | self.hidden2_dim, activation=self.activation, 61 | interact_type=self.interact_type) 62 | self.encoder_pos = interact_encoder(self.user_emb_vec, self.target_emb_vec, 63 | self.hidden1_dim, self.hidden2_dim, 64 | activation=self.activation, 65 | interact_type=self.interact_type) 66 | self.encoder_neg = interact_encoder(self.user_emb_vec, self.negative_sample_emb_vec, 67 | self.hidden1_dim, self.hidden2_dim, 68 | activation=self.activation, 69 | interact_type=self.interact_type) 70 | 71 | # NOT(*) operation 72 | feedback_to_oper = self.input_feedback_score[:, :, tf.newaxis] * tf.ones_like(self.encoder) 73 | applicable = tf.equal(feedback_to_oper, 1) 74 | encoder_to_oper = tf.where(applicable, self.encoder, tf.zeros_like(self.encoder)) 75 | not_encoder = not_modules(encoder_to_oper, self.hidden1_dim, self.hidden2_dim, 76 | activation=self.activation) 77 | self.not_encoder = tf.where(applicable, not_encoder, self.encoder) 78 | 79 | # OR(*) operation 80 | self.or_cell = OrMoudleCell(self.hidden1_dim, self.hidden2_dim) 81 | self.or_encoder, _ = tf.nn.dynamic_rnn(self.or_cell, self.not_encoder[:, 1:, :], 82 | initial_state=self.not_encoder[:, 0, :], 83 | dtype=tf.float32) 84 | self.or_encoder_last = self.or_encoder[:, -1, :] 85 | 86 | self.or_encoder_pos, _ = tf.nn.dynamic_rnn(self.or_cell, self.encoder_pos, 87 | initial_state=self.or_encoder_last, 88 | dtype=tf.float32) 89 | self.or_encoder_neg, _ = tf.nn.dynamic_rnn(self.or_cell, self.encoder_neg, 90 | initial_state=self.or_encoder_last, 91 | dtype=tf.float32) 92 | 93 | # cosine similarity 94 | self.probability_pos = cosine_probability(self.or_encoder_pos, self.T) 95 | self.probability_neg = cosine_probability(self.or_encoder_neg, self.T) 96 | 97 | # pair-wise loss 98 | self.traget_loss = -tf.reduce_sum( 99 | tf.log_sigmoid(self.probability_pos - self.probability_neg)) 100 | 101 | # L2 loss 102 | trainable_variables = tf.trainable_variables() 103 | self.l2_loss = tf.reduce_sum([tf.nn.l2_loss(var) for var in trainable_variables]) 104 | 105 | # model loss 106 | self.lnn_loss = self.traget_loss + self.l2_weight * self.l2_loss 107 | 108 | # logical regularizer loss 109 | event_space_vectors = [self.encoder, self.encoder_pos, self.encoder_neg, 110 | self.not_encoder, 111 | self.or_encoder, self.or_encoder_pos, self.or_encoder_neg] 112 | event_space_vectors = tf.concat(event_space_vectors, axis=1) 113 | self.logical_loss = self.logical_regularizer(event_space_vectors) 114 | 115 | # sum 116 | self.loss = self.lnn_loss + self.logical_weight * self.logical_loss 117 | 118 | # Adam 119 | global_step = tf.train.get_or_create_global_step() 120 | lr = noam_scheme(self.learning_rate, global_step, self.warmup_steps) 121 | self.optimizer = tf.train.AdamOptimizer(learning_rate=lr) 122 | # train 123 | self.train_op = self.optimizer.minimize(self.loss, global_step=global_step) 124 | 125 | # tensorboard scalar 126 | tf.summary.scalar('loss', self.loss) 127 | tf.summary.scalar('traget_loss', self.traget_loss) 128 | tf.summary.scalar('l2_loss', 
self.l2_loss) 129 | tf.summary.scalar('logical_loss', self.logical_loss) 130 | tf.summary.scalar('lr', lr) 131 | tf.summary.scalar('global_step', global_step) 132 | 133 | self.summaries = tf.summary.merge_all() 134 | 135 | def logical_regularizer(self, event_space): 136 | ''' 137 | Build logical regularizers. 138 | The regularizers make NOT([T])=[F], and so on. 139 | ''' 140 | F_vec = not_modules(self.T, self.hidden1_dim, self.hidden2_dim, activation=self.activation) 141 | not_event = not_modules(event_space, self.hidden1_dim, self.hidden2_dim, 142 | activation=self.activation) 143 | double_not_event = not_modules(not_event, self.hidden1_dim, self.hidden2_dim, 144 | activation=self.activation) 145 | reg_1 = tf.reduce_mean(1 + cosine_probability(not_event, event_space)) 146 | reg_2 = tf.reduce_mean(1 - cosine_probability(double_not_event, event_space)) 147 | 148 | event_or_F = self.or_cell(event_space, F_vec) 149 | reg_7 = tf.reduce_mean(1 - cosine_probability(event_or_F, event_space)) 150 | 151 | event_or_T = self.or_cell(event_space, self.T) 152 | reg_8 = tf.reduce_mean(1 - cosine_probability(event_or_T, self.T)) 153 | 154 | event_or_event = self.or_cell(event_space, event_space) 155 | reg_9 = tf.reduce_mean(1 - cosine_probability(event_or_event, event_or_event)) 156 | 157 | event_or_not_event = self.or_cell(event_space, not_event) 158 | reg_10 = tf.reduce_mean(1 - cosine_probability(event_or_not_event, self.T)) 159 | 160 | return reg_1 + reg_2 + reg_7 + reg_8 + reg_9 + reg_10 161 | 162 | def get_hyper_parameter(self): 163 | ''' 164 | Return all hyper-parameters. 165 | ''' 166 | params = { 167 | 'user_embedding_dim': self.user_embedding_dim, 168 | 'item_embedding_dim': self.item_embedding_dim, 169 | 'hidden1_dim': self.hidden1_dim, 170 | 'hidden2_dim': self.hidden2_dim, 171 | 'num_users': self.num_users, 172 | 'num_items': self.num_items, 173 | 'learning_rate': self.learning_rate, 174 | 'l2_weight': self.l2_weight, 175 | 'logical_weight': self.logical_weight, 176 | 'warmup_steps': self.warmup_steps, 177 | 'interact_type': self.interact_type 178 | } 179 | return params 180 | -------------------------------------------------------------------------------- /model_ckpt/README.md: -------------------------------------------------------------------------------- 1 | ***model checkpoints dir*** -------------------------------------------------------------------------------- /modules.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | INTERACT_FUNC_SET = {'sum', 'sub', 'mean', 'concat'} 4 | 5 | 6 | def interact_encoder(user_vec, item_vec, hidden1_dim, hidden2_dim, 7 | interact_type='sum', activation=tf.nn.relu): 8 | ''' 9 | Calculate the interaction between the user and the item. 
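The user vector is tiled along the item axis, merged with each item vector according to `interact_type`
(sum / sub / mean / concat), and passed through two shared dense layers, giving one event embedding per item.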
10 | ''' 11 | 12 | interact_type = interact_type.lower() 13 | assert interact_type in INTERACT_FUNC_SET 14 | 15 | # interaction 16 | _user = tf.tile(user_vec, [1, tf.shape(item_vec)[1], 1]) 17 | if interact_type == 'sum': 18 | merge_vec = _user + item_vec 19 | elif interact_type == 'sub': 20 | merge_vec = _user - item_vec 21 | elif interact_type == 'mean': 22 | merge_vec = (_user + item_vec) / 2 23 | elif interact_type == 'concat': 24 | merge_vec = tf.concat([_user, item_vec], axis=-1) 25 | 26 | encoder = tf.layers.dense(merge_vec, hidden1_dim, activation=activation, name='encoder_hidden1', 27 | reuse=tf.AUTO_REUSE) 28 | # encoder = tf.layers.batch_normalization(encoder, name='encoder_bn1', reuse=tf.AUTO_REUSE) 29 | encoder = tf.layers.dense(encoder, hidden2_dim, name='encoder_hidden2', reuse=tf.AUTO_REUSE) 30 | # encoder = tf.layers.batch_normalization(encoder, name='encoder_bn2', reuse=tf.AUTO_REUSE) 31 | return encoder 32 | 33 | 34 | def not_modules(input, hidden1_dim, hidden2_dim, activation=tf.nn.relu): 35 | ''' 36 | An module to calculate the logical operation NOT(*). 37 | ''' 38 | not_encoder = tf.layers.dense(input, hidden1_dim, activation=activation, name='not_hidden1', 39 | reuse=tf.AUTO_REUSE) 40 | # not_encoder = tf.layers.batch_normalization(not_encoder, name='not_bn1', reuse=tf.AUTO_REUSE) 41 | not_encoder = tf.layers.dense(not_encoder, hidden2_dim, name='not_hidden2', reuse=tf.AUTO_REUSE) 42 | # not_encoder = tf.layers.batch_normalization(not_encoder, name='not_bn2', reuse=tf.AUTO_REUSE) 43 | return not_encoder 44 | 45 | 46 | def cosine_probability(vec_a, vec_b): 47 | ''' 48 | Calculate the cosine similarity between {vec_a} and {vec_b}. 49 | ''' 50 | a_norm = tf.sqrt(tf.reduce_sum(tf.square(vec_a), axis=-1)) 51 | b_norm = tf.sqrt(tf.reduce_sum(tf.square(vec_b), axis=-1)) 52 | _prod = tf.multiply(vec_a, vec_b) 53 | inner_prod = tf.reduce_sum(_prod, axis=-1) 54 | prob = inner_prod / (a_norm * b_norm) 55 | return prob 56 | 57 | 58 | def noam_scheme(init_lr, global_step, warmup_steps=4000.): 59 | ''' 60 | Noam scheme learning rate decay. 61 | ''' 62 | step = tf.cast(global_step + 1, dtype=tf.float32) 63 | return init_lr * warmup_steps ** 0.5 * tf.minimum(step * warmup_steps ** -1.5, step ** -0.5) 64 | 65 | 66 | class OrMoudleCell(tf.nn.rnn_cell.RNNCell): 67 | ''' 68 | An module to calculate the logical operation OR(*). 69 | This is a rnn cell, each time can just operate between 2 vector. 70 | 71 | `input` is a matrix without step 0, 72 | and `state` is initialized to the vector at step 0. 
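At every step the cell merges the current input with the accumulated state (element-wise sum by default),
applies two dense layers, and returns the result as both output and next state, so the state after the
last step represents the OR of the initial state and all inputs.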
73 | ''' 74 | 75 | def __init__(self, num_units_1, num_units_2, interact_type="sum", activation=None, 76 | reuse=tf.AUTO_REUSE, name=None): 77 | super(OrMoudleCell, self).__init__(_reuse=reuse, name=name) 78 | self._num_units_1 = num_units_1 79 | self._num_units_2 = num_units_2 80 | self._activation = activation or tf.nn.relu 81 | interact_type = interact_type.lower() 82 | assert interact_type in INTERACT_FUNC_SET 83 | self.interact_type = interact_type 84 | 85 | @property 86 | def state_size(self): 87 | return self._num_units_2 88 | 89 | @property 90 | def output_size(self): 91 | return self._num_units_2 92 | 93 | def build(self, inputs_shape): 94 | self.layer_1 = tf.layers.Dense(self._num_units_1, activation=self._activation, 95 | name="or_hidden1") 96 | self.layer_2 = tf.layers.Dense(self._num_units_2, name="or_hidden2") 97 | self.built = True 98 | 99 | def call(self, inputs, state): 100 | if self.interact_type == 'sum': 101 | hidden = inputs + state 102 | elif self.interact_type == 'sub': 103 | hidden = inputs - state 104 | elif self.interact_type == 'mean': 105 | hidden = (inputs + state) / 2 106 | elif self.interact_type == 'concat': 107 | hidden = tf.concat([inputs, state], axis=-1) 108 | else: 109 | hidden = inputs + state 110 | hidden = self.layer_1(hidden) 111 | output = self.layer_2(hidden) 112 | return output, output 113 | -------------------------------------------------------------------------------- /tensorboard/README.md: -------------------------------------------------------------------------------- 1 | ***Tensorboard log dir.*** -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import tqdm 2 | import random 3 | import numpy as np 4 | import tensorflow as tf 5 | 6 | import utils 7 | import data_loader 8 | from model import NLR_model 9 | from hyper_params import HyperParams 10 | 11 | 12 | def test(): 13 | hparams = HyperParams() 14 | 15 | parser = hparams.parser 16 | hp = parser.parse_args() 17 | 18 | test_users, test_hist_items, test_scores, test_labels = data_loader.load_test_datas( 19 | hp.test_datas, hp.is_with_feedback) 20 | 21 | user_2_id, item_2_id, train_hypers = utils.load_training_info(hp.checkpoint_dir) 22 | id_2_item = {id: item for item, id in item_2_id.items()} 23 | 24 | model = NLR_model(user_embedding_dim=train_hypers.get('user_embedding_dim'), 25 | item_embedding_dim=train_hypers.get('item_embedding_dim'), 26 | hidden1_dim=train_hypers.get('hidden1_dim'), 27 | hidden2_dim=train_hypers.get('hidden2_dim'), 28 | num_users=train_hypers.get('num_users'), 29 | num_items=train_hypers.get('num_items'), 30 | interact_type=train_hypers.get('interact_type')) 31 | 32 | saver = tf.train.Saver() 33 | with tf.Session() as sess: 34 | # restore 35 | saver.restore(sess, hp.ckpt) 36 | 37 | items_embedding_matrix = sess.run(model.item_embedding_layer) 38 | items_embedding_matrix = items_embedding_matrix[:, np.newaxis, :] 39 | 40 | topk = hp.topk 41 | count = 0 42 | hr_total = 0 43 | ndcg_total = 0 44 | for user, hist, feedback, label in tqdm.tqdm(zip(test_users, test_hist_items, test_scores, 45 | test_labels)): 46 | # if random.random() > 0.02: 47 | # continue 48 | user_data, items_data, feedback_data = data_loader.test_batch(user, hist, feedback, 49 | user_2_id, item_2_id) 50 | 51 | prob_pos = sess.run(model.probability_pos, 52 | feed_dict={model.input_user: user_data, 53 | model.input_items: items_data, 54 | model.input_feedback_score: feedback_data, 
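# Note: target_emb_vec is an intermediate tensor rather than a placeholder; TF1's Session.run
# allows feeding it directly, so all item embeddings are scored against this user in one pass.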
55 | model.target_emb_vec: items_embedding_matrix}) 56 | 57 | prob_pos = np.squeeze(prob_pos, axis=1) 58 | pred_item_ids = np.argsort(prob_pos, axis=0)[::-1][:topk] 59 | label_ids = [item_2_id.get(label, item_2_id[data_loader.UNKNOWN_TAG])] 60 | 61 | ndcg_score = utils.calNDCG(pred_item_ids, label_ids) 62 | ndcg_total += ndcg_score 63 | 64 | hr_score = len(set(pred_item_ids).intersection(set(label_ids))) / len(label_ids) 65 | hr_total += hr_score 66 | 67 | count += 1 68 | 69 | print('HR@{}: {:.6f}'.format(topk, hr_total / count)) 70 | print('NDCG@{}: {:.6f}'.format(topk, ndcg_total / count)) 71 | 72 | 73 | if __name__ == '__main__': 74 | test() 75 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import logging 4 | import numpy as np 5 | import tensorflow as tf 6 | 7 | import utils 8 | import data_loader 9 | from model import NLR_model 10 | from hyper_params import HyperParams 11 | 12 | 13 | def evaluate(sess, model, users, hist_items, scores, labels, 14 | user_2_id, item_2_id, test_ratio=0.5, topk=5): 15 | count = 0 16 | hr_total = 0 17 | ndcg_total = 0 18 | 19 | items_embedding_matrix = sess.run(model.item_embedding_layer) 20 | items_embedding_matrix = items_embedding_matrix[:, np.newaxis, :] 21 | for user, hist, feedback, label in zip(users, hist_items, scores, labels): 22 | if random.random() > test_ratio: 23 | continue 24 | user_data, items_data, feedback_data = data_loader.test_batch(user, hist, feedback, 25 | user_2_id, item_2_id) 26 | 27 | prob_pos = sess.run(model.probability_pos, 28 | feed_dict={model.input_user: user_data, 29 | model.input_items: items_data, 30 | model.input_feedback_score: feedback_data, 31 | model.target_emb_vec: items_embedding_matrix}) 32 | 33 | prob_pos = np.squeeze(prob_pos, axis=1) 34 | pred_item_ids = np.argsort(prob_pos, axis=0)[::-1][:topk] 35 | label_ids = [item_2_id.get(label, item_2_id[data_loader.UNKNOWN_TAG])] 36 | 37 | ndcg_score = utils.calNDCG(pred_item_ids, label_ids) 38 | ndcg_total += ndcg_score 39 | 40 | hr_score = len(set(pred_item_ids).intersection(set(label_ids))) / len(label_ids) 41 | hr_total += hr_score 42 | 43 | count += 1 44 | 45 | return hr_total / count, ndcg_total / count 46 | 47 | 48 | def train(): 49 | hparams = HyperParams() 50 | 51 | parser = hparams.parser 52 | hp = parser.parse_args() 53 | 54 | # read datas 55 | train_users, train_hist_items, train_scores, \ 56 | train_labels, user_2_id, item_2_id = data_loader.load_train_datas(hp.train_datas, 57 | hp.is_with_feedback) 58 | 59 | eval_users, eval_hist_items, eval_scores, eval_labels, _, _ = data_loader.load_train_datas( 60 | hp.eval_datas, hp.is_with_feedback) 61 | 62 | # build model 63 | model = NLR_model(user_embedding_dim=hp.user_emb_dim, item_embedding_dim=hp.item_emb_dim, 64 | hidden1_dim=hp.hidden1_dim, hidden2_dim=hp.hidden2_dim, 65 | num_users=len(user_2_id), num_items=len(item_2_id), learning_rate=hp.lr, 66 | l2_weight=hp.l2_weight, warmup_steps=hp.warmup_steps, 67 | interact_type=hp.interact_type) 68 | 69 | saver = tf.train.Saver(max_to_keep=5) 70 | with tf.Session() as sess: 71 | # initialize / restore 72 | ckpt = tf.train.latest_checkpoint(hp.checkpoint_dir) 73 | if ckpt is None: 74 | logging.info('Initializing from scratch') 75 | sess.run(tf.global_variables_initializer()) 76 | if not os.path.exists(hp.checkpoint_dir): 77 | os.mkdir(hp.checkpoint_dir) 78 | utils.save_training_info(user_2_id, item_2_id, 
model.get_hyper_parameter(), 79 | hp.checkpoint_dir) 80 | else: 81 | saver.restore(sess, ckpt) 82 | 83 | summary_writer = tf.summary.FileWriter(hp.tensorboard_dir, sess.graph) 84 | 85 | stop_flag = False 86 | best_hr = 0 87 | best_ndcg = 0 88 | last_update_step = 0 89 | max_nonupdate_steps = 50000 90 | num_batch = int((len(train_labels) - 1) / hp.batch_size + 1) 91 | for epoch in range(hp.num_epochs): 92 | batch_iter = data_loader.batch_iterator(train_users, train_hist_items, train_scores, 93 | train_labels, user_2_id, item_2_id, 94 | history_len=hp.history_len, 95 | batch_size=hp.batch_size) 96 | for i, batch in enumerate(batch_iter): 97 | user_batch, items_batch, feedback_batch, label_batch, neg_batch = batch 98 | 99 | current_step = epoch * num_batch + i 100 | # evaluate 101 | if current_step % hp.eval_per_steps == 0: 102 | # evaluate train dataset 103 | _pos_prob, _neg_prob, _target_loss, _l2_loss, \ 104 | _logical_loss, _loss, _summary = sess.run( 105 | [model.probability_pos, model.probability_neg, model.traget_loss, 106 | model.l2_loss, model.logical_loss, model.loss, model.summaries], 107 | feed_dict={model.input_user: user_batch, model.input_items: items_batch, 108 | model.input_feedback_score: feedback_batch, 109 | model.input_negative_sample: neg_batch, 110 | model.input_target: label_batch}) 111 | 112 | # evaluate validation dataset 113 | hr_k, ndcg_k = evaluate(sess, model, eval_users, eval_hist_items, eval_scores, 114 | eval_labels, user_2_id, item_2_id) 115 | summary_writer.add_summary(_summary, global_step=current_step) 116 | 117 | # save 118 | if ndcg_k >= best_ndcg or hr_k >= best_hr: 119 | is_best = '*' 120 | best_ndcg = ndcg_k 121 | best_hr = hr_k 122 | last_update_step = current_step 123 | saver.save(sess, os.path.join(hp.checkpoint_dir, hp.ckpt_name), 124 | global_step=current_step) 125 | else: 126 | is_best = '' 127 | 128 | print('\nepoch: {}, step: {}, train pos prob: {:.4f}, train neg prob: {:.4f}, ' 129 | 'train target loss: {:.4f}, train l2 loss: {:.4f}, ' 130 | 'train logical loss: {:.4f}, train loss: {:.4f} ' 131 | 'eval hr@k: {:.4f}, eval ndcg@k: {:.4f} {}' 132 | .format(epoch, current_step, np.mean(_pos_prob), np.mean(_neg_prob), 133 | _target_loss, _l2_loss, _logical_loss, _loss, hr_k, ndcg_k, 134 | is_best)) 135 | 136 | # train 137 | _ = sess.run(model.train_op, feed_dict={model.input_user: user_batch, 138 | model.input_items: items_batch, 139 | model.input_feedback_score: feedback_batch, 140 | model.input_negative_sample: neg_batch, 141 | model.input_target: label_batch}) 142 | 143 | if current_step - last_update_step >= max_nonupdate_steps: 144 | stop_flag = True 145 | break 146 | 147 | if stop_flag: 148 | break 149 | 150 | 151 | if __name__ == '__main__': 152 | train() 153 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import numpy as np 4 | 5 | 6 | def save_training_info(user2id, item2id, hypers, path): 7 | with open(os.path.join(path, 'user2id.txt'), 'w') as f: 8 | for key, value in user2id.items(): 9 | print('{} {}'.format(key, value), file=f) 10 | 11 | with open(os.path.join(path, 'item2id.txt'), 'w') as f: 12 | for key, value in item2id.items(): 13 | print('{} {}'.format(key, value), file=f) 14 | 15 | with open(os.path.join(path, 'hypers.json'), 'w') as f: 16 | json.dump(hypers, f) 17 | 18 | 19 | def load_training_info(path): 20 | user2id = {} 21 | with open(os.path.join(path, 
'user2id.txt'), 'r') as lines: 22 | for line in lines: 23 | key, value = line.strip().split(' ', 1) 24 | user2id[key] = int(value) 25 | 26 | item2id = {} 27 | with open(os.path.join(path, 'item2id.txt'), 'r') as lines: 28 | for line in lines: 29 | key, value = line.strip().split(' ', 1) 30 | item2id[key] = int(value) 31 | 32 | with open(os.path.join(path, 'hypers.json'), 'r') as f: 33 | hypers = json.load(f) 34 | 35 | return user2id, item2id, hypers 36 | 37 | 38 | def calDCG(scores): 39 | return np.sum( 40 | np.divide(scores, 41 | np.log(np.arange(scores.shape[0], dtype=np.float32) + 2)), 42 | dtype=np.float32) 43 | 44 | 45 | def calNDCG(rank_list, pos_items): 46 | relevance = np.ones_like(pos_items) 47 | it2rel = {it: r for it, r in zip(pos_items, relevance)} 48 | rank_scores = np.asarray([it2rel.get(it, 0.0) for it in rank_list], dtype=np.float32) 49 | 50 | idcg = calDCG(relevance) 51 | 52 | dcg = calDCG(rank_scores) 53 | 54 | if dcg == 0.0: 55 | return 0.0 56 | 57 | ndcg = dcg / idcg 58 | return ndcg 59 | --------------------------------------------------------------------------------
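A quick, hand-checkable usage example for the metric helpers above (run from the repository root so that `utils` is importable); with a single relevant item, `calNDCG` depends only on the rank at which that item shows up:

```python
import utils

# The relevant item 42 is ranked third in a top-5 list:
# DCG = 1 / ln(2 + 2), IDCG = 1 / ln(2), so NDCG = ln(2) / ln(4) = 0.5
print(utils.calNDCG([7, 13, 42, 5, 99], [42]))  # -> 0.5 (up to float32 rounding)

# The relevant item does not appear in the list at all:
print(utils.calNDCG([7, 13, 8, 5, 99], [42]))   # -> 0.0
```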