├── README.md ├── vbpr_multiprocess.py ├── bpr_mf.ipynb └── vbpr.ipynb /README.md: -------------------------------------------------------------------------------- 1 | # bpr 2 | 3 | BPR implemented in TensorFlow 4 | 5 | Bayesian Personalized Ranking (BPR) is a learning algorithm for collaborative filtering, first introduced in: BPR: Bayesian Personalized Ranking from Implicit Feedback. Steffen Rendle, Christoph Freudenthaler, Zeno Gantner and Lars Schmidt-Thieme, Proc. UAI 2009. BPR learns from pairwise preferences: for each user u, an observed item i is assumed to be preferred over an unobserved item j, and the model is trained to maximize ln sigmoid(x_ui - x_uj) with L2 regularization on the parameters. 6 | -------------------------------------------------------------------------------- /vbpr_multiprocess.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | import tensorflow as tf 4 | import os 5 | import lmdb 6 | import cPickle as pickle 7 | import numpy 8 | import random 9 | from multiprocessing import Process, Queue 10 | 11 | with open("man20k.pkl", "r") as f: 12 | user_id_mapping, item_id_mapping, train_ratings, test_ratings = pickle.load(f) 13 | 14 | image_features = {} 15 | db = lmdb.open('./features20k') 16 | with db.begin(write=False) as ctx: 17 | for iid in item_id_mapping.values(): 18 | image_features[iid] = numpy.fromstring(ctx.get(str(iid)), dtype=numpy.float32) 19 | 20 | assert(len(item_id_mapping) == len(image_features)) 21 | print len(user_id_mapping) 22 | print len(item_id_mapping) 23 | 24 | 25 | train_queue = Queue(4) 26 | def uniform_sample_batch(train_ratings, item_count, image_features, sample_count=20000, batch_size=512): 27 | for i in range(sample_count): 28 | t = [] 29 | iv = [] 30 | jv = [] 31 | for b in xrange(batch_size): 32 | u = random.sample(train_ratings.keys(), 1)[0] 33 | i = random.sample(train_ratings[u], 1)[0] 34 | j = random.randint(0, item_count-1) 35 | while j in train_ratings[u]: 36 | j = random.randint(0, item_count-1) 37 | t.append([u, i, j]) 38 | iv.append(image_features[i]) 39 | jv.append(image_features[j]) 40 | # block if queue is full 41 | train_queue.put( (numpy.asarray(t), numpy.vstack(tuple(iv)), numpy.vstack(tuple(jv))), True ) 42 | train_queue.put(None) 43 | 44 | def train_data_process(sample_count=20000, batch_size=512): 45 | p = Process(target=uniform_sample_batch, args=(train_ratings, len(item_id_mapping), image_features, sample_count, batch_size)) 46 | return p 47 | 48 | 49 | def test_batch_generator_by_user(train_ratings, test_ratings, item_count, image_features): 50 | # leave-one-out cross-validation: one held-out item per user 51 | for u in test_ratings.keys(): 52 | i = test_ratings[u] 53 | t = [] 54 | ilist = [] 55 | jlist = [] 56 | for j in range(item_count): 57 | if j != test_ratings[u] and not (j in train_ratings[u]): 58 | # only items in neither test[u] nor train[u] are used as negatives 59 | t.append([u, i, j]) 60 | ilist.append(image_features[i]) 61 | jlist.append(image_features[j]) 62 | yield numpy.asarray(t), numpy.vstack(tuple(ilist)), numpy.vstack(tuple(jlist)) 63 | 64 | 65 | def vbpr(user_count, item_count, hidden_dim=20, hidden_img_dim=128, 66 | learning_rate = 0.001, 67 | l2_regulization = 0.01, 68 | bias_regulization=1.0): 69 | """ 70 | user_count: total number of users 71 | item_count: total number of items 72 | hidden_dim: hidden feature size of MF 73 | hidden_img_dim: visual embedding size (the 4096-d image features are projected to this size) 74 | """ 75 | u = tf.placeholder(tf.int32, [None]) 76 | i = tf.placeholder(tf.int32, [None]) 77 | j = tf.placeholder(tf.int32, [None]) 78 | iv = tf.placeholder(tf.float32, [None, 4096]) 79 | jv = tf.placeholder(tf.float32, [None, 4096]) 80 | 81 | with tf.device("/gpu:1"): 82 | user_emb_w = tf.get_variable("user_emb_w", [user_count+1, hidden_dim], 83 | 
initializer=tf.random_normal_initializer(0, 0.1)) 84 | user_img_w = tf.get_variable("user_img_w", [user_count+1, hidden_img_dim], 85 | initializer=tf.random_normal_initializer(0, 0.1)) 86 | item_emb_w = tf.get_variable("item_emb_w", [item_count+1, hidden_dim], 87 | initializer=tf.random_normal_initializer(0, 0.1)) 88 | item_b = tf.get_variable("item_b", [item_count+1, 1], 89 | initializer=tf.constant_initializer(0.0)) 90 | 91 | u_emb = tf.nn.embedding_lookup(user_emb_w, u) 92 | u_img = tf.nn.embedding_lookup(user_img_w, u) 93 | 94 | i_emb = tf.nn.embedding_lookup(item_emb_w, i) 95 | i_b = tf.nn.embedding_lookup(item_b, i) 96 | j_emb = tf.nn.embedding_lookup(item_emb_w, j) 97 | j_b = tf.nn.embedding_lookup(item_b, j) 98 | 99 | with tf.device("/gpu:1"): 100 | img_emb_w = tf.get_variable("image_embedding_weights", [4096, hidden_img_dim], 101 | initializer=tf.random_normal_initializer(0, 0.1)) 102 | 103 | img_i_j = tf.matmul(iv - jv, img_emb_w) 104 | 105 | # pairwise preference prediction: x_uij = x_ui - x_uj 106 | x = i_b - j_b + tf.reduce_sum(tf.mul(u_emb, (i_emb - j_emb)), 1, keep_dims=True) + tf.reduce_sum(tf.mul(u_img, img_i_j),1, keep_dims=True) 107 | 108 | # auc score is used in test/cv 109 | # reduce_mean is reasonable BECAUSE 110 | # all test (i, j) pairs of one user are in ONE batch 111 | auc = tf.reduce_mean(tf.to_float(x > 0)) 112 | 113 | l2_norm = tf.add_n([ 114 | tf.reduce_sum(tf.mul(u_emb, u_emb)), 115 | tf.reduce_sum(tf.mul(u_img, u_img)), 116 | tf.reduce_sum(tf.mul(i_emb, i_emb)), 117 | tf.reduce_sum(tf.mul(j_emb, j_emb)), 118 | tf.reduce_sum(tf.mul(img_emb_w, img_emb_w)), 119 | bias_regulization * tf.reduce_sum(tf.mul(i_b, i_b)), 120 | bias_regulization * tf.reduce_sum(tf.mul(j_b, j_b)) 121 | ]) 122 | 123 | loss = l2_regulization * l2_norm - tf.reduce_mean(tf.log(tf.sigmoid(x))) # regularized BPR loss 124 | train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss) 125 | return u, i, j, iv, jv, loss, auc, train_op 126 | 127 | 128 | # In[17]: 129 | 130 | user_count = len(user_id_mapping) 131 | item_count = len(item_id_mapping) 132 | 133 | with tf.Graph().as_default(), tf.Session() as session: 134 | with tf.variable_scope('vbpr'): 135 | u, i, j, iv, jv, loss, auc, train_op = vbpr(user_count, item_count) 136 | 137 | session.run(tf.initialize_all_variables()) 138 | 139 | for epoch in range(1, 20): 140 | print "epoch ", epoch 141 | _loss_train = 0.0 142 | sample_count = 20000 143 | batch_size = 512 144 | p = train_data_process(sample_count, batch_size) 145 | p.start() 146 | data = train_queue.get(True) # block if queue is empty 147 | while data: # the sampler process puts None after its last batch 148 | d, _iv, _jv = data 149 | _loss, _ = session.run([loss, train_op], feed_dict={ 150 | u:d[:,0], i:d[:,1], j:d[:,2], iv:_iv, jv:_jv 151 | }) 152 | _loss_train += _loss 153 | data = train_queue.get(True) 154 | p.join() 155 | print "train_loss:", _loss_train/sample_count 156 | 157 | _auc_all = 0 158 | _loss_test = 0.0 159 | _test_user_count = len(test_ratings) 160 | for d, _iv, _jv in test_batch_generator_by_user(train_ratings, 161 | test_ratings, item_count, image_features): 162 | _loss, _auc = session.run([loss, auc], feed_dict={ 163 | u:d[:,0], i:d[:,1], j:d[:,2], iv:_iv, jv:_jv 164 | }) 165 | _loss_test += _loss 166 | _auc_all += _auc 167 | print "test_loss: ", _loss_test/_test_user_count, " auc: ", _auc_all/_test_user_count 168 | print "" 169 | 170 | -------------------------------------------------------------------------------- /bpr_mf.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | 
"metadata": {}, 6 | "source": [ 7 | "This notebook uses the MovieLens 1M dataset; \n", 8 | "more details can be found at: http://grouplens.org/datasets/movielens/" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "metadata": { 15 | "collapsed": false 16 | }, 17 | "outputs": [ 18 | { 19 | "name": "stdout", 20 | "output_type": "stream", 21 | "text": [ 22 | "max_u_id: 6040\n", 23 | "max_i_id: 3952\n" 24 | ] 25 | } 26 | ], 27 | "source": [ 28 | "import numpy\n", 29 | "import tensorflow as tf\n", 30 | "import os\n", 31 | "import random\n", 32 | "from collections import defaultdict\n", 33 | "\n", 34 | "def load_data(data_path):\n", 35 | " '''\n", 36 | " For the BPR experiment the rating values are discarded; only the implicit feedback (which items each user rated) is kept.\n", 37 | " '''\n", 38 | " user_ratings = defaultdict(set)\n", 39 | " max_u_id = -1\n", 40 | " max_i_id = -1\n", 41 | " with open(data_path, 'r') as f:\n", 42 | " for line in f.readlines():\n", 43 | " u, i, _, _ = line.split(\"::\")\n", 44 | " u = int(u)\n", 45 | " i = int(i)\n", 46 | " user_ratings[u].add(i)\n", 47 | " max_u_id = max(u, max_u_id)\n", 48 | " max_i_id = max(i, max_i_id)\n", 49 | " print \"max_u_id:\", max_u_id\n", 50 | " print \"max_i_id:\", max_i_id\n", 51 | " return max_u_id, max_i_id, user_ratings\n", 52 | " \n", 53 | "\n", 54 | "data_path = os.path.join('/home/hadoop/data/movielens', 'ratings.dat')\n", 55 | "user_count, item_count, user_ratings = load_data(data_path)\n", 56 | "\n", 57 | "\n", 58 | "def generate_test(user_ratings):\n", 59 | " '''\n", 60 | " for each user, randomly select one of his/her rated items as the held-out test item\n", 61 | " '''\n", 62 | " user_test = dict()\n", 63 | " for u, i_list in user_ratings.items():\n", 64 | " user_test[u] = random.sample(user_ratings[u], 1)[0]\n", 65 | " return user_test\n", 66 | "\n", 67 | "user_ratings_test = generate_test(user_ratings)" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 6, 73 | "metadata": { 74 | "collapsed": true 75 | }, 76 | "outputs": [], 77 | "source": [ 78 | "def generate_train_batch(user_ratings, user_ratings_test, item_count, batch_size=512):\n", 79 | " '''\n", 80 | " uniform sampling (user, item_rated, item_not_rated)\n", 81 | " '''\n", 82 | " t = []\n", 83 | " for b in xrange(batch_size):\n", 84 | " u = random.sample(user_ratings.keys(), 1)[0]\n", 85 | " i = random.sample(user_ratings[u], 1)[0]\n", 86 | " while i == user_ratings_test[u]:\n", 87 | " i = random.sample(user_ratings[u], 1)[0]\n", 88 | " \n", 89 | " j = random.randint(1, item_count)\n", 90 | " while j in user_ratings[u]:\n", 91 | " j = random.randint(1, item_count)\n", 92 | " t.append([u, i, j])\n", 93 | " return numpy.asarray(t)\n", 94 | "\n", 95 | "def generate_test_batch(user_ratings, user_ratings_test, item_count):\n", 96 | " '''\n", 97 | " for a user u and the held-out item i that u rated, \n", 98 | " generate pairs (u,i,j) for every item j which u hasn't rated;\n", 99 | " this makes it convenient to compute the AUC score for u\n", 100 | " '''\n", 101 | " for u in user_ratings.keys():\n", 102 | " t = []\n", 103 | " i = user_ratings_test[u]\n", 104 | " for j in xrange(1, item_count+1):\n", 105 | " if not (j in user_ratings[u]):\n", 106 | " t.append([u, i, j])\n", 107 | " yield numpy.asarray(t)" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 10, 113 | "metadata": { 114 | "collapsed": true 115 | }, 116 | "outputs": [], 117 | "source": [ 118 | "def bpr_mf(user_count, item_count, hidden_dim):\n", 119 | " u = tf.placeholder(tf.int32, [None])\n", 120 | " i = tf.placeholder(tf.int32, [None])\n", 121 | " j = 
tf.placeholder(tf.int32, [None])\n", 122 | "\n", 123 | " with tf.device(\"/cpu:0\"):\n", 124 | " user_emb_w = tf.get_variable(\"user_emb_w\", [user_count+1, hidden_dim], \n", 125 | " initializer=tf.random_normal_initializer(0, 0.1))\n", 126 | " item_emb_w = tf.get_variable(\"item_emb_w\", [item_count+1, hidden_dim], \n", 127 | " initializer=tf.random_normal_initializer(0, 0.1))\n", 128 | " item_b = tf.get_variable(\"item_b\", [item_count+1, 1], \n", 129 | " initializer=tf.constant_initializer(0.0))\n", 130 | " \n", 131 | " u_emb = tf.nn.embedding_lookup(user_emb_w, u)\n", 132 | " i_emb = tf.nn.embedding_lookup(item_emb_w, i)\n", 133 | " i_b = tf.nn.embedding_lookup(item_b, i)\n", 134 | " j_emb = tf.nn.embedding_lookup(item_emb_w, j)\n", 135 | " j_b = tf.nn.embedding_lookup(item_b, j)\n", 136 | " \n", 137 | " # MF predict: u_i > u_j\n", 138 | " x = i_b - j_b + tf.reduce_sum(tf.mul(u_emb, (i_emb - j_emb)), 1, keep_dims=True)\n", 139 | " \n", 140 | " # AUC for one user:\n", 141 | " # reasonable iff all (u,i,j) pairs are from the same user\n", 142 | " # \n", 143 | " # average AUC = mean( auc for each user in test set)\n", 144 | " mf_auc = tf.reduce_mean(tf.to_float(x > 0))\n", 145 | " \n", 146 | " l2_norm = tf.add_n([\n", 147 | " tf.reduce_sum(tf.mul(u_emb, u_emb)), \n", 148 | " tf.reduce_sum(tf.mul(i_emb, i_emb)),\n", 149 | " tf.reduce_sum(tf.mul(j_emb, j_emb))\n", 150 | " ])\n", 151 | " \n", 152 | " regulation_rate = 0.0001\n", 153 | " bprloss = regulation_rate * l2_norm - tf.reduce_mean(tf.log(tf.sigmoid(x)))\n", 154 | " \n", 155 | " train_op = tf.train.GradientDescentOptimizer(0.01).minimize(bprloss)\n", 156 | " return u, i, j, mf_auc, bprloss, train_op" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": 13, 162 | "metadata": { 163 | "collapsed": false 164 | }, 165 | "outputs": [ 166 | { 167 | "name": "stdout", 168 | "output_type": "stream", 169 | "text": [ 170 | "epoch: 1\n", 171 | "bpr_loss: 0.717820626124\n", 172 | "test_loss: 0.944949 test_auc: 0.635838932041\n", 173 | "\n", 174 | "epoch: 2\n", 175 | "bpr_loss: 0.705865208436\n", 176 | "test_loss: 0.931426 test_auc: 0.706418683048\n", 177 | "\n", 178 | "epoch: 3\n", 179 | "bpr_loss: 0.694599327731\n", 180 | "test_loss: 0.918148 test_auc: 0.747514513074\n", 181 | "\n", 182 | "epoch: 4\n", 183 | "bpr_loss: 0.684150013811\n", 184 | "test_loss: 0.905279 test_auc: 0.774131693715\n", 185 | "\n", 186 | "epoch: 5\n", 187 | "bpr_loss: 0.674296607874\n", 188 | "test_loss: 0.892612 test_auc: 0.792358162586\n", 189 | "\n", 190 | "epoch: 6\n", 191 | "bpr_loss: 0.66479459452\n", 192 | "test_loss: 0.880452 test_auc: 0.805371120111\n", 193 | "\n", 194 | "epoch: 7\n", 195 | "bpr_loss: 0.656021738486\n", 196 | "test_loss: 0.868155 test_auc: 0.815137821843\n", 197 | "\n", 198 | "epoch: 8\n", 199 | "bpr_loss: 0.647756577671\n", 200 | "test_loss: 0.856277 test_auc: 0.822629666476\n", 201 | "\n", 202 | "epoch: 9\n", 203 | "bpr_loss: 0.639781989999\n", 204 | "test_loss: 0.84503 test_auc: 0.828591293397\n", 205 | "\n", 206 | "epoch: 10\n", 207 | "bpr_loss: 0.632315899365\n", 208 | "test_loss: 0.834042 test_auc: 0.833400565847\n", 209 | "\n" 210 | ] 211 | } 212 | ], 213 | "source": [ 214 | "with tf.Graph().as_default(), tf.Session() as session:\n", 215 | " u, i, j, mf_auc, bprloss, train_op = bpr_mf(user_count, item_count, 20)\n", 216 | " session.run(tf.initialize_all_variables())\n", 217 | " for epoch in range(1, 11):\n", 218 | " _batch_bprloss = 0\n", 219 | " for k in range(1, 5000): # uniform samples from training set\n", 220 | " 
uij = generate_train_batch(user_ratings, user_ratings_test, item_count)\n", 221 | "\n", 222 | " _bprloss, _ = session.run([bprloss, train_op], \n", 223 | " feed_dict={u:uij[:,0], i:uij[:,1], j:uij[:,2]})\n", 224 | " _batch_bprloss += _bprloss\n", 225 | " \n", 226 | " print \"epoch: \", epoch\n", 227 | " print \"bpr_loss: \", _batch_bprloss / k\n", 228 | "\n", 229 | " user_count = 0\n", 230 | " _auc_sum = 0.0\n", 231 | "\n", 232 | " # each batch will return only one user's auc\n", 233 | " for t_uij in generate_test_batch(user_ratings, user_ratings_test, item_count):\n", 234 | "\n", 235 | " _auc, _test_bprloss = session.run([mf_auc, bprloss],\n", 236 | " feed_dict={u:t_uij[:,0], i:t_uij[:,1], j:t_uij[:,2]}\n", 237 | " )\n", 238 | " user_count += 1\n", 239 | " _auc_sum += _auc\n", 240 | " print \"test_loss: \", _test_bprloss, \"test_auc: \", _auc_sum/user_count\n", 241 | " print \"\"" 242 | ] 243 | } 244 | ], 245 | "metadata": { 246 | "kernelspec": { 247 | "display_name": "Python 2", 248 | "language": "python", 249 | "name": "python2" 250 | }, 251 | "language_info": { 252 | "codemirror_mode": { 253 | "name": "ipython", 254 | "version": 2 255 | }, 256 | "file_extension": ".py", 257 | "mimetype": "text/x-python", 258 | "name": "python", 259 | "nbconvert_exporter": "python", 260 | "pygments_lexer": "ipython2", 261 | "version": "2.7.11" 262 | } 263 | }, 264 | "nbformat": 4, 265 | "nbformat_minor": 0 266 | } 267 | -------------------------------------------------------------------------------- /vbpr.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import tensorflow as tf\n", 12 | "import os\n", 13 | "import lmdb\n", 14 | "import cPickle as pickle\n", 15 | "import numpy\n", 16 | "import random" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 2, 22 | "metadata": { 23 | "collapsed": false 24 | }, 25 | "outputs": [ 26 | { 27 | "name": "stdout", 28 | "output_type": "stream", 29 | "text": [ 30 | "16776\n", 31 | "11151\n" 32 | ] 33 | } 34 | ], 35 | "source": [ 36 | "with open(\"man20k.pkl\", \"r\") as f:\n", 37 | " user_id_mapping, item_id_mapping, train_ratings, test_ratings = pickle.load(f)\n", 38 | "\n", 39 | "image_features = {}\n", 40 | "db = lmdb.open('./features20k')\n", 41 | "with db.begin(write=False) as ctx:\n", 42 | " for iid in item_id_mapping.values():\n", 43 | " image_features[iid] = numpy.fromstring(ctx.get(str(iid)), dtype=numpy.float32)\n", 44 | "\n", 45 | "assert(len(item_id_mapping) == len(image_features))\n", 46 | "print len(user_id_mapping)\n", 47 | "print len(item_id_mapping)" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 3, 53 | "metadata": { 54 | "collapsed": true 55 | }, 56 | "outputs": [], 57 | "source": [ 58 | "def uniform_sample_batch(train_ratings, item_count, image_features, sample_count=20000, batch_size=5):\n", 59 | " for i in range(sample_count):\n", 60 | " t = []\n", 61 | " iv = []\n", 62 | " jv = []\n", 63 | " for b in xrange(batch_size):\n", 64 | " u = random.sample(train_ratings.keys(), 1)[0]\n", 65 | " i = random.sample(train_ratings[u], 1)[0]\n", 66 | " j = random.randint(0, item_count-1)\n", 67 | " while j in train_ratings[u]:\n", 68 | " j = random.randint(0, item_count-1)\n", 69 | " t.append([u, i, j])\n", 70 | " iv.append(image_features[i])\n", 71 | " jv.append(image_features[j])\n", 72 | " yield 
numpy.asarray(t), numpy.vstack(tuple(iv)), numpy.vstack(tuple(jv))\n", 73 | "\n", 74 | "def test_batch_generator_by_user(train_ratings, test_ratings, item_count, image_features):\n", 75 | " # using leave one cv\n", 76 | " for u in test_ratings.keys():\n", 77 | " i = test_ratings[u]\n", 78 | " t = []\n", 79 | " ilist = []\n", 80 | " jlist = []\n", 81 | " for j in range(item_count):\n", 82 | " if j != test_ratings[u] and not (j in train_ratings[u]):\n", 83 | " # find item not in test[u] and train[u]\n", 84 | " t.append([u, i, j])\n", 85 | " ilist.append(image_features[i])\n", 86 | " jlist.append(image_features[j])\n", 87 | " yield numpy.asarray(t), numpy.vstack(tuple(ilist)), numpy.vstack(tuple(jlist))" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 14, 93 | "metadata": { 94 | "collapsed": false 95 | }, 96 | "outputs": [], 97 | "source": [ 98 | "def vbpr(user_count, item_count, hidden_dim=20, hidden_img_dim=128, \n", 99 | " learning_rate = 0.001,\n", 100 | " l2_regulization = 0.01, \n", 101 | " bias_regulization=1.0):\n", 102 | " \"\"\"\n", 103 | " user_count: total number of users\n", 104 | " item_count: total number of items\n", 105 | " hidden_dim: hidden feature size of MF\n", 106 | " hidden_img_dim: [4096, hidden_img_dim]\n", 107 | " \"\"\"\n", 108 | " u = tf.placeholder(tf.int32, [None])\n", 109 | " i = tf.placeholder(tf.int32, [None])\n", 110 | " j = tf.placeholder(tf.int32, [None])\n", 111 | " iv = tf.placeholder(tf.float32, [None, 4096])\n", 112 | " jv = tf.placeholder(tf.float32, [None, 4096])\n", 113 | " \n", 114 | " with tf.device(\"/gpu:1\"):\n", 115 | " user_emb_w = tf.get_variable(\"user_emb_w\", [user_count+1, hidden_dim], \n", 116 | " initializer=tf.random_normal_initializer(0, 0.1))\n", 117 | " user_img_w = tf.get_variable(\"user_img_w\", [user_count+1, hidden_img_dim],\n", 118 | " initializer=tf.random_normal_initializer(0, 0.1))\n", 119 | " item_emb_w = tf.get_variable(\"item_emb_w\", [item_count+1, hidden_dim], \n", 120 | " initializer=tf.random_normal_initializer(0, 0.1))\n", 121 | " item_b = tf.get_variable(\"item_b\", [item_count+1, 1], \n", 122 | " initializer=tf.constant_initializer(0.0))\n", 123 | " \n", 124 | " u_emb = tf.nn.embedding_lookup(user_emb_w, u)\n", 125 | " u_img = tf.nn.embedding_lookup(user_img_w, u)\n", 126 | " \n", 127 | " i_emb = tf.nn.embedding_lookup(item_emb_w, i)\n", 128 | " i_b = tf.nn.embedding_lookup(item_b, i)\n", 129 | " j_emb = tf.nn.embedding_lookup(item_emb_w, j)\n", 130 | " j_b = tf.nn.embedding_lookup(item_b, j)\n", 131 | " \n", 132 | " with tf.device(\"/gpu:1\"):\n", 133 | " img_emb_w = tf.get_variable(\"image_embedding_weights\", [4096, hidden_img_dim], \n", 134 | " initializer=tf.random_normal_initializer(0, 0.1))\n", 135 | "\n", 136 | " img_i_j = tf.matmul(iv - jv, img_emb_w)\n", 137 | "\n", 138 | " # MF predict: u_i > u_j\n", 139 | " x = i_b - j_b + tf.reduce_sum(tf.mul(u_emb, (i_emb - j_emb)), 1, keep_dims=True) + \\\n", 140 | " tf.reduce_sum(tf.mul(u_img, img_i_j),1, keep_dims=True)\n", 141 | "\n", 142 | " # auc score is used in test/cv\n", 143 | " # reduce_mean is reasonable BECAUSE\n", 144 | " # all test (i, j) pairs of one user is in ONE batch\n", 145 | " auc = tf.reduce_mean(tf.to_float(x > 0))\n", 146 | "\n", 147 | " l2_norm = tf.add_n([\n", 148 | " tf.reduce_sum(tf.mul(u_emb, u_emb)), \n", 149 | " tf.reduce_sum(tf.mul(u_img, u_img)),\n", 150 | " tf.reduce_sum(tf.mul(i_emb, i_emb)),\n", 151 | " tf.reduce_sum(tf.mul(j_emb, j_emb)),\n", 152 | " tf.reduce_sum(tf.mul(img_emb_w, img_emb_w)),\n", 153 | " 
bias_regulization * tf.reduce_sum(tf.mul(i_b, i_b)),\n", 154 | " bias_regulization * tf.reduce_sum(tf.mul(j_b, j_b))\n", 155 | " ])\n", 156 | "\n", 157 | " loss = l2_norm - tf.reduce_mean(tf.log(tf.sigmoid(x)))\n", 158 | " train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)\n", 159 | " return u, i, j, iv, jv, loss, auc, train_op" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": 17, 165 | "metadata": { 166 | "collapsed": false 167 | }, 168 | "outputs": [ 169 | { 170 | "name": "stdout", 171 | "output_type": "stream", 172 | "text": [ 173 | "epoch 1\n", 174 | "train_loss: 1.90607950926\n", 175 | "epoch 2\n", 176 | "train_loss: 0.847671033382\n", 177 | "epoch 3\n", 178 | "train_loss: 0.471264983296\n", 179 | "epoch 4\n", 180 | "train_loss: 0.28013004905\n", 181 | "epoch 5\n", 182 | "train_loss: 0.16987822926\n", 183 | "epoch 6\n", 184 | "train_loss: 0.10363506636\n", 185 | "epoch 7\n", 186 | "train_loss: 0.0634256706461\n", 187 | "epoch 8\n", 188 | "train_loss: 0.0388962389901\n", 189 | "epoch 9\n", 190 | "train_loss: 0.0238909451365\n", 191 | "test_loss: 0.0185006202516 auc: 0.528875898521\n", 192 | "\n", 193 | "epoch 10\n", 194 | "train_loss: 0.0147129922137\n", 195 | "epoch 11\n", 196 | "train_loss: 0.00908719892614\n", 197 | "epoch 12\n", 198 | "train_loss: 0.00563998709526\n", 199 | "epoch 13\n", 200 | "train_loss: 0.00352557315351\n", 201 | "epoch 14\n", 202 | "train_loss: 0.00222928830911\n", 203 | "epoch 15\n", 204 | "train_loss: 0.00143267278047\n", 205 | "epoch 16\n", 206 | "train_loss: 0.000944744668785\n", 207 | "epoch 17\n", 208 | "train_loss: 0.000645333021879\n", 209 | "epoch 18\n", 210 | "train_loss: 0.000461429908581\n", 211 | "epoch 19\n", 212 | "train_loss: 0.000348555094795\n", 213 | "test_loss: 0.000201286266862 auc: 0.696867971356\n", 214 | "\n", 215 | "epoch 20\n", 216 | "train_loss: 0.000279322677117\n", 217 | "epoch 21\n", 218 | "train_loss: 0.000236824362859\n", 219 | "epoch 22\n", 220 | "train_loss: 0.000210696356109\n", 221 | "epoch 23\n", 222 | "train_loss: 0.0001946867191\n", 223 | "epoch 24\n", 224 | "train_loss: 0.000184854761115\n", 225 | "epoch 25\n", 226 | "train_loss: 0.000178814592335\n", 227 | "epoch 26\n", 228 | "train_loss: 0.000175114339741\n", 229 | "epoch 27\n", 230 | "train_loss: 0.000172837277001\n", 231 | "epoch 28\n", 232 | "train_loss: 0.000171442347579\n", 233 | "epoch 29\n", 234 | "train_loss: 0.000170585585118\n", 235 | "test_loss: 6.32324662892e-05 auc: 0.724187425064\n", 236 | "\n", 237 | "epoch 30\n", 238 | "train_loss: 0.000170058965712\n", 239 | "epoch 31\n", 240 | "train_loss: 0.000169736123702\n", 241 | "epoch 32\n", 242 | "train_loss: 0.000169537788141\n", 243 | "epoch 33\n", 244 | "train_loss: 0.00016941606754\n", 245 | "epoch 34\n", 246 | "train_loss: 0.000169341344794\n", 247 | "epoch 35\n", 248 | "train_loss: 0.000169295391766\n", 249 | "epoch 36\n", 250 | "train_loss: 0.000169267251971\n", 251 | "epoch 37\n", 252 | "train_loss: 0.000169249952829\n", 253 | "epoch 38\n", 254 | "train_loss: 0.000169239334558\n", 255 | "epoch 39\n", 256 | "train_loss: 0.000169232823362\n", 257 | "test_loss: 6.21836653008e-05 auc: 0.724219947274\n", 258 | "\n", 259 | "epoch 40\n", 260 | "train_loss: 0.000169228824583\n", 261 | "epoch 41\n", 262 | "train_loss: 0.000169226372818\n", 263 | "epoch 42\n", 264 | "train_loss: 0.000169224862679\n", 265 | "epoch 43\n", 266 | "train_loss: 0.000169223932986\n", 267 | "epoch 44\n", 268 | "train_loss: 0.000169223372417\n", 269 | "epoch 45\n", 270 | "train_loss: 
0.000169223024946\n", 271 | "epoch 46\n", 272 | "train_loss: 0.00016922280498\n", 273 | "epoch 47\n", 274 | "train_loss: 0.000169222674565\n", 275 | "epoch 48\n", 276 | "train_loss: 0.00016922258999\n", 277 | "epoch 49\n", 278 | "train_loss: 0.000169222545519\n", 279 | "test_loss: 6.21756902582e-05 auc: 0.723119083448\n", 280 | "\n", 281 | "epoch 50\n", 282 | "train_loss: 0.000169222513592\n", 283 | "epoch 51\n", 284 | "train_loss: 0.000169222495519\n", 285 | "epoch 52\n", 286 | "train_loss: 0.000169222475291\n", 287 | "epoch 53\n", 288 | "train_loss: 0.000169222473953\n", 289 | "epoch 54\n", 290 | "train_loss: 0.000169222472527\n", 291 | "epoch 55\n", 292 | "train_loss: 0.000169222468918\n", 293 | "epoch 56\n", 294 | "train_loss: 0.00016922246528\n", 295 | "epoch 57\n", 296 | "train_loss: 0.000169222464378\n", 297 | "epoch 58\n", 298 | "train_loss: 0.00016922246883\n", 299 | "epoch 59\n", 300 | "train_loss: 0.000169222466211\n", 301 | "test_loss: 6.2175628813e-05 auc: 0.722753278903\n", 302 | "\n", 303 | "epoch 60\n", 304 | "train_loss: 0.000169222461642\n", 305 | "epoch 61\n", 306 | "train_loss: 0.000169222460943\n", 307 | "epoch 62\n", 308 | "train_loss: 0.000169222467171\n", 309 | "epoch 63\n", 310 | "train_loss: 0.000169222463272\n", 311 | "epoch 64\n", 312 | "train_loss: 0.000169222463941\n", 313 | "epoch 65\n", 314 | "train_loss: 0.000169222463475\n", 315 | "epoch 66\n", 316 | "train_loss: 0.000169222464552\n", 317 | "epoch 67\n", 318 | "train_loss: 0.000169222464378\n", 319 | "epoch 68\n", 320 | "train_loss: 0.00016922246461\n", 321 | "epoch 69\n", 322 | "train_loss: 0.000169222466939\n", 323 | "test_loss: 6.2175629888e-05 auc: 0.722208490817\n", 324 | "\n", 325 | "epoch 70\n", 326 | "train_loss: 0.000169222467724\n", 327 | "epoch 71\n", 328 | "train_loss: 0.000169222470169\n", 329 | "epoch 72\n", 330 | "train_loss: 0.000169222464523\n", 331 | "epoch 73\n", 332 | "train_loss: 0.000169222463737\n", 333 | "epoch 74\n", 334 | "train_loss: 0.000169222465745\n", 335 | "epoch 75\n", 336 | "train_loss: 0.000169222462137\n", 337 | "epoch 76\n", 338 | "train_loss: 0.00016922245975\n", 339 | "epoch 77\n", 340 | "train_loss: 0.000169222466619\n", 341 | "epoch 78\n", 342 | "train_loss: 0.000169222466473\n", 343 | "epoch 79\n", 344 | "train_loss: 0.000169222463766\n", 345 | "test_loss: 6.21756297265e-05 auc: 0.722351304475\n", 346 | "\n", 347 | "epoch 80\n", 348 | "train_loss: 0.00016922246496\n", 349 | "epoch 81\n", 350 | "train_loss: 0.000169222462078\n", 351 | "epoch 82\n", 352 | "train_loss: 0.000169222465833\n", 353 | "epoch 83\n", 354 | "train_loss: 0.000169222463941\n", 355 | "epoch 84\n", 356 | "train_loss: 0.000169222466211\n", 357 | "epoch 85\n", 358 | "train_loss: 0.000169222466589\n", 359 | "epoch 86\n", 360 | "train_loss: 0.000169222464407\n", 361 | "epoch 87\n", 362 | "train_loss: 0.000169222467317\n", 363 | "epoch 88\n", 364 | "train_loss: 0.00016922246074\n", 365 | "epoch 89\n", 366 | "train_loss: 0.000169222460274\n", 367 | "test_loss: 6.21756326492e-05 auc: 0.722816082116\n", 368 | "\n", 369 | "epoch 90\n", 370 | "train_loss: 0.00016922246493\n", 371 | "epoch 91\n", 372 | "train_loss: 0.000169222466822\n", 373 | "epoch 92\n", 374 | "train_loss: 0.000169222465222\n", 375 | "epoch 93\n", 376 | "train_loss: 0.000169222465833\n", 377 | "epoch 94\n", 378 | "train_loss: 0.000169222465833\n", 379 | "epoch 95\n", 380 | "train_loss: 0.000169222463446\n", 381 | "epoch 96\n", 382 | "train_loss: 0.000169222459896\n", 383 | "epoch 97\n", 384 | "train_loss: 0.000169222458673\n", 385 | 
"epoch 98\n", 386 | "train_loss: 0.000169222463417\n", 387 | "epoch 99\n", 388 | "train_loss: 0.000169222469209\n", 389 | "test_loss: 6.21756314117e-05 auc: 0.722785097113\n", 390 | "\n", 391 | "epoch 100\n", 392 | "train_loss: 0.000169222462137\n", 393 | "epoch 101\n", 394 | "train_loss: 0.000169222466182\n", 395 | "epoch 102\n", 396 | "train_loss: 0.000169222465629\n", 397 | "epoch 103\n", 398 | "train_loss: 0.000169222466764\n", 399 | "epoch 104\n", 400 | "train_loss: 0.000169222461729\n", 401 | "epoch 105\n", 402 | "train_loss: 0.000169222460157\n", 403 | "epoch 106\n", 404 | "train_loss: 0.000169222466444\n", 405 | "epoch 107\n", 406 | "train_loss: 0.000169222462078\n", 407 | "epoch 108\n", 408 | "train_loss: 0.000169222469965\n", 409 | "epoch 109\n", 410 | "train_loss: 0.000169222461467\n", 411 | "test_loss: 6.21756326817e-05 auc: 0.722071970966\n", 412 | "\n", 413 | "epoch 110\n", 414 | "train_loss: 0.000169222466735\n", 415 | "epoch 111\n", 416 | "train_loss: 0.000169222465716\n", 417 | "epoch 112\n", 418 | "train_loss: 0.000169222462282\n", 419 | "epoch 113\n", 420 | "train_loss: 0.0001692224656\n", 421 | "epoch 114\n", 422 | "train_loss: 0.000169222462515\n", 423 | "epoch 115\n", 424 | "train_loss: 0.000169222461438\n", 425 | "epoch 116\n", 426 | "train_loss: 0.000169222465367\n", 427 | "epoch 117\n", 428 | "train_loss: 0.000169222462253\n", 429 | "epoch 118\n", 430 | "train_loss: 0.000169222469121\n", 431 | "epoch 119\n", 432 | "train_loss: 0.000169222467724\n", 433 | "test_loss: 6.21756272506e-05 auc: 0.722286653913\n", 434 | "\n", 435 | "epoch 120\n", 436 | "train_loss: 0.000169222470693\n", 437 | "epoch 121\n", 438 | "train_loss: 0.000169222465454\n", 439 | "epoch 122\n", 440 | "train_loss: 0.000169222464174\n", 441 | "epoch 123\n", 442 | "train_loss: 0.00016922246333\n", 443 | "epoch 124\n", 444 | "train_loss: 0.000169222468161\n", 445 | "epoch 125\n", 446 | "train_loss: 0.000169222466124\n", 447 | "epoch 126\n", 448 | "train_loss: 0.000169222470693\n", 449 | "epoch 127\n", 450 | "train_loss: 0.00016922246106\n", 451 | "epoch 128\n", 452 | "train_loss: 0.000169222462835\n", 453 | "epoch 129\n", 454 | "train_loss: 0.000169222466298\n", 455 | "test_loss: 6.21756290916e-05 auc: 0.722704050756\n", 456 | "\n", 457 | "epoch 130\n", 458 | "train_loss: 0.000169222466124\n", 459 | "epoch 131\n", 460 | "train_loss: 0.000169222466706\n", 461 | "epoch 132\n", 462 | "train_loss: 0.000169222464872\n", 463 | "epoch 133\n", 464 | "train_loss: 0.000169222464494\n", 465 | "epoch 134\n", 466 | "train_loss: 0.000169222464494\n", 467 | "epoch 135\n", 468 | "train_loss: 0.000169222461845\n", 469 | "epoch 136\n", 470 | "train_loss: 0.000169222460914\n", 471 | "epoch 137\n", 472 | "train_loss: 0.000169222462224\n", 473 | "epoch 138\n", 474 | "train_loss: 0.000169222462748\n", 475 | "epoch 139\n", 476 | "train_loss: 0.000169222463446\n", 477 | "test_loss: 6.21756309968e-05 auc: 0.72228236522\n", 478 | "\n", 479 | "epoch 140\n", 480 | "train_loss: 0.000169222461205\n", 481 | "epoch 141\n", 482 | "train_loss: 0.000169222464698\n", 483 | "epoch 142\n", 484 | "train_loss: 0.000169222460303\n", 485 | "epoch 143\n", 486 | "train_loss: 0.000169222465804\n", 487 | "epoch 144\n", 488 | "train_loss: 0.000169222465425\n", 489 | "epoch 145\n", 490 | "train_loss: 0.000169222464669\n", 491 | "epoch 146\n", 492 | "train_loss: 0.000169222463883\n", 493 | "epoch 147\n", 494 | "train_loss: 0.000169222465745\n", 495 | "epoch 148\n", 496 | "train_loss: 0.000169222465076\n", 497 | "epoch 149\n", 498 | "train_loss: 
0.000169222464436\n", 499 | "test_loss: 6.2175629206e-05 auc: 0.722723938225\n", 500 | "\n", 501 | "epoch 150\n", 502 | "train_loss: 0.000169222467928\n", 503 | "epoch 151\n", 504 | "train_loss: 0.000169222460099\n", 505 | "epoch 152\n", 506 | "train_loss: 0.000169222463417\n", 507 | "epoch 153\n", 508 | "train_loss: 0.000169222461438\n", 509 | "epoch 154\n", 510 | "train_loss: 0.000169222467579\n", 511 | "epoch 155\n", 512 | "train_loss: 0.000169222467812\n", 513 | "epoch 156\n", 514 | "train_loss: 0.000169222467259\n", 515 | "epoch 157\n", 516 | "train_loss: 0.000169222467812\n", 517 | "epoch 158\n", 518 | "train_loss: 0.000169222459401\n", 519 | "epoch 159\n", 520 | "train_loss: 0.00016922246461\n", 521 | "test_loss: 6.21756271442e-05 auc: 0.722820235079\n", 522 | "\n", 523 | "epoch 160\n", 524 | "train_loss: 0.000169222467724\n", 525 | "epoch 161\n", 526 | "train_loss: 0.0001692224617\n", 527 | "epoch 162\n", 528 | "train_loss: 0.000169222457102\n", 529 | "epoch 163\n", 530 | "train_loss: 0.000169222466968\n", 531 | "epoch 164\n", 532 | "train_loss: 0.000169222464639\n", 533 | "epoch 165\n", 534 | "train_loss: 0.000169222467201\n", 535 | "epoch 166\n", 536 | "train_loss: 0.000169222469238\n", 537 | "epoch 167\n", 538 | "train_loss: 0.000169222464901\n", 539 | "epoch 168\n", 540 | "train_loss: 0.000169222463155\n", 541 | "epoch 169\n", 542 | "train_loss: 0.000169222461845\n", 543 | "test_loss: 6.21756316478e-05 auc: 0.722910937715\n", 544 | "\n", 545 | "epoch 170\n", 546 | "train_loss: 0.000169222463592\n", 547 | "epoch 171\n", 548 | "train_loss: 0.000169222463854\n", 549 | "epoch 172\n", 550 | "train_loss: 0.000169222467375\n", 551 | "epoch 173\n", 552 | "train_loss: 0.000169222461554\n", 553 | "epoch 174\n", 554 | "train_loss: 0.000169222461176\n", 555 | "epoch 175\n", 556 | "train_loss: 0.000169222468481\n", 557 | "epoch 176\n", 558 | "train_loss: 0.000169222465134\n", 559 | "epoch 177\n", 560 | "train_loss: 0.000169222468452\n", 561 | "epoch 178\n", 562 | "train_loss: 0.000169222461787\n", 563 | "epoch 179\n", 564 | "train_loss: 0.00016922246429\n", 565 | "test_loss: 6.21756275126e-05 auc: 0.721525889624\n", 566 | "\n", 567 | "epoch 180\n", 568 | "train_loss: 0.000169222471101\n", 569 | "epoch 181\n", 570 | "train_loss: 0.000169222465862\n", 571 | "epoch 182\n", 572 | "train_loss: 0.000169222465105\n", 573 | "epoch 183\n", 574 | "train_loss: 0.000169222463621\n", 575 | "epoch 184\n", 576 | "train_loss: 0.000169222462748\n", 577 | "epoch 185\n", 578 | "train_loss: 0.000169222466502\n", 579 | "epoch 186\n", 580 | "train_loss: 0.000169222464348\n", 581 | "epoch 187\n", 582 | "train_loss: 0.00016922246496\n", 583 | "epoch 188\n", 584 | "train_loss: 0.00016922247046\n", 585 | "epoch 189\n", 586 | "train_loss: 0.000169222464639\n", 587 | "test_loss: 6.21756274816e-05 auc: 0.721473329031\n", 588 | "\n", 589 | "epoch 190\n", 590 | "train_loss: 0.000169222466735\n", 591 | "epoch 191\n", 592 | "train_loss: 0.000169222467754\n", 593 | "epoch 192\n", 594 | "train_loss: 0.000169222463475\n", 595 | "epoch 193\n", 596 | "train_loss: 0.000169222464028\n", 597 | "epoch 194\n", 598 | "train_loss: 0.000169222461118\n", 599 | "epoch 195\n", 600 | "train_loss: 0.000169222463068\n", 601 | "epoch 196\n", 602 | "train_loss: 0.000169222463766\n", 603 | "epoch 197\n", 604 | "train_loss: 0.00016922246234\n", 605 | "epoch 198\n", 606 | "train_loss: 0.000169222458586\n", 607 | "epoch 199\n", 608 | "train_loss: 0.00016922246365\n", 609 | "test_loss: 6.21756310719e-05 auc: 0.721355034757\n", 610 | "\n" 611 | ] 
612 | } 613 | ], 614 | "source": [ 615 | "user_count = len(user_id_mapping)\n", 616 | "item_count = len(item_id_mapping)\n", 617 | "\n", 618 | "with tf.Graph().as_default(), tf.Session() as session:\n", 619 | " with tf.variable_scope('vbpr'):\n", 620 | " u, i, j, iv, jv, loss, auc, train_op = vbpr(user_count, item_count)\n", 621 | " \n", 622 | " session.run(tf.initialize_all_variables())\n", 623 | " \n", 624 | " for epoch in range(1, 200):\n", 625 | " print \"epoch \", epoch\n", 626 | " _loss_train = 0.0\n", 627 | " sample_count = 500\n", 628 | " batch_size = 4096\n", 629 | " for d, _iv, _jv in uniform_sample_batch(train_ratings, item_count, image_features,\n", 630 | " batch_size=batch_size, sample_count=sample_count):\n", 631 | " _loss, _ = session.run([loss, train_op], feed_dict={\n", 632 | " u:d[:,0], i:d[:,1], j:d[:,2], iv:_iv, jv:_jv\n", 633 | " })\n", 634 | " _loss_train += _loss\n", 635 | " print \"train_loss:\", _loss_train/sample_count\n", 636 | " \n", 637 | " if epoch % 10 != 0:\n", 638 | " continue\n", 639 | " \n", 640 | " _auc_all = 0\n", 641 | " _loss_test = 0.0\n", 642 | " _test_user_count = len(test_ratings)\n", 643 | " for d, _iv, _jv in test_batch_generator_by_user(train_ratings, \n", 644 | " test_ratings, item_count, image_features):\n", 645 | " _loss, _auc = session.run([loss, auc], feed_dict={\n", 646 | " u:d[:,0], i:d[:,1], j:d[:,2], iv:_iv, jv:_jv\n", 647 | " })\n", 648 | " _loss_test += _loss\n", 649 | " _auc_all += _auc\n", 650 | " print \"test_loss: \", _loss_test/_test_user_count, \" auc: \", _auc_all/_test_user_count\n", 651 | " print \"\"" 652 | ] 653 | } 654 | ], 655 | "metadata": { 656 | "kernelspec": { 657 | "display_name": "Python 2", 658 | "language": "python", 659 | "name": "python2" 660 | }, 661 | "language_info": { 662 | "codemirror_mode": { 663 | "name": "ipython", 664 | "version": 2 665 | }, 666 | "file_extension": ".py", 667 | "mimetype": "text/x-python", 668 | "name": "python", 669 | "nbconvert_exporter": "python", 670 | "pygments_lexer": "ipython2", 671 | "version": "2.7.11" 672 | } 673 | }, 674 | "nbformat": 4, 675 | "nbformat_minor": 0 676 | } 677 | --------------------------------------------------------------------------------
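All three files above optimize the same BPR pairwise ranking objective from Rendle et al., UAI 2009: for sampled (user u, observed item i, unobserved item j) triples, maximize ln sigmoid(x_ui - x_uj) with L2 regularization. For reference, here is a minimal, self-contained sketch of one stochastic BPR update in plain numpy; the function and variable names (bpr_sgd_step, U, V, b) are illustrative only and do not come from the repository code.

import numpy as np

def bpr_sgd_step(U, V, b, u, i, j, lr=0.01, reg=0.0001):
    # One SGD step for a (user u, positive item i, sampled negative item j) triple.
    # U: user embedding matrix, V: item embedding matrix, b: item bias vector.
    wu, hi, hj = U[u].copy(), V[i].copy(), V[j].copy()
    x_uij = b[i] - b[j] + wu.dot(hi - hj)      # pairwise score difference x_ui - x_uj
    g = 1.0 / (1.0 + np.exp(x_uij))            # d ln(sigmoid(x)) / dx = 1 - sigmoid(x)
    # gradient ascent on ln(sigmoid(x_uij)) minus an L2 penalty (constant factors folded into reg)
    U[u] += lr * (g * (hi - hj) - reg * wu)
    V[i] += lr * (g * wu - reg * hi)
    V[j] += lr * (-g * wu - reg * hj)
    b[i] += lr * (g - reg * b[i])
    b[j] += lr * (-g - reg * b[j])

# toy usage: 5 users, 7 items, 4 latent factors
rng = np.random.RandomState(0)
U, V, b = rng.normal(0, 0.1, (5, 4)), rng.normal(0, 0.1, (7, 4)), np.zeros(7)
bpr_sgd_step(U, V, b, u=2, i=3, j=5)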