├── BPRMF.py ├── MMMF.py ├── README.md ├── VBPR.py ├── Where to find the data ├── __init__.py ├── common.py ├── corpus.py ├── main.py └── model.py /BPRMF.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Wed Jul 18 13:14:30 2018 4 | 5 | @author: Shinelon 6 | """ 7 | from VBR2016 import model 8 | import numpy as np 9 | import time 10 | import copy 11 | 12 | class BPRMF(model.model): 13 | def __init__(self, corp, K, lambd, biasReg): 14 | super(BPRMF, self).__init__(corp) 15 | self.K = K 16 | self.lambd = lambd 17 | self.biasReg = biasReg 18 | self.beta_item = [] 19 | self.gamma_user = [[]] 20 | self.gamma_item = [[]] 21 | 22 | def init(self): 23 | self.NW = self.nItems + self.K*(self.nUsers + self.nItems) 24 | self.W = [0.0]*self.NW 25 | self.bestW = [0.0]*self.NW 26 | self.getParametersFromVectors(self.W, self.beta_item, self.gamma_user, self.gamma_item, 'INIT') 27 | return 28 | 29 | def cleanUp(self): 30 | self.getParametersFromVectors(self.W, self.beta_item, self.gamma_user, self.gamma_item, 'FREE') 31 | return 32 | 33 | def prediction(self, user, item): 34 | return self.beta_item[item] + np.dot(self.gamma_user[user], self.gamma_item[item]) 35 | 36 | def getParametersFromVectors(self, g, beta_item, gamma_user, gamma_item, action='on'): 37 | if action == 'FREE': 38 | self.gamma_user = [] 39 | self.gamma_item = [] 40 | return 41 | if action == 'INIT': 42 | self.beta_item = g[:self.nItems] 43 | self.gamma_user = np.random.random((self.nUsers, self.K)) 44 | self.gamma_item = np.random.random((self.nItems, self.K)) 45 | return 46 | self.beta_item = g[:self.nItems] 47 | g = np.array(g[self.nItems:]).reshape(self.nUsers+self.nItems, self.K) 48 | self.gamma_user = g[:self.nUsers] 49 | self.gamma_item = g[self.nUsers:] 50 | return 51 | 52 | def sampleUser(self): 53 | while True: 54 | user_id = np.random.randint(0, self.nUsers-1) 55 | if len(self.pos_per_user[user_id]) == 0 or len(self.pos_per_user[user_id]) == self.nItems: 56 | continue 57 | return user_id 58 | 59 | def train(self, iterations, learn_rate): 60 | self.tostring1() 61 | bestValidAUC = -1 62 | best_iter = 0 63 | for Iter in range(iterations): 64 | clock_t = time.time() 65 | self.oneIteration(learn_rate) 66 | print "Iter: %d, took %f"%(Iter, time.time()-clock_t) 67 | if Iter % 5 == 0: 68 | self.AUC() 69 | print "[Valid AUC = %f], Test AUC = %f, Test Std = %f\n"%(self.AUC_val, self.AUC_test, self.std) 70 | if bestValidAUC < self.AUC_val: 71 | bestValidAUC = self.AUC_val 72 | best_iter = Iter 73 | self.W = [] 74 | self.W.extend(self.beta_item) 75 | self.W.extend(self.gamma_user.reshape(1,self.nUsers*self.K).tolist()[0]) 76 | self.W.extend(self.gamma_item.reshape(1,self.nItems*self.K).tolist()[0]) 77 | self.copyBestModel() 78 | elif self.AUC_val < bestValidAUC and Iter > best_iter + 50: 79 | print "Overfitting!" 80 | break 81 | #self.W = copy.deepcopy(self.bestW) 82 | self.getParametersFromVectors(self.bestW, self.beta_item, self.gamma_user, self.gamma_item, action='on') 83 | self.AUC() 84 | self.tostring2() 85 | return 86 | 87 | def oneIteration(self, learn_rate): 88 | print "oneIteration..." 89 | userMatrix = [] 90 | for i in range(self.nUsers): 91 | userMatrix.append([]) 92 | for u in range(self.nUsers): 93 | for w in self.pos_per_user[u]: 94 | userMatrix[u].append(w) 95 | for i in range(self.num_pos_events): 96 | if i%200 == 0: 97 | print i 98 | user_id = self.sampleUser() 99 | if len(userMatrix[user_id]) == 0: 100 | for w in self.pos_per_user[user_id]: 101 | userMatrix[user_id].append(w) 102 | rand_num = np.random.randint(0, len(userMatrix[user_id])) 103 | pos_item_id = userMatrix[user_id][rand_num] 104 | userMatrix[user_id].remove(pos_item_id) 105 | while True: 106 | neg_item_id = np.random.randint(0, self.nItems-1) 107 | if not self.pos_per_user[user_id].has_key(neg_item_id): 108 | break 109 | self.updateFactors(user_id, pos_item_id, neg_item_id, learn_rate) 110 | print "one iteration end!" 111 | return 112 | 113 | def updateFactors(self, user_id, pos_item_id, neg_item_id, learn_rate): 114 | #print "updateFactors..." 115 | x_uij = self.beta_item[pos_item_id] - self.beta_item[neg_item_id] 116 | x_uij += np.dot(self.gamma_user[user_id], self.gamma_item[pos_item_id]) - np.dot(self.gamma_user[user_id], self.gamma_item[neg_item_id]) 117 | deri = 1.0/(1+np.exp(x_uij)) 118 | self.beta_item[pos_item_id] += learn_rate * (deri - self.biasReg * self.beta_item[pos_item_id]) 119 | self.beta_item[neg_item_id] += learn_rate * (-deri - self.biasReg * self.beta_item[neg_item_id]) 120 | for f in range(self.K): 121 | w_uf = self.gamma_user[user_id][f] 122 | h_if = self.gamma_item[pos_item_id][f] 123 | h_jf = self.gamma_item[neg_item_id][f] 124 | self.gamma_user[user_id][f] += learn_rate * ( deri * (h_if - h_jf) - self.lambd * w_uf) 125 | self.gamma_item[pos_item_id][f] += learn_rate * ( deri * w_uf - self.lambd * h_if) 126 | self.gamma_item[neg_item_id][f] += learn_rate * (-deri * w_uf - self.lambd / 10.0 * h_jf) 127 | return 128 | 129 | def tostring1(self): 130 | print "BPR-MF__K_%d_lambda_%.2f_biasReg_%.2f"%(self.K, self.lambd, self.biasReg) 131 | return 132 | 133 | def tostring2(self): 134 | print "<<< BPR-MF >>> Test AUC = %f, Test Std = %f\n"%(self.AUC_test, self.std) 135 | return 136 | -------------------------------------------------------------------------------- /MMMF.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Thu Jul 19 10:06:21 2018 4 | 5 | @author: Shinelon 6 | """ 7 | from VBR2016 import BPRMF 8 | import numpy as np 9 | 10 | class MMMF(BPRMF.BPRMF): 11 | def __init__(self, corp, K, lambd, biasReg): 12 | super(MMMF, self).__init__(corp, K, lambd, biasReg) 13 | 14 | def updataFactors(self, user_id, pos_item_id, neg_item_id, learn_rate): 15 | x_uij = self.beta_item[pos_item_id] - self.beta_item[neg_item_id] 16 | x_uij += np.dot(self.gamma_user[user_id], self.gamma_item[pos_item_id]) - np.dot(self.gamma_user[user_id], self.gamma_item[neg_item_id]) 17 | deri = 1.0/(1+np.exp(x_uij)) 18 | if x_uij < 0: 19 | deri = 1 20 | else: 21 | deri = 0 22 | self.beta_item[pos_item_id] += learn_rate * (deri - self.biasReg * self.beta_item[pos_item_id]) 23 | self.beta_item[neg_item_id] += learn_rate * (-deri - self.biasReg * self.beta_item[neg_item_id]) 24 | for f in range(self.K): 25 | w_uf = self.gamma_user[user_id][f] 26 | h_if = self.gamma_item[pos_item_id][f] 27 | h_jf = self.gamma_item[neg_item_id][f] 28 | self.gamma_user[user_id][f] += learn_rate * ( deri * (h_if - h_jf) - self.lambd * w_uf) 29 | self.gamma_item[pos_item_id][f] += learn_rate * ( deri * w_uf - self.lambd * h_if) 30 | self.gamma_item[neg_item_id][f] += learn_rate * (-deri * w_uf - self.lambd / 10.0 * h_jf) 31 | return 32 | 33 | def tostring1(self): 34 | print "MMMF__K_%d_lambda_%.2f_biasReg_%.2f"%(self.K, self.lambd, self.biasReg) 35 | return 36 | 37 | def tostring2(self): 38 | print "<<< MMMF >>> Test AUC = %f, Test Std = %f\n"%(self.AUC_test, self.std) 39 | return 40 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # VBPR 2 | A pyhton implentation of VBPR 3 | refer to the paper: 4 | VBPR: Visual Bayesian Personalized Randking from Implicit Feedback by Ruining He Julian McAuley 5 | Copyright 2016, Association for the Advancement of Artificial Intelligence(www.aaai.org). All rights resevered. 6 | a test 7 | -------------------------------------------------------------------------------- /VBPR.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Thu Jul 19 10:42:39 2018 4 | 5 | @author: Shinelon 6 | """ 7 | from VBR2016 import BPRMF 8 | import numpy as np 9 | import random 10 | import time 11 | 12 | class VBPR(BPRMF.BPRMF): 13 | def __init__(self, corp, K, K2, lambd, lambd2, biasReg): 14 | super(VBPR, self).__init__(corp, K, lambd, biasReg) 15 | self.K2 = K2 16 | self.lambd2 = lambd2 17 | 18 | def init(self): 19 | self.NW = self.K * self.nUsers + (self.K + 1) * self.nItems + self.K2 * self.nUsers + self.K2 * self.corp.imageFeatureDim + self.corp.imageFeatureDim 20 | self.W = np.zeros((1, self.NW)) 21 | self.bestW = np.zeros((1, self.NW)) 22 | for i in range(self.nItems, self.NW-self.corp.imageFeatureDim): 23 | self.W[0][i] = random.random() 24 | self.getParametersFromVector(self.W, 'INIT') 25 | return 26 | 27 | def cleanUp(self): 28 | self.getParametersFromVector(self.W, 'FREE') 29 | self.W = [] 30 | self.bestW = [] 31 | return 32 | 33 | def getParametersFromVector(self, g, action): 34 | if action == 'FREE': 35 | self.gamma_user = [] 36 | self.gamma_item = [] 37 | self.theta_user = [] 38 | self.U = [] 39 | return 40 | if action == 'INIT': 41 | self.beta_item = np.zeros((1, self.nItems)).tolist()[0] 42 | self.gamma_user = np.ones((self.nUsers, self.K)).tolist() 43 | self.gamma_item = np.ones((self.nItems, self.K)).tolist() 44 | self.theta_user = np.ones((self.nUsers, self.K2)).tolist() 45 | self.U = np.ones((self.K2, self.corp.imageFeatureDim)).tolist() 46 | self.beta_cnn = np.zeros((1, self.corp.imageFeatureDim)).tolist()[0] 47 | self.theta_item = np.zeros((self.nItems, self.K2)).tolist() 48 | self.beta_item_visual = np.zeros((1, self.nItems)).tolist()[0] 49 | return 50 | 51 | def getVisualFactors(self): 52 | self.theta_item = np.zeros((self.nItems, self.K2)).tolist() 53 | self.beta_item_visual = np.zeros((1, self.nItems)).tolist()[0] 54 | for x in range(self.nItems): 55 | feat = self.corp.imageFeatures[x] 56 | for k in range(self.K2): 57 | for i in range(len(feat)): 58 | self.theta_item[x][k] += self.U[k][feat[i][0]]*feat[i][1] 59 | for i in range(len(feat)): 60 | self.beta_item_visual[x] += self.beta_cnn[feat[i][0]]*feat[i][1] 61 | return 62 | 63 | def prediction(self, user, item): 64 | return self.beta_item[item] + np.dot(self.gamma_user[user], self.gamma_item[item]) + np.dot(self.theta_item[item], self.theta_user[user]) + self.beta_item_visual[item] 65 | 66 | def train(self, iterations, learn_rate): 67 | self.tostring1() 68 | bestValidAUC = -1 69 | best_iter = 0 70 | for Iter in range(iterations): 71 | clock_t = time.time() 72 | self.oneIteration(learn_rate) 73 | print "Iter: %d, took %f"%(Iter, time.time()-clock_t) 74 | if Iter % 1 == 0: 75 | self.getVisualFactors() 76 | self.AUC() 77 | print "[Valid AUC = %f], Test AUC = %f, Test Std = %f\n"%(self.AUC_val, self.AUC_test, self.std) 78 | if bestValidAUC < self.AUC_val: 79 | bestValidAUC = self.AUC_val 80 | best_iter = Iter 81 | self.W = [] 82 | self.W.extend(self.beta_item) 83 | self.W.extend(self.gamma_user.reshape(1,self.nUsers*self.K).tolist()[0]) 84 | self.W.extend(self.gamma_item.reshape(1,self.nItems*self.K).tolist()[0]) 85 | self.copyBestModel() 86 | elif self.AUC_val < bestValidAUC and Iter > best_iter + 20: 87 | print "Overfitting!" 88 | break 89 | #self.W = copy.deepcopy(self.bestW) 90 | self.getParametersFromVectors(self.bestW, action='on') 91 | self.AUC() 92 | self.tostring2() 93 | return 94 | 95 | def updateFactors(self, user_id, pos_item_id, neg_item_id, learn_rate): 96 | #print "updataFactors..." 97 | diff = [] 98 | feat_i = self.corp.imageFeatures[pos_item_id] 99 | feat_j = self.corp.imageFeatures[neg_item_id] 100 | p_i = 0 101 | p_j = 0 102 | while p_i < len(feat_i) and p_j < len(feat_j): 103 | ind_i = int(feat_i[p_i][0]) 104 | ind_j = int(feat_j[p_j][0]) 105 | if ind_i < ind_j: 106 | diff.append((ind_i, feat_i[p_i][1])) 107 | p_i += 1 108 | elif ind_i > ind_j: 109 | diff.append((ind_j, -feat_j[p_j][1])) 110 | p_j += 1 111 | else: 112 | diff.append((ind_i, feat_i[p_i][1]-feat_j[p_j][1])) 113 | p_i += 1 114 | p_j += 1 115 | while p_i < len(feat_i): 116 | diff.append((int(feat_i[p_i][0]), feat_i[p_i][1])) 117 | p_i += 1 118 | while p_j < len(feat_j): 119 | diff.append((int(feat_j[p_j][0]), -feat_j[p_j][1])) 120 | p_j += 1 121 | for r in range(self.K2): 122 | self.theta_item[0][r] = 0 123 | for ind in range(len(diff)): 124 | c = diff[ind][0] 125 | self.theta_item[0][r] += self.U[r][c]*diff[ind][1] 126 | visual_bias = 0 127 | for ind in range(len(diff)): 128 | c = diff[ind][0] 129 | visual_bias += self.beta_cnn[c]*diff[ind][1] 130 | x_uij = self.beta_item[pos_item_id] - self.beta_item[neg_item_id] 131 | x_uij += np.dot(self.gamma_user[user_id], self.gamma_item[pos_item_id]) - np.dot(self.gamma_user[user_id], self.gamma_item[neg_item_id]) 132 | x_uij += np.dot(self.theta_user[user_id], self.theta_item[0]) 133 | x_uij += visual_bias 134 | deri = 1./(1+np.exp(x_uij)) 135 | self.beta_item[pos_item_id] += learn_rate * (deri - self.biasReg * self.beta_item[pos_item_id]) 136 | self.beta_item[neg_item_id] += learn_rate * (-deri - self.biasReg * self.beta_item[neg_item_id]) 137 | for f in range(self.K): 138 | w_uf = self.gamma_user[user_id][f] 139 | h_if = self.gamma_item[pos_item_id][f] 140 | h_jf = self.gamma_item[neg_item_id][f] 141 | self.gamma_user[user_id][f] += learn_rate * ( deri * (h_if - h_jf) - self.lambd * w_uf) 142 | self.gamma_item[pos_item_id][f] += learn_rate * ( deri * w_uf - self.lambd * h_if) 143 | self.gamma_item[neg_item_id][f] += learn_rate * (-deri * w_uf - self.lambd / 10.0 * h_jf) 144 | for f in range(self.K2): 145 | for ind in range(len(diff)): 146 | c = diff[ind][0] 147 | self.U[f][c] += learn_rate * (deri * self.theta_user[user_id][f] * diff[ind][1] - self.lambd2 * self.U[f][c]) 148 | self.theta_user[user_id][f] += learn_rate * (deri * self.theta_item[0][f] - self.lambd * self.theta_user[user_id][f]) 149 | for ind in range(len(diff)): 150 | c = diff[ind][0] 151 | self.beta_cnn[c] += learn_rate * (deri * diff[ind][1] - self.lambd2 * self.beta_cnn[c]) 152 | #print "updataFactors..." 153 | return 154 | 155 | def tostring1(self): 156 | print "VBPRF__K_%d_lambda_%.2f_biasReg_%.2f"%(self.K, self.lambd, self.biasReg) 157 | return 158 | 159 | def tostring2(self): 160 | print "<<< VBPRF >>> Test AUC = %f, Test Std = %f\n"%(self.AUC_test, self.std) 161 | return 162 | 163 | 164 | -------------------------------------------------------------------------------- /Where to find the data: -------------------------------------------------------------------------------- 1 | http://jmcauley.ucsd.edu/data/amazon/ is all you need! 2 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Mon Jul 16 14:15:48 2018 4 | 5 | @author: Shinelon 6 | """ 7 | 8 | -------------------------------------------------------------------------------- /common.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Mon Jul 16 13:50:56 2018 4 | 5 | @author: Shinelon 6 | """ 7 | import numpy as np 8 | import codecs 9 | import os 10 | import pandas as pd 11 | import re 12 | 13 | class vote: 14 | def __init__(self): 15 | self.user = 0 16 | self.item = 0 17 | self.label = 0 18 | self.voteTime = 0 19 | 20 | def fopen(path='hehe.txt', mode='r'): 21 | head = "C:/Users/Shinelon/Desktop/VBR2016" 22 | m = os.path.join(head, path) 23 | m = m.replace('\\','/') 24 | f = codecs.open(m, mode) 25 | return f 26 | 27 | def inner(x, y): 28 | if not (len(x) == len(y)): 29 | print "inner error, size does not match" 30 | return np.dot(x, y) 31 | 32 | def square(x): 33 | return x*x 34 | 35 | def desquare(x): 36 | return 2*x 37 | 38 | def stringTrim(s): 39 | return s.strip() 40 | 41 | def sigmoid(x): 42 | return 1.0/(1.0 + np.exp(-x)) 43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /corpus.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Spyder Editor 4 | 5 | This is a temporary script file. 6 | """ 7 | from VBR2016 import common 8 | import random 9 | import struct 10 | 11 | class corpus: 12 | def __init__(self): 13 | self.V = [] 14 | self.nUsers = 0 15 | self.nItems = 0 16 | self.nVotes = 0 17 | self.userIds = {} 18 | self.itemIds = {} 19 | self.ruserIds = {} 20 | self.ritemIds = {} 21 | self.imgAsins = {} 22 | self.uCounts = {} 23 | self.bCounts = {} 24 | self.imageFeatures = {} 25 | self.imageFeatureDim = 4096 26 | 27 | def loadData(self, voteFile, imgFeatPath, userMin, itemMin): 28 | self.nUsers = 0 29 | self.nItems = 0 30 | self.nVotes = 0 31 | self.imageFeatureDim = 4096 32 | 33 | self.loadVotes(imgFeatPath, voteFile, userMin, itemMin) 34 | self.loadImageFeatures(imgFeatPath) 35 | print "\n \"nUsers\": %d, \"nItems\": %d, \"nVotes\": %d\n"%(self.nUsers, self.nItems, self.nVotes) 36 | return 37 | 38 | def cleanUp(self): 39 | self.V = [] 40 | return 41 | 42 | def loadVotes(self, imgFeatPath, voteFile, userMin, itemMin): 43 | f = common.fopen(imgFeatPath, 'rb') 44 | print "pre-loading image asins from %s"%(imgFeatPath) 45 | #feat = [0.0]*self.imageFeatureDim 46 | while True: 47 | asin = f.read(10) 48 | #print asin 49 | if asin == '': 50 | break 51 | asin == asin.strip() 52 | self.imgAsins[asin] = 1 53 | feature = [] 54 | for i in range(4096): 55 | feature.append(struct.unpack('f', f.read(4))) 56 | f.close() 57 | 58 | print "Loading votes from %s, userMin = %d, itemMin = %d "%(voteFile, userMin, itemMin) 59 | self.voteMap = {} 60 | f1 = common.fopen(voteFile) 61 | count = 0 62 | for l in f1: 63 | l = l.strip() 64 | l = l.split(',') 65 | uName = l[0] 66 | bName = l[1] 67 | value = l[2] 68 | count += 1 69 | if count % 10000 == 0: 70 | print count 71 | if not self.imgAsins.has_key(bName): 72 | continue 73 | if not self.uCounts.has_key(uName): 74 | self.uCounts[uName] = 0 75 | if not self.bCounts.has_key(bName): 76 | self.bCounts[bName] = 0 77 | self.uCounts[uName] += 1 78 | self.bCounts[bName] += 1 79 | f1.close() 80 | self.nUsers = 0 81 | self.nItems = 0 82 | f2 = common.fopen(voteFile) 83 | count = 0 84 | for l in f2: 85 | l = l.strip() 86 | l = l.split(',') 87 | count += 1 88 | if count % 10000 == 0: 89 | print count 90 | uName, bName, value, voteTime = l[0], l[1], l[2], l[3] 91 | if not self.imgAsins.has_key(bName): 92 | continue 93 | if self.uCounts[uName] < userMin or self.bCounts[bName] < itemMin: 94 | continue 95 | if not self.itemIds.has_key(bName): 96 | self.ritemIds[self.nItems] = bName 97 | self.itemIds[bName] = self.nItems 98 | self.nItems += 1 99 | if not self.userIds.has_key(uName): 100 | self.ruserIds[self.nUsers] = uName 101 | self.userIds[uName] = self.nUsers 102 | self.nUsers += 1 103 | self.voteMap[(self.userIds[uName], self.itemIds[bName])] = voteTime 104 | f2.close() 105 | self.generateVotes() 106 | return 107 | 108 | def loadImageFeatures(self, imgFeatPath): 109 | f = common.fopen(imgFeatPath, 'rb') 110 | print "\nLoading imgFeatures from %s"%imgFeatPath 111 | ma = 58.388599 112 | while True: 113 | asin = f.read(10) 114 | if asin == '': 115 | break 116 | asin = asin.strip() 117 | feature = [] 118 | for i in range(4096): 119 | feature.append(struct.unpack('f', f.read(4))) 120 | if not self.itemIds.has_key(asin): 121 | continue 122 | for i in range(4096): 123 | feature[i] = feature[i] + (feature[i][0]/ma,) 124 | self.imageFeatures[self.itemIds[asin]] = feature 125 | f.close() 126 | return 127 | 128 | def generateVotes(self): 129 | print "\n Generating votes data: " 130 | for key in self.voteMap: 131 | v = common.vote() 132 | v.user = key[0] 133 | v.item = key[1] 134 | v.voteTime = self.voteMap[key] 135 | self.V.append(v) 136 | self.nVotes = len(self.V) 137 | random.shuffle(self.V) 138 | return -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Wed Jul 18 13:05:41 2018 4 | 5 | @author: Shinelon 6 | """ 7 | from VBR2016 import BPRMF, corpus, MMMF, VBPR 8 | corp = corpus.corpus() 9 | corp.loadData('ratings_Video_Games.csv','image_features_Video_Games.b',15,15) 10 | def goMMMF(corp, K, lambd, learn_rate, iterations, biasReg): 11 | x = MMMF.MMMF(corp, K, lambd, biasReg) 12 | x.init() 13 | x.train(iterations, learn_rate) 14 | x.saveModel('hehe.txt') 15 | return 16 | 17 | def goBPRMF(corp, K, lambd, learn_rate, iterations, biasReg): 18 | x = BPRMF.BPRMF(corp, K, lambd, biasReg) 19 | x.init() 20 | x.train(iterations, learn_rate) 21 | x.saveModel('hehe.txt') 22 | return 23 | 24 | def goVBPR(corp, K, K2, lambd, lambd2, biasReg, iterations, learn_rate): 25 | x = VBPR.VBPR(corp, K, K2, lambd, lambd2, biasReg) 26 | x.init() 27 | x.train(iterations, learn_rate) 28 | x.saveModel('hehe.txt') 29 | return 30 | 31 | goBPRMF(corp, 20, 10, 0.01, 20, 0.01) 32 | goVBPR(corp, 20, 20, 10, 10, 0.01, 5, 0.01) 33 | -------------------------------------------------------------------------------- /model.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Mon Jul 16 19:26:20 2018 4 | 5 | @author: Shinelon 6 | """ 7 | from VBR2016 import common, corpus 8 | import numpy as np 9 | import copy 10 | 11 | class model(object): 12 | def __init__(self, corp): 13 | self.corp = corp 14 | self.nUsers = self.corp.nUsers 15 | self.nItems = self.corp.nItems 16 | self.nVotes = self.corp.nVotes 17 | self.test_per_user = [(-1, -1)]*self.nUsers 18 | self.val_per_user = [(-1, -1)]*self.nUsers 19 | self.pos_per_user = [] 20 | self.pos_per_item = [] 21 | for i in range(self.nUsers): 22 | self.pos_per_user.append({}) 23 | for i in range(self.nItems): 24 | self.pos_per_item.append({}) 25 | for i in range(self.nVotes): 26 | user = self.corp.V[i].user 27 | item = self.corp.V[i].item 28 | voteTime = self.corp.V[i].voteTime 29 | if self.test_per_user[user][0] == -1: 30 | self.test_per_user[user] = (item, voteTime) 31 | elif self.val_per_user[user][0] == -1: 32 | self.val_per_user[user] = (item, voteTime) 33 | else: 34 | self.pos_per_user[user][item] = voteTime 35 | self.pos_per_item[item][user] = voteTime 36 | 37 | self.num_pos_events = 0 38 | for i in range(self.nUsers): 39 | self.num_pos_events += len(self.pos_per_user[i]) 40 | #模型参数 41 | #self.NW = 0 42 | #self.W = [] 43 | #self.bestW = [] 44 | 45 | self.itemPrice = {} 46 | self.itemBrand = {} 47 | 48 | def AUC(self): 49 | AUC_u_val = [0]*self.nUsers 50 | AUC_u_test = [0]*self.nUsers 51 | for u in range(self.nUsers): 52 | item_test = self.test_per_user[u][0] 53 | item_val = self.val_per_user[u][0] 54 | x_u_test = self.Aprediction(u, item_test) 55 | x_u_val = self.Aprediction(u, item_val) 56 | count_test = 0 57 | count_val = 0 58 | maxnum = 0 59 | for j in range(self.nItems): 60 | if (not self.pos_per_user[u].has_key(j)) or item_test == j or item_val == j: 61 | continue 62 | maxnum += 1 63 | x_uj = self.Aprediction(u, j) 64 | if x_u_test > x_uj: 65 | count_test += 1 66 | if x_u_val > x_uj: 67 | count_val += 1 68 | try: 69 | AUC_u_val[u] = 1.0*count_val/maxnum 70 | AUC_u_test[u] = 1.0*count_test/maxnum 71 | except: 72 | print count_val, count_test, maxnum, x_u_test, x_u_val 73 | self.AUC_val = sum(AUC_u_val)/self.nUsers 74 | self.AUC_test = sum(AUC_u_test)/self.nUsers 75 | self.std = np.std(AUC_u_test) 76 | return 77 | 78 | def AUC_codeItem(self, AUC_test, std, num_user): 79 | return 80 | 81 | def copyBestModel(self): 82 | self.bestW = copy.deepcopy(self.W) 83 | return 84 | 85 | def saveModel(self, path): 86 | f = common.fopen(path, 'w') 87 | self.stringBestW = [str(w)+' ' for w in self.bestW] 88 | f.writelines(self.stringBestW) 89 | f.close() 90 | return 91 | 92 | def loadModel(self, path): 93 | f = common.fopen(path, 'r') 94 | self.stringBestW = '' 95 | for line in f.readlines(): 96 | self.stringBestW += line 97 | self.bestW = [int(w.strip()) for w in self.stringBestW.split()] 98 | f.close() 99 | return 100 | 101 | def toString(self): 102 | return "Empty Model!" 103 | 104 | def Aprediction(self, user, item): 105 | self.childPrediction = getattr(self, 'prediction') 106 | return self.childPrediction(user, item) 107 | --------------------------------------------------------------------------------