├── .gitignore
├── .idea
│   ├── GraphicalModelForRecommendation.iml
│   ├── encodings.xml
│   ├── misc.xml
│   ├── modules.xml
│   ├── other.xml
│   ├── scopes
│   │   └── scope_settings.xml
│   ├── testrunner.xml
│   └── vcs.xml
├── GMRec.py
├── README.md
├── RecommenderContext.py
├── baseline
│   ├── itemKNN.py
│   ├── svd.py
│   ├── svdplusplus.py
│   └── userKNN.py
├── cf_ranking
│   ├── AspectModel.py
│   ├── AutoEncoderCTR.py
│   ├── HMM.py
│   ├── LDA.py
│   ├── PLSA.py
│   ├── PMF.py
│   ├── Recommender.py
│   └── __init__.py
├── cf_rating
│   ├── BNPoissMF.py
│   ├── BPMF.py
│   ├── BPTF.py
│   ├── BPoissMF.py
│   ├── GPLSA.py
│   ├── MMLvd.py
│   ├── PMF.py
│   ├── PoissonMF.py
│   ├── __init__.py
│   └── graphicalrecommender.py
├── config
│   ├── BPMF.cfg
│   ├── BPTF.cfg
│   ├── PMF.cfg
│   └── logging.cfg
├── data
│   ├── DataModel.py
│   ├── __init__.py
│   ├── convertor
│   │   ├── DataConvertor.py
│   │   ├── DocumentDataConvertor.py
│   │   ├── GeneralDataConvertor.py
│   │   ├── TimeDataConvertor.py
│   │   └── __init__.py
│   ├── model
│   │   └── __init__.py
│   ├── sparsematrix.py
│   ├── sparsetensor.py
│   └── splitter
│       ├── CrossValidationDataSplitter.py
│       ├── DataSplitter.py
│       ├── GenericDataSplitter.py
│       ├── GivenNDataSplitter.py
│       ├── RatioDataSplitter.py
│       └── __init__.py
├── evaluator
│   ├── __init__.py
│   ├── pValue.py
│   ├── ranking
│   │   ├── PrecisionRecallF1TopN.py
│   │   └── __init__.py
│   └── rating
│       ├── MAE.py
│       ├── MSE.py
│       ├── RMSE.py
│       └── __init__.py
├── hybrid
│   ├── CTR.py
│   └── __init__.py
├── main.py
└── util
    ├── AIC.py
    ├── LDA.py
    ├── NormalInvWishartDistribution.py
    ├── __init__.py
    ├── dateconvert.py
    ├── logger.py
    ├── normalization.py
    ├── normalwishartdistribution.py
    └── readconf.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
cf_rating/PWTempBPMF.py

--------------------------------------------------------------------------------
/GMRec.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# coding:utf-8

from util import logger
from data.DataModel import DataModel
from RecommenderContext import RecommenderContext

from util.readconf import ReadConfig
from cf_rating import PMF, BPMF, BPTF


class GMRec:
    def __init__(self, config_file, algorithm_name):
        self.config_file = config_file
        self.algorithm_name = algorithm_name
        self.all_algorithms = {
            'PMF': PMF, 'BPMF': BPMF, 'BPTF': BPTF
        }

    def _set_logger(self, config_handler):
        result_file = config_handler.get_parameter_string("Output", "logger") + "{0}_Result.log".format(self.algorithm_name)
        process_file = config_handler.get_parameter_string("Output", "logger") + "{0}_Process.log".format(self.algorithm_name)
        loggers = {'Result': logger.Result(result_file), 'Process': logger.Process(process_file)}
        return loggers

    def run(self):
        config_handler = ReadConfig(self.config_file)
        loggers = self._set_logger(config_handler)
        data_model = DataModel(config_handler)
        recommender_context = RecommenderContext(config_handler, data_model, loggers)

        recommender_context.get_logger()['Process'].debug("\n" + "#"*50 + "Start" + '#'*50)
        recommender_context.get_logger()['Result'].debug("\n" + "#"*50 + "Start" + '#'*50)

        recommender_context.get_logger()['Process'].debug("Build data model")
        recommender_context.get_data_model().build_data_model()

        experiment_num = recommender_context.get_config().get_parameter_int("splitter", "experiment_num")
        for experiment_id in range(experiment_num):
            recommender_context.get_logger()['Process'].debug("The {0}th experiment.".format(experiment_id))
            recommender_context.get_logger()['Result'].debug("The {0}th experiment.".format(experiment_id))

            recommender_context.get_logger()['Process'].debug("Split dataset into train and test")
            save_path = recommender_context.get_config().get_parameter_string("splitter", "save_path")
            recommender_context.experiment_id = experiment_id
            recommender_context.get_data_model().get_data_splitter().split_data(save_path, experiment_id)

            recommender_context.get_logger()['Process'].debug("Enter into training ....")
            algorithm = self.all_algorithms[self.algorithm_name](recommender_context)
            algorithm.run()

        recommender_context.get_logger()['Process'].debug("\n" + "#"*50 + "Finish" + "#"*50)

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# GraphicalModelForRecommendation

Graphical models for recommender systems.

## Factorization Machine
- Poisson factorization machine: web.media.mit.edu/~pernghwa/papers/poissonfm.pdf
- https://github.com/blei-lab/publications/blob/4a002eac1613c8ce4982d0dc7ae119029a42bde5/2015_CharlinRanganathMcInerneyBlei/README.md
- https://github.com/blei-lab
- Nonparametric Poisson factorization machine: http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7373420&tag=1

## Negative-Binomial Matrix Factorization
- https://github.com/dawenl/nbmf/blob/master/code/nbmf.py

## Kalman Filter
- http://link.springer.com/article/10.1186/1687-1847-2012-172#Abs1

## Topic Modeling
- https://www.cs.princeton.edu/~blei/topicmodeling.html

## Stochastic Gradient Descent
- https://en.wikipedia.org/wiki/Stochastic_gradient_descent
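The last reference is the momentum variant of stochastic gradient descent that cf_rating/PMF.py uses to fit the latent factors. A minimal sketch of that update rule for a single user vector; the learning-rate, momentum, and regularization values below are illustrative only (in this repo they come from the .cfg files):

import numpy as np

# Illustrative hyper-parameters; the repo reads these from its config files.
learn_rate, momentum, user_lambda = 0.005, 0.9, 0.01

user_factor = np.random.normal(0, 0.1, size=5)   # one user's latent vector
item_factor = np.random.normal(0, 0.1, size=5)   # one item's latent vector
user_inc = np.zeros(5)                           # momentum accumulator
rating, rating_mean = 4.0, 3.5

error = np.dot(user_factor, item_factor) - (rating - rating_mean)
gradient = error * item_factor + user_lambda * user_factor
user_inc = momentum * user_inc + learn_rate * gradient   # accumulate momentum
user_factor -= user_inc                                  # take the step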
--------------------------------------------------------------------------------
/RecommenderContext.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# coding:utf-8


class RecommenderContext:
    def __init__(self, config_handler, data_model, logger):
        self.config_handler = config_handler
        self.data_model = data_model
        self.logger = logger
        self.experiment_id = 0

    def get_config(self):
        return self.config_handler

    def get_data_model(self):
        return self.data_model

    def get_logger(self):
        return self.logger

--------------------------------------------------------------------------------
/baseline/itemKNN.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlgorithmFan/GraphicalModelForRecommendation/a1ff8877dcd91d1fd3880a12c25120be027515e8/baseline/itemKNN.py

--------------------------------------------------------------------------------
/baseline/svd.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# coding:utf-8


class SVD:
    def __init__(self):
        pass

    def build_model(self):
        pass

--------------------------------------------------------------------------------
/baseline/svdplusplus.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# coding:utf-8

--------------------------------------------------------------------------------
/baseline/userKNN.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlgorithmFan/GraphicalModelForRecommendation/a1ff8877dcd91d1fd3880a12c25120be027515e8/baseline/userKNN.py

--------------------------------------------------------------------------------
/cf_ranking/AspectModel.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# coding:utf-8

import numpy as np
from scipy.sparse import dok_matrix
from Recommender import Recommender
from util import normalize

'''
Hofmann, 1999, Latent class models for collaborative filtering
'''


class AspectModel(Recommender):
    def __init__(self, trainMatrix, testMatrix, configHandler):
        Recommender.__init__(self, trainMatrix, testMatrix, configHandler)

    def initModel(self):
        self.numUsers, self.numItems = self.trainMatrix.shape
        self.prediction = dok_matrix((self.numUsers, self.numItems))
        self.MAX_Iterations = int(self.configHandler.getParameter('AspectModel', 'MAX_Iterations'))
        self.numFactors = int(self.configHandler.getParameter('AspectModel', 'numFactors'))
        self.threshold = float(self.configHandler.getParameter('AspectModel', 'threshold'))

        self.X = np.random.uniform(0, 1, size=(self.numUsers, self.numFactors))   # P(x|z)
        self.X = normalize(self.X)

        self.Y = np.random.uniform(0, 1, size=(self.numItems, self.numFactors))   # P(y|z)
        self.Y = normalize(self.Y)

        self.Z = np.random.uniform(0, 1, size=self.numFactors)                    # P(z)
        self.Z = normalize(self.Z)

        self.Q = np.zeros((self.numUsers, self.numItems, self.numFactors))        # P(z|x,y)

    def buildModel(self):
        self.initModel()
        oldLikelihood = -np.inf    # EM increases the likelihood, so start from -inf
        for iteration in range(self.MAX_Iterations):
            print('Iteration {0}'.format(iteration))
            self.eStep()
            self.mStep()
            likelihood = self.likelihood()

            if likelihood - oldLikelihood < self.threshold:
                break
            oldLikelihood = likelihood

    def eStep(self):
        # P(z|x,y) is proportional to P(z) P(x|z) P(y|z), normalized over z
        self.Q = self.X[:, np.newaxis, :] * self.Y[np.newaxis, :, :] * self.Z[np.newaxis, np.newaxis, :]
        self.Q = self.Q / np.sum(self.Q, axis=-1)[..., np.newaxis]

    def mStep(self):
        probability = self.Q * self.trainMatrix.toarray()[:, :, np.newaxis]
        self.X = np.sum(probability, axis=1) / np.sum(probability, axis=(0, 1))[np.newaxis, :]
        self.Y = np.sum(probability, axis=0) / np.sum(probability, axis=(0, 1))[np.newaxis, :]
        self.Z = np.sum(probability, axis=(0, 1)) / np.sum(probability)

    def likelihood(self):
        result = 0.00
        logX = np.log(self.X)
        logY = np.log(self.Y)
        logZ = np.log(self.Z)
        for user_id, item_id in self.trainMatrix.keys():
            result += np.sum(self.Q[user_id, item_id, :] * (logX[user_id, :] + logY[item_id, :] + logZ))
        return result / len(self.trainMatrix.keys())

    def RegularizedLikelihood(self):
        result = 0.00
        logX = np.log(self.X)
        logY = np.log(self.Y)
        logZ = np.log(self.Z)
        logQ = np.log(self.Q)
        for user_id, item_id in self.trainMatrix.keys():
            result += np.sum(self.Q[user_id, item_id, :] * (logX[user_id, :] + logY[item_id, :] + logZ))
            result += np.sum(self.Q[user_id, item_id, :] * logQ[user_id, item_id, :])
        return result / len(self.trainMatrix.keys())

    def ranking(self, user_id, item_id):
        return np.sum(self.X[user_id, :] * self.Y[item_id, :] * self.Z)
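For reference, the E- and M-steps above are the standard EM updates for Hofmann's aspect model, in which each rating event (x, y) is generated through a latent class z. In the notation of the code (Q is the E-step posterior, n(x, y) the entries of trainMatrix):

% Aspect model: joint probability of user x and item y via latent class z
P(x, y) = \sum_z P(z)\, P(x \mid z)\, P(y \mid z)

% E-step (the array Q in eStep):
Q(z \mid x, y) = \frac{P(z)\, P(x \mid z)\, P(y \mid z)}
                      {\sum_{z'} P(z')\, P(x \mid z')\, P(y \mid z')}

% M-step (X, Y, Z in mStep):
P(x \mid z) \propto \sum_y n(x, y)\, Q(z \mid x, y), \qquad
P(y \mid z) \propto \sum_x n(x, y)\, Q(z \mid x, y), \qquad
P(z) \propto \sum_{x, y} n(x, y)\, Q(z \mid x, y)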
--------------------------------------------------------------------------------
/cf_ranking/AutoEncoderCTR.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# coding:utf-8

'''
https://github.com/wnzhang/deep-ctr
'''

--------------------------------------------------------------------------------
/cf_ranking/HMM.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# coding:utf-8


class HMM(object):
    def __init__(self, state_num, item_num):
        pass

--------------------------------------------------------------------------------
/cf_ranking/LDA.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# coding:utf-8

from Recommender import Recommender


class LatentDirichletAllocation(Recommender):
    '''
    Latent Dirichlet Allocation
    '''
    def __init__(self, trainMatrix, testMatrix, configHandler):
        Recommender.__init__(self, trainMatrix, testMatrix, configHandler)

    def initModel(self):
        pass

    def buildModel(self):
        pass

--------------------------------------------------------------------------------
/cf_ranking/PLSA.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# coding:utf-8

import numpy as np
from scipy.sparse import dok_matrix
from Recommender import Recommender
from util import normalize


class PLSA(Recommender):
    def __init__(self, trainMatrix, testMatrix, configHandler):
        Recommender.__init__(self, trainMatrix, testMatrix, configHandler)

    def initModel(self):
        self.numUsers, self.numItems = self.trainMatrix.shape
        self.prediction = dok_matrix((self.numUsers, self.numItems))
        self.MAX_Iterations = int(self.configHandler.getParameter('PLSA', 'MAX_Iterations'))
        self.numFactors = int(self.configHandler.getParameter('PLSA', 'numFactors'))
        self.threshold = float(self.configHandler.getParameter('PLSA', 'threshold'))

        self.X = np.random.uniform(0, 1, size=(self.numUsers, self.numFactors))   # P(z|x)
        self.X = normalize(self.X)

        self.Y = np.random.uniform(0, 1, size=(self.numItems, self.numFactors))   # P(y|z)
        self.Y = normalize(self.Y)

        self.Q = np.zeros((self.numUsers, self.numItems, self.numFactors))        # P(z|x,y)

    def buildModel(self):
        self.initModel()
        oldLikelihood = -np.inf
        for iteration in range(self.MAX_Iterations):
            print('Iteration {0}'.format(iteration))
            self.eStep()
            self.mStep()
            likelihood = self.likelihood()

            if likelihood - oldLikelihood < self.threshold:
                break
            oldLikelihood = likelihood

    def eStep(self):
        # P(z|x,y) is proportional to P(z|x) P(y|z), normalized over z
        self.Q = self.X[:, np.newaxis, :] * self.Y[np.newaxis, :, :]
        self.Q = self.Q / np.sum(self.Q, axis=-1)[..., np.newaxis]

    def mStep(self):
        probability = self.Q * self.trainMatrix.toarray()[:, :, np.newaxis]
        self.X = np.sum(probability, axis=1)
        self.X = self.X / np.sum(self.X, axis=-1)[:, np.newaxis]     # P(z|x): normalize over z per user
        self.Y = np.sum(probability, axis=0) / np.sum(probability, axis=(0, 1))[np.newaxis, :]

    def likelihood(self):
        result = 0.00
        logX = np.log(self.X)
        logY = np.log(self.Y)
        for user_id, item_id in self.trainMatrix.keys():
            result += np.sum(self.Q[user_id, item_id, :] * (logX[user_id, :] + logY[item_id, :]))
        return result / len(self.trainMatrix.keys())

    def RegularizedLikelihood(self):
        result = 0.00
        logX = np.log(self.X)
        logY = np.log(self.Y)
        logQ = np.log(self.Q)
        for user_id, item_id in self.trainMatrix.keys():
            result += np.sum(self.Q[user_id, item_id, :] * (logX[user_id, :] + logY[item_id, :]))
            result += np.sum(self.Q[user_id, item_id, :] * logQ[user_id, item_id, :])
        return result / len(self.trainMatrix.keys())

    def ranking(self, user_id, item_id):
        # P(y|x) = sum_z P(z|x) P(y|z)
        return np.sum(self.X[user_id, :] * self.Y[item_id, :])
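PLSA differs from the aspect model above only in parameterization: the latent class is conditioned on the user instead of drawn from a global prior. In the notation of the code:

% pLSA, user-conditional parameterization:
P(y \mid x) = \sum_z P(z \mid x)\, P(y \mid z)

% E-step (Q in eStep):
Q(z \mid x, y) = \frac{P(z \mid x)\, P(y \mid z)}
                      {\sum_{z'} P(z' \mid x)\, P(y \mid z')}

% M-step (X and Y in mStep):
P(z \mid x) \propto \sum_y n(x, y)\, Q(z \mid x, y), \qquad
P(y \mid z) \propto \sum_x n(x, y)\, Q(z \mid x, y)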
--------------------------------------------------------------------------------
/cf_ranking/PMF.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlgorithmFan/GraphicalModelForRecommendation/a1ff8877dcd91d1fd3880a12c25120be027515e8/cf_ranking/PMF.py

--------------------------------------------------------------------------------
/cf_ranking/Recommender.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# coding:utf-8

from scipy.sparse import dok_matrix


class Recommender:
    def __init__(self, trainMatrix, testMatrix, configHandler):
        self.trainMatrix = trainMatrix
        self.testMatrix = testMatrix
        self.configHandler = configHandler

    def initModel(self):
        self.numUsers, self.numItems = self.trainMatrix.shape
        self.prediction = dok_matrix((self.numUsers, self.numItems))
        self.MAX_Iterations = int(self.configHandler.getParameter('PMF', 'MAX_Iterations'))

    def buildModel(self):
        pass

    def predict(self):
        pass

    def evaluate(self):
        pass

    def execute(self):
        self.initModel()
        self.buildModel()
        self.evaluate()
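Recommender is a template class: execute() fixes the initModel -> buildModel -> evaluate order, and subclasses such as AspectModel and PLSA override the individual steps. A hypothetical minimal subclass, only to illustrate that flow (MeanRating is made up for this sketch and is not part of the repo):

import numpy as np


class MeanRating(Recommender):
    """Illustrative baseline: predict the global mean of the train matrix."""
    def initModel(self):
        Recommender.initModel(self)     # reuses the base-class setup for the sketch
        self.mean = np.mean(list(self.trainMatrix.values()))

    def predict(self):
        for user_id, item_id in self.testMatrix.keys():
            self.prediction[user_id, item_id] = self.mean

# MeanRating(train, test, config).execute()   # runs init -> build -> evaluate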
--------------------------------------------------------------------------------
/cf_ranking/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlgorithmFan/GraphicalModelForRecommendation/a1ff8877dcd91d1fd3880a12c25120be027515e8/cf_ranking/__init__.py

--------------------------------------------------------------------------------
/cf_rating/BNPoissMF.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# coding:utf-8

'''
Paper: Prem Gopalan, Francisco J.R. Ruiz, et al. Bayesian Nonparametric Poisson Factorization for Recommendation Systems.
GitHub: https://github.com/premgopalan/hgaprec/tree/master/src

Author: Haidong Zhang
Date: April 16, 2016
'''

from GraphicalRecommender import Recommender
import numpy as np
from numpy import log, sqrt
from scipy.sparse import dok_matrix
from util import normalize
from util import Logger

THRESHOLD = 1e-30


class BNPoissMF(Recommender):
    def __init__(self, trainMatrix, testMatrix, configHandler):
        Recommender.__init__(self, trainMatrix, testMatrix, configHandler)
        self.logger = Logger('BNPoissMF.log')

    def initModel(self):
        '''Read the model parameters, and get some common values.'''
        self.numUsers, self.numItems = self.trainMatrix.shape
        self.prediction = dok_matrix((self.numUsers, self.numItems))
        self.MAX_Iterations = int(self.configHandler.getParameter('BNPoissMF', 'MAX_Iterations'))
        self.numFactors = int(self.configHandler.getParameter('BNPoissMF', 'numFactors'))
        self.threshold = float(self.configHandler.getParameter('BNPoissMF', 'threshold'))

        # Get the hyper-parameters
        self.user_alpha = float(self.configHandler.getParameter('BNPoissMF', 'user_alpha'))
        self.user_c = float(self.configHandler.getParameter('BNPoissMF', 'user_c'))

        self.item_a = float(self.configHandler.getParameter('BNPoissMF', 'item_a'))
        self.item_b = float(self.configHandler.getParameter('BNPoissMF', 'item_b'))

        # The model parameters for users
        self.gamma0 = np.zeros(self.numUsers)
        self.gamma1 = np.zeros(self.numUsers)
        self.s = np.zeros(self.numUsers)
        self.nu = np.zeros((self.numUsers, self.numFactors))
        self.theta = np.zeros((self.numUsers, self.numFactors))

        # The model parameters for stick proportions
        self.tau = np.zeros((self.numUsers, self.numFactors))

        # The model parameters for item weights
        self.lambda0 = np.zeros((self.numItems, self.numFactors))
        self.lambda1 = np.zeros((self.numItems, self.numFactors))
        self.beta = np.zeros((self.numItems, self.numFactors))

        self.z = np.zeros((self.numUsers, self.numItems))

        self.pi = np.zeros((self.numUsers, self.numItems))
        self.logPi = np.zeros((self.numUsers, self.numItems))

    def buildModel(self):
        pass

    def initUserScalingParameters(self):
        '''Initial equations for the user scaling parameters gamma_u0 and gamma_u1.'''
        pass

    def initStickProportions(self):
        '''The update equations for the stick proportions nu_uk can be obtained by taking
        the derivative of the objective function with respect to nu_uk.'''
        self.nu = 0.001 * np.random.random((self.numUsers, self.numFactors))

    def computePi(self):
        '''Equation (10) of the paper: compute the stick proportions pi from nu.'''
        pass

    def initItemWeights(self):
        pass

    def updateUserScalingParameters(self):
        pass

    def updateStickProportions(self):
        pass

    def updateItemWeights(self):
        pass

    def calculateConjugacy(self):
        pass

    def GammaPoisson(self):
        pass

    def solveQuadratic(self, a, b, c):
        '''Pick the root of a*x^2 + b*x + c = 0 that lies in (0, 1].'''
        s1 = (-b + sqrt(b*b - 4*a*c)) / (2*a)
        s2 = (-b - sqrt(b*b - 4*a*c)) / (2*a)

        if s1 > .0 and s1 <= 1.0 and s2 > .0 and s2 <= 1.0:
            self.logger.error('s1 %f and s2 %f are both in range in solve_quadratic()' % (s1, s2))
            self.logger.error('a = %.5f, b = %.5f, c = %.5f\n' % (a, b, c))
            if s1 < s2:
                return s1 + THRESHOLD
            return s2 + THRESHOLD

        if s1 > .0 and s1 <= 1.0:
            return s1

        if s2 > .0 and s2 <= 1.0:
            return s2

        if np.abs(s1 - .0) < THRESHOLD:
            return THRESHOLD

        if np.abs(1.0 - s1) < THRESHOLD:
            return 1.0 - THRESHOLD

        if np.abs(s2 - .0) < THRESHOLD:
            return THRESHOLD

        if np.abs(s2 - 1.0) < THRESHOLD:
            return 1.0 - THRESHOLD

        self.logger.error('WARNING: s1 %.10f and s2 %.10f are out of range in solve_quadratic()' % (s1, s2))
        return s1


if __name__ == '__main__':
    bnprec = BNPoissMF()
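computePi is a stub above. In Gopalan et al.'s Bayesian nonparametric Poisson factorization the per-user weights come from the usual stick-breaking construction over factors, so a plausible reading of the pi/logPi quantities is the following (hedged: the exact form is the paper's Equation (10), and note the code allocates pi as numUsers x numItems even though stick proportions are per factor):

% Stick-breaking construction over factors k = 1, 2, ...:
\pi_{uk} = \nu_{uk} \prod_{j < k} (1 - \nu_{uj}), \qquad
\log \pi_{uk} = \log \nu_{uk} + \sum_{j < k} \log(1 - \nu_{uj})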
--------------------------------------------------------------------------------
/cf_rating/BPMF.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# coding:utf-8
"""
Reference code: http://www.utstat.toronto.edu/~rsalakhu/BPMF.html
Reference paper: Salakhutdinov and Mnih, Bayesian Probabilistic Matrix Factorization using Markov Chain Monte Carlo, ICML 2008.
"""

import numpy as np
import codecs
from GraphicalRecommender import Recommender
from util.NormalWishartDistribution import NormalWishartDistribution
from scipy.sparse import dok_matrix


class BayesianProbabilisticMatrixFactorization(Recommender):
    """
    Bayesian Probabilistic Matrix Factorization.
    """
    def __init__(self, recommender_context):
        Recommender.__init__(self, recommender_context)

    def _read_cfg(self):
        self.user_normal_dist_mu0_init = self.config_handler['Parameters', 'user_normal_dist_mu0', 'float']
        self.user_normal_dist_beta0_init = self.config_handler['Parameters', 'user_normal_dist_beta0', 'float']
        self.user_Wishart_dist_W0_init = self.config_handler['Parameters', 'user_Wishart_dist_W0', 'float']

        self.item_normal_dist_mu0_init = self.config_handler['Parameters', 'item_normal_dist_mu0', 'float']
        self.item_normal_dist_beta0_init = self.config_handler['Parameters', 'item_normal_dist_beta0', 'float']
        self.item_Wishart_dist_W0_init = self.config_handler['Parameters', 'item_Wishart_dist_W0', 'float']

        self.rating_sigma_init = self.config_handler['Parameters', 'rating_sigma', 'float']

    def _init_model(self):
        self.user_num, self.item_num = self.train_matrix.shape
        self.mean_rating = np.mean(self.train_matrix.values())

        self.predictions = dok_matrix((self.user_num, self.item_num))

        if self.config_handler['Output', 'is_load', 'bool']:
            self._load_model()
            assert(self.user_factors.shape[1] == self.item_factors.shape[1])
            self.factor_num = self.user_factors.shape[1]
        else:
            self._read_cfg()

            if self.config_handler['Parameters', 'is_init_path', 'bool']:
                self._load_init_model()
            else:
                self.factor_num = self.config_handler['Parameters', 'factor_num', 'int']
                self.user_factors = np.random.normal(0, 1, size=(self.user_num, self.factor_num))
                self.item_factors = np.random.normal(0, 1, size=(self.item_num, self.factor_num))

        self.markov_num = 0
        validation_rmse, test_rmse = self.__evaluate_epoch__()
        self.logger['Process'].debug('Epoch {0}: Training RMSE - {1}, Testing RMSE - {2}'.format(0, validation_rmse, test_rmse))

        self.user_normal_dist_mu0 = np.zeros(self.factor_num) + self.user_normal_dist_mu0_init
        self.user_normal_dist_beta0 = self.user_normal_dist_beta0_init
        self.user_Wishart_dist_W0 = np.eye(self.factor_num) * self.user_Wishart_dist_W0_init
        self.user_Wishart_dist_nu0 = self.factor_num

        self.item_normal_dist_mu0 = np.zeros(self.factor_num) + self.item_normal_dist_mu0_init
        self.item_normal_dist_beta0 = self.item_normal_dist_beta0_init
        self.item_Wishart_dist_W0 = np.eye(self.factor_num) * self.item_Wishart_dist_W0_init
        self.item_Wishart_dist_nu0 = self.factor_num

        self.rating_sigma = self.rating_sigma_init

    def _build_model(self):
        user_train_matrix = dict()
        item_train_matrix = dict()
        for user_id, item_id in self.train_matrix.keys():
            user_train_matrix.setdefault(user_id, dok_matrix((1, self.item_num)))
            user_train_matrix[user_id][0, item_id] = self.train_matrix.get((user_id, item_id))
            item_train_matrix.setdefault(item_id, dok_matrix((1, self.user_num)))
            item_train_matrix[item_id][0, user_id] = self.train_matrix.get((user_id, item_id))

        self.previous_loss = -np.inf
        max_iterations = self.config_handler['Parameters', 'max_iterations', 'int']
        for iteration in range(max_iterations):
            self.logger['Process'].debug('Epoch {0}: update hyper-parameters'.format(iteration))
            user_factors_mu, user_factors_variance = \
                self._sampling_hyperparameters(self.user_factors, self.user_normal_dist_mu0, self.user_normal_dist_beta0,
                                               self.user_Wishart_dist_nu0, self.user_Wishart_dist_W0)
            item_factors_mu, item_factors_variance = \
                self._sampling_hyperparameters(self.item_factors, self.item_normal_dist_mu0, self.item_normal_dist_beta0,
                                               self.item_Wishart_dist_nu0, self.item_Wishart_dist_W0)

            self.logger['Process'].debug('Epoch {0}: update latent factors'.format(iteration))
            for gibbs_iteration in range(2):
                for user_id in range(self.user_num):
                    user_ratings = user_train_matrix[user_id] if user_id in user_train_matrix else dict()
                    if len(user_ratings.keys()) == 0:
                        continue
                    self.user_factors[user_id] = self._update_parameters(
                        self.item_factors, user_ratings, user_factors_mu, user_factors_variance)

                for item_id in range(self.item_num):
                    item_ratings = item_train_matrix[item_id] if item_id in item_train_matrix else dict()
                    if len(item_ratings.keys()) == 0:
                        continue
                    self.item_factors[item_id] = self._update_parameters(
                        self.user_factors, item_ratings, item_factors_mu, item_factors_variance)

            validation_rmse, test_rmse = self.__evaluate_epoch__()
            self.logger['Process'].debug('Epoch {0}: Training RMSE - {1}, Testing RMSE - {2}'.format(iteration, validation_rmse, test_rmse))

    def __evaluate_epoch__(self):
        validation_rmse = 0.0
        for user_id, item_id in self.train_matrix.keys():
            real_rating = self.train_matrix.get((user_id, item_id))
            predict_rating = self._predict(user_id, item_id)
            validation_rmse += (real_rating - predict_rating) ** 2
        self._recommend()
        results = self._evaluate()
        return np.sqrt(validation_rmse / len(self.train_matrix.keys())), results['RMSE']

    def _sampling_hyperparameters(self, factors, normal_dist_mu0, normal_dist_beta0, Wishart_dist_nu0, Wishart_dist_W0):
        num_N = factors.shape[0]
        mean_U = np.mean(factors, axis=0)
        variance_S = np.cov(factors.transpose(), bias=1)
        mu0_minus_factors = normal_dist_mu0 - mean_U
        mu0_minus_factors = np.reshape(mu0_minus_factors, (mu0_minus_factors.shape[0], 1))

        W0 = np.linalg.inv(Wishart_dist_W0) + num_N * variance_S \
            + normal_dist_beta0 * num_N / (normal_dist_beta0 + num_N) * np.dot(mu0_minus_factors, mu0_minus_factors.transpose())
        W0_post = np.linalg.inv(W0)
        W0_post = (W0_post + W0_post.transpose()) / 2

        mu_post = (normal_dist_beta0 * normal_dist_mu0 + num_N * mean_U) / (normal_dist_beta0 + num_N)
        beta_post = normal_dist_beta0 + num_N
        nu_post = Wishart_dist_nu0 + num_N
        normal_Wishart_distribution = NormalWishartDistribution(mu_post, beta_post, nu_post, W0_post)
        mu, sigma = normal_Wishart_distribution.sample()
        return mu, sigma

    def _update_parameters(self, factors, ratings, factors_mu, factors_variance):
        index = np.array([col_id for row_id, col_id in ratings.keys()])
        VVT = np.dot(factors[index, :].transpose(), factors[index, :])
        sigma = factors_variance + self.rating_sigma * VVT
        sigma_inv = np.linalg.inv(sigma)

        rating_values = np.array(ratings.values()) - self.mean_rating
        VR = np.dot(factors[index, :].transpose(), np.reshape(rating_values, newshape=(rating_values.shape[0], 1)))
        mu_right = self.rating_sigma * VR + np.dot(factors_variance, np.reshape(factors_mu, newshape=(factors_mu.shape[0], 1)))
        mu = np.dot(sigma_inv, mu_right)
        mu = np.reshape(mu, newshape=(mu.shape[0], ))
        return np.random.multivariate_normal(mu, sigma_inv)

    def _recommend(self):
        for user_id, item_id in self.test_matrix.keys():
            predict_rating = self._predict(user_id, item_id) + self.predictions[user_id, item_id] * self.markov_num
            self.predictions[user_id, item_id] = predict_rating / (self.markov_num + 1)
        self.markov_num += 1

    def _predict(self, user_id, item_id, time_id=0):
        predict_rating = np.dot(self.user_factors[user_id, :], self.item_factors[item_id, :]) + self.mean_rating
        if predict_rating > 5:
            return 5
        elif predict_rating < 1:
            return 1
        else:
            return predict_rating

    def _load_init_model(self):
        load_path = self.config_handler["Output", "load_path", "string"]
        load_file = load_path + "PMF_{0}.txt".format(self.recommender_context.experiment_id)

        with codecs.open(load_file, mode='r', encoding='utf-8') as read_fp:
            for line in read_fp:
                if line.startswith('factor_num'):
                    self.factor_num = int(line.split(':')[1].strip())
                elif line.startswith('user_factor'):
                    self.user_factors = self._load_matrix(read_fp)
                elif line.startswith('item_factor'):
                    self.item_factors = self._load_matrix(read_fp)

    def _save_result(self, result):
        self.logger['Result'].debug('factor_num: {0}'.format(self.factor_num))

        self.logger['Result'].debug('user_normal_dist_mu0: {0}'.format(self.user_normal_dist_mu0_init))
        self.logger['Result'].debug('user_normal_dist_beta0: {0}'.format(self.user_normal_dist_beta0_init))
        self.logger['Result'].debug('user_Wishart_dist_W0: {0}'.format(self.user_Wishart_dist_W0_init))

        self.logger['Result'].debug('item_normal_dist_mu0: {0}'.format(self.item_normal_dist_mu0_init))
        self.logger['Result'].debug('item_normal_dist_beta0: {0}'.format(self.item_normal_dist_beta0_init))
        self.logger['Result'].debug('item_Wishart_dist_W0: {0}'.format(self.item_Wishart_dist_W0_init))

        Recommender._save_result(self, result)

    def _save_model(self):
        save_path = self.config_handler["Output", "save_path", "string"]
        save_file = save_path + "BPMF_{0}.txt".format(self.recommender_context.experiment_id)

        with codecs.open(save_file, mode='w', encoding='utf-8') as write_fp:
            write_fp.write('factor_num: {0}\n'.format(self.factor_num))
            write_fp.write('user_normal_dist_mu0: {0}\n'.format(self.user_normal_dist_mu0_init))
            write_fp.write('user_normal_dist_beta0: {0}\n'.format(self.user_normal_dist_beta0_init))
            write_fp.write('user_Wishart_dist_W0: {0}\n'.format(self.user_Wishart_dist_W0_init))
            write_fp.write('item_normal_dist_mu0: {0}\n'.format(self.item_normal_dist_mu0_init))
            write_fp.write('item_normal_dist_beta0: {0}\n'.format(self.item_normal_dist_beta0_init))
            write_fp.write('item_Wishart_dist_W0: {0}\n'.format(self.item_Wishart_dist_W0_init))

            write_fp.write('user_factors\n')
            self._save_matrix(self.user_factors, write_fp)

            write_fp.write('item_factors\n')
            self._save_matrix(self.item_factors, write_fp)

    def _load_model(self):
        load_path = self.config_handler["Output", "load_path", "string"]
        load_file = load_path + "PMF_{0}.txt".format(self.recommender_context.experiment_id)

        with codecs.open(load_file, mode='r', encoding='utf-8') as read_fp:
            for line in read_fp:
                if line.startswith('factor_num'):
                    self.factor_num = int(line.split(':')[1].strip())
                elif line.startswith('user_normal_dist_mu0'):
                    self.user_normal_dist_mu0_init = float(line.split(':')[1].strip())
                elif line.startswith('user_normal_dist_beta0'):
                    self.user_normal_dist_beta0_init = float(line.split(':')[1].strip())
                elif line.startswith('user_Wishart_dist_W0'):
                    self.user_Wishart_dist_W0_init = float(line.split(':')[1].strip())
                elif line.startswith('item_normal_dist_mu0'):
                    self.item_normal_dist_mu0_init = float(line.split(':')[1].strip())
                elif line.startswith('item_normal_dist_beta0'):
                    self.item_normal_dist_beta0_init = float(line.split(':')[1].strip())
                elif line.startswith('item_Wishart_dist_W0'):
                    self.item_Wishart_dist_W0_init = float(line.split(':')[1].strip())
                elif line.startswith('user_factor'):
                    self.user_factors = self._load_matrix(read_fp)
                elif line.startswith('item_factor'):
                    self.item_factors = self._load_matrix(read_fp)
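_sampling_hyperparameters implements the conditional for the Normal-Wishart hyperprior from the BPMF paper. In the notation of the code (N rows of factors, sample mean \bar{U}, sample covariance \bar{S}), the posterior parameters it computes are:

% Posterior Normal-Wishart parameters used in _sampling_hyperparameters:
\beta_0^* = \beta_0 + N, \qquad
\mu_0^* = \frac{\beta_0 \mu_0 + N \bar{U}}{\beta_0 + N}, \qquad
\nu_0^* = \nu_0 + N,

\left( W_0^* \right)^{-1} = W_0^{-1} + N \bar{S}
  + \frac{\beta_0 N}{\beta_0 + N} (\mu_0 - \bar{U})(\mu_0 - \bar{U})^{\mathsf{T}},

% after which (mu, Lambda) is drawn from Normal-Wishart(mu_0^*, beta_0^*, W_0^*, nu_0^*).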
--------------------------------------------------------------------------------
/cf_rating/BPTF.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# coding:utf-8

"""
Reference Paper: Liang Xiong et al.
Temporal Collaborative Filtering with Bayesian Probabilistic Tensor Factorization.
Reference Code: https://www.cs.cmu.edu/~lxiong/bptf/bptf.html
"""

import numpy as np
import codecs
from scipy.sparse import dok_matrix
from GraphicalRecommender import Recommender
from util.NormalWishartDistribution import NormalWishartDistribution


class BayesianProbabilisticTensorFactorization(Recommender):
    def __init__(self, recommender_context):
        Recommender.__init__(self, recommender_context)

    def _read_cfg(self):
        self.user_normal_dist_mu0_init = self.config_handler['Parameters', 'user_normal_dist_mu0', 'float']
        self.user_normal_dist_beta0_init = self.config_handler['Parameters', 'user_normal_dist_beta0', 'float']
        self.user_Wishart_dist_W0_init = self.config_handler['Parameters', 'user_Wishart_dist_W0', 'float']

        self.item_normal_dist_mu0_init = self.config_handler['Parameters', 'item_normal_dist_mu0', 'float']
        self.item_normal_dist_beta0_init = self.config_handler['Parameters', 'item_normal_dist_beta0', 'float']
        self.item_Wishart_dist_W0_init = self.config_handler['Parameters', 'item_Wishart_dist_W0', 'float']

        self.time_normal_dist_mu0_init = self.config_handler['Parameters', 'time_normal_dist_mu0', 'float']
        self.time_normal_dist_beta0_init = self.config_handler['Parameters', 'time_normal_dist_beta0', 'float']
        self.time_Wishart_dist_W0_init = self.config_handler['Parameters', 'time_Wishart_dist_W0', 'float']

        self.rating_sigma_init = self.config_handler['Parameters', 'rating_sigma', 'float']

    def _init_model(self):
        self.user_num, self.item_num, self.time_num = self.train_tensor.shape()
        self.mean_rating = np.mean(self.train_tensor.values())

        # dok_matrix is strictly two-dimensional, so keep the running
        # prediction averages for (user, item, time) triples in a dict instead.
        self.predictions = dict()

        if self.config_handler['Parameters', 'is_load', 'bool']:
            self._load_model()
            assert(self.user_factors.shape[1] == self.item_factors.shape[1] and self.item_factors.shape[1] == self.time_factors.shape[1])
            self.factor_num = self.user_factors.shape[1]
        else:
            self._read_cfg()

            # initialize the latent factors of user, item and time.
            if self.config_handler['Parameters', 'is_init_path', 'bool']:
                self._load_init_model()
            else:
                self.factor_num = self.config_handler['Parameters', 'factor_num', 'int']
                self.user_factors = np.random.normal(0, 1, size=(self.user_num, self.factor_num))
                self.item_factors = np.random.normal(0, 1, size=(self.item_num, self.factor_num))
                self.time_factors = np.random.normal(0, 1, size=(self.time_num, self.factor_num))

        self.markov_num = 0
        validation_rmse, test_rmse = self.__evaluate_epoch__()
        self.logger['Process'].debug('Epoch {0}: Training RMSE - {1}, Testing RMSE - {2}'.format(0, validation_rmse, test_rmse))

        # get the user parameters
        self.user_normal_dist_mu0 = np.zeros(self.factor_num) + self.user_normal_dist_mu0_init
        self.user_normal_dist_beta0 = self.user_normal_dist_beta0_init
        self.user_Wishart_dist_W0 = np.eye(self.factor_num) * self.user_Wishart_dist_W0_init
        self.user_Wishart_dist_nu0 = self.factor_num

        # get the item parameters
        self.item_normal_dist_mu0 = np.zeros(self.factor_num) + self.item_normal_dist_mu0_init
        self.item_normal_dist_beta0 = self.item_normal_dist_beta0_init
        self.item_Wishart_dist_W0 = np.eye(self.factor_num) * self.item_Wishart_dist_W0_init
        self.item_Wishart_dist_nu0 = self.factor_num

        # get the time parameters
        self.time_normal_dist_mu0 = np.zeros(self.factor_num) + self.time_normal_dist_mu0_init
        self.time_normal_dist_beta0 = self.time_normal_dist_beta0_init
        self.time_Wishart_dist_W0 = np.eye(self.factor_num) * self.time_Wishart_dist_W0_init
        self.time_Wishart_dist_nu0 = self.factor_num

        self.rating_sigma = self.rating_sigma_init

    def _build_model(self):
        # Speed up the Gibbs sampling by grouping the ratings per user, item and time slice
        train_matrix_by_user, train_matrix_by_item, train_matrix_by_time = dict(), dict(), dict()
        for user_id, item_id, time_id in self.train_tensor.keys():
            train_matrix_by_user.setdefault(user_id, dok_matrix((self.item_num, self.time_num)))
            train_matrix_by_user[user_id][item_id, time_id] = self.train_tensor[user_id, item_id, time_id]

            train_matrix_by_item.setdefault(item_id, dok_matrix((self.user_num, self.time_num)))
            train_matrix_by_item[item_id][user_id, time_id] = self.train_tensor[user_id, item_id, time_id]

            train_matrix_by_time.setdefault(time_id, dok_matrix((self.user_num, self.item_num)))
            train_matrix_by_time[time_id][user_id, item_id] = self.train_tensor[user_id, item_id, time_id]

        max_iterations = self.config_handler['Parameters', 'max_iterations', 'int']
        for iteration in range(max_iterations):
            user_factors_mu, user_factors_variance = \
                self._sampling_hyperparameters(self.user_factors, self.user_normal_dist_mu0, self.user_normal_dist_beta0,
                                               self.user_Wishart_dist_nu0, self.user_Wishart_dist_W0)
            item_factors_mu, item_factors_variance = \
                self._sampling_hyperparameters(self.item_factors, self.item_normal_dist_mu0, self.item_normal_dist_beta0,
                                               self.item_Wishart_dist_nu0, self.item_Wishart_dist_W0)

            time_factors_mu, time_factors_variance = \
                self._sampling_time_hyperparameters(self.time_factors, self.time_normal_dist_mu0, self.time_normal_dist_beta0,
                                                    self.time_Wishart_dist_nu0, self.time_Wishart_dist_W0)

            for gibbs_iteration in range(2):
                for user_id in range(self.user_num):
                    item_time_matrix = train_matrix_by_user[user_id]
                    if len(item_time_matrix.keys()) < 1:
                        continue
                    self.user_factors[user_id] = self._update_parameters(
                        self.item_factors, self.time_factors, item_time_matrix, user_factors_mu, user_factors_variance)

                for item_id in range(self.item_num):
                    user_time_matrix = train_matrix_by_item[item_id]
                    if len(user_time_matrix.keys()) < 1:
                        continue
                    self.item_factors[item_id] = self._update_parameters(
                        self.user_factors, self.time_factors, user_time_matrix, item_factors_mu, item_factors_variance)

                for time_id in range(self.time_num):
                    user_item_matrix = train_matrix_by_time[time_id]
                    if len(user_item_matrix.keys()) < 1:
                        continue
                    self.time_factors[time_id] = self._update_time_parameters(
                        self.user_factors, self.item_factors, self.time_factors, user_item_matrix, time_factors_mu, time_factors_variance, time_id)

            validation_rmse, test_rmse = self.__evaluate_epoch__()
            self.logger['Process'].debug('Epoch {0}: Training RMSE - {1}, Testing RMSE - {2}'.format(iteration, validation_rmse, test_rmse))

    def run(self):
        self.logger['Process'].debug('Get the train dataset')
        self.train_tensor = self.recommender_context.get_data_model().get_data_splitter().get_train_data()
        self.logger['Result'].debug('The number of user-item pairs in the train dataset is {0}'.format(len(self.train_tensor.keys())))

        self.logger['Process'].debug('Get the test dataset')
        self.test_tensor = self.recommender_context.get_data_model().get_data_splitter().get_test_data()
        self.logger['Result'].debug('The number of user-item pairs in the test dataset is {0}'.format(len(self.test_tensor.keys())))

        self.logger['Process'].debug('Initialize the model parameters')
        self._init_model()

        self.logger['Process'].debug('Building model....')
        self._build_model()

        is_save = self.config_handler['Output', 'is_save', 'bool']
        if is_save:
            self.logger['Process'].debug('Save model ....')
            self._save_model()

        self.logger['Process'].debug('Recommending ...')
        self._recommend()

        self.logger['Process'].debug('Evaluating ...')
        result = self._evaluate()
        self._save_result(result)

        self.logger['Process'].debug("Finish.")
        self.logger['Process'].debug("#"*50)

    def __evaluate_epoch__(self):
        validation_rmse = 0.0
        for user_id, item_id, time_id in self.train_tensor.keys():
            real_rating = self.train_tensor.get((user_id, item_id, time_id))
            predict_rating = self._predict(user_id, item_id, time_id)
            validation_rmse += (real_rating - predict_rating) ** 2
        self._recommend()
        results = self._evaluate()
        return np.sqrt(validation_rmse / len(self.train_tensor.keys())), results['RMSE']

    def _recommend(self):
        for user_id, item_id, time_id in self.test_tensor.keys():
            predict_rating = self._predict(user_id, item_id, time_id) \
                + self.predictions.get((user_id, item_id, time_id), 0.0) * self.markov_num
            self.predictions[user_id, item_id, time_id] = predict_rating / (self.markov_num + 1)
        self.markov_num += 1

    # Update hyper-parameters of user or item
    def _sampling_hyperparameters(self, factors, normal_dist_mu0, normal_dist_beta0, Wishart_dist_nu0, Wishart_dist_W0):
        num_N = factors.shape[0]
        mean_U = np.mean(factors, axis=0)
        variance_S = np.cov(factors.transpose(), bias=1)
        mu0_minus_factors = normal_dist_mu0 - mean_U
        mu0_minus_factors = np.reshape(mu0_minus_factors, (mu0_minus_factors.shape[0], 1))

        W0 = np.linalg.inv(Wishart_dist_W0) + num_N * variance_S \
            + normal_dist_beta0 * num_N / (normal_dist_beta0 + num_N) * np.dot(mu0_minus_factors, mu0_minus_factors.transpose())
        W0_post = np.linalg.inv(W0)
        W0_post = (W0_post + W0_post.transpose()) / 2

        mu_post = (normal_dist_beta0 * normal_dist_mu0 + num_N * mean_U) / (normal_dist_beta0 + num_N)
        beta_post = normal_dist_beta0 + num_N
        nu_post = Wishart_dist_nu0 + num_N
        normal_Wishart_distribution = NormalWishartDistribution(mu_post, beta_post, nu_post, W0_post)
        mu, sigma = normal_Wishart_distribution.sample()
        return mu, sigma

    # Update time hyper-parameters
    def _sampling_time_hyperparameters(self, factors, normal_dist_mu0, normal_dist_beta0, Wishart_dist_nu0, Wishart_dist_W0):
        num_K = factors.shape[0]
        mu_post = (normal_dist_beta0 * normal_dist_mu0 + factors[0, :]) / (1.0 + normal_dist_beta0)
        beta_post = normal_dist_beta0 + 1.0
        nu_post = Wishart_dist_nu0 + num_K
        X = np.array([factors[t, :] - factors[t-1, :] for t in range(1, num_K)])
        variance_S = np.dot(X.transpose(), X)

        mu0_minus_factors = factors[0, :] - normal_dist_mu0
        mu0_minus_factors = np.reshape(mu0_minus_factors, newshape=(mu0_minus_factors.shape[0], 1))
        W0 = np.linalg.inv(Wishart_dist_W0) + variance_S \
            + normal_dist_beta0 / (1.0 + normal_dist_beta0) * np.dot(mu0_minus_factors, mu0_minus_factors.transpose())
        # invert back and symmetrize, mirroring _sampling_hyperparameters above
        W0_post = np.linalg.inv(W0)
        W0_post = (W0_post + W0_post.transpose()) / 2
        normal_Wishart_distribution = NormalWishartDistribution(mu_post, beta_post, nu_post, W0_post)
        mu, sigma = normal_Wishart_distribution.sample()
        return mu, sigma

    def _update_parameters(self, factors0, factors1, ratings, factors_mu, factors_variance):
        """
        Sample one latent factor vector given the ratings it participates in,
        the factors on the other two modes, and the current hyper-parameters.
        """
        QQ = np.zeros((self.factor_num, self.factor_num))
        RQ = np.zeros(self.factor_num)
        for dim0, dim1 in ratings.keys():
            Q = factors0[dim0, :] * factors1[dim1, :]
            QQ += np.outer(Q, Q)
            RQ += (ratings[dim0, dim1] - self.mean_rating) * Q
        sigma_inv = np.linalg.inv(factors_variance + self.rating_sigma * QQ)
        mu = np.dot(sigma_inv, np.dot(factors_variance, factors_mu) + self.rating_sigma * RQ)
        return np.random.multivariate_normal(mu, sigma_inv)

    def _update_time_parameters(self, user_factors, item_factors, time_factors, ratings, factors_mu, factors_variance, time_id):
        QQ = np.zeros((self.factor_num, self.factor_num))
        RQ = np.zeros(self.factor_num)
        for dim0, dim1 in ratings.keys():
            Q = user_factors[dim0, :] * item_factors[dim1, :]
            QQ += np.outer(Q, Q)
            RQ += (ratings[dim0, dim1] - self.mean_rating) * Q

        if time_id == 0:
            sigma_inv = np.linalg.inv(2 * factors_variance + self.rating_sigma * QQ)
            mu = (time_factors[1, :] + factors_mu) / 2
        elif time_id == self.time_num - 1:
            sigma_inv = np.linalg.inv(factors_variance + self.rating_sigma * QQ)
            Tk_1 = time_factors[self.time_num - 2, :]
            mu = np.dot(sigma_inv, np.dot(factors_variance, Tk_1) + self.rating_sigma * RQ)
        else:
            sigma_inv = np.linalg.inv(2 * factors_variance + self.rating_sigma * QQ)
            Tk = time_factors[time_id - 1, :] + time_factors[time_id + 1, :]
            mu = np.dot(sigma_inv, np.dot(factors_variance, Tk) + self.rating_sigma * RQ)

        return np.random.multivariate_normal(mu, sigma_inv)

    def _predict(self, user_id, item_id, time_id=0):
        assert(time_id < self.time_num)
        predict_rating = np.sum(self.user_factors[user_id, :] * self.item_factors[item_id, :] * self.time_factors[time_id, :]) + self.mean_rating
        if predict_rating > 5:
            return 5
        elif predict_rating < 1:
            return 1
        else:
            return predict_rating

    def _save_result(self, result):
        self.logger['Result'].debug('factor_num: {0}'.format(self.factor_num))

        self.logger['Result'].debug('user_normal_dist_mu0: {0}'.format(self.user_normal_dist_mu0_init))
        self.logger['Result'].debug('user_normal_dist_beta0: {0}'.format(self.user_normal_dist_beta0_init))
        self.logger['Result'].debug('user_Wishart_dist_W0: {0}'.format(self.user_Wishart_dist_W0_init))

        self.logger['Result'].debug('item_normal_dist_mu0: {0}'.format(self.item_normal_dist_mu0_init))
        self.logger['Result'].debug('item_normal_dist_beta0: {0}'.format(self.item_normal_dist_beta0_init))
        self.logger['Result'].debug('item_Wishart_dist_W0: {0}'.format(self.item_Wishart_dist_W0_init))

        self.logger['Result'].debug('time_normal_dist_mu0: {0}'.format(self.time_normal_dist_mu0_init))
        self.logger['Result'].debug('time_normal_dist_beta0: {0}'.format(self.time_normal_dist_beta0_init))
        self.logger['Result'].debug('time_Wishart_dist_W0: {0}'.format(self.time_Wishart_dist_W0_init))

        self.logger['Result'].debug('rating_sigma: {0}'.format(self.rating_sigma_init))
        Recommender._save_result(self, result)

    def _load_init_model(self):
        load_path = self.config_handler["Output", "load_path", "string"]
        load_file = load_path + "PMF_{0}.txt".format(self.recommender_context.experiment_id)

        with codecs.open(load_file, mode='r', encoding='utf-8') as read_fp:
            for line in read_fp:
                if line.startswith('factor_num'):
                    self.factor_num = int(line.split(':')[1].strip())
                elif line.startswith('user_factor'):
                    self.user_factors = self._load_matrix(read_fp)
                elif line.startswith('item_factor'):
                    self.item_factors = self._load_matrix(read_fp)
        # The PMF initialization has no time factors, so draw them randomly.
        self.time_factors = np.random.normal(0, 1, size=(self.time_num, self.factor_num))
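The special-casing by time_id in _update_time_parameters comes from the BPTF paper's Markov chain prior on the time factors, which ties each slice to its predecessor (so the first and last slices have one neighbor, and interior slices have two):

% Markov prior on the time factors in BPTF:
T_1 \sim \mathcal{N}(\mu_T, \Lambda_T^{-1}), \qquad
T_k \sim \mathcal{N}(T_{k-1}, \Lambda_T^{-1}), \quad k = 2, \dots, K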
--------------------------------------------------------------------------------
/cf_rating/BPoissMF.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# coding:utf-8

'''
Paper: Prem Gopalan, et al. Scalable Recommendation with Poisson Factorization.
GitHub: https://github.com/mertterzihan/PMF/blob/master/Code/PoissonFactorization.py#L47
        http://www.hongliangjie.com/2015/08/17/poisson-matrix-factorization/

Author: Haidong Zhang
Date: April 24, 2016
'''

from GraphicalRecommender import Recommender
import numpy as np
from scipy.sparse import dok_matrix
from random import shuffle


def gammaRnd(shape, scale, size=None):
    return np.random.gamma(shape, scale, size)


def poissonRnd(scale, size=None):
    return np.random.poisson(scale, size)


class BPoissMF(Recommender):
    def __init__(self, trainMatrix, testMatrix, configHandler):
        Recommender.__init__(self, trainMatrix, testMatrix, configHandler)

    def initModel(self):
        self.numUsers, self.numItems = self.trainMatrix.shape
        self.prediction = dok_matrix((self.numUsers, self.numItems))
        self.MAX_Iterations = int(self.configHandler.getParameter('BPoissMF', 'MAX_Iterations'))
        self.numFactors = int(self.configHandler.getParameter('BPoissMF', 'numFactors'))
        self.threshold = float(self.configHandler.getParameter('BPoissMF', 'threshold'))

        # Get the hyper-parameters
        self.a = float(self.configHandler.getParameter('BPoissMF', 'a'))
        self.ap = float(self.configHandler.getParameter('BPoissMF', 'ap'))
        self.bp = float(self.configHandler.getParameter('BPoissMF', 'bp'))

        self.c = float(self.configHandler.getParameter('BPoissMF', 'c'))
        self.cp = float(self.configHandler.getParameter('BPoissMF', 'cp'))
        self.dp = float(self.configHandler.getParameter('BPoissMF', 'dp'))

        # Init xi
        self.xi = gammaRnd(self.ap, self.ap / self.bp, size=self.numUsers)
        # Init theta
        self.theta = np.zeros((self.numUsers, self.numFactors))
        for i in range(self.numUsers):
            self.theta[i, :] = gammaRnd(self.a, self.xi[i], size=self.numFactors)

        # Init eta
        self.eta = gammaRnd(self.cp, self.cp / self.dp, size=self.numItems)
        # Init beta
        self.beta = np.zeros((self.numItems, self.numFactors))
        for i in range(self.numItems):
            self.beta[i, :] = gammaRnd(self.c, self.eta[i], size=self.numFactors)

        # Init z
        self.zs = np.zeros((self.numUsers, self.numItems, self.numFactors))
        for user_id, item_id in self.trainMatrix.keys():
            p = self.theta[user_id, :] * self.beta[item_id, :]
            p /= np.sum(p)
            self.zs[user_id, item_id, :] = np.random.multinomial(self.trainMatrix[user_id, item_id], p)

    def sample(self):
        '''Gibbs sampling.'''
        self.loglikelihood = []
        for curr_iter in range(self.MAX_Iterations):
            randUsers = list(range(self.numUsers))
            randTopics = list(range(self.numFactors))
            randItems = list(range(self.numItems))

            # Sample theta
            shuffle(randUsers)
            for user_id in randUsers:
                shuffle(randTopics)
                for topic_id in randTopics:
                    self.theta[user_id, topic_id] = gammaRnd(self.a + np.sum(self.zs[user_id, :, topic_id]),
                                                             self.xi[user_id] + np.sum(self.beta[:, topic_id]))

            # Sample beta
            shuffle(randItems)
            for item_id in randItems:
                shuffle(randTopics)
                for topic_id in randTopics:
                    self.beta[item_id, topic_id] = gammaRnd(self.c + np.sum(self.zs[:, item_id, topic_id]),
                                                            self.eta[item_id] + np.sum(self.theta[:, topic_id]))

            # Sample xi (the prior rate ap/bp matches the initialization above;
            # the original code referenced undefined self.b / self.d here)
            shuffle(randUsers)
            for user_id in randUsers:
                self.xi[user_id] = gammaRnd(self.ap + self.numFactors * self.a, self.ap / self.bp + self.theta[user_id, :].sum())

            # Sample eta
            shuffle(randItems)
            for item_id in randItems:
                self.eta[item_id] = gammaRnd(self.cp + self.numFactors * self.c, self.cp / self.dp + self.beta[item_id, :].sum())

            # Sample zs
            nonzeros = list(self.trainMatrix.keys())
            shuffle(nonzeros)
            for user_id, item_id in nonzeros:
                p = self.theta[user_id, :] * self.beta[item_id, :]
                p /= p.sum()
                self.zs[user_id, item_id, :] = np.random.multinomial(self.trainMatrix[user_id, item_id], p)


if __name__ == '__main__':
    bnprec = BPoissMF()
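The conditionals sampled above follow from gamma-Poisson conjugacy in Gopalan et al.'s model. A sketch of the generative process the code assumes (with z_uik the latent counts stored in zs):

\xi_u \sim \mathrm{Gamma}(a', a'/b'), \qquad
\theta_{uk} \sim \mathrm{Gamma}(a, \xi_u), \qquad
\eta_i \sim \mathrm{Gamma}(c', c'/d'), \qquad
\beta_{ik} \sim \mathrm{Gamma}(c, \eta_i),

y_{ui} = \sum_k z_{uik}, \qquad z_{uik} \sim \mathrm{Poisson}(\theta_{uk}\,\beta_{ik}),

% so, for example, the conditional the theta-loop samples is
\theta_{uk} \mid \cdot \;\sim\; \mathrm{Gamma}\Big(a + \sum_i z_{uik},\; \xi_u + \sum_i \beta_{ik}\Big).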
self.P = np.random.normal(0, 1, size=(self.numUsers, self.numFactors)) 29 | self.Q = np.random.normal(0, 1, size=(self.numItems, self.numFactors)) 30 | 31 | self.alpha = 2 32 | self.alpha_k = float(self.alpha)/self.numFactors  # float() avoids Python 2 integer division, which would make alpha_k = 0 33 | 34 | self.numRatings = 5 35 | 36 | self.theta = np.random.dirichlet(np.array([self.alpha_k for i in range(self.numFactors)])) 37 | self.gamma = np.zeros((self.numUsers, self.numFactors, self.numItems)) 38 | 39 | self.sigma = np.random.normal(0, 1, size = self.numRatings) 40 | self.omega = np.random.normal(0, 1, size = self.numItems)  # omega is indexed by item everywhere below (mu_vd[rating, item]), so it must have numItems entries 41 | 42 | self.mu_vd = 1.0 / (1.0 + np.exp(-(self.omega[newaxis, ...] + self.sigma[..., newaxis]))) 43 | 44 | self.xi = 10.0 45 | self.nu = 10.0 46 | self.phi = 2.0 47 | 48 | def buildModel(self): 49 | for iteration in range(self.MAX_Iterations): self.MStep(*self.EStep())  # alternate E and M steps; the body was an empty pass 50 | 51 | def EStep(self): 52 | gamma_nkd = np.zeros((self.numUsers, self.numFactors, self.numItems)) 53 | beta_vkd = np.zeros((self.numRatings, self.numFactors, self.numItems)) 54 | 55 | for u in range(self.numUsers): 56 | for d in range(self.numItems): 57 | rating = int(self.trainMatrix.get((u, d))) 58 | if rating == 0: 59 | gamma_nkd[u, :, d] = (beta_vkd[:, :, d] * (1 - self.mu_vd)[:, d, newaxis]).sum(axis=0) 60 | else: 61 | gamma_nkd[u, :, d] = (beta_vkd[rating-1, :, d] * self.mu_vd[rating-1, d])  # ratings 1..5 index arrays of size numRatings, hence the -1 62 | 63 | qn_k = np.zeros((self.numUsers, self.numFactors)) 64 | qn_kvd = np.zeros((self.numUsers, self.numFactors, self.numRatings, self.numItems)) 65 | qn_vd = np.zeros((self.numUsers, self.numRatings, self.numItems)) 66 | 67 | qn_k = np.exp(np.log(self.theta) + gamma_nkd.sum(axis=-1) - np.log(self.theta * np.exp( np.log(gamma_nkd).sum(axis=-1)))) 68 | for u in range(self.numUsers): 69 | for d in range(self.numItems): 70 | rating = int(self.trainMatrix.get((u, d))) 71 | if rating == 0: 72 | qn_kvd[u, :, :, d] = qn_k[u, :, newaxis] * beta_vkd[:, :, d].T * (1 - self.mu_vd)[:, d][newaxis, :] / ((1 - self.mu_vd)[:, d, newaxis] * beta_vkd[:, :, d]).sum(axis=0)[:, newaxis]  # unobserved entries distribute mass over all rating values v 73 | else: 74 | qn_kvd[u, :, rating-1, d] = qn_k[u, :] 75 | 76 | qn_vd = qn_kvd.sum(axis=1) 77 | 78 | return qn_k, qn_kvd, qn_vd  # MStep consumes these statistics 79 | 80 | 81 | def MStep(self, qn_k, qn_kvd, qn_vd): 82 | self.theta = (self.alpha_k - 1 + qn_k.sum(axis=0)) / (self.numUsers - self.numFactors + self.alpha_k * self.numFactors) 83 | C_vdk = np.zeros((self.numRatings, self.numItems, self.numFactors)) 84 | 85 | for u in range(self.numUsers): 86 | for d in range(self.numItems): 87 | rating = int(self.trainMatrix.get((u, d))) 88 | for r in range(self.numRatings): 89 | if r == rating - 1: 90 | C_vdk[r, d, :] += qn_k[u, :] 91 | else: 92 | C_vdk[r, d, :] += qn_kvd[u, :, r, d] 93 | 94 | beta_vdk = np.zeros((self.numRatings, self.numItems, self.numFactors)) 95 | beta_vdk = (self.phi - 1 + C_vdk) / (qn_k.sum(axis=0) - self.numRatings + self.phi * self.numRatings) 96 | 97 | self.sigma = self.sigma - self.learnRate * (self.mu_vd * (1 - self.mu_vd)).sum(axis=1) - self.sigma / self.xi  # sum over items so the update matches sigma's shape 98 | self.omega = self.omega - self.learnRate * (self.mu_vd * (1 - self.mu_vd)).sum(axis=0) - self.omega / self.nu  # sum over rating values so the update matches omega's shape 99 | 100 | self.mu_vd = 1.0 / (1.0 + np.exp(-(self.omega[newaxis, ...] 
+ self.sigma[..., newaxis]))) 101 | 102 | def predict(self, u, i): 103 | return np.argmax(self.mu_vd[:, i])+1 -------------------------------------------------------------------------------- /cf_rating/PMF.py: -------------------------------------------------------------------------------- 1 | #!usr/bin/env python 2 | #coding:utf-8 3 | 4 | """ 5 | Reference code: http://www.utstat.toronto.edu/~rsalakhu/code_BPMF/pmf.m 6 | Reference paper: https://papers.nips.cc/paper/3208-probabilistic-matrix-factorization.pdf 7 | momentum: https://en.wikipedia.org/wiki/Stochastic_gradient_descent 8 | """ 9 | 10 | import codecs 11 | import numpy as np 12 | from scipy.sparse import dok_matrix 13 | from random import shuffle 14 | from GraphicalRecommender import Recommender 15 | 16 | 17 | class ProbabilisticMatrixFactorization(Recommender): 18 | def __init__(self, recommender_context): 19 | Recommender.__init__(self, recommender_context) 20 | 21 | def _init_model(self): 22 | self.user_num, self.item_num = self.train_matrix.shape 23 | self.rating_mean = np.mean(self.train_matrix.values()) 24 | self.predictions = dok_matrix((self.user_num, self.item_num)) 25 | 26 | if self.config_handler['Output', 'is_load', 'bool']: 27 | self._load_model() 28 | assert(self.user_factors.shape[1] == self.item_factors.shape[1]) 29 | self.factor_num = self.user_factors.shape[1] 30 | else: 31 | self.factor_num = self.config_handler['Parameters', 'factor_num', 'int'] 32 | self.user_factors = np.random.normal(0, 1, size=(self.user_num, self.factor_num)) * 0.1 33 | self.item_factors = np.random.normal(0, 1, size=(self.item_num, self.factor_num)) * 0.1 34 | 35 | # Other Parameters 36 | self.learn_rate = self.config_handler['Parameters', 'learn_rate', 'float'] 37 | self.momentum = self.config_handler['Parameters', 'momentum', 'float'] 38 | self.user_lambda = self.config_handler['Parameters', 'user_lambda', 'float'] 39 | self.item_lambda = self.config_handler['Parameters', 'item_lambda', 'float'] 40 | 41 | # Momentum for update factors 42 | self.user_factors_inc = np.zeros((self.user_num, self.factor_num)) 43 | self.item_factors_inc = np.zeros((self.item_num, self.factor_num)) 44 | 45 | def _build_model(self): 46 | 47 | user_item_keys = self.train_matrix.keys() 48 | users = np.array([user_id for user_id, item_id in user_item_keys]) 49 | items = np.array([item_id for user_id, item_id in user_item_keys]) 50 | ratings = np.array(self.train_matrix.values()) 51 | 52 | # get the index of user_item_keys for stochastic gradient descent 53 | index = np.arange(len(user_item_keys)) 54 | batch_size = self.config_handler.get_parameter_int('Parameters', 'batch_size') 55 | batch_num = int(float(len(index)) / batch_size) 56 | 57 | # building model 58 | losses = list() 59 | max_iterations = self.config_handler.get_parameter_int('Parameters', 'max_iterations') 60 | for iteration in range(max_iterations): 61 | shuffle(index) 62 | 63 | for batch_id in range(batch_num): 64 | batch_index = index[batch_id*batch_size:(batch_id+1)*batch_size] 65 | batch_users = users[batch_index] 66 | batch_items = items[batch_index] 67 | batch_ratings = ratings[batch_index] - self.rating_mean 68 | batch_user_factors = self.user_factors[batch_users, :] 69 | batch_item_factors = self.item_factors[batch_items, :] 70 | 71 | # Compute Prediction 72 | batch_predictions = np.sum(batch_user_factors * batch_item_factors, axis=-1) 73 | batch_error = batch_predictions - batch_ratings 74 | # batch_loss = np.sum(batch_error, batch_error) 75 | # batch_loss += 0.5 * self.user_lambda * 
np.sum(np.dot(batch_user_factors, batch_user_factors)) 76 | # batch_loss += 0.5 * self.item_lambda * np.sum(np.dot(batch_item_factors, batch_item_factors)) 77 | 78 | # Compute Gradient 79 | batch_user_delta = \ 80 | batch_error[..., np.newaxis] * batch_item_factors + self.user_lambda * batch_user_factors 81 | batch_item_delta = \ 82 | batch_error[..., np.newaxis] * batch_user_factors + self.item_lambda * batch_item_factors 83 | 84 | user_delta = np.zeros((self.user_num, self.factor_num)) 85 | item_delta = np.zeros((self.item_num, self.factor_num)) 86 | for i in range(batch_size): 87 | user_delta[batch_users[i], :] += batch_user_delta[i, :] 88 | item_delta[batch_items[i], :] += batch_item_delta[i, :] 89 | 90 | # Update Parameters 91 | self.user_factors_inc = \ 92 | self.momentum * self.user_factors_inc + self.learn_rate * user_delta 93 | self.user_factors -= self.user_factors_inc 94 | 95 | self.item_factors_inc = \ 96 | self.momentum * self.item_factors_inc + self.learn_rate * item_delta 97 | self.item_factors -= self.item_factors_inc 98 | 99 | batch_predictions = \ 100 | np.sum(self.user_factors[batch_users, :] * self.item_factors[batch_items, :], axis=-1) 101 | batch_error = batch_predictions - batch_ratings 102 | batch_loss = np.dot(batch_error, batch_error) 103 | # batch_loss += 0.5 * self.user_lambda * np.sum( 104 | # self.user_factors[batch_users, :] * self.user_factors[batch_users, :]) 105 | # batch_loss += 0.5 * self.item_lambda * np.sum( 106 | # self.item_factors[batch_items, :] * self.item_factors[batch_items, :]) 107 | losses.append(batch_loss / batch_size) 108 | self._recommend() 109 | result = self._evaluate() 110 | self.logger['Process'].debug("Epoch {0} batch {1}: Training RMSE - {2}, Testing RMSE - {3}".format( 111 | iteration, batch_id, losses[-1], result['RMSE'])) 112 | 113 | def _save_result(self, result): 114 | self.logger['Result'].debug('factor_num: {0}'.format(self.factor_num)) 115 | self.logger['Result'].debug('learn_rate: {0}'.format(self.learn_rate)) 116 | self.logger['Result'].debug('user_lambda: {0}'.format(self.user_lambda)) 117 | self.logger['Result'].debug('item_lambda: {0}'.format(self.item_lambda)) 118 | self.logger['Result'].debug('momentum: {0}'.format(self.momentum)) 119 | Recommender._save_result(self, result) 120 | 121 | def _predict(self, user_id, item_id, time_id=0): 122 | predict_rating = np.dot(self.user_factors[user_id, :], self.item_factors[item_id, :]) + self.rating_mean 123 | if predict_rating > 5: 124 | return 5 125 | elif predict_rating < 1: 126 | return 1 127 | else: 128 | return predict_rating 129 | 130 | def _save_model(self): 131 | save_path = self.config_handler.get_parameter_string("Output", "save_path") 132 | save_file = save_path + "PMF_{0}.txt".format(self.recommender_context.experiment_id) 133 | 134 | with codecs.open(save_file, mode='w', encoding='utf-8') as write_fp: 135 | write_fp.write('factor_num: {0}\n'.format(self.factor_num)) 136 | write_fp.write('learn_rate: {0}\n'.format(self.learn_rate)) 137 | write_fp.write('user_lambda: {0}\n'.format(self.user_lambda)) 138 | write_fp.write('item_lambda: {0}\n'.format(self.item_lambda)) 139 | write_fp.write('momentum: {0}\n'.format(self.momentum)) 140 | write_fp.write('user_factors \n') 141 | self._save_matrix(self.user_factors, write_fp) 142 | 143 | write_fp.write('item_factors \n') 144 | self._save_matrix(self.item_factors, write_fp) 145 | 146 | def _load_model(self): 147 | load_path = self.config_handler.get_parameter_string("Output", "load_path") 148 | load_file = load_path + 
"PMF_{0}.txt".format(self.recommender_context.experiment_id) 149 | 150 | with codecs.open(load_file, mode='r', encoding='utf-8') as read_fp: 151 | for line in read_fp: 152 | if line.startswith('factor_num'): 153 | self.factor_num = int(line.split(':')[1].strip()) 154 | elif line.startswith('learn_rate'): 155 | self.learn_rate = float(line.split(':')[1].strip()) 156 | elif line.startswith('user_lambda'): 157 | self.user_lambda = float(line.split(':')[1].strip()) 158 | elif line.startswith('item_lambda'): 159 | self.item_lambda = float(line.split(':')[1].strip()) 160 | elif line.startswith('momentum'): 161 | self.momentum = float(line.split(':')[1].strip()) 162 | elif line.startswith('user_factor'): 163 | self.user_factors = self._load_matrix(read_fp) 164 | elif line.startswith('item_factor'): 165 | self.item_factors = self._load_matrix(read_fp) -------------------------------------------------------------------------------- /cf_rating/PoissonMF.py: -------------------------------------------------------------------------------- 1 | #!usr/bin/env python 2 | #coding:utf-8 3 | 4 | 5 | ''' 6 | https://github.com/dawenl/stochastic_PMF/blob/master/code/pmf.py 7 | ''' 8 | 9 | 10 | import numpy as np 11 | from scipy import special 12 | from sklearn.base import BaseEstimator, TransformerMixin 13 | 14 | class PoissonMF(BaseEstimator, TransformerMixin): 15 | ''' Poisson Matrix Factorization with batch inference''' 16 | 17 | def __init__(self, n_components=100, max_iter=100, tol=0.0005, smoothness=100, random_state=None, verbose=False, **kwargs): 18 | ''' Poisson Matrix Factorization 19 | Augument: 20 | n_component: int 21 | number of latent components 22 | 23 | max_iter: int 24 | maximal number of iterations to perform 25 | 26 | tol: float 27 | the threshold on the increase of the objective to stop the iteration 28 | 29 | smoothness: int 30 | smoothness on the initialization variantional parameters 31 | 32 | random_state: int 33 | Pseudo random number generator used for sampling. 34 | 35 | **kwargs: dict 36 | Model hyperparameters 37 | ''' 38 | self.n_components = n_components 39 | self.max_iter = max_iter 40 | self.tol = tol 41 | self.smoothness = smoothness 42 | self.random_state = random_state 43 | self.verbose = verbose 44 | if type(random_state) is int: 45 | np.random.seed(self.random_state) 46 | elif self.random_state is not None: 47 | np.random.setstate(self.random_state) 48 | 49 | self._parse_args(**kwargs) 50 | 51 | def _parse_args(self, **kwargs): 52 | self.a = float(kwargs.get('a', 0.1)) 53 | self.b = float(kwargs.get('b', 0.1)) 54 | 55 | def _init_component(self, n_feats): 56 | # variational parameters for beta 57 | self.gamma_b = self.smoothness * np.random.gamma(self.smoothness, 1./self.smoothness, size=(self.n_components, n_feats)) 58 | self.rho_b = self.smoothness * np.random.gamma(self.smoothness, 1./self.smoothness, size=(self.n_components, n_feats)) 59 | self.Eb, self.Elogb = _compute_expectations(self.gamma_b, self.rho_b) 60 | 61 | def set_component(self, shape, rate): 62 | '''Set the latent components from variational parameters. 63 | Parameters 64 | ---------- 65 | shape : numpy-array, shape (n_components, n_feats) 66 | Shape parameters for the variational distribution 67 | rate : numpy-array, shape (n_components, n_feats) 68 | Rate parameters for the variational distribution 69 | Returns 70 | ------- 71 | self : object 72 | Return the instance itself. 
73 | 74 | ''' 75 | self.gamma_b, self.rho_b = shape, rate 76 | self.Eb, self.Elogb = _compute_expectations(self.gamma_b, self.rho_b) 77 | return self 78 | 79 | def _init_weights(self, n_samples): 80 | # variational parameters for theta 81 | self.gamma_t = self.smoothness \ 82 | * np.random.gamma(self.smoothness, 1. / self.smoothness, 83 | size=(n_samples, self.n_components)) 84 | self.rho_t = self.smoothness \ 85 | * np.random.gamma(self.smoothness, 1. / self.smoothness, 86 | size=(n_samples, self.n_components)) 87 | self.Et, self.Elogt = _compute_expectations(self.gamma_t, self.rho_t) 88 | self.c = 1. / np.mean(self.Et) 89 | 90 | def fit(self, X): 91 | '''Fit the model to the data in X. 92 | Parameters 93 | ---------- 94 | X : array-like, shape (n_samples, n_feats) 95 | Training data. 96 | Returns 97 | ------- 98 | self: object 99 | Returns the instance itself. 100 | ''' 101 | n_samples, n_feats = X.shape 102 | self._init_component(n_feats)  # the method is defined as _init_component (singular) 103 | self._init_weights(n_samples) 104 | self._update(X) 105 | return self 106 | 107 | def transform(self, X, attr=None): 108 | '''Encode the data as a linear combination of the latent components. 109 | Parameters 110 | ---------- 111 | X : array-like, shape (n_samples, n_feats) 112 | attr: string 113 | The name of attribute, default 'Et'. Can be changed to 'Elogt' to 114 | obtain E_q[log theta] as transformed data. 115 | Returns 116 | ------- 117 | X_new : array-like, shape(n_samples, n_components) 118 | Transformed data, as specified by attr. 119 | ''' 120 | 121 | if not hasattr(self, 'Eb'): 122 | raise ValueError('There are no pre-trained components.') 123 | n_samples, n_feats = X.shape 124 | if n_feats != self.Eb.shape[1]: 125 | raise ValueError('The dimension of the transformed data ' 126 | 'does not match with the existing components.') 127 | if attr is None: 128 | attr = 'Et' 129 | self._init_weights(n_samples) 130 | self._update(X, update_beta=False) 131 | return getattr(self, attr) 132 | 133 | def _update_theta(self, X): 134 | ratio = X / self._xexplog() 135 | self.gamma_t = self.a + np.exp(self.Elogt) * np.dot( 136 | ratio, np.exp(self.Elogb).T) 137 | self.rho_t = self.a * self.c + np.sum(self.Eb, axis=1) 138 | self.Et, self.Elogt = _compute_expectations(self.gamma_t, self.rho_t) 139 | self.c = 1. 
/ np.mean(self.Et) 140 | 141 | def _update_beta(self, X): 142 | ratio = X / self._xexplog() 143 | self.gamma_b = self.b + np.exp(self.Elogb) * np.dot( 144 | np.exp(self.Elogt).T, ratio) 145 | self.rho_b = self.b + np.sum(self.Et, axis=0, keepdims=True).T 146 | self.Eb, self.Elogb = _compute_expectations(self.gamma_b, self.rho_b) 147 | 148 | def _xexplog(self): 149 | ''' 150 | sum_k exp(E[log theta_{ik} * beta_{kd}]) 151 | ''' 152 | return np.dot(np.exp(self.Elogt), np.exp(self.Elogb)) 
    # NOTE: fit() and transform() call self._update(), which was missing from this file.
    # The two methods below are reconstructed as a sketch of the batch-inference loop,
    # following the reference implementation cited in the module docstring
    # (dawenl/stochastic_PMF); treat their exact form as an assumption.
    def _update(self, X, update_beta=True):
        old_bd = -np.inf
        for i in xrange(self.max_iter):
            self._update_theta(X)
            if update_beta:
                self._update_beta(X)
            bound = self._bound(X)
            improvement = (bound - old_bd) / abs(old_bd)
            if self.verbose:
                print 'Iteration %d: bound = %.2f (improvement: %.5f)' % (i, bound, improvement)
            if improvement < self.tol:
                break
            old_bd = bound
        return self

    def _bound(self, X):
        # mirrors _stoch_bound() in OnlinePoissonMF below, without the mini-batch scaling
        bound = np.sum(X * np.log(self._xexplog()) - self.Et.dot(self.Eb))
        bound += _gamma_term(self.a, self.a * self.c, self.gamma_t, self.rho_t, self.Et, self.Elogt)
        bound += self.n_components * X.shape[0] * self.a * np.log(self.c)
        bound += _gamma_term(self.b, self.b, self.gamma_b, self.rho_b, self.Eb, self.Elogb)
        return bound
153 | 154 | 155 | class OnlinePoissonMF(PoissonMF): 156 | ''' Poisson matrix factorization with stochastic inference''' 157 | def __init__(self, n_components=100, batch_size=10, n_pass=10, max_iter=100, tol=0.0005, shuffle=True, smoothness=100, random_state=None, verbose=False, **kwargs): 158 | ''' Poisson matrix factorization 159 | Arguments 160 | -------------- 161 | n_components : int 162 | Number of latent components 163 | 164 | batch_size : int 165 | The size of mini-batch 166 | 167 | n_pass : int 168 | The number of passes through the entire data 169 | 170 | max_iter : int 171 | Maximal number of iterations to perform for a single mini-batch 172 | 173 | tol: float 174 | The threshold on the increase of the objective to stop the iteration 175 | 176 | shuffle: bool 177 | whether to shuffle the data or not 178 | 179 | smoothness : int 180 | smoothness of the initial variational parameters 181 | 182 | random_state: int or randomstate 183 | Pseudo random number generator used for sampling 184 | 185 | verbose : bool 186 | whether to show progress during model fitting 187 | 188 | ** kwargs : dict 189 | Model hyperparameters and learning rate 190 | 191 | ''' 192 | self.n_components = n_components 193 | self.batch_size = batch_size 194 | self.n_pass = n_pass 195 | self.max_iter = max_iter 196 | self.tol = tol 197 | self.shuffle = shuffle 198 | self.smoothness = smoothness 199 | self.random_state = random_state 200 | self.verbose = verbose 201 | 202 | if type(self.random_state) is int: 203 | np.random.seed(self.random_state) 204 | elif self.random_state is not None: 205 | np.random.setstate(self.random_state) 206 | self._parse_args(**kwargs)  # mirrors PoissonMF.__init__; without this call, a, b, t0 and kappa are never set and set_learning_rate crashes 207 | def _parse_args(self, **kwargs): 208 | self.a = float(kwargs.get('a', 0.1)) 209 | self.b = float(kwargs.get('b', 0.1)) 210 | self.t0 = float(kwargs.get('t0', 1.)) 211 | self.kappa = float(kwargs.get('kappa', 0.6)) 212 | 213 | 214 | def fit(self, X, est_total=None): 215 | ''' Fit the model to the data in X. X has to be loaded into memory. 216 | 217 | Parameters 218 | ------------ 219 | X: array-like, shape (n_samples, n_feats) 220 | Training data. 221 | 222 | est_total : int 223 | The estimated size of the entire data. Could be larger than the actual size. 224 | 225 | Returns 226 | -------------- 227 | self: object 228 | Returns the instance itself. 229 | 230 | ''' 231 | n_samples, n_feats = X.shape 232 | if est_total is None: 233 | self._scale = float(n_samples) / self.batch_size 234 | else: 235 | self._scale = float(est_total) / self.batch_size 236 | self._init_component(n_feats) 237 | self.bound = list() 238 | for count in xrange(self.n_pass): 239 | if self.verbose: 240 | print 'Iteration %d: passing through the data ...' 
% count 241 | indices = np.arange(n_samples) 242 | if self.shuffle: 243 | np.random.shuffle(indices) 244 | X_shuffled = X[indices] 245 | for (i, istart) in enumerate(xrange(0, n_samples, self.batch_size), 1): 246 | if self.verbose: print '\tMinibatch %d:' % i  # guarded like the per-pass message above 247 | iend = min(istart + self.batch_size, n_samples) 248 | self.set_learning_rate(iter=i) 249 | mini_batch = X_shuffled[istart: iend] 250 | self.partial_fit(mini_batch) 251 | self.bound.append(self._stoch_bound(mini_batch)) 252 | return self 253 | 254 | def partial_fit(self, X): 255 | ''' Fit the data in X as a mini-batch and update the parameter by taking a natural gradient step. Could be invoked from a high-level out-of-core wrapper. 256 | 257 | Parameters 258 | ------------- 259 | X : array-like, shape (batch_size, n_feats) 260 | Mini-batch data. 261 | 262 | Returns 263 | -------------- 264 | self: object 265 | Returns the instance itself. 266 | 267 | ''' 268 | 269 | self.transform(X) 270 | # take a (natural) gradient step 271 | ratio = X / self._xexplog() 272 | self.gamma_b = (1 - self.rho) * self.gamma_b + self.rho * (self.b + self._scale * np.exp(self.Elogb) * np.dot(np.exp(self.Elogt).T, ratio)) 273 | self.rho_b = (1 - self.rho) * self.rho_b + self.rho * (self.b + self._scale * np.sum(self.Et, axis=0, keepdims=True).T) 274 | self.Eb, self.Elogb = _compute_expectations(self.gamma_b, self.rho_b) 275 | return self 276 | 277 | def set_learning_rate(self, iter=None, rho=None): 278 | ''' Set the learning rate for the gradient step 279 | 280 | Parameters 281 | --------------------- 282 | iter : int 283 | The current iteration, used to compute a Robbins-Monro type 284 | learning rate 285 | rho: float 286 | Directly specify the learning rate. Will override the one computed from the current iteration. 287 | 288 | 289 | Returns 290 | ---------- 291 | self: object 292 | Returns the instance itself. 
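For example, with the defaults t0 = 1. and kappa = 0.6 from _parse_args, the 9th mini-batch uses rho = (9 + 1) ** -0.6, i.e. roughly 0.25.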
293 | 294 | ''' 295 | if rho is not None: 296 | self.rho = rho 297 | elif iter is not None: 298 | self.rho = (iter + self.t0)**(-self.kappa) 299 | else: 300 | raise ValueError('invalid learning rate.') 301 | return self 302 | 303 | def _stoch_bound(self, X): 304 | bound = np.sum(X * np.log(self._xexplog()) - self.Et.dot(self.Eb)) 305 | bound += _gamma_term(self.a, self.a * self.c, self.gamma_t, self.rho_t, self.Et, self.Elogt) 306 | bound += self.n_components * X.shape[0] * self.a * np.log(self.c) 307 | bound *= self._scale 308 | bound += _gamma_term(self.b, self.b, self.gamma_b, self.rho_b, self.Eb, self.Elogb) 309 | return bound 310 | 311 | def _compute_expectations(gamma, rho): 312 | return (gamma/rho, special.psi(gamma) - np.log(rho)) 313 | 314 | def _gamma_term(a, b, shape, rate, Ex, Elogx): 315 | return np.sum((a - shape) * Elogx - (b - rate) * Ex + (special.gammaln(shape) - shape * np.log(rate))) -------------------------------------------------------------------------------- /cf_rating/__init__.py: -------------------------------------------------------------------------------- 1 | #!usr/bin/env python 2 | #coding:utf-8 3 | 4 | from PMF import ProbabilisticMatrixFactorization as PMF 5 | from BPMF import BayesianProbabilisticMatrixFactorization as BPMF 6 | from BPTF import BayesianProbabilisticTensorFactorization as BPTF 7 | -------------------------------------------------------------------------------- /cf_rating/graphicalrecommender.py: -------------------------------------------------------------------------------- 1 | #!usr/bin/env python 2 | #coding:utf-8 3 | 4 | from scipy.sparse import dok_matrix 5 | from evaluator import Evaluator 6 | import numpy as np 7 | 8 | 9 | class Recommender: 10 | def __init__(self, recommender_context): 11 | self.train_data = None 12 | self.test_data = None 13 | self.recommender_context = recommender_context 14 | self.config_handler = self.recommender_context.get_config() 15 | self.logger = self.recommender_context.get_logger() 16 | 17 | def _init_model(self): 18 | self.user_num, self.item_num = self.train_matrix.shape 19 | self.mean_rating = np.mean(self.train_matrix.values()) 20 | 21 | self.predictions = dok_matrix((self.user_num, self.item_num)) 22 | 23 | self.factor_num = self.config_handler.get_parameter_int('Parameters', 'factor_num')  # the config files define a [Parameters] section 24 | self.user_factors = np.random.normal(0, 1, size=(self.user_num, self.factor_num)) * 0.1 25 | self.item_factors = np.random.normal(0, 1, size=(self.item_num, self.factor_num)) * 0.1 26 | self.user_factors_inc = np.zeros((self.user_num, self.factor_num)) 27 | self.item_factors_inc = np.zeros((self.item_num, self.factor_num)) 28 | 29 | def _build_model(self): 30 | self.max_iterations = self.config_handler.get_parameter_int('Parameters', 'max_iterations') 31 | 32 | def _recommend(self): 33 | for user_id, item_id in self.test_matrix.keys(): 34 | self.predictions[user_id, item_id] = self._predict(user_id, item_id) 35 | 36 | def _predict(self, user_id, item_id, time_id=0): 37 | return 0.0 38 | 39 | def _evaluate(self): 40 | evaluator_cfg = self.config_handler.get_parameter_string("Output", 'evaluator') 41 | evaluator_cfg = evaluator_cfg.strip().split(',') 42 | evaluator = Evaluator(self.predictions, self.test_matrix) 43 | result = {} 44 | for key in evaluator_cfg: 45 | result[key] = evaluator.rating[key.strip()] 46 | return result 47 | 48 | def run(self): 49 | self.logger['Process'].debug('Get the train dataset') 50 | self.train_matrix = self.recommender_context.get_data_model().get_data_splitter().get_train_matrix()
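# get_train_matrix() returns the train split as a 2-D dok_matrix; time-keyed tensors are projected down to user-item pairs (see DataSplitter.get_train_matrix).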
51 | self.logger['Result'].debug('The number of user-item pairs in train dataset is {0}'.format(len(self.train_matrix.keys()))) 52 | 53 | self.logger['Process'].debug('Get the test dataset') 54 | self.test_matrix = self.recommender_context.get_data_model().get_data_splitter().get_test_matrix() 55 | self.logger['Result'].debug('The number of user-item pairs in test dataset is {0}'.format(len(self.test_matrix.keys()))) 56 | 57 | self.logger['Process'].debug('Initialize the model parameters') 58 | self._init_model() 59 | 60 | self.logger['Process'].debug('Building model....') 61 | self._build_model() 62 | 63 | is_save = self.config_handler.get_parameter_bool('Output', 'is_save') 64 | if is_save: 65 | self.logger['Process'].debug('Save model ....') 66 | self._save_model() 67 | 68 | self.logger['Process'].debug('Recommending ...') 69 | self._recommend() 70 | 71 | self.logger['Process'].debug('Evaluating ...') 72 | result = self._evaluate() 73 | self._save_result(result) 74 | 75 | self.logger['Process'].debug("Finish.") 76 | self.logger['Process'].debug("#"*50) 77 | 78 | def _save_result(self, result): 79 | for key in result: 80 | self.logger['Result'].debug("{0}: {1}".format(key, result[key])) 81 | 82 | def _save_model(self): 83 | pass 84 | 85 | def _load_model(self): 86 | pass 87 | 88 | def _load_matrix(self, read_fp): 89 | data = list() 90 | for vector in read_fp: 91 | if vector.startswith('matrix_end'): 92 | break 93 | vector = vector.strip().split('\t') 94 | vector = [float(feature) for feature in vector] 95 | data.append(vector) 96 | return np.array(data) 97 | 98 | def _save_matrix(self, matrix, write_fp): 99 | for vector in matrix: 100 | for feature in vector: 101 | write_fp.write("{0}\t".format(feature)) 102 | write_fp.write("\n") 103 | write_fp.write('matrix_end\n') 104 | -------------------------------------------------------------------------------- /config/BPMF.cfg: -------------------------------------------------------------------------------- 1 | [Dataset] 2 | #ratings = D:\Study\Dataset\MovieLens\ml-1m\ratings.dat 3 | ratings = D:\Study\Coding\Pycharm\DataAnalysis\MovieLens\MovieLens_u0_i20.txt 4 | data_format = time 5 | 6 | [splitter] 7 | method = given_n 8 | method_index = 0 9 | method_parameter = 10 10 | data_save = 1 11 | save_path = output/ 12 | 13 | experiment_num = 5 14 | 15 | [Parameters] 16 | max_iterations = 50 17 | factor_num = 30 18 | 19 | user_normal_dist_mu0 = 0 20 | user_normal_dist_beta0 = 2 21 | user_Wishart_dist_W0 = 1 22 | 23 | item_normal_dist_mu0 = 0 24 | item_normal_dist_beta0 = 2 25 | item_Wishart_dist_W0 = 1 26 | 27 | rating_sigma = 2 28 | 29 | is_init_path = 1 30 | init_path = output/ 31 | 32 | [Output] 33 | evaluator = RMSE,MAE,MSE 34 | logger = output/ 35 | is_save = 1 36 | save_path = output/ 37 | is_load = 0 38 | load_path = output/ 39 | 40 | -------------------------------------------------------------------------------- /config/BPTF.cfg: -------------------------------------------------------------------------------- 1 | [Dataset] 2 | #ratings = D:\Study\Dataset\MovieLens\ml-1m\ratings.dat 3 | ratings = D:\Study\Coding\Pycharm\DataAnalysis\MovieLens\MovieLens_u0_i20.txt 4 | data_format = time 5 | 6 | # the [splitter] and [Output] keys below mirror BPMF.cfg; GMRec and DataModel read these sections 7 | [splitter] 8 | method = given_n 9 | method_index = 0 10 | method_parameter = 10 11 | data_save = 1 12 | save_path = output/ 13 | 14 | experiment_num = 5 15 | 16 | [Parameters] 17 | max_iterations = 50 18 | factor_num = 10 19 | 20 | user_normal_dist_mu0 = 0 21 | user_normal_dist_beta0 = 1 22 | user_Wishart_dist_W0 = 1 23 | 24 | item_normal_dist_mu0 = 0 25 | item_normal_dist_beta0 = 1 26 | item_Wishart_dist_W0 = 1 27 | 28 | time_normal_dist_mu0 = 1 29 | time_normal_dist_beta0 = 1 30 | 
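# the *_Wishart_dist_W0 scalars are presumably expanded to W0 * identity scale matrices by the samplers, as in the BPMF/BPTF papers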
time_Wishart_dist_W0 = 1 31 | 32 | rating_sigma = 1 33 | 34 | [Output] 35 | evaluator = RMSE, MSE 36 | logger = output/ 37 | is_save = 1 38 | save_path = output/ 39 | is_load = 0 40 | load_path = output/ 41 | 42 | -------------------------------------------------------------------------------- /config/PMF.cfg: -------------------------------------------------------------------------------- 1 | [Dataset] 2 | # ratings = D:\Study\Dataset\MovieLens\ml-1m\ratings.dat 3 | ratings = D:\Study\Coding\Pycharm\DataAnalysis\MovieLens\MovieLens_u0_i20.txt 4 | data_format = time 5 | 6 | [splitter] 7 | method = given_n 8 | method_index = 0 9 | method_parameter = 10 10 | data_save = 1 11 | save_path = output/ 12 | 13 | experiment_num = 5 14 | 15 | [Parameters] 16 | learn_rate = 0.001 17 | max_iterations = 50 18 | factor_num = 30 19 | momentum = 0.9 20 | user_lambda = 0.05 21 | item_lambda = 0.05 22 | stop_threshold = 0.01 23 | batch_size = 100000 24 | 25 | 26 | [Output] 27 | evaluator = RMSE, MSE 28 | logger = output/ 29 | is_save = 1 30 | save_path = output/ 31 | is_load = 0 32 | load_path = output/ -------------------------------------------------------------------------------- /config/logging.cfg: -------------------------------------------------------------------------------- 1 | #logger.conf 2 | ############################################### 3 | [loggers] 4 | keys=root 5 | 6 | [logger_root] 7 | level=DEBUG 8 | handlers=hand01,hand03 9 | 10 | ############################################### 11 | [handlers] 12 | keys=hand01,hand02,hand03 13 | 14 | [handler_hand01] 15 | class=StreamHandler 16 | level=INFO 17 | formatter=form02 18 | args=(sys.stderr,) 19 | 20 | [handler_hand02] 21 | class=FileHandler 22 | level=DEBUG 23 | formatter=form01 24 | args=('myapp.log', 'a') 25 | 26 | [handler_hand03] 27 | class=handlers.RotatingFileHandler 28 | level=INFO 29 | formatter=form02 30 | args=('myapp.log', 'a', 10*1024*1024, 5) 31 | 32 | ############################################### 33 | [formatters] 34 | keys=form01,form02 35 | 36 | [formatter_form01] 37 | format=%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s 38 | datefmt=%a, %d %b %Y %H:%M:%S 39 | 40 | [formatter_form02] 41 | format=%(name)-12s: %(levelname)-8s %(message)s 42 | datefmt= -------------------------------------------------------------------------------- /data/DataModel.py: -------------------------------------------------------------------------------- 1 | #!usr/bin/env python 2 | #coding:utf-8 3 | 4 | import splitter 5 | import convertor 6 | 7 | 8 | class DataModel: 9 | def __init__(self, config_handler): 10 | self.config_handler = config_handler 11 | 12 | def build_data_model(self): 13 | """ 14 | Read data into the convertor, and initialize the splitter 15 | """ 16 | data_format = self.config_handler.get_parameter_string('Dataset', 'data_format') 17 | 18 | # Read data into the convertor 19 | if data_format == 'time': 20 | self.convertor = convertor.TimeDataConvertor() 21 | elif data_format == 'document': 22 | self.convertor = convertor.DocumentDataConvertor() 23 | else: 24 | self.convertor = convertor.GeneralDataConvertor() 25 | dataset_file = self.config_handler.get_parameter_string('Dataset', 'ratings') 26 | self.convertor.read_data(dataset_file) 27 | 28 | # Initialize the splitter, and pass the convertor into it 29 | splitter_method = self.config_handler.get_parameter_string('splitter', 'method') 30 | splitter_method_index = self.config_handler.get_parameter_int('splitter', 'method_index') 31 | splitter_method_parameter = self.config_handler.get_parameter_float('splitter', 'method_parameter') 32 | if splitter_method == 'given_n': 33 | 
self.splitter = splitter.GivenNDataSplitter(self.convertor, splitter_method_index, splitter_method_parameter) 34 | elif splitter_method == 'generic': 35 | self.splitter = splitter.GenericDataSplitter(self.convertor, splitter_method_index, splitter_method_parameter) 36 | elif splitter_method == 'ratio': 37 | self.splitter = splitter.RatioDataSplitter(self.convertor, splitter_method_index, splitter_method_parameter)  # was a copy-paste of the generic branch 38 | elif splitter_method == 'cv': 39 | self.splitter = splitter.CrossValidationDataSplitter(self.convertor, splitter_method_index, splitter_method_parameter) 40 | else: raise ValueError('Unknown splitter method: {0}'.format(splitter_method)) 41 | def get_data_splitter(self): 42 | return self.splitter 43 | 44 | def get_data_convertor(self): 45 | return self.convertor 46 | 47 | 48 | if __name__ == '__main__': 49 | pass -------------------------------------------------------------------------------- /data/__init__.py: -------------------------------------------------------------------------------- 1 | #!usr/bin/env python 2 | #coding:utf-8 3 | 4 | from DataModel import DataModel -------------------------------------------------------------------------------- /data/convertor/DataConvertor.py: -------------------------------------------------------------------------------- 1 | #!usr/bin/env python 2 | #coding:utf-8 3 | 4 | 5 | class DataConvertor(object): 6 | def __init__(self): 7 | self.data = None 8 | self.shape = None 9 | self.data_structure = None 10 | 11 | def read_data(self, filename): 12 | """ 13 | read raw dataset, and convert to sparse matrix format. 14 | :param filename: 15 | """ 16 | pass 17 | 18 | def read_given_train_test(self, train_file, test_file): 19 | """ 20 | read given data set 21 | """ 22 | -------------------------------------------------------------------------------- /data/convertor/DocumentDataConvertor.py: -------------------------------------------------------------------------------- 1 | #!usr/bin/env python 2 | #coding:utf-8 3 | 4 | from DataConvertor import DataConvertor 5 | 6 | class DocumentDataConvertor (DataConvertor): 7 | def __init__(self): 8 | DataConvertor.__init__(self) -------------------------------------------------------------------------------- /data/convertor/GeneralDataConvertor.py: -------------------------------------------------------------------------------- 1 | #!usr/bin/env python 2 | #coding:utf-8 3 | 4 | import codecs 5 | import re 6 | from scipy.sparse import dok_matrix 7 | from DataConvertor import DataConvertor 8 | 9 | class GeneralDataConvertor(DataConvertor):  # subclass DataConvertor like the other convertors, so the splitters can use it 10 | def __init__(self): 11 | DataConvertor.__init__(self) 12 | self.data_structure = dok_matrix  # the splitters build train/test containers via convertor.data_structure(convertor.shape) 13 | def read_data(self, filename): 14 | """ 15 | read raw dataset, and convert to sparse matrix format.
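Each input line is expected to look like user_id<sep>item_id<sep>rating, where <sep> is a tab, a comma, or '::' (see the re.split pattern below).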
16 | :param filename: 17 | """ 18 | users, items = set(), set() 19 | ratings = list() 20 | with codecs.open(filename, mode="r", encoding="utf-8") as read_file: 21 | for line in read_file: 22 | user_item_rating = re.split('\t|,|::', line.strip()) 23 | user_id = int(user_item_rating[0]) 24 | item_id = int(user_item_rating[1]) 25 | rating = int(user_item_rating[2]) 26 | users.add(user_id) 27 | items.add(item_id) 28 | ratings.append((user_id, item_id, rating)) 29 | 30 | # Convert 31 | user_num, item_num = len(users), len(items) 32 | users_dict = {user_id: index for index, user_id in enumerate(list(users))} 33 | items_dict = {item_id: index for index, item_id in enumerate(list(items))} 34 | data_model = dok_matrix((user_num, item_num)) 35 | for user_id, item_id, rating in ratings: 36 | data_model[users_dict[user_id], items_dict[item_id]] = rating 37 | self.shape, self.data = (user_num, item_num), data_model  # keep convertor state for the splitters 38 | return data_model 39 | def read_given_train_test(self, train_file, test_file): 40 | """ 41 | read given data set 42 | """ 43 | users, items = set(), set() 44 | ratings = list() 45 | with codecs.open(train_file, mode="r", encoding="utf-8") as read_file: 46 | for line in read_file: 47 | user_item_rating = re.split('\t|,|::', line.strip()) 48 | user_id = int(user_item_rating[0]) 49 | item_id = int(user_item_rating[1]) 50 | rating = int(user_item_rating[2]) 51 | users.add(user_id) 52 | items.add(item_id) 53 | ratings.append((user_id, item_id, rating)) 54 | 55 | # Convert 56 | user_num, item_num = len(users), len(items) 57 | users_dict = {user_id: index for index, user_id in enumerate(list(users))} 58 | items_dict = {item_id: index for index, item_id in enumerate(list(items))} 59 | train_matrix = dok_matrix((user_num, item_num)) 60 | test_matrix = dok_matrix((user_num, item_num)) 61 | for user_id, item_id, rating in ratings: 62 | train_matrix[users_dict[user_id], items_dict[item_id]] = rating 63 | 64 | with codecs.open(test_file, mode='r', encoding='utf-8') as read_file: 65 | for line in read_file: 66 | user_item_rating = re.split('\t|,|::', line.strip()) 67 | user_id = int(user_item_rating[0]) 68 | item_id = int(user_item_rating[1]) 69 | rating = int(user_item_rating[2]) 70 | test_matrix[users_dict[user_id], items_dict[item_id]] = rating 71 | return train_matrix, test_matrix -------------------------------------------------------------------------------- /data/convertor/TimeDataConvertor.py: -------------------------------------------------------------------------------- 1 | #!usr/bin/env python 2 | #coding:utf-8 3 | 4 | import os 5 | import sys 6 | parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 7 | sys.path.insert(0, parent_dir) 8 | 9 | import codecs 10 | import re 11 | import numpy as np 12 | from scipy.sparse import dok_matrix 13 | 14 | from util.dateconvert import DateConvertor 15 | from data.sparsetensor import SparseTensor 16 | from DataConvertor import DataConvertor 17 | 18 | 19 | class TimeDataConvertor (DataConvertor): 20 | def __init__(self): 21 | DataConvertor.__init__(self) 22 | self.data_structure = SparseTensor 23 | 24 | def read_data(self, filename, time_format="month"): 25 | """ 26 | Read data: (user_id, item_id, rating, timestamp) 27 | """ 28 | users, items, times = set(), set(), set() 29 | ratings = list() 30 | with codecs.open(filename, mode="r", encoding="utf-8") as read_file: 31 | for line in read_file: 32 | user_item_timestamp_rating = re.split('\t|,|::', line.strip()) 33 | user_id = int(user_item_timestamp_rating[0]) 34 | item_id = int(user_item_timestamp_rating[1]) 35 | rating = 
int(user_item_timestamp_rating[2]) 36 | time_id = DateConvertor.convert_timestamp(int(user_item_timestamp_rating[3]), time_format) 37 | users.add(user_id) 38 | items.add(item_id) 39 | times.add(time_id) 40 | ratings.append((user_id, item_id, time_id, rating)) 41 | 42 | # Convert 43 | user_num, item_num, time_num = len(users), len(items), len(times) 44 | self.shape = (user_num, item_num, time_num) 45 | users_dict = {user_id: index for index, user_id in enumerate(list(users))} 46 | items_dict = {item_id: index for index, item_id in enumerate(list(items))} 47 | times_dict = {time_id: index for index, time_id in enumerate(list(np.sort(list(times))))} 48 | sparse_tensor = SparseTensor(shape=(user_num, item_num, time_num)) 49 | for user_id, item_id, time_id, rating in ratings: 50 | sparse_tensor[users_dict[user_id], items_dict[item_id], times_dict[time_id]] = rating 51 | self.data = sparse_tensor 52 | return self.data  # the __main__ check below relies on read_data returning the tensor 53 | def tensor_matrix(self, tensor_data): 54 | user_num, item_num = tensor_data.shape[0], tensor_data.shape[1] 55 | matrix_data = dok_matrix((user_num, item_num)) 56 | for user_id, item_id, time_id in tensor_data.keys(): 57 | matrix_data[user_id, item_id] += tensor_data.get((user_id, item_id, time_id)) 58 | return matrix_data 59 | 60 | 61 | 62 | 63 | if __name__ == '__main__': 64 | file_path = 'D:/Study/Dataset/MovieLens/ml-1m/ratings.dat' 65 | convertor = TimeDataConvertor() 66 | # data_model = convertor.read_data(file_path) 67 | # print 'the number of users is {0}.'.format(data_model.shape[0]) 68 | # print 'the number of items is {0}.'.format(data_model.shape[1]) 69 | # del data_model 70 | 71 | data_model = convertor.read_data(file_path) 72 | print 'the number of users is {0}'.format(data_model.shape[0]) 73 | print 'the number of items is {0}'.format(data_model.shape[1]) 74 | print 'the number of times is {0}'.format(data_model.shape[2]) 75 | print 'the number of records is {0}'.format(len(data_model.keys())) 76 | 77 | data_matrix = convertor.tensor_matrix(data_model) 78 | print 'the number of users is {0}'.format(data_matrix.shape[0]) 79 | print 'the number of items is {0}'.format(data_matrix.shape[1]) 80 | print 'the number of records is {0}'.format(len(data_matrix.keys())) -------------------------------------------------------------------------------- /data/convertor/__init__.py: -------------------------------------------------------------------------------- 1 | #!usr/bin/env python 2 | #coding:utf-8 3 | 4 | from GeneralDataConvertor import GeneralDataConvertor 5 | from TimeDataConvertor import TimeDataConvertor 6 | from DocumentDataConvertor import DocumentDataConvertor 7 | -------------------------------------------------------------------------------- /data/model/__init__.py: -------------------------------------------------------------------------------- 1 | #!usr/bin/env python 2 | #coding:utf-8 3 | 4 | -------------------------------------------------------------------------------- /data/sparsematrix.py: -------------------------------------------------------------------------------- 1 | #!usr/bin/env python 2 | #coding:utf-8 3 | 4 | import numpy as np 5 | 6 | 7 | class SparseMatrix: 8 | def __init__(self, shape): 9 | assert(len(shape) == 2) 10 | self.data = dict() 11 | self.row_data = dict() 12 | self.col_data = dict() 13 | self.shape = shape 14 | 15 | def __setitem__(self, key, value): 16 | assert(len(key) == 2) 17 | assert(key[0] < self.shape[0]) 18 | assert(key[1] < self.shape[1]) 19 | self.data[key] = value 20 | self.row_data.setdefault(key[0], SparseMatrix(shape=(1, 
self.shape[1]))) 21 | self.row_data[key[0]].data[0, key[1]] = value 22 | self.col_data.setdefault(key[1], SparseMatrix(shape=(self.shape[0], 1))) 23 | self.col_data[key[1]].data[key[0], 0] = value 24 | 25 | def __getitem__(self, key): 26 | assert(len(key) == 2) 27 | assert(key[0] < self.shape[0]) 28 | assert(key[1] < self.shape[1]) 29 | return self.get(key) 30 | 31 | def keys(self): 32 | return self.data.keys() 33 | 34 | def get(self, key): 35 | if key in self.data: 36 | return self.data[key] 37 | else: 38 | return 0 39 | 40 | def getrow(self, row_id): 41 | if row_id in self.row_data: 42 | return self.row_data[row_id] 43 | else: 44 | return SparseMatrix(shape=(1, self.shape[1])) 45 | 46 | def getcol(self, col_id): 47 | if col_id in self.col_data: 48 | return self.col_data[col_id] 49 | else: 50 | return SparseMatrix(shape=(self.shape[0], 1)) 51 | 52 | def transpose(self): 53 | sparse_matrix = SparseMatrix(shape=(self.shape[1], self.shape[0])) 54 | for row_id, col_id in self.keys(): 55 | sparse_matrix[col_id, row_id] = self.get((row_id, col_id)) 56 | return sparse_matrix 57 | 58 | def values(self): 59 | return self.data.values() 60 | 61 | 62 | if __name__ == '__main__': 63 | matrix = SparseMatrix(shape=(3, 4)) 64 | print 'Matrix keys: {0}'.format(matrix.keys()) 65 | matrix[0, 2] = 1 66 | matrix[0, 3] = 3 67 | matrix[1, 2] = 4 68 | print 'Matrix keys: {0}'.format(matrix.keys()) 69 | 70 | row_matrix = matrix.getrow(0) 71 | print 'Row matrix keys: {0}'.format(row_matrix.keys()) 72 | 73 | col_matrix = matrix.getcol(2) 74 | print 'Column matrix keys: {0}'.format(col_matrix.keys()) 75 | 76 | col_matrix = col_matrix.transpose() 77 | print 'Transposed column matrix keys: {0}'.format(col_matrix.keys()) 78 | -------------------------------------------------------------------------------- /data/sparsetensor.py: -------------------------------------------------------------------------------- 1 | #!usr/bin/env python 2 | #coding:utf-8 3 | 4 | import numpy as np 5 | 6 | 7 | class SparseTensor: 8 | def __init__(self, shape): 9 | self.data = dict() 10 | self.shape = shape 11 | 12 | def __setitem__(self, key, value): 13 | assert(len(key) == len(self.shape)) 14 | for i in range(len(self.shape)): 15 | assert(key[i] < self.shape[i]) 16 | self.data[key] = value 17 | 18 | def __getitem__(self, item): 19 | assert(len(item) == len(self.shape)) 20 | for i in range(len(self.shape)): 21 | assert(item[i] < self.shape[i]) 22 | return self.get(item) 23 | 24 | def keys(self): 25 | return self.data.keys() 26 | 27 | def get(self, key): 28 | if key in self.data: 29 | return self.data[key] 30 | else: 31 | return 0 32 | 33 | def get_dimension(self, dim=0, value=0): 34 | index = [i for i in range(dim)] 35 | index.extend([i for i in range(dim+1, len(self.shape))]) 36 | shape = tuple(np.array(self.shape)[index]) 37 | data = dict() 38 | 39 | for key in self.keys(): 40 | if key[dim] != value: 41 | continue 42 | _key = np.array(key)[index] 43 | data[tuple(_key)] = self.data[key] 44 | t = SparseTensor(shape) 45 | t.data = data 46 | return t 47 | 48 | if __name__ == '__main__': 49 | tensor = SparseTensor(shape=(3, 4, 4)) 50 | print tensor.keys() 51 | tensor[0, 1, 2] = 1 52 | tensor[1, 3, 2] = 4 53 | tensor[1, 2, 3] = 2 54 | print tensor.keys() 55 | print tensor.get((0, 1, 2)) 56 | print tensor[1, 3, 2] 57 | tensor = tensor.get_dimension(dim=0, value=1) 58 | print tensor.shape 59 | print tensor.keys() 60 | print tensor[1, 1] -------------------------------------------------------------------------------- /data/splitter/CrossValidationDataSplitter.py: 
-------------------------------------------------------------------------------- 1 | #!usr/bin/env python 2 | #coding:utf-8 3 | 4 | from DataSplitter import DataSplitter 5 | 6 | 7 | class CrossValidationDataSplitter (DataSplitter): 8 | def __init__(self, convertor, splitter_method_index, splitter_method_parameter): 9 | DataSplitter.__init__(self, convertor, splitter_method_index, splitter_method_parameter) -------------------------------------------------------------------------------- /data/splitter/DataSplitter.py: -------------------------------------------------------------------------------- 1 | #!usr/bin/env python 2 | #coding:utf-8 3 | 4 | import cPickle 5 | import os 6 | from scipy.sparse import dok_matrix 7 | 8 | class DataSplitter: 9 | def __init__(self, convertor, splitter_method_index, splitter_method_parameter): 10 | self.train_data = None 11 | self.test_data = None 12 | self.validation_data = None 13 | self.convertor = convertor 14 | self.splitter_method_index = splitter_method_index 15 | self.splitter_method_parameter = splitter_method_parameter 16 | self.methods = dict() 17 | 18 | def set_data_convertor(self, data_convertor): 19 | self.data_convertor = data_convertor 20 | 21 | def get_train_data(self): 22 | return self.train_data 23 | 24 | def get_test_data(self): 25 | return self.test_data 26 | 27 | def get_train_matrix(self): 28 | if len(self.train_data.shape) == 2: 29 | return self.train_data 30 | train_matrix = dok_matrix((self.train_data.shape[0], self.train_data.shape[1])) 31 | for key in self.train_data.keys(): 32 | train_matrix[key[0], key[1]] = self.train_data[key] 33 | return train_matrix 34 | 35 | def get_test_matrix(self): 36 | if len(self.test_data.shape) == 2: 37 | return self.test_data 38 | test_matrix = dok_matrix((self.test_data.shape[0], self.test_data.shape[1])) 39 | for key in self.test_data.keys(): 40 | test_matrix[key[0], key[1]] = self.test_data[key] 41 | return test_matrix 42 | 43 | def get_validation_data(self): 44 | return self.validation_data 45 | 46 | def split_data(self, save_path, experiment_id): 47 | self.save_train_test_data(save_path, experiment_id) 48 | 49 | def save_train_test_data(self, save_path, experiment_id): 50 | save_file = save_path + "train_matrix_{0}.bin".format(experiment_id) 51 | self._save_data(self.train_data, save_file) 52 | 53 | save_file = save_path + "test_matrix_{0}.bin".format(experiment_id) 54 | self._save_data(self.test_data, save_file) 55 | 56 | def load_train_test_data(self, load_path, experiment_id): 57 | load_file = load_path + "train_matrix_{0}.bin".format(experiment_id) 58 | if os.path.exists(load_file): 59 | self.train_data = self._load_data(load_file) 60 | 61 | load_file = load_path + "test_matrix_{0}.bin".format(experiment_id) 62 | if os.path.exists(load_file): 63 | self.test_data = self._load_data(load_file) 64 | return True 65 | return False 66 | 67 | def _save_data(self, data, filename): 68 | with open(filename, 'wb') as write_fp:  # pickle files should be opened in binary mode 69 | cPickle.dump(data, write_fp) 70 | 71 | def _load_data(self, filename): 72 | with open(filename, 'rb') as read_fp: 73 | data = cPickle.load(read_fp) 74 | return data -------------------------------------------------------------------------------- /data/splitter/GenericDataSplitter.py: -------------------------------------------------------------------------------- 1 | #!usr/bin/env python 2 | #coding:utf-8 3 | 4 | from DataSplitter import DataSplitter 5 | 6 | 7 | class GenericDataSplitter (DataSplitter): 8 | def __init__(self, convertor, splitter_method_index, 
splitter_method_parameter): 9 | DataSplitter.__init__(self, convertor, splitter_method_index, splitter_method_parameter) -------------------------------------------------------------------------------- /data/splitter/GivenNDataSplitter.py: -------------------------------------------------------------------------------- 1 | #!usr/bin/env python 2 | #coding:utf-8 3 | 4 | 5 | from DataSplitter import DataSplitter 6 | import numpy as np 7 | 8 | 9 | class GivenNDataSplitter(DataSplitter): 10 | def __init__(self, convertor, splitter_method_index, splitter_method_parameter): 11 | DataSplitter.__init__(self, convertor, splitter_method_index, splitter_method_parameter) 12 | self.splitter_given_n = splitter_method_parameter 13 | self.start_time_id = 0 14 | self.methods = { 15 | 0: self.get_given_n_by_user, 16 | 1: self.get_given_n_by_item, 17 | 2: self.get_given_n_by_user_date, 18 | 3: self.get_given_n_by_item_date, 19 | 4: self.get_given_n_by_date, 20 | } 21 | 22 | def get_given_n_by_user(self, given_num): 23 | """ 24 | Split each user's ratings into two parts: {@code given_num} randomly chosen ratings are held out 25 | as the testing data, and the rest are used as the training data; users with at most {@code given_num} ratings go entirely to the testing data. 26 | """ 27 | assert (given_num > 0) 28 | self.train_data = self.convertor.data_structure(self.convertor.shape) 29 | self.test_data = self.convertor.data_structure(self.convertor.shape) 30 | 31 | user_keys = dict() 32 | for key in self.convertor.data.keys(): 33 | user_keys.setdefault(key[0], list()) 34 | user_keys[key[0]].append(key) 35 | 36 | for user_id in user_keys: 37 | rating_num = len(user_keys[user_id]) 38 | if rating_num > given_num: 39 | index = np.arange(rating_num) 40 | np.random.shuffle(index) 41 | for i in index[:rating_num-given_num]: 42 | key = user_keys[user_id][i] 43 | self.train_data[key] = self.convertor.data[key] 44 | for i in index[rating_num-given_num:]: 45 | key = user_keys[user_id][i] 46 | self.test_data[key] = self.convertor.data[key] 47 | else: 48 | for key in user_keys[user_id]: 49 | self.test_data[key] = self.convertor.data[key] 50 | 51 | def get_given_n_by_item(self, given_num): 52 | """ 53 | Split each item's ratings into two parts: {@code given_num} randomly chosen ratings are held out 54 | as the testing data, and the rest are used as the training data; items with at most {@code given_num} ratings go entirely to the testing data. 55 | """ 56 | assert (given_num > 0) 57 | self.train_data = self.convertor.data_structure(self.convertor.shape) 58 | self.test_data = self.convertor.data_structure(self.convertor.shape) 59 | 60 | item_keys = dict() 61 | for key in self.convertor.data.keys(): 62 | item_keys.setdefault(key[1], list()) 63 | item_keys[key[1]].append(key) 64 | 65 | for item_id in item_keys: 66 | rating_num = len(item_keys[item_id]) 67 | if rating_num > given_num: 68 | index = np.arange(rating_num) 69 | np.random.shuffle(index) 70 | for i in index[:rating_num-given_num]: 71 | key = item_keys[item_id][i] 72 | self.train_data[key] = self.convertor.data[key] 73 | for i in index[rating_num-given_num:]: 74 | key = item_keys[item_id][i] 75 | self.test_data[key] = self.convertor.data[key] 76 | else: 77 | for key in item_keys[item_id]: 78 | self.test_data[key] = self.convertor.data[key] 79 | 80 | def get_given_n_by_date(self, given_num): 81 | """ 82 | given_num: the number of leading time periods used for training, 83 | while all later time periods are used for testing.
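For example, with given_num = 2 and start_time_id = 0, ratings from time periods 0 and 1 become training data, every later period becomes testing data, and start_time_id then advances by one so the next experiment shifts the window.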
84 | """ 85 | self.train_data = self.convertor.data_structure(self.convertor.shape) 86 | self.test_data = self.convertor.data_structure(self.convertor.shape) 87 | 88 | for key in self.convertor.data.keys(): 89 | if self.start_time_id <= key[2] < self.start_time_id + given_num: 90 | self.train_data[key] = self.convertor.data[key] 91 | elif key[2] >= self.start_time_id + given_num: 92 | self.test_data[key] = self.convertor.data[key] 93 | self.start_time_id += 1 94 | 95 | def get_given_n_by_user_date(self, given_num): 96 | pass 97 | 98 | def get_given_n_by_item_date(self, given_num): 99 | pass 100 | 101 | def split_data(self, save_path, experiment_id): 102 | if not self.load_train_test_data(save_path, experiment_id): 103 | self.methods[self.splitter_method_index](self.splitter_given_n) 104 | DataSplitter.split_data(self, save_path, experiment_id) -------------------------------------------------------------------------------- /data/splitter/RatioDataSplitter.py: -------------------------------------------------------------------------------- 1 | #!usr/bin/env python 2 | #coding:utf-8 3 | 4 | from DataSplitter import DataSplitter 5 | 6 | 7 | class RatioDataSplitter (DataSplitter): 8 | def __init__(self, convertor, splitter_method_index, splitter_method_parameter): 9 | DataSplitter.__init__(self, convertor, splitter_method_index, splitter_method_parameter) 10 | self.splitter_ratio = splitter_method_parameter -------------------------------------------------------------------------------- /data/splitter/__init__.py: -------------------------------------------------------------------------------- 1 | #1usr/bin/env python 2 | #coding:utf-8 3 | 4 | from GivenNDataSplitter import GivenNDataSplitter 5 | from CrossValidationDataSplitter import CrossValidationDataSplitter 6 | from GenericDataSplitter import GenericDataSplitter 7 | from RatioDataSplitter import RatioDataSplitter 8 | -------------------------------------------------------------------------------- /evaluator/__init__.py: -------------------------------------------------------------------------------- 1 | #!usr/bin/env python 2 | #coding:utf-8 3 | 4 | from rating import MAE, RMSE, MSE 5 | 6 | 7 | class Rating: 8 | def __init__(self, recommendation, test_matrix): 9 | self.recommendation = recommendation 10 | self.test_matrix = test_matrix 11 | self.evaluator = {'MAE': MAE, 'RMSE': RMSE, 'MSE': MSE} 12 | 13 | def __getitem__(self, item): 14 | assert (item in self.evaluator) 15 | return self.evaluator[item](self.recommendation, self.test_matrix) 16 | 17 | 18 | class Ranking: 19 | def __init__(self, recommendation, test_matrix): 20 | self.recommendation = recommendation 21 | self.test_matrix = test_matrix 22 | self.evaluator = {} 23 | 24 | def __getitem__(self, item): 25 | assert (item in self.evaluator) 26 | return self.evaluator[item](self.recommendation, self.test_matrix) 27 | 28 | 29 | class Evaluator: 30 | def __init__(self, recommendation, test_matrix): 31 | self.rating = Rating(recommendation, test_matrix) 32 | self.ranking = Ranking(recommendation, test_matrix) 33 | 34 | def __getattr__(self, key): 35 | if key == 'rating': 36 | return self.rating 37 | elif key == 'ranking': 38 | return self.ranking 39 | else: 40 | raise AttributeError 41 | 42 | 43 | if __name__ == '__main__': 44 | from scipy.sparse import dok_matrix 45 | 46 | recommendation = dok_matrix((3, 4)) 47 | recommendation[0, 0] = 3 48 | recommendation[0, 1] = 4 49 | test_matrix = dok_matrix((3, 4)) 50 | 51 | evaluator = Evaluator(recommendation, test_matrix) 52 | print 
evaluator.rating['MAE'] 53 | print evaluator.rating['RMSE'] 54 | print evaluator.rating['MSE'] -------------------------------------------------------------------------------- /evaluator/pValue.py: -------------------------------------------------------------------------------- 1 | #!usr/bin/env python 2 | #coding:utf-8 3 | 4 | ''' 5 | 6 | ''' 7 | 8 | import numpy as np 9 | from scipy import stats 10 | 11 | if __name__ == '__main__': 12 | x = [850,740,900,1070,930,850,950,980,980,880,1000,980,930,650,760,810,1000,1000,960,960] 13 | x = np.array(x) 14 | x1 = x - 1 15 | print 't-statistic = %6.3f pvalue = %6.4f' % stats.ttest_ind(x, x1) 16 | # [h,pvalue,ci]=ttest(x,990) -------------------------------------------------------------------------------- /evaluator/ranking/PrecisionRecallF1TopN.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlgorithmFan/GraphicalModelForRecommendation/a1ff8877dcd91d1fd3880a12c25120be027515e8/evaluator/ranking/PrecisionRecallF1TopN.py -------------------------------------------------------------------------------- /evaluator/ranking/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'zhanghaidong' 2 | -------------------------------------------------------------------------------- /evaluator/rating/MAE.py: -------------------------------------------------------------------------------- 1 | #!usr/bin/env python 2 | #coding: utf-8 3 | 4 | 5 | def MAE(recommendation, test_matrix): 6 | loss = 0.0 7 | number = 0 8 | for key in recommendation.keys(): 9 | if len(key) == 2: 10 | user_id, item_id = key 11 | elif len(key) == 3: 12 | user_id, item_id, time_id = key 13 | else: 14 | raise AttributeError 15 | 16 | error = recommendation.get((user_id, item_id)) - test_matrix.get(key) 17 | loss += abs(error) 18 | number += 1 19 | if number > 0: 20 | return loss / number 21 | return 0.0 22 | -------------------------------------------------------------------------------- /evaluator/rating/MSE.py: -------------------------------------------------------------------------------- 1 | #!usr/bin/env python 2 | #coding: utf-8 3 | 4 | 5 | def MSE(recommendation, test_matrix): 6 | loss = 0.0 7 | number = 0 8 | for key in recommendation.keys(): 9 | if len(key) == 2: 10 | user_id, item_id = key 11 | elif len(key) == 3: 12 | user_id, item_id, time_id = key 13 | else: 14 | raise AttributeError 15 | 16 | error = recommendation.get((user_id, item_id)) - test_matrix.get(key) 17 | loss += error * error 18 | number += 1 19 | if number > 0: 20 | return loss / number 21 | return 0.0 22 | 23 | -------------------------------------------------------------------------------- /evaluator/rating/RMSE.py: -------------------------------------------------------------------------------- 1 | #!usr/bin/env python 2 | #coding: utf-8 3 | 4 | import math 5 | 6 | def RMSE(recommendation, test_matrix): 7 | """ 8 | 9 | :param recommendation: 10 | :param test_matrix: 11 | :return: 12 | """ 13 | loss = 0.0 14 | number = 0.0 15 | for key in recommendation.keys(): 16 | if len(key) == 2: 17 | user_id, item_id = key 18 | elif len(key) == 3: 19 | user_id, item_id, time_id = key 20 | else: 21 | raise AttributeError 22 | 23 | error = recommendation.get((user_id, item_id)) - test_matrix.get(key) 24 | loss += error * error 25 | number += 1 26 | if number > 0: 27 | return math.sqrt(loss / number) 28 | return 0.0 29 | -------------------------------------------------------------------------------- 
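The three rating metrics above share one contract: each walks the keys of the prediction matrix and compares against the test matrix, so a predicted pair that is absent from the test set is scored against an implicit zero. A minimal sketch of the expected wiring (the toy values are illustrative only):

from scipy.sparse import dok_matrix
from evaluator.rating import MAE, RMSE

predictions = dok_matrix((2, 2))
test_matrix = dok_matrix((2, 2))
predictions[0, 0], test_matrix[0, 0] = 4.0, 5.0   # absolute error 1.0
predictions[1, 1], test_matrix[1, 1] = 3.0, 3.0   # exact hit

print 'MAE  = {0}'.format(MAE(predictions, test_matrix))    # (1.0 + 0.0) / 2 = 0.5
print 'RMSE = {0}'.format(RMSE(predictions, test_matrix))   # sqrt((1.0 + 0.0) / 2), roughly 0.707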
--------------------------------------------------------------------------------
/evaluator/rating/__init__.py:
--------------------------------------------------------------------------------
__author__ = 'zhanghaidong'

from MAE import *
from MSE import *
from RMSE import *
--------------------------------------------------------------------------------
/hybrid/CTR.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
#coding:utf-8

'''
Collaborative Topic Regression
'''

from Recommender import Recommender
from scipy.sparse import dok_matrix
import numpy as np
from util import normalize


class CTR(Recommender):
    def __init__(self, train_matrix, test_matrix, config_handler):
        Recommender.__init__(self, train_matrix, test_matrix, config_handler)

    def initModel(self):
        '''Read the CTR hyper-parameters and allocate the factor matrices.'''
        self.numUsers, self.numItems = self.trainMatrix.shape
        self.prediction = dok_matrix((self.numUsers, self.numItems))
        self.MAX_Iterations = int(self.configHandler.getParameter('CTR', 'MAX_Iterations'))
        self.numFactors = int(self.configHandler.getParameter('CTR', 'numFactors'))
        self.threshold = float(self.configHandler.getParameter('CTR', 'threshold'))

        self.U = np.zeros((self.numUsers, self.numFactors))
        self.V = np.zeros((self.numItems, self.numFactors))

    def buildModel(self, corpus):
        '''
        corpus: document * words.
        Alternates between updating U, V and the topic proportions theta;
        the updates are still to be implemented (see the sketch after this file).
        '''
        # Update U

        # Update V

        # Update theta
        pass

    def predict(self):
        '''Still to be implemented.'''
        pass
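The two blank update steps above follow the coordinate-ascent updates of Wang and Blei's CTR model. Below is a hedged numpy sketch of one round; R is a dense user-item matrix for readability, and lambda_u, lambda_v, a, b and theta are illustrative assumptions rather than values read from the project's config files:

    import numpy as np

    def ctr_one_round(R, theta, U, V, lambda_u=0.01, lambda_v=0.01, a=1.0, b=0.01):
        num_users, num_items = R.shape
        num_factors = U.shape[1]
        # Update U: ridge regression of each user against the item factors,
        # with confidence a on observed cells and b on unobserved ones.
        for i in range(num_users):
            C_i = np.diag(np.where(R[i] > 0, a, b))
            A = V.T.dot(C_i).dot(V) + lambda_u * np.eye(num_factors)
            U[i] = np.linalg.solve(A, V.T.dot(C_i).dot(R[i]))
        # Update V: the same regression per item, except the topic proportions
        # theta[j] act as a prior mean on the item factor v_j.
        for j in range(num_items):
            C_j = np.diag(np.where(R[:, j] > 0, a, b))
            A = U.T.dot(C_j).dot(U) + lambda_v * np.eye(num_factors)
            V[j] = np.linalg.solve(A, U.T.dot(C_j).dot(R[:, j]) + lambda_v * theta[j])
        return U, V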
--------------------------------------------------------------------------------
/hybrid/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlgorithmFan/GraphicalModelForRecommendation/a1ff8877dcd91d1fd3880a12c25120be027515e8/hybrid/__init__.py
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
#coding:utf-8

from GMRec import GMRec


if __name__ == '__main__':
    algorithms = {1: "PMF", 2: "BPMF", 3: "BPTF"}
    while True:
        print "0: Exit; 1: PMF; 2: BPMF; 3: BPTF;"
        algorithm_index = int(raw_input("Please input the algorithm index:\n"))
        if algorithm_index == 0:
            exit()
        elif algorithm_index in algorithms:
            break
        print "Error: please input a valid algorithm index."

    config_file = "config/{0}.cfg".format(algorithms[algorithm_index])
    gmrec = GMRec(config_file, algorithms[algorithm_index])
    gmrec.run()
--------------------------------------------------------------------------------
/util/AIC.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
#coding:utf-8


def calculateAIC(numParameters, likelihood):
    return 2 * numParameters - 2 * likelihood


if __name__ == '__main__':
    def hmm(numStates, numItems):
        return numStates * numStates + numStates * numItems + numStates + 2 * numStates

    def ihmm(numStates, numItems):
        return numStates * numStates + numStates * numItems + numStates + 2 * numStates + numStates

    # (model name, parameter-count function, numStates, numItems, likelihood)
    experiments = [
        ('HMM', hmm, 10, 1621, -1389871),
        ('HMM', hmm, 20, 1621, -1321175),
        ('HMM', hmm, 30, 1621, -1270278),
        ('HMM', hmm, 40, 1621, -1353854),
        ('IHMM', ihmm, 10, 1621, -1349191),
        ('IHMM', ihmm, 20, 5264, -3746421),  # Netflix
        ('IHMM', ihmm, 30, 5264, -3677849),  # Netflix
        ('IHMM', ihmm, 40, 1621, -1230340),
    ]
    for name, count_parameters, numStates, numItems, likelihood in experiments:
        numParameters = count_parameters(numStates, numItems)
        # The last field is the AIC score, not the raw likelihood.
        print '{}: numStates-{}, numParameters-{}, AIC-{}'.format(
            name, numStates, numParameters, calculateAIC(numParameters, likelihood))

    numItems = 1621
    print hmm(10, numItems)
    print hmm(20, numItems)
    print hmm(30, numItems)
    print hmm(10, numItems) - hmm(30, numItems)
--------------------------------------------------------------------------------
/util/LDA.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
#coding:utf-8


class LDA:
    '''Skeleton of an EM-style LDA; only the interface is defined so far.'''
    def __init__(self, _corpus, _parameters):
        self.corpus = _corpus
        self.parameters = _parameters

    def buildModel(self):
        pass

    def eStep(self):
        pass

    def mStep(self):
        pass
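The LDA skeleton above has no learning code yet. One possible filling is a compact collapsed Gibbs sampler, shown below as a hedged illustration rather than the variational eStep/mStep split the interface suggests; the corpus format (a list of word-id lists), num_topics, alpha and beta are all assumptions of this sketch:

    import numpy as np

    def lda_gibbs(corpus, num_topics, num_words, iterations=100, alpha=0.1, beta=0.01):
        # corpus: list of documents, each a list of word ids (an assumed format).
        # Priors are folded into the count tables up front.
        doc_topic = np.zeros((len(corpus), num_topics)) + alpha
        topic_word = np.zeros((num_topics, num_words)) + beta
        topic_sum = np.zeros(num_topics) + num_words * beta
        assignments = []
        for d, doc in enumerate(corpus):
            z = np.random.randint(num_topics, size=len(doc))
            assignments.append(z)
            for w, k in zip(doc, z):
                doc_topic[d, k] += 1
                topic_word[k, w] += 1
                topic_sum[k] += 1
        for _ in range(iterations):
            for d, doc in enumerate(corpus):
                for n, w in enumerate(doc):
                    k = assignments[d][n]
                    # Remove the current assignment from the counts.
                    doc_topic[d, k] -= 1; topic_word[k, w] -= 1; topic_sum[k] -= 1
                    # Resample proportionally to the collapsed posterior.
                    p = doc_topic[d] * topic_word[:, w] / topic_sum
                    k = np.random.choice(num_topics, p=p / p.sum())
                    assignments[d][n] = k
                    doc_topic[d, k] += 1; topic_word[k, w] += 1; topic_sum[k] += 1
        return doc_topic, topic_word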
--------------------------------------------------------------------------------
/util/NormalInvWishartDistribution.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
#coding:utf-8

import numpy as np
from scipy.stats import chi2


class NormalInverseWishartDistribution(object):
    def __init__(self, mu, lambda_beta, nu, psi):
        self.mu = mu
        self.lambda_beta = lambda_beta
        self.psi = psi
        self.nu = nu
        self.inv_psi = np.linalg.inv(psi)

    def sample(self):
        sigma = np.linalg.inv(self.wishartrand(self.nu, self.psi))
        return np.random.multivariate_normal(self.mu, sigma / self.lambda_beta), sigma

    def wishartrand(self, nu, phi):
        '''Draw a Wishart sample via the Bartlett decomposition.'''
        dim = phi.shape[0]
        chol = np.linalg.cholesky(phi)
        foo = np.zeros((dim, dim))

        for i in range(dim):
            for j in range(i + 1):
                if i == j:
                    # Use the nu passed in, not self.nu, so the degrees of
                    # freedom stay correct when sampling from a posterior.
                    foo[i, j] = np.sqrt(chi2.rvs(nu - (i + 1) + 1))
                else:
                    foo[i, j] = np.random.normal(0, 1)
        return np.dot(chol, np.dot(foo, np.dot(foo.T, chol.T)))

    def posterior(self, data):
        n = len(data)
        data_mean = np.mean(data, axis=0)
        squares_sum = np.cov(data.transpose(), bias=1)
        mu_post = (self.lambda_beta * self.mu + n * data_mean) / (self.lambda_beta + n)
        beta_post = self.lambda_beta + n
        nu_post = self.nu + n
        mu0_minus_mean = self.mu - data_mean
        # The rank-one correction term is an outer product of the mean shift.
        psi_post = self.psi + squares_sum * n + self.lambda_beta * n / (self.lambda_beta + n) * np.outer(mu0_minus_mean, mu0_minus_mean)
        psi_post = (psi_post + np.transpose(psi_post)) / 2
        return NormalInverseWishartDistribution(mu_post, beta_post, nu_post, psi_post)

if __name__ == '__main__':
    nu = 5
    a = np.array([[1, 0.5, 0], [0.5, 1, 0], [0, 0, 1]])
    # x = np.array([invwishartrand(nu,a) for i in range(20000)])
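A hedged usage sketch for the class above, with illustrative hyper-parameter values: draw a mean and covariance from the posterior after observing some data.

    import numpy as np

    prior = NormalInverseWishartDistribution(np.zeros(2), 1.0, 4, np.eye(2))
    data = np.random.multivariate_normal([1.0, -1.0], 0.1 * np.eye(2), size=50)
    posterior = prior.posterior(data)
    mu, sigma = posterior.sample()
    print mu       # should land near [1, -1]
    print sigma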
--------------------------------------------------------------------------------
/util/__init__.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
#coding:utf-8

from readconf import *
from normalization import normalize
from logger import *
--------------------------------------------------------------------------------
/util/dateconvert.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
#coding:utf-8

import time
import datetime


class DateConvertor(object):
    def __init__(self):
        pass

    @staticmethod
    def convert_timestamp(timestamp, convert_format="month"):
        if convert_format == "month" or convert_format == "m":
            return DateConvertor.convert_month_timestamp(timestamp)
        elif convert_format == "week" or convert_format == "w":
            return DateConvertor.convert_week_timestamp(timestamp)
        elif convert_format == "day" or convert_format == "d":
            return DateConvertor.convert_day_timestamp(timestamp)
        else:
            raise ValueError("unknown convert_format: %s" % convert_format)

    @staticmethod
    def convert_string(timestamp):
        year_month_day = time.localtime(timestamp)
        year, month, day = year_month_day.tm_year, year_month_day.tm_mon, year_month_day.tm_mday
        hour, minute, second = year_month_day.tm_hour, year_month_day.tm_min, year_month_day.tm_sec
        return '%d-%d-%d %d:%d:%d' % (year, month, day, hour, minute, second)

    @staticmethod
    def convert_month_timestamp(timestamp):
        '''Truncate a timestamp to the first day of its month.'''
        year_month_day = datetime.datetime.fromtimestamp(timestamp)
        month_timestamp = datetime.datetime(year=year_month_day.year, month=year_month_day.month, day=1)
        return time.mktime(month_timestamp.timetuple())

    @staticmethod
    def convert_week_timestamp(timestamp):
        '''Truncate a timestamp to the Monday of its week.'''
        week = int(time.strftime('%w', time.localtime(timestamp)))
        # %w counts Sunday as 0; map it to 6 days after Monday so that a
        # Sunday timestamp falls back to the preceding Monday.
        days_since_monday = (week - 1) % 7
        MondayStamp = timestamp - days_since_monday * 86400
        MondayStr = time.localtime(MondayStamp)
        return time.mktime(time.strptime(time.strftime('%Y-%m-%d', MondayStr), '%Y-%m-%d'))

    @staticmethod
    def convert_day_timestamp(timestamp):
        '''Truncate a timestamp to midnight of its day.'''
        year_month_day = datetime.datetime.fromtimestamp(timestamp)
        day_timestamp = datetime.datetime(year=year_month_day.year, month=year_month_day.month, day=year_month_day.day)
        return time.mktime(day_timestamp.timetuple())

if __name__ == '__main__':
    now = time.mktime(time.localtime())
    print DateConvertor.convert_timestamp(now, 'm')
    print DateConvertor.convert_string(now)
--------------------------------------------------------------------------------
/util/logger.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
#coding:utf-8


import logging
from logging.handlers import RotatingFileHandler
# import logging.config


class Logger:
    def __init__(self, filename):
        self.filename = filename
        self.logger = None

    def _set_config(self):
        pass

    def _set_console(self):
        console = logging.StreamHandler()
        console.setLevel(logging.DEBUG)
        formatter = logging.Formatter('%(name)-12s: %(levelname)-8s %(message)s')
        console.setFormatter(formatter)
        self.logger.addHandler(console)

    def information(self, message):
        self.logger.info(message)

    def debug(self, message):
        self.logger.debug(message)

    def warn(self, message):
        self.logger.warn(message)

    def error(self, message):
        self.logger.error(message)


class Process(Logger):
    def __init__(self, filename):
        Logger.__init__(self, filename)
        self._set_config()
        self._set_console()

    def _set_config(self):
        Rthandler = RotatingFileHandler(filename=self.filename,
                                        maxBytes=10 * 1024 * 1024,
                                        backupCount=5, mode='a')
        Rthandler.setLevel(logging.DEBUG)
        formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
        Rthandler.setFormatter(formatter)
        self.logger = logging.getLogger('Process')
        self.logger.addHandler(Rthandler)
        self.logger.setLevel(logging.DEBUG)


class Result(Logger):
    def __init__(self, filename):
        Logger.__init__(self, filename)
        self._set_config()
        self._set_console()

    def _set_config(self):
        handler = logging.FileHandler(filename=self.filename, mode='a')
        frt = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
        handler.setFormatter(frt)
        handler.setLevel(logging.DEBUG)
        self.logger = logging.getLogger('Result')
        self.logger.addHandler(handler)
        self.logger.setLevel(logging.DEBUG)


if __name__ == '__main__':
    logger = {'Result': Result('../output/result.log'), 'Process': Process('../output/process.log')}
    logger['Result'].debug('Wrong')
    logger['Process'].debug('Wrong')
--------------------------------------------------------------------------------
/util/normalization.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
#coding:utf-8

'''
@author: haidong zhang
'''
import numpy as np

def normalize(matrix):
    '''
    Normalize a vector so it sums to one, or each row of a matrix so the
    rows sum to one.
    '''
    numDims = len(matrix.shape)
    if numDims == 1:
        # a vector
        s = np.sum(matrix)
        assert (s != 0)
        return matrix / s
    else:
        # a matrix: normalize row by row
        s = np.sum(matrix, axis=1)
        assert (s != 0).all()
        return matrix / s[..., np.newaxis]
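A quick illustration of normalize on both input shapes:

    import numpy as np
    from util import normalize

    print normalize(np.array([1.0, 3.0]))                   # [0.25 0.75]
    print normalize(np.array([[1.0, 1.0], [2.0, 6.0]]))     # rows: [0.5 0.5], [0.25 0.75]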
--------------------------------------------------------------------------------
/util/normalwishartdistribution.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
#coding:utf-8

import numpy as np
import random
from scipy.stats import chi2


class NormalWishartDistribution(object):
    def __init__(self, mu, lambda_beta, nu, psi):
        self.mu = mu
        self.lambda_beta = lambda_beta
        self.psi = psi
        self.nu = nu
        # self.inv_psi = np.linalg.inv(psi)

    def sample(self):
        sigma = np.linalg.inv(self.wishartrand(self.nu, self.psi))
        mu = np.random.multivariate_normal(self.mu, sigma / self.lambda_beta)
        return mu, sigma

    def wishartrand(self, nu, sigma, C=None):
        """Return a sample from a Wishart distribution (Bartlett decomposition)."""
        if C is None:
            C = np.linalg.cholesky(sigma)
        D = sigma.shape[0]
        a = np.zeros((D, D), dtype=np.float32)
        for r in xrange(D):
            if r != 0:
                a[r, :r] = np.random.normal(size=(r,))
            # The degrees of freedom decrease by one per row in the Bartlett
            # decomposition: chi-square with nu - r degrees of freedom.
            a[r, r] = np.sqrt(random.gammavariate(0.5 * (nu - r), 2.0))
        return np.dot(np.dot(np.dot(C, a), a.T), C.T)

    def wishartrand1(self, nu, phi):
        dim = phi.shape[0]
        chol = np.linalg.cholesky(phi)
        foo = np.zeros((dim, dim))

        for i in range(dim):
            for j in range(i + 1):
                if i == j:
                    foo[i, j] = np.sqrt(chi2.rvs(nu - (i + 1) + 1))
                else:
                    foo[i, j] = np.random.normal(0, 1)
        return np.dot(chol, np.dot(foo, np.dot(foo.T, chol.T)))

    def posterior(self, data):
        n = len(data)
        data_mean = np.mean(data, axis=0)
        squares_sum = np.cov(data.transpose(), bias=1)
        mu_post = (self.lambda_beta * self.mu + n * data_mean) / (self.lambda_beta + n)
        beta_post = self.lambda_beta + n
        nu_post = self.nu + n
        mu0_minus_mean = self.mu - data_mean
        # The rank-one correction term is an outer product of the mean shift.
        psi_post = np.linalg.inv(self.psi) + squares_sum * n + self.lambda_beta * n / (self.lambda_beta + n) * np.outer(mu0_minus_mean, mu0_minus_mean)
        psi_post = np.linalg.inv(psi_post)
        psi_post = (psi_post + np.transpose(psi_post)) / 2
        return NormalWishartDistribution(mu_post, beta_post, nu_post, psi_post)

if __name__ == '__main__':
    nu = 5
    sigma = np.array([[1, 0.5], [0.5, 2]])
    df = 10
    np.random.seed(1)
    nwd = NormalWishartDistribution(0, 0, df, sigma)
    sigma1 = nwd.wishartrand(df, sigma)
    print sigma1
    print np.linalg.inv(sigma1)
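A hedged sketch of how a BPMF-style Gibbs sweep typically uses this class: resample the user-factor hyper-parameters from their posterior given the current factor matrix. All names and sizes below are illustrative assumptions.

    import numpy as np

    num_factors = 5
    U = np.random.randn(100, num_factors)           # current user factors
    prior = NormalWishartDistribution(np.zeros(num_factors), 2.0,
                                      num_factors, np.eye(num_factors))
    mu_u, sigma_u = prior.posterior(U).sample()     # hyper-parameter draw
    print mu_u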
--------------------------------------------------------------------------------
/util/readconf.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
#coding:utf-8

import ConfigParser


class ReadConfig:
    def __init__(self, config_file_path):
        self.cf = ConfigParser.ConfigParser()
        self.cf.read(config_file_path)

    def __getitem__(self, key):
        '''Tuple-style access: config[section, key, type], where type is one
        of string, bool, int or float.'''
        assert (len(key) == 3)
        if key[2] == 'string':
            return self.get_parameter_string(key[0], key[1])
        elif key[2] == 'bool':
            return self.get_parameter_bool(key[0], key[1])
        elif key[2] == 'int':
            return self.get_parameter_int(key[0], key[1])
        elif key[2] == 'float':
            return self.get_parameter_float(key[0], key[1])
        else:
            raise KeyError(key[2])

    def get_parameter_string(self, section, key):
        return self.cf.get(section, key)

    def get_parameter_int(self, section, key):
        return int(self.get_parameter_string(section, key))

    def get_parameter_float(self, section, key):
        return float(self.get_parameter_string(section, key))

    def get_parameter_bool(self, section, key):
        return bool(self.get_parameter_int(section, key))
--------------------------------------------------------------------------------
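A short usage sketch for ReadConfig. The section and key names below are the ones GMRec.py reads at startup; the idea that config/PMF.cfg actually defines them is an assumption of this illustration.

    config = ReadConfig('config/PMF.cfg')
    print config.get_parameter_string('Output', 'logger')          # log directory
    print config.get_parameter_int('splitter', 'experiment_num')   # number of runs
    print config['splitter', 'save_path', 'string']                # tuple-style access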