├── .gitignore
├── .idea
│   ├── GraphicalModelForRecommendation.iml
│   ├── encodings.xml
│   ├── misc.xml
│   ├── modules.xml
│   ├── other.xml
│   ├── scopes
│   │   └── scope_settings.xml
│   ├── testrunner.xml
│   └── vcs.xml
├── GMRec.py
├── README.md
├── RecommenderContext.py
├── baseline
│   ├── itemKNN.py
│   ├── svd.py
│   ├── svdplusplus.py
│   └── userKNN.py
├── cf_ranking
│   ├── AspectModel.py
│   ├── AutoEncoderCTR.py
│   ├── HMM.py
│   ├── LDA.py
│   ├── PLSA.py
│   ├── PMF.py
│   ├── Recommender.py
│   └── __init__.py
├── cf_rating
│   ├── BNPoissMF.py
│   ├── BPMF.py
│   ├── BPTF.py
│   ├── BPoissMF.py
│   ├── GPLSA.py
│   ├── MMLvd.py
│   ├── PMF.py
│   ├── PoissonMF.py
│   ├── __init__.py
│   └── graphicalrecommender.py
├── config
│   ├── BPMF.cfg
│   ├── BPTF.cfg
│   ├── PMF.cfg
│   └── logging.cfg
├── data
│   ├── DataModel.py
│   ├── __init__.py
│   ├── convertor
│   │   ├── DataConvertor.py
│   │   ├── DocumentDataConvertor.py
│   │   ├── GeneralDataConvertor.py
│   │   ├── TimeDataConvertor.py
│   │   └── __init__.py
│   ├── model
│   │   └── __init__.py
│   ├── sparsematrix.py
│   ├── sparsetensor.py
│   └── splitter
│       ├── CrossValidationDataSplitter.py
│       ├── DataSplitter.py
│       ├── GenericDataSplitter.py
│       ├── GivenNDataSplitter.py
│       ├── RatioDataSplitter.py
│       └── __init__.py
├── evaluator
│   ├── __init__.py
│   ├── pValue.py
│   ├── ranking
│   │   ├── PrecisionRecallF1TopN.py
│   │   └── __init__.py
│   └── rating
│       ├── MAE.py
│       ├── MSE.py
│       ├── RMSE.py
│       └── __init__.py
├── hybrid
│   ├── CTR.py
│   └── __init__.py
├── main.py
└── util
    ├── AIC.py
    ├── LDA.py
    ├── NormalInvWishartDistribution.py
    ├── __init__.py
    ├── dateconvert.py
    ├── logger.py
    ├── normalization.py
    ├── normalwishartdistribution.py
    └── readconf.py
/.gitignore:
--------------------------------------------------------------------------------
1 |
2 | cf_rating/PWTempBPMF.py
--------------------------------------------------------------------------------
/GMRec.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #coding:utf-8
3 |
4 | from util import logger
5 | from data.DataModel import DataModel
6 | from RecommenderContext import RecommenderContext
7 |
8 | from util.readconf import ReadConfig
9 | from cf_rating import PMF, BPMF, BPTF
10 |
11 |
12 | class GMRec:
13 | def __init__(self, config_file, algorithm_name):
14 | self.config_file = config_file
15 | self.algorithm_name = algorithm_name
16 | self.all_algorithms = {
17 | 'PMF': PMF, 'BPMF': BPMF, 'BPTF': BPTF
18 | }
19 |
20 | def _set_logger(self, config_handler):
21 | result_file = config_handler.get_parameter_string("Output", "logger") + "{0}_Result.log".format(self.algorithm_name)
22 | process_file = config_handler.get_parameter_string("Output", "logger") + "{0}_Process.log".format(self.algorithm_name)
23 |         loggers = {'Result': logger.Result(result_file), 'Process': logger.Process(process_file)}
24 |         return loggers
25 |
26 | def run(self):
27 | config_handler = ReadConfig(self.config_file)
28 |         loggers = self._set_logger(config_handler)
29 | data_model = DataModel(config_handler)
30 |         recommender_context = RecommenderContext(config_handler, data_model, loggers)
31 |
32 | recommender_context.get_logger()['Process'].debug("\n" + "#"*50 + "Start" + '#'*50)
33 | recommender_context.get_logger()['Result'].debug("\n" + "#"*50 + "Start" + '#'*50)
34 |
35 | recommender_context.get_logger()['Process'].debug("Build data model")
36 | recommender_context.get_data_model().build_data_model()
37 |
38 | experiment_num = recommender_context.get_config().get_parameter_int("splitter", "experiment_num")
39 | for experiment_id in range(experiment_num):
40 | recommender_context.get_logger()['Process'].debug("The {0}th experiment.".format(experiment_id))
41 | recommender_context.get_logger()['Result'].debug("The {0}th experiment.".format(experiment_id))
42 |
43 | recommender_context.get_logger()['Process'].debug("Split dataset into train and test")
44 | save_path = recommender_context.get_config().get_parameter_string("splitter", "save_path")
45 | recommender_context.experiment_id = experiment_id
46 | recommender_context.get_data_model().get_data_splitter().split_data(save_path, experiment_id)
47 |
48 | recommender_context.get_logger()['Process'].debug("Enter into training ....")
49 | algorithm = self.all_algorithms[self.algorithm_name](recommender_context)
50 | algorithm.run()
51 |
52 | recommender_context.get_logger()['Process'].debug("\n" + "#"*50 + "Finish" + "#"*50)
--------------------------------------------------------------------------------
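
GMRec wires the config handler, loggers and data model together, then dispatches on the algorithm name. A minimal sketch of invoking it (hypothetical: the repo's own entry point, main.py, is not captured in this dump, and the config filename is an assumption based on the config/ directory):

    from GMRec import GMRec

    if __name__ == '__main__':
        # 'BPMF' must be one of the keys in GMRec.all_algorithms.
        runner = GMRec('config/BPMF.cfg', 'BPMF')
        runner.run()
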
/README.md:
--------------------------------------------------------------------------------
1 | # GraphicalModelForRecommendation
2 | Graphical Model for Recommender Systems
3 | 
4 | # Factorization Machine
5 | - Poisson Factorization Machine: web.media.mit.edu/~pernghwa/papers/poissonfm.pdf
6 | - https://github.com/blei-lab/publications/blob/4a002eac1613c8ce4982d0dc7ae119029a42bde5/2015_CharlinRanganathMcInerneyBlei/README.md
7 | - https://github.com/blei-lab
8 | - Nonparametric Poisson Factorization Machine: http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7373420&tag=1
9 | 
10 | # Negative-Binomial Matrix Factorization
11 | - https://github.com/dawenl/nbmf/blob/master/code/nbmf.py
12 | 
13 | # Kalman Filter
14 | - http://link.springer.com/article/10.1186/1687-1847-2012-172#Abs1
15 | 
16 | # Topic Modeling
17 | - https://www.cs.princeton.edu/~blei/topicmodeling.html
18 | 
19 | # Stochastic Gradient Descent
20 | - https://en.wikipedia.org/wiki/Stochastic_gradient_descent
--------------------------------------------------------------------------------
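
For context on the last README link: plain (non-Bayesian) matrix factorization fits user and item factors by stochastic gradient descent on the squared error with L2 regularization. A minimal sketch, independent of this repository's code:

    import numpy as np

    def sgd_mf_epoch(triples, P, Q, lr=0.01, reg=0.05):
        """One SGD pass over (user, item, rating) triples.
        P: user factors (num_users x k); Q: item factors (num_items x k)."""
        for u, i, r in triples:
            err = r - P[u].dot(Q[i])                      # prediction error e_ui
            # update both factors from the pre-update values
            P[u], Q[i] = (P[u] + lr * (err * Q[i] - reg * P[u]),
                          Q[i] + lr * (err * P[u] - reg * Q[i]))
        return P, Q
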
/RecommenderContext.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #coding:utf-8
3 |
4 |
5 | class RecommenderContext:
6 | def __init__(self, config_handler, data_model, logger):
7 | self.config_handler = config_handler
8 | self.data_model = data_model
9 | self.logger = logger
10 | self.experiment_id = 0
11 |
12 | def get_config(self):
13 | return self.config_handler
14 |
15 | def get_data_model(self):
16 | return self.data_model
17 |
18 | def get_logger(self):
19 | return self.logger
20 |
21 |
22 |
--------------------------------------------------------------------------------
/baseline/itemKNN.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlgorithmFan/GraphicalModelForRecommendation/a1ff8877dcd91d1fd3880a12c25120be027515e8/baseline/itemKNN.py
--------------------------------------------------------------------------------
/baseline/svd.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #coding:utf-8
3 |
4 |
5 | class SVD:
6 | def __init__(self):
7 | pass
8 |
9 | def build_model(self):
10 | pass
--------------------------------------------------------------------------------
/baseline/svdplusplus.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #coding:utf-8
3 |
--------------------------------------------------------------------------------
/baseline/userKNN.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlgorithmFan/GraphicalModelForRecommendation/a1ff8877dcd91d1fd3880a12c25120be027515e8/baseline/userKNN.py
--------------------------------------------------------------------------------
/cf_ranking/AspectModel.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #coding:utf-8
3 |
4 | import numpy as np
5 | from scipy.sparse import dok_matrix
6 | from Recommender import Recommender
7 | from util import normalize
8 |
9 | '''
10 | Hofmann, 1999, Latent class models for collaborative filtering
11 |
12 | '''
13 |
14 | class AspectModel(Recommender):
15 | def __init__(self, trainMatrix, testMatrix, configHandler):
16 |         Recommender.__init__(self, trainMatrix, testMatrix, configHandler)
17 |
18 |
19 | def initModel(self):
20 | ''''''
21 |         self.numUsers, self.numItems = self.trainMatrix.shape
22 | self.prediction = dok_matrix((self.numUsers, self.numItems))
23 | self.MAX_Iterations = int(self.configHandler.getParameter('AspectModel', 'MAX_Iterations'))
24 | self.numFactors = int(self.configHandler.getParameter('AspectModel', 'numFactors'))
25 | self.threshold = float(self.configHandler.getParameter('AspectModel', 'threshold'))
26 |
27 | self.X = np.random.uniform(0, 1, size=(self.numUsers, self.numFactors)) # P(x|z)
28 | self.X = normalize(self.X)
29 |
30 | self.Y = np.random.uniform(0, 1, size=(self.numItems, self.numFactors)) # P(y|z)
31 | self.Y = normalize(self.Y)
32 |
33 | self.Z = np.random.uniform(0, 1, size=self.numFactors) # P(z)
34 | self.Z = normalize(self.Z)
35 |
36 |         self.Q = np.zeros((self.numUsers, self.numItems, self.numFactors))  # P(z|x,y), indexed [user, item, factor]
37 |
38 |
39 | def buildModel(self):
40 | ''''''
41 | self.initModel()
42 |         oldLikelihood = -np.inf
43 | for iteration in range(self.MAX_Iterations):
44 | ''''''
45 | print 'Iteration {}'.format(iteration)
46 | self.eStep() # E-Step
47 | self.mStep() # M-Step
48 | likelihood = self.likelihood()
49 |
50 | if likelihood - oldLikelihood < self.threshold:
51 | break
52 | else:
53 | oldLikelihood = likelihood
54 |
55 |
56 | def eStep(self):
57 | ''''''
58 |         self.Q = self.X[:, np.newaxis, :] * self.Y[np.newaxis, :, :] * self.Z[np.newaxis, np.newaxis, :]
59 | self.Q = self.Q / np.sum(self.Q, axis=-1)[..., np.newaxis]
60 |
61 |
62 | def mStep(self):
63 | ''''''
64 |         probability = self.Q * self.trainMatrix.toarray()[:, :, np.newaxis]
65 | self.X = np.sum(probability, axis=1) / np.sum(np.sum(probability, axis=0), axis=0)[np.newaxis, ...]
66 | self.Y = np.sum(probability, axis=0) / np.sum(np.sum(probability, axis=0), axis=0)[np.newaxis, ...]
67 | self.Z = np.sum(np.sum(probability, axis=0), axis=0) / np.sum(probability)
68 |
69 | def likelihood(self):
70 | ''''''
71 | result = 0.00
72 | logX = np.log(self.X)
73 | logY = np.log(self.Y)
74 | logZ = np.log(self.Z)
75 | for user_id, item_id in self.trainMatrix.keys():
76 |             result += self.trainMatrix[user_id, item_id] * np.log(np.sum(self.X[user_id, :] * self.Y[item_id, :] * self.Z))
77 | return result / len(self.trainMatrix.keys())
78 |
79 | def RegularizedLikelihood(self):
80 | ''''''
81 | result = 0.00
82 | logX = np.log(self.X)
83 | logY = np.log(self.Y)
84 | logZ = np.log(self.Z)
85 | logQ = np.log(self.Q)
86 | for user_id, item_id in self.trainMatrix.keys():
87 | result += np.sum(self.Q[user_id, item_id, :] * (logX[user_id, :] + logY[item_id, :] + logZ))
88 | result += np.sum(self.Q[user_id, item_id, :] * logQ[user_id, item_id, :])
89 | return result / len(self.trainMatrix.keys())
90 |
91 |
92 | def ranking(self, user_id, item_id):
93 | ''''''
94 | return np.sum(self.X[user_id, :] * self.Y[item_id, :] * self.Z)
95 |
96 |
--------------------------------------------------------------------------------
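
For reference, the EM updates implemented in AspectModel correspond to Hofmann's aspect model, with n(x, y) the count stored in trainMatrix:

    $$P(z\mid x,y)=\frac{P(z)\,P(x\mid z)\,P(y\mid z)}{\sum_{z'}P(z')\,P(x\mid z')\,P(y\mid z')}\qquad\text{(E-step, the array } Q\text{)}$$

    $$P(x\mid z)\propto\sum_{y}n(x,y)\,P(z\mid x,y),\quad P(y\mid z)\propto\sum_{x}n(x,y)\,P(z\mid x,y),\quad P(z)\propto\sum_{x,y}n(x,y)\,P(z\mid x,y)\qquad\text{(M-step)}$$
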
/cf_ranking/AutoEncoderCTR.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #coding:utf-8
3 |
4 | '''
5 | https://github.com/wnzhang/deep-ctr
6 | '''
--------------------------------------------------------------------------------
/cf_ranking/HMM.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #coding:utf-8
3 |
4 |
5 | class HMM(object):
6 | def __init__(self, state_num, item_num):
7 | pass
--------------------------------------------------------------------------------
/cf_ranking/LDA.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #coding:utf-8
3 |
4 | from Recommender import Recommender
5 |
6 | class LatentDirichletAllocation(Recommender):
7 | '''
8 | Latent Dirichlet Allocation
9 | '''
10 | def __init__(self, trainMatrix, testMatrix, configHandler):
11 |         Recommender.__init__(self, trainMatrix, testMatrix, configHandler)
12 |
13 | def initModel(self):
14 | pass
15 |
16 | def buildModel(self):
17 | pass
18 |
--------------------------------------------------------------------------------
/cf_ranking/PLSA.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #coding:utf-8
3 | 
4 | from Recommender import Recommender
5 | import numpy as np
6 | from scipy.sparse import dok_matrix
7 | 
8 | class PLSA(Recommender):
9 |     def __init__(self, trainMatrix, testMatrix, configHandler):
10 |         Recommender.__init__(self, trainMatrix, testMatrix, configHandler)
11 | 
12 |     def initModel(self):
13 |         self.numUsers, self.numItems = self.trainMatrix.shape
14 |         self.prediction = dok_matrix((self.numUsers, self.numItems))
15 |         self.MAX_Iterations = int(self.configHandler.getParameter('PLSA', 'MAX_Iterations'))
16 |         self.numFactors = int(self.configHandler.getParameter('PLSA', 'numFactors'))
17 |         self.threshold = float(self.configHandler.getParameter('PLSA', 'threshold'))
18 | 
19 |         self.X = np.random.uniform(0, 1, size=(self.numUsers, self.numFactors))   # P(z|x)
20 |         self.X = self.X / np.sum(self.X, axis=1)[:, np.newaxis]
21 | 
22 |         self.Y = np.random.uniform(0, 1, size=(self.numItems, self.numFactors))   # P(y|z)
23 |         self.Y = self.Y / np.sum(self.Y, axis=0)[np.newaxis, :]
24 | 
25 |         self.Q = np.zeros((self.numUsers, self.numItems, self.numFactors))        # P(z|x,y)
26 | 
27 |     def buildModel(self):
28 |         self.initModel()
29 |         oldLikelihood = -np.inf
30 |         for iteration in range(self.MAX_Iterations):
31 |             print 'Iteration {}'.format(iteration)
32 |             self.eStep()
33 |             self.mStep()
34 |             likelihood = self.likelihood()
35 |             if likelihood - oldLikelihood < self.threshold:
36 |                 break
37 |             oldLikelihood = likelihood
38 | 
39 |     def eStep(self):
40 |         # P(z|x,y) is proportional to P(z|x) * P(y|z), normalized over z
41 |         self.Q = self.X[:, np.newaxis, :] * self.Y[np.newaxis, :, :]
42 |         self.Q = self.Q / np.sum(self.Q, axis=-1)[..., np.newaxis]
43 | 
44 |     def mStep(self):
45 |         # n(x,y) * P(z|x,y); shape (numUsers, numItems, numFactors)
46 |         probability = self.Q * self.trainMatrix.toarray()[:, :, np.newaxis]
47 |         self.X = np.sum(probability, axis=1)
48 |         self.X = self.X / np.sum(self.X, axis=1)[:, np.newaxis]    # normalize over z for each user
49 |         self.Y = np.sum(probability, axis=0)
50 |         self.Y = self.Y / np.sum(self.Y, axis=0)[np.newaxis, :]    # normalize over items for each z
51 | 
52 |     def likelihood(self):
53 |         result = 0.0
54 |         for user_id, item_id in self.trainMatrix.keys():
55 |             result += self.trainMatrix[user_id, item_id] * np.log(np.sum(self.X[user_id, :] * self.Y[item_id, :]))
56 |         return result / len(self.trainMatrix.keys())
57 | 
58 |     def RegularizedLikelihood(self):
59 |         # Variational lower bound: E_Q[log P(z|x) P(y|z)] - E_Q[log Q]
60 |         result = 0.0
61 |         logX = np.log(self.X)
62 |         logY = np.log(self.Y)
63 |         logQ = np.log(self.Q)
64 |         for user_id, item_id in self.trainMatrix.keys():
65 |             result += np.sum(self.Q[user_id, item_id, :] * (logX[user_id, :] + logY[item_id, :]))
66 |             result -= np.sum(self.Q[user_id, item_id, :] * logQ[user_id, item_id, :])
67 |         return result / len(self.trainMatrix.keys())
68 | 
69 |     def ranking(self, user_id, item_id):
70 |         # P(y|x) = sum_z P(z|x) * P(y|z)
71 |         return np.sum(self.X[user_id, :] * self.Y[item_id, :])
--------------------------------------------------------------------------------
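
PLSA differs from the aspect model above only in its parameterization: it conditions the latent factor on the user, learning P(z|x) per user instead of a single global prior P(z), which is why there is no Z array here. The corresponding updates are

    $$P(z\mid x,y)\propto P(z\mid x)\,P(y\mid z)\qquad\text{(E-step)}$$
    $$P(z\mid x)\propto\sum_{y}n(x,y)\,P(z\mid x,y),\qquad P(y\mid z)\propto\sum_{x}n(x,y)\,P(z\mid x,y)\qquad\text{(M-step)}$$
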
/cf_ranking/PMF.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlgorithmFan/GraphicalModelForRecommendation/a1ff8877dcd91d1fd3880a12c25120be027515e8/cf_ranking/PMF.py
--------------------------------------------------------------------------------
/cf_ranking/Recommender.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #coding:utf-8
3 |
4 | from scipy.sparse import dok_matrix
5 |
6 | class Recommender:
7 | def __init__(self, trainMatrix, testMatrix, configHandler):
8 | self.trainMatrix = trainMatrix
9 | self.testMatrix = testMatrix
10 | self.configHandler = configHandler
11 |
12 | def initModel(self):
13 |         self.numUsers, self.numItems = self.trainMatrix.shape
14 | self.prediction = dok_matrix((self.numUsers, self.numItems))
15 |         self.MAX_Iterations = int(self.configHandler.getParameter('PMF', 'MAX_Iterations'))
16 |
17 | def buildModel(self):
18 | pass
19 |
20 | def predict(self):
21 | pass
22 |
23 | def evaluate(self):
24 | pass
25 |
26 | def execute(self):
27 | self.initModel()
28 | self.buildModel()
29 | self.evaluate()
--------------------------------------------------------------------------------
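
Recommender.execute() is a template method: initModel, buildModel and evaluate run in a fixed order, and subclasses override the steps they need. A minimal hypothetical subclass (not part of the repo) to illustrate the contract; it inherits initModel, so the configHandler passed in must provide the 'PMF' section that initModel reads:

    from Recommender import Recommender

    class GlobalMean(Recommender):
        """Predicts the global mean rating for every test pair (illustrative only)."""
        def buildModel(self):
            ratings = self.trainMatrix.values()
            mean = sum(ratings) / float(len(ratings))
            for user_id, item_id in self.testMatrix.keys():
                self.prediction[user_id, item_id] = mean
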
/cf_ranking/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlgorithmFan/GraphicalModelForRecommendation/a1ff8877dcd91d1fd3880a12c25120be027515e8/cf_ranking/__init__.py
--------------------------------------------------------------------------------
/cf_rating/BNPoissMF.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #coding:utf-8
3 |
4 | '''
5 | Paper: Prem Gopalan, Francisco J.R. Ruiz, et al. Bayesian Nonparametric Poisson Factorization for Recommendation Systems
6 | github: https://github.com/premgopalan/hgaprec/tree/master/src
7 |
8 | Author: Haidong Zhang
9 | Date: April 16, 2016
10 | '''
11 |
12 | from graphicalrecommender import Recommender
13 | import numpy as np
14 | from numpy import log, sqrt
15 | from scipy.sparse import dok_matrix
16 | from util import normalize
17 | from util import Logger
18 |
19 | THRESHOLD = 1e-30
20 |
21 | class BNPoissMF(Recommender):
22 | def __init__(self, trainMatrix, testMatrix, configHandler):
23 |         Recommender.__init__(self, trainMatrix, testMatrix, configHandler)
24 | self.logger = Logger('BNPoissMF.log')
25 |
26 |
27 | def initModel(self):
28 | ''' Read the model parameters, and get some common values.
29 | '''
30 |         self.numUsers, self.numItems = self.trainMatrix.shape
31 | self.prediction = dok_matrix((self.numUsers, self.numItems))
32 | self.MAX_Iterations = int(self.configHandler.getParameter('BPoissMF', 'MAX_Iterations'))
33 | self.numFactors = int(self.configHandler.getParameter('BPoissMF', 'numFactors'))
34 | self.threshold = float(self.configHandler.getParameter('BPoissMF', 'threshold'))
35 |
36 | # Get the Parameters
37 | self.user_alpha = float(self.configHandler.getParameter('BPoissMF', 'user_alpha'))
38 | self.user_c = float(self.configHandler.getParameter('BPoissMF', 'user_c'))
39 |
40 | self.item_a = float(self.configHandler.getParameter('BPoissMF', 'item_a'))
41 | self.item_b = float(self.configHandler.getParameter('BPoissMF', 'item_b'))
42 |
43 | # The model parameters for users
44 | self.gamma0 = np.zeros(self.numUsers)
45 | self.gamma1 = np.zeros(self.numUsers)
46 | self.s = np.zeros(self.numUsers)
47 | self.nu = np.zeros((self.numUsers, self.numFactors))
48 | self.theta = np.zeros((self.numUsers, self.numFactors))
49 |
50 | # The model parameters for stick proportions
51 | self.tau = np.zeros((self.numUsers, self.numFactors))
52 |
53 | # The model parameters for item weights
54 | self.lambda0 = np.zeros((self.numItems, self.numFactors))
55 | self.lambda1 = np.zeros((self.numItems, self.numFactors))
56 | self.beta = np.zeros((self.numItems, self.numFactors))
57 |
58 | self.z = np.zeros((self.numUsers, self.numItems))
59 |
60 | self.pi = np.zeros((self.numUsers, self.numItems))
61 | self.logPi = np.zeros((self.numUsers, self.numItems))
62 |
63 |
64 |     def buildModel(self):
65 | pass
66 |
67 | def initUserScalingParameters(self):
68 | ''' initial equations for the user scaling parameters gamma_u0 and gamma_u1
69 |
70 | '''
71 |
72 |
73 |
74 |
75 | def initStickProportions(self):
76 | ''' The update equations for the stick proportions tau_uk can be obtained by taking the derivative of the objective function with respect to tau_uk
77 |
78 | '''
79 | self.nu = 0.001 * np.random.random((self.numUsers, self.numFactors))
80 |
81 |
82 | def computePi(self):
83 | ''' Equation (10)
84 |
85 | '''
86 |
87 |
88 |
89 | def initItemWeights(self):
90 | pass
91 |
92 | def updateUserScalingParameters(self):
93 | pass
94 |
95 | def updateStickProportions(self):
96 | pass
97 |
98 | def updateItemWeights(self):
99 | pass
100 |
101 | def calculateConjugacy(self):
102 | pass
103 |
104 | def GammaPoisson(self):
105 | pass
106 |
107 | def solveQuadratic(self, a, b, c):
108 | '''
109 | '''
110 | s1 = (-b + sqrt(b*b - 4*a*c)) / (2*a)
111 | s2 = (-b - sqrt(b*b - 4*a*c)) / (2*a)
112 |
113 | if s1 > .0 and s1 <= 1.0 and s2 > .0 and s2 <= 1.0:
114 | self.logger.error('s1 %f and s2 %f are out of range in solve_quadratic()' % (s1, s2))
115 | self.logger.error('a = %.5f, b = %.5f, c = %.5f\n' % (a, b, c))
116 |
117 | if s1 < s2:
118 | return s1 + THRESHOLD
119 | else:
120 | return s2 + THRESHOLD
121 |
122 | if s1 > .0 and s1 <= 1.0:
123 | return s1
124 |
125 |         if s2 > .0 and s2 <= 1.0:
126 | return s2
127 |
128 | if np.abs(s1 - .0) < THRESHOLD:
129 | return THRESHOLD
130 |
131 | if np.abs(1.0 - s1) < THRESHOLD:
132 | return 1.0 - THRESHOLD
133 |
134 | if np.abs(s2 - .0) < THRESHOLD:
135 | return THRESHOLD
136 |
137 | if np.abs(s2 - 1.0) < THRESHOLD:
138 | return 1.0 - THRESHOLD
139 |
140 | self.logger.error('WARNING: s1 %.10f and s2 %.10f are out of range in solve_quadratic()' % (s1, s2))
141 | return s1
142 |
143 | if __name__ == '__main__':
144 |     # BNPoissMF needs trainMatrix, testMatrix and a configHandler; see GMRec.py for the wiring.
145 |     pass
--------------------------------------------------------------------------------
/cf_rating/BPMF.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #coding:utf-8
3 | """
4 | Reference code: http://www.utstat.toronto.edu/~rsalakhu/BPMF.html
5 | Reference paper: Salakhutdinov and Mnih, Bayesian Probabilistic Matrix Factorization using Markov Chain Monte Carlo, ICML 2008.
6 | """
7 |
8 | import numpy as np
9 | import codecs
10 | from graphicalrecommender import Recommender
11 | from util.normalwishartdistribution import NormalWishartDistribution
12 | from scipy.sparse import dok_matrix
13 |
14 |
15 | class BayesianProbabilisticMatrixFactorization(Recommender):
16 | """
17 | Bayesian Probabilistic Matrix Factorization.
18 | """
19 | def __init__(self, recommender_context):
20 | Recommender.__init__(self, recommender_context)
21 |
22 | def _read_cfg(self):
23 | self.user_normal_dist_mu0_init = self.config_handler['Parameters', 'user_normal_dist_mu0', 'float']
24 | self.user_normal_dist_beta0_init = self.config_handler['Parameters', 'user_normal_dist_beta0', 'float']
25 | self.user_Wishart_dist_W0_init = self.config_handler['Parameters', 'user_Wishart_dist_W0', 'float']
26 |
27 | self.item_normal_dist_mu0_init = self.config_handler['Parameters', 'item_normal_dist_mu0', 'float']
28 | self.item_normal_dist_beta0_init = self.config_handler['Parameters', 'item_normal_dist_beta0', 'float']
29 | self.item_Wishart_dist_W0_init = self.config_handler['Parameters', 'item_Wishart_dist_W0', 'float']
30 |
31 | self.rating_sigma_init = self.config_handler['Parameters', 'rating_sigma', 'float']
32 |
33 | def _init_model(self):
34 | self.user_num, self.item_num = self.train_matrix.shape
35 | self.mean_rating = np.mean(self.train_matrix.values())
36 |
37 | self.predictions = dok_matrix((self.user_num, self.item_num))
38 |
39 | if self.config_handler['Output', 'is_load', 'bool']:
40 | self._load_model()
41 | assert(self.user_factors.shape[1] == self.item_factors.shape[1])
42 | self.factor_num = self.user_factors.shape[1]
43 | else:
44 | self._read_cfg()
45 |
46 | if self.config_handler['Parameters', 'is_init_path', 'bool']:
47 | self._load_init_model()
48 | else:
49 | self.factor_num = self.config_handler['Parameters', 'factor_num', 'int']
50 | self.user_factors = np.random.normal(0, 1, size=(self.user_num, self.factor_num))
51 | self.item_factors = np.random.normal(0, 1, size=(self.item_num, self.factor_num))
52 |
53 | self.markov_num = 0
54 | validation_rmse, test_rmse = self.__evaluate_epoch__()
55 | self.logger['Process'].debug('Epoch {0}: Training RMSE - {1}, Testing RMSE - {2}'.format(0, validation_rmse, test_rmse))
56 |
57 | self.user_normal_dist_mu0 = np.zeros(self.factor_num, np.float) + self.user_normal_dist_mu0_init
58 | self.user_normal_dist_beta0 = self.user_normal_dist_beta0_init
59 | self.user_Wishart_dist_W0 = np.eye(self.factor_num) * self.user_Wishart_dist_W0_init
60 | self.user_Wishart_dist_nu0 = self.factor_num
61 |
62 | self.item_normal_dist_mu0 = np.zeros(self.factor_num, np.float) + self.item_normal_dist_mu0_init
63 | self.item_normal_dist_beta0 = self.item_normal_dist_beta0_init
64 | self.item_Wishart_dist_W0 = np.eye(self.factor_num) * self.item_Wishart_dist_W0_init
65 | self.item_Wishart_dist_nu0 = self.factor_num
66 |
67 | self.rating_sigma = self.rating_sigma_init
68 |
69 | def _build_model(self):
70 | user_train_matrix = dict()
71 | item_train_matrix = dict()
72 | for user_id, item_id in self.train_matrix.keys():
73 | user_train_matrix.setdefault(user_id, dok_matrix((1, self.item_num)))
74 | user_train_matrix[user_id][0, item_id] = self.train_matrix.get((user_id, item_id))
75 | item_train_matrix.setdefault(item_id, dok_matrix((1, self.user_num)))
76 | item_train_matrix[item_id][0, user_id] = self.train_matrix.get((user_id, item_id))
77 |
78 | self.previous_loss = -np.inf
79 | max_iterations = self.config_handler['Parameters', 'max_iterations', 'int']
80 | for iteration in range(max_iterations):
81 | self.logger['Process'].debug('Epoch {0}: update hyper-parameters'.format(iteration))
82 | user_factors_mu, user_factors_variance = \
83 | self._sampling_hyperparameters(self.user_factors, self.user_normal_dist_mu0, self.user_normal_dist_beta0,
84 | self.user_Wishart_dist_nu0, self.user_Wishart_dist_W0)
85 | item_factors_mu, item_factors_variance = \
86 | self._sampling_hyperparameters(self.item_factors, self.item_normal_dist_mu0, self.item_normal_dist_beta0,
87 | self.item_Wishart_dist_nu0, self.item_Wishart_dist_W0)
88 |
89 | self.logger['Process'].debug('Epoch {0}: update latent factors'.format(iteration))
90 | for gibbs_iteration in range(2):
91 | for user_id in range(self.user_num):
92 | user_ratings = user_train_matrix[user_id] if user_id in user_train_matrix else dict()
93 | if len(user_ratings.keys()) == 0:
94 | continue
95 | self.user_factors[user_id] = self._update_parameters(
96 | self.item_factors, user_ratings, user_factors_mu, user_factors_variance)
97 |
98 | for item_id in range(self.item_num):
99 | item_ratings = item_train_matrix[item_id] if item_id in item_train_matrix else dict()
100 | if len(item_ratings.keys()) == 0:
101 | continue
102 | self.item_factors[item_id] = self._update_parameters(
103 | self.user_factors, item_ratings, item_factors_mu, item_factors_variance)
104 |
105 | validation_rmse, test_rmse = self.__evaluate_epoch__()
106 | self.logger['Process'].debug('Epoch {0}: Training RMSE - {1}, Testing RMSE - {2}'.format(iteration, validation_rmse, test_rmse))
107 |
108 | def __evaluate_epoch__(self):
109 | validation_rmse = 0.0
110 | for user_id, item_id in self.train_matrix.keys():
111 | real_rating = self.train_matrix.get((user_id, item_id))
112 | predict_rating = self._predict(user_id, item_id)
113 | validation_rmse += (real_rating - predict_rating) ** 2
114 | self._recommend()
115 | results = self._evaluate()
116 | return np.sqrt(validation_rmse/len(self.train_matrix.keys())), results['RMSE']
117 |
118 | def _sampling_hyperparameters(self, factors, normal_dist_mu0, normal_dist_beta0, Wishart_dist_nu0, Wishart_dist_W0):
119 | num_N = factors.shape[0]
120 | mean_U = np.mean(factors, axis=0)
121 | variance_S = np.cov(factors.transpose(), bias=1)
122 | mu0_minus_factors = normal_dist_mu0 - mean_U
123 | mu0_minus_factors = np.reshape(mu0_minus_factors, (mu0_minus_factors.shape[0], 1))
124 |
125 | W0 = np.linalg.inv(Wishart_dist_W0) + num_N * variance_S \
126 | + normal_dist_beta0 * num_N / (normal_dist_beta0 + num_N) * np.dot(mu0_minus_factors, mu0_minus_factors.transpose())
127 | W0_post = np.linalg.inv(W0)
128 | W0_post = (W0_post + W0_post.transpose()) / 2
129 |
130 | mu_post = (normal_dist_beta0 * normal_dist_mu0 + num_N * mean_U) / (normal_dist_beta0 + num_N)
131 | beta_post = (normal_dist_beta0 + num_N)
132 | nu_post = Wishart_dist_nu0 + num_N
133 | normal_Wishart_distribution = NormalWishartDistribution(mu_post, beta_post, nu_post, W0_post)
134 | mu, sigma = normal_Wishart_distribution.sample()
135 | return mu, sigma
136 |
137 | def _update_parameters(self, factors, ratings, factors_mu, factors_variance):
138 | index = np.array([col_id for row_id, col_id in ratings.keys()])
139 | VVT = np.dot(factors[index, :].transpose(), factors[index, :])
140 | sigma = factors_variance + self.rating_sigma * VVT
141 | sigma_inv = np.linalg.inv(sigma)
142 |
143 | rating_values = np.array(ratings.values()) - self.mean_rating
144 | VR = np.dot(factors[index, :].transpose(), np.reshape(rating_values, newshape=(rating_values.shape[0], 1)))
145 | mu_right = self.rating_sigma * VR + np.dot(factors_variance, np.reshape(factors_mu, newshape=(factors_mu.shape[0], 1)))
146 | mu = np.dot(sigma_inv, mu_right)
147 | mu = np.reshape(mu, newshape=(mu.shape[0], ))
148 | return np.random.multivariate_normal(mu, sigma_inv)
149 |
150 | def _recommend(self):
151 | for user_id, item_id in self.test_matrix.keys():
152 | predict_rating = self._predict(user_id, item_id) + self.predictions[user_id, item_id] * self.markov_num
153 | self.predictions[user_id, item_id] = predict_rating / (self.markov_num + 1)
154 | self.markov_num += 1
155 |
156 | def _predict(self, user_id, item_id, time_id=0):
157 | predict_rating = np.dot(self.user_factors[user_id, :], self.item_factors[item_id, :]) + self.mean_rating
158 | if predict_rating > 5:
159 | return 5
160 | elif predict_rating < 1:
161 | return 1
162 | else:
163 | return predict_rating
164 |
165 | def _load_init_model(self):
166 | load_path = self.config_handler["Output", "load_path", "string"]
167 | load_file = load_path + "PMF_{0}.txt".format(self.recommender_context.experiment_id)
168 |
169 | with codecs.open(load_file, mode='r', encoding='utf-8') as read_fp:
170 | for line in read_fp:
171 | if line.startswith('factor_num'):
172 | self.factor_num = int(line.split(':')[1].strip())
173 | elif line.startswith('user_factor'):
174 | self.user_factors = self._load_matrix(read_fp)
175 | elif line.startswith('item_factor'):
176 | self.item_factors = self._load_matrix(read_fp)
177 |
178 | def _save_result(self, result):
179 | self.logger['Result'].debug('factor_num: {0}'.format(self.factor_num))
180 |
181 | self.logger['Result'].debug('user_normal_dist_mu0: {0}'.format(self.user_normal_dist_mu0_init))
182 | self.logger['Result'].debug('user_normal_dist_beta0: {0}'.format(self.user_normal_dist_beta0_init))
183 | self.logger['Result'].debug('user_Wishart_dist_W0: {0}'.format(self.user_Wishart_dist_W0_init))
184 |
185 | self.logger['Result'].debug('item_normal_dist_mu0: {0}'.format(self.item_normal_dist_mu0_init))
186 | self.logger['Result'].debug('item_normal_dist_beta0: {0}'.format(self.item_normal_dist_beta0_init))
187 | self.logger['Result'].debug('item_Wishart_dist_W0: {0}'.format(self.item_Wishart_dist_W0_init))
188 |
189 | Recommender._save_result(self, result)
190 |
191 | def _save_model(self):
192 | save_path = self.config_handler["Output", "save_path", "string"]
193 | save_file = save_path + "BPMF_{0}.txt".format(self.recommender_context.experiment_id)
194 |
195 | with codecs.open(save_file, mode='w', encoding='utf-8') as write_fp:
196 | write_fp.write('factor_num: {0}\n'.format(self.factor_num))
197 | write_fp.write('user_normal_dist_mu0: {0}\n'.format(self.user_normal_dist_mu0_init))
198 | write_fp.write('user_normal_dist_beta0: {0}\n'.format(self.user_normal_dist_beta0_init))
199 | write_fp.write('user_Wishart_dist_W0: {0}\n'.format(self.user_Wishart_dist_W0_init))
200 | write_fp.write('item_normal_dist_mu0: {0}\n'.format(self.item_normal_dist_mu0_init))
201 | write_fp.write('item_normal_dist_beta0: {0}\n'.format(self.item_normal_dist_beta0_init))
202 | write_fp.write('item_Wishart_dist_W0: {0}\n'.format(self.item_Wishart_dist_W0_init))
203 |
204 | write_fp.write('user_factors \n')
205 | self._save_matrix(self.user_factors, write_fp)
206 |
207 | write_fp.write('item_factors \n')
208 | self._save_matrix(self.item_factors, write_fp)
209 |
210 | def _load_model(self):
211 | load_path = self.config_handler["Output", "load_path", "string"]
212 |         load_file = load_path + "BPMF_{0}.txt".format(self.recommender_context.experiment_id)
213 |
214 | with codecs.open(load_file, mode='r', encoding='utf-8') as read_fp:
215 | for line in read_fp:
216 | if line.startswith('factor_num'):
217 | self.factor_num = int(line.split(':')[1].strip())
218 | elif line.startswith('user_normal_dist_mu0'):
219 | self.user_normal_dist_mu0_init = float(line.split(':')[1].strip())
220 | elif line.startswith('user_normal_dist_beta0'):
221 | self.user_normal_dist_beta0_init = float(line.split(':')[1].strip())
222 | elif line.startswith('user_Wishart_dist_W0'):
223 | self.user_Wishart_dist_W0_init = float(line.split(':')[1].strip())
224 | elif line.startswith('item_normal_dist_mu0'):
225 | self.item_normal_dist_mu0_init = float(line.split(':')[1].strip())
226 | elif line.startswith('item_normal_dist_beta0'):
227 | self.item_normal_dist_beta0_init = float(line.split(':')[1].strip())
228 | elif line.startswith('item_Wishart_dist_W0'):
229 | self.item_Wishart_dist_W0_init = float(line.split(':')[1].strip())
230 | elif line.startswith('user_factor'):
231 | self.user_factors = self._load_matrix(read_fp)
232 | elif line.startswith('item_factor'):
233 | self.item_factors = self._load_matrix(read_fp)
--------------------------------------------------------------------------------
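
For reference, the conditional that _update_parameters samples from is the per-user posterior of Salakhutdinov and Mnih (2008), with ratings centered by mean_rating and rating_sigma acting as the observation precision alpha:

    $$\Lambda_i^{*}=\Lambda_U+\alpha\sum_{j\in\Omega_i}V_jV_j^{\top},\qquad \mu_i^{*}=(\Lambda_i^{*})^{-1}\Big(\alpha\sum_{j\in\Omega_i}(r_{ij}-\bar r)\,V_j+\Lambda_U\mu_U\Big),\qquad U_i\sim\mathcal{N}\big(\mu_i^{*},(\Lambda_i^{*})^{-1}\big)$$

where $\Omega_i$ is the set of items rated by user $i$; in the code, factors_variance holds the sampled precision $\Lambda_U$, and the item update is symmetric.
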
/cf_rating/BPTF.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #coding:utf-8
3 |
4 | """
5 | Reference Paper: Liang Xiong et al.
6 | Temporal Collaborative Filtering with Bayesian Probabilistic Tensor Factorization,
7 | Reference Code: https://www.cs.cmu.edu/~lxiong/bptf/bptf.html
8 | """
9 |
10 | import numpy as np
11 | import codecs
12 | from collections import defaultdict
13 | from scipy.sparse import dok_matrix
14 | from graphicalrecommender import Recommender
15 | from util.normalwishartdistribution import NormalWishartDistribution
16 |
17 | class BayesianProbabilisticTensorFactorization(Recommender):
18 | def __init__(self, config_handler):
19 | Recommender.__init__(self, config_handler)
20 |
21 | def _read_cfg(self):
22 |
23 | self.user_normal_dist_mu0_init = self.config_handler['Parameters', 'user_normal_dist_mu0', 'float']
24 | self.user_normal_dist_beta0_init = self.config_handler['Parameters', 'user_normal_dist_beta0', 'float']
25 | self.user_Wishart_dist_W0_init = self.config_handler['Parameters', 'user_Wishart_dist_W0', 'float']
26 |
27 | self.item_normal_dist_mu0_init = self.config_handler['Parameters', 'item_normal_dist_mu0', 'float']
28 | self.item_normal_dist_beta0_init = self.config_handler['Parameters', 'item_normal_dist_beta0', 'float']
29 | self.item_Wishart_dist_W0_init = self.config_handler['Parameters', 'item_Wishart_dist_W0', 'float']
30 |
31 | self.time_normal_dist_mu0_init = self.config_handler['Parameters', 'time_normal_dist_mu0', 'float']
32 | self.time_normal_dist_beta0_init = self.config_handler['Parameters', 'time_normal_dist_beta0', 'float']
33 | self.time_Wishart_dist_W0_init = self.config_handler['Parameters', 'time_Wishart_dist_W0', 'float']
34 |
35 | self.rating_sigma_init = self.config_handler['Parameters', 'rating_sigma', 'float']
36 |
37 | def _init_model(self):
38 | self.user_num, self.item_num, self.time_num = self.train_tensor.shape()
39 | self.mean_rating = np.mean(self.train_tensor.values())
40 |
41 |         self.predictions = defaultdict(float)  # keyed by (user_id, item_id, time_id); dok_matrix is 2-D only
42 |
43 | if self.config_handler['Parameters', 'is_load', 'bool']:
44 | self._load_model()
45 | assert(self.user_factors.shape[1] == self.item_factors.shape[1] and self.item_factors.shape[1] == self.time_factors.shape[1])
46 | self.factor_num = self.user_factors.shape[1]
47 | else:
48 | self._read_cfg()
49 |
50 | # initialize the latent factors of user, item and time.
51 | if self.config_handler['Parameters', 'is_init_path', 'bool']:
52 | self._load_init_model()
53 | else:
54 | self.factor_num = self.config_handler['Parameters', 'factor_num', 'int']
55 | self.user_factors = np.random.normal(0, 1, size=(self.user_num, self.factor_num))
56 | self.item_factors = np.random.normal(0, 1, size=(self.item_num, self.factor_num))
57 | self.time_factors = np.random.normal(0, 1, size=(self.time_num, self.factor_num))
58 |
59 | self.markov_num = 0
60 | validation_rmse, test_rmse = self.__evaluate_epoch__()
61 | self.logger['Process'].debug('Epoch {0}: Training RMSE - {1}, Testing RMSE - {2}'.format(0, validation_rmse, test_rmse))
62 |
63 | # get the user parameters
64 | self.user_normal_dist_mu0 = np.zeros(self.factor_num, np.float) + self.user_normal_dist_mu0_init
65 | self.user_normal_dist_beta0 = self.user_normal_dist_beta0_init
66 | self.user_Wishart_dist_W0 = np.eye(self.factor_num) * self.user_Wishart_dist_W0_init
67 | self.user_Wishart_dist_nu0 = self.factor_num
68 |
69 | # get the item parameters
70 | self.item_normal_dist_mu0 = np.zeros(self.factor_num, np.float) + self.item_normal_dist_mu0_init
71 | self.item_normal_dist_beta0 = self.item_normal_dist_beta0_init
72 | self.item_Wishart_dist_W0 = np.eye(self.factor_num) * self.item_Wishart_dist_W0_init
73 | self.item_Wishart_dist_nu0 = self.factor_num
74 |
75 | # get the time parameters
76 | self.time_normal_dist_mu0 = np.zeros(self.factor_num, np.float) + self.time_normal_dist_mu0_init
77 | self.time_normal_dist_beta0 = self.time_normal_dist_beta0_init
78 | self.time_Wishart_dist_W0 = np.eye(self.factor_num) * self.time_Wishart_dist_W0_init
79 | self.time_Wishart_dist_nu0 = self.factor_num
80 |
81 | self.rating_sigma = self.rating_sigma_init
82 |
83 | def _build_model(self):
84 |
85 | # Speed up the process of gibbs sampling
86 | train_matrix_by_user, train_matrix_by_item, train_matrix_by_time = dict(), dict(), dict()
87 | for user_id, item_id, time_id in self.train_tensor.keys():
88 | train_matrix_by_user.setdefault(user_id, dok_matrix((self.item_num, self.time_num)))
89 | train_matrix_by_user[user_id][item_id, time_id] = self.train_tensor[user_id, item_id, time_id]
90 |
91 | train_matrix_by_item.setdefault(item_id, dok_matrix((self.user_num, self.time_num)))
92 | train_matrix_by_item[item_id][user_id, time_id] = self.train_tensor[user_id, item_id, time_id]
93 |
94 | train_matrix_by_time.setdefault(time_id, dok_matrix((self.user_num, self.item_num)))
95 | train_matrix_by_time[time_id][user_id, item_id] = self.train_tensor[user_id, item_id, time_id]
96 |
97 | max_iterations = self.config_handler['Parameters', 'max_iterations', 'int']
98 | for iteration in range(max_iterations):
99 | user_factors_mu, user_factors_variance = \
100 | self._sampling_hyperparameters(self.user_factors, self.user_normal_dist_mu0, self.user_normal_dist_beta0,
101 | self.user_Wishart_dist_nu0, self.user_Wishart_dist_W0)
102 | item_factors_mu, item_factors_variance = \
103 | self._sampling_hyperparameters(self.item_factors, self.item_normal_dist_mu0, self.item_normal_dist_beta0,
104 | self.item_Wishart_dist_nu0, self.item_Wishart_dist_W0)
105 |
106 | time_factors_mu, time_factors_variance = \
107 | self._sampling_time_hyperparameters(self.time_factors, self.time_normal_dist_mu0, self.time_normal_dist_beta0,
108 | self.time_Wishart_dist_nu0, self.time_Wishart_dist_W0)
109 |
110 | for gibbs_iteration in range(2):
111 | for user_id in range(self.user_num):
112 |                     if user_id not in train_matrix_by_user:
113 |                         continue
114 |                     item_time_matrix = train_matrix_by_user[user_id]
115 | self.user_factors[user_id] = self._update_parameters(
116 | self.item_factors, self.time_factors, item_time_matrix, user_factors_mu, user_factors_variance)
117 |
118 | for item_id in range(self.item_num):
119 |                     if item_id not in train_matrix_by_item:
120 |                         continue
121 |                     user_time_matrix = train_matrix_by_item[item_id]
122 | self.item_factors[item_id] = self._update_parameters(
123 | self.user_factors, self.time_factors, user_time_matrix, item_factors_mu, item_factors_variance)
124 |
125 | for time_id in range(self.time_num):
126 |                     if time_id not in train_matrix_by_time:
127 |                         continue
128 |                     user_item_matrix = train_matrix_by_time[time_id]
129 | self.time_factors[time_id] = self._update_time_parameters(
130 | self.user_factors, self.item_factors, self.time_factors, user_item_matrix, time_factors_mu, time_factors_variance, time_id)
131 |
132 | validation_rmse, test_rmse = self.__evaluate_epoch__()
133 | self.logger['Process'].debug('Epoch {0}: Training RMSE - {1}, Testing RMSE - {2}'.format(iteration, validation_rmse, test_rmse))
134 |
135 | def run(self):
136 | self.logger['Process'].debug('Get the train dataset')
137 | self.train_tensor = self.recommender_context.get_data_model().get_data_splitter().get_train_data()
138 | self.logger['Result'].debug('The number of user-item pair in train dataset is {0}'.format(len(self.train_tensor.keys())))
139 |
140 | self.logger['Process'].debug('Get the test dataset')
141 | self.test_tensor = self.recommender_context.get_data_model().get_data_splitter().get_test_data()
142 | self.logger['Result'].debug('The number of user-item pair in test dataset is {0}'.format(len(self.test_tensor.keys())))
143 |
144 | self.logger['Process'].debug('Initialize the model parameters')
145 | self._init_model()
146 |
147 | self.logger['Process'].debug('Building model....')
148 | self._build_model()
149 |
150 | is_save = self.config_handler['Output', 'is_save', 'bool']
151 | if is_save:
152 | self.logger['Process'].debug('Save model ....')
153 | self._save_model()
154 |
155 | self.logger['Process'].debug('Recommending ...')
156 | self._recommend()
157 |
158 | self.logger['Process'].debug('Evaluating ...')
159 | result = self._evaluate()
160 | self._save_result(result)
161 |
162 | self.logger['Process'].debug("Finish.")
163 | self.logger['Process'].debug("#"*50)
164 |
165 | def __evaluate_epoch__(self):
166 | validation_rmse = 0.0
167 | for user_id, item_id, time_id in self.train_tensor.keys():
168 | real_rating = self.train_tensor.get((user_id, item_id, time_id))
169 | predict_rating = self._predict(user_id, item_id, time_id)
170 | validation_rmse += (real_rating - predict_rating) ** 2
171 | self._recommend()
172 | results = self._evaluate()
173 | return np.sqrt(validation_rmse/len(self.train_tensor.keys())), results['RMSE']
174 |
175 | def _recommend(self):
176 | for user_id, item_id, time_id in self.test_tensor.keys():
177 | predict_rating = self._predict(user_id, item_id, time_id) + self.predictions[user_id, item_id, time_id] * self.markov_num
178 | self.predictions[user_id, item_id, time_id] = predict_rating / (self.markov_num + 1)
179 | self.markov_num += 1
180 |
181 | # Update hyper-parameters of user or item
182 | def _sampling_hyperparameters(self, factors, normal_dist_mu0, normal_dist_beta0, Wishart_dist_nu0, Wishart_dist_W0):
183 | num_N = factors.shape[0]
184 | mean_U = np.mean(factors, axis=0)
185 | variance_S = np.cov(factors.transpose(), bias=1)
186 | mu0_minus_factors = normal_dist_mu0 - mean_U
187 | mu0_minus_factors = np.reshape(mu0_minus_factors, (mu0_minus_factors.shape[0], 1))
188 |
189 | W0 = np.linalg.inv(Wishart_dist_W0) + num_N * variance_S \
190 | + normal_dist_beta0 * num_N / (normal_dist_beta0 + num_N) * np.dot(mu0_minus_factors, mu0_minus_factors.transpose())
191 | W0_post = np.linalg.inv(W0)
192 | W0_post = (W0_post + W0_post.transpose()) / 2
193 |
194 | mu_post = (normal_dist_beta0 * normal_dist_mu0 + num_N * mean_U) / (normal_dist_beta0 + num_N)
195 | beta_post = (normal_dist_beta0 + num_N)
196 | nu_post = Wishart_dist_nu0 + num_N
197 | normal_Wishart_distribution = NormalWishartDistribution(mu_post, beta_post, nu_post, W0_post)
198 | mu, sigma = normal_Wishart_distribution.sample()
199 | return mu, sigma
200 |
201 | # Update time hyper-parameters
202 | def _sampling_time_hyperparameters(self, factors, normal_dist_mu0, normal_dist_beta0, Wishart_dist_nu0, Wishart_dist_W0):
203 | num_K = factors.shape[0]
204 | mu_post = (normal_dist_beta0 * normal_dist_mu0 + factors[0, :]) / (1.0 + normal_dist_beta0)
205 | beta_post = normal_dist_beta0 + 1.0
206 | nu_post = Wishart_dist_nu0 + num_K
207 | X = np.array([factors[t, :] - factors[t-1, :] for t in range(1, num_K)])
208 | variance_S = np.dot(X.transpose(), X)
209 |
210 | mu0_minus_factors = factors[0, :] - normal_dist_mu0
211 | mu0_minus_factors = np.reshape(mu0_minus_factors, newshape=(mu0_minus_factors.shape[0], 1))
212 |         W0_post = np.linalg.inv(Wishart_dist_W0) + variance_S + normal_dist_beta0 / (1.0 + normal_dist_beta0) * np.dot(mu0_minus_factors, mu0_minus_factors.transpose())
213 | normal_Wishart_distribution = NormalWishartDistribution(mu_post, beta_post, nu_post, W0_post)
214 | mu, sigma = normal_Wishart_distribution.sample()
215 | return mu, sigma
216 |
217 | def _update_parameters(self, factors0, factors1, ratings, factors_mu, factors_variance):
218 | """
219 | :param factors0:
220 | :param factors1:
221 | :param ratings:
222 | :param factors_mu:
223 | :param factors_variance:
224 | :return:
225 | """
226 | index = ratings.keys()
227 |
228 | QQ = 0
229 | RQ = 0
230 | for dim0, dim1 in index:
231 | Q = factors0[dim0, :] * factors1[dim1, :]
232 | QQ += np.mat(Q).transpose() * np.mat(Q)
233 | RQ += (ratings[dim0, dim1] - self.mean_rating) * Q
234 | sigma_inv = np.linalg.inv(factors_variance + self.rating_sigma * QQ)
235 |         mu = np.dot(sigma_inv, np.dot(factors_variance, np.reshape(factors_mu, newshape=(factors_mu.shape[0], 1))) + self.rating_sigma * np.reshape(np.asarray(RQ), newshape=(factors_mu.shape[0], 1)))
236 |         return np.random.multivariate_normal(np.asarray(mu).flatten(), sigma_inv)
237 |
238 | def _update_time_parameters(self, user_factors, item_factors, time_factors, ratings, factors_mu, factors_variance, time_id):
239 | index = ratings.keys()
240 | QQ, RQ = 0.0, 0.0
241 | for dim0, dim1 in index:
242 | Q = user_factors[dim0, :] * item_factors[dim1, :]
243 | QQ += np.mat(Q).transpose() * np.mat(Q)
244 | RQ += (ratings[dim0, dim1] - self.mean_rating) * Q
245 |
246 | RQ = np.reshape(RQ, newshape=(RQ.shape[0], 1))
247 | if time_id == 0:
248 | mu = (time_factors[1, :] + factors_mu) / 2
249 | sigma_inv = np.linalg.inv(2 * factors_variance + self.rating_sigma * QQ)
250 | elif time_id == self.time_num-1:
251 | sigma_inv = np.linalg.inv(factors_variance + self.rating_sigma * QQ)
252 | Tk_1 = np.reshape(time_factors[self.time_num-2, :], newshape=(time_factors.shape[1], 1))
253 | mu = sigma_inv * (np.dot(factors_variance, Tk_1) + self.rating_sigma * RQ)
254 | else:
255 | sigma_inv = np.linalg.inv(2 * factors_variance + self.rating_sigma * QQ)
256 | Tk = time_factors[time_id-1, :] + time_factors[time_id+1, :]
257 | mu = sigma_inv * (np.dot(factors_variance, np.reshape(Tk, newshape=(Tk.shape[0], 1))) + self.rating_sigma * RQ)
258 |
259 |         return np.random.multivariate_normal(np.asarray(mu).flatten(), sigma_inv)
260 |
261 | def _predict(self, user_id, item_id, time_id=0):
262 | assert(time_id < self.time_num)
263 | predict_rating = np.sum(self.user_factors[user_id, :] * self.item_factors[item_id, :] * self.time_factors[time_id, :]) + self.mean_rating
264 | if predict_rating > 5:
265 | return 5
266 | elif predict_rating < 1:
267 | return 1
268 | else:
269 | return predict_rating
270 |
271 | def _save_result(self, result):
272 | self.logger['Result'].debug('factor_num: {0}'.format(self.factor_num))
273 |
274 | self.logger['Result'].debug('user_normal_dist_mu0: {0}'.format(self.user_normal_dist_mu0_init))
275 | self.logger['Result'].debug('user_normal_dist_beta0: {0}'.format(self.user_normal_dist_beta0_init))
276 | self.logger['Result'].debug('user_Wishart_dist_W0: {0}'.format(self.user_Wishart_dist_W0_init))
277 |
278 | self.logger['Result'].debug('item_normal_dist_mu0: {0}'.format(self.item_normal_dist_mu0_init))
279 | self.logger['Result'].debug('item_normal_dist_beta0: {0}'.format(self.item_normal_dist_beta0_init))
280 | self.logger['Result'].debug('item_Wishart_dist_W0: {0}'.format(self.item_Wishart_dist_W0_init))
281 |
282 | self.logger['Result'].debug('time_normal_dist_mu0: {0}'.format(self.time_normal_dist_mu0_init))
283 | self.logger['Result'].debug('time_normal_dist_beta0: {0}'.format(self.time_normal_dist_beta0_init))
284 | self.logger['Result'].debug('time_Wishart_dist_W0: {0}'.format(self.time_Wishart_dist_W0_init))
285 |
286 | self.logger['Result'].debug('rating_sigma: {0}'.format(self.rating_sigma_init))
287 | Recommender._save_result(self, result)
288 |
289 | def _load_init_model(self):
290 | load_path = self.config_handler["Output", "load_path", "string"]
291 | load_file = load_path + "PMF_{0}.txt".format(self.recommender_context.experiment_id)
292 |
293 | with codecs.open(load_file, mode='r', encoding='utf-8') as read_fp:
294 | for line in read_fp:
295 | if line.startswith('factor_num'):
296 | self.factor_num = int(line.split(':')[1].strip())
297 | elif line.startswith('user_factor'):
298 | self.user_factors = self._load_matrix(read_fp)
299 | elif line.startswith('item_factor'):
300 | self.item_factors = self._load_matrix(read_fp)
--------------------------------------------------------------------------------
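
BPTF extends BPMF with a third factor matrix for time, whose rows are chained as a Gaussian random walk; this is why _update_time_parameters branches on time_id and mixes each $T_k$'s neighbours. In Xiong et al.'s notation:

    $$r_{ijk}\sim\mathcal{N}\Big(\sum_{d=1}^{D}U_{id}V_{jd}T_{kd},\ \alpha^{-1}\Big),\qquad T_1\sim\mathcal{N}(\mu_T,\Lambda_T^{-1}),\qquad T_k\sim\mathcal{N}(T_{k-1},\Lambda_T^{-1})\ \ (k>1)$$
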
/cf_rating/BPoissMF.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #coding:utf-8
3 |
4 | '''
5 | Paper: Prem Gopalan, et al. Scalable Recommendation with Poisson Factorization.
6 | Github: https://github.com/mertterzihan/PMF/blob/master/Code/PoissonFactorization.py#L47
7 | http://www.hongliangjie.com/2015/08/17/poisson-matrix-factorization/
8 |
9 | Author: Haidong Zhang
10 | Date: April 24, 2016
11 | '''
12 |
13 | from graphicalrecommender import Recommender
14 | import numpy as np
15 | from scipy.sparse import dok_matrix
16 | from util import normalize
17 | from random import shuffle
18 | from itertools import product
19 | from scipy.stats import poisson
20 | from scipy.stats import gamma as gammafun
21 | import sys
22 |
23 | def gammaRnd(shape, rate, size=None):
24 |     # The paper's Gamma conditionals are shape/rate; numpy parameterizes by shape/scale.
25 |     return np.random.gamma(shape, 1.0 / rate, size)
26 | 
27 | def poissonRnd(lam, size=None):
28 |     return np.random.poisson(lam, size)
29 | 
30 |
31 | class BPoissMF(Recommender):
32 | def __init__(self, trainMatrix, testMatrix, configHandler):
33 |         Recommender.__init__(self, trainMatrix, testMatrix, configHandler)
34 |
35 | def initModel(self):
36 |         self.numUsers, self.numItems = self.trainMatrix.shape
37 | self.prediction = dok_matrix((self.numUsers, self.numItems))
38 | self.MAX_Iterations = int(self.configHandler.getParameter('BPoissMF', 'MAX_Iterations'))
39 | self.numFactors = int(self.configHandler.getParameter('BPoissMF', 'numFactors'))
40 | self.threshold = float(self.configHandler.getParameter('BPoissMF', 'threshold'))
41 |
42 | # Get the Parameters
43 | self.a = float(self.configHandler.getParameter('BPoissMF', 'a'))
44 | self.ap = float(self.configHandler.getParameter('BPoissMF', 'ap'))
45 | self.bp = float(self.configHandler.getParameter('BPoissMF', 'bp'))
46 |
47 | self.c = float(self.configHandler.getParameter('BPoissMF', 'c'))
48 | self.cp = float(self.configHandler.getParameter('BPoissMF', 'cp'))
49 | self.dp = float(self.configHandler.getParameter('BPoissMF', 'dp'))
50 |
51 | # Init xi
52 | self.xi = gammaRnd(self.ap, self.ap/self.bp, size=self.numUsers)
53 | # Init theta
54 | self.theta = np.zeros((self.numUsers, self.numFactors))
55 | for i in range(self.numUsers):
56 |             self.theta[i, :] = gammaRnd(self.a, self.xi[i], size=self.numFactors)
57 |
58 | # Init eta
59 | self.eta = gammaRnd(self.cp, self.cp/self.dp, size=self.numItems)
60 | #Init beta
61 | self.beta = np.zeros((self.numItems, self.numFactors))
62 | for i in range(self.numItems):
63 |             self.beta[i, :] = gammaRnd(self.c, self.eta[i], size=self.numFactors)
64 |
65 | # Init z
66 | self.zs = np.zeros((self.numUsers, self.numItems, self.numFactors))
67 | for user_id, item_id in self.trainMatrix.keys():
68 | p = self.theta[user_id, :] * self.beta[item_id, :]
69 | p /= np.sum(p)
70 | self.zs[user_id, item_id, :] = np.random.multinomial(self.trainMatrix[user_id, item_id], p)
71 |
72 |     def sample(self):
73 | ''''''
74 | self.loglikelihood = []
75 | for curr_iter in xrange(self.MAX_Iterations):
76 | 'Gibbs Sampling.'
77 | randUsers = range(self.numUsers)
78 | randTopics = range(self.numFactors)
79 | randItems = range(self.numItems)
80 |
81 | # Sample theta
82 | shuffle(randUsers)
83 | for user_id in randUsers:
84 | shuffle(randTopics)
85 | for topic_id in randTopics:
86 | self.theta[user_id, topic_id] = gammaRnd(self.a + np.sum(self.zs[user_id, :, topic_id]),
87 | self.xi[user_id] + np.sum(self.beta[:, topic_id]))
88 |
89 | # Sample beta
90 | shuffle(randItems)
91 | for item_id in randItems:
92 | shuffle(randTopics)
93 | for topic_id in randTopics:
94 | self.beta[item_id, topic_id] = gammaRnd(self.c + np.sum(self.zs[:, item_id, topic_id]),
95 | self.eta[item_id] + np.sum(self.theta[:, topic_id]))
96 |
97 | # Sample xi
98 | shuffle(randUsers)
99 | for user_id in randUsers:
100 |                 self.xi[user_id] = gammaRnd(self.ap + self.numFactors*self.a, self.ap/self.bp + self.theta[user_id, :].sum())
101 |
102 | # Sample eta
103 | shuffle(randItems)
104 | for item_id in randItems:
105 |                 self.eta[item_id] = gammaRnd(self.cp + self.numFactors*self.c, self.cp/self.dp + self.beta[item_id, :].sum())
106 |
107 | # Sample zs
108 | nonzeros = self.trainMatrix.keys()
109 |             randNonZeros = np.random.permutation(len(nonzeros))
110 | for pair_id in randNonZeros:
111 | user_id, item_id = nonzeros[pair_id]
112 | p = self.theta[user_id, :] * self.beta[item_id, :]
113 | p /= p.sum()
114 | self.zs[user_id, item_id, :] = np.random.multinomial(self.trainMatrix[user_id, item_id], p)
115 |
116 |
117 | if __name__ == '__main__':
118 |     # BPoissMF needs trainMatrix, testMatrix and a configHandler; see GMRec.py for the wiring.
119 |     pass
--------------------------------------------------------------------------------
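
The Gibbs sweep in sample draws each block from its complete conditional in Gopalan et al.'s Poisson factorization (rate-parameterized Gammas, hence the inversion inside gammaRnd), where $y_{ui}$ is the observed count and $z_{ui\cdot}$ its allocation across factors:

    $$\theta_{uk}\mid\cdot\sim\mathrm{Gamma}\Big(a+\sum_i z_{uik},\ \xi_u+\sum_i\beta_{ik}\Big),\qquad \beta_{ik}\mid\cdot\sim\mathrm{Gamma}\Big(c+\sum_u z_{uik},\ \eta_i+\sum_u\theta_{uk}\Big)$$
    $$z_{ui\cdot}\mid\cdot\sim\mathrm{Multinomial}\Big(y_{ui},\ \frac{\theta_u\odot\beta_i}{\theta_u^{\top}\beta_i}\Big)$$
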
/cf_rating/GPLSA.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #coding:utf-8
3 |
4 | '''
5 | Gaussian Probabilistic Latent Semantic Analysis
6 | '''
7 |
8 | from graphicalrecommender import Recommender
9 |
10 | class GPLSA(Recommender):
11 | ''''''
12 | def __init__(self, trainMatrix, testMatrix, configHandler):
13 |         Recommender.__init__(self, trainMatrix, testMatrix, configHandler)
14 |
15 | def initModel(self):
16 | ''''''
17 |
18 |
19 | def buildModel(self):
20 | ''''''
21 |
22 | def eStep(self):
23 | ''''''
24 |
25 | def mStep(self):
26 | ''''''
27 |
28 | def predict(self, user_id, item_id):
29 | ''''''
30 |
--------------------------------------------------------------------------------
/cf_rating/MMLvd.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #coding:utf-8
3 |
4 | from graphicalrecommender import Recommender
5 | from scipy.sparse import dok_matrix
6 | import numpy as np
7 | from numpy import newaxis
8 |
9 | class MMLvd(Recommender):
10 | def __init__(self, trainMatrix, testMatrix, configHandler):
11 |         Recommender.__init__(self, trainMatrix, testMatrix, configHandler)
12 |
13 |
14 | def initModel(self):
15 |         self.numUsers, self.numItems = self.trainMatrix.shape
16 | self.prediction = dok_matrix((self.numUsers, self.numItems))
17 | self.MAX_Iterations = int(self.configHandler.getParameter('BPMF', 'MAX_Iterations'))
18 | self.numFactors = int(self.configHandler.getParameter('BPMF', 'numFactors'))
19 |
20 | self.beta0 = float(self.configHandler.getParameter('BPMF', 'beta0'))
21 | self.nu0 = float(self.configHandler.getParameter('BPMF', 'nu0'))
22 | self.wh0 = np.eye(self.numFactors)
23 |
24 | self.learnRate = float(self.configHandler.getParameter('BPMF', 'learning_rate'))
25 | self.regU = float(self.configHandler.getParameter('BPMF', 'regU'))
26 | self.regI = float(self.configHandler.getParameter('BPMF', 'regI'))
27 |
28 | self.P = np.random.normal(0, 1, size=(self.numUsers, self.numFactors))
29 | self.Q = np.random.normal(0, 1, size=(self.numItems, self.numFactors))
30 |
31 | self.alpha = 2
32 | self.alpha_k = self.alpha/self.numFactors
33 |
34 | self.numRatings = 5
35 |
36 | self.theta = np.random.dirichlet(np.array([self.alpha_k for i in range(self.numFactors)]))
37 | self.gamma = np.zeros((self.numUsers, self.numFactors, self.numItems))
38 |
39 | self.sigma = np.random.normal(0, 1, size=self.numRatings)
40 | self.omega = np.random.normal(0, 1, size=self.numItems)  # per-item offset: mu_vd is indexed as mu_vd[rating, item] below
41 |
42 | self.mu_vd = 1.0 / (1.0 + np.exp(-(self.omega[newaxis, :] + self.sigma[:, newaxis])))
43 |
44 | self.xi = 10.0
45 | self.nu = 10.0
46 | self.phi = 2.0
47 |
48 | def buildModel(self):
49 | pass
50 |
51 | def EStep(self):
52 | gamma_nkd = np.zeros((self.numUsers, self.numFactors, self.numItems))
53 | beta_vkd = np.zeros((self.numRatings, self.numFactors, self.numItems))
54 |
55 | for u in range(self.numUsers):
56 | for d in range(self.numItems):
57 | rating = self.trainMatrix.get((u, d))
58 | if rating == 0:
59 | gamma_nkd[u, :, d] = (beta_vkd[:, :, d] * (1 - self.mu_vd)[:, d, newaxis]).sum(axis=0)
60 | else:
61 | gamma_nkd[u, :, d] = beta_vkd[rating, :, d] * self.mu_vd[rating, d]
62 |
63 | qn_k = np.zeros((self.numUsers, self.numFactors))
64 | qn_kvd = np.zeros((self.numUsers, self.numFactors, self.numRatings, self.numItems))
65 | qn_vd = np.zeros((self.numUsers, self.numRatings, self.numItems))
66 | qn_k = np.exp(np.log(self.theta)[newaxis, :] + np.log(gamma_nkd).sum(axis=-1))
67 | qn_k /= qn_k.sum(axis=1, keepdims=True)  # normalise the factor posterior over k
68 | for u in range(self.numUsers):
69 | for d in range(self.numItems):
70 | rating = self.trainMatrix.get((u, d))
71 | if rating == 0:
72 | qn_kvd[u, :, rating, d] = qn_k[u, :] * (1 - self.mu_vd)[rating, d] * beta_vkd[rating, :, d] / ((1 - self.mu_vd)[:, d, newaxis] * beta_vkd[:, :, d]).sum(axis=0)
73 | else:
74 | qn_kvd[u, :, rating, d] = qn_k[u, :]
75 |
76 | qn_vd = qn_kvd.sum(axis=1)
77 |
78 | return qn_k, qn_kvd, qn_vd
79 |
80 |
81 | def MStep(self, qn_k, qn_kvd, qn_vd):
82 | self.theta = (self.alpha_k - 1 + qn_k.sum(axis=0)) / (self.numUsers - self.numFactors + self.alpha_k * self.numFactors)
83 | C_vdk = np.zeros((self.numRatings, self.numItems, self.numFactors))
84 |
85 | for u in range(self.numUsers):
86 | for d in range(self.numItems):
87 | rating = self.trainMatrix.get((u, d))
88 | for r in range(self.numRatings):
89 | if r == rating:
90 | C_vdk[r, d, :] += qn_k[u, :]
91 | else:
92 | C_vdk[r, d, :] += qn_kvd[u, :, r, d]
93 |
94 | beta_vdk = np.zeros((self.numRatings, self.numItems, self.numFactors))
95 | beta_vdk = (self.phi - 1 + C_vdk) / (qn_k.sum(axis=0) - self.numRatings + self.phi * self.numRatings)
96 |
97 | self.sigma = self.sigma - self.learnRate * ((self.mu_vd * (1 - self.mu_vd)).sum(axis=1) + self.sigma / self.xi)  # shape-consistent gradient step (regularisation terms only, as in the original)
98 | self.omega = self.omega - self.learnRate * ((self.mu_vd * (1 - self.mu_vd)).sum(axis=0) + self.omega / self.nu)
99 |
100 | self.mu_vd = 1.0 / (1.0 + np.exp(-(self.omega[newaxis, ...] + self.sigma[..., newaxis])))
101 |
102 | def predict(self, u, i):
103 | return np.argmax(self.mu_vd[:, i])+1
--------------------------------------------------------------------------------
/cf_rating/PMF.py:
--------------------------------------------------------------------------------
1 | #!usr/bin/env python
2 | #coding:utf-8
3 |
4 | """
5 | Reference code: http://www.utstat.toronto.edu/~rsalakhu/code_BPMF/pmf.m
6 | Reference paper: https://papers.nips.cc/paper/3208-probabilistic-matrix-factorization.pdf
7 | momentum: https://en.wikipedia.org/wiki/Stochastic_gradient_descent
8 | """
9 |
10 | import codecs
11 | import numpy as np
12 | from scipy.sparse import dok_matrix
13 | from random import shuffle
14 | from GraphicalRecommender import Recommender
15 |
16 |
17 | class ProbabilisticMatrixFactorization(Recommender):
18 | def __init__(self, recommender_context):
19 | Recommender.__init__(self, recommender_context)
20 |
21 | def _init_model(self):
22 | self.user_num, self.item_num = self.train_matrix.shape
23 | self.rating_mean = np.mean(self.train_matrix.values())
24 | self.predictions = dok_matrix((self.user_num, self.item_num))
25 |
26 | if self.config_handler['Output', 'is_load', 'bool']:
27 | self._load_model()
28 | assert(self.user_factors.shape[1] == self.item_factors.shape[1])
29 | self.factor_num = self.user_factors.shape[1]
30 | else:
31 | self.factor_num = self.config_handler['Parameters', 'factor_num', 'int']
32 | self.user_factors = np.random.normal(0, 1, size=(self.user_num, self.factor_num)) * 0.1
33 | self.item_factors = np.random.normal(0, 1, size=(self.item_num, self.factor_num)) * 0.1
34 |
35 | # Other Parameters
36 | self.learn_rate = self.config_handler['Parameters', 'learn_rate', 'float']
37 | self.momentum = self.config_handler['Parameters', 'momentum', 'float']
38 | self.user_lambda = self.config_handler['Parameters', 'user_lambda', 'float']
39 | self.item_lambda = self.config_handler['Parameters', 'item_lambda', 'float']
40 |
41 | # Momentum for update factors
42 | self.user_factors_inc = np.zeros((self.user_num, self.factor_num))
43 | self.item_factors_inc = np.zeros((self.item_num, self.factor_num))
44 |
45 | def _build_model(self):
46 |
47 | user_item_keys = self.train_matrix.keys()
48 | users = np.array([user_id for user_id, item_id in user_item_keys])
49 | items = np.array([item_id for user_id, item_id in user_item_keys])
50 | ratings = np.array(self.train_matrix.values())
51 |
52 | # get the index of user_item_keys for stochastic (mini-batch) updates
53 | index = np.arange(len(user_item_keys))
54 | batch_size = self.config_handler.get_parameter_int('Parameters', 'batch_size')
55 | batch_num = int(float(len(index)) / batch_size)
56 |
57 | # building model
58 | losses = list()
59 | max_iterations = self.config_handler.get_parameter_int('Parameters', 'max_iterations')
60 | for iteration in range(max_iterations):
61 | shuffle(index)
62 |
63 | for batch_id in range(batch_num):
64 | batch_index = index[batch_id*batch_size:(batch_id+1)*batch_size]
65 | batch_users = users[batch_index]
66 | batch_items = items[batch_index]
67 | batch_ratings = ratings[batch_index] - self.rating_mean
68 | batch_user_factors = self.user_factors[batch_users, :]
69 | batch_item_factors = self.item_factors[batch_items, :]
70 |
71 | # Compute Prediction
72 | batch_predictions = np.sum(batch_user_factors * batch_item_factors, axis=-1)
73 | batch_error = batch_predictions - batch_ratings
74 | # batch_loss = np.sum(batch_error, batch_error)
75 | # batch_loss += 0.5 * self.user_lambda * np.sum(np.dot(batch_user_factors, batch_user_factors))
76 | # batch_loss += 0.5 * self.item_lambda * np.sum(np.dot(batch_item_factors, batch_item_factors))
77 |
78 | # Compute Gradient
79 | batch_user_delta = \
80 | batch_error[..., np.newaxis] * batch_item_factors + self.user_lambda * batch_user_factors
81 | batch_item_delta = \
82 | batch_error[..., np.newaxis] * batch_user_factors + self.item_lambda * batch_item_factors
83 |
84 | user_delta = np.zeros((self.user_num, self.factor_num))
85 | item_delta = np.zeros((self.item_num, self.factor_num))
86 | for i in range(batch_size):
87 | user_delta[batch_users[i], :] += batch_user_delta[i, :]
88 | item_delta[batch_items[i], :] += batch_item_delta[i, :]
89 |
90 | # Update Parameters
91 | self.user_factors_inc = \
92 | self.momentum * self.user_factors_inc + self.learn_rate * user_delta
93 | self.user_factors -= self.user_factors_inc
94 |
95 | self.item_factors_inc = \
96 | self.momentum * self.item_factors_inc + self.learn_rate * item_delta
97 | self.item_factors -= self.item_factors_inc
98 |
99 | batch_predictions = \
100 | np.sum(self.user_factors[batch_users, :] * self.item_factors[batch_items, :], axis=-1)
101 | batch_error = batch_predictions - batch_ratings
102 | batch_loss = np.dot(batch_error, batch_error)
103 | # batch_loss += 0.5 * self.user_lambda * np.sum(
104 | # self.user_factors[batch_users, :] * self.user_factors[batch_users, :])
105 | # batch_loss += 0.5 * self.item_lambda * np.sum(
106 | # self.item_factors[batch_items, :] * self.item_factors[batch_items, :])
107 | losses.append(batch_loss / batch_size)
108 | self._recommend()
109 | result = self._evaluate()
110 | self.logger['Process'].debug("Epoch {0} batch {1}: Training RMSE - {2}, Testing RMSE - {3}".format(
111 | iteration, batch_id, losses[-1], result['RMSE']))
112 |
113 | def _save_result(self, result):
114 | self.logger['Result'].debug('factor_num: {0}'.format(self.factor_num))
115 | self.logger['Result'].debug('learn_rate: {0}'.format(self.learn_rate))
116 | self.logger['Result'].debug('user_lambda: {0}'.format(self.user_lambda))
117 | self.logger['Result'].debug('item_lambda: {0}'.format(self.item_lambda))
118 | self.logger['Result'].debug('momentum: {0}'.format(self.momentum))
119 | Recommender._save_result(self, result)
120 |
121 | def _predict(self, user_id, item_id, time_id=0):
122 | predict_rating = np.dot(self.user_factors[user_id, :], self.item_factors[item_id, :]) + self.rating_mean
123 | if predict_rating > 5:
124 | return 5
125 | elif predict_rating < 1:
126 | return 1
127 | else:
128 | return predict_rating
129 |
130 | def _save_model(self):
131 | save_path = self.config_handler.get_parameter_string("Output", "save_path")
132 | save_file = save_path + "PMF_{0}.txt".format(self.recommender_context.experiment_id)
133 |
134 | with codecs.open(save_file, mode='w', encoding='utf-8') as write_fp:
135 | write_fp.write('factor_num: {0}\n'.format(self.factor_num))
136 | write_fp.write('learn_rate: {0}\n'.format(self.learn_rate))
137 | write_fp.write('user_lambda: {0}\n'.format(self.user_lambda))
138 | write_fp.write('item_lambda: {0}\n'.format(self.item_lambda))
139 | write_fp.write('momentum: {0}\n'.format(self.momentum))
140 | write_fp.write('user_factors \n')
141 | self._save_matrix(self.user_factors, write_fp)
142 |
143 | write_fp.write('item_factors \n')
144 | self._save_matrix(self.item_factors, write_fp)
145 |
146 | def _load_model(self):
147 | load_path = self.config_handler.get_parameter_string("Output", "load_path")
148 | load_file = load_path + "PMF_{0}.txt".format(self.recommender_context.experiment_id)
149 |
150 | with codecs.open(load_file, mode='r', encoding='utf-8') as read_fp:
151 | for line in read_fp:
152 | if line.startswith('factor_num'):
153 | self.factor_num = int(line.split(':')[1].strip())
154 | elif line.startswith('learn_rate'):
155 | self.learn_rate = float(line.split(':')[1].strip())
156 | elif line.startswith('user_lambda'):
157 | self.user_lambda = float(line.split(':')[1].strip())
158 | elif line.startswith('item_lambda'):
159 | self.item_lambda = float(line.split(':')[1].strip())
160 | elif line.startswith('momentum'):
161 | self.momentum = float(line.split(':')[1].strip())
162 | elif line.startswith('user_factor'):
163 | self.user_factors = self._load_matrix(read_fp)
164 | elif line.startswith('item_factor'):
165 | self.item_factors = self._load_matrix(read_fp)
--------------------------------------------------------------------------------
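The _build_model loop above is plain momentum SGD: an increment vector accumulates a decayed sum of scaled gradients (inc = momentum * inc + learn_rate * grad) and is subtracted from the factors each mini-batch. A tiny self-contained sketch of that update rule (the toy gradient is illustrative, not taken from the model):

    import numpy as np

    momentum, learn_rate = 0.9, 0.001          # values from config/PMF.cfg
    velocity = np.zeros(3)                     # plays the role of user_factors_inc
    factors = np.ones(3)
    gradient = np.array([1.0, -2.0, 0.5])      # stand-in for user_delta

    for _ in range(2):
        velocity = momentum * velocity + learn_rate * gradient
        factors -= velocity
    # After two steps, velocity = (0.9*0.001 + 0.001) * gradient = 0.0019 * gradient.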
/cf_rating/PoissonMF.py:
--------------------------------------------------------------------------------
1 | #!usr/bin/env python
2 | #coding:utf-8
3 |
4 |
5 | '''
6 | https://github.com/dawenl/stochastic_PMF/blob/master/code/pmf.py
7 | '''
8 |
9 |
10 | import numpy as np
11 | from scipy import special
12 | from sklearn.base import BaseEstimator, TransformerMixin
13 |
14 | class PoissonMF(BaseEstimator, TransformerMixin):
15 | ''' Poisson Matrix Factorization with batch inference'''
16 |
17 | def __init__(self, n_components=100, max_iter=100, tol=0.0005, smoothness=100, random_state=None, verbose=False, **kwargs):
18 | ''' Poisson Matrix Factorization
19 | Arguments:
20 | n_components: int
21 | number of latent components
22 |
23 | max_iter: int
24 | maximal number of iterations to perform
25 |
26 | tol: float
27 | the threshold on the increase of the objective to stop the iteration
28 |
29 | smoothness: int
30 | smoothness of the initial variational parameters
31 |
32 | random_state: int
33 | Pseudo random number generator used for sampling.
34 |
35 | **kwargs: dict
36 | Model hyperparameters
37 | '''
38 | self.n_components = n_components
39 | self.max_iter = max_iter
40 | self.tol = tol
41 | self.smoothness = smoothness
42 | self.random_state = random_state
43 | self.verbose = verbose
44 | if type(random_state) is int:
45 | np.random.seed(self.random_state)
46 | elif self.random_state is not None:
47 | np.random.setstate(self.random_state)
48 |
49 | self._parse_args(**kwargs)
50 |
51 | def _parse_args(self, **kwargs):
52 | self.a = float(kwargs.get('a', 0.1))
53 | self.b = float(kwargs.get('b', 0.1))
54 |
55 | def _init_component(self, n_feats):
56 | # variational parameters for beta
57 | self.gamma_b = self.smoothness * np.random.gamma(self.smoothness, 1./self.smoothness, size=(self.n_components, n_feats))
58 | self.rho_b = self.smoothness * np.random.gamma(self.smoothness, 1./self.smoothness, size=(self.n_components, n_feats))
59 | self.Eb, self.Elogb = _compute_expectations(self.gamma_b, self.rho_b)
60 |
61 | def set_component(self, shape, rate):
62 | '''Set the latent components from variational parameters.
63 | Parameters
64 | ----------
65 | shape : numpy-array, shape (n_components, n_feats)
66 | Shape parameters for the variational distribution
67 | rate : numpy-array, shape (n_components, n_feats)
68 | Rate parameters for the variational distribution
69 | Returns
70 | -------
71 | self : object
72 | Return the instance itself.
73 |
74 | '''
75 | self.gamma_b, self.rho_b = shape, rate
76 | self.Eb, self.Elogb = _compute_expectations(self.gamma_b, self.rho_b)
77 | return self
78 |
79 | def _init_weights(self, n_samples):
80 | # variational parameters for theta
81 | self.gamma_t = self.smoothness \
82 | * np.random.gamma(self.smoothness, 1. / self.smoothness,
83 | size=(n_samples, self.n_components))
84 | self.rho_t = self.smoothness \
85 | * np.random.gamma(self.smoothness, 1. / self.smoothness,
86 | size=(n_samples, self.n_components))
87 | self.Et, self.Elogt = _compute_expectations(self.gamma_t, self.rho_t)
88 | self.c = 1. / np.mean(self.Et)
89 |
90 | def fit(self, X):
91 | '''Fit the model to the data in X.
92 | Parameters
93 | ----------
94 | X : array-like, shape (n_samples, n_feats)
95 | Training data.
96 | Returns
97 | -------
98 | self: object
99 | Returns the instance itself.
100 | '''
101 | n_samples, n_feats = X.shape
102 | self._init_component(n_feats)
103 | self._init_weights(n_samples)
104 | self._update(X)
105 | return self
106 |
107 | def transform(self, X, attr=None):
108 | '''Encode the data as a linear combination of the latent components.
109 | Parameters
110 | ----------
111 | X : array-like, shape (n_samples, n_feats)
112 | attr: string
113 | The name of the attribute, default 'Et'. Can be changed to 'Elogt' to
114 | obtain E_q[log theta] as the transformed data.
115 | Returns
116 | -------
117 | X_new : array-like, shape (n_samples, n_components)
118 | Transformed data, as specified by attr.
119 | '''
120 |
121 | if not hasattr(self, 'Eb'):
122 | raise ValueError('There are no pre-trained components.')
123 | n_samples, n_feats = X.shape
124 | if n_feats != self.Eb.shape[1]:
125 | raise ValueError('The dimension of the transformed data '
126 | 'does not match with the existing components.')
127 | if attr is None:
128 | attr = 'Et'
129 | self._init_weights(n_samples)
130 | self._update(X, update_beta=False)
131 | return getattr(self, attr)
132 |
133 | def _update_theta(self, X):
134 | ratio = X / self._xexplog()
135 | self.gamma_t = self.a + np.exp(self.Elogt) * np.dot(
136 | ratio, np.exp(self.Elogb).T)
137 | self.rho_t = self.a * self.c + np.sum(self.Eb, axis=1)
138 | self.Et, self.Elogt = _compute_expectations(self.gamma_t, self.rho_t)
139 | self.c = 1. / np.mean(self.Et)
140 |
141 | def _update_beta(self, X):
142 | ratio = X / self._xexplog()
143 | self.gamma_b = self.b + np.exp(self.Elogb) * np.dot(
144 | np.exp(self.Elogt).T, ratio)
145 | self.rho_b = self.b + np.sum(self.Et, axis=0, keepdims=True).T
146 | self.Eb, self.Elogb = _compute_expectations(self.gamma_b, self.rho_b)
147 |
148 | def _xexplog(self):
149 | '''
150 | sum_k exp(E[log theta_{ik}] + E[log beta_{kd}])
151 | '''
152 | return np.dot(np.exp(self.Elogt), np.exp(self.Elogb))
153 |
154 |
155 | class OnlinePoissonMF(PoissonMF):
156 | ''' Poisson matrix factorization with stochastic inference'''
157 | def __init__(self, n_components=100, batch_size=10, n_pass=10, max_iter=100, tol=0.0005, shuffle=True, smoothness=100, random_state=None, verbose=False, **kwargs):
158 | ''' Poisson matrix factorization
159 | Argument
160 | --------------
161 | n_components : int
162 | Number of latent components
163 |
164 | batch_size : int
165 | The size of mini-batch
166 |
167 | n_pass : int
168 | The number of passes through the entire data
169 |
170 | max_iter : int
171 | Maximal number of iterations to perform for a single mini-batch
172 |
173 | tol: float
174 | The threshold on the increase of the objective to stop the iteration
175 |
176 | shuffle: bool
177 | whether to shuffle the data or not
178 |
179 | smoothness : int
180 | smoothness of the initial variational parameters
181 |
182 | random_state: int or randomstate
183 | Pseudo random number generator used for sampling
184 |
185 | verbose : bool
186 | whether to show progress during model fitting
187 |
188 | ** kwargs : dict
189 | Model hyperparameters and learning rate
190 |
191 | '''
192 | self.n_components = n_components
193 | self.batch_size = batch_size
194 | self.n_pass = n_pass
195 | self.max_iter = max_iter
196 | self.tol = tol
197 | self.shuffle = shuffle
198 | self.smoothness = smoothness
199 | self.random_state = random_state
200 | self.verbose = verbose
201 |
202 | if type(self.random_state) is int:
203 | np.random.seed(self.random_state)
204 | elif self.random_state is not None:
205 | np.random.setstate(self.random_state)
206 | self._parse_args(**kwargs)  # sets a, b, t0 and kappa, used by set_learning_rate and _stoch_bound
207 | def _parse_args(self, **kwargs):
208 | self.a = float(kwargs.get('a', 0.1))
209 | self.b = float(kwargs.get('b', 0.1))
210 | self.t0 = float(kwargs.get('t0', 1.))
211 | self.kappa = float(kwargs.get('kappa', 0.6))
212 |
213 |
214 | def fit(self, X, est_total=None):
215 | ''' Fit the model to the data in X. X has to be loaded into memory.
216 |
217 | Parameters
218 | ------------
219 | X: array-like, shape (n_samples, n_feats)
220 | Training data.
221 |
222 | est_total : int
223 | The estimated size of the entire data. Could be larger than the actual size.
224 |
225 | Returns
226 | --------------
227 | self: object
228 | Returns the instance itself.
229 |
230 | '''
231 | n_samples, n_feats = X.shape
232 | if est_total is None:
233 | self._scale = float(n_samples) / self.batch_size
234 | else:
235 | self._scale = float(est_total) / self.batch_size
236 | self._init_component(n_feats)
237 | self.bound = list()
238 | for count in xrange(self.n_pass):
239 | if self.verbose:
240 | print 'Iteration %d: passing through the data ...' % count
241 | indices = np.arange(n_samples)
242 | if self.shuffle:
243 | np.random.shuffle(indices)
244 | X_shuffled = X[indices]
245 | for (i, istart) in enumerate(xrange(0, n_samples, self.batch_size), 1):
246 | print '\tMinibatch %d:' % i
247 | iend = min(istart + self.batch_size, n_samples)
248 | self.set_learning_rate(iter=i)
249 | mini_batch = X_shuffled[istart: iend]
250 | self.partial_fit(mini_batch)
251 | self.bound.append(self._stoch_bound(mini_batch))
252 | return self
253 |
254 | def partial_fit(self, X):
255 | ''' Fit the data in X as a mini-batch and update the parameter by taking a natural gradient step. Could be invoked from a high-level out-of-core wrapper.
256 |
257 | Parameters
258 | -------------
259 | X : array-like, shape (batch_size, n_feats)
260 | Mini-batch data.
261 |
262 | Returns
263 | --------------
264 | self: object
265 | Returns the instance itself.
266 |
267 | '''
268 |
269 | self.transform(X)
270 | # take a (natural) gradient step
271 | ratio = X / self._xexplog()
272 | self.gamma_b = (1 - self.rho) * self.gamma_b + self.rho * (self.b + self._scale * np.exp(self.Elogb) * np.dot(np.exp(self.Elogt).T, ratio))
273 | self.rho_b = (1 - self.rho) * self.rho_b + self.rho * (self.b + self._scale * np.sum(self.Et, axis=0, keepdims=True).T)
274 | self.Eb, self.Elogb = _compute_expectations(self.gamma_b, self.rho_b)
275 | return self
276 |
277 | def set_learning_rate(self, iter=None, rho=None):
278 | ''' Set the learning rate for the gradient step
279 |
280 | Parameters
281 | ---------------------
282 | iter : int
283 | The current iteration, used to compute a Robbins-Monro type
284 | learning rate
285 | rho: float
286 | Directly specify the learning rate. Will override the one computed from the current iteration.
287 |
288 |
289 | Returns
290 | ----------
291 | self: object
292 | Returns the instance itself.
293 |
294 | '''
295 | if rho is not None:
296 | self.rho = rho
297 | elif iter is not None:
298 | self.rho = (iter + self.t0)**(-self.kappa)
299 | else:
300 | raise ValueError('invalid learning rate.')
301 | return self
302 |
303 | def _stoch_bound(self, X):
304 | bound = np.sum(X * np.log(self._xexplog()) - self.Et.dot(self.Eb))
305 | bound += _gamma_term(self.a, self.a * self.c, self.gamma_t, self.rho_t, self.Et, self.Elogt)
306 | bound += self.n_components * X.shape[0] * self.a * np.log(self.c)
307 | bound *= self._scale
308 | bound += _gamma_term(self.b, self.b, self.gamma_b, self.rho_b, self.Eb, self.Elogb)
309 | return bound
310 |
311 | def _compute_expectations(gamma, rho):
312 | return (gamma/rho, special.psi(gamma) - np.log(rho))
313 |
314 | def _gamma_term(a, b, shape, rate, Ex, Elogx):
315 | return np.sum((a - shape) * Elogx - (b - rate) * Ex + (special.gammaln(shape) - shape * np.log(rate)))
--------------------------------------------------------------------------------
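A hedged usage sketch for the batch PoissonMF class above on synthetic count data. fit() delegates to an _update loop that this excerpt does not show, so the sketch drives the two coordinate updates it is assumed to alternate (_update_theta / _update_beta) directly:

    import numpy as np

    X = np.random.poisson(1.0, size=(20, 8)).astype(float)   # toy count matrix

    pmf = PoissonMF(n_components=3, random_state=0)
    pmf._init_component(X.shape[1])    # variational parameters for beta
    pmf._init_weights(X.shape[0])      # variational parameters for theta
    for _ in range(50):
        pmf._update_theta(X)
        pmf._update_beta(X)
    # pmf.Et is (n_samples, n_components): expected per-sample weights.
    # pmf.Eb is (n_components, n_feats): expected latent components.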
/cf_rating/__init__.py:
--------------------------------------------------------------------------------
1 | #!usr/bin/env python
2 | #coding:utf-8
3 |
4 | from PMF import ProbabilisticMatrixFactorization as PMF
5 | from BPMF import BayesianProbabilisticMatrixFactorization as BPMF
6 | from BPTF import BayesianProbabilisticTensorFactorization as BPTF
7 |
--------------------------------------------------------------------------------
/cf_rating/graphicalrecommender.py:
--------------------------------------------------------------------------------
1 | #!usr/bin/env python
2 | #coding:utf-8
3 |
4 | from scipy.sparse import dok_matrix
5 | from evaluator import Evaluator
6 | import numpy as np
7 |
8 |
9 | class Recommender:
10 | def __init__(self, recommender_context):
11 | self.train_data = None
12 | self.test_data = None
13 | self.recommender_context = recommender_context
14 | self.config_handler = self.recommender_context.get_config()
15 | self.logger = self.recommender_context.get_logger()
16 |
17 | def _init_model(self):
18 | self.user_num, self.item_num = self.train_matrix.shape
19 | self.mean_rating = np.mean(self.train_matrix.values())
20 |
21 | self.predictions = dok_matrix((self.user_num, self.item_num))
22 |
23 | self.factor_num = self.config_handler.get_parameter_int('Parameters', 'factor_num')
24 | self.user_factors = np.random.normal(0, 1, size=(self.user_num, self.factor_num)) * 0.1
25 | self.item_factors = np.random.normal(0, 1, size=(self.item_num, self.factor_num)) * 0.1
26 | self.user_factors_inc = np.zeros((self.user_num, self.factor_num))
27 | self.item_factors_inc = np.zeros((self.item_num, self.factor_num))
28 |
29 | def _build_model(self):
30 | self.max_iterations = self.config_handler.get_parameter_int('Parameters', 'max_iterations')
31 |
32 | def _recommend(self):
33 | for user_id, item_id in self.test_matrix.keys():
34 | self.predictions[user_id, item_id] = self._predict(user_id, item_id)
35 |
36 | def _predict(self, user_id, item_id, time_id=0):
37 | return 0.0
38 |
39 | def _evaluate(self):
40 | evaluator_cfg = self.config_handler.get_parameter_string("Output", 'evaluator')
41 | evaluator_cfg = evaluator_cfg.strip().split(',')
42 | evaluator = Evaluator(self.predictions, self.test_matrix)
43 | result = {}
44 | for key in evaluator_cfg:
45 | result[key] = evaluator.rating[key.strip()]
46 | return result
47 |
48 | def run(self):
49 | self.logger['Process'].debug('Get the train dataset')
50 | self.train_matrix = self.recommender_context.get_data_model().get_data_splitter().get_train_matrix()
51 | self.logger['Result'].debug('The number of user-item pair in train dataset is {0}'.format(len(self.train_matrix.keys())))
52 |
53 | self.logger['Process'].debug('Get the test dataset')
54 | self.test_matrix = self.recommender_context.get_data_model().get_data_splitter().get_test_matrix()
55 | self.logger['Result'].debug('The number of user-item pair in test dataset is {0}'.format(len(self.test_matrix.keys())))
56 |
57 | self.logger['Process'].debug('Initialize the model parameters')
58 | self._init_model()
59 |
60 | self.logger['Process'].debug('Building model....')
61 | self._build_model()
62 |
63 | is_save = self.config_handler.get_parameter_bool('Output', 'is_save')
64 | if is_save:
65 | self.logger['Process'].debug('Save model ....')
66 | self._save_model()
67 |
68 | self.logger['Process'].debug('Recommending ...')
69 | self._recommend()
70 |
71 | self.logger['Process'].debug('Evaluating ...')
72 | result = self._evaluate()
73 | self._save_result(result)
74 |
75 | self.logger['Process'].debug("Finish.")
76 | self.logger['Process'].debug("#"*50)
77 |
78 | def _save_result(self, result):
79 | for key in result:
80 | self.logger['Result'].debug("{0}: {1}".format(key, result[key]))
81 |
82 | def _save_model(self):
83 | pass
84 |
85 | def _load_model(self):
86 | pass
87 |
88 | def _load_matrix(self, read_fp):
89 | data = list()
90 | for vector in read_fp:
91 | if vector.startswith('matrix_end'):
92 | break
93 | vector = vector.strip().split('\t')
94 | vector = [float(feature) for feature in vector]
95 | data.append(vector)
96 | return np.array(data)
97 |
98 | def _save_matrix(self, matrix, write_fp):
99 | for vector in matrix:
100 | for feature in vector:
101 | write_fp.write("{0}\t".format(feature))
102 | write_fp.write("\n")
103 | write_fp.write('matrix_end\n')
104 |
--------------------------------------------------------------------------------
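The base class above is a template method: run() wires data loading, training, recommendation, evaluation and persistence together, and concrete models only fill in the hooks. A minimal hedged sketch of a subclass (a constant global-mean baseline, purely illustrative) showing which hooks a new model overrides:

    import numpy as np
    from scipy.sparse import dok_matrix

    class GlobalMeanRecommender(Recommender):
        # Illustrative baseline: predicts the training mean for every pair.
        def _init_model(self):
            self.mean_rating = np.mean(self.train_matrix.values())
            self.predictions = dok_matrix(self.train_matrix.shape)

        def _build_model(self):
            pass  # nothing to train for a constant predictor

        def _predict(self, user_id, item_id, time_id=0):
            return self.mean_rating

run() then calls _init_model, _build_model, _recommend (which invokes _predict for every test pair) and _evaluate in that order.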
/config/BPMF.cfg:
--------------------------------------------------------------------------------
1 | [Dataset]
2 | #ratings = D:\Study\Dataset\MovieLens\ml-1m\ratings.dat
3 | ratings = D:\Study\Coding\Pycharm\DataAnalysis\MovieLens\MovieLens_u0_i20.txt
4 | data_format = time
5 |
6 | [splitter]
7 | method = given_n
8 | method_index = 0
9 | method_parameter = 10
10 | data_save = 1
11 | save_path = output/
12 |
13 | experiment_num = 5
14 |
15 | [Parameters]
16 | max_iterations = 50
17 | factor_num = 30
18 |
19 | user_normal_dist_mu0 = 0
20 | user_normal_dist_beta0 = 2
21 | user_Wishart_dist_W0 = 1
22 |
23 | item_normal_dist_mu0 = 0
24 | item_normal_dist_beta0 = 2
25 | item_Wishart_dist_W0 = 1
26 |
27 | rating_sigma = 2
28 |
29 | is_init_path = 1
30 | init_path = output/
31 |
32 | [Output]
33 | evaluator = RMSE,MAE,MSE
34 | logger = output/
35 | is_save = 1
36 | save_path = output/
37 | is_load = 0
38 | load_path = output/
39 |
40 |
--------------------------------------------------------------------------------
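These .cfg files are standard INI, so they can be read with the stdlib parser; a hedged sketch independent of the project's own readconf wrapper (which is not shown in this excerpt):

    import ConfigParser  # 'configparser' on Python 3

    parser = ConfigParser.ConfigParser()
    parser.read('config/BPMF.cfg')
    factor_num = parser.getint('Parameters', 'factor_num')           # 30
    beta0 = parser.getfloat('Parameters', 'user_normal_dist_beta0')  # 2.0
    save_path = parser.get('Output', 'save_path')                    # 'output/'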
/config/BPTF.cfg:
--------------------------------------------------------------------------------
1 | [Dataset]
2 | #rating = D:\Study\Dataset\MovieLens\ml-1m\ratings.dat
3 | ratings = D:\Study\Coding\Pycharm\DataAnalysis\MovieLens\MovieLens_u0_i20.txt
4 | splitter = time
5 | experiment_num = 5
6 |
7 | [Parameters]
8 | max_iterations = 50
9 | factor_num = 10
10 |
11 | user_normal_dist_mu0 = 0
12 | user_normal_dist_beta0 = 1
13 | user_Wishart_dist_W0 = 1
14 |
15 | item_normal_dist_mu0 = 0
16 | item_normal_dist_beta0 = 1
17 | item_Wishart_dist_W0 = 1
18 |
19 | time_normal_dist_mu0 = 1
20 | time_normal_dist_beta0 = 1
21 | time_Wishart_dist_W0 = 1
22 |
23 | rating_sigma = 1
24 |
25 | [Output]
26 | evaluator = RMSE, MSE
27 |
28 |
--------------------------------------------------------------------------------
/config/PMF.cfg:
--------------------------------------------------------------------------------
1 | [Dataset]
2 | # ratings = D:\Study\Dataset\MovieLens\ml-1m\ratings.dat
3 | ratings = D:\Study\Coding\Pycharm\DataAnalysis\MovieLens\MovieLens_u0_i20.txt
4 | data_format = time
5 |
6 | [splitter]
7 | method = given_n
8 | method_index = 0
9 | method_parameter = 10
10 | data_save = 1
11 | save_path = output/
12 |
13 | experiment_num = 5
14 |
15 | [Parameters]
16 | learn_rate = 0.001
17 | max_iterations = 50
18 | factor_num = 30
19 | momentum = 0.9
20 | user_lambda = 0.05
21 | item_lambda = 0.05
22 | stop_threshold = 0.01
23 | batch_size = 100000
24 |
25 |
26 | [Output]
27 | evaluator = RMSE, MSE
28 | logger = output/
29 | is_save = 1
30 | save_path = output/
31 | is_load = 0
32 | load_path = output/
--------------------------------------------------------------------------------
/config/logging.cfg:
--------------------------------------------------------------------------------
1 | #logger.conf
2 | ###############################################
3 | [loggers]
4 | keys=root
5 |
6 | [logger_root]
7 | level=DEBUG
8 | handlers=hand01,hand03
9 |
10 | ###############################################
11 | [handlers]
12 | keys=hand01,hand02,hand03
13 |
14 | [handler_hand01]
15 | class=StreamHandler
16 | level=INFO
17 | formatter=form02
18 | args=(sys.stderr,)
19 |
20 | [handler_hand02]
21 | class=FileHandler
22 | level=DEBUG
23 | formatter=form01
24 | args=('myapp.log', 'a')
25 |
26 | [handler_hand03]
27 | class=handlers.RotatingFileHandler
28 | level=INFO
29 | formatter=form02
30 | args=('myapp.log', 'a', 10*1024*1024, 5)
31 |
32 | ###############################################
33 | [formatters]
34 | keys=form01,form02
35 |
36 | [formatter_form01]
37 | format=%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s
38 | datefmt=%a, %d %b %Y %H:%M:%S
39 |
40 | [formatter_form02]
41 | format=%(name)-12s: %(levelname)-8s %(message)s
42 | datefmt=
--------------------------------------------------------------------------------
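The file above follows the stdlib fileConfig schema (loggers/handlers/formatters sections), so it can be activated in two lines; note that hand01 and hand03 are both set to INFO, so DEBUG records pass the root logger but are dropped by every attached handler:

    import logging
    import logging.config

    logging.config.fileConfig('config/logging.cfg')
    log = logging.getLogger()  # the root logger configured above
    log.info('reaches hand01 (stderr) and hand03 (rotating file)')
    log.debug('accepted by the DEBUG root logger, filtered out by both INFO handlers')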
/data/DataModel.py:
--------------------------------------------------------------------------------
1 | #!usr/bin/env python
2 | #coding:utf-8
3 |
4 | import splitter
5 | import convertor
6 |
7 |
8 | class DataModel:
9 | def __init__(self, config_handler):
10 | self.config_handler = config_handler
11 |
12 | def build_data_model(self):
13 | """
14 | Read data to convertor, and initialize splitter
15 | """
16 | data_format = self.config_handler.get_parameter_string('Dataset', 'data_format')
17 |
18 | # Read data to convertor
19 | if data_format == 'time':
20 | self.convertor = convertor.TimeDataConvertor()
21 | elif data_format == 'document':
22 | self.convertor = convertor.DocumentDataConvertor()
23 | else:
24 | self.convertor = convertor.GeneralDataConvertor()
25 | dataset_file = self.config_handler.get_parameter_string('Dataset', 'ratings')
26 | self.convertor.read_data(dataset_file)
27 |
28 | # Initialize the splitter, and hand the convertor to it
29 | splitter_method = self.config_handler.get_parameter_string('splitter', 'method')
30 | splitter_method_index = self.config_handler.get_parameter_int('splitter', 'method_index')
31 | splitter_method_parameter = self.config_handler.get_parameter_float('splitter', 'method_parameter')
32 | if splitter_method == 'given_n':
33 | self.splitter = splitter.GivenNDataSplitter(self.convertor, splitter_method_index, splitter_method_parameter)
34 | elif splitter_method == 'generic':
35 | self.splitter = splitter.GenericDataSplitter(self.convertor, splitter_method_index, splitter_method_parameter)
36 | elif splitter_method == 'ratio':
37 | self.splitter = splitter.RatioDataSplitter(self.convertor, splitter_method_index, splitter_method_parameter)
38 | elif splitter_method == 'cv':
39 | self.splitter = splitter.CrossValidationDataSplitter(self.convertor, splitter_method_index, splitter_method_parameter)
40 |
41 | def get_data_splitter(self):
42 | return self.splitter
43 |
44 | def get_data_convertor(self):
45 | return self.convertor
46 |
47 |
48 | if __name__ == '__main__':
49 | pass
--------------------------------------------------------------------------------
/data/__init__.py:
--------------------------------------------------------------------------------
1 | #!usr/bin/env python
2 | #coding:utf-8
3 |
4 | from DataModel import DataModel
--------------------------------------------------------------------------------
/data/convertor/DataConvertor.py:
--------------------------------------------------------------------------------
1 | #!usr/bin/env python
2 | #coding:utf-8
3 |
4 |
5 | class DataConvertor(object):
6 | def __init__(self):
7 | self.data = None
8 | self.shape = None
9 | self.data_structure = None
10 |
11 | def read_data(self, filename):
12 | """
13 | read raw dataset, and convert to sparse matrix format.
14 | :param filename:
15 | """
16 | pass
17 |
18 | def read_given_train_test(self, train_file, test_file):
19 | """
20 | read given data set
21 | """
22 |
--------------------------------------------------------------------------------
/data/convertor/DocumentDataConvertor.py:
--------------------------------------------------------------------------------
1 | #!usr/bin/env python
2 | #coding:utf-8
3 |
4 | from DataConvertor import DataConvertor
5 |
6 | class DocumentDataConvertor (DataConvertor):
7 | def __init__(self):
8 | DataConvertor.__init__(self)
--------------------------------------------------------------------------------
/data/convertor/GeneralDataConvertor.py:
--------------------------------------------------------------------------------
1 | #!usr/bin/env python
2 | #coding:utf-8
3 |
4 | import codecs
5 | import re
6 | from scipy.sparse import dok_matrix
7 | from DataConvertor import DataConvertor
8 |
9 | class GeneralDataConvertor(DataConvertor):
10 | def __init__(self):
11 | DataConvertor.__init__(self)
12 |
13 | def read_data(self, filename):
14 | """
15 | read raw dataset, and convert to sparse matrix format.
16 | :param filename:
17 | """
18 | users, items = set(), set()
19 | ratings = list()
20 | with codecs.open(filename, mode="r", encoding="utf-8") as read_file:
21 | for line in read_file:
22 | user_item_rating = re.split('\t|,|::', line.strip())
23 | user_id = int(user_item_rating[0])
24 | item_id = int(user_item_rating[1])
25 | rating = int(user_item_rating[2])
26 | users.add(user_id)
27 | items.add(item_id)
28 | ratings.append((user_id, item_id, rating))
29 |
30 | # Convert
31 | user_num, item_num = len(users), len(items)
32 | users_dict = {user_id: index for index, user_id in enumerate(list(users))}
33 | items_dict = {item_id: index for index, item_id in enumerate(list(items))}
34 | data_model = dok_matrix((user_num, item_num))
35 | for user_id, item_id, rating in ratings:
36 | data_model[users_dict[user_id], items_dict[item_id]] = rating
37 | self.data, self.shape = data_model, (user_num, item_num)  # store like the other convertors
38 | return data_model
39 | def read_given_train_test(self, train_file, test_file):
40 | """
41 | read given data set
42 | """
43 | users, items = set(), set()
44 | ratings = list()
45 | with codecs.open(train_file, mode="r", encoding="utf-8") as read_file:
46 | for line in read_file:
47 | user_item_rating = re.split('\t|,|::', line.strip())
48 | user_id = int(user_item_rating[0])
49 | item_id = int(user_item_rating[1])
50 | rating = int(user_item_rating[2])
51 | users.add(user_id)
52 | items.add(item_id)
53 | ratings.append((user_id, item_id, rating))
54 |
55 | # Convert
56 | user_num, item_num = len(users), len(items)
57 | users_dict = {user_id: index for index, user_id in enumerate(list(users))}
58 | items_dict = {item_id: index for index, item_id in enumerate(list(items))}
59 | train_matrix = dok_matrix((user_num, item_num))
60 | test_matrix = dok_matrix((user_num, item_num))
61 | for user_id, item_id, rating in ratings:
62 | train_matrix[users_dict[user_id], items_dict[item_id]] = rating
63 |
64 | with codecs.open(test_file, mode='r', encoding='utf-8') as read_file:
65 | for line in read_file:
66 | user_item_rating = re.split('\t|,|::', line.strip())
67 | user_id = int(user_item_rating[0])
68 | item_id = int(user_item_rating[1])
69 | rating = int(user_item_rating[2])
70 | test_matrix[users_dict[user_id], items_dict[item_id]] = rating
71 | return train_matrix, test_matrix
--------------------------------------------------------------------------------
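Both readers above split each record with re.split('\t|,|::', line.strip()), so a single code path accepts tab-separated, comma-separated and MovieLens '::'-separated ratings; a quick check:

    import re

    for line in ['1\t32\t4', '1,32,4', '1::32::4']:
        print re.split('\t|,|::', line.strip())   # ['1', '32', '4'] in all three cases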
/data/convertor/TimeDataConvertor.py:
--------------------------------------------------------------------------------
1 | #!usr/bin/env python
2 | #coding:utf-8
3 |
4 | import os
5 | import sys
6 | parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
7 | sys.path.insert(0, parent_dir)
8 |
9 | import codecs
10 | import re
11 | import numpy as np
12 | from scipy.sparse import dok_matrix
13 |
14 | from util.dateconvert import DateConvertor
15 | from data.sparsetensor import SparseTensor
16 | from DataConvertor import DataConvertor
17 |
18 |
19 | class TimeDataConvertor (DataConvertor):
20 | def __init__(self):
21 | DataConvertor.__init__(self)
22 | self.data_structure = SparseTensor
23 |
24 | def read_data(self, filename, time_format="month"):
25 | """
26 | Read data: (user_id, item_id, rating, timestamp)
27 | """
28 | users, items, times = set(), set(), set()
29 | ratings = list()
30 | with codecs.open(filename, mode="r", encoding="utf-8") as read_file:
31 | for line in read_file:
32 | user_item_timestamp_rating = re.split('\t|,|::', line.strip())
33 | user_id = int(user_item_timestamp_rating[0])
34 | item_id = int(user_item_timestamp_rating[1])
35 | rating = int(user_item_timestamp_rating[2])
36 | time_id = DateConvertor.convert_timestamp(int(user_item_timestamp_rating[3]), time_format)
37 | users.add(user_id)
38 | items.add(item_id)
39 | times.add(time_id)
40 | ratings.append((user_id, item_id, time_id, rating))
41 |
42 | # Convert
43 | user_num, item_num, time_num = len(users), len(items), len(times)
44 | self.shape = (user_num, item_num, time_num)
45 | users_dict = {user_id: index for index, user_id in enumerate(list(users))}
46 | items_dict = {item_id: index for index, item_id in enumerate(list(items))}
47 | times_dict = {time_id: index for index, time_id in enumerate(list(np.sort(list(times))))}
48 | sparse_tensor = SparseTensor(shape=(user_num, item_num, time_num))
49 | for user_id, item_id, time_id, rating in ratings:
50 | sparse_tensor[users_dict[user_id], items_dict[item_id], times_dict[time_id]] = rating
51 | self.data = sparse_tensor
52 | return self.data
53 | def tensor_matrix(self, tensor_data):
54 | user_num, item_num = tensor_data.shape[0], tensor_data.shape[1]
55 | matrix_data = dok_matrix((user_num, item_num))
56 | for user_id, item_id, time_id in tensor_data.keys():
57 | matrix_data[user_id, item_id] += tensor_data.get((user_id, item_id, time_id))
58 | return matrix_data
59 |
60 |
61 |
62 |
63 | if __name__ == '__main__':
64 | file_path = 'D:/Study/Dataset/MovieLens/ml-1m/ratings.dat'
65 | convertor = TimeDataConvertor()
66 | # data_model = convertor.read_data(file_path)
67 | # print 'the number of users is {0}.'.format(data_model.shape[0])
68 | # print 'the number of items is {0}.'.format(data_model.shape[1])
69 | # del data_model
70 |
71 | data_model = convertor.read_data(file_path)
72 | print 'the number of users is {0}'.format(data_model.shape[0])
73 | print 'the number of items is {0}'.format(data_model.shape[1])
74 | print 'the number of times is {0}'.format(data_model.shape[2])
75 | print 'the number of records is {0}'.format(len(data_model.keys()))
76 |
77 | data_matrix = convertor.tensor_matrix(data_model)
78 | print 'the number of users is {0}'.format(data_matrix.shape[0])
79 | print 'the number of items is {0}'.format(data_matrix.shape[1])
80 | print 'the number of records is {0}'.format(len(data_matrix.keys()))
--------------------------------------------------------------------------------
/data/convertor/__init__.py:
--------------------------------------------------------------------------------
1 | #!usr/bin/env python
2 | #coding:utf-8
3 |
4 | from GeneralDataConvertor import GeneralDataConvertor
5 | from TimeDataConvertor import TimeDataConvertor
6 | from DocumentDataConvertor import DocumentDataConvertor
7 |
--------------------------------------------------------------------------------
/data/model/__init__.py:
--------------------------------------------------------------------------------
1 | #!usr/bin/env python
2 | #coding:utf-8
3 |
4 |
--------------------------------------------------------------------------------
/data/sparsematrix.py:
--------------------------------------------------------------------------------
1 | #!usr/bin/env python
2 | #coding:utf-8
3 |
4 | import numpy as np
5 |
6 |
7 | class SparseMatrix:
8 | def __init__(self, shape):
9 | assert(len(shape) == 2)
10 | self.data = dict()
11 | self.row_data = dict()
12 | self.col_data = dict()
13 | self.shape = shape
14 |
15 | def __setitem__(self, key, value):
16 | assert(len(key) == 2)
17 | assert(key[0] < self.shape[0])
18 | assert(key[1] < self.shape[1])
19 | self.data[key] = value
20 | self.row_data.setdefault(key[0], SparseMatrix(shape=(1, self.shape[1])))
21 | self.row_data[key[0]].data[0, key[1]] = value
22 | self.col_data.setdefault(key[1], SparseMatrix(shape=(self.shape[0], 1)))
23 | self.col_data[key[1]].data[key[0], 0] = value
24 |
25 | def __getitem__(self, key):
26 | assert(len(key) == 2)
27 | assert(key[0] < self.shape[0])
28 | assert(key[1] < self.shape[1])
29 | return self.get(key)
30 |
31 | def keys(self):
32 | return self.data.keys()
33 |
34 | def get(self, key):
35 | if key in self.data:
36 | return self.data[key]
37 | else:
38 | return 0
39 |
40 | def getrow(self, row_id):
41 | if row_id in self.row_data:
42 | return self.row_data[row_id]
43 | else:
44 | return SparseMatrix(shape=(1, self.shape[1]))
45 |
46 | def getcol(self, col_id):
47 | if col_id in self.col_data:
48 | return self.col_data[col_id]
49 | else:
50 | return SparseMatrix(shape=(self.shape[0], 1))
51 |
52 | def transpose(self):
53 | sparse_matrix = SparseMatrix(shape=(self.shape[1], self.shape[0]))
54 | for row_id, col_id in self.keys():
55 | sparse_matrix[col_id, row_id] = self.get((row_id, col_id))
56 | return sparse_matrix
57 |
58 | def values(self):
59 | return self.data.values()
60 |
61 |
62 | if __name__ == '__main__':
63 | matrix = SparseMatrix(shape=(3, 4))
64 | print 'Matrix keys: {0}'.format(matrix.keys())
65 | matrix[0, 2] = 1
66 | matrix[0, 3] = 3
67 | matrix[1, 2] = 4
68 | print 'Matrix keys: {0}'.format(matrix.keys())
69 |
70 | row_matrix = matrix.getrow(0)
71 | print 'Row 0 keys: {0}'.format(row_matrix.keys())
72 |
73 | col_matrix = matrix.getcol(2)
74 | print 'Column 2 keys: {0}'.format(col_matrix.keys())
75 |
76 | col_matrix = col_matrix.transpose()
77 | print 'Transposed keys: {0}'.format(col_matrix.keys())
78 |
--------------------------------------------------------------------------------
/data/sparsetensor.py:
--------------------------------------------------------------------------------
1 | #!usr/bin/env python
2 | #coding:utf-8
3 |
4 | import numpy as np
5 |
6 |
7 | class SparseTensor:
8 | def __init__(self, shape):
9 | self.data = dict()
10 | self.shape = shape
11 |
12 | def __setitem__(self, key, value):
13 | assert(len(key) == len(self.shape))
14 | for i in range(len(self.shape)):
15 | assert(key[i] < self.shape[i])
16 | self.data[key] = value
17 |
18 | def __getitem__(self, item):
19 | assert(len(item) == len(self.shape))
20 | for i in range(len(self.shape)):
21 | assert(item[i] < self.shape[i])
22 | return self.get(item)
23 |
24 | def keys(self):
25 | return self.data.keys()
26 |
27 | def get(self, key):
28 | if key in self.data:
29 | return self.data[key]
30 | else:
31 | return 0
32 |
33 | def get_dimension(self, dim=0, value=0):
34 | index = [i for i in range(dim)]
35 | index.extend([i for i in range(dim+1, len(self.shape))])
36 | shape = tuple(np.array(self.shape)[index])
37 | data = dict()
38 |
39 | for key in self.keys():
40 | if key[dim] != value:
41 | continue
42 | _key = np.array(key)[index]
43 | data[tuple(_key)] = self.data[key]
44 | t = SparseTensor(shape)
45 | t.data = data
46 | return t
47 |
48 | if __name__ == '__main__':
49 | tensor = SparseTensor(shape=(3, 4, 4))
50 | print tensor.keys()
51 | tensor[0, 1, 2] = 1
52 | tensor[1, 3, 2] = 4
53 | tensor[1, 2, 3] = 2
54 | print tensor.keys()
55 | print tensor.get((0, 1, 2))
56 | print tensor[1, 3, 2]
57 | tensor = tensor.get_dimension(dim=0, value=1)
58 | print tensor.shape
59 | print tensor.keys()
60 | print tensor[1, 1]
--------------------------------------------------------------------------------
/data/splitter/CrossValidationDataSplitter.py:
--------------------------------------------------------------------------------
1 | #!usr/bin/env python
2 | #coding:utf-8
3 |
4 | from DataSplitter import DataSplitter
5 |
6 |
7 | class CrossValidationDataSplitter (DataSplitter):
8 | def __init__(self, convertor, splitter_method_index, splitter_method_parameter):
9 | DataSplitter.__init__(self, convertor, splitter_method_index, splitter_method_parameter)
--------------------------------------------------------------------------------
/data/splitter/DataSplitter.py:
--------------------------------------------------------------------------------
1 | #!usr/bin/env python
2 | #coding:utf-8
3 |
4 | import cPickle
5 | import os
6 | from scipy.sparse import dok_matrix
7 |
8 | class DataSplitter:
9 | def __init__(self, convertor, splitter_method_index, splitter_method_parameter):
10 | self.train_data = None
11 | self.test_data = None
12 | self.validation_data = None
13 | self.convertor = convertor
14 | self.splitter_method_index = splitter_method_index
15 | self.splitter_method_parameter = splitter_method_parameter
16 | self.methods = dict()
17 |
18 | def set_data_convertor(self, data_convertor):
19 | self.data_convertor = data_convertor
20 |
21 | def get_train_data(self):
22 | return self.train_data
23 |
24 | def get_test_data(self):
25 | return self.test_data
26 |
27 | def get_train_matrix(self):
28 | if len(self.train_data.shape) == 2:
29 | return self.train_data
30 | train_matrix = dok_matrix((self.train_data.shape[0], self.train_data.shape[1]))
31 | for key in self.train_data.keys():
32 | train_matrix[key[0], key[1]] = self.train_data[key]
33 | return train_matrix
34 |
35 | def get_test_matrix(self):
36 | if len(self.test_data.shape) == 2:
37 | return self.test_data
38 | test_matrix = dok_matrix((self.test_data.shape[0], self.test_data.shape[1]))
39 | for key in self.test_data.keys():
40 | test_matrix[key[0], key[1]] = self.test_data[key]
41 | return test_matrix
42 |
43 | def get_validation_data(self):
44 | return self.validation_data
45 |
46 | def split_data(self, save_path, experiment_id):
47 | self.save_train_test_data(save_path, experiment_id)
48 |
49 | def save_train_test_data(self, save_path, experiment_id):
50 | save_file = save_path + "train_matrix_{0}.bin".format(experiment_id)
51 | self._save_data(self.train_data, save_file)
52 |
53 | save_file = save_path + "test_matrix_{0}.bin".format(experiment_id)
54 | self._save_data(self.test_data, save_file)
55 |
56 | def load_train_test_data(self, load_path, experiment_id):
57 | load_file = load_path + "train_matrix_{0}.bin".format(experiment_id)
58 | if not os.path.exists(load_file):
59 | return False
60 | self.train_data = self._load_data(load_file)
61 | load_file = load_path + "test_matrix_{0}.bin".format(experiment_id)
62 | if not os.path.exists(load_file):
63 | return False
64 | self.test_data = self._load_data(load_file)
65 | return True
66 |
67 | def _save_data(self, data, filename):
68 | with open(filename, 'wb') as write_fp:
69 | cPickle.dump(data, write_fp)
70 |
71 | def _load_data(self, filename):
72 | with open(filename, 'rb') as read_fp:
73 | data = cPickle.load(read_fp)
74 | return data
--------------------------------------------------------------------------------
/data/splitter/GenericDataSplitter.py:
--------------------------------------------------------------------------------
1 | #!usr/bin/env python
2 | #coding:utf-8
3 |
4 | from DataSplitter import DataSplitter
5 |
6 |
7 | class GenericDataSplitter (DataSplitter):
8 | def __init__(self, convertor, splitter_method_index, splitter_method_parameter):
9 | DataSplitter.__init__(self, convertor, splitter_method_index, splitter_method_parameter)
--------------------------------------------------------------------------------
/data/splitter/GivenNDataSplitter.py:
--------------------------------------------------------------------------------
1 | #!usr/bin/env python
2 | #coding:utf-8
3 |
4 |
5 | from DataSplitter import DataSplitter
6 | import numpy as np
7 |
8 |
9 | class GivenNDataSplitter(DataSplitter):
10 | def __init__(self, convertor, splitter_method_index, splitter_method_parameter):
11 | DataSplitter.__init__(self, convertor, splitter_method_index, splitter_method_parameter)
12 | self.splitter_given_n = splitter_method_parameter
13 | self.start_time_id = 0
14 | self.methods = {
15 | 0: self.get_given_n_by_user,
16 | 1: self.get_given_n_by_item,
17 | 2: self.get_given_n_by_user_date,
18 | 3: self.get_given_n_by_item_date,
19 | 4: self.get_given_n_by_date,
20 | }
21 |
22 | def get_given_n_by_user(self, given_num):
23 | """
24 | Split ratings into two parts: training set consisting of user-item ratings where {@code given_num} ratings
25 | are preserved for each user, and the rest are used as the testing data.
26 | """
27 | assert (given_num > 0)
28 | self.train_data = self.convertor.data_structure(self.convertor.shape)
29 | self.test_data = self.convertor.data_structure(self.convertor.shape)
30 |
31 | user_keys = dict()
32 | for key in self.convertor.data.keys():
33 | user_keys.setdefault(key[0], list())
34 | user_keys[key[0]].append(key)
35 |
36 | for user_id in user_keys:
37 | rating_num = len(user_keys[user_id])
38 | if rating_num > given_num:
39 | index = np.arange(rating_num)
40 | np.random.shuffle(index)
41 | for i in index[:given_num]:
42 | key = user_keys[user_id][i]
43 | self.train_data[key] = self.convertor.data[key]
44 | for i in index[given_num:]:
45 | key = user_keys[user_id][i]
46 | self.test_data[key] = self.convertor.data[key]
47 | else:
48 | for key in user_keys[user_id]:
49 | self.train_data[key] = self.convertor.data[key]
50 |
51 | def get_given_n_by_item(self, given_num):
52 | """
53 | Split ratings into two parts: training set consisting of user-item ratings where {@code given_num} ratings
54 | are preserved for each item, and the rest are used as the testing data.
55 | """
56 | assert (given_num > 0)
57 | self.train_data = self.convertor.data_structure(self.convertor.shape)
58 | self.test_data = self.convertor.data_structure(self.convertor.shape)
59 |
60 | item_keys = dict()
61 | for key in self.convertor.data.keys():
62 | item_keys.setdefault(key[1], list())
63 | item_keys[key[1]].append(key)
64 |
65 | for item_id in item_keys:
66 | rating_num = len(item_keys[item_id])
67 | if rating_num > given_num:
68 | index = np.arange(rating_num)
69 | np.random.shuffle(index)
70 | for i in index[:given_num]:
71 | key = item_keys[item_id][i]
72 | self.train_data[key] = self.convertor.data[key]
73 | for i in index[given_num:]:
74 | key = item_keys[item_id][i]
75 | self.test_data[key] = self.convertor.data[key]
76 | else:
77 | for key in item_keys[item_id]:
78 | self.train_data[key] = self.convertor.data[key]
79 |
80 | def get_given_n_by_date(self, given_num):
81 | """
82 | given_num: the first {@code given_num} time periods are used for training,
83 | and all later time periods are used for testing.
84 | """
85 | self.train_data = self.convertor.data_structure(self.convertor.shape)
86 | self.test_data = self.convertor.data_structure(self.convertor.shape)
87 |
88 | for key in self.convertor.data.keys():
89 | if self.start_time_id <= key[2] < self.start_time_id + given_num:
90 | self.train_data[key] = self.convertor.data[key]
91 | elif key[2] >= self.start_time_id + given_num:
92 | self.test_data[key] = self.convertor.data[key]
93 | self.start_time_id += 1
94 |
95 | def get_given_n_by_user_date(self, given_num):
96 | pass
97 |
98 | def get_given_n_by_item_date(self, given_num):
99 | pass
100 |
101 | def split_data(self, save_path, experiment_id):
102 | if not self.load_train_test_data(save_path, experiment_id):
103 | self.methods[self.splitter_method_index](self.splitter_given_n)
104 | DataSplitter.split_data(self, save_path, experiment_id)
--------------------------------------------------------------------------------
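A hedged end-to-end sketch of the given-n protocol above, wired the way DataModel.build_data_model does it: method index 0 selects the per-user split and the parameter is the number of ratings kept for training per user (the ratings path is illustrative):

    from data.convertor import TimeDataConvertor
    from data.splitter import GivenNDataSplitter

    convertor = TimeDataConvertor()
    convertor.read_data('ratings.dat')               # (user, item, rating, timestamp) records
    splitter = GivenNDataSplitter(convertor, 0, 10)  # keep 10 ratings per user for training
    splitter.split_data('output/', 0)                # splits, then pickles both parts
    train_matrix = splitter.get_train_matrix()       # 2-D dok_matrix views of the tensor
    test_matrix = splitter.get_test_matrix()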
/data/splitter/RatioDataSplitter.py:
--------------------------------------------------------------------------------
1 | #!usr/bin/env python
2 | #coding:utf-8
3 |
4 | from DataSplitter import DataSplitter
5 |
6 |
7 | class RatioDataSplitter (DataSplitter):
8 | def __init__(self, convertor, splitter_method_index, splitter_method_parameter):
9 | DataSplitter.__init__(self, convertor, splitter_method_index, splitter_method_parameter)
10 | self.splitter_ratio = splitter_method_parameter
--------------------------------------------------------------------------------
/data/splitter/__init__.py:
--------------------------------------------------------------------------------
1 | #!usr/bin/env python
2 | #coding:utf-8
3 |
4 | from GivenNDataSplitter import GivenNDataSplitter
5 | from CrossValidationDataSplitter import CrossValidationDataSplitter
6 | from GenericDataSplitter import GenericDataSplitter
7 | from RatioDataSplitter import RatioDataSplitter
8 |
--------------------------------------------------------------------------------
/evaluator/__init__.py:
--------------------------------------------------------------------------------
1 | #!usr/bin/env python
2 | #coding:utf-8
3 |
4 | from rating import MAE, RMSE, MSE
5 |
6 |
7 | class Rating:
8 | def __init__(self, recommendation, test_matrix):
9 | self.recommendation = recommendation
10 | self.test_matrix = test_matrix
11 | self.evaluator = {'MAE': MAE, 'RMSE': RMSE, 'MSE': MSE}
12 |
13 | def __getitem__(self, item):
14 | assert (item in self.evaluator)
15 | return self.evaluator[item](self.recommendation, self.test_matrix)
16 |
17 |
18 | class Ranking:
19 | def __init__(self, recommendation, test_matrix):
20 | self.recommendation = recommendation
21 | self.test_matrix = test_matrix
22 | self.evaluator = {}
23 |
24 | def __getitem__(self, item):
25 | assert (item in self.evaluator)
26 | return self.evaluator[item](self.recommendation, self.test_matrix)
27 |
28 |
29 | class Evaluator:
30 | def __init__(self, recommendation, test_matrix):
31 | self.rating = Rating(recommendation, test_matrix)
32 | self.ranking = Ranking(recommendation, test_matrix)
33 |
34 | def __getattr__(self, key):
35 | if key == 'rating':
36 | return self.rating
37 | elif key == 'ranking':
38 | return self.ranking
39 | else:
40 | raise AttributeError
41 |
42 |
43 | if __name__ == '__main__':
44 | from scipy.sparse import dok_matrix
45 |
46 | recommendation = dok_matrix((3, 4))
47 | recommendation[0, 0] = 3
48 | recommendation[0, 1] = 4
49 | test_matrix = dok_matrix((3, 4))
50 |
51 | evaluator = Evaluator(recommendation, test_matrix)
52 | print evaluator.rating['MAE']
53 | print evaluator.rating['RMSE']
54 | print evaluator.rating['MSE']
--------------------------------------------------------------------------------
/evaluator/pValue.py:
--------------------------------------------------------------------------------
1 | #!usr/bin/env python
2 | #coding:utf-8
3 |
4 | '''
5 | Significance testing demo: a two-sample t-test (scipy.stats.ttest_ind) on evaluation scores.
6 | '''
7 |
8 | import numpy as np
9 | from scipy import stats
10 |
11 | if __name__ == '__main__':
12 | x = [850,740,900,1070,930,850,950,980,980,880,1000,980,930,650,760,810,1000,1000,960,960]
13 | x = np.array(x)
14 | x1 = x - 1
15 | print 't-statistic = %6.3f pvalue = %6.4f' % stats.ttest_ind(x, x1)
16 | # [h,pvalue,ci]=ttest(x,990)
--------------------------------------------------------------------------------
/evaluator/ranking/PrecisionRecallF1TopN.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlgorithmFan/GraphicalModelForRecommendation/a1ff8877dcd91d1fd3880a12c25120be027515e8/evaluator/ranking/PrecisionRecallF1TopN.py
--------------------------------------------------------------------------------
/evaluator/ranking/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'zhanghaidong'
2 |
--------------------------------------------------------------------------------
/evaluator/rating/MAE.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #coding: utf-8
3 |
4 |
5 | def MAE(recommendation, test_matrix):
6 |     '''Mean absolute error over the (user, item[, time]) keys of recommendation.'''
7 |     loss = 0.0
8 |     number = 0
9 |     for key in recommendation.keys():
10 |         if len(key) not in (2, 3):
11 |             raise ValueError('key must be (user, item) or (user, item, time)')
12 |         # Index both matrices with the full key so tensor (3-d) keys are
13 |         # not silently truncated to (user, item).
14 |         error = recommendation.get(key) - test_matrix.get(key)
15 |         loss += abs(error)
16 |         number += 1
17 |     if number > 0:
18 |         return loss / number
19 |     return 0.0
20 |
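21 |
22 | if __name__ == '__main__':
23 |     # Illustrative check (not in the original source): against an empty test
24 |     # matrix, MAE is just the mean absolute prediction.
25 |     from scipy.sparse import dok_matrix
26 |     pred, test = dok_matrix((2, 2)), dok_matrix((2, 2))
27 |     pred[0, 0], pred[0, 1] = 3.0, 4.0
28 |     print MAE(pred, test)   # (3.0 + 4.0) / 2 = 3.5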
--------------------------------------------------------------------------------
/evaluator/rating/MSE.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #coding: utf-8
3 |
4 |
5 | def MSE(recommendation, test_matrix):
6 |     '''Mean squared error over the (user, item[, time]) keys of recommendation.'''
7 |     loss = 0.0
8 |     number = 0
9 |     for key in recommendation.keys():
10 |         if len(key) not in (2, 3):
11 |             raise ValueError('key must be (user, item) or (user, item, time)')
12 |         # Index both matrices with the full key so tensor (3-d) keys are
13 |         # not silently truncated to (user, item).
14 |         error = recommendation.get(key) - test_matrix.get(key)
15 |         loss += error * error
16 |         number += 1
17 |     if number > 0:
18 |         return loss / number
19 |     return 0.0
20 |
--------------------------------------------------------------------------------
/evaluator/rating/RMSE.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #coding: utf-8
3 |
4 | import math
5 |
6 | def RMSE(recommendation, test_matrix):
7 |     """
8 |     Root mean squared error between predicted and observed ratings.
9 |     :param recommendation: dok_matrix (or tensor dict) of predicted ratings
10 |     :param test_matrix: dok_matrix (or tensor dict) of held-out ratings
11 |     :return: RMSE over all keys of recommendation, or 0.0 if it is empty
12 |     """
13 |     loss = 0.0
14 |     number = 0.0
15 |     for key in recommendation.keys():
16 |         if len(key) not in (2, 3):
17 |             raise ValueError('key must be (user, item) or (user, item, time)')
18 |         # Index both matrices with the full key so tensor (3-d) keys are
19 |         # not silently truncated to (user, item).
20 |         error = recommendation.get(key) - test_matrix.get(key)
21 |         loss += error * error
22 |         number += 1
23 |     if number > 0:
24 |         return math.sqrt(loss / number)
25 |     return 0.0
26 |
--------------------------------------------------------------------------------
/evaluator/rating/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'zhanghaidong'
2 |
3 | from MAE import *
4 | from MSE import *
5 | from RMSE import *
6 |
7 |
--------------------------------------------------------------------------------
/hybrid/CTR.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #coding:utf-8
3 |
4 | '''
5 | Collaborative Topic Regression
6 | '''
7 |
8 | from Recommender import Recommender
9 | from scipy.sparse import dok_matrix
10 | import numpy as np
11 | from util import normalize
12 |
13 |
14 | class CTR(Recommender):
15 | def __init__(self, train_matrix, test_matrix, config_handler):
16 |         super(CTR, self).__init__(train_matrix, test_matrix, config_handler)
17 |
18 |
19 |
20 | def initModel(self):
21 | ''''''
22 |         self.numUsers, self.numItems = self.trainMatrix.shape
23 | self.prediction = dok_matrix((self.numUsers, self.numItems))
24 | self.MAX_Iterations = int(self.configHandler.getParameter('CTR', 'MAX_Iterations'))
25 | self.numFactors = int(self.configHandler.getParameter('CTR', 'numFactors'))
26 | self.threshold = float(self.configHandler.getParameter('CTR', 'threshold'))
27 |
28 |         # Small random init; all-zero factors would make the first update degenerate.
29 |         self.U = np.random.normal(0, 0.1, (self.numUsers, self.numFactors))
30 |         self.V = np.random.normal(0, 0.1, (self.numItems, self.numFactors))
31 |
32 |     def buildModel(self, corpus):
33 |         '''
34 |         corpus: document * words.
35 |         Minimal sketch of the CTR coordinate-ascent updates (Wang & Blei, 2011).
36 |         The hyperparameters below are assumed defaults rather than config values,
37 |         and theta is fixed at a uniform topic mixture instead of an LDA fit.
38 |         '''
39 |         lambda_u, lambda_v = 0.01, 100.0    # assumed regularization weights
40 |         a, b = 1.0, 0.01                    # confidence for observed / missing ratings
41 |         theta = np.ones((self.numItems, self.numFactors)) / self.numFactors
42 |         R = self.trainMatrix.toarray()
43 |         eye = np.eye(self.numFactors)
44 |         for _ in range(self.MAX_Iterations):
45 |             # Update U: u_i = (V^T C_i V + lambda_u I)^-1 V^T C_i r_i
46 |             VtV = b * np.dot(self.V.T, self.V)
47 |             for i in range(self.numUsers):
48 |                 idx = R[i, :] > 0
49 |                 Vi = self.V[idx]
50 |                 A = VtV + (a - b) * np.dot(Vi.T, Vi) + lambda_u * eye
51 |                 self.U[i] = np.linalg.solve(A, a * np.dot(Vi.T, R[i, idx]))
52 |             # Update V: v_j = (U^T C_j U + lambda_v I)^-1 (U^T C_j r_j + lambda_v theta_j)
53 |             UtU = b * np.dot(self.U.T, self.U)
54 |             for j in range(self.numItems):
55 |                 idx = R[:, j] > 0
56 |                 Uj = self.U[idx]
57 |                 A = UtU + (a - b) * np.dot(Uj.T, Uj) + lambda_v * eye
58 |                 self.V[j] = np.linalg.solve(A, a * np.dot(Uj.T, R[idx, j]) + lambda_v * theta[j])
59 |
60 |     def predict(self):
61 |         '''Predict every rating as the inner product of user and item factors.'''
62 |         for user_id in range(self.numUsers):
63 |             for item_id in range(self.numItems):
64 |                 self.prediction[user_id, item_id] = np.dot(self.U[user_id], self.V[item_id])
--------------------------------------------------------------------------------
/hybrid/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlgorithmFan/GraphicalModelForRecommendation/a1ff8877dcd91d1fd3880a12c25120be027515e8/hybrid/__init__.py
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #coding:utf-8
3 |
4 | from GMRec import GMRec
5 |
6 |
7 | if __name__ == '__main__':
8 |     algorithms = {1: "PMF", 2: "BPMF", 3: "BPTF"}
9 |     while True:
10 |         print "0: Exit; 1: PMF; 2: BPMF; 3: BPTF;"
11 |         algorithm_index = int(raw_input("Please input the algorithm:\n"))
12 | if algorithm_index == 0:
13 | exit()
14 | elif algorithm_index in algorithms:
15 | break
16 | print "Error, please input correct algorithm name."
17 |
18 | config_file = "config/{0}.cfg".format(algorithms[algorithm_index])
19 | gmrec = GMRec(config_file, algorithms[algorithm_index])
20 | gmrec.run()
--------------------------------------------------------------------------------
/util/AIC.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #coding:utf-8
3 |
4 |
5 | def calculateAIC(numParameters, likelihood):
6 |     return 2 * numParameters - 2 * likelihood   # AIC = 2k - 2*ln(L); lower is better
7 |
8 |
9 |
10 | if __name__ == '__main__':
11 |     def hmm(numStates, numItems):
12 |         return numStates * numStates + numStates * numItems + numStates + 2 * numStates
13 |
14 |     def ihmm(numStates, numItems):
15 |         return numStates * numStates + numStates * numItems + numStates + 2 * numStates + numStates
16 |
17 |     # (numStates, numItems, log-likelihood) settings from the experiments;
18 |     # 1621 items is the original dataset, 5264 the Netflix runs. Note the
19 |     # printed value is the AIC, so it is labelled as such.
20 |     hmm_settings = [(10, 1621, -1389871), (20, 1621, -1321175),
21 |                     (30, 1621, -1270278), (40, 1621, -1353854)]
22 |     for numStates, numItems, likelihood in hmm_settings:
23 |         numParameters = hmm(numStates, numItems)
24 |         print 'HMM: numStates-{}, numParameters-{}, AIC-{}'.format(numStates, numParameters, calculateAIC(numParameters, likelihood))
25 |
26 |     ihmm_settings = [(10, 1621, -1349191), (20, 5264, -3746421),
27 |                      (30, 5264, -3677849), (40, 1621, -1230340)]
28 |     for numStates, numItems, likelihood in ihmm_settings:
29 |         numParameters = ihmm(numStates, numItems)
30 |         print 'IHMM: numStates-{}, numParameters-{}, AIC-{}'.format(numStates, numParameters, calculateAIC(numParameters, likelihood))
31 |
32 |     # Raw parameter counts for a few HMM sizes on the original dataset.
33 |     numItems = 1621
34 |     print hmm(10, numItems)
35 |     print hmm(20, numItems)
36 |     print hmm(30, numItems)
37 |     print hmm(10, numItems) - hmm(30, numItems)
--------------------------------------------------------------------------------
/util/LDA.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #coding:utf-8
3 |
4 |
5 | class LDA:
6 |     '''
7 |     Skeleton for Latent Dirichlet Allocation fitted by EM; the E-step and
8 |     M-step bodies are unimplemented stubs.
9 |     '''
10 |     def __init__(self, _corpus, _parameters):
11 |         self.corpus = _corpus
12 |         self.parameters = _parameters
13 |
14 |     def buildModel(self):
15 |         '''Alternate eStep and mStep until convergence (not implemented).'''
16 |         pass
17 |
18 |     def eStep(self):
19 |         '''Infer per-document topic responsibilities under the current topics.'''
20 |         pass
21 |
22 |     def mStep(self):
23 |         '''Re-estimate the topic-word distributions from the responsibilities.'''
24 |         pass
--------------------------------------------------------------------------------
/util/NormalInvWishartDistribution.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #coding:utf-8
3 |
4 | import numpy as np
5 | from scipy.stats import chi2
6 |
7 |
8 | class NormalInverseWishartDistribution(object):
9 | def __init__(self, mu, lambda_beta, nu, psi):
10 | self.mu = mu
11 | self.lambda_beta = lambda_beta
12 | self.psi = psi
13 | self.nu = nu
14 | self.inv_psi = np.linalg.inv(psi)
15 |
16 | def sample(self):
17 | sigma = np.linalg.inv(self.wishartrand(self.nu, self.psi))
18 | return np.random.multivariate_normal(self.mu, sigma / self.lambda_beta), sigma
19 |
20 | def wishartrand(self, nu, phi):
21 | dim = phi.shape[0]
22 | chol = np.linalg.cholesky(phi)
23 | foo = np.zeros((dim, dim))
24 |
25 | for i in range(dim):
26 | for j in range(i+1):
27 | if i == j:
28 |                     foo[i, j] = np.sqrt(chi2.rvs(nu-(i+1)+1))  # use the nu argument, not self.nu
29 | else:
30 | foo[i, j] = np.random.normal(0, 1)
31 | return np.dot(chol, np.dot(foo, np.dot(foo.T, chol.T)))
32 |
33 | def posterior(self, data):
34 | n = len(data)
35 | data_mean = np.mean(data, axis=0)
36 | squares_sum = np.cov(data.transpose(), bias=1)
37 | mu_post = (self.lambda_beta * self.mu + n * data_mean) / (self.lambda_beta + n)
38 | beta_post = self.lambda_beta + n
39 | nu_post = self.nu + n
40 | mu0_minus_mean = self.mu - data_mean
41 |         psi_post = self.psi + squares_sum * n + self.lambda_beta * n / (self.lambda_beta + n) * np.outer(mu0_minus_mean, mu0_minus_mean)  # outer product, not inner
42 | psi_post = (psi_post + np.transpose(psi_post)) / 2
43 | return NormalInverseWishartDistribution(mu_post, beta_post, nu_post, psi_post)
44 |
45 | if __name__ == '__main__':
46 |     # Minimal demo: draw one (mean, covariance) sample from the prior.
47 |     psi = np.array([[1, 0.5, 0], [0.5, 1, 0], [0, 0, 1]])
48 |     niw = NormalInverseWishartDistribution(np.zeros(3), 1.0, 5, psi)
49 |     mu, sigma = niw.sample()
50 |     print mu
51 |     print sigma
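52 |
53 |     # Posterior sketch (illustrative addition, not in the original source):
54 |     # condition the prior on 50 draws from N(0, psi), then resample.
55 |     data = np.random.multivariate_normal(np.zeros(3), psi, 50)
56 |     post = niw.posterior(data)
57 |     print post.sample()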
--------------------------------------------------------------------------------
/util/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #coding:utf-8
3 |
4 | from readconf import *
5 | from normalization import normalize
6 | from logger import *
--------------------------------------------------------------------------------
/util/dateconvert.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #coding:utf-8
3 |
4 | import time
5 | import datetime
6 |
7 |
8 | class DateConvertor(object):
9 | def __init__(self):
10 | pass
11 |
12 | @staticmethod
13 | def convert_timestamp(timestamp, convert_format="month"):
14 | if convert_format == "month" or convert_format == "m":
15 | return DateConvertor.convert_month_timestamp(timestamp)
16 | elif convert_format == "week" or convert_format == "w":
17 | return DateConvertor.convert_week_timestamp(timestamp)
18 | elif convert_format == "day" or convert_format == "d":
19 | return DateConvertor.convert_day_timestamp(timestamp)
20 |         else:
21 |             raise ValueError('unknown convert_format: %s' % convert_format)
22 |
23 | @staticmethod
24 | def convert_string(timestamp):
25 | year_month_day = time.localtime(timestamp)
26 | year, month, day = year_month_day.tm_year, year_month_day.tm_mon, year_month_day.tm_mday
27 | hour, minute, second = year_month_day.tm_hour, year_month_day.tm_min, year_month_day.tm_sec
28 | return '%d-%d-%d %d:%d:%d' % (year, month, day, hour, minute, second)
29 |
30 | @staticmethod
31 | def convert_month_timestamp(timestamp):
32 | year_month_day = datetime.datetime.fromtimestamp(timestamp)
33 | month_timestamp = datetime.datetime(year=year_month_day.year, month=year_month_day.month, day=1)
34 | return time.mktime(month_timestamp.timetuple())
35 |
36 | @staticmethod
37 | def convert_week_timestamp(timestamp):
38 |         week = int(time.strftime('%w', time.localtime(timestamp)))  # %w: 0 = Sunday
39 |         MondayStamp = timestamp - ((week - 1) % 7) * 86400  # map back to that week's Monday
40 | MondayStr = time.localtime(MondayStamp)
41 | return time.mktime(time.strptime(time.strftime('%Y-%m-%d', MondayStr), '%Y-%m-%d'))
42 |
43 | @staticmethod
44 | def convert_day_timestamp(timestamp):
45 |         year_month_day = datetime.datetime.fromtimestamp(timestamp)
46 |         day_timestamp = datetime.datetime(year=year_month_day.year, month=year_month_day.month, day=year_month_day.day)
47 |         return time.mktime(day_timestamp.timetuple())
48 |
49 | if __name__ == '__main__':
50 | now = time.mktime(time.localtime())
51 | print DateConvertor.convert_timestamp(now, 'm')
52 | print DateConvertor.convert_string(now)
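53 |     # Illustrative additions (not in the original source):
54 |     print DateConvertor.convert_timestamp(now, 'w')   # Monday of this week
55 |     print DateConvertor.convert_timestamp(now, 'd')   # midnight today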
--------------------------------------------------------------------------------
/util/logger.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #coding:utf-8
3 |
4 |
5 | import logging
6 | from logging.handlers import RotatingFileHandler
7 | # import logging.config
8 |
9 |
10 | class Logger:
11 | def __init__(self, filename):
12 | self.filename = filename
13 | self.logger = None
14 |
15 | def _set_config(self):
16 | pass
17 |
18 | def _set_console(self):
19 | console = logging.StreamHandler()
20 | console.setLevel(logging.DEBUG)
21 | formatter = logging.Formatter('%(name)-12s: %(levelname)-8s %(message)s')
22 | console.setFormatter(formatter)
23 | self.logger.addHandler(console)
24 |
25 | def information(self, message):
26 | self.logger.info(message)
27 |
28 | def debug(self, message):
29 | self.logger.debug(message)
30 |
31 | def warn(self, message):
32 | self.logger.warn(message)
33 |
34 | def error(self, message):
35 | self.logger.error(message)
36 |
37 |
38 | class Process(Logger):
39 | def __init__(self, filename):
40 | Logger.__init__(self, filename)
41 | self._set_config()
42 | self._set_console()
43 |
44 | def _set_config(self):
45 | Rthandler = RotatingFileHandler(filename=self.filename,
46 | maxBytes=10*1024*1024,
47 | backupCount=5, mode='a')
48 | Rthandler.setLevel(logging.DEBUG)
49 | formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
50 | Rthandler.setFormatter(formatter)
51 | self.logger = logging.getLogger('Progress')
52 | self.logger.addHandler(Rthandler)
53 | self.logger.setLevel(logging.DEBUG)
54 |
55 |
56 | class Result(Logger):
57 | def __init__(self, filename):
58 | Logger.__init__(self, filename)
59 | self._set_config()
60 | self._set_console()
61 |
62 | def _set_config(self):
63 | handler = logging.FileHandler(filename=self.filename, mode='a')
64 | frt = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
65 | handler.setFormatter(frt)
66 | handler.setLevel(logging.DEBUG)
67 | self.logger = logging.getLogger('Result')
68 | self.logger.addHandler(handler)
69 | self.logger.setLevel(logging.DEBUG)
70 |
71 |
72 | if __name__ == '__main__':
73 |     # Note: the ../output directory must already exist, or the file handlers fail to open.
74 |     logger = {'Result': Result('../output/result.log'), 'Process': Process('../output/process.log')}
75 |     logger['Result'].debug('Wrong')
76 |     logger['Process'].debug('Wrong')
--------------------------------------------------------------------------------
/util/normalization.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #coding:utf-8
3 |
4 | '''
5 | @author: haidong zhang
6 |
7 | '''
8 | import numpy as np
9 |
10 | def normalize(matrix):
11 |     '''
12 |     Normalize a vector to sum to 1, or each row of a matrix to sum to 1.
13 |     '''
14 |     numDims = len(matrix.shape)
15 |     if numDims == 1:
16 |         # a vector
17 |         s = np.sum(matrix)
18 |         assert(s != 0)
19 |         return matrix / s
20 |     else:
21 |         # a matrix: normalize each row
22 |         s = np.sum(matrix, axis=1)
23 |         assert(np.all(s != 0))
24 |         return matrix / s[..., np.newaxis]
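25 |
26 |
27 | if __name__ == '__main__':
28 |     # Illustrative check (not in the original source): each row of a
29 |     # normalized matrix sums to 1, as does a normalized vector.
30 |     print normalize(np.array([[1.0, 3.0], [2.0, 2.0]]))
31 |     print normalize(np.array([1.0, 1.0, 2.0]))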
--------------------------------------------------------------------------------
/util/normalwishartdistribution.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #coding:utf-8
3 |
4 | import numpy as np
5 | import random
6 | from scipy.stats import chi2
7 |
8 |
9 | class NormalWishartDistribution(object):
10 | def __init__(self, mu, lambda_beta, nu, psi):
11 | self.mu = mu
12 | self.lambda_beta = lambda_beta
13 | self.psi = psi
14 | self.nu = nu
15 | # self.inv_psi = np.linalg.inv(psi)
16 |
17 | def sample(self):
18 | sigma = np.linalg.inv(self.wishartrand(self.nu, self.psi))
19 | mu = np.random.multivariate_normal(self.mu, sigma / self.lambda_beta)
20 | return mu, sigma
21 |
22 | def wishartrand(self, nu, sigma, C=None):
23 | """Return a sample from a Wishart distribution."""
24 |         if C is None:
25 | C = np.linalg.cholesky(sigma)
26 | D = sigma.shape[0]
27 | a = np.zeros((D, D), dtype=np.float32)
28 | for r in xrange(D):
29 | if r != 0:
30 | a[r, :r] = np.random.normal(size=(r,))
31 | a[r, r] = np.sqrt(random.gammavariate(0.5*(nu - D + 1), 2.0))
32 | return np.dot(np.dot(np.dot(C, a), a.T), C.T)
33 |
34 | def wishartrand1(self, nu, phi):
35 | dim = phi.shape[0]
36 | chol = np.linalg.cholesky(phi)
37 | foo = np.zeros((dim, dim))
38 |
39 | for i in range(dim):
40 | for j in range(i+1):
41 | if i == j:
42 | foo[i, j] = np.sqrt(chi2.rvs(nu-(i+1)+1))
43 | else:
44 | foo[i, j] = np.random.normal(0, 1)
45 | return np.dot(chol, np.dot(foo, np.dot(foo.T, chol.T)))
46 |
47 | def posterior(self, data):
48 | n = len(data)
49 | data_mean = np.mean(data, axis=0)
50 | squares_sum = np.cov(data.transpose(), bias=1)
51 | mu_post = (self.lambda_beta * self.mu + n * data_mean) / (self.lambda_beta + n)
52 | beta_post = self.lambda_beta + n
53 | nu_post = self.nu + n
54 | mu0_minus_mean = self.mu - data_mean
55 |         psi_post = np.linalg.inv(self.psi) + squares_sum * n + self.lambda_beta * n / (self.lambda_beta + n) * np.outer(mu0_minus_mean, mu0_minus_mean)  # outer product, not inner
56 | psi_post = np.linalg.inv(psi_post)
57 | psi_post = (psi_post + np.transpose(psi_post)) / 2
58 | return NormalWishartDistribution(mu_post, beta_post, nu_post, psi_post)
59 |
60 | if __name__ == '__main__':
61 |     sigma = np.array([[1, 0.5], [0.5, 2]])
62 |     df = 10
63 |     np.random.seed(1)
64 |     # mu and lambda_beta are unused here; only the Wishart sampler is exercised.
65 |     nwd = NormalWishartDistribution(0, 0, df, sigma)
66 |     sigma1 = nwd.wishartrand(df, sigma)
67 |     print sigma1
68 |     print np.linalg.inv(sigma1)
69 |
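70 |     # Posterior sketch (illustrative addition, not in the original source):
71 |     # condition a proper prior on draws from N(0, sigma), then resample.
72 |     data = np.random.multivariate_normal(np.zeros(2), sigma, 100)
73 |     nwd2 = NormalWishartDistribution(np.zeros(2), 2.0, df, sigma)
74 |     mu_post, sigma_post = nwd2.posterior(data).sample()
75 |     print mu_post
76 |     print sigma_post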
--------------------------------------------------------------------------------
/util/readconf.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #coding:utf-8
3 |
4 | import ConfigParser
5 |
6 |
7 | class ReadConfig:
8 | def __init__(self, config_file_path):
9 | self.cf = ConfigParser.ConfigParser()
10 | self.cf.read(config_file_path)
11 |
12 | def __getitem__(self, key):
13 | assert(len(key) == 3)
14 | if key[2] == 'string':
15 | return self.get_parameter_string(key[0], key[1])
16 | elif key[2] == 'bool':
17 | return self.get_parameter_bool(key[0], key[1])
18 | elif key[2] == 'int':
19 | return self.get_parameter_int(key[0], key[1])
20 | elif key[2] == 'float':
21 | return self.get_parameter_float(key[0], key[1])
22 | else:
23 | raise KeyError
24 |
25 | def get_parameter_string(self, section, key):
26 | return self.cf.get(section, key)
27 |
28 | def get_parameter_int(self, section, key):
29 | return int(self.get_parameter_string(section, key))
30 |
31 | def get_parameter_float(self, section, key):
32 | return float(self.get_parameter_string(section, key))
33 |
34 | def get_parameter_bool(self, section, key):
35 | return bool(self.get_parameter_int(section, key))
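36 |
37 |
38 | if __name__ == '__main__':
39 |     # Illustrative usage sketch (not in the original source); assumes
40 |     # config/PMF.cfg exists and its [PMF] section defines MAX_Iterations.
41 |     config = ReadConfig('config/PMF.cfg')
42 |     print config['PMF', 'MAX_Iterations', 'int']
43 |     print config.get_parameter_string('PMF', 'MAX_Iterations')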
--------------------------------------------------------------------------------