├── .gitignore ├── README.md ├── __init__.py ├── configx ├── __init__.py └── configx.py ├── data ├── __init__.py ├── ft_ratings.txt └── ft_trust.txt ├── metrics ├── __init__.py └── metric.py ├── model ├── __init__.py ├── bias_svd.py ├── funk_svd.py ├── integ_svd.py ├── item_cf.py ├── item_cf_big.py ├── mf.py ├── pmf.py ├── social_cune.py ├── social_mf.py ├── social_rec.py ├── social_reg.py ├── social_rste.py ├── svd++.py ├── tri_cf.py ├── trust_svd.py ├── trust_walker.py └── user_cf.py ├── reader ├── __init__.py ├── rating.py └── trust.py └── utility ├── __init__.py ├── cross_validation.py ├── data_prepro.py ├── data_statistics.py ├── draw_figure.py ├── matrix.py ├── similarity.py ├── tools.py └── util.py /.gitignore: -------------------------------------------------------------------------------- 1 | .git 2 | 3 | /data/ 4 | /configx/ 5 | __pycache__/ 6 | 7 | 8 | __init__.py 9 | configx.py 10 | *.pyc 11 | funk_svd_r*.py 12 | social_bi_cf*.py 13 | *tri_cf*.py 14 | social_svd*.py 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Recommender System Suits: An open source toolkit for recommender system 2 | 3 | This repository provides a set of classical **traditional recommendation methods** which make predictions only using rating data and **social recommendation methods** which utilize trust/social information in order to alleviate the sparsity of ratings data. Besides, we have collected some classical methods implemented by others for your convenience. 4 | 5 | ## Traditional recommendation 6 | 7 | * **UserCF**[Resnick et al. 1994] 8 | 9 | Resnick, Paul, et al. "GroupLens: an open architecture for collaborative filtering of netnews." Proceedings of the 1994 ACM conference on Computer supported cooperative work. ACM, 1994. 10 | 11 | * **ItemCF**[Sarwar et al. 2001] 12 | 13 | Sarwar, Badrul, et al. 
"Item-based collaborative filtering recommendation algorithms." Proceedings of the 10th international conference on World Wide Web. ACM, 2001. 14 | 15 | * **FunkSVD**[Simon Funk. 2006] 16 | 17 | http://sifter.org/~simon/journal/20061211.html 18 | 19 | * **PMF**[Salakhutdinov. 2008] 20 | 21 | Mnih, Andriy, and Ruslan R. Salakhutdinov. "Probabilistic matrix factorization." Advances in neural information processing systems (2008): 1257-1264. 22 | 23 | * **IntegSVD**[Koren et al. 2008] 24 | 25 | Koren, Yehuda. "Factorization meets the neighborhood: a multifaceted collaborative filtering model." Proceedings of the 14th ACM SIGKDD international conference on Knowledge discovery and data mining. ACM, 2008. 26 | 27 | * **BiasSVD**[Koren et al. 2009] 28 | 29 | Koren, Yehuda, Robert Bell, and Chris Volinsky. "Matrix factorization techniques for recommender systems." Computer 42.8 (2009). 30 | 31 | * **SVD++**[Koren et al. 2010] 32 | 33 | Koren, Yehuda. "Factor in the neighbors: Scalable and accurate collaborative filtering." ACM Transactions on Knowledge Discovery from Data (TKDD) 4.1 (2010): 1. 34 | 35 | 36 | 37 | ## Social recommendation 38 | * **SocialRec**[Ma et al. 2008] 39 | 40 | Ma, Hao, et al. "Sorec: social recommendation using probabilistic matrix factorization." Proceedings of the 17th ACM conference on Information and knowledge management. ACM, 2008. 41 | 42 | * **RSTE**[Ma et al. 2009] 43 | 44 | Ma, Hao, Irwin King, and Michael R. Lyu. "Learning to recommend with social trust ensemble." Proceedings of the 32nd international ACM SIGIR conference on Research and development in information retrieval. ACM, 2009. 45 | 46 | * **TrustWalker**[Jamali and Ester. 2009] 47 | 48 | Jamali, Mohsen, and Martin Ester. "Trustwalker: a random walk model for combining trust-based and item-based recommendation." Proceedings of the 15th ACM SIGKDD international conference on Knowledge discovery and data mining. ACM, 2009. 
49 | 50 | * **SocialMF**[Jamali and Ester 2010] 51 | 52 | Jamali, Mohsen, and Martin Ester. "A matrix factorization technique with trust propagation for recommendation in social networks." Proceedings of the fourth ACM conference on Recommender systems. ACM, 2010. 53 | 54 | * **SocialReg**[Ma et al. 2011] 55 | 56 | Ma, Hao, et al. "Recommender systems with social regularization." Proceedings of the fourth ACM international conference on Web search and data mining. ACM, 2011. 57 | 58 | * **TrustSVD**[Guo et al. 2015] 59 | 60 | Guo, Guibing, Jie Zhang, and Neil Yorke-Smith. "TrustSVD: Collaborative Filtering with Both the Explicit and Implicit Influence of User Trust and of Item Ratings." AAAI. Vol. 15. 2015. 61 | 62 | * **CUNE**[Zhang et al. 2017] 63 | 64 | Chuxu Zhang, Lu Yu, Yan Wang, Chirag Shah, Xiangliang Zhang. "Collaborative User Network Embedding for Social Recommender Systems." SDM, 2017. 65 | 66 | ## RSAlgorithms implemented by Others 67 | 68 | Sedhain et al. "Autorec: Autoencoders meet collaborative filtering." WWW, 2015. [code](https://github.com/gtshs2/Autorec) 69 | 70 | Kim et al. "Convolutional matrix factorization for document context-aware recommendation." RecSys, 2016. [code](https://github.com/cartopy/ConvMF) 71 | 72 | Liang et al. "Factorization meets the item embedding: Regularizing matrix factorization with item co-occurrence." RecSys, 2016. [code](https://github.com/dawenl/cofactor) 73 | 74 | He et al. "Fast matrix factorization for online recommendation with implicit feedback." SIGIR, 2016. [code](https://github.com/hexiangnan/sigir16-eals) 75 | 76 | Quadrana et al. "Personalizing session-based recommendations with hierarchical recurrent neural networks." RecSys, 2017. [code](https://github.com/mquad/hgru4rec) 77 | 78 | He et al. "Neural collaborative filtering." WWW, 2017. [code](https://github.com/hexiangnan/neural_collaborative_filtering) 79 | 80 | Ebesu et al. "Collaborative Memory Network for Recommendation Systems." SIGIR, 2018. 
[code](https://github.com/tebesu/CollaborativeMemoryNetwork) 81 | 82 | Fan et al. "Graph Neural Networks for Social Recommendation." WWW, 2019. [code](https://github.com/Wang-Shuo/GraphRec_PyTorch) 83 | 84 | Chong et al. "Efficient Heterogeneous Collaborative Filtering without Negative Sampling for Recommendation." AAAI, 2020. [code](https://github.com/chenchongthu/EHCF) 85 | 86 | 87 | ## Requirements 88 | * numpy==1.14.2 89 | * scipy==1.0.1 90 | * pandas==0.22.0 91 | * matplotlib==2.2.2 92 | 93 | ## Code Structure 94 | 95 | The structure of our project is presented in a tree form as follows: 96 | 97 | ``` 98 | Recommender System # the root of project 99 | │ README.md 100 | │ __init__.py 101 | │ .gitignore 102 | | 103 | └───configx # configurate the global parameters and hyper parameters 104 | │ │ configx.py 105 | | │ 106 | └───data # store the rating and social data 107 | │   │   ft_ratings.txt 108 | | │ ft_trust.txt 109 | | | 110 | │ └───cv # cross validation data 111 | │ │ ft-0.txt 112 | │ │ ft-1.txt 113 | │ │ ft-2.txt 114 | │ │ ft-3.txt 115 | │ │ ft-4.txt 116 | | | 117 | └───metrics # the metrics to measure the prediction accuracy for rating prediction task 118 | │   │   metric.py 119 | | | 120 | └───model # the set of methods of tranditional and social recommendation 121 | │   │   bias_svd.py 122 | │   │   funk_svd.py 123 | │   │   pmf.py 124 | │   │   integ_svd.py 125 | | | item_cf.py 126 | | | item_cf_big.py 127 | | | mf.py 128 | | | social_mf.py 129 | | | social_rec.py 130 | | | social_reg.py 131 | | | social_rste.py 132 | | | svd++.py 133 | | | trust_svd.py 134 | | | trust_walker.py 135 | | | user_cf.py 136 | | | 137 | └───reader # data generator for rating and social data 138 | │   │   rating.py 139 | │   │   trust.py 140 | | | 141 | └───utility # other commonly used tools 142 | │ cross_validation.py 143 | │ data_prepro.py 144 | │ data_statistics.py 145 | │ draw_figure.py 146 | │ matrix.py 147 | │ similarity.py 148 | │ tools.py 149 | │ util.py 150 | ``` 
151 | 152 | 153 | ## Parameters Settings 154 | If you want to change the default hyparameters, you can set it in `configx.py`. The meanings of the hyparameters is as follows: 155 | 156 | #### Dataset Parameters 157 | 158 | `dataset_name`: the short name of dataset, the default value is `ft`. 159 | 160 | `k_fold_num`: the num of cross validation, the default value is `5`. 161 | 162 | `rating_path `: the path of raw ratings data file, the default value is `../data/ft_ratings.txt`. 163 | 164 | `rating_cv_path`: the cross validation path of ratings data, the default value is `../data/cv/`. 165 | 166 | `trust_path`: the path of raw trust data file, the default value is `../data/ft_trust.txt`. 167 | 168 | `sep`: the separator of rating and trust data in triple tuple, the default value is ` `. 169 | 170 | `random_state`: the seed of random number, the default value is `0`. 171 | 172 | `size`: the ratio of train set, the default value is `0.8`. 173 | 174 | `min_val`: the minimum rating value, the default value is `0.5`. 175 | 176 | `max_val`: the maximum rating value, the default value is `4.0`. 177 | 178 | #### Model HyperParameters 179 | 180 | `coldUserRating`: the number of ratings a cold start user rated on items, the default value is `5`. 181 | 182 | `factor`: the size of latent dimension for user and item, the default value is `10`. 183 | 184 | `threshold`: the threshold value of model training, the default value is `1e-4`. 185 | 186 | `lr`: the learning rate, the default value is `0.01`. 187 | 188 | `maxIter`: the maximum number of iterations, the default value is `100`. 189 | 190 | `lambdaP`: the parameter of user regularizer, the default value is `0.001`. 191 | 192 | `lambdaQ`: the parameter of item regularizer, the default value is `0.001`. 193 | 194 | `gamma`: momentum coefficient, the default value is `0.9`. 195 | 196 | `isEarlyStopping`: early stopping flag, the default value is `false`. 
197 | 198 | #### Output Parameters 199 | 200 | `result_path`: the main directory of results, the default value is `../results/`. 201 | 202 | `model_path`: the directory of well-trained variables, the default value is `../results/model/`. 203 | 204 | `result_log_path`: the directory of logs when training models, the default value is `../results/log/`. 205 | 206 | ## Usage 207 | 208 | Next, I will take `pmf` as an example to introduce how to execute our code. 209 | 210 | First, we should split our rating data into several parts for training, testing and cross validation. 211 | ``` 212 | from utility.cross_validation import split_5_folds 213 | from configx.configx import ConfigX 214 | 215 | if __name__ == "__main__": 216 | configx = ConfigX() 217 | configx.k_fold_num = 5 218 | configx.rating_path = "../data/ft_ratings.txt" 219 | configx.rating_cv_path = "../data/cv/" 220 | 221 | split_5_folds(configx) 222 | ``` 223 | 224 | Next, we open the `pmf.py` file in `model` folder, and configure the hyperparameters for training and execute the following code: 225 | 226 | ``` 227 | if __name__ == '__main__': 228 | 229 | rmses = [] 230 | maes = [] 231 | bmf = FunkSVDwithR() 232 | for i in range(bmf.config.k_fold_num): 233 | bmf.train_model(i) 234 | rmse, mae = bmf.predict_model() 235 | print("current best rmse is %0.5f, mae is %0.5f" % (rmse, mae)) 236 | rmses.append(rmse) 237 | maes.append(mae) 238 | rmse_avg = sum(rmses) / 5 239 | mae_avg = sum(maes) / 5 240 | print("the rmses are %s" % rmses) 241 | print("the maes are %s" % maes) 242 | print("the average of rmses is %s " % rmse_avg) 243 | print("the average of maes is %s " % mae_avg) 244 | 245 | ``` 246 | 247 | ## Citing 248 | 249 | Please cite our paper if you use our codes. Thanks! 
250 | 251 | @inproceedings{pricai2018sotricf, 252 | title="Social Collaborative Filtering Ensemble", 253 | author="Zhang, Honglei and Liu, Gangdu and Wu, Jun", 254 | booktitle="PRICAI", 255 | pages="1005--1017" 256 | year="2018", 257 | } 258 | 259 | @inproceedings{ijcnn2019MFDGE, 260 | title={Integrating dual user network embedding with matrix factorization for social recommender systems}, 261 | author={Chen, Liying and Zhang, Honglei and Wu, Jun}, 262 | booktitle={IJCNN}, 263 | pages={1--8}, 264 | year={2019}, 265 | } 266 | 267 | ## RSPapers 268 | 269 | Recently, we have launched an open source project [**RSPapers**](https://github.com/hongleizhang/RSPapers), which includes some classical **Surveys**, **Classical Recommender System**, **Social Recommender System**, **Deep Learning based Recommender System**, **Cold Start Problem in Recommender System** and **POI Recommender System**. 270 | 271 | ## Acknowledgements 272 | 273 | Specially summerize the Traditional and Social recommendations for you, and if you have any questions, please contact me generously. Last but not least, I sincerely look forward to working with you to contribute it. 274 | 275 | Greatly thank @**yunzhan2014** for making contributions to it. 
# encoding:utf-8


class ConfigX(object):
    """Global configuration for the recommender-system experiments.

    Collects dataset locations, model hyper-parameters, and output
    directories as plain instance attributes, so individual training
    scripts can override any of them after construction (see the
    README usage examples, e.g. ``configx.k_fold_num = 5``).
    """

    def __init__(self):
        super(ConfigX, self).__init__()

        # ---- Dataset parameters ----
        # Short dataset name: "ft" (filmtrust), "db" (douban), "ca" (ciao).
        self.dataset_name = "ft"
        self.k_fold_num = 5  # number of folds for cross validation
        # Raw ratings / trust file paths are derived from the dataset name.
        self.rating_path = "../data/%s_ratings.txt" % self.dataset_name
        self.rating_cv_path = "../data/cv/"  # cross-validation splits of ratings
        self.trust_path = '../data/%s_trust.txt' % self.dataset_name
        self.sep = ' '  # field separator of the (user, item, value) triples
        self.random_state = 0  # seed for random-number generation
        self.size = 0.8  # fraction of the data used as the train set
        self.min_val = 0.5  # minimum rating value
        self.max_val = 4.0  # maximum rating value

        # ---- Model hyper-parameters ----
        self.coldUserRating = 5  # rating count threshold for a cold-start user
        self.factor = 10  # size of the latent dimension for users and items
        self.threshold = 1e-4  # convergence threshold for model training
        self.lr = 0.01  # learning rate
        self.maxIter = 100  # maximum number of training iterations
        self.lambdaP = 0.001  # user regularization weight
        self.lambdaQ = 0.001  # item regularization weight
        # Momentum coefficient. NOTE(review): the README documents a default
        # of 0.9, but the code default here is 0 — confirm which is intended.
        self.gamma = 0
        self.isEarlyStopping = False  # early-stopping flag

        # ---- Output parameters ----
        self.result_path = "../results/"  # main directory for results
        self.model_path = "model/"  # directory of well-trained variables
        self.result_log_path = "log/"  # directory of training logs
60 | 58 965 1 61 | 60 26 1 62 | 60 733 1 63 | 60 775 1 64 | 60 1522 1 65 | 60 1523 1 66 | 61 182 1 67 | 61 263 1 68 | 63 1524 1 69 | 63 1174 1 70 | 63 409 1 71 | 63 1274 1 72 | 1525 1327 1 73 | 1525 552 1 74 | 66 1147 1 75 | 74 27 1 76 | 78 1526 1 77 | 78 291 1 78 | 79 1153 1 79 | 79 739 1 80 | 79 1072 1 81 | 79 1249 1 82 | 79 188 1 83 | 79 315 1 84 | 79 426 1 85 | 79 355 1 86 | 79 863 1 87 | 79 842 1 88 | 79 782 1 89 | 79 282 1 90 | 1527 94 1 91 | 80 509 1 92 | 80 683 1 93 | 80 1187 1 94 | 80 628 1 95 | 80 965 1 96 | 80 1528 1 97 | 83 208 1 98 | 85 1055 1 99 | 86 43 1 100 | 1529 271 1 101 | 88 862 1 102 | 89 129 1 103 | 89 403 1 104 | 89 509 1 105 | 89 1530 1 106 | 89 410 1 107 | 89 298 1 108 | 89 1287 1 109 | 89 628 1 110 | 90 109 1 111 | 1531 1532 1 112 | 94 1495 1 113 | 95 1252 1 114 | 95 1533 1 115 | 95 1292 1 116 | 95 1232 1 117 | 95 1021 1 118 | 95 96 1 119 | 96 95 1 120 | 99 188 1 121 | 99 712 1 122 | 103 1505 1 123 | 103 368 1 124 | 109 1415 1 125 | 109 499 1 126 | 110 1491 1 127 | 114 509 1 128 | 114 1344 1 129 | 116 738 1 130 | 116 553 1 131 | 120 509 1 132 | 120 628 1 133 | 1534 316 1 134 | 129 298 1 135 | 129 1160 1 136 | 129 1414 1 137 | 129 29 1 138 | 129 89 1 139 | 135 436 1 140 | 1532 80 1 141 | 1532 188 1 142 | 1532 241 1 143 | 1532 1481 1 144 | 1532 892 1 145 | 1535 417 1 146 | 1535 826 1 147 | 141 965 1 148 | 143 514 1 149 | 143 1207 1 150 | 1509 230 1 151 | 146 23 1 152 | 146 264 1 153 | 146 1355 1 154 | 149 343 1 155 | 154 1288 1 156 | 154 898 1 157 | 154 372 1 158 | 154 1331 1 159 | 154 361 1 160 | 154 433 1 161 | 154 806 1 162 | 1536 272 1 163 | 161 300 1 164 | 161 825 1 165 | 161 272 1 166 | 161 1298 1 167 | 162 1398 1 168 | 162 1034 1 169 | 163 1435 1 170 | 163 1537 1 171 | 164 1538 1 172 | 164 644 1 173 | 165 584 1 174 | 165 499 1 175 | 165 1415 1 176 | 165 752 1 177 | 165 361 1 178 | 165 516 1 179 | 165 433 1 180 | 165 1288 1 181 | 166 188 1 182 | 167 628 1 183 | 1539 425 1 184 | 168 1216 1 185 | 168 630 1 186 | 171 1540 1 187 | 173 1541 
1 188 | 174 872 1 189 | 1542 1327 1 190 | 182 263 1 191 | 182 61 1 192 | 188 1479 1 193 | 188 221 1 194 | 188 495 1 195 | 188 1022 1 196 | 188 509 1 197 | 188 905 1 198 | 188 1420 1 199 | 188 716 1 200 | 188 591 1 201 | 188 312 1 202 | 188 965 1 203 | 188 166 1 204 | 188 1398 1 205 | 188 1504 1 206 | 188 99 1 207 | 188 1361 1 208 | 188 402 1 209 | 188 989 1 210 | 188 478 1 211 | 188 29 1 212 | 188 782 1 213 | 188 1350 1 214 | 188 1327 1 215 | 188 552 1 216 | 188 150 1 217 | 188 491 1 218 | 188 1106 1 219 | 188 397 1 220 | 188 79 1 221 | 188 628 1 222 | 188 1532 1 223 | 188 1137 1 224 | 188 1481 1 225 | 188 307 1 226 | 188 1435 1 227 | 188 436 1 228 | 191 1284 1 229 | 191 810 1 230 | 192 300 1 231 | 193 1157 1 232 | 196 448 1 233 | 199 1043 1 234 | 200 507 1 235 | 200 1543 1 236 | 201 1197 1 237 | 201 395 1 238 | 201 319 1 239 | 208 83 1 240 | 1544 969 1 241 | 210 6 1 242 | 213 1057 1 243 | 215 1200 1 244 | 215 1187 1 245 | 217 1242 1 246 | 220 825 1 247 | 221 188 1 248 | 221 1420 1 249 | 222 938 1 250 | 222 578 1 251 | 222 315 1 252 | 222 273 1 253 | 222 782 1 254 | 223 509 1 255 | 223 1147 1 256 | 223 489 1 257 | 223 1398 1 258 | 223 636 1 259 | 227 1208 1 260 | 227 448 1 261 | 227 1545 1 262 | 227 457 1 263 | 227 345 1 264 | 227 196 1 265 | 228 718 1 266 | 228 1168 1 267 | 231 873 1 268 | 231 1039 1 269 | 232 477 1 270 | 238 509 1 271 | 240 1187 1 272 | 240 777 1 273 | 240 918 1 274 | 240 489 1 275 | 240 546 1 276 | 240 683 1 277 | 241 1532 1 278 | 242 1435 1 279 | 242 1159 1 280 | 244 1078 1 281 | 244 1546 1 282 | 249 984 1 283 | 254 298 1 284 | 256 1505 1 285 | 256 103 1 286 | 256 554 1 287 | 256 857 1 288 | 1547 1299 1 289 | 1547 320 1 290 | 261 1342 1 291 | 261 188 1 292 | 261 1504 1 293 | 263 182 1 294 | 263 61 1 295 | 267 1518 1 296 | 267 509 1 297 | 268 1548 1 298 | 268 436 1 299 | 268 1417 1 300 | 269 962 1 301 | 269 927 1 302 | 269 125 1 303 | 272 1536 1 304 | 272 1111 1 305 | 272 1131 1 306 | 272 300 1 307 | 272 571 1 308 | 272 1541 1 309 | 272 1180 1 
310 | 272 508 1 311 | 273 938 1 312 | 273 222 1 313 | 278 812 1 314 | 278 509 1 315 | 281 718 1 316 | 282 1249 1 317 | 282 79 1 318 | 282 628 1 319 | 282 355 1 320 | 282 315 1 321 | 282 6 1 322 | 282 1355 1 323 | 282 188 1 324 | 282 739 1 325 | 282 782 1 326 | 282 1549 1 327 | 282 1302 1 328 | 282 863 1 329 | 282 452 1 330 | 282 716 1 331 | 282 1192 1 332 | 284 1288 1 333 | 284 1174 1 334 | 285 702 1 335 | 285 938 1 336 | 285 739 1 337 | 285 1398 1 338 | 285 1481 1 339 | 291 78 1 340 | 297 509 1 341 | 298 129 1 342 | 298 436 1 343 | 298 965 1 344 | 298 1550 1 345 | 298 509 1 346 | 298 969 1 347 | 298 1551 1 348 | 298 1159 1 349 | 298 254 1 350 | 298 188 1 351 | 298 160 1 352 | 298 89 1 353 | 298 1147 1 354 | 298 29 1 355 | 298 410 1 356 | 298 1398 1 357 | 298 1355 1 358 | 300 272 1 359 | 300 1466 1 360 | 300 161 1 361 | 300 849 1 362 | 300 795 1 363 | 300 310 1 364 | 300 764 1 365 | 300 702 1 366 | 300 192 1 367 | 300 938 1 368 | 301 16 1 369 | 304 798 1 370 | 305 683 1 371 | 306 226 1 372 | 306 1212 1 373 | 307 905 1 374 | 307 188 1 375 | 307 1273 1 376 | 307 631 1 377 | 310 300 1 378 | 310 1192 1 379 | 312 782 1 380 | 312 999 1 381 | 312 402 1 382 | 312 188 1 383 | 312 1348 1 384 | 312 282 1 385 | 312 426 1 386 | 312 858 1 387 | 313 29 1 388 | 313 716 1 389 | 313 892 1 390 | 313 903 1 391 | 313 568 1 392 | 313 509 1 393 | 313 188 1 394 | 313 628 1 395 | 313 436 1 396 | 314 918 1 397 | 315 79 1 398 | 315 188 1 399 | 315 1549 1 400 | 315 355 1 401 | 315 1153 1 402 | 315 739 1 403 | 315 863 1 404 | 316 1534 1 405 | 317 969 1 406 | 317 509 1 407 | 317 857 1 408 | 319 764 1 409 | 319 1552 1 410 | 319 201 1 411 | 319 188 1 412 | 319 1059 1 413 | 319 1213 1 414 | 319 678 1 415 | 320 1299 1 416 | 320 1282 1 417 | 320 1229 1 418 | 320 36 1 419 | 322 1553 1 420 | 328 337 1 421 | 336 533 1 422 | 340 1033 1 423 | 341 509 1 424 | 345 1272 1 425 | 345 554 1 426 | 345 94 1 427 | 353 445 1 428 | 353 535 1 429 | 353 1056 1 430 | 353 537 1 431 | 1545 452 1 432 | 355 863 1 433 | 
355 1014 1 434 | 355 1165 1 435 | 355 739 1 436 | 355 315 1 437 | 355 79 1 438 | 361 806 1 439 | 361 1331 1 440 | 361 1468 1 441 | 361 752 1 442 | 361 516 1 443 | 361 433 1 444 | 361 1288 1 445 | 361 165 1 446 | 361 1507 1 447 | 361 372 1 448 | 361 898 1 449 | 361 154 1 450 | 1549 1062 1 451 | 364 628 1 452 | 372 433 1 453 | 372 516 1 454 | 372 361 1 455 | 373 1482 1 456 | 378 1554 1 457 | 378 771 1 458 | 380 379 1 459 | 380 1041 1 460 | 380 339 1 461 | 387 1039 1 462 | 395 188 1 463 | 395 628 1 464 | 395 1355 1 465 | 396 1466 1 466 | 397 616 1 467 | 397 868 1 468 | 397 580 1 469 | 397 694 1 470 | 397 188 1 471 | 397 489 1 472 | 397 1368 1 473 | 397 509 1 474 | 397 1060 1 475 | 399 1299 1 476 | 1555 556 1 477 | 1553 322 1 478 | 402 858 1 479 | 402 782 1 480 | 402 938 1 481 | 402 188 1 482 | 402 764 1 483 | 402 989 1 484 | 402 716 1 485 | 402 312 1 486 | 403 595 1 487 | 403 897 1 488 | 406 436 1 489 | 406 478 1 490 | 407 623 1 491 | 410 89 1 492 | 410 578 1 493 | 411 169 1 494 | 417 1556 1 495 | 417 96 1 496 | 417 1535 1 497 | 420 433 1 498 | 420 578 1 499 | 420 134 1 500 | 420 282 1 501 | 420 315 1 502 | 420 542 1 503 | 420 782 1 504 | 422 1249 1 505 | 423 428 1 506 | 423 106 1 507 | 425 1539 1 508 | 425 1295 1 509 | 1557 447 1 510 | 426 79 1 511 | 426 716 1 512 | 426 1249 1 513 | 426 1153 1 514 | 426 863 1 515 | 426 739 1 516 | 427 1104 1 517 | 427 313 1 518 | 428 423 1 519 | 428 1196 1 520 | 428 1439 1 521 | 430 1334 1 522 | 432 628 1 523 | 433 1507 1 524 | 433 516 1 525 | 433 154 1 526 | 433 1558 1 527 | 433 636 1 528 | 433 769 1 529 | 433 1331 1 530 | 433 508 1 531 | 433 752 1 532 | 433 420 1 533 | 433 1388 1 534 | 433 1397 1 535 | 433 1415 1 536 | 433 499 1 537 | 433 1288 1 538 | 433 1018 1 539 | 433 861 1 540 | 433 361 1 541 | 433 1383 1 542 | 433 584 1 543 | 433 372 1 544 | 433 806 1 545 | 436 135 1 546 | 436 918 1 547 | 436 1416 1 548 | 436 716 1 549 | 436 29 1 550 | 436 478 1 551 | 436 509 1 552 | 436 188 1 553 | 436 725 1 554 | 436 546 1 555 | 436 628 1 
556 | 436 160 1 557 | 436 1413 1 558 | 436 1398 1 559 | 436 268 1 560 | 436 406 1 561 | 441 508 1 562 | 445 535 1 563 | 445 353 1 564 | 445 537 1 565 | 445 1056 1 566 | 447 1557 1 567 | 448 196 1 568 | 452 1420 1 569 | 452 1182 1 570 | 452 628 1 571 | 452 1549 1 572 | 452 568 1 573 | 452 282 1 574 | 452 221 1 575 | 452 782 1 576 | 452 312 1 577 | 452 1559 1 578 | 452 791 1 579 | 452 1202 1 580 | 452 1479 1 581 | 452 1137 1 582 | 452 1560 1 583 | 457 294 1 584 | 463 698 1 585 | 464 1065 1 586 | 467 164 1 587 | 467 1158 1 588 | 468 170 1 589 | 472 509 1 590 | 477 232 1 591 | 477 577 1 592 | 478 546 1 593 | 478 918 1 594 | 478 436 1 595 | 478 1416 1 596 | 478 406 1 597 | 481 676 1 598 | 481 1212 1 599 | 483 38 1 600 | 484 702 1 601 | 485 633 1 602 | 485 1271 1 603 | 485 1561 1 604 | 485 1562 1 605 | 485 1225 1 606 | 485 1563 1 607 | 485 873 1 608 | 485 1019 1 609 | 485 1564 1 610 | 485 1565 1 611 | 485 702 1 612 | 485 392 1 613 | 488 764 1 614 | 489 240 1 615 | 489 223 1 616 | 489 1187 1 617 | 489 397 1 618 | 489 509 1 619 | 489 931 1 620 | 489 1566 1 621 | 489 546 1 622 | 489 777 1 623 | 490 1102 1 624 | 490 883 1 625 | 490 265 1 626 | 490 1140 1 627 | 490 1493 1 628 | 491 395 1 629 | 493 1040 1 630 | 496 1187 1 631 | 499 1018 1 632 | 499 584 1 633 | 499 177 1 634 | 499 516 1 635 | 499 1415 1 636 | 499 433 1 637 | 499 165 1 638 | 502 1187 1 639 | 503 327 1 640 | 506 739 1 641 | 508 938 1 642 | 508 433 1 643 | 508 1065 1 644 | 508 1407 1 645 | 508 272 1 646 | 508 441 1 647 | 509 188 1 648 | 509 1567 1 649 | 509 617 1 650 | 509 278 1 651 | 509 1033 1 652 | 509 1427 1 653 | 509 489 1 654 | 509 114 1 655 | 509 89 1 656 | 509 1220 1 657 | 509 694 1 658 | 509 1147 1 659 | 509 618 1 660 | 509 574 1 661 | 509 965 1 662 | 509 931 1 663 | 509 80 1 664 | 509 1289 1 665 | 509 1078 1 666 | 509 297 1 667 | 509 223 1 668 | 509 1481 1 669 | 509 1276 1 670 | 509 1060 1 671 | 509 120 1 672 | 509 317 1 673 | 509 1463 1 674 | 509 1157 1 675 | 509 313 1 676 | 509 892 1 677 | 509 1206 1 
678 | 509 1388 1 679 | 509 238 1 680 | 509 1336 1 681 | 509 1187 1 682 | 509 616 1 683 | 509 956 1 684 | 509 628 1 685 | 509 436 1 686 | 509 394 1 687 | 509 334 1 688 | 509 29 1 689 | 509 341 1 690 | 509 1508 1 691 | 509 658 1 692 | 509 1149 1 693 | 509 986 1 694 | 509 472 1 695 | 509 1398 1 696 | 509 546 1 697 | 509 298 1 698 | 509 1392 1 699 | 509 1128 1 700 | 509 397 1 701 | 509 1104 1 702 | 509 1368 1 703 | 509 580 1 704 | 509 267 1 705 | 509 1568 1 706 | 510 1569 1 707 | 514 143 1 708 | 514 1207 1 709 | 516 372 1 710 | 516 165 1 711 | 516 1018 1 712 | 516 433 1 713 | 516 1415 1 714 | 516 499 1 715 | 521 598 1 716 | 524 1212 1 717 | 524 1360 1 718 | 528 671 1 719 | 528 957 1 720 | 528 1398 1 721 | 532 1570 1 722 | 533 336 1 723 | 534 1571 1 724 | 534 167 1 725 | 535 537 1 726 | 535 1056 1 727 | 535 353 1 728 | 535 445 1 729 | 537 1056 1 730 | 537 353 1 731 | 537 535 1 732 | 537 445 1 733 | 539 502 1 734 | 1560 452 1 735 | 543 1572 1 736 | 543 1573 1 737 | 544 838 1 738 | 546 509 1 739 | 546 1202 1 740 | 546 965 1 741 | 546 1187 1 742 | 546 653 1 743 | 546 29 1 744 | 546 683 1 745 | 546 969 1 746 | 546 403 1 747 | 546 188 1 748 | 546 240 1 749 | 546 1147 1 750 | 546 478 1 751 | 546 489 1 752 | 546 436 1 753 | 546 223 1 754 | 546 918 1 755 | 546 628 1 756 | 546 716 1 757 | 546 777 1 758 | 546 123 1 759 | 546 931 1 760 | 546 1398 1 761 | 550 892 1 762 | 552 1525 1 763 | 552 1327 1 764 | 552 188 1 765 | 552 1574 1 766 | 553 116 1 767 | 556 319 1 768 | 556 1555 1 769 | 561 1575 1 770 | 562 1212 1 771 | 564 706 1 772 | 564 480 1 773 | 564 1576 1 774 | 564 62 1 775 | 564 810 1 776 | 564 1003 1 777 | 568 29 1 778 | 568 313 1 779 | 568 319 1 780 | 568 221 1 781 | 568 188 1 782 | 568 1420 1 783 | 568 452 1 784 | 574 509 1 785 | 574 1577 1 786 | 578 938 1 787 | 578 315 1 788 | 578 410 1 789 | 578 134 1 790 | 581 1104 1 791 | 584 165 1 792 | 584 1415 1 793 | 584 1018 1 794 | 584 433 1 795 | 584 499 1 796 | 1578 1034 1 797 | 1578 272 1 798 | 1578 1398 1 799 | 1578 825 1 800 
| 591 188 1 801 | 593 1330 1 802 | 593 1166 1 803 | 593 1223 1 804 | 605 618 1 805 | 605 1149 1 806 | 605 1217 1 807 | 605 1205 1 808 | 606 1024 1 809 | 612 909 1 810 | 616 694 1 811 | 616 509 1 812 | 616 1434 1 813 | 616 397 1 814 | 616 617 1 815 | 617 509 1 816 | 617 616 1 817 | 618 1149 1 818 | 618 837 1 819 | 618 1205 1 820 | 618 605 1 821 | 618 640 1 822 | 618 509 1 823 | 618 1125 1 824 | 618 1217 1 825 | 619 114 1 826 | 620 938 1 827 | 1579 1580 1 828 | 623 776 1 829 | 624 942 1 830 | 628 1147 1 831 | 628 683 1 832 | 628 6 1 833 | 628 362 1 834 | 628 364 1 835 | 628 452 1 836 | 628 999 1 837 | 628 716 1 838 | 628 1423 1 839 | 628 89 1 840 | 628 120 1 841 | 628 1192 1 842 | 628 436 1 843 | 628 167 1 844 | 628 428 1 845 | 628 188 1 846 | 628 432 1 847 | 628 617 1 848 | 628 670 1 849 | 628 1159 1 850 | 628 509 1 851 | 628 1398 1 852 | 629 1159 1 853 | 629 628 1 854 | 629 428 1 855 | 629 1355 1 856 | 629 362 1 857 | 631 307 1 858 | 632 737 1 859 | 632 1304 1 860 | 632 1581 1 861 | 632 1428 1 862 | 634 1481 1 863 | 636 223 1 864 | 636 1398 1 865 | 636 123 1 866 | 637 35 1 867 | 640 1205 1 868 | 640 1217 1 869 | 640 618 1 870 | 640 1149 1 871 | 640 605 1 872 | 644 164 1 873 | 645 1103 1 874 | 648 911 1 875 | 649 776 1 876 | 655 1301 1 877 | 655 93 1 878 | 659 272 1 879 | 659 312 1 880 | 1582 1242 1 881 | 1582 1514 1 882 | 1582 986 1 883 | 661 1354 1 884 | 661 1344 1 885 | 665 1569 1 886 | 666 546 1 887 | 666 812 1 888 | 670 628 1 889 | 670 702 1 890 | 670 546 1 891 | 670 804 1 892 | 671 528 1 893 | 672 1272 1 894 | 672 1360 1 895 | 1583 1109 1 896 | 676 1212 1 897 | 679 1441 1 898 | 683 80 1 899 | 683 240 1 900 | 683 546 1 901 | 683 305 1 902 | 684 1269 1 903 | 685 393 1 904 | 694 616 1 905 | 695 1019 1 906 | 698 1201 1 907 | 698 463 1 908 | 702 782 1 909 | 702 300 1 910 | 702 484 1 911 | 702 938 1 912 | 711 716 1 913 | 712 99 1 914 | 716 1398 1 915 | 716 1159 1 916 | 716 436 1 917 | 716 312 1 918 | 716 1147 1 919 | 716 188 1 920 | 716 1481 1 921 | 716 509 1 922 | 
716 402 1 923 | 716 1350 1 924 | 716 1504 1 925 | 716 711 1 926 | 716 426 1 927 | 716 282 1 928 | 716 628 1 929 | 716 1348 1 930 | 716 989 1 931 | 716 29 1 932 | 718 228 1 933 | 718 1168 1 934 | 721 1294 1 935 | 725 580 1 936 | 725 711 1 937 | 725 478 1 938 | 725 1416 1 939 | 725 436 1 940 | 725 616 1 941 | 725 406 1 942 | 725 215 1 943 | 728 298 1 944 | 1584 548 1 945 | 733 1456 1 946 | 733 1585 1 947 | 735 1015 1 948 | 735 928 1 949 | 1520 1519 1 950 | 738 553 1 951 | 738 116 1 952 | 739 79 1 953 | 739 863 1 954 | 739 1249 1 955 | 739 315 1 956 | 739 506 1 957 | 739 1153 1 958 | 739 282 1 959 | 739 426 1 960 | 739 355 1 961 | 741 680 1 962 | 743 1586 1 963 | 748 1040 1 964 | 748 337 1 965 | 752 806 1 966 | 752 1507 1 967 | 752 1018 1 968 | 752 433 1 969 | 752 1587 1 970 | 752 177 1 971 | 759 1208 1 972 | 759 307 1 973 | 762 1202 1 974 | 764 488 1 975 | 764 938 1 976 | 764 402 1 977 | 764 858 1 978 | 764 300 1 979 | 1588 262 1 980 | 769 433 1 981 | 769 361 1 982 | 771 378 1 983 | 772 210 1 984 | 773 905 1 985 | 773 272 1 986 | 773 1041 1 987 | 774 684 1 988 | 774 1269 1 989 | 776 649 1 990 | 776 1457 1 991 | 777 1187 1 992 | 781 1542 1 993 | 782 858 1 994 | 782 634 1 995 | 782 355 1 996 | 782 315 1 997 | 782 863 1 998 | 782 1348 1 999 | 782 312 1 1000 | 782 6 1 1001 | 782 188 1 1002 | 782 764 1 1003 | 782 79 1 1004 | 782 938 1 1005 | 782 134 1 1006 | 782 402 1 1007 | 782 1192 1 1008 | 782 509 1 1009 | 782 999 1 1010 | 788 1589 1 1011 | 790 448 1 1012 | 790 909 1 1013 | 791 1137 1 1014 | 791 1041 1 1015 | 795 300 1 1016 | 798 563 1 1017 | 798 304 1 1018 | 804 702 1 1019 | 804 739 1 1020 | 805 929 1 1021 | 806 1587 1 1022 | 806 1076 1 1023 | 806 154 1 1024 | 806 752 1 1025 | 806 1507 1 1026 | 807 1347 1 1027 | 807 906 1 1028 | 808 629 1 1029 | 810 191 1 1030 | 812 825 1 1031 | 812 1204 1 1032 | 812 666 1 1033 | 812 278 1 1034 | 813 867 1 1035 | 814 319 1 1036 | 821 1539 1 1037 | 824 61 1 1038 | 824 1000 1 1039 | 824 1201 1 1040 | 824 13 1 1041 | 824 702 1 1042 | 
1537 163 1 1043 | 825 188 1 1044 | 825 812 1 1045 | 825 1064 1 1046 | 825 161 1 1047 | 825 272 1 1048 | 825 1033 1 1049 | 826 96 1 1050 | 828 720 1 1051 | 830 1168 1 1052 | 1564 196 1 1053 | 1564 485 1 1054 | 837 618 1 1055 | 838 544 1 1056 | 842 215 1 1057 | 842 1034 1 1058 | 842 653 1 1059 | 842 546 1 1060 | 842 945 1 1061 | 842 79 1 1062 | 842 1125 1 1063 | 842 618 1 1064 | 843 1541 1 1065 | 843 84 1 1066 | 843 1590 1 1067 | 847 865 1 1068 | 847 1591 1 1069 | 847 989 1 1070 | 849 300 1 1071 | 849 402 1 1072 | 853 319 1 1073 | 853 315 1 1074 | 853 764 1 1075 | 853 782 1 1076 | 853 938 1 1077 | 857 256 1 1078 | 857 605 1 1079 | 857 1149 1 1080 | 857 1369 1 1081 | 857 473 1 1082 | 859 1088 1 1083 | 862 489 1 1084 | 862 88 1 1085 | 862 1307 1 1086 | 863 79 1 1087 | 863 782 1 1088 | 863 355 1 1089 | 863 426 1 1090 | 863 282 1 1091 | 863 739 1 1092 | 867 813 1 1093 | 867 628 1 1094 | 873 1062 1 1095 | 883 1033 1 1096 | 883 849 1 1097 | 892 509 1 1098 | 892 969 1 1099 | 892 1532 1 1100 | 892 188 1 1101 | 892 1147 1 1102 | 892 1159 1 1103 | 892 550 1 1104 | 892 1398 1 1105 | 892 313 1 1106 | 892 436 1 1107 | 892 745 1 1108 | 892 1016 1 1109 | 892 965 1 1110 | 892 1481 1 1111 | 892 29 1 1112 | 892 321 1 1113 | 897 403 1 1114 | 897 595 1 1115 | 898 433 1 1116 | 898 1288 1 1117 | 898 752 1 1118 | 898 361 1 1119 | 898 1507 1 1120 | 898 806 1 1121 | 898 154 1 1122 | 900 458 1 1123 | 905 188 1 1124 | 905 1273 1 1125 | 906 1592 1 1126 | 906 999 1 1127 | 906 1593 1 1128 | 911 648 1 1129 | 911 949 1 1130 | 914 1183 1 1131 | 915 1594 1 1132 | 918 478 1 1133 | 918 436 1 1134 | 918 240 1 1135 | 918 123 1 1136 | 918 546 1 1137 | 918 223 1 1138 | 918 965 1 1139 | 1595 1223 1 1140 | 926 1565 1 1141 | 1567 509 1 1142 | 928 1015 1 1143 | 1541 272 1 1144 | 929 805 1 1145 | 931 489 1 1146 | 931 1187 1 1147 | 931 509 1 1148 | 938 853 1 1149 | 938 300 1 1150 | 938 1033 1 1151 | 938 402 1 1152 | 938 508 1 1153 | 938 702 1 1154 | 938 578 1 1155 | 938 273 1 1156 | 938 1160 1 1157 | 938 764 1 
1158 | 938 999 1 1159 | 938 1508 1 1160 | 938 782 1 1161 | 938 858 1 1162 | 941 1162 1 1163 | 942 755 1 1164 | 942 961 1 1165 | 942 624 1 1166 | 947 985 1 1167 | 948 147 1 1168 | 949 648 1 1169 | 949 911 1 1170 | 956 803 1 1171 | 956 562 1 1172 | 956 509 1 1173 | 957 1398 1 1174 | 961 942 1 1175 | 961 624 1 1176 | 963 535 1 1177 | 1558 516 1 1178 | 1558 372 1 1179 | 1558 433 1 1180 | 965 975 1 1181 | 965 141 1 1182 | 965 1311 1 1183 | 965 58 1 1184 | 965 1596 1 1185 | 965 1597 1 1186 | 965 29 1 1187 | 965 509 1 1188 | 965 918 1 1189 | 965 298 1 1190 | 965 919 1 1191 | 965 221 1 1192 | 965 892 1 1193 | 965 546 1 1194 | 965 1398 1 1195 | 965 1147 1 1196 | 965 188 1 1197 | 965 80 1 1198 | 965 1187 1 1199 | 966 1567 1 1200 | 966 2 1 1201 | 969 628 1 1202 | 969 1147 1 1203 | 969 1598 1 1204 | 969 29 1 1205 | 969 188 1 1206 | 969 1159 1 1207 | 969 298 1 1208 | 969 1398 1 1209 | 969 509 1 1210 | 969 436 1 1211 | 969 892 1 1212 | 969 1187 1 1213 | 969 546 1 1214 | 969 965 1 1215 | 969 716 1 1216 | 969 1544 1 1217 | 969 1435 1 1218 | 970 234 1 1219 | 974 1174 1 1220 | 975 965 1 1221 | 978 977 1 1222 | 978 92 1 1223 | 978 732 1 1224 | 978 1556 1 1225 | 978 29 1 1226 | 978 1599 1 1227 | 979 1168 1 1228 | 983 830 1 1229 | 983 1074 1 1230 | 983 1168 1 1231 | 983 228 1 1232 | 983 147 1 1233 | 984 249 1 1234 | 986 509 1 1235 | 989 716 1 1236 | 989 1591 1 1237 | 989 312 1 1238 | 989 402 1 1239 | 989 188 1 1240 | 989 858 1 1241 | 989 865 1 1242 | 992 1149 1 1243 | 999 312 1 1244 | 999 1600 1 1245 | 999 188 1 1246 | 999 1348 1 1247 | 999 628 1 1248 | 1000 623 1 1249 | 1000 1551 1 1250 | 1001 682 1 1251 | 1002 298 1 1252 | 1002 898 1 1253 | 1002 683 1 1254 | 1002 80 1 1255 | 1013 1371 1 1256 | 1014 580 1 1257 | 1014 1417 1 1258 | 1014 503 1 1259 | 1014 355 1 1260 | 1014 1556 1 1261 | 1014 509 1 1262 | 1014 416 1 1263 | 1014 1533 1 1264 | 1014 1601 1 1265 | 1014 658 1 1266 | 1014 702 1 1267 | 1015 928 1 1268 | 1015 42 1 1269 | 1016 892 1 1270 | 1017 94 1 1271 | 1018 516 1 1272 | 1018 
584 1 1273 | 1018 499 1 1274 | 1018 752 1 1275 | 1018 433 1 1276 | 1018 1415 1 1277 | 1019 1232 1 1278 | 1019 695 1 1279 | 1019 485 1 1280 | 1020 71 1 1281 | 1020 100 1 1282 | 1021 95 1 1283 | 1021 1602 1 1284 | 1022 188 1 1285 | 1024 606 1 1286 | 1024 1228 1 1287 | 1033 1518 1 1288 | 1033 764 1 1289 | 1033 938 1 1290 | 1034 1578 1 1291 | 1034 162 1 1292 | 1034 842 1 1293 | 1039 231 1 1294 | 1039 873 1 1295 | 1039 1437 1 1296 | 1039 387 1 1297 | 1040 493 1 1298 | 1041 1418 1 1299 | 1041 1481 1 1300 | 1041 1603 1 1301 | 1042 1149 1 1302 | 1042 605 1 1303 | 1043 1536 1 1304 | 1043 199 1 1305 | 1048 195 1 1306 | 1048 395 1 1307 | 1056 353 1 1308 | 1056 535 1 1309 | 1056 537 1 1310 | 1056 445 1 1311 | 1057 607 1 1312 | 1060 509 1 1313 | 1061 29 1 1314 | 1062 1549 1 1315 | 1062 1495 1 1316 | 1064 825 1 1317 | 1065 464 1 1318 | 1065 764 1 1319 | 1065 1178 1 1320 | 1065 319 1 1321 | 1065 508 1 1322 | 1065 315 1 1323 | 1072 1249 1 1324 | 1072 79 1 1325 | 1074 983 1 1326 | 1076 806 1 1327 | 1078 509 1 1328 | 1078 1114 1 1329 | 1080 1154 1 1330 | 1081 476 1 1331 | 1088 859 1 1332 | 1088 285 1 1333 | 1094 274 1 1334 | 1094 1459 1 1335 | 1094 1604 1 1336 | 1094 696 1 1337 | 1094 1500 1 1338 | 1094 1605 1 1339 | 1094 253 1 1340 | 1094 741 1 1341 | 1094 1565 1 1342 | 1094 857 1 1343 | 1094 159 1 1344 | 1094 329 1 1345 | 1094 295 1 1346 | 1094 703 1 1347 | 1094 32 1 1348 | 1094 1344 1 1349 | 1101 1091 1 1350 | 1101 707 1 1351 | 1103 645 1 1352 | 1104 509 1 1353 | 1104 581 1 1354 | 1104 1606 1 1355 | 1104 628 1 1356 | 1104 313 1 1357 | 1513 1607 1 1358 | 1513 27 1 1359 | 1106 188 1 1360 | 1106 29 1 1361 | 1109 1608 1 1362 | 1109 1545 1 1363 | 1109 1583 1 1364 | 1109 1609 1 1365 | 1111 272 1 1366 | 1112 825 1 1367 | 1117 1610 1 1368 | 1123 638 1 1369 | 1125 1205 1 1370 | 1125 618 1 1371 | 1125 640 1 1372 | 1125 1149 1 1373 | 1128 509 1 1374 | 1131 272 1 1375 | 1611 1612 1 1376 | 1135 1235 1 1377 | 1137 188 1 1378 | 1137 452 1 1379 | 1137 1560 1 1380 | 1137 1479 1 1381 | 1606 1104 1 
1382 | 1140 224 1 1383 | 1140 490 1 1384 | 1142 1205 1 1385 | 1142 718 1 1386 | 1142 741 1 1387 | 1142 1500 1 1388 | 1577 574 1 1389 | 1147 546 1 1390 | 1147 969 1 1391 | 1147 628 1 1392 | 1147 403 1 1393 | 1147 1159 1 1394 | 1147 188 1 1395 | 1147 298 1 1396 | 1147 312 1 1397 | 1147 29 1 1398 | 1147 965 1 1399 | 1147 509 1 1400 | 1147 716 1 1401 | 1147 66 1 1402 | 1147 1398 1 1403 | 1147 1481 1 1404 | 1147 223 1 1405 | 1147 362 1 1406 | 1147 80 1 1407 | 1147 1488 1 1408 | 1147 892 1 1409 | 1149 705 1 1410 | 1149 640 1 1411 | 1149 1205 1 1412 | 1149 849 1 1413 | 1149 254 1 1414 | 1149 1042 1 1415 | 1149 618 1 1416 | 1149 1217 1 1417 | 1149 605 1 1418 | 1149 739 1 1419 | 1149 1125 1 1420 | 1149 876 1 1421 | 1149 382 1 1422 | 1149 473 1 1423 | 1149 837 1 1424 | 1149 471 1 1425 | 1149 509 1 1426 | 1153 739 1 1427 | 1153 1249 1 1428 | 1153 315 1 1429 | 1153 332 1 1430 | 1153 426 1 1431 | 1153 79 1 1432 | 1154 1080 1 1433 | 1154 1061 1 1434 | 1157 323 1 1435 | 1157 509 1 1436 | 1157 1194 1 1437 | 1157 1338 1 1438 | 1157 193 1 1439 | 1159 1147 1 1440 | 1159 969 1 1441 | 1159 242 1 1442 | 1159 298 1 1443 | 1159 1444 1 1444 | 1159 29 1 1445 | 1159 628 1 1446 | 1160 938 1 1447 | 1160 782 1 1448 | 1160 312 1 1449 | 1162 1223 1 1450 | 1162 1330 1 1451 | 1165 1348 1 1452 | 1165 355 1 1453 | 1165 188 1 1454 | 1165 1249 1 1455 | 1168 979 1 1456 | 1168 228 1 1457 | 1168 830 1 1458 | 1168 718 1 1459 | 1171 1170 1 1460 | 1173 91 1 1461 | 1173 877 1 1462 | 1173 1333 1 1463 | 1174 63 1 1464 | 1176 1398 1 1465 | 1176 1613 1 1466 | 1178 1357 1 1467 | 1178 1065 1 1468 | 1178 428 1 1469 | 1178 1132 1 1470 | 1178 484 1 1471 | 1178 877 1 1472 | 1179 26 1 1473 | 1180 272 1 1474 | 1607 1513 1 1475 | 1183 914 1 1476 | 1185 1614 1 1477 | 1185 20 1 1478 | 1185 18 1 1479 | 1187 496 1 1480 | 1187 240 1 1481 | 1187 215 1 1482 | 1187 509 1 1483 | 1187 777 1 1484 | 1187 294 1 1485 | 1187 123 1 1486 | 1187 502 1 1487 | 1187 628 1 1488 | 1187 931 1 1489 | 1187 1200 1 1490 | 1187 348 1 1491 | 1187 80 1 
1492 | 1187 965 1 1493 | 1187 1615 1 1494 | 1187 546 1 1495 | 1187 489 1 1496 | 1187 223 1 1497 | 1187 1398 1 1498 | 1191 1327 1 1499 | 1192 6 1 1500 | 1192 938 1 1501 | 1195 1169 1 1502 | 1195 151 1 1503 | 1196 986 1 1504 | 1196 883 1 1505 | 1196 428 1 1506 | 1196 448 1 1507 | 1196 509 1 1508 | 1197 201 1 1509 | 1199 754 1 1510 | 1200 1187 1 1511 | 1200 215 1 1512 | 1200 240 1 1513 | 1201 1193 1 1514 | 1202 1357 1 1515 | 1202 403 1 1516 | 1202 1600 1 1517 | 1202 221 1 1518 | 1202 112 1 1519 | 1202 1528 1 1520 | 1202 1420 1 1521 | 1202 762 1 1522 | 1202 546 1 1523 | 1202 452 1 1524 | 1202 628 1 1525 | 1202 1167 1 1526 | 1202 1423 1 1527 | 1204 812 1 1528 | 1205 1217 1 1529 | 1205 1149 1 1530 | 1206 509 1 1531 | 1207 514 1 1532 | 1208 227 1 1533 | 1208 759 1 1534 | 1212 481 1 1535 | 1212 562 1 1536 | 1212 226 1 1537 | 1212 524 1 1538 | 1212 1360 1 1539 | 1212 676 1 1540 | 1212 1248 1 1541 | 1214 1560 1 1542 | 1217 618 1 1543 | 1217 1205 1 1544 | 1220 509 1 1545 | 1220 29 1 1546 | 1220 626 1 1547 | 1220 1335 1 1548 | 1223 593 1 1549 | 1223 1330 1 1550 | 1223 1166 1 1551 | 1223 1162 1 1552 | 1223 1243 1 1553 | 1223 1595 1 1554 | 1616 1016 1 1555 | 1226 147 1 1556 | 1228 1024 1 1557 | 1228 606 1 1558 | 1617 511 1 1559 | 1229 320 1 1560 | 1229 1348 1 1561 | 1232 1335 1 1562 | 1232 1505 1 1563 | 1232 95 1 1564 | 1232 1019 1 1565 | 1233 1618 1 1566 | 1538 164 1 1567 | 1243 1223 1 1568 | 1244 115 1 1569 | 1249 1072 1 1570 | 1249 79 1 1571 | 1249 791 1 1572 | 1249 282 1 1573 | 1249 362 1 1574 | 1249 1153 1 1575 | 1249 739 1 1576 | 1249 426 1 1577 | 1249 428 1 1578 | 1249 422 1 1579 | 1249 315 1 1580 | 1249 1165 1 1581 | 1252 95 1 1582 | 1252 1089 1 1583 | 1255 999 1 1584 | 1255 1147 1 1585 | 1591 989 1 1586 | 1619 147 1 1587 | 1262 1360 1 1588 | 1269 175 1 1589 | 1269 684 1 1590 | 1269 1620 1 1591 | 1269 774 1 1592 | 1272 345 1 1593 | 1273 905 1 1594 | 1275 532 1 1595 | 1621 1622 1 1596 | 1276 883 1 1597 | 1276 509 1 1598 | 1276 1203 1 1599 | 1278 1623 1 1600 | 1278 832 1 
1601 | 1278 241 1 1602 | 1278 986 1 1603 | 1278 1624 1 1604 | 1278 1344 1 1605 | 1278 1625 1 1606 | 1278 114 1 1607 | 1278 1505 1 1608 | 1278 1531 1 1609 | 1281 272 1 1610 | 1281 1541 1 1611 | 1282 320 1 1612 | 1288 806 1 1613 | 1288 154 1 1614 | 1288 361 1 1615 | 1288 433 1 1616 | 1288 1331 1 1617 | 1288 284 1 1618 | 1288 898 1 1619 | 1288 1507 1 1620 | 1288 165 1 1621 | 1288 372 1 1622 | 1288 516 1 1623 | 1288 1468 1 1624 | 1289 320 1 1625 | 1289 509 1 1626 | 1289 628 1 1627 | 1292 324 1 1628 | 1292 95 1 1629 | 1295 425 1 1630 | 1298 1500 1 1631 | 1298 161 1 1632 | 1298 489 1 1633 | 1299 1626 1 1634 | 1299 399 1 1635 | 1301 93 1 1636 | 1306 1186 1 1637 | 1306 1500 1 1638 | 1307 626 1 1639 | 1321 147 1 1640 | 1325 1355 1 1641 | 1327 188 1 1642 | 1327 1191 1 1643 | 1327 552 1 1644 | 1327 1525 1 1645 | 1329 724 1 1646 | 1329 1545 1 1647 | 1329 1360 1 1648 | 1329 1627 1 1649 | 1330 1223 1 1650 | 1331 1288 1 1651 | 1331 1018 1 1652 | 1331 154 1 1653 | 1331 433 1 1654 | 1332 995 1 1655 | 1333 29 1 1656 | 1333 616 1 1657 | 1333 877 1 1658 | 1333 845 1 1659 | 1333 1515 1 1660 | 1333 1079 1 1661 | 1333 509 1 1662 | 1333 1168 1 1663 | 1333 681 1 1664 | 1333 1208 1 1665 | 1333 91 1 1666 | 1334 430 1 1667 | 1336 1032 1 1668 | 1336 509 1 1669 | 1336 946 1 1670 | 1338 1157 1 1671 | 1342 1366 1 1672 | 1342 1504 1 1673 | 1342 1628 1 1674 | 1342 261 1 1675 | 1628 1342 1 1676 | 1344 114 1 1677 | 1346 1406 1 1678 | 1348 716 1 1679 | 1348 999 1 1680 | 1348 312 1 1681 | 1350 188 1 1682 | 1355 1325 1 1683 | 1355 395 1 1684 | 1355 629 1 1685 | 1355 146 1 1686 | 1357 1398 1 1687 | 1359 1387 1 1688 | 1359 873 1 1689 | 1360 1329 1 1690 | 1360 524 1 1691 | 1360 1212 1 1692 | 1361 591 1 1693 | 1361 188 1 1694 | 1362 1624 1 1695 | 1366 1504 1 1696 | 1366 1342 1 1697 | 1369 857 1 1698 | 1371 1013 1 1699 | 1371 319 1 1700 | 1371 222 1 1701 | 1379 1629 1 1702 | 1379 1377 1 1703 | 1379 35 1 1704 | 1598 969 1 1705 | 1387 825 1 1706 | 1388 433 1 1707 | 1388 509 1 1708 | 1392 1336 1 1709 | 1392 509 
1 1710 | 1395 302 1 1711 | 1398 1176 1 1712 | 1398 188 1 1713 | 1398 1578 1 1714 | 1398 1357 1 1715 | 1398 716 1 1716 | 1398 636 1 1717 | 1398 1147 1 1718 | 1398 546 1 1719 | 1398 509 1 1720 | 1398 528 1 1721 | 1398 965 1 1722 | 1398 29 1 1723 | 1398 223 1 1724 | 1398 957 1 1725 | 1398 1481 1 1726 | 1398 1432 1 1727 | 1398 436 1 1728 | 1398 160 1 1729 | 1398 1187 1 1730 | 1398 628 1 1731 | 1398 298 1 1732 | 1400 1630 1 1733 | 1400 1631 1 1734 | 1402 740 1 1735 | 1402 578 1 1736 | 1402 464 1 1737 | 1403 1411 1 1738 | 1632 1412 1 1739 | 1406 51 1 1740 | 1406 1346 1 1741 | 1406 4 1 1742 | 1406 509 1 1743 | 1407 441 1 1744 | 1407 508 1 1745 | 1410 1411 1 1746 | 1411 1403 1 1747 | 1412 1075 1 1748 | 1412 1632 1 1749 | 1413 436 1 1750 | 1413 683 1 1751 | 1415 109 1 1752 | 1415 584 1 1753 | 1415 499 1 1754 | 1415 516 1 1755 | 1415 433 1 1756 | 1415 1018 1 1757 | 1415 165 1 1758 | 1416 478 1 1759 | 1416 436 1 1760 | 1417 1466 1 1761 | 1418 1041 1 1762 | 1420 1202 1 1763 | 1420 568 1 1764 | 1420 221 1 1765 | 1420 452 1 1766 | 1422 1373 1 1767 | 1424 1016 1 1768 | 1424 1242 1 1769 | 1424 1272 1 1770 | 1427 509 1 1771 | 1432 1398 1 1772 | 1433 1633 1 1773 | 1435 242 1 1774 | 1435 29 1 1775 | 1435 969 1 1776 | 1435 1566 1 1777 | 1435 188 1 1778 | 1543 200 1 1779 | 1437 1039 1 1780 | 1620 1269 1 1781 | 1439 428 1 1782 | 1634 1101 1 1783 | 1634 707 1 1784 | 1634 1091 1 1785 | 1443 433 1 1786 | 1444 1159 1 1787 | 1452 1635 1 1788 | 1454 1636 1 1789 | 1637 854 1 1790 | 1458 554 1 1791 | 1458 663 1 1792 | 1458 448 1 1793 | 1459 1094 1 1794 | 1463 509 1 1795 | 1464 883 1 1796 | 1466 1417 1 1797 | 1466 396 1 1798 | 1466 300 1 1799 | 1466 938 1 1800 | 1468 1507 1 1801 | 1468 372 1 1802 | 1468 1288 1 1803 | 1469 979 1 1804 | 1469 1492 1 1805 | 1469 1498 1 1806 | 1469 384 1 1807 | 1473 1638 1 1808 | 1474 1639 1 1809 | 1475 1640 1 1810 | 1641 285 1 1811 | 1478 1641 1 1812 | 1478 285 1 1813 | 1479 792 1 1814 | 1479 1137 1 1815 | 1479 188 1 1816 | 1479 1182 1 1817 | 1480 1360 1 1818 | 1481 
634 1 1819 | 1481 188 1 1820 | 1481 1041 1 1821 | 1481 1147 1 1822 | 1481 29 1 1823 | 1481 509 1 1824 | 1481 716 1 1825 | 1481 436 1 1826 | 1488 1147 1 1827 | 1491 110 1 1828 | 1493 490 1 1829 | 1494 1360 1 1830 | 1494 1039 1 1831 | 1494 1545 1 1832 | 1494 524 1 1833 | 1494 1570 1 1834 | 1494 1642 1 1835 | 1495 1062 1 1836 | 1516 38 1 1837 | 1516 645 1 1838 | 1504 261 1 1839 | 1504 716 1 1840 | 1504 188 1 1841 | 1504 1342 1 1842 | 1504 221 1 1843 | 1504 166 1 1844 | 1507 372 1 1845 | 1507 752 1 1846 | 1507 1468 1 1847 | 1507 1288 1 1848 | 1507 433 1 1849 | 1507 806 1 1850 | 1507 361 1 1851 | 1508 1187 1 1852 | 1508 509 1 1853 | 1508 938 1 1854 | -------------------------------------------------------------------------------- /metrics/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hongleizhang/RSAlgorithms/b6be615c8c554e82a7c97e0ebff16631865c2b01/metrics/__init__.py -------------------------------------------------------------------------------- /metrics/metric.py: -------------------------------------------------------------------------------- 1 | # encoding:utf-8 2 | import math 3 | 4 | 5 | class Metric(object): 6 | ''' 7 | the two metrics to measure the prediction accuracy for rating prediction task 8 | ''' 9 | 10 | def __init__(self): 11 | pass 12 | 13 | @staticmethod 14 | def MAE(res): 15 | error = 0 16 | count = 0 17 | for entry in res: 18 | error += abs(entry[2] - entry[3]) 19 | count += 1 20 | if count == 0: 21 | return error 22 | return float(error) / count 23 | 24 | @staticmethod 25 | def RMSE(res): 26 | error = 0 27 | count = 0 28 | for entry in res: 29 | error += abs(entry[2] - entry[3]) ** 2 30 | count += 1 31 | if count == 0: 32 | return error 33 | return math.sqrt(float(error) / count) 34 | -------------------------------------------------------------------------------- /model/__init__.py: -------------------------------------------------------------------------------- 
# encoding:utf-8
import sys

sys.path.append("..")

import numpy as np
from mf import MF


class BiasSVD(MF):
    """BiasSVD: matrix factorization with per-user and per-item bias terms.

    Koren Y, Bell R, Volinsky C. Matrix factorization techniques for
    recommender systems[J]. Computer, 2009, 42(8).
    """

    def __init__(self):
        super(BiasSVD, self).__init__()
        self.config.lambdaB = 0.001  # regularization coefficient for the bias terms

    def init_model(self, k):
        """Initialize latent factors (in the parent) plus user/item bias vectors."""
        super(BiasSVD, self).init_model(k)
        self.Bu = np.random.rand(self.rg.get_train_size()[0]) / (self.config.factor ** 0.5)  # bias value of user
        self.Bi = np.random.rand(self.rg.get_train_size()[1]) / (self.config.factor ** 0.5)  # bias value of item

    def train_model(self, k):
        """Train fold *k* by SGD until convergence or the iteration cap."""
        super(BiasSVD, self).train_model(k)
        iteration = 0
        while iteration < self.config.maxIter:
            self.loss = 0
            for index, line in enumerate(self.rg.trainSet()):
                user, item, rating = line
                u = self.rg.user[user]
                i = self.rg.item[item]
                error = rating - self.predict(user, item)
                self.loss += error ** 2
                p, q = self.P[u], self.Q[i]

                # update bias terms and latent vectors
                self.Bu[u] += self.config.lr * (error - self.config.lambdaB * self.Bu[u])
                self.Bi[i] += self.config.lr * (error - self.config.lambdaB * self.Bi[i])
                self.P[u] += self.config.lr * (error * q - self.config.lambdaP * p)
                self.Q[i] += self.config.lr * (error * p - self.config.lambdaQ * q)

            # add the regularization terms to the epoch loss
            self.loss += self.config.lambdaP * (self.P * self.P).sum() + self.config.lambdaQ * (self.Q * self.Q).sum() \
                         + self.config.lambdaB * ((self.Bu * self.Bu).sum() + (self.Bi * self.Bi).sum())
            iteration += 1
            if self.isConverged(iteration):
                break

    def predict(self, u, i):
        """Predict the rating of user *u* on item *i*; global mean as fallback."""
        if self.rg.containsUser(u) and self.rg.containsItem(i):
            u = self.rg.user[u]
            i = self.rg.item[i]
            return self.P[u].dot(self.Q[i]) + self.rg.globalMean + self.Bi[i] + self.Bu[u]
        else:
            return self.rg.globalMean


if __name__ == '__main__':

    rmses = []
    maes = []
    bmf = BiasSVD()
    bmf.config.k_fold_num = 1
    for i in range(bmf.config.k_fold_num):
        bmf.train_model(i)
        rmse, mae = bmf.predict_model()
        print("current best rmse is %0.5f, mae is %0.5f" % (rmse, mae))
        rmses.append(rmse)
        maes.append(mae)
    # BUG FIX: the averages were divided by a hard-coded 5, but k_fold_num is 1
    # here, so the reported averages were wrong. Divide by the actual fold count.
    fold_count = max(len(rmses), 1)
    rmse_avg = sum(rmses) / fold_count
    mae_avg = sum(maes) / fold_count
    print("the rmses are %s" % rmses)
    print("the maes are %s" % maes)
    print("the average of rmses is %s " % rmse_avg)
    print("the average of maes is %s " % mae_avg)
# encoding:utf-8
import sys

sys.path.append("..")
from mf import MF


class FunkSVD(MF):
    """FunkSVD: plain matrix factorization trained by SGD, no regularization.

    http://sifter.org/~simon/journal/20061211.html
    """

    def __init__(self):
        super(FunkSVD, self).__init__()

    def train_model(self, k):
        """Run SGD on fold *k* until convergence or the iteration cap."""
        super(FunkSVD, self).train_model(k)
        epoch = 0
        while epoch < self.config.maxIter:
            self.loss = 0
            for _, sample in enumerate(self.rg.trainSet()):
                user, item, rating = sample
                uid = self.rg.user[user]
                iid = self.rg.item[item]
                err = rating - self.predict(user, item)
                self.loss += err ** 2
                pu, qi = self.P[uid], self.Q[iid]
                # gradient step on both latent vectors (no regularization term)
                self.P[uid] += self.config.lr * err * qi
                self.Q[iid] += self.config.lr * err * pu

            epoch += 1
            if self.isConverged(epoch):
                break


if __name__ == '__main__':

    rmses = []
    bmf = FunkSVD()
    for i in range(bmf.config.k_fold_num):
        bmf.train_model(i)
        rmse, mae = bmf.predict_model()
        rmses.append(rmse)
    print(rmses)
# encoding:utf-8
import sys

sys.path.append("..")
from prettyprinter import cpprint
import numpy as np
from mf import MF
from collections import defaultdict
from utility.similarity import pearson_sp
from utility import util


class IntegSVD(MF):
    """IntegSVD: SVD with an item-neighborhood term integrated into the model.

    Koren Y. Factor in the neighbors: Scalable and accurate collaborative
    filtering[J]. ACM Transactions on Knowledge Discovery from Data (TKDD),
    2010, 4(1): 1.
    """

    def __init__(self):
        super(IntegSVD, self).__init__()
        self.config.item_near_num = 10  # number of item neighbors kept per (user, item)
        self.config.lambdaP = 0.001
        self.config.lambdaQ = 0.001
        self.config.lambdaB = 0.01
        self.config.lambdaW = 0.01

    def init_model(self, k):
        """Init latent factors plus bias vectors and the item-item weight matrix W."""
        super(IntegSVD, self).init_model(k)
        self.Bu = np.random.rand(self.rg.get_train_size()[0]) / (self.config.factor ** 0.5)  # bias value of user
        self.Bi = np.random.rand(self.rg.get_train_size()[1]) / (self.config.factor ** 0.5)  # bias value of item
        self.user_item_nei = defaultdict(dict)  # cache: user -> item -> tuple of neighbor items
        self.W = np.random.rand(self.rg.get_train_size()[1], self.rg.get_train_size()[1])

    def train_model(self, k):
        """SGD training on fold *k*; persists the neighbor cache afterwards."""
        super(IntegSVD, self).train_model(k)
        iteration = 0
        while iteration < self.config.maxIter:
            self.loss = 0
            for index, line in enumerate(self.rg.trainSet()):
                user, item, rating = line
                u = self.rg.user[user]
                i = self.rg.item[item]
                ui_neighbors = self.get_neighbor(user, item)
                ui_nei_len = len(ui_neighbors)
                error = rating - self.predict(user, item)
                self.loss += error ** 2

                p, q = self.P[u], self.Q[i]

                # update biases and latent vectors
                self.Bu[u] += self.config.lr * (error - self.config.lambdaB * self.Bu[u])
                self.Bi[i] += self.config.lr * (error - self.config.lambdaB * self.Bi[i])
                self.P[u] += self.config.lr * (error * q - self.config.lambdaP * p)
                self.Q[i] += self.config.lr * (error * p - self.config.lambdaQ * q)

                # update the neighborhood weight matrix W
                for neighbor in ui_neighbors:
                    j = self.rg.item[neighbor]
                    ruj = self.rg.trainSet_u[user][neighbor]
                    buj = self.rg.globalMean + self.Bu[u] + self.Bi[j]
                    self.W[i][j] += self.config.lr * (
                        error / (ui_nei_len ** 0.5) * (ruj - buj) - self.config.lambdaW * self.W[i][j])

            # add the regularization terms to the epoch loss
            self.loss += self.config.lambdaP * (self.P * self.P).sum() \
                + self.config.lambdaQ * (self.Q * self.Q).sum() \
                + self.config.lambdaB * ((self.Bu * self.Bu).sum() + (self.Bi * self.Bi).sum()) \
                + self.config.lambdaW * (self.W * self.W).sum()
            iteration += 1
            if self.isConverged(iteration):
                break

        util.save_data(self.user_item_nei, '../data/neibor/ft_intsvd_useritemnei_08.pkl')

    def predict(self, user, item):
        """Baseline + latent dot product + weighted neighborhood deviation."""
        if self.rg.containsUser(user) and self.rg.containsItem(item):
            sum_w = 0.0
            u = self.rg.user[user]
            i = self.rg.item[item]
            bui = self.rg.globalMean + self.Bi[i] + self.Bu[u]
            ui_neighbors = self.get_neighbor(user, item)
            ui_len = len(ui_neighbors)
            for neighbor in ui_neighbors:
                j = self.rg.item[neighbor]
                ruj = self.rg.trainSet_u[user][neighbor]
                buj = self.rg.globalMean + self.Bi[j] + self.Bu[u]
                sum_w += (ruj - buj) * self.W[i][j]
            if ui_len != 0:
                sum_w *= 1.0 / ui_len  # normalize by neighborhood size
            return bui + self.Q[i].dot(self.P[u]) + sum_w
        else:
            return self.rg.globalMean

    def get_sum_y(self, u):
        """(Unused) accumulate implicit-feedback factors Y for user *u*.

        NOTE(review): self.Y is no longer initialized in init_model, so calling
        this would raise AttributeError; kept only for parity with the original.
        """
        u_items = self.rg.user_rated_items(u)
        nu = len(u_items)
        sum_y = np.zeros(self.config.factor)
        for j in u_items:
            sum_y += self.Y[self.rg.item[j]]
        sum_y /= (np.sqrt(nu))
        return nu, sum_y

    def get_neighbor(self, user, item):
        """Return (and cache) the items most similar to *item* among those rated by *user*."""
        if user in self.user_item_nei and item in self.user_item_nei[user]:
            return self.user_item_nei[user][item]
        rated = self.rg.user_rated_items(user)
        sim_by_item = {}
        for other in rated:
            if item != other:
                sim = pearson_sp(self.rg.get_col(item), self.rg.get_col(other))
                sim_by_item[other] = round(sim, 4)
        ranked = sorted(sim_by_item.items(), key=lambda x: x[1], reverse=True)[:self.config.item_near_num]
        ranked = list(zip(*ranked))
        if len(ranked) > 0:
            self.user_item_nei[user][item] = ranked[0]
            return ranked[0]
        else:
            return []


if __name__ == '__main__':
    rmses = []
    bmf = IntegSVD()
    for i in range(bmf.config.k_fold_num):
        bmf.train_model(i)
        rmse, mae = bmf.predict_model()
        rmses.append(rmse)
    print(rmses)
# encoding:utf-8
import sys

sys.path.append("..")
from prettyprinter import cpprint
from mf import MF
from utility.matrix import SimMatrix
from utility.similarity import cosine_sp, pearson_sp


class ItemCF(MF):
    """Item-based collaborative filtering (Sarwar et al., WWW 2001).

    Similarities between every test item and every training item are
    precomputed in init_model and reused at prediction time.
    """

    def __init__(self):
        super(ItemCF, self).__init__()
        self.config.n = 50  # size of the item neighborhood used for prediction

    def init_model(self, k):
        """Load fold *k* and precompute the item-item similarity matrix."""
        super(ItemCF, self).init_model(k)
        self.item_sim = SimMatrix()
        for test_item in self.rg.testSet_i:
            for train_item in self.rg.item:
                if test_item == train_item or self.item_sim.contains(test_item, train_item):
                    continue
                sim = pearson_sp(self.rg.get_col(test_item), self.rg.get_col(train_item))
                self.item_sim.set(test_item, train_item, sim)

    def predict(self, u, i):
        """Weighted deviation-from-item-mean prediction over the top-n similar items."""
        ranked = sorted(self.item_sim[i].items(), key=lambda x: x[1], reverse=True)
        top_n = min(self.config.n, len(ranked))

        weighted_sum, denom = 0, 0
        for idx in range(top_n):
            similar_item, similarity = ranked[idx]
            if self.rg.containsUserItem(u, similar_item):
                rating = self.rg.trainSet_u[u][similar_item]
                weighted_sum += similarity * (rating - self.rg.itemMeans[similar_item])
                denom += similarity
        if weighted_sum == 0:
            # no usable neighbor rated by u: fall back to the item (or global) mean
            if not self.rg.containsItem(i):
                return self.rg.globalMean
            return self.rg.itemMeans[i]
        return self.rg.itemMeans[i] + weighted_sum / float(denom)


if __name__ == '__main__':
    ic = ItemCF()
    ic.init_model(0)
    print(ic.predict_model())
    print(ic.predict_model_cold_users())
    ic.init_model(1)
    print(ic.predict_model())
    print(ic.predict_model_cold_users())
# encoding:utf-8
import sys

sys.path.append("..")

from mf import MF
from utility.matrix import SimMatrix
from utility.similarity import cosine_sp


class ItemCF(MF):
    """Item-based collaborative filtering for large data sets.

    Unlike item_cf.py, similarities are computed lazily inside predict()
    instead of being precomputed, trading time for memory.

    Sarwar B, Karypis G, Konstan J, et al. Item-based collaborative filtering
    recommendation algorithms[C]//WWW 2001: 285-295.
    """

    def __init__(self):
        super(ItemCF, self).__init__()
        self.config.n = 10  # size of the item neighborhood used for prediction

    def init_model(self, k):
        """Load fold *k*; no similarity precomputation in this variant."""
        super(ItemCF, self).init_model(k)

    def predict(self, u, i):
        """Compute item similarities on the fly, then predict from the top-n neighbors."""
        item_sim = dict()
        for candidate in self.rg.item:
            if candidate != i and candidate not in item_sim:
                item_sim[candidate] = cosine_sp(self.rg.get_col(i), self.rg.get_col(candidate))

        ranked = sorted(item_sim.items(), key=lambda x: x[1], reverse=True)
        top_n = min(self.config.n, len(ranked))

        weighted_sum, denom = 0, 0
        for idx in range(top_n):
            similar_item, similarity = ranked[idx]
            if self.rg.containsUserItem(u, similar_item):
                rating = self.rg.trainSet_u[u][similar_item]
                weighted_sum += similarity * (rating - self.rg.itemMeans[similar_item])
                denom += similarity
        if weighted_sum == 0:
            # no usable neighbor rated by u: fall back to the item (or global) mean
            if not self.rg.containsItem(i):
                return self.rg.globalMean
            return self.rg.itemMeans[i]
        return self.rg.itemMeans[i] + weighted_sum / float(denom)


if __name__ == '__main__':
    ic = ItemCF()
    ic.init_model(0)
    print(ic.predict_model())
    print(ic.predict_model_cold_users())
    ic.init_model(1)
    print(ic.predict_model())
    print(ic.predict_model_cold_users())
== 0: 62 | if not self.rg.containsItem(i): 63 | return self.rg.globalMean 64 | return self.rg.itemMeans[i] 65 | pred = self.rg.itemMeans[i] + sum / float(denom) 66 | # print('finished user:'+str(u)+" item:"+str(i)) 67 | return pred 68 | pass 69 | 70 | 71 | if __name__ == '__main__': 72 | ic = ItemCF() 73 | ic.init_model(0) 74 | print(ic.predict_model()) 75 | print(ic.predict_model_cold_users()) 76 | ic.init_model(1) 77 | print(ic.predict_model()) 78 | print(ic.predict_model_cold_users()) 79 | -------------------------------------------------------------------------------- /model/mf.py: -------------------------------------------------------------------------------- 1 | #encoding:utf-8 2 | import sys 3 | sys.path.append("..") 4 | 5 | import numpy as np 6 | import matplotlib.pylab as plt 7 | 8 | from prettyprinter import cpprint 9 | from metrics.metric import Metric 10 | from utility.tools import denormalize,sigmoid 11 | from reader.rating import RatingGetter 12 | from configx.configx import ConfigX 13 | 14 | 15 | class MF(object): 16 | """ 17 | docstring for MF 18 | the base class for matrix factorization based model-parent class 19 | 20 | """ 21 | 22 | def __init__(self): 23 | super(MF, self).__init__() 24 | self.config = ConfigX() 25 | cpprint(self.config.__dict__) #print the configuration 26 | 27 | # self.rg = RatingGetter() # loading raing data 28 | # self.init_model() 29 | self.iter_rmse = [] 30 | self.iter_mae = [] 31 | pass 32 | 33 | def init_model(self,k): 34 | self.read_data(k) 35 | self.P = np.random.rand(self.rg.get_train_size()[0], self.config.factor) / ( 36 | self.config.factor ** 0.5) # latent user matrix 37 | self.Q = np.random.rand(self.rg.get_train_size()[1], self.config.factor) / ( 38 | self.config.factor ** 0.5) # latent item matrix 39 | self.loss, self.lastLoss = 0.0, 0.0 40 | self.lastRmse, self.lastMae = 10.0,10.0 41 | pass 42 | 43 | def read_data(self,k): 44 | self.rg = RatingGetter(k) 45 | pass 46 | 47 | def train_model(self,k): 48 | 
        self.init_model(k)
        pass

    # test all users in test set
    def predict_model(self):
        """Score every non-cold test entry of the current fold.

        Users with <= config.coldUserRating training ratings are skipped
        here (they are evaluated separately by predict_model_cold_users).
        Returns (rmse, mae) and records both for later plotting.
        """
        res = []  # rows of [user, item, ground_truth, prediction]
        for ind, entry in enumerate(self.rg.testSet()):
            user, item, rating = entry
            rating_length = len(self.rg.trainSet_u[user])  # remove cold start users for test
            if rating_length <= self.config.coldUserRating:
                continue

            prediction = self.predict(user, item)
            # denormalize
            prediction = denormalize(prediction, self.config.min_val, self.config.max_val)

            pred = self.checkRatingBoundary(prediction)
            # add prediction in order to measure
            res.append([user, item, rating, pred])
        rmse = Metric.RMSE(res)
        mae = Metric.MAE(res)
        self.iter_rmse.append(rmse)  # for plot
        self.iter_mae.append(mae)
        return rmse, mae

    # test cold start users among test set
    def predict_model_cold_users(self):
        """Evaluate only the cold-start users of the test set; returns (rmse, mae)."""
        res = []
        for user in self.rg.testColdUserSet_u.keys():
            for item in self.rg.testColdUserSet_u[user].keys():
                rating = self.rg.testColdUserSet_u[user][item]
                pred = self.predict(user, item)
                # pred = sigmoid(pred)
                # denormalize
                pred = denormalize(pred, self.config.min_val, self.config.max_val)
                pred = self.checkRatingBoundary(pred)
                res.append([user, item, rating, pred])
        rmse = Metric.RMSE(res)
        mae = Metric.MAE(res)
        return rmse,mae

    def predict(self, u, i):
        """Base MF prediction: P_u . Q_i, falling back to the user, item,
        or global mean when u and/or i was not seen in training."""
        if self.rg.containsUser(u) and self.rg.containsItem(i):
            return self.P[self.rg.user[u]].dot(self.Q[self.rg.item[i]])
        elif self.rg.containsUser(u) and not self.rg.containsItem(i):
            return self.rg.userMeans[u]
        elif not self.rg.containsUser(u) and self.rg.containsItem(i):
            return self.rg.itemMeans[i]
        else:
            return self.rg.globalMean

    def checkRatingBoundary(self, prediction):
        # clamp the prediction into [min_val, max_val] and round to 3 decimals
        prediction =round( min( max( prediction , self.config.min_val ) , self.config.max_val ) ,3)
        return prediction

    def isConverged(self, iter):
        """Report progress for the finished epoch and decide whether to stop.

        Stops when |delta loss| < config.threshold, or earlier when early
        stopping is enabled and the test RMSE worsened since the previous
        epoch. Exits the process outright if the loss became NaN.
        """
        from math import isnan
        if isnan(self.loss):
            print(
                'Loss = NaN or Infinity: current settings does not fit the recommender! Change the settings and try again!')
            exit(-1)

        deltaLoss = (self.lastLoss - self.loss)
        rmse, mae = self.predict_model()

        # early stopping
        if self.config.isEarlyStopping == True:
            cond = self.lastRmse < rmse
            if cond:
                print('test rmse increase, so early stopping')
                return cond
            self.lastRmse = rmse
            self.lastMae = mae

        print('%s iteration %d: loss = %.4f, delta_loss = %.5f learning_Rate = %.5f rmse=%.5f mae=%.5f' % \
              (self.__class__, iter, self.loss, deltaLoss, self.config.lr, rmse, mae))

        # check if converged
        cond = abs(deltaLoss) < self.config.threshold
        converged = cond
        # if not converged:
        #     self.updateLearningRate(iter)
        self.lastLoss = self.loss
        # shuffle(self.dao.trainingData)
        return converged

    def updateLearningRate(self, iter):
        # bold-driver schedule: grow lr by 5% while the loss improves,
        # halve it otherwise; lr is capped at 1
        if iter > 1:
            if abs(self.lastLoss) > abs(self.loss):
                self.config.lr *= 1.05
            else:
                self.config.lr *= 0.5
        if self.config.lr > 1:
            self.config.lr = 1

    def show_rmse(self):
        '''
        show figure for rmse and mae over training epochs
        '''
        nums = range(len(self.iter_rmse))
        plt.plot(nums, self.iter_rmse, label='RMSE')
        plt.plot(nums, self.iter_mae, label='MAE')
        plt.xlabel('# of epoch')
        plt.ylabel('metric')
        plt.title(self.__class__)
        plt.legend()
        plt.show()
        pass

    def show_loss(self,loss_all,faloss_all):
        '''
        show figure for the two loss curves over training epochs
        '''
        nums = range(len(loss_all))
        plt.plot(nums, loss_all, label='front')
        plt.plot(nums, faloss_all, label='rear')
        plt.xlabel('# of epoch')
        plt.ylabel('loss')
        plt.title('loss experiment')
        plt.legend()
        plt.show()
        pass
# encoding:utf-8
import sys

sys.path.append("..")

from prettyprinter import cpprint, set_default_style
import numpy as np
from mf import MF
from utility import tools


class FunkSVDwithR(MF):
    """
    FunkSVD with L2 regularization, trained by momentum SGD.
    http://sifter.org/~simon/journal/20061211.html
    """

    def __init__(self):
        super(FunkSVDwithR, self).__init__()
        self.config.lambdaP = 0.001  # L2 weight for user factors
        self.config.lambdaQ = 0.001  # L2 weight for item factors
        self.config.gamma = 0.9  # Momentum
        self.config.isEarlyStopping = True

    def train_model(self, k):
        """Train on fold k with momentum SGD until convergence or maxIter."""
        super(FunkSVDwithR, self).train_model(k)
        iteration = 0
        p_delta, q_delta = dict(), dict()  # per-user / per-item momentum buffers
        while iteration < self.config.maxIter:
            self.loss = 0
            for index, line in enumerate(self.rg.trainSet()):
                user, item, rating = line
                u = self.rg.user[user]
                i = self.rg.item[item]
                pred = self.predict(user, item)
                error = rating - pred
                self.loss += error ** 2
                p, q = self.P[u], self.Q[i]

                if not u in p_delta:
                    p_delta[u] = np.zeros(self.config.factor)
                if not i in q_delta:
                    q_delta[i] = np.zeros(self.config.factor)

                # momentum update: delta = lr * gradient + gamma * previous delta
                p_delta[u] = self.config.lr * (-error * q + self.config.lambdaP * p) + self.config.gamma * p_delta[u]
                q_delta[i] = self.config.lr * (-error * p + self.config.lambdaQ * q) + self.config.gamma * q_delta[i]
                self.P[u] -= p_delta[u]
                self.Q[i] -= q_delta[i]

            self.loss += self.config.lambdaP * (self.P * self.P).sum() + self.config.lambdaQ * (self.Q * self.Q).sum()

            iteration += 1
            if self.isConverged(iteration):
                iteration = self.config.maxIter
                break


if __name__ == '__main__':
    rmses = []
    maes = []
    bmf = FunkSVDwithR()
    for i in range(bmf.config.k_fold_num):
        bmf.train_model(i)
        rmse, mae = bmf.predict_model()
        print("current best rmse is %0.5f, mae is %0.5f" % (rmse, mae))
        rmses.append(rmse)
        maes.append(mae)
    # BUG FIX: average over the actual number of folds rather than a
    # hard-coded 5, which silently skewed results when k_fold_num != 5.
    rmse_avg = sum(rmses) / len(rmses)
    mae_avg = sum(maes) / len(maes)
    print("the rmses are %s" % rmses)
    print("the maes are %s" % maes)
    print("the average of rmses is %s " % rmse_avg)
    print("the average of maes is %s " % mae_avg)
SDM
    """

    def __init__(self):
        super(CUNE, self).__init__()
        # regularization / social weights
        self.config.lambdaP = 0.01
        self.config.lambdaQ = 0.01
        self.config.alpha = 0.01
        self.config.isEarlyStopping = True
        self.tg = TrustGetter()
        # random-walk / embedding hyper-parameters
        self.config.walkCount = 30
        self.config.walkLength = 20
        self.config.walkDim = 20
        self.config.winSize = 5
        self.config.topK = 50

    def init_model(self, k):
        # build the rating model for fold k, then derive user embeddings
        # from the implicit collaborative-user network
        super(CUNE, self).init_model(k)
        self.user_sim = SimMatrix()
        self.generate_cu_net()
        self.deep_walk()
        self.compute_social_sim()

    def generate_cu_net(self):
        """Build the collaborative user network (CUNet): link two users
        whenever they positively rated at least one common item."""
        print('Building collaborative user network...')
        itemNet = {}
        for item in self.rg.trainSet_i:
            if len(self.rg.trainSet_i[item])>1:
                itemNet[item] = self.rg.trainSet_i[item]

        filteredRatings = defaultdict(list)
        for item in itemNet:
            for user in itemNet[item]:
                if itemNet[item][user] > 0:
                    filteredRatings[user].append(item)

        self.CUNet = defaultdict(list)

        for user1 in filteredRatings:
            s1 = set(filteredRatings[user1])
            for user2 in filteredRatings:
                if user1 != user2:
                    s2 = set(filteredRatings[user2])
                    weight = len(s1.intersection(s2))
                    if weight > 0:
                        self.CUNet[user1]+=[user2] # * weight
        pass

    def deep_walk(self):
        """Sample truncated random walks over CUNet and train word2vec on
        them to obtain a low-dimensional embedding for every user."""
        print('Generating random deep walks...')
        self.walks = []
        self.visited = defaultdict(dict)
        for user in self.CUNet:
            for t in range(self.config.walkCount):
                path = [str(user)]
                lastNode = user
                for i in range(1,self.config.walkLength):
                    nextNode = choice(self.CUNet[lastNode])
                    count=0
                    while(nextNode in self.visited[lastNode]):
                        nextNode = choice(self.CUNet[lastNode])
                        #break infinite loop
                        count+=1
                        if count==self.config.walkLength: # 10
                            break
                    path.append(str(nextNode))
                    self.visited[user][nextNode] = 1
                    lastNode = nextNode
                self.walks.append(path)
        print('Generating user embedding...')
        # NOTE(review): gensim < 4.0 API (size=/iter=); gensim 4+ renamed
        # these to vector_size=/epochs= — confirm the installed version.
        self.model = w2v.Word2Vec(self.walks, size=self.config.walkDim, window=5, min_count=0, iter=3)
        print('User embedding generated.')
        pass

    def compute_social_sim(self):
        """Cosine-compare user embeddings; keep each user's topK most
        similar users, plus the reverse ('followed-by') index."""
        print('Constructing similarity matrix...')
        self.topKSim = defaultdict(dict)
        i = 0
        for user1 in self.CUNet:
            sims = {}
            for user2 in self.CUNet:
                if user1 != user2:
                    wu1 = self.model[str(user1)]
                    wu2 = self.model[str(user2)]
                    sims[user2]=cosine(wu1,wu2)  # TODO: handle the case where an embedding is empty
            self.topKSim[user1] = sorted(sims.items(), key=lambda d: d[1], reverse=True)[:self.config.topK]
            i += 1
            if i % 200 == 0:
                print('progress:', i, '/', len(self.CUNet))
        # build the reverse index: users that list `user` among their top-K
        print('Constructing desimilarity matrix...')
        self.topKSimBy = defaultdict(dict)
        for user in self.topKSim:
            users=self.topKSim[user]
            for user2 in users:
                self.topKSimBy[user2[0]][user] = user2[1]
        print('Similarity matrix finished.')

    def train_model(self, k):
        """SGD training: squared rating error plus an embedding-similarity
        social regularizer over top-K neighbours in both directions."""
        super(CUNE, self).train_model(k)
        iteration = 0
        while iteration < self.config.maxIter:
            self.loss = 0
            for index, line in enumerate(self.rg.trainSet()):
                user, item, rating = line
                u = self.rg.user[user]
                i = self.rg.item[item]
                error = rating - self.predict(user, item)
                self.loss += 0.5 * error ** 2
                p, q = self.P[u], self.Q[i]

                # pull p towards embedding-similar users (followee direction)
                social_term_p, social_term_loss = np.zeros((self.config.factor)), 0.0
                followees = self.topKSim[user]
                for followee in followees:
                    if self.rg.containsUser(followee[0]):
                        uf = self.P[self.rg.user[followee[0]]]
                        social_term_p += followee[1]* (p - uf)
                        social_term_loss += followee[1]* ((p - uf).dot(p - uf))

                # symmetric pull from users who list this user in their top-K
                social_term_m = np.zeros((self.config.factor))
                followers = self.topKSimBy[user]
                followers = sorted(followers.items(), key=lambda d: d[1], reverse=True)[:self.config.topK]
                for follower in followers:
                    if self.rg.containsUser(follower[0]):
                        ug = self.P[self.rg.user[follower[0]]]
                        social_term_m += follower[1]*(p - ug)

                # update latent vectors
                self.P[u] += self.config.lr * (
                        error * q - self.config.alpha * (social_term_p + social_term_m) - self.config.lambdaP * p)
                self.Q[i] += self.config.lr * (error * p - self.config.lambdaQ * q)

                self.loss += 0.5 * self.config.alpha * social_term_loss

            self.loss += 0.5 * self.config.lambdaP * (self.P * self.P).sum() + 0.5 * self.config.lambdaQ * (
                    self.Q * self.Q).sum()

            iteration += 1
            if self.isConverged(iteration):
                break


if __name__ == '__main__':
    rmses = []
    maes = []
    cunemf = CUNE()
    cunemf.config.k_fold_num = 5
    for i in range(cunemf.config.k_fold_num):
        print('the %dth cross validation training' % i)
        cunemf.train_model(i)
        rmse, mae = cunemf.predict_model()
        rmses.append(rmse)
        maes.append(mae)
    rmse_avg = sum(rmses) / cunemf.config.k_fold_num
    mae_avg = sum(maes) / cunemf.config.k_fold_num
    print("the rmses are %s" % rmses)
    print("the maes are %s" % maes)
    print("the average of rmses is %s " % rmse_avg)
    print("the average of maes is %s " % mae_avg)
# encoding:utf-8
import sys

sys.path.append("..")
import numpy as np
from mf import MF
from reader.trust import TrustGetter


class SocialMF(MF):
    """
    SocialMF: matrix factorization with trust propagation.

    Jamali M, Ester M. A matrix factorization technique with trust
    propagation for recommendation in social networks. RecSys 2010: 135-142.
    """

    def __init__(self):
        super(SocialMF, self).__init__()
        self.config.alpha = 1  # weight of the trust-propagation regularizer
        self.tg = TrustGetter()  # loading trust data

    def train_model(self, k):
        """SGD on fold k: rating loss + alpha * ||p_u - trust-weighted
        mean of u's followees||^2, with the propagated follower term."""
        super(SocialMF, self).train_model(k)
        iteration = 0
        while iteration < self.config.maxIter:
            self.loss = 0
            for index, line in enumerate(self.rg.trainSet()):
                user, item, rating = line
                u = self.rg.user[user]
                i = self.rg.item[item]
                error = rating - self.predict(user, item)
                self.loss += error ** 2
                p, q = self.P[u], self.Q[i]

                # deviation of p from the trust-weighted mean of followees
                total_weight = 0.0
                social_term = np.zeros(self.config.factor)
                followees = self.tg.get_followees(user)  # get user u's focus list
                for followee in followees:
                    weight = followees[followee]
                    if self.rg.containsUser(followee):
                        uk = self.P[self.rg.user[followee]]
                        social_term += weight * uk
                        total_weight += weight
                if total_weight != 0:
                    social_term = p - social_term / total_weight

                # propagated term: each follower's deviation from the
                # trust-weighted mean of *their* followees
                social_term_a = np.zeros(self.config.factor)
                total_count = 0
                followers = self.tg.get_followers(user)
                for follower in followers:
                    if self.rg.containsUser(follower):
                        total_count += 1
                        uv = self.P[self.rg.user[follower]]
                        social_term_m = np.zeros(self.config.factor)
                        total_weight = 0.0
                        followees = self.tg.get_followees(follower)
                        for followee in followees:
                            weight = followees[followee]
                            if self.rg.containsUser(followee):
                                uw = self.P[self.rg.user[followee]]
                                social_term_m += weight * uw
                                total_weight += weight
                        if total_weight != 0:
                            social_term_a += uv - social_term_m / total_weight
                if total_count != 0:
                    social_term_a /= total_count

                # update latent vectors
                self.P[u] += self.config.lr * (
                        error * q - self.config.alpha * social_term + self.config.alpha * social_term_a - self.config.lambdaP * p)
                self.Q[i] += self.config.lr * (error * p - self.config.lambdaQ * q)

                self.loss += self.config.alpha * social_term.dot(social_term).sum()

            self.loss += self.config.lambdaP * (self.P * self.P).sum() + self.config.lambdaQ * (self.Q * self.Q).sum()

            iteration += 1
            if self.isConverged(iteration):
                break


if __name__ == '__main__':
    rmses = []
    maes = []
    tcsr = SocialMF()
    for i in range(tcsr.config.k_fold_num):
        print('the %dth cross validation training' % i)
        tcsr.train_model(i)
        rmse, mae = tcsr.predict_model()
        rmses.append(rmse)
        maes.append(mae)
    # BUG FIX: divide by the real fold count, not a hard-coded 5
    rmse_avg = sum(rmses) / len(rmses)
    mae_avg = sum(maes) / len(maes)
    print("the rmses are %s" % rmses)
    print("the maes are %s" % maes)
    print("the average of rmses is %s " % rmse_avg)
    print("the average of maes is %s " % mae_avg)
Sorec: social recommendation using probabilistic matrix factorization[C]//Proceedings of the 17th ACM conference on Information and knowledge management. ACM, 2008: 931-940. 15 | 16 | """ 17 | 18 | def __init__(self): 19 | super(SocialRec, self).__init__() 20 | # self.config.lr=0.0001 21 | self.config.alpha = 0.1 22 | self.config.lambdaZ = 0.01 23 | self.tg = TrustGetter() 24 | # self.init_model() 25 | 26 | def init_model(self, k): 27 | super(SocialRec, self).init_model(k) 28 | self.Z = np.random.rand(self.rg.get_train_size()[0], self.config.factor) / ( 29 | self.config.factor ** 0.5) # latent user social matrix 30 | 31 | def train_model(self, k): 32 | super(SocialRec, self).train_model(k) 33 | iteration = 0 34 | while iteration < self.config.maxIter: 35 | # tempP=np.zeros((self.rg.get_train_size()[0], self.config.factor)) 36 | self.loss = 0 37 | for index, line in enumerate(self.rg.trainSet()): 38 | user, item, rating = line 39 | u = self.rg.user[user] 40 | i = self.rg.item[item] 41 | error = rating - self.predict(user, item) 42 | self.loss += error ** 2 43 | p, q = self.P[u], self.Q[i] 44 | 45 | followees = self.tg.get_followees(user) 46 | zs = np.zeros(self.config.factor) 47 | for followee in followees: 48 | if self.rg.containsUser(user) and self.rg.containsUser(followee): 49 | vminus = len(self.tg.get_followers(followee)) # ~ d - (k) 50 | uplus = len(self.tg.get_followees(user)) # ~ d + (i) 51 | import math 52 | try: 53 | weight = math.sqrt(vminus / (uplus + vminus + 0.0)) 54 | except ZeroDivisionError: 55 | weight = 1 56 | zid = self.rg.user[followee] 57 | z = self.Z[zid] 58 | err = weight - z.dot(p) 59 | self.loss += err ** 2 60 | zs += -1.0 * err * p 61 | self.Z[zid] += self.config.lr * (self.config.alpha * err * p - self.config.lambdaZ * z) 62 | 63 | self.P[u] += self.config.lr * (error * q - self.config.alpha * zs - self.config.lambdaP * p) 64 | self.Q[i] += self.config.lr * (error * p - self.config.lambdaQ * q) 65 | 66 | self.loss += self.config.lambdaP * 
(self.P * self.P).sum() + self.config.lambdaQ * (self.Q * self.Q).sum() \ 67 | + self.config.lambdaZ * (self.Z * self.Z).sum() 68 | 69 | iteration += 1 70 | if self.isConverged(iteration): 71 | break 72 | 73 | 74 | if __name__ == '__main__': 75 | rmses = [] 76 | maes = [] 77 | tcsr = SocialRec() 78 | # print(bmf.rg.trainSet_u[1]) 79 | for i in range(tcsr.config.k_fold_num): 80 | print('the %dth cross validation training' % i) 81 | tcsr.train_model(i) 82 | rmse, mae = tcsr.predict_model() 83 | rmses.append(rmse) 84 | maes.append(mae) 85 | rmse_avg = sum(rmses) / 5 86 | mae_avg = sum(maes) / 5 87 | print("the rmses are %s" % rmses) 88 | print("the maes are %s" % maes) 89 | print("the average of rmses is %s " % rmse_avg) 90 | print("the average of maes is %s " % mae_avg) 91 | -------------------------------------------------------------------------------- /model/social_reg.py: -------------------------------------------------------------------------------- 1 | # encoding:utf-8 2 | import sys 3 | 4 | sys.path.append("..") 5 | import numpy as np 6 | from mf import MF 7 | from reader.trust import TrustGetter 8 | from utility.matrix import SimMatrix 9 | from utility.similarity import pearson_sp, cosine_sp 10 | from utility import util 11 | 12 | 13 | class SocialReg(MF): 14 | """ 15 | docstring for SocialReg 16 | 17 | Ma H, Zhou D, Liu C, et al. Recommender systems with social regularization[C]//Proceedings of the fourth ACM international conference on Web search and data mining. ACM, 2011: 287-296. 
18 | """ 19 | 20 | def __init__(self): 21 | super(SocialReg, self).__init__() 22 | # self.config.lambdaP = 0.001 23 | # self.config.lambdaQ = 0.001 24 | self.config.alpha = 0.1 25 | self.tg = TrustGetter() 26 | # self.init_model() 27 | 28 | def init_model(self, k): 29 | super(SocialReg, self).init_model(k) 30 | from collections import defaultdict 31 | self.user_sim = SimMatrix() 32 | print('constructing user-user similarity matrix...') 33 | 34 | # self.user_sim = util.load_data('../data/sim/ft_cf_soreg08_cv1.pkl') 35 | 36 | for u in self.rg.user: 37 | for f in self.tg.get_followees(u): 38 | if self.user_sim.contains(u, f): 39 | continue 40 | sim = self.get_sim(u, f) 41 | self.user_sim.set(u, f, sim) 42 | 43 | # util.save_data(self.user_sim,'../data/sim/ft_cf_soreg08.pkl') 44 | 45 | def get_sim(self, u, k): 46 | sim = (pearson_sp(self.rg.get_row(u), self.rg.get_row(k)) + 1.0) / 2.0 # fit the value into range [0.0,1.0] 47 | return sim 48 | 49 | def train_model(self, k): 50 | super(SocialReg, self).train_model(k) 51 | iteration = 0 52 | while iteration < self.config.maxIter: 53 | self.loss = 0 54 | for index, line in enumerate(self.rg.trainSet()): 55 | user, item, rating = line 56 | u = self.rg.user[user] 57 | i = self.rg.item[item] 58 | error = rating - self.predict(user, item) 59 | self.loss += 0.5 * error ** 2 60 | p, q = self.P[u], self.Q[i] 61 | 62 | social_term_p, social_term_loss = np.zeros((self.config.factor)), 0.0 63 | followees = self.tg.get_followees(user) 64 | for followee in followees: 65 | if self.rg.containsUser(followee): 66 | s = self.user_sim[user][followee] 67 | uf = self.P[self.rg.user[followee]] 68 | social_term_p += s * (p - uf) 69 | social_term_loss += s * ((p - uf).dot(p - uf)) 70 | 71 | social_term_m = np.zeros((self.config.factor)) 72 | followers = self.tg.get_followers(user) 73 | for follower in followers: 74 | if self.rg.containsUser(follower): 75 | s = self.user_sim[user][follower] 76 | ug = self.P[self.rg.user[follower]] 77 | 
social_term_m += s * (p - ug) 78 | 79 | # update latent vectors 80 | self.P[u] += self.config.lr * ( 81 | error * q - self.config.alpha * (social_term_p + social_term_m) - self.config.lambdaP * p) 82 | self.Q[i] += self.config.lr * (error * p - self.config.lambdaQ * q) 83 | 84 | self.loss += 0.5 * self.config.alpha * social_term_loss 85 | 86 | self.loss += 0.5 * self.config.lambdaP * (self.P * self.P).sum() + 0.5 * self.config.lambdaQ * ( 87 | self.Q * self.Q).sum() 88 | 89 | iteration += 1 90 | if self.isConverged(iteration): 91 | break 92 | 93 | 94 | if __name__ == '__main__': 95 | # srg = SocialReg() 96 | # srg.train_model(0) 97 | # coldrmse = srg.predict_model_cold_users() 98 | # print('cold start user rmse is :' + str(coldrmse)) 99 | # srg.show_rmse() 100 | 101 | rmses = [] 102 | maes = [] 103 | tcsr = SocialReg() 104 | # print(bmf.rg.trainSet_u[1]) 105 | for i in range(tcsr.config.k_fold_num): 106 | print('the %dth cross validation training' % i) 107 | tcsr.train_model(i) 108 | rmse, mae = tcsr.predict_model() 109 | rmses.append(rmse) 110 | maes.append(mae) 111 | rmse_avg = sum(rmses) / 5 112 | mae_avg = sum(maes) / 5 113 | print("the rmses are %s" % rmses) 114 | print("the maes are %s" % maes) 115 | print("the average of rmses is %s " % rmse_avg) 116 | print("the average of maes is %s " % mae_avg) 117 | -------------------------------------------------------------------------------- /model/social_rste.py: -------------------------------------------------------------------------------- 1 | # encoding:utf-8 2 | import sys 3 | 4 | sys.path.append("..") # add current path into environment variable 5 | import numpy as np 6 | from mf import MF 7 | from reader.trust import TrustGetter 8 | 9 | 10 | # from utility.similarity import pearson_sp 11 | 12 | 13 | class RSTE(MF): 14 | """ 15 | docstring for RSTE 16 | 17 | Ma H, King I, Lyu M R. 
Learning to recommend with social trust ensemble[C]//Proceedings of the 32nd international ACM SIGIR conference on Research and development in information retrieval. ACM, 2009: 203-210. 18 | 19 | """ 20 | 21 | def __init__(self): 22 | super(RSTE, self).__init__() 23 | # self.maxIter=700 24 | self.config.alpha = 0.5 25 | # self.config.lambdaH=0.01 26 | self.tg = TrustGetter() 27 | # self.init_model() 28 | 29 | def init_model(self, k): 30 | super(RSTE, self).init_model(k) 31 | 32 | # from collections import defaultdict 33 | # self.Sim = defaultdict(dict) 34 | # print('constructing similarity matrix...') 35 | # for user in self.rg.user: 36 | # for k in self.tg.get_followees(user): 37 | # if user in self.Sim and k in self.Sim[user]: 38 | # pass 39 | # else: 40 | # self.Sim[user][k]=self.get_sim(user,k) 41 | 42 | def train_model(self, k): 43 | super(RSTE, self).train_model(k) 44 | iteration = 0 45 | while iteration < self.config.maxIter: 46 | self.loss = 0 47 | for index, line in enumerate(self.rg.trainSet()): 48 | user, item, rating = line 49 | 50 | error = rating - self.predict(user, item) 51 | self.loss += error ** 2 52 | social_term, _ = self.get_social_term_Q(user, item) 53 | 54 | u = self.rg.user[user] 55 | i = self.rg.item[item] 56 | p, q = self.P[u], self.Q[i] 57 | 58 | # update latent vectors 59 | 60 | self.P[u] += self.config.lr * (self.config.alpha * error * q + \ 61 | (1 - self.config.alpha) * self.get_social_term_P(user, 62 | item) - self.config.lambdaP * p) 63 | 64 | self.Q[i] += self.config.lr * (error * (self.config.alpha * p + (1 - self.config.alpha) * social_term) \ 65 | - self.config.lambdaQ * q) 66 | 67 | self.loss += self.config.lambdaP * (self.P * self.P).sum() + self.config.lambdaQ * (self.Q * self.Q).sum() 68 | 69 | iteration += 1 70 | if self.isConverged(iteration): 71 | break 72 | 73 | def get_social_term_Q(self, user, item): 74 | if self.rg.containsUser(user) and self.rg.containsItem(item): 75 | i = self.rg.item[item] 76 | u = 
self.rg.user[user] 77 | social_term_loss = 0 78 | social_term = np.zeros(self.config.factor) 79 | followees = self.tg.get_followees(user) 80 | weights = [] 81 | indexes = [] 82 | for followee in followees: 83 | if self.rg.containsUser(followee): # followee is in rating set 84 | indexes.append(self.rg.user[followee]) 85 | weights.append(followees[followee]) 86 | weights = np.array(weights) 87 | qw = weights.sum() 88 | indexes = np.array(indexes) 89 | if qw != 0: 90 | social_term = weights.dot(self.P[indexes]) 91 | social_term /= qw 92 | social_term_loss += weights.dot((self.P[indexes].dot(self.Q[i]))) / qw 93 | return social_term, social_term_loss 94 | 95 | def get_social_term_P(self, user, item): 96 | i = self.rg.item[item] 97 | # social_term_loss = 0 98 | social_term = np.zeros(self.config.factor) 99 | 100 | followers = self.tg.get_followers(user) 101 | weights = [] 102 | indexes = [] 103 | errs = [] 104 | for follower in followers: 105 | if self.rg.containsUser(follower) and self.rg.containsItem(item) and self.rg.containsUserItem(follower, 106 | item): # followee is in rating set 107 | indexes.append(self.rg.user[follower]) 108 | weights.append(followers[follower]) 109 | errs.append(self.rg.trainSet_u[follower][item] - self.predict(follower, item)) 110 | weights = np.array(weights) 111 | indexes = np.array(indexes) 112 | errs = np.array(errs) 113 | qw = weights.sum() 114 | if qw != 0: 115 | for es in errs * weights: 116 | social_term += es * self.Q[i] 117 | social_term /= qw 118 | # social_term_loss += weights.dot((self.P[indexes].dot(self.Q[i]))) 119 | return social_term 120 | 121 | def predict(self, u, i): 122 | if self.rg.containsUser(u) and self.rg.containsItem(i): 123 | _, social_term_loss = self.get_social_term_Q(u, i) 124 | i = self.rg.item[i] 125 | u = self.rg.user[u] 126 | 127 | if social_term_loss != 0: 128 | return self.config.alpha * self.P[u].dot(self.Q[i]) + (1 - self.config.alpha) * social_term_loss 129 | else: 130 | return self.P[u].dot(self.Q[i]) 
131 | else: 132 | return self.rg.globalMean 133 | 134 | # def get_sim(self,u,k): 135 | # return (pearson_sp(self.rg.get_row(u), self.rg.get_row(k))+1.0)/2.0 136 | 137 | 138 | if __name__ == '__main__': 139 | rmses = [] 140 | maes = [] 141 | tcsr = RSTE() 142 | # print(bmf.rg.trainSet_u[1]) 143 | for i in range(tcsr.config.k_fold_num): 144 | print('the %dth cross validation training' % i) 145 | tcsr.train_model(i) 146 | rmse, mae = tcsr.predict_model() 147 | rmses.append(rmse) 148 | maes.append(mae) 149 | rmse_avg = sum(rmses) / 5 150 | mae_avg = sum(maes) / 5 151 | print("the rmses are %s" % rmses) 152 | print("the maes are %s" % maes) 153 | print("the average of rmses is %s " % rmse_avg) 154 | print("the average of maes is %s " % mae_avg) 155 | -------------------------------------------------------------------------------- /model/svd++.py: -------------------------------------------------------------------------------- 1 | # encoding:utf-8 2 | import sys 3 | 4 | sys.path.append("..") 5 | from prettyprinter import cpprint 6 | import numpy as np 7 | from mf import MF 8 | 9 | 10 | class SVDPP(MF): 11 | """ 12 | docstring for SVDPP 13 | implement the SVDPP 14 | 15 | Koren Y. Factor in the neighbors: Scalable and accurate collaborative filtering[J]. ACM Transactions on Knowledge Discovery from Data (TKDD), 2010, 4(1): 1. 
16 | """ 17 | 18 | def __init__(self): 19 | super(SVDPP, self).__init__() 20 | self.config.lambdaP = 0.001 21 | self.config.lambdaQ = 0.001 22 | 23 | self.config.lambdaY = 0.001 24 | self.config.lambdaB = 0.001 25 | # self.init_model() 26 | 27 | def init_model(self, k): 28 | super(SVDPP, self).init_model(k) 29 | self.Bu = np.random.rand(self.rg.get_train_size()[0]) / (self.config.factor ** 0.5) # bias value of user 30 | self.Bi = np.random.rand(self.rg.get_train_size()[1]) / (self.config.factor ** 0.5) # bias value of item 31 | self.Y = np.random.rand(self.rg.get_train_size()[1], self.config.factor) / ( 32 | self.config.factor ** 0.5) # implicit preference 33 | self.SY = dict() 34 | 35 | def train_model(self, k): 36 | super(SVDPP, self).train_model(k) 37 | iteration = 0 38 | while iteration < self.config.maxIter: 39 | self.loss = 0 40 | for index, line in enumerate(self.rg.trainSet()): 41 | user, item, rating = line 42 | u = self.rg.user[user] 43 | i = self.rg.item[item] 44 | error = rating - self.predict(user, item) 45 | self.loss += error ** 2 46 | 47 | p, q = self.P[u], self.Q[i] 48 | nu, sum_y = self.get_sum_y(user) 49 | 50 | # update latent vectors 51 | self.P[u] += self.config.lr * (error * q - self.config.lambdaP * p) 52 | self.Q[i] += self.config.lr * (error * (p + sum_y) - self.config.lambdaQ * q) 53 | 54 | self.Bu[u] += self.config.lr * (error - self.config.lambdaB * self.Bu[u]) 55 | self.Bi[i] += self.config.lr * (error - self.config.lambdaB * self.Bi[i]) 56 | 57 | u_items = self.rg.user_rated_items(u) 58 | for j in u_items: 59 | idj = self.rg.item[j] 60 | self.Y[idj] += self.config.lr * (error / np.sqrt(nu) * q - self.config.lambdaY * self.Y[idj]) 61 | 62 | self.loss += self.config.lambdaP * (self.P * self.P).sum() + self.config.lambdaQ * (self.Q * self.Q).sum() \ 63 | + self.config.lambdaB * ( 64 | (self.Bu * self.Bu).sum() + (self.Bi * self.Bi).sum()) + self.config.lambdaY * ( 65 | self.Y * self.Y).sum() 66 | iteration += 1 67 | if 
self.isConverged(iteration): 68 | break 69 | 70 | def predict(self, u, i): 71 | if self.rg.containsUser(u) and self.rg.containsItem(i): 72 | _, sum_y = self.get_sum_y(u) 73 | u = self.rg.user[u] 74 | i = self.rg.item[i] 75 | return self.Q[i].dot(self.P[u] + sum_y) + self.rg.globalMean + self.Bi[i] + self.Bu[u] 76 | else: 77 | return self.rg.globalMean 78 | 79 | def get_sum_y(self, u): 80 | if u in self.SY: 81 | return self.SY[u] 82 | u_items = self.rg.user_rated_items(u) 83 | nu = len(u_items) 84 | sum_y = np.zeros(self.config.factor) 85 | for j in u_items: 86 | sum_y += self.Y[self.rg.item[j]] 87 | sum_y /= (np.sqrt(nu)) 88 | self.SY[u] = [nu, sum_y] 89 | return nu, sum_y 90 | 91 | 92 | if __name__ == '__main__': 93 | rmses = [] 94 | maes = [] 95 | tcsr = SVDPP() 96 | # print(bmf.rg.trainSet_u[1]) 97 | for i in range(tcsr.config.k_fold_num): 98 | print('the %dth cross validation training' % i) 99 | tcsr.train_model(i) 100 | rmse, mae = tcsr.predict_model() 101 | rmses.append(rmse) 102 | maes.append(mae) 103 | rmse_avg = sum(rmses) / 5 104 | mae_avg = sum(maes) / 5 105 | print("the rmses are %s" % rmses) 106 | print("the maes are %s" % maes) 107 | print("the average of rmses is %s " % rmse_avg) 108 | print("the average of maes is %s " % mae_avg) 109 | -------------------------------------------------------------------------------- /model/tri_cf.py: -------------------------------------------------------------------------------- 1 | # encoding:utf-8 2 | import sys 3 | import os 4 | sys.path.append("..") 5 | import numpy as np 6 | from mf import MF 7 | from prettyprinter import cpprint 8 | from collections import defaultdict 9 | from prettyprinter import cpprint 10 | from utility.matrix import SimMatrix 11 | from utility.similarity import pearson_sp 12 | from utility import util 13 | 14 | 15 | class TriCFBias(MF): 16 | """ 17 | docstring for TriCFBias 18 | 19 | """ 20 | 21 | def __init__(self): 22 | super(TriCFBias, self).__init__() 23 | # self.config.lr=0.001 24 | 
# ---- model/tri_cf.py ----
# encoding:utf-8
import sys
import os
sys.path.append("..")
import numpy as np
from mf import MF
from collections import defaultdict
from prettyprinter import cpprint
from utility.matrix import SimMatrix
from utility.similarity import pearson_sp
from utility import util


class TriCFBias(MF):
    """
    Biased matrix factorization regularized by user-user and item-item
    nearest-neighbour similarities (TriCF with bias terms).
    """

    def __init__(self):
        super(TriCFBias, self).__init__()
        # self.config.lr=0.001
        # neighbourhood regularization weights (users / items)
        self.config.lambdaU = 0.002
        self.config.lambdaI = 0.001
        # factor and bias regularization weights
        self.config.lambdaP = 0.02
        self.config.lambdaQ = 0.03
        self.config.lambdaB = 0.01
        # number of nearest neighbours kept per user / item
        self.config.user_near_num = 50
        self.config.item_near_num = 50

    def init_model(self, k):
        """Initialize fold k: bias vectors plus the similarity/neighbour tables."""
        super(TriCFBias, self).init_model(k)
        self.Bu = np.random.rand(self.rg.get_train_size()[0])  # bias value of user
        self.Bi = np.random.rand(self.rg.get_train_size()[1])  # bias value of item
        self.build_user_item_sim_CF()

    # construct the u-u, i-i similarity matrices and their k neighbours
    def build_user_item_sim_CF(self):
        """Build pearson similarity matrices and top-k neighbour dicts, and
        cache them under ../data.

        NOTE(review): both similarity passes are O(n^2); for larger datasets
        reload the cached .pkl files (commented lines) instead of recomputing.
        """
        self.user_sim = SimMatrix()
        self.item_sim = SimMatrix()
        self.user_k_neibor = defaultdict(dict)
        self.item_k_neibor = defaultdict(dict)

        # compute user-user similarity matrix
        print('constructing user-user similarity matrix...')
        # self.user_sim = util.load_data('../data/sim/ft_08_uu_tricf.pkl')
        for u1 in self.rg.user:
            for u2 in self.rg.user:
                if u1 != u2:
                    if self.user_sim.contains(u1, u2):
                        continue
                    sim = pearson_sp(self.rg.get_row(u1), self.rg.get_row(u2))
                    self.user_sim.set(u1, u2, round(sim, 5))
        if not os.path.exists('../data/sim'):
            os.makedirs('../data/sim')
            print('../data/sim folder has been established.')
        util.save_data(self.user_sim, '../data/sim/ft_08_uu_tricf_cv0.pkl')

        # keep only the top-k most similar users for each user
        # (FIX: the near_num slice was applied twice; once is enough)
        for user in self.rg.user:
            matchUsers = sorted(self.user_sim[user].items(), key=lambda x: x[1], reverse=True)[
                         :self.config.user_near_num]
            self.user_k_neibor[user] = dict(matchUsers)

        if not os.path.exists('../data/neibor'):
            os.makedirs('../data/neibor')
            print('../data/neibor folder has been established.')
        util.save_data(self.user_k_neibor,
                       '../data/neibor/ft_08_uu_' + str(self.config.user_near_num) + '_neibor_tricf_cv0.pkl')

        # compute item-item similarity matrix
        print('constructing item-item similarity matrix...')
        # self.item_sim = util.load_data('../data/sim/ft_08_ii_tricf.pkl')
        for i1 in self.rg.item:
            for i2 in self.rg.item:
                if i1 != i2:
                    if self.item_sim.contains(i1, i2):
                        continue
                    sim = pearson_sp(self.rg.get_col(i1), self.rg.get_col(i2))
                    self.item_sim.set(i1, i2, round(sim, 5))
        util.save_data(self.item_sim, '../data/sim/ft_08_ii_tricf_cv0.pkl')

        # keep only the top-k most similar items for each item
        for item in self.rg.item:
            matchItems = sorted(self.item_sim[item].items(), key=lambda x: x[1], reverse=True)[
                         :self.config.item_near_num]
            self.item_k_neibor[item] = dict(matchItems)
        util.save_data(self.item_k_neibor,
                       '../data/neibor/ft_08_ii_' + str(self.config.item_near_num) + '_neibor_tricf_cv0.pkl')

    def train_model(self, k):
        """SGD training with neighbourhood regularization on fold k."""
        super(TriCFBias, self).train_model(k)
        print('training model...')
        iteration = 0
        while iteration < self.config.maxIter:
            self.loss = 0
            # first-seen neighbourhood penalty per user/item id.
            # NOTE(review): these dicts are filled but never read in this
            # file -- presumably kept for inspection; confirm before removal.
            self.u_near_total_dict = defaultdict()
            self.i_near_total_dict = defaultdict()
            for index, line in enumerate(self.rg.trainSet()):
                user, item, rating = line
                u = self.rg.user[user]
                i = self.rg.item[item]

                error = rating - self.predict(user, item)
                self.loss += error ** 2
                p, q = self.P[u], self.Q[i]

                # the k neighbours of the current user and item
                matchUsers = self.user_k_neibor[user]
                matchItems = self.item_k_neibor[item]

                # weighted pull of the user factor towards its neighbours
                u_near_sum, u_near_total, s = np.zeros((self.config.factor)), 0.0, 0.0
                for suser in matchUsers.keys():
                    near_user, sim_value = suser, matchUsers[suser]
                    if sim_value != 0.0:
                        s += sim_value
                        pn = self.P[self.rg.user[near_user]]
                        u_near_sum += sim_value * (pn - p)
                        u_near_total += sim_value * ((pn - p).dot(pn - p))
                if s != 0.0:
                    u_near_sum /= s

                # weighted pull of the item factor towards its neighbours
                i_near_sum, i_near_total, ss = np.zeros((self.config.factor)), 0.0, 0.0
                for sitem in matchItems:
                    near_item, sim_value = sitem, matchItems[sitem]
                    if sim_value != 0.0:
                        ss += sim_value
                        qn = self.Q[self.rg.item[near_item]]
                        i_near_sum += sim_value * (qn - q)
                        i_near_total += sim_value * ((qn - q).dot(qn - q))
                if ss != 0.0:
                    i_near_sum /= ss

                if u not in self.u_near_total_dict:
                    self.u_near_total_dict[u] = u_near_total
                if i not in self.i_near_total_dict:
                    self.i_near_total_dict[i] = i_near_total

                # NOTE(review): Bu/Bi are trained here but this class does not
                # override predict(); verify that MF.predict uses the biases.
                self.Bu[u] += self.config.lr * (error - self.config.lambdaB * self.Bu[u])
                self.Bi[i] += self.config.lr * (error - self.config.lambdaB * self.Bi[i])

                self.P[u] += self.config.lr * (error * q - self.config.lambdaU * u_near_sum - self.config.lambdaP * p)
                self.Q[i] += self.config.lr * (error * p - self.config.lambdaI * i_near_sum - self.config.lambdaQ * q)

                self.loss += 0.5 * (self.config.lambdaU * u_near_total + self.config.lambdaI * i_near_total)

            self.loss += self.config.lambdaP * (self.P * self.P).sum() + self.config.lambdaQ * (self.Q * self.Q).sum() \
                         + self.config.lambdaB * ((self.Bu * self.Bu).sum() + (self.Bi * self.Bi).sum())

            iteration += 1
            if self.isConverged(iteration):
                break

    # test cold start users among test set
    def predict_model_cold_users_improved(self):
        """RMSE over cold-start test users only.

        NOTE(review): this method references denormalize, Metric and
        self.predict_improved, none of which are defined or imported in this
        file -- calling it raises NameError/AttributeError. Left intact but
        flagged: wire up utility.tools.denormalize, metrics.metric.Metric and
        a predict_improved implementation before use.
        """
        res = []
        for user in self.rg.testColdUserSet_u.keys():
            for item in self.rg.testColdUserSet_u[user].keys():
                rating = self.rg.testColdUserSet_u[user][item]
                pred = self.predict_improved(user, item)
                # denormalize
                pred = denormalize(pred, self.config.min_val, self.config.max_val)
                pred = self.checkRatingBoundary(pred)
                res.append([user, item, rating, pred])
        rmse = Metric.RMSE(res)
        return rmse


if __name__ == '__main__':
    rmses = []
    maes = []
    tcsr = TriCFBias()
    for i in range(tcsr.config.k_fold_num):
        print('the %dth cross validation training' % i)
        tcsr.train_model(i)
        rmse, mae = tcsr.predict_model()
        rmses.append(rmse)
        maes.append(mae)
    rmse_avg = sum(rmses) / len(rmses)  # FIX: was hard-coded / 5
    mae_avg = sum(maes) / len(maes)
    print("the rmses are %s" % rmses)
    print("the maes are %s" % maes)
    print("the average of rmses is %s " % rmse_avg)
    print("the average of maes is %s " % mae_avg)
# ---- model/trust_svd.py ----
# encoding:utf-8
import sys

sys.path.append("..")

import math
import numpy as np
from mf import MF
from reader.trust import TrustGetter


class TrustSVD(MF):
    """
    TrustSVD: matrix factorization with both the explicit and implicit
    influence of ratings and of trust relations.

    FIX(doc): the previous docstring cited Koren's TKDD 2010 paper; the model
    implemented here is:
    Guo G, Zhang J, Yorke-Smith N. TrustSVD: Collaborative filtering with
    both the explicit and implicit influence of user trust and of item
    ratings[C]. AAAI, 2015.
    """

    def __init__(self):
        super(TrustSVD, self).__init__()

        self.config.lr = 0.01  # 0.005
        self.config.maxIter = 100
        self.config.lambdaP = 1.2
        self.config.lambdaQ = 1.2

        self.config.lambdaB = 1.2
        self.config.lambdaY = 1.2
        self.config.lambdaW = 1.2
        self.config.lambdaT = 0.9

        self.tg = TrustGetter()

    def init_model(self, k):
        """Initialize fold k: biases, implicit item factors Y and trustee
        factors W."""
        super(TrustSVD, self).init_model(k)
        num_users, num_items = self.rg.get_train_size()
        scale = self.config.factor ** 0.5
        self.Bu = np.random.rand(num_users) / scale   # bias value of user
        self.Bi = np.random.rand(num_items) / scale   # bias value of item
        self.Y = np.random.rand(num_items, self.config.factor) / scale  # implicit item preference
        self.W = np.random.rand(num_users, self.config.factor) / scale  # trustee factors

    def train_model(self, k):
        """SGD training on fold k: rating loss plus trust-reconstruction loss."""
        super(TrustSVD, self).train_model(k)
        frac = lambda x: 1.0 / math.sqrt(x)  # hoisted loop invariant
        iteration = 0
        while iteration < self.config.maxIter:
            self.loss = 0
            for index, line in enumerate(self.rg.trainSet()):
                user, item, rating = line
                u = self.rg.user[user]
                i = self.rg.item[item]
                error = rating - self.predict(user, item)
                self.loss += error ** 2

                p, q = self.P[u], self.Q[i]
                nu, sum_y = self.get_sum_y(user)  # |I_u| and normalized Y sum
                nv, sum_w = self.get_sum_w(user)  # trust count and normalized W sum

                # update bias terms
                self.Bu[u] += self.config.lr * (error - self.config.lambdaB * frac(nu) * self.Bu[u])
                self.Bi[i] += self.config.lr * (error - self.config.lambdaB * frac(nv) * self.Bi[i])

                self.Q[i] += self.config.lr * (error * (p + sum_y + sum_w) - self.config.lambdaQ * frac(nu) * q)

                followees = self.tg.get_followees(user)
                ws = np.zeros(self.config.factor)
                # 'user' comes from trainSet so containsUser(user) is always
                # True; only the followee needs checking.
                for followee in followees:
                    if self.rg.containsUser(followee):
                        nw = len(self.tg.get_followers(followee))
                        vid = self.rg.user[followee]
                        w = self.W[vid]
                        weight = 1  # followees[followee]
                        err = w.dot(p) - weight  # trust reconstruction error
                        self.loss += err ** 2
                        ws += err * w
                        # FIX: the rating part of the gradient w.r.t. w_v is
                        # driven by the rating error, not the trust error
                        # (cf. TrustSVD, Guo et al. AAAI'15); was err * frac(nv) * q.
                        self.W[vid] += self.config.lr * (
                                error * frac(nv) * q - self.config.lambdaT * err * p - self.config.lambdaW * frac(
                            nw) * w)  # update w
                self.P[u] += self.config.lr * (error * q - self.config.lambdaT * ws - (
                        self.config.lambdaP * frac(nu) + self.config.lambdaT * frac(nv)) * p)

                # FIX: user_rated_items expects the raw user id, not the
                # internal index 'u' (same defect as in svd++).
                u_items = self.rg.user_rated_items(user)
                for j in u_items:
                    idj = self.rg.item[j]
                    self.Y[idj] += self.config.lr * (
                            error * frac(nu) * q - self.config.lambdaY * frac(nv) * self.Y[idj])

            self.loss += self.config.lambdaP * (self.P * self.P).sum() + self.config.lambdaQ * (self.Q * self.Q).sum() \
                         + self.config.lambdaB * (
                                 (self.Bu * self.Bu).sum() + (self.Bi * self.Bi).sum()) + self.config.lambdaY * (
                                 self.Y * self.Y).sum() + self.config.lambdaW * (self.W * self.W).sum()
            iteration += 1
            if self.isConverged(iteration):
                break

    def predict(self, u, i):
        """Predict rating for raw ids (u, i) with graceful fallbacks to the
        user mean, item mean or global mean."""
        if self.rg.containsUser(u) and self.rg.containsItem(i):
            _, sum_y = self.get_sum_y(u)
            _, sum_w = self.get_sum_w(u)
            u = self.rg.user[u]
            i = self.rg.item[i]
            return self.Q[i].dot(self.P[u] + sum_y + sum_w) + self.rg.globalMean + self.Bi[i] + self.Bu[u]
        elif self.rg.containsUser(u) and not self.rg.containsItem(i):
            return self.rg.userMeans[u]
        elif not self.rg.containsUser(u) and self.rg.containsItem(i):
            return self.rg.itemMeans[i]
        else:
            return self.rg.globalMean

    def get_sum_y(self, u):
        """(|I_u|, sum of Y_j / sqrt(|I_u|)) for raw user id u."""
        u_items = self.rg.user_rated_items(u)
        nu = len(u_items)
        sum_y = np.zeros(self.config.factor)
        for j in u_items:
            sum_y += self.Y[self.rg.item[j]]
        sum_y /= (np.sqrt(nu))
        return nu, sum_y

    def get_sum_w(self, u):
        """(1 + number of trusted users in train, normalized sum of their W).

        nu starts at 1 so the division below is always defined, even for
        users with no (known) followees.
        """
        followees = self.tg.get_followees(u)
        nu = 1
        sum_w = np.zeros(self.config.factor)
        for v in followees.keys():
            if self.rg.containsUser(v):
                nu += 1
                sum_w += self.W[self.rg.user[v]]
        sum_w /= np.sqrt(nu)
        return nu, sum_w


if __name__ == '__main__':
    bmf = TrustSVD()
    bmf.train_model(0)
    coldrmse = bmf.predict_model_cold_users()
    print('cold start user rmse is :' + str(coldrmse))
    bmf.show_rmse()
# ---- model/trust_walker.py ----
# encoding:utf-8
import sys

sys.path.append("..")
import numpy as np
from mf import MF
from utility.tools import sigmoid_2
from utility.similarity import cosine_improved_sp
from reader.trust import TrustGetter


class TrustWalker(MF):
    """
    TrustWalker: random walks over the trust network combined with
    item-based similarity.

    Jamali M, Ester M. Trustwalker: a random walk model for combining
    trust-based and item-based recommendation[C]//Proceedings of the 15th
    ACM SIGKDD international conference. ACM, 2009: 397-406.
    """

    def __init__(self):
        super(TrustWalker, self).__init__()
        np.random.seed(0)  # reproducible walks
        self.tg = TrustGetter()

    def init_model(self, k):
        super(TrustWalker, self).init_model(k)
        self.p = 1.0  # accumulated probability of the current walk

    def single_random_walk(self, user=5, item=3, k=0):
        """One recursive random-walk step; returns (walk probability, rating).

        k counts the steps already taken; the walk stops with probability
        stop_prob (or surely after six steps) and then uses the rating of the
        most similar item the current user rated.
        """
        print(user, item, k)
        print('%s%d' % ('k=', k))
        # if user u rated item i directly, return that rating
        if self.rg.containsUserItem(user, item):
            return self.p, self.rg.trainSet_u[user][item]
        else:
            rand_num = np.random.rand(1)  # get random number
            # compute the stop probability
            stop_prob, max_item, p_j = self.get_stop_prob(user, item, k)
            print('stop probability:' + str(stop_prob))
            print(rand_num, stop_prob)
            if rand_num < stop_prob or k >= 6:  # no more than six steps
                # FIX: a forced stop (k >= 6) can happen with no similar rated
                # item; previously this raised KeyError on trainSet_u[user][0].
                if max_item == 0:
                    return self.p, 0
                # take the most similar item j rated by user, return r(u,j)
                rating = self.rg.trainSet_u[user][max_item]
                self.p = self.p * stop_prob * p_j
                return (self.p, rating)
            else:
                # get next user for the random walk
                next_user, tu_prob = self.get_followee_user(user)
                print('next step user is:' + str(next_user))
                if next_user == None:  # user has no friends in the trust network
                    _, max_item, p_j = self.get_stop_prob(user, item, -1)  # k=-1: skip the k==0 shortcut
                    if max_item == 0:  # no next user and no similar items either
                        return self.p, 0
                    rating = self.rg.trainSet_u[user][max_item]
                    self.p = self.p * p_j
                    return (self.p, rating)

                self.p = self.p * (1 - stop_prob) * tu_prob
                k += 1
                # the recursion result must be returned, otherwise None propagates
                return self.single_random_walk(user=next_user, item=item, k=k)

    def get_followee_user(self, user):
        """Pick one of user's followees uniformly; (None, 0) when there are none."""
        followees = list(self.tg.get_followees(user))
        num_foll = len(followees)
        if num_foll == 0:
            return None, 0
        ind = np.random.randint(num_foll)  # pick one randomly
        p = 1.0 / num_foll
        return followees[ind], p

    def get_stop_prob(self, user, item, k):
        """Return (stop probability, most similar rated item, probability of
        choosing that item)."""
        sum_sim = 0.0
        max_sim = 0
        max_prob = 0.0
        max_prob_item = 0
        if k == 0:  # never stop on the first step
            self.p = 1.0
            return 0, 0, 0

        param = sigmoid_2(k)

        u_items = self.rg.user_rated_items(user)
        print(u_items)
        if len(u_items) == 0:
            return 0, 0, 0
        for u_item in u_items:
            sim = self.get_sim(item, u_item)
            sum_sim += sim
            prob = sim * param
            if prob > max_prob:
                max_sim = sim
                max_prob = prob
                max_prob_item = u_item
        # FIX: every similarity may be 0 -> guard the division below
        if sum_sim == 0:
            return 0, 0, 0
        # (stop probability, most similar item, probability of choosing item j)
        return max_prob, max_prob_item, max_sim / sum_sim

    def get_sim(self, item1, item2):
        """Improved cosine similarity between two item rating columns."""
        return cosine_improved_sp(self.rg.get_col(item1), self.rg.get_col(item2))


if __name__ == '__main__':
    tw = TrustWalker()
    tw.init_model(0)
    s = tw.single_random_walk(16, 235)  # test on user 16 and item 235
    print(s)
17 | """ 18 | 19 | def __init__(self): 20 | super(UserCF, self).__init__() 21 | self.config.n = 10 22 | # self.init_model(k) 23 | 24 | def init_model(self, k): 25 | super(UserCF, self).init_model(k) 26 | self.user_sim = SimMatrix() 27 | 28 | for u_test in self.rg.testSet_u: 29 | for u_train in self.rg.user: 30 | if u_test != u_train: 31 | if self.user_sim.contains(u_test, u_train): 32 | continue 33 | sim = pearson_sp(self.rg.get_row(u_test), self.rg.get_row(u_train)) 34 | self.user_sim.set(u_test, u_train, sim) 35 | 36 | def predict(self, u, i): 37 | matchUsers = sorted(self.user_sim[u].items(), key=lambda x: x[1], reverse=True) 38 | userCount = self.config.n 39 | if userCount > len(matchUsers): 40 | userCount = len(matchUsers) 41 | 42 | sum, denom = 0, 0 43 | for n in range(userCount): 44 | similarUser = matchUsers[n][0] 45 | if self.rg.containsUserItem(similarUser, i): 46 | similarity = matchUsers[n][1] 47 | rating = self.rg.trainSet_u[similarUser][i] 48 | sum += similarity * (rating - self.rg.userMeans[similarUser]) 49 | denom += similarity 50 | if sum == 0: 51 | if not self.rg.containsUser(u): 52 | return self.rg.globalMean 53 | return self.rg.userMeans[u] 54 | pred = self.rg.userMeans[u] + sum / float(denom) 55 | return pred 56 | 57 | 58 | if __name__ == '__main__': 59 | uc = UserCF() 60 | uc.init_model(0) 61 | print(uc.predict_model()) 62 | print(uc.predict_model_cold_users()) 63 | uc.init_model(1) 64 | print(uc.predict_model()) 65 | print(uc.predict_model_cold_users()) 66 | -------------------------------------------------------------------------------- /reader/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hongleizhang/RSAlgorithms/b6be615c8c554e82a7c97e0ebff16631865c2b01/reader/__init__.py -------------------------------------------------------------------------------- /reader/rating.py: -------------------------------------------------------------------------------- 1 | # 
# ---- reader/rating.py ----
# encoding:utf-8
import sys

sys.path.append("..")
import os
from collections import defaultdict
import numpy as np

from utility.tools import normalize
from configx.configx import ConfigX


class RatingGetter(object):
    """
    Read the rating data of cross-validation fold k and hold the global
    statistics (index maps, train/test dictionaries, means).
    """

    def __init__(self, k):
        super(RatingGetter, self).__init__()
        self.config = ConfigX()
        self.k_current = k                      # index of the fold used as test set
        self.user = {}                          # raw user id -> dense index (train only)
        self.item = {}                          # raw item id -> dense index (train only)
        self.all_User = {}                      # raw user id -> dense index (train + test)
        self.all_Item = {}
        self.id2user = {}
        self.id2item = {}
        self.dataSet_u = defaultdict(dict)
        self.trainSet_u = defaultdict(dict)     # train set by hierarchy user:[item,rating]
        self.trainSet_i = defaultdict(dict)     # train set by hierarchy item:[user,rating]
        self.testSet_u = defaultdict(dict)      # test set by hierarchy user:[item,rating]
        self.testSet_i = defaultdict(dict)      # test set by hierarchy item:[user,rating]
        self.testColdUserSet_u = defaultdict(dict)  # cold start users in test set
        self.trainHotUserSet = []               # hot users in train set
        self.trainSetLength = 0
        self.testSetLength = 0

        self.userMeans = {}                     # mean rating per user
        self.itemMeans = {}                     # mean rating per item
        self.globalMean = 0

        self.generate_data_set()  # generate train and test set
        self.getDataSet()
        self.get_data_statistics()
        self.get_cold_start_users()

    def generate_data_set(self):
        """Build the index maps and the train/test rating dictionaries."""
        for index, line in enumerate(self.trainSet()):
            u, i, r = line
            if not u in self.user:
                self.user[u] = len(self.user)
                self.id2user[self.user[u]] = u
            if not i in self.item:
                self.item[i] = len(self.item)
                self.id2item[self.item[i]] = i

            self.trainSet_u[u][i] = r
            self.trainSet_i[i][u] = r
            self.trainSetLength = index + 1
        self.all_User.update(self.user)
        self.all_Item.update(self.item)

        for index, line in enumerate(self.testSet()):
            u, i, r = line
            # FIX: membership must be checked against all_User/all_Item; the
            # previous check against self.user/self.item re-assigned a fresh,
            # colliding id every time a test-only user/item re-appeared.
            if not u in self.all_User:
                self.all_User[u] = len(self.all_User)
            if not i in self.all_Item:
                self.all_Item[i] = len(self.all_Item)
            self.testSet_u[u][i] = r
            self.testSet_i[i][u] = r
            self.testSetLength = index + 1

    # for cross validation
    def trainSet(self):
        """Yield (user, item, normalized rating) from every fold except k."""
        k = self.k_current
        # FIX: fold counter renamed -- it was 'i', shadowed by the item id
        # unpacked in the inner loop.
        for fold in range(self.config.k_fold_num):
            if fold != k:
                data_path = self.config.rating_cv_path + self.config.dataset_name + "-" + str(fold) + ".csv"
                if not os.path.isfile(data_path):
                    print("the format of ratings data is wrong!")
                    sys.exit()
                with open(data_path, 'r') as f:
                    for index, line in enumerate(f):
                        u, i, r = line.strip('\r\n').split(self.config.sep)
                        r = normalize(float(r))  # scale the rating score to [0-1]
                        yield (int(float(u)), int(float(i)), float(r))

    def testSet(self):
        """Yield (user, item, raw rating) from fold k."""
        k = self.k_current
        data_path = self.config.rating_cv_path + self.config.dataset_name + "-" + str(k) + ".csv"
        if not os.path.isfile(data_path):
            print("the format of ratings data is wrong!")
            sys.exit()
        with open(data_path, 'r') as f:
            for index, line in enumerate(f):
                u, i, r = line.strip('\r\n').split(self.config.sep)
                yield (int(float(u)), int(float(i)), float(r))

    def getDataSet(self):
        """Load the full (un-split) rating file into dataSet_u."""
        with open(self.config.rating_path, 'r') as f:
            for index, line in enumerate(f):
                u, i, r = line.strip('\r\n').split(self.config.sep)
                # int(float(...)) for consistency with trainSet/testSet parsing
                self.dataSet_u[int(float(u))][int(float(i))] = float(r)

    def get_train_size(self):
        """(number of train users, number of train items)."""
        return (len(self.user), len(self.item))

    # get cold start users in test set
    def get_cold_start_users(self):
        for user in self.testSet_u.keys():
            # .get avoids inserting empty entries into the defaultdict
            rating_length = len(self.trainSet_u.get(user, {}))
            if rating_length <= self.config.coldUserRating:
                self.testColdUserSet_u[user] = self.testSet_u[user]

    def get_data_statistics(self):
        """Compute user means, item means and the global mean."""
        total_rating = 0.0
        total_length = 0
        for u in self.user:
            u_total = sum(self.trainSet_u[u].values())
            u_length = len(self.trainSet_u[u])
            total_rating += u_total
            total_length += u_length
            self.userMeans[u] = u_total / float(u_length)

        for i in self.item:
            self.itemMeans[i] = sum(self.trainSet_i[i].values()) / float(len(self.trainSet_i[i]))

        if total_length == 0:
            self.globalMean = 0
        else:
            self.globalMean = total_rating / total_length

    def containsUser(self, u):
        'whether user is in training set'
        return u in self.user

    def containsItem(self, i):
        'whether item is in training set'
        return i in self.item

    def containsUserItem(self, user, item):
        'whether the (user, item) rating is in the training set'
        if user in self.trainSet_u:
            if item in self.trainSet_u[user]:
                return True
        return False

    def get_row(self, u):
        'all training ratings of user u'
        return self.trainSet_u[u]

    def get_col(self, c):
        'all training ratings of item c'
        return self.trainSet_i[c]

    def user_rated_items(self, u):
        'the raw ids of the items user u rated in the training set'
        return self.trainSet_u[u].keys()


if __name__ == '__main__':
    rg = RatingGetter(0)
return False 183 | 184 | def get_row(self, u): 185 | return self.trainSet_u[u] 186 | 187 | def get_col(self, c): 188 | return self.trainSet_i[c] 189 | 190 | def user_rated_items(self, u): 191 | return self.trainSet_u[u].keys() 192 | 193 | 194 | if __name__ == '__main__': 195 | rg = RatingGetter(0) 196 | # for ind,entry in enumerate(rg.testSet()): 197 | # if ind<80: 198 | # print(entry) 199 | # # user,item,rating = entry 200 | 201 | # print(rg.trainSet_u[52]) 202 | # print(rg.trainSet_u[10]) 203 | -------------------------------------------------------------------------------- /reader/trust.py: -------------------------------------------------------------------------------- 1 | # encoding:utf-8 2 | import sys 3 | 4 | sys.path.append("..") 5 | import numpy as np 6 | import os 7 | from collections import defaultdict 8 | 9 | from configx.configx import ConfigX 10 | 11 | 12 | class TrustGetter(object): 13 | """ 14 | docstring for TrustGetter 15 | read trust data and save the global parameters 16 | 17 | """ 18 | 19 | def __init__(self): 20 | super(TrustGetter, self).__init__() 21 | self.config = ConfigX() 22 | 23 | self.user = {} # used to store the order of users 24 | self.relations = self.get_relations() 25 | self.followees = defaultdict(dict) 26 | self.followers = {} 27 | self.matrix_User = {} 28 | self.matrix_Item = {} 29 | self.generate_data_set() 30 | 31 | def generate_data_set(self): 32 | triple = [] 33 | for line in self.relations: 34 | userId1, userId2, weight = line 35 | # add relations to dict 36 | if not userId1 in self.followees: 37 | self.followees[userId1] = {} 38 | self.followees[userId1][userId2] = weight 39 | if not userId2 in self.followers: 40 | self.followers[userId2] = {} 41 | self.followers[userId2][userId1] = weight 42 | # order the user 43 | if not userId1 in self.user: 44 | userid1 = self.user[userId1] = len(self.user) 45 | if not userId2 in self.user: 46 | userid2 = self.user[userId2] = len(self.user) 47 | if not userid1 in self.matrix_User: 48 
| self.matrix_User[userid1] = {} 49 | if not userid2 in self.matrix_User: 50 | self.matrix_Item[userid2] = {} 51 | self.matrix_User[userid1][userid2] = weight 52 | self.matrix_Item[userid2][userid1] = weight 53 | 54 | def get_relations(self): 55 | if not os.path.isfile(self.config.trust_path): 56 | print("the format of trust data is wrong") 57 | sys.exit() 58 | with open(self.config.trust_path, 'r') as f: 59 | for index, line in enumerate(f): 60 | u_from, u_to, t = line.strip('\r\n').split(self.config.sep) 61 | yield (int(u_from), int(u_to), float(t)) 62 | 63 | def get_followees(self, u): 64 | if u in self.followees: 65 | return self.followees[u] 66 | else: 67 | return {} 68 | 69 | def get_followers(self, u): 70 | if u in self.followers: 71 | return self.followers[u] 72 | else: 73 | return {} 74 | 75 | def weight(self, u, k): 76 | if u in self.followees and k in self.followees[u]: 77 | return self.followees[u][k] 78 | else: 79 | return 0 80 | 81 | 82 | if __name__ == '__main__': 83 | tg = TrustGetter() 84 | s = tg.get_followees(2).keys() 85 | print(s) 86 | -------------------------------------------------------------------------------- /utility/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hongleizhang/RSAlgorithms/b6be615c8c554e82a7c97e0ebff16631865c2b01/utility/__init__.py -------------------------------------------------------------------------------- /utility/cross_validation.py: -------------------------------------------------------------------------------- 1 | # encoding:utf-8 2 | import sys 3 | 4 | sys.path.append("..") 5 | import os 6 | import numpy as np 7 | import pandas as pd 8 | from scipy.sparse import coo_matrix 9 | from configx.configx import ConfigX 10 | 11 | """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" 12 | Split ratings into five folds 13 | """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" 14 | 15 
def split_5_folds(configx):
    """Split the rating file into ``configx.k_fold_num`` folds for cross validation.

    Each user's ratings are shuffled and distributed (approximately) evenly
    across the K folds, so every fold contains ratings from every user where
    possible. Folds are written to ``../data/cv/<dataset_name>-<k>.csv``.

    :param configx: ConfigX instance providing ``rating_path``, ``sep``,
                    ``k_fold_num`` and ``dataset_name``.
    """
    K = configx.k_fold_num
    names = ['user_id', 'item_id', 'rating']
    if not os.path.isfile(configx.rating_path):
        print("the format of rating data is wrong")
        sys.exit()
    df = pd.read_csv(configx.rating_path, sep=configx.sep, names=names)
    ratings = coo_matrix((df.rating, (df.user_id, df.item_id)))
    users = np.unique(ratings.row)
    ratings = ratings.tocsr()

    # Pre-allocate one flat buffer per fold; the last fold absorbs the
    # remainder of nnz / K.
    base_size = int(ratings.nnz / K)
    rows = []
    cols = []
    vals = []
    nonzeros = []
    for k in range(K):
        size_of_bucket = base_size + (ratings.nnz % K if k == K - 1 else 0)
        rows.append(np.zeros(size_of_bucket))
        cols.append(np.zeros(size_of_bucket))
        vals.append(np.zeros(size_of_bucket))
        nonzeros.append(0)

    for user in users:
        items = ratings[user, :].indices
        rating_vals = ratings[user, :].data
        # Random fold assignment, one slot per rating of this user.
        index_list = list(range(K)) * int(len(items) / float(K) + 1)
        np.random.shuffle(index_list)
        index_list = np.array(index_list)

        for k in range(K):
            k_index_list = (index_list[:len(items)] == k)
            from_ind = nonzeros[k]
            to_ind = nonzeros[k] + sum(k_index_list)

            # Grow the buffers if the random assignment overflowed them.
            # (Bug fix: growth previously reused the stale last-fold
            # size_of_bucket and pointlessly recomputed k_index_list.)
            if to_ind >= len(rows[k]):
                rows[k] = np.append(rows[k], np.zeros(base_size))
                cols[k] = np.append(cols[k], np.zeros(base_size))
                vals[k] = np.append(vals[k], np.zeros(base_size))

            rows[k][from_ind:to_ind] = [user] * sum(k_index_list)
            cols[k][from_ind:to_ind] = items[k_index_list]
            vals[k][from_ind:to_ind] = rating_vals[k_index_list]
            nonzeros[k] += sum(k_index_list)

    if not os.path.exists('../data/cv'):
        os.makedirs('../data/cv')
        print('../data/cv folder has been established.')

    for k, (row, col, val, nonzero) in enumerate(zip(rows, cols, vals, nonzeros)):
        # Bug fix: ids were written as floats ("1.0") because the buckets are
        # float arrays; cast back to int so downstream parsers see clean ids.
        bucket_df = pd.DataFrame({'user': row[:nonzero].astype(int),
                                  'item': col[:nonzero].astype(int),
                                  'rating': val[:nonzero]},
                                 columns=['user', 'item', 'rating'])
        bucket_df.to_csv("../data/cv/%s-%d.csv" % (configx.dataset_name, k),
                         sep=configx.sep, header=False, index=False)
        print("%s -fold%d data generated finished!" % (configx.dataset_name, k))

    print("All Data Generated Done!")


if __name__ == "__main__":
    configx = ConfigX()
    split_5_folds(configx)


# ---- utility/data_prepro.py ----
# encoding:utf-8
import sys

sys.path.append("..")
import pandas as pd
from configx.configx import ConfigX

config = ConfigX()
data = pd.read_table(config.trust_path, sep=' ', header=None)
# the number of trust links
print(len(data))

# the number of distinct followers (source users, column 0)
print(len(data[0].unique()))

# the number of distinct followees (target users, column 1)
print(len(data[1].unique()))


# ---- utility/data_statistics.py ----
# encoding:utf-8
import sys

sys.path.append("..")

from metrics.metric import Metric
from utility.tools import denormalize, sigmoid
from reader.rating import RatingGetter
from reader.trust import TrustGetter
from configx.configx import ConfigX


class DataStatis(object):
    """Count cold/warm users with respect to ratings and social links.

    A user is rating-cold with fewer than 5 ratings and social-cold with
    fewer than 5 followees; the four combined counters partition all users.
    """

    def __init__(self):
        super(DataStatis, self).__init__()
        self.config = ConfigX()
        self.rg = RatingGetter()  # loads rating data
        self.tg = TrustGetter()   # loads trust/social data
        self.cold_rating = 0
        self.cold_social = 0
        self.cold_rating_social = 0
        self.cold_rating_warm_social = 0
        self.warm_rating_cold_social = 0
        self.warm_rating_warm_social = 0

    def getDataStatis(self):
        """Walk all rated users and accumulate the cold/warm counters."""
        for user in self.rg.dataSet_u:
            num_rating = len(self.rg.dataSet_u[user])
            # Bug fix: indexing self.tg.followees[user] raised KeyError for
            # users without any followees; get_followees() returns {} instead.
            num_social = len(self.tg.get_followees(user))

            if num_rating < 5:
                self.cold_rating += 1
            if num_social < 5:
                self.cold_social += 1

            if num_rating < 5 and num_social < 5:
                self.cold_rating_social += 1
            if num_rating < 5 and num_social >= 5:
                self.cold_rating_warm_social += 1
            # Bug fix: was "num_social <= 5", which double-counted users with
            # exactly 5 followees as both social-cold and social-warm.
            if num_rating >= 5 and num_social < 5:
                self.warm_rating_cold_social += 1
            if num_rating >= 5 and num_social >= 5:
                self.warm_rating_warm_social += 1


if __name__ == '__main__':
    ds = DataStatis()
    ds.getDataStatis()
    print(ds.cold_rating)
    print(ds.cold_social)
    print(ds.cold_rating_social)
    print(ds.cold_rating_warm_social)
    print(ds.warm_rating_cold_social)
    print(ds.warm_rating_warm_social)
# ---- utility/draw_figure.py ----
import matplotlib.pylab as plt


def show_rmse():
    """Plot RMSE against train-set ratio for three models on Epinions (d=30).

    The numbers are hard-coded experiment results measured at train ratios
    0.8 / 0.6 / 0.4.
    """
    mfr = [1.16677, 1.18048, 1.19479]  # PMF
    sr = [1.09047, 1.10516, 1.13036]   # SocialReg
    my = [1.08060, 1.09336, 1.11335]   # SocialEmbeddings
    x = [0.8, 0.6, 0.4]

    plt.plot(x, mfr, label='PMF')
    plt.plot(x, sr, label='SocialReg')
    # Bug fix: linewidth expects a number, not the string '2'.
    plt.plot(x, my, linewidth=2, label='SocialEmbeddings')

    plt.xlabel('ratio of train set')
    plt.ylabel('RMSE')
    plt.title('Epinions(d=30)-all users')
    plt.legend()
    plt.show()


show_rmse()


# ---- utility/matrix.py ----
class SimMatrix(object):
    """Symmetric sparse similarity matrix stored as a dict of row dicts."""

    def __init__(self):
        self.symMatrix = {}

    def __getitem__(self, item):
        # Whole-row access; unseen rows yield an empty dict, not KeyError.
        if item in self.symMatrix:
            return self.symMatrix[item]
        return {}

    def set(self, i, j, val):
        # Write both (i, j) and (j, i) so the matrix stays symmetric.
        if i not in self.symMatrix:
            self.symMatrix[i] = {}
        self.symMatrix[i][j] = val
        if j not in self.symMatrix:
            self.symMatrix[j] = {}
        self.symMatrix[j][i] = val

    def get(self, i, j):
        # 0 is the default similarity for unknown pairs.
        if i not in self.symMatrix or j not in self.symMatrix[i]:
            return 0
        return self.symMatrix[i][j]

    def contains(self, i, j):
        if i in self.symMatrix and j in self.symMatrix[i]:
            return True
        else:
            return False


# ---- utility/similarity.py ----
# encoding:utf-8
import sys

sys.path.append("..")
import numpy as np
from math import sqrt
from utility.tools import sigmoid_2


# x1, x2 below are dense np.array rating vectors.

def euclidean(x1, x2):
    """Inverse euclidean distance over co-rated entries (0 when identical)."""
    new_x1, new_x2 = common(x1, x2)
    diff = new_x1 - new_x2
    denom = sqrt(diff.dot(diff))
    try:
        return 1 / denom
    except ZeroDivisionError:
        return 0


def cosine(x1, x2):
    """Cosine similarity over co-rated entries (0 when either norm is 0)."""
    new_x1, new_x2 = common(x1, x2)
    # Renamed from "sum", which shadowed the builtin.
    num = new_x1.dot(new_x2)
    denom = sqrt(new_x1.dot(new_x1) * new_x2.dot(new_x2))
    try:
        return float(num) / denom
    except ZeroDivisionError:
        return 0


def pearson(x1, x2):
    """Pearson correlation over co-rated entries (0 on degenerate input).

    NOTE(review): the means divide by the count of *positive* entries, which
    equals the co-rated count only if all ratings are > 0 — confirm the
    rating scale never contains non-positive values.
    """
    new_x1, new_x2 = common(x1, x2)
    ind1 = new_x1 > 0
    ind2 = new_x2 > 0
    try:
        mean_x1 = float(new_x1.sum()) / ind1.sum()
        mean_x2 = float(new_x2.sum()) / ind2.sum()
        new_x1 = new_x1 - mean_x1
        new_x2 = new_x2 - mean_x2
        num = new_x1.dot(new_x2)
        denom = sqrt((new_x1.dot(new_x1)) * (new_x2.dot(new_x2)))
        return float(num) / denom
    except ZeroDivisionError:
        return 0
new_x1.dot(new_x2) 48 | denom = sqrt((new_x1.dot(new_x1)) * (new_x2.dot(new_x2))) 49 | return float(sum) / denom 50 | except ZeroDivisionError: 51 | return 0 52 | 53 | 54 | def common(x1, x2): 55 | # find common ratings 56 | common = (x1 != 0) & (x2 != 0) 57 | new_x1 = x1[common] 58 | new_x2 = x2[common] 59 | return new_x1, new_x2 60 | 61 | 62 | # x1,x2 is the form of dict. 63 | 64 | def cosine_sp(x1, x2): 65 | 'x1,x2 are dicts,this version is for sparse representation' 66 | total = 0 67 | denom1 = 0 68 | denom2 = 0 69 | # x1_l,x2_l=len(x1),len(x2) 70 | # if x2_l>x1_l: 71 | # x1,x2=x2,x1 72 | for k in x1: 73 | if k in x2: 74 | total += x1[k] * x2[k] 75 | denom1 += x1[k] ** 2 76 | denom2 += x2[k] ** 2 # .pop(k) 77 | # else: 78 | # denom1+=x1[k]**2 79 | # for j in x2: 80 | # denom2+=x2[j]**2 81 | try: 82 | return (total + 0.0) / (sqrt(denom1) * sqrt(denom2)) 83 | except ZeroDivisionError: 84 | return 0 85 | 86 | 87 | def cosine_improved_sp(x1, x2): 88 | 'x1,x2 are dicts,this version is for sparse representation' 89 | total = 0 90 | denom1 = 0 91 | denom2 = 0 92 | nu = 0 93 | for k in x1: 94 | if k in x2: 95 | nu += 1 96 | total += x1[k] * x2[k] 97 | denom1 += x1[k] ** 2 98 | denom2 += x2[k] ** 2 99 | try: 100 | return (total + 0.0) / (sqrt(denom1) * sqrt(denom2)) * sigmoid_2(nu) 101 | except ZeroDivisionError: 102 | return 0 103 | 104 | 105 | # def pearson_sp(x1, x2): 106 | # total = 0 107 | # denom1 = 0 108 | # denom2 = 0 109 | # try: 110 | # mean1 = sum(x1.values()) / (len(x1) + 0.0) 111 | # mean2 = sum(x2.values()) / (len(x2) + 0.0) 112 | # for k in x1: 113 | # if k in x2: 114 | # total += (x1[k] - mean1) * (x2[k] - mean2) 115 | # denom1 += (x1[k] - mean1) ** 2 116 | # denom2 += (x2[k] - mean2) ** 2 117 | # return (total + 0.0) / (sqrt(denom1) * sqrt(denom2)) 118 | # except ZeroDivisionError: 119 | # return 0 120 | 121 | # improved pearson 122 | def pearson_sp(x1, x2): 123 | common = set(x1.keys()) & set(x2.keys()) 124 | if len(common) == 0: 125 | return 0 126 | 
# TrustWalker used
def pearson_improved_sp(x1, x2):
    """Pearson over common keys with each vector centred by its own overall
    mean (not the common-key mean), damped by ``sigmoid_2(#common)``.

    Returns 0 when either vector is empty or has zero variance over the
    common keys (ZeroDivisionError path).
    """
    total = 0.0
    denom1 = 0
    denom2 = 0
    nu = 0  # number of co-rated keys
    try:
        mean1 = sum(x1.values()) / (len(x1) + 0.0)
        mean2 = sum(x2.values()) / (len(x2) + 0.0)
        for k in x1:
            if k in x2:
                nu += 1
                total += (x1[k] - mean1) * (x2[k] - mean2)
                denom1 += (x1[k] - mean1) ** 2
                denom2 += (x2[k] - mean2) ** 2
        return (total + 0.0) / (sqrt(denom1) * sqrt(denom2)) * sigmoid_2(nu)
    except ZeroDivisionError:
        return 0


def euclidean_sp(x1, x2):
    """Inverse euclidean distance over the common keys of sparse dict vectors.

    Returns 0 when there is no overlap or the vectors agree exactly on all
    common keys.
    """
    total = 0.0
    for k in x1:
        if k in x2:
            # Bug fix: was sqrt(x1[k] - x2[k]), which raises a math domain
            # error whenever x2[k] > x1[k]; accumulate squared differences
            # and take the root once — the actual euclidean distance.
            total += (x1[k] - x2[k]) ** 2
    try:
        return 1.0 / sqrt(total)
    except ZeroDivisionError:
        return 0


# ---- utility/tools.py ----
# encoding:utf-8
import sys

sys.path.append("..")

import numpy as np
from numpy.linalg import norm
from configx.configx import ConfigX

config = ConfigX()


def l1(x):
    """L1 norm of x."""
    return norm(x, ord=1)


def l2(x):
    """L2 (euclidean) norm of x."""
    return norm(x)
def normalize(rating, minVal=None, maxVal=None):
    """Min-max normalize *rating* into (0.01, 1.01].

    ``minVal``/``maxVal`` default to ``config.min_val``/``config.max_val``,
    read lazily at call time (previously they were frozen into the defaults
    at import time, so later config changes were silently ignored).

    :raises ArithmeticError: if maxVal < minVal.
    """
    if minVal is None:
        minVal = config.min_val
    if maxVal is None:
        maxVal = config.max_val
    if maxVal > minVal:
        return float(rating - minVal) / (maxVal - minVal) + 0.01
    elif maxVal == minVal:
        # NOTE(review): divides by maxVal — raises ZeroDivisionError when the
        # degenerate scale is 0/0; presumably rating scales never start at 0.
        return rating / maxVal
    else:
        print('error... maximum value is less than minimum value.')
        raise ArithmeticError


def denormalize(rating, minVal=None, maxVal=None):
    """Inverse of :func:`normalize`: map a normalized value back to the
    original rating scale. Defaults are read lazily from ``config``."""
    if minVal is None:
        minVal = config.min_val
    if maxVal is None:
        maxVal = config.max_val
    return minVal + (rating - 0.01) * (maxVal - minVal)


def sigmoid(z):
    """Standard logistic function 1 / (1 + e^-z)."""
    return 1.0 / (1.0 + np.exp(-z))


def sigmoid_deriv(z):
    """Derivative of the logistic function: s(z) * (1 - s(z))."""
    return sigmoid(z) * (1.0 - sigmoid(z))


def sigmoid_2(z):
    """Logistic function with halved slope; used to damp similarity weights."""
    return 1.0 / (1.0 + np.exp(-z / 2.0))


# ---- utility/util.py ----
# encoding:utf-8
import sys

sys.path.append("..")

import pickle


def save_data(obj, filename):
    """Pickle *obj* to *filename*.

    Bug fix: the file handle was previously never closed
    (``pickle.dump(obj, open(filename, 'wb'))``), risking unflushed writes.
    """
    with open(filename, 'wb') as f:
        pickle.dump(obj, f)


def load_data(filename):
    """Unpickle and return the object stored in *filename*.

    Bug fix: close the file handle deterministically via ``with``.
    """
    with open(filename, 'rb') as f:
        model = pickle.load(f)
    print(filename + ' load data model finished.')
    return model