├── .DS_Store ├── Crawler ├── .DS_Store └── crawl_app_info │ ├── .DS_Store │ ├── crawl_app_info.py │ ├── data │ └── .DS_Store │ ├── gen_appid_and_appname.py │ ├── kill.sh │ └── run_crawl_app_info.sh ├── NLP ├── LDA │ └── LDA.md ├── doc2vec │ └── doc2vec.md ├── textrank │ └── textrank.md ├── tfidf │ └── tfidf.md └── word2vec │ └── word2vec.md ├── README.md ├── bandit.md ├── cold_start ├── ContentItemKNN.py ├── README.md └── bandit.md ├── data_tools ├── .DS_Store ├── Excel │ └── add_info_to_excel │ │ ├── data │ │ ├── docid_ttaccount │ │ ├── result_log_2019110811_12_ab_0.xlsx │ │ └── result_log_2019110811_12_ab_0_m.xlsx │ │ ├── modify_excel.py │ │ └── run.sh ├── Hadoop │ └── get_docid_pv_cl_by_hadoop │ │ ├── hadoop_script │ │ ├── mapper.py │ │ └── reducer.py │ │ ├── local_test_hadoop.sh │ │ └── run_hadoop.sh ├── KV │ └── get_doc_text_by_kv │ │ ├── KVClient.py │ │ ├── KVClient.pyc │ │ ├── get_doc_text_multiprocessing.py │ │ ├── run.sh │ │ └── ttypes.py ├── Matplotlib │ └── matplotlib_ctr │ │ ├── ctr.jpg │ │ ├── matplotlib_ctr.py │ │ ├── matplotlib_rank.py │ │ └── rank.jpg └── Redis │ └── get_doc_hot │ ├── get_doc_hot.py │ ├── run.sh │ ├── ttypes.py │ └── ttypes.pyc ├── job request ├── ability.md ├── mianshi.md └── ziliao.md ├── rank ├── .DS_Store ├── Basic-DeepFM-model │ ├── .DS_Store │ ├── .ipynb_checkpoints │ │ └── DeepFM-StepByStep-checkpoint.ipynb │ ├── DataReader.py │ ├── DeepFM-StepByStep.ipynb │ ├── DeepFM.py │ ├── __pycache__ │ │ ├── DataReader.cpython-37.pyc │ │ ├── DeepFM.cpython-37.pyc │ │ ├── config.cpython-37.pyc │ │ └── metrics.cpython-37.pyc │ ├── config.py │ ├── data │ │ ├── test.csv │ │ └── train.csv │ ├── fig │ │ ├── DNN.png │ │ ├── DeepFM.png │ │ └── FM.png │ ├── main.py │ ├── metrics.py │ └── output │ │ ├── DNN_Mean-0.31183_Std0.29369.csv │ │ ├── DeepFM_Mean-0.11470_Std0.37335.csv │ │ ├── DeepFM_Mean0.01434_Std0.10176.csv │ │ ├── DeepFM_Mean0.05735_Std0.20027.csv │ │ ├── DeepFM_Mean0.26137_Std0.00210.csv │ │ └── FM_Mean0.23297_Std0.05576.csv ├── GBDT+LR-Demo │ ├── GBDT_LR.py │ └── model.txt ├── fm.py ├── rank_model.md ├── recommendation-FFM-Demo │ ├── FFM_model.py │ └── TFModel │ │ ├── FFM-0.data-00000-of-00001 │ │ ├── FFM-0.index │ │ ├── FFM-0.meta │ │ ├── FFM │ │ ├── events.out.tfevents.1523526908.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523527022.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523527136.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523527252.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523527416.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530263.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530409.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530500.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530509.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530517.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530526.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530538.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530548.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530556.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530568.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530579.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530589.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530598.meituan-sxwdeMacBook-Pro-4.local │ │ ├── 
events.out.tfevents.* (about 90 more TensorBoard event files with the same naming pattern, timestamps 1523530606-1523537925, omitted) │ │ └── checkpoint ├── recommendation-FM-demo │ ├── .DS_Store │ ├── FM_model.py │ └── data │ │ ├── .DS_Store │ │ ├── ua.base │ │ ├── ua.csv │ │ └── ua.test └── 推荐系统已读论文 │ ├── Attentional Factorization Machines Learning the Weight of Feature Interactions via Attention Networks.pdf │ ├── DRN A Deep Reinforcement Learning Framework for News Recommendation.pdf │ ├── Deep & Cross Network for Ad
Click Predictions.pdf │ ├── Deep Reinforcement Learning for List-wise Recommendation.pdf │ ├── Deep interest network.pdf │ ├── Entire Space Multi-Task Model An E ective Approach for Estimating Post-Click Conversion Rate.pdf │ ├── MLR.pdf │ ├── Neural Factorization Machines for Sparse Predictive Analytics.pdf │ ├── Product-based Neural Networks for User Response Prediction.pdf │ ├── deepfm.pdf │ ├── facebook-GBDT-LR.pdf │ ├── linucb.pdf │ └── wide&deep.pdf ├── recall ├── .DS_Store ├── code │ ├── .DS_Store │ ├── MostPopular.py │ ├── TagBased.py │ ├── baseline.py │ ├── data.py │ ├── experiment.py │ ├── graph.py │ ├── itemcf.py │ ├── itemcf_norm.py │ ├── lfm.py │ ├── metric.py │ ├── time.py │ └── usercf.py ├── recall.md ├── 向量召回快速检索方法.md └── 工程经验.md ├── 推荐算法.xmind ├── 用户冷启动.xmind └── 用户画像.xmind /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/.DS_Store -------------------------------------------------------------------------------- /Crawler/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/Crawler/.DS_Store -------------------------------------------------------------------------------- /Crawler/crawl_app_info/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/Crawler/crawl_app_info/.DS_Store -------------------------------------------------------------------------------- /Crawler/crawl_app_info/crawl_app_info.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/Crawler/crawl_app_info/crawl_app_info.py -------------------------------------------------------------------------------- /Crawler/crawl_app_info/data/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/Crawler/crawl_app_info/data/.DS_Store -------------------------------------------------------------------------------- /Crawler/crawl_app_info/gen_appid_and_appname.py: -------------------------------------------------------------------------------- 1 | #coding:gbk 2 | import sys 3 | import re 4 | 5 | appid_list = [] 6 | appname_list = [] 7 | 8 | 9 | def judge_is_appid(app): 10 | parts = app.split('.') 11 | if len(parts) <= 1: # QQ 12 | return False 13 | if len(parts[-1]) == 0: # Link. 
14 |         return False
15 |     if parts[-1] == 'com':
16 |         return False  # Booking.com
17 |     max_part_len = max(map(lambda x: len(x), parts))
18 |     if max_part_len <= 1:
19 |         return False  # B.A.W
20 |     for part in parts:
21 |         re_result = re.search('^[0-9]+$', part)
22 |         if re_result:
23 |             return False  # STracter_V2.0
24 |     re_result = re.search('^[0-9a-zA-Z\._]+$', app)
25 |     if re_result:
26 |         return True
27 |     else:
28 |         return False
29 | 
30 | 
31 | def output(appxx_list, output_path):
32 |     fw = open(output_path, 'w')
33 |     for appxx in appxx_list:
34 |         fw.write(appxx + '\n')
35 |     fw.close()
36 | 
37 | 
38 | for line in file(sys.argv[1]):
39 |     items1 = line.strip().split('\t')
40 |     if len(items1) != 2:
41 |         continue
42 |     app = items1[0]
43 |     times = int(items1[1])
44 |     if times < int(sys.argv[2]):
45 |         continue
46 |     if judge_is_appid(app):
47 |         appid_list.append(app)
48 |     else:
49 |         appname_list.append(app)
50 | 
51 | 
52 | output(appid_list, sys.argv[3])
53 | output(appname_list, sys.argv[4])
--------------------------------------------------------------------------------
/Crawler/crawl_app_info/kill.sh:
--------------------------------------------------------------------------------
1 | ps -ef | grep crawl_app_info.py | grep -v grep | grep -v vim | awk '{print $2}' | xargs kill -9
2 | 
--------------------------------------------------------------------------------
/Crawler/crawl_app_info/run_crawl_app_info.sh:
--------------------------------------------------------------------------------
1 | python gen_appid_and_appname.py ../3_stat_app_times/data/app_times 300 ./data/appid ./data/appname
2 | 
3 | python crawl_app_info.py id ./data/appid ./data/appid_info
4 | 
5 | python crawl_app_info.py name ./data/appname ./data/appname_info
6 | 
--------------------------------------------------------------------------------
/NLP/LDA/LDA.md:
--------------------------------------------------------------------------------
# Introduction to LDA
LDA is a document topic model with a three-layer structure of words, topics, and documents.

LDA assumes that a document is composed of several topics with certain probabilities, and that each topic is in turn composed of words with certain probabilities.

Early on, documents were modeled with the bag-of-words model, representing a document as counts of its words. This makes the bag-of-words vector enormous in dimension, which easily causes slow training and overfitting when a classification model is trained on it.

LDA essentially reduces the dimensionality of the bag-of-words model by representing a document in terms of topics. The number of topics is usually a few hundred, which greatly speeds up training and is comparatively less prone to overfitting. To some extent, a topic is an abstract representation of a group of words.

Take the recent TV series 《南方有乔木》 as an example. Suppose an article summarizes the show's plot. We can represent the article as:

    0.30*"创业"+0.3*"三角恋"+0.2*"无人机"

(startup / love triangle / drone), and the "love triangle" topic in turn as:

    0.4*"南乔"+0.3*"时樾"+0.3*"安宁"

(the three lead characters). Note that computing these document-topic-word correspondences requires a large corpus of documents for the result to be generally meaningful. LDA provides exactly such an algorithm and learns the correspondences automatically from the training documents.

# The LDA generative process

For each document in the corpus, LDA defines the following generative process:
1. for each document, draw a topic from the document's topic distribution;
2. draw a word from the word distribution of the drawn topic;
3. repeat until every word of the document has been generated.

# Preprocessing

Computing LDA requires removing stop words, so load the stop-word list saved earlier.

    def load_stopwords():
        with open("stopwords.txt") as F:
            stopwords = F.readlines()
        return [word.strip() for word in stopwords]

# Using LDA to extract document features
The LDA output usually serves as the basis for downstream document classification, document-similarity computation, and document clustering, so LDA can be treated as a feature-extraction method.
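For feature extraction along these lines, a minimal gensim sketch (the three-document corpus is a toy stand-in; it assumes documents are already segmented and stop-word-filtered):

    from gensim import corpora, models

    # toy corpus: each document is a list of tokens (already segmented)
    docs = [["创业", "融资", "无人机"],
            ["南乔", "时樾", "三角恋"],
            ["无人机", "创业", "公司"]]
    dictionary = corpora.Dictionary(docs)               # word-to-id mapping
    corpus = [dictionary.doc2bow(d) for d in docs]      # bag-of-words counts
    lda = models.LdaModel(corpus, id2word=dictionary,
                          num_topics=2, passes=10)      # a real corpus would use a few hundred topics
    print(lda.print_topics())                           # each topic as a weighted word list
    print(lda.get_document_topics(corpus[0]))           # a document's topic distribution, usable as features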
--------------------------------------------------------------------------------
/NLP/doc2vec/doc2vec.md:
--------------------------------------------------------------------------------
# Ways to embed a sentence
## bag of words
The bag-of-words model: ignores word order and carries no word semantics.
## average word vectors
Averaging the word vectors: still ignores word order.
## tfidf-weighting word vector
TF-IDF-weighted word vectors: still ignores word order.
## lda
The topic distribution of the document or sentence.

# doc2vec
Each step samples a fixed-length span of words from a sentence, takes one word as the prediction target, and uses the rest as inputs. The word vectors of the input words and the paragraph vector of the sentence are fed to the input layer; the paragraph vector and the sampled word vectors are summed or averaged into a new vector X, which is then used to predict the target word within the window (i.e., to predict the next word of the sentence).

Compared with word2vec, doc2vec adds a new paragraph vector at the input layer. The paragraph vector can be viewed as one more word vector that acts as a memory.

Training still slides over small spans of words in each sentence, and the paragraph vector is shared across all training windows of the same sentence, so one sentence is trained many times and the input of every step includes the paragraph vector.

The paragraph vector can be seen as the gist of the sentence: with it, the sentence's gist enters the input at every training step. Each step therefore trains not only the words (yielding word vectors); as the window slides across the sentence, the shared paragraph vector also converges to an increasingly accurate expression of the sentence's meaning.
## PV-DM
Predicts the next word from each word's context (plus the paragraph vector).
## PV-DBOW
Ignores the context words in the input and lets the model predict a randomly chosen word of the paragraph.
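A minimal gensim sketch of training both variants (parameter names follow gensim 4; older releases use size= instead of vector_size= and model.docvecs instead of model.dv):

    from gensim.models.doc2vec import Doc2Vec, TaggedDocument

    sentences = [["i", "love", "you"], ["me", "too"]]   # toy data
    docs = [TaggedDocument(words=s, tags=[str(i)]) for i, s in enumerate(sentences)]
    # dm=1 -> PV-DM: predict a word from its context plus the paragraph vector
    # dm=0 -> PV-DBOW: predict the paragraph's words from the paragraph vector alone
    model = Doc2Vec(docs, vector_size=16, window=2, min_count=1, dm=1, epochs=50)
    vec = model.infer_vector(["i", "love"])   # embed an unseen sentence
    print(model.dv.most_similar([vec]))       # nearest training paragraphs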
--------------------------------------------------------------------------------
/NLP/textrank/textrank.md:
--------------------------------------------------------------------------------
# What the algorithm is for
TextRank and TFIDF automatically extract a document's keywords, which is valuable for user profiling in recommender systems and for text clustering.

# TextRank
TextRank borrows its idea from PageRank. PageRank was originally used to score the importance of web pages: the whole web can be seen as a directed graph whose nodes are pages; if page A links to page B there is a directed edge from A to B, and the more links point at a page, the larger its importance, i.e. its PageRank value. Combining this with Title, Keywords, and other factors, Google uses PageRank to adjust its results so that more important pages rank higher in search.

TextRank works analogously. After stop words are removed, each word of a document acts like a page (sometimes only words of specified parts of speech, such as nouns and verbs, take part in the TextRank computation). Pages have explicit link directions, but the words of a document only have adjacency, so a sliding window size is specified, say 5 words before and after; adjacency within the window is treated as the link relation between "pages" in the TextRank computation.

# TFIDF
A basic assumption of TFIDF is that a word's importance is determined by frequency: if a word appears frequently in one sentence but rarely in other sentences, it matters a great deal for that sentence; the same holds for a document.

# Extracting keywords
## Extracting keywords with TextRank
Jieba ships a TextRank implementation; the default parameters are enough to extract keywords.

    # import the TextRank keyword-extraction interface
    textrank = analyse.textrank

    # extract keywords with the TextRank algorithm
    keywords = textrank(text)
    # print the extracted keywords
    for keyword in keywords:
        print keyword + "/"

Jieba's interface also lets you set the number of keywords and the allowed parts of speech, e.g.:

- topK: the number of keywords to return
- allowPOS: the allowed parts of speech; common tags include:
  - n: noun
  - nr: person name
  - ns: place name
  - nz: other proper noun
  - t: time word
  - v: verb
  - vd: adverbial verb
  - vn: verbal noun

Here we extract only 10 keywords and keep only nouns, verbs, and verbal nouns.

    # keyword extraction with the TextRank algorithm
    keywords = textrank(text, topK = 10, withWeight = False, allowPOS = ('n','ns','vn','v'))

The output below basically meets our needs.

    叙军/
    远程/
    空袭/
    电视台/
    战术/
    反击/
    空军/
    现代化/
    叙利亚/
    地对地/

## Extracting keywords with TFIDF
Extracting keywords with TFIDF works much like TextRank.

    # TFIDF
    keywords_tfidf = analyse.extract_tags(text, topK = 10, withWeight = False, allowPOS = ('n','ns','vn','v','nz'))

    # print the extracted keywords
    for keyword in keywords_tfidf:
        print keyword + "/"

The output, shown below, differs little from TextRank's.

    叙军/
    地对地/
    空袭/
    弹道导弹/
    远程/
    叙利亚/
    电视台/
    反击/
    战术/
    撒手锏/

--------------------------------------------------------------------------------
/NLP/tfidf/tfidf.md:
--------------------------------------------------------------------------------
# Bag-of-words models
Text feature extraction has two very important models:

- set-of-words model: the set of the words; each element of a set is unique, so each word appears exactly once.
- bag-of-words model: on top of the set, if a word occurs more than once in the document, its occurrence count (frequency) is recorded.

The essential difference is that the bag adds a frequency dimension on top of the set.

Library: from sklearn.feature_extraction.text import CountVectorizer

# The TF-IDF model
Text processing has another feature-extraction method, the TF-IDF model (term frequency-inverse document frequency). TF-IDF is a statistical method for evaluating how important a term is to a document within a collection or corpus. A term's importance grows in proportion to its count in the document but is discounted by its frequency across the corpus. Search engines widely apply variants of TF-IDF weighting to measure or rank the relevance between documents and user queries.

The main idea of TF-IDF: if a term has a high term frequency (TF) in one article and rarely appears in other articles, the term discriminates between classes well and is suitable for classification. TF-IDF is literally TF * IDF. TF is the term's frequency in document d. The idea behind IDF (inverse document frequency): the fewer the documents containing term t, i.e. the smaller n is, the larger IDF becomes, indicating that t discriminates well between classes. If some class of documents C contains m documents with term t, and the other classes together contain k documents with t, then the documents containing t number n = m + k; when m is large, n is large too, and by the IDF formula the IDF value becomes small, suggesting t does not discriminate strongly. In practice, however, a term that appears frequently within one class's documents characterizes that class's texts well; such terms should receive high weights and be chosen as features of that class to distinguish it from other classes.

Library: from sklearn.feature_extraction.text import TfidfTransformer (a short usage sketch appears at the end of this file)

# The vocabulary model
The bag-of-words model expresses well which words a text consists of, but it cannot express the order between words. Borrowing the bag-of-words idea, the generated vocabulary is used to encode the original sentence word by word.

The library function comes from TensorFlow: tf.contrib.learn.preprocessing.VocabularyProcessor()

Suppose the following sentences need processing:

    x_text = ['i love you', 'me too']

Build the vocabulary from these sentences and encode the sentence 'i me too':

    vocab_processor = learn.preprocessing.VocabularyProcessor(max_document_length)
    vocab_processor.fit(x_text)
    print next(vocab_processor.transform(['i me too'])).tolist()
    x = np.array(list(vocab_processor.fit_transform(x_text)))
    print x

After encoding with the vocabulary, x_text becomes:

    [[1 2 3 0]
     [4 5 0 0]]

and 'i me too' encodes to:

    [1, 4, 5, 0]
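Tying the two sklearn classes mentioned above together, a minimal sketch on a toy corpus:

    from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer

    corpus = ["i love you", "me too", "i love nlp"]
    counts = CountVectorizer().fit_transform(corpus)    # bag-of-words counts, one row per document
    tfidf = TfidfTransformer().fit_transform(counts)    # reweight the counts by TF-IDF
    print(tfidf.toarray())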
--------------------------------------------------------------------------------
/NLP/word2vec/word2vec.md:
--------------------------------------------------------------------------------
# Algorithm principles
## The CBOW and Skip-Gram models
1. CBOW predicts the center word from its context
2. Skip-gram predicts the context from the center word
- Q: what is the difference between w2v and a DNN?
  A plain DNN trains too slowly; w2v adds optimizations.
## Replacing the hidden and output layers with a Huffman tree
The leaf nodes are the output layer and their number equals the vocabulary size; the internal nodes play the role of the hidden layer.
In a Huffman tree, leaves with higher weight sit closer to the root. High-weight nodes get short codes and low-weight nodes long codes, so the tree's weighted path length is minimal, in line with information theory.
Unusually, in w2v the left subtree is coded 1 and the right subtree 0, and the left branch's weight is no smaller than the right's.
Walking left is the negative class, Huffman code 1; a sigmoid function decides positive versus negative.
The complexity drops, matching the greedy idea.
- Q: drawbacks?
  A very rare word forces a long walk down the Huffman tree, which is too expensive.
## negative sampling
For a word w with context content(w), sample neg words different from w; those neg words are all negative examples of content(w). Binary logistic regression then yields the model parameters and the word vector for each sampled word.
The model can be trained without a Huffman tree, and it is fairly simple.

# Training word2vec
## Chinese word segmentation
Unlike English, Chinese text is not pre-segmented; use jieba for segmentation.
## Model training
Library: pip install gensim

    def train_word2vec(filename):
        # only train if the model file does not exist yet
        if not os.path.exists(word2vec_file):
            sentences = LineSentence(filename)
            # sg=0 trains with CBOW; sg=1 (skip-gram) is more sensitive to rare words
            model = Word2Vec(sentences,
                size=n_dim, window=5, min_count=2, sg=1, workers=2)
            model.save(word2vec_file)

## Checking the result

    model.most_similar(u'微信')
    model.most_similar(positive=[u'足球'+u'明星'])
    model.most_similar(positive=[u'球星'],negative=[u'明星'])
    model.wv.similarity(u'微信', u'陌陌')

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Project overview

Notes on what the author learned while studying recommender systems, including:
- algorithm principles
- sample data / dataset links
- implementations (python, scala)
- related scripts (mainly hadoop streaming / kv / redis / excel tools)
- reference papers

# Directory layout
## Job requirements
- skills overview
- interview questions
- reference material
## Data processing
- extracting data from Hadoop with MapReduce
- extracting data from KV
- extracting data from Redis
- comparing experiments with matplotlib
- generating Excel files

## Crawler
- crawl app topics from 应用宝 / 小米商城 / 豌豆荚
- crawl news
- crawl novels

## NLP
- word segmentation (jieba)
- word embeddings (Word2Vec)
- FastText
- TextCNN
## User profiling

## Recall
- ItemCF (with and without sklearn)
- UserCF (with and without sklearn)
- LFM
- Graph-Based

## Cold start
- region-based
- interest-based

## Personalized ranking
- FM
- FFM
- DeepFM
- Wide & Deep

--------------------------------------------------------------------------------
/bandit.md:
--------------------------------------------------------------------------------
Exploration and Exploitation (the EE problem) is a common issue in recommendation, mainly about balancing accuracy and diversity.
Exploitation means catering to the user's known interests and recommending what they want; Exploration means worrying that the user will get bored, so we also push some new things - who knows, they might like them.

The multi-armed bandit problem closely resembles the recommendation setting: we do not know each machine's payout distribution and can only pull based on current knowledge. Always pulling the best-known arm is Exploitation, but the arm that seems to pay out the most may not truly be the best, so Exploration is still needed to find out.
# Background
1. Cumulative regret
How much do wrong choices cost; the cumulative regret is:
![formula](https://img-blog.csdnimg.cn/20201119000259278.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM0MjE5OTU5,size_16,color_FFFFFF,t_70#pic_center)
where t is the round and r the reward. The left-hand side can be read as the total payoff under the ideal policy; the right-hand side is the total payoff of your own policy.

2. The Beta distribution
The Beta distribution can be viewed as a distribution over probabilities: when you do not know the true probability of something, it gives the likelihood of every possible probability value.
![formula](https://img-blog.csdnimg.cn/20201119000841403.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM0MjE5OTU5,size_16,color_FFFFFF,t_70#pic_center)
a and b are the numbers of successes and failures in a+b Bernoulli trials.
# The naive bandit algorithm
Try the arms at random a number of times, compute each arm's average payoff, then keep pulling the best one.
# Epsilon-Greedy
Choose a small epsilon in (0,1); with probability epsilon pull a random arm, and with probability 1-epsilon pull the currently best arm, updating the reward estimate with each observed payoff.
Its advantage is that exploration now exists and its weight is adjustable; epsilon=0 amounts to no exploration at all.
Its drawback is that ideally epsilon should be dynamic: explore more early on and less once the estimates stabilize.
# Thompson sampling
Uses the Beta distribution: assume each arm has a payout probability p and maintains its own Beta distribution.
Each round, draw a random number from every arm's current Beta distribution and pull the arm with the largest draw. (A code sketch of Epsilon-Greedy and Thompson sampling appears at the end of this file.)
# UCB
Fixes Epsilon-Greedy's drawback: as noted above, Epsilon-Greedy does not use the information gathered so far, namely each arm's payout rate and how often it has been explored.
From each arm's pull count and payout count we can compute an observed payout probability P'; call the true probability P. The core is computing the gap between them.
Two facts hold: (1) the more often an arm is pulled, the smaller its gap; (2) for arms not being pulled, the gap grows as the rounds go on.
In UCB, p = p' + gap is computed as:
![formula](https://img-blog.csdnimg.cn/20201119002551164.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM0MjE5OTU5,size_16,color_FFFFFF,t_70#pic_center)
where T is the number of trials so far and n is the arm's pull count.
# LinUCB
The traditional implementations have a major flaw: they lack a mechanism to describe the decision process with side information.
In recommender systems, the candidate items are usually taken as the arms of the MAB problem. UCB is a context-free algorithm: it makes no use of the contextual information of the recommendation scenario and shows items to all users with one and the same policy, ignoring each user's interests, preferences, purchasing power, and other individual traits; the same item is received differently by different users in different situations. Context-free MAB algorithms are therefore essentially never adopted in real recommender systems.
In LinUCB, every arm maintains a set of parameters, and the combination of a user and an arm forms a context feature vector (of feature dimension d). A user's expected payoff on each arm is then:
![formula](https://img-blog.csdnimg.cn/20201119003327116.png#pic_center)
For one slot machine, suppose m feedbacks have been collected; the feature matrix can be written Da (of dimension m x d) and the received feedback Ca (of dimension m x 1). Solving the loss below yields the current optimal parameters of each machine - it is simply ridge regression, whose closed-form optimum is easily obtained:
![formula](https://img-blog.csdnimg.cn/20201119004320393.png#pic_center)
Since this extends UCB, besides the expected value we also need an upper confidence bound; the Chernoff-Hoeffding bound can no longer quantify it, but fortunately such a bound has been found:
![formula](https://img-blog.csdnimg.cn/20201119004445232.png#pic_center)
The item to recommend is therefore determined by:
![formula](https://img-blog.csdnimg.cn/20201119004512284.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM0MjE5OTU5,size_16,color_FFFFFF,t_70#pic_center)
Notice that computing the parameters and the final recommendation uses the context features x and the user feedback c, both of which can be stored at every step. Once enough information has been collected, the parameters can be updated dynamically, which is why LinUCB is an online-learning method.
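A minimal numpy sketch of Epsilon-Greedy and Thompson sampling on a 3-armed bandit (the true payout probabilities are made up for the demo and are hidden from both algorithms):

    import numpy as np

    true_p = np.array([0.3, 0.5, 0.6])
    T = 10000

    # Epsilon-Greedy: explore with probability eps, otherwise exploit the best observed mean
    eps, pulls, wins = 0.1, np.ones(3), np.zeros(3)
    for _ in range(T):
        if np.random.rand() < eps:
            arm = np.random.randint(3)
        else:
            arm = int(np.argmax(wins / pulls))
        wins[arm] += np.random.rand() < true_p[arm]
        pulls[arm] += 1

    # Thompson sampling: one Beta(a, b) per arm; a-1 successes, b-1 failures so far
    a, b = np.ones(3), np.ones(3)
    for _ in range(T):
        arm = int(np.argmax(np.random.beta(a, b)))   # sample each posterior, pull the largest draw
        r = np.random.rand() < true_p[arm]
        a[arm] += r
        b[arm] += 1 - r

    print(pulls, a / (a + b))   # both methods should concentrate on the last arm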
--------------------------------------------------------------------------------
/cold_start/ContentItemKNN.py:
--------------------------------------------------------------------------------
import math


def ContentItemKNN(train, content, K, N):
    '''
    :params: train, training data (user -> list of items)
    :params: content, item content info (item -> list of words)
    :params: K, use the Top-K most similar items
    :params: N, number of items to recommend (TopN)
    :return: GetRecommendation, interface returning the recommendations
    '''

    # build the word -> item inverted index
    word_item = {}
    for item in content:
        for word in content[item]:
            if word not in word_item:
                word_item[word] = {}
            word_item[word][item] = 1

    # down-weight words that occur in many items (IDF-like damping)
    for word in word_item:
        for item in word_item[word]:
            word_item[word][item] /= math.log(1 + len(word_item[word]))

    # compute cosine similarity between items
    item_sim = {}
    mo = {}   # squared norm of each item's word vector
    for word in word_item:
        for u in word_item[word]:
            if u not in item_sim:
                item_sim[u] = {}
                mo[u] = 0
            mo[u] += word_item[word][u] ** 2
            for v in word_item[word]:
                if u == v: continue
                if v not in item_sim[u]:
                    item_sim[u][v] = 0
                item_sim[u][v] += word_item[word][u] * word_item[word][v]
    for u in item_sim:
        for v in item_sim[u]:
            item_sim[u][v] /= math.sqrt(mo[u] * mo[v])

    # sort each item's neighbors by similarity
    sorted_item_sim = {k: list(sorted(v.items(), \
                       key=lambda x: x[1], reverse=True)) \
                       for k, v in item_sim.items()}

    # recommendation interface
    def GetRecommendation(user):
        items = {}
        seen_items = set(train[user])
        for item in train[user]:
            for u, _ in sorted_item_sim[item][:K]:
                # drop items the user has already seen
                if u not in seen_items:
                    if u not in items:
                        items[u] = 0
                    items[u] += item_sim[item][u]
        recs = list(sorted(items.items(), key=lambda x: x[1], reverse=True))[:N]
        return recs

    return GetRecommendation
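A toy invocation of the function above (hypothetical train/content data, only to show the expected input shapes):

    # users -> items they interacted with; items -> content words (e.g. title keywords)
    train = {"u1": ["a", "b"], "u2": ["b", "c"]}
    content = {"a": ["sports", "nba"], "b": ["sports", "soccer"], "c": ["soccer", "worldcup"]}
    rec = ContentItemKNN(train, content, K=2, N=5)
    print(rec("u1"))   # -> [('c', score)]: c shares content with b, and u1 has not seen it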
--------------------------------------------------------------------------------
/cold_start/README.md:
--------------------------------------------------------------------------------
# From the data perspective
## User cold start:
1. Serve non-personalized recommendations
- recommend from prior data:
  e.g. popular items (a newly aired hit TV series), daily necessities (tissues, toothbrushes), expert knowledge (a dating site recommending women from the same city to men)
- diversify the choices:
  e.g. for video, recommend one item per major category; cluster images/videos/articles and recommend across clusters
2. Use the information the user provides at sign-up
- demographic data, e.g. age, gender, region, education, occupation, to build a user profile
- exploit social relationships
- interests the user fills in
3. Content-based recommendation
Recommend items of the same category or with the same tags.
4. Probe quickly
Multi-armed bandits.
5. Interest transfer
E.g. transfer Toutiao user features to Douyin.
## Item cold start:
1. Recommend using the item's tag information
- similarity between item attributes and user behavior:
  e.g. the user likes comedies and the item's genre is comedy
- item-to-item similarity:
  e.g. items a and b are highly similar, so recommend b to users who like a
2. Probe quickly
Reinforcement learning.
## System cold start:
1. Use the information users provide at sign-up
2. Content-based recommendation
3. Interest transfer

# From the model perspective
## User cold start
1. CF algorithms
Heuristic similarity; community/social relations.
2. Model-based
Matrix factorization; probabilistic models; Boltzmann machine models.

--------------------------------------------------------------------------------
/cold_start/bandit.md:
--------------------------------------------------------------------------------
(Byte-for-byte duplicate of /bandit.md at the repository root; see that file above.)

--------------------------------------------------------------------------------
/data_tools/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/data_tools/.DS_Store
--------------------------------------------------------------------------------
/data_tools/Excel/add_info_to_excel/data/docid_ttaccount:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/data_tools/Excel/add_info_to_excel/data/docid_ttaccount
--------------------------------------------------------------------------------
/data_tools/Excel/add_info_to_excel/data/result_log_2019110811_12_ab_0.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/data_tools/Excel/add_info_to_excel/data/result_log_2019110811_12_ab_0.xlsx
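modify_excel.py itself is stored only as a link here. Judging purely from the run.sh invocation below (a docid-to-account mapping file, an input xlsx, and an output xlsx), a hypothetical openpyxl sketch of such a merge could look like this (not the repo's actual script):

    import sys
    from openpyxl import load_workbook

    # sys.argv: 1 = docid -> ttaccount mapping (TSV), 2 = input xlsx, 3 = output xlsx
    mapping = {}
    with open(sys.argv[1]) as f:
        for line in f:
            parts = line.strip().split("\t")
            if len(parts) == 2:
                mapping[parts[0]] = parts[1]

    wb = load_workbook(sys.argv[2])
    ws = wb.active
    col = ws.max_column + 1                      # append one new column
    ws.cell(row=1, column=col, value="ttaccount")
    for row in range(2, ws.max_row + 1):         # assumes row 1 is a header and docid sits in column 1
        docid = str(ws.cell(row=row, column=1).value)
        ws.cell(row=row, column=col, value=mapping.get(docid, ""))
    wb.save(sys.argv[3])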
-------------------------------------------------------------------------------- /data_tools/Excel/add_info_to_excel/data/result_log_2019110811_12_ab_0_m.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/data_tools/Excel/add_info_to_excel/data/result_log_2019110811_12_ab_0_m.xlsx -------------------------------------------------------------------------------- /data_tools/Excel/add_info_to_excel/modify_excel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/data_tools/Excel/add_info_to_excel/modify_excel.py -------------------------------------------------------------------------------- /data_tools/Excel/add_info_to_excel/run.sh: -------------------------------------------------------------------------------- 1 | python modify_excel.py ./data/docid_ttaccount ./data/result_log_2019110811_12_ab_0.xlsx ./data/result_log_2019110811_12_ab_0_m.xlsx 2 | -------------------------------------------------------------------------------- /data_tools/Hadoop/get_docid_pv_cl_by_hadoop/hadoop_script/mapper.py: -------------------------------------------------------------------------------- 1 | #coding=gbk 2 | import sys 3 | import time 4 | 5 | for line in sys.stdin: 6 | tups = line.strip().decode("gbk","ignore").split("\t") 7 | if len(tups) < 36: 8 | continue 9 | if tups[0] == "req": 10 | (req,mid,tm,action,topic,mark,title,keywords,userinfo,imsi,url,account,channel,art_source,OS,account_openid,abtestid,sub_topic,image_type,read_duration,position,app_ver,aduser_flag,location,pagetime,rec_reason,adid,vulgar,sub_list,ip,action_source,recall_word,video_type,channel_id,doc_id,product) = tups[0:36] 11 | if channel_id != "1": 12 | continue 13 | #if topic == "outer_video": 14 | # continue 15 | #if ".mp4" in url: 16 | # continue 17 | if art_source == "15": 18 | continue 19 | if not mid: 20 | continue 21 | if not doc_id: 22 | continue 23 | if doc_id.startswith("AD_") == True: 24 | continue 25 | if action != "6": 26 | continue 27 | #try: 28 | # duration = float(read_duration) 29 | #except: 30 | # continue 31 | #if duration >= 0 and duration < 3: 32 | # continue 33 | if product != sys.argv[1]: 34 | continue 35 | if int(app_ver) < 6511: 36 | continue 37 | if rec_reason != '931': 38 | continue 39 | output = doc_id + "\tcl" 40 | print output.encode("gbk","ignore") 41 | if len(tups) < 37: 42 | continue 43 | if tups[0] == "resp": 44 | (resp,mid,tm,article_cnt,num,mark_tag,title,reason,read_num,topic,keywords,pub_time,article_template,img_list,url,account,channel,flag,openid,ab_test_id,sub_topic,userinfo,position,version,aduser_flag,user_location,pagetime,rec_reason,ad_id,vulgar,book_word,ip,recall_word,video_type,channel_id,doc_id,product) = tups[0:37] 45 | if channel_id != "1": 46 | continue 47 | #if topic == "outer_video": 48 | # continue 49 | #if ".mp4" in url: 50 | # continue 51 | if not mid: 52 | continue 53 | if not doc_id: 54 | continue 55 | if doc_id.startswith("AD_") == True: 56 | continue 57 | if product != sys.argv[1]: 58 | continue 59 | if int(version) < 6511: 60 | continue 61 | if rec_reason != '931': 62 | continue 63 | output = doc_id + "\tpv" 64 | print output.encode("gbk","ignore") 65 | -------------------------------------------------------------------------------- /data_tools/Hadoop/get_docid_pv_cl_by_hadoop/hadoop_script/reducer.py: 
-------------------------------------------------------------------------------- 1 | #coding=gbk 2 | import sys 3 | import traceback 4 | import os 5 | import time 6 | import math 7 | 8 | docid = "" 9 | pre_docid = "" 10 | pv = 0 11 | cl = 0 12 | 13 | for line in sys.stdin: 14 | line = line.strip().decode("gbk","ignore") 15 | item = line.split('\t') 16 | if len(item) != 2: 17 | continue 18 | docid = item[0] 19 | if docid != pre_docid and pre_docid != "": 20 | output = pre_docid + "\t" + str(pv) + "\t" + str(cl) 21 | print output.encode("gbk","ignore") 22 | pv = 0 23 | cl = 0 24 | if item[1] == "pv": 25 | pv += 1 26 | elif item[1] == "cl": 27 | cl += 1 28 | pre_docid = docid 29 | 30 | if docid != "": 31 | print docid + "\t" + str(pv) + "\t" + str(cl) 32 | -------------------------------------------------------------------------------- /data_tools/Hadoop/get_docid_pv_cl_by_hadoop/local_test_hadoop.sh: -------------------------------------------------------------------------------- 1 | head -n 100000 data/part-00000 | python hadoop_script/mapper.py sgsapp | sort | python hadoop_script/reducer.py 2 | -------------------------------------------------------------------------------- /data_tools/Hadoop/get_docid_pv_cl_by_hadoop/run_hadoop.sh: -------------------------------------------------------------------------------- 1 | start_day_hour=`date -d "-2 hours" +"%Y%m%d%H"` 2 | start_day_hour=`date -d "-1 days" +"%Y%m%d"`23 3 | start_day_hour_2=${start_day_hour:0:8}" "${start_day_hour:8:10} 4 | echo ${start_day_hour} 5 | echo ${start_day_hour_2} 6 | 7 | product='sgsapp' 8 | 9 | interval=24 10 | 11 | HADOOP_DATA_DIR=** 12 | HADOOP_OUTPUT_DIR=***/${start_day_hour} 13 | 14 | hadoop fs -test -e ${HADOOP_DATA_DIR} 15 | if [ $? -ne 0 ];then 16 | echo "hadoop data dir ${HADOOP_DATA_DIR} does not exists!" 17 | exit -1 18 | fi 19 | 20 | hadoop fs -test -e ${HADOOP_OUTPUT_DIR} 21 | if [ $? -eq 0 ];then 22 | hadoop fs -rmr ${HADOOP_OUTPUT_DIR} 23 | fi 24 | 25 | for((i=0;i<${interval};i++)) 26 | do 27 | 28 | tm=`date -d "${start_day_hour_2} -${i} hours" +"%Y%m%d%H"` 29 | file_dir=${HADOOP_DATA_DIR}/online1_${tm} 30 | #hadoop fs -test -e ${file_dir} 31 | #if [ $? -eq 0 ];then 32 | # inputs=${inputs}" -input ${file_dir}" 33 | #fi 34 | inputs=${inputs}" -input ${file_dir}" 35 | done 36 | 37 | echo "Hadoop input dir: ${inputs}" 38 | 39 | 40 | hadoop org.apache.hadoop.streaming.HadoopStreaming \ 41 | -files hadoop_script \ 42 | -D mapred.map.tasks=64 \ 43 | -D mapred.reduce.tasks=64 \ 44 | -D stream.num.map.output.key.fields=1 \ 45 | -D num.key.fields.for.partition=1 \ 46 | -D mapred.job.name=get_user_mid \ 47 | -D mapred.output.compress=true \ 48 | -D mapred.output.compression.type=BLOCK \ 49 | -D mapred.output.compression.codec=com.hadoop.compression.lzo.LzopCodec \ 50 | -D mapred.task.timeout=3600000 \ 51 | -D mapreduce.map.memory.mb=2048 \ 52 | -D mapreduce.reduce.memory.mb=2048 \ 53 | -mapper "python hadoop_script/mapper.py ${product}" \ 54 | -reducer "python hadoop_script/reducer.py" \ 55 | ${inputs} \ 56 | -output ${HADOOP_OUTPUT_DIR} \ 57 | -inputformat KeyValueTextInputFormat 58 | 59 | 60 | hadoop fs -get ${HADOOP_OUTPUT_DIR} . 
61 | lzop -cd ${start_day_hour}/*.lzo > data/${product}_${start_day_hour} 62 | rm -rf ${start_day_hour} 63 | -------------------------------------------------------------------------------- /data_tools/KV/get_doc_text_by_kv/KVClient.py: -------------------------------------------------------------------------------- 1 | #encoding=gbk 2 | import sys 3 | import httplib 4 | import json 5 | import time 6 | import traceback 7 | import re 8 | import hashlib 9 | 10 | KV_DEFAULT_KEY = "-1" 11 | KV_PER_SEARCH = 200 12 | 13 | def get_title_md5(title): 14 | change_pos = re.compile('"|��|��|,|\.|��|~|��|,|\?|��|:|\(|\)|\[|\]|��|��|-|��|��|��|��|!|;|��') 15 | title = change_pos.sub("", title) 16 | title_md5 = hashlib.md5(title).hexdigest() 17 | return title_md5 18 | 19 | def json_unicode_to_gbk(jo_unicode): 20 | if type(jo_unicode) == dict: 21 | jo_gbk = {} 22 | for key in jo_unicode: 23 | value_gbk = json_unicode_to_gbk(jo_unicode[key]) 24 | jo_gbk[key.encode('gbk')] = value_gbk 25 | return jo_gbk 26 | elif type(jo_unicode) == list: 27 | return [json_unicode_to_gbk(x) for x in jo_unicode] 28 | elif type(jo_unicode) == unicode: 29 | return jo_unicode.encode('gbk','ignore') 30 | else:return jo_unicode 31 | 32 | def transform_key(key): 33 | if not key:key = KV_DEFAULT_KEY 34 | return key 35 | 36 | #get title by kv 37 | def mget_title_by_kv(id_list): 38 | id = [transform_key(x) for x in id_list] 39 | conn = httplib.HTTPConnection("kv.sogou-op.org") 40 | request_len = KV_PER_SEARCH 41 | total_len = len(id_list) 42 | result = [] 43 | for index in range(0,total_len/request_len + 1): 44 | start_index = index*request_len 45 | end_index = (index+1)*request_len 46 | 47 | tmp_id_list = id_list[start_index:end_index] 48 | if len(tmp_id_list) == 0:continue 49 | conn.request("POST" ,url="/mget/110216/article_forward_index/", body="\n".join(tmp_id_list)) 50 | resp = conn.getresponse() 51 | if resp.status == 200: 52 | resp_str = resp.read() 53 | tups = resp_str.strip('\r\n').split('\r\n$') 54 | for term in tups[1:]: 55 | pos = term.find('\r\n') 56 | if pos != -1: 57 | article_str = term[pos+len('\r\n'):] 58 | try: 59 | article = json.loads(article_str,encoding="gbk") 60 | title = article.get("title","").encode('gbk') 61 | except: 62 | traceback.print_exc() 63 | title = None 64 | result.append(title) 65 | continue 66 | result.append("") 67 | else: 68 | print>>sys.stderr,"[ERROR] KV PI Request, ERROR CODE %s"%(resp.status) 69 | result.extend([""] * len(tmp_id_list)) 70 | return result 71 | 72 | 73 | def mget_forward_index(id_list,attr_list=None): 74 | id = [transform_key(x) for x in id_list] 75 | conn = httplib.HTTPConnection("***") 76 | request_len = KV_PER_SEARCH 77 | total_len = len(id_list) 78 | result = [] 79 | for index in range(0,total_len/request_len + 1): 80 | start_index = index*request_len 81 | end_index = (index+1)*request_len 82 | 83 | tmp_id_list = id_list[start_index:end_index] 84 | if len(tmp_id_list) == 0:continue 85 | conn.request("POST" ,url="***", body="\n".join(tmp_id_list)) 86 | resp = conn.getresponse() 87 | if resp.status == 200: 88 | resp_str = resp.read() 89 | tups = resp_str.strip('\r\n').split('\r\n$') 90 | for term in tups[1:]: 91 | pos = term.find('\r\n') 92 | if pos != -1: 93 | article_str = term[pos+len('\r\n'):] 94 | try: 95 | article = json.loads(article_str,encoding="gbk") 96 | article = json_unicode_to_gbk(article) 97 | #article = eval(article_str) 98 | if attr_list: 99 | for attr in article.keys(): 100 | if not attr in attr_list:del article[attr] 101 | 102 | except: 103 | 
traceback.print_exc() 104 | article = None 105 | else: 106 | article = None 107 | result.append(article) 108 | else: 109 | print>>sys.stderr,"[ERROR] KV PI Request, ERROR CODE %s"%(resp.status) 110 | result.extend([None] * len(tmp_id_list)) 111 | time.sleep(0.5) 112 | return result 113 | 114 | def get_kv_doc(id_list): 115 | ret = mget_forward_index(id_list) 116 | return ret 117 | 118 | 119 | -------------------------------------------------------------------------------- /data_tools/KV/get_doc_text_by_kv/KVClient.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/data_tools/KV/get_doc_text_by_kv/KVClient.pyc -------------------------------------------------------------------------------- /data_tools/KV/get_doc_text_by_kv/get_doc_text_multiprocessing.py: -------------------------------------------------------------------------------- 1 | #coding:gbk 2 | import sys 3 | import redis 4 | import KVClient 5 | import traceback 6 | import time,datetime 7 | from multiprocessing import Pool 8 | 9 | def get_doc_text(docid_list): 10 | docid_text_list= [] 11 | sub_docid_list = [] 12 | for i in range(len(docid_list)): 13 | sub_docid_list.append(docid_list[i]) 14 | if len(sub_docid_list) == 200 or i == len(docid_list) - 1: 15 | sub_docid_dict = dict(zip(sub_docid_list, sub_docid_list)) 16 | forward_list = [] 17 | try: 18 | forward_list = KVClient.mget_forward_index(sub_docid_list) 19 | except: 20 | traceback.print_exc() 21 | continue 22 | sub_docid_list = [] 23 | for forward in forward_list: 24 | _id = '' 25 | video_sig = '' 26 | title = '' 27 | url = '' 28 | account_weight = '' 29 | if forward is not None: 30 | if forward.has_key('_id'): 31 | _id = forward['_id'] 32 | if forward.has_key('video_sig'): 33 | video_sig = str(forward['video_sig']) 34 | if forward.has_key('title'): 35 | title = forward['title'] 36 | if forward.has_key('url'): 37 | url = forward['url'] 38 | if forward.has_key('account_weight'): 39 | account_weight = str(forward['account_weight']) 40 | output = _id + '#@#' + video_sig + '#@#' + title + '#@#' + url + '#@#' + account_weight 41 | docid_text_list.append(output) 42 | return docid_text_list 43 | 44 | 45 | def read_data(): 46 | trunk = 10000 47 | icount = 0 48 | texts = [] 49 | for line in file(sys.argv[1]): 50 | texts.append(line) 51 | icount += 1 52 | if icount % trunk == 0: 53 | yield texts 54 | texts = [] 55 | yield texts 56 | 57 | 58 | def parallel_deal(): 59 | texts = read_data() 60 | fw_text = open(sys.argv[2], "w") 61 | cpus = 10 62 | ichunk = 0 63 | for t in texts: 64 | pool = Pool(cpus) 65 | step = int(len(t) / cpus) 66 | tmp = [t[i:i+step] for i in range(0, len(t) , step)] 67 | results = pool.map(get_doc_text, tmp) 68 | pool.close() 69 | pool.join() 70 | for r in results: 71 | for i in r: 72 | fw_text.write(i + "\n") 73 | ichunk += 1 74 | print "finished samples:",len(t) * ichunk 75 | fw_text.close() 76 | 77 | 78 | if __name__ == "__main__": 79 | parallel_deal() 80 | 81 | 82 | -------------------------------------------------------------------------------- /data_tools/KV/get_doc_text_by_kv/run.sh: -------------------------------------------------------------------------------- 1 | python get_doc_text_multiprocessing.py data/total_docs data/docid_text 2 | -------------------------------------------------------------------------------- /data_tools/Matplotlib/matplotlib_ctr/ctr.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/data_tools/Matplotlib/matplotlib_ctr/ctr.jpg -------------------------------------------------------------------------------- /data_tools/Matplotlib/matplotlib_ctr/matplotlib_ctr.py: -------------------------------------------------------------------------------- 1 | import matplotlib 2 | matplotlib.use('agg') 3 | import matplotlib.pyplot as plt 4 | 5 | y1=[0.0098, 0.0101, 0.0103, 0.0107, 0.0102, 0.0102, 0.009, 0.0096, 0.0093, 0.01, 0.0095] 6 | y2=[0.0098, 0.0098, 0.0101, 0.0106, 0.0093, 0.0094, 0.0084, 0.0089, 0.0093, 0.0091, 0.0084] 7 | y3=[0.0099, 0.0099, 0.0104, 0.0111, 0.0102, 0.0099, 0.0092, 0.0095, 0.0099, 0.0101, 0.01] 8 | 9 | date_values = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] 10 | date_labels = ['11-05', '11-06', '11-07', '11-08', '11-09', '11-10', '11-11', '11-12', '11-13', '11-14', '11-15'] 11 | 12 | ctr_values = [0, 0.0075, 0.01, 0.0125, 0.015] 13 | ctr_labels = ['0', '0.0075', '0.01', '0.0125', '0.015'] 14 | 15 | plt.title('abtest id 2 10 vs 5') 16 | plt.xlabel('date') 17 | plt.ylabel('ctr') 18 | 19 | plt.plot(date_values, y1, 'b', label='abtest_id:2') 20 | plt.plot(date_values, y2, 'r',label='abtest_id:5') 21 | plt.plot(date_values, y3, 'g', label='abtest_id:10') 22 | 23 | plt.xticks(date_values, date_labels) 24 | #plt.yticks(ctr_values, ctr_labels) 25 | 26 | for a, b in zip(date_values, y1): 27 | plt.text(a, b, b, ha='center', va='bottom', fontsize=8) 28 | for a, b in zip(date_values, y2): 29 | plt.text(a, b, b, ha='center', va='bottom', fontsize=8) 30 | for a, b in zip(date_values, y3): 31 | plt.text(a, b, b, ha='center', va='bottom', fontsize=8) 32 | 33 | 34 | plt.legend(bbox_to_anchor=[0.7, 0.75]) 35 | plt.grid() 36 | plt.show() 37 | plt.savefig('ctr.jpg') 38 | -------------------------------------------------------------------------------- /data_tools/Matplotlib/matplotlib_ctr/matplotlib_rank.py: -------------------------------------------------------------------------------- 1 | import matplotlib 2 | matplotlib.use('agg') 3 | import matplotlib.pyplot as plt 4 | 5 | y1=[5, 17, 17, 17, 19, 19, 19, 20, 12, 19, 19] 6 | y2=[5, 6, 15, 16, 5, 11, 13, 17, 12, 9, 3] 7 | y3=[6, 7, 20, 20, 20, 18, 20, 19, 20, 20, 20] 8 | 9 | date_values = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] 10 | date_labels = ['11-05', '11-06', '11-07', '11-08', '11-09', '11-10', '11-11', '11-12', '11-13', '11-14', '11-15'] 11 | 12 | rank_values = [0, 5, 10, 15, 20] 13 | rank_labels = ['0', '5', '10', '15', '20'] 14 | 15 | plt.title('abtest id 2 10 vs 5') 16 | plt.xlabel('date') 17 | plt.ylabel('rank') 18 | 19 | plt.plot(date_values, y1, 'b', label='abtest_id:2') 20 | plt.plot(date_values, y2, 'r',label='abtest_id:5') 21 | plt.plot(date_values, y3, 'g', label='abtest_id:10') 22 | 23 | plt.xticks(date_values, date_labels) 24 | #plt.yticks(rank_values, rank_labels) 25 | 26 | for a, b in zip(date_values, y1): 27 | plt.text(a, b, b, ha='center', va='bottom', fontsize=8) 28 | for a, b in zip(date_values, y2): 29 | plt.text(a, b, b, ha='center', va='bottom', fontsize=8) 30 | for a, b in zip(date_values, y3): 31 | plt.text(a, b, b, ha='center', va='bottom', fontsize=8) 32 | 33 | 34 | plt.legend(bbox_to_anchor=[0.5, 0.25]) 35 | plt.grid() 36 | plt.show() 37 | plt.savefig('rank.jpg') 38 | -------------------------------------------------------------------------------- /data_tools/Matplotlib/matplotlib_ctr/rank.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/data_tools/Matplotlib/matplotlib_ctr/rank.jpg -------------------------------------------------------------------------------- /data_tools/Redis/get_doc_hot/get_doc_hot.py: -------------------------------------------------------------------------------- 1 | #coding:gbk 2 | import sys 3 | import traceback 4 | from multiprocessing import Pool 5 | from multiprocessing.pool import ThreadPool 6 | import redis 7 | from ttypes import HotFeature 8 | from thrift.protocol.TBinaryProtocol import TBinaryProtocol 9 | from thrift.transport.TTransport import TMemoryBuffer 10 | 11 | trunk = 20000 12 | 13 | class Hot: 14 | def __init__(self, docid = '', pv = 0, cl = 0, readtime = 0, coldstart_pv = 0, coldstart_cl = 0): 15 | self.docid = docid 16 | self.pv = pv 17 | self.cl = cl 18 | self.readtime = readtime 19 | self.coldstart_pv = coldstart_pv 20 | self.coldstart_cl = coldstart_cl 21 | def to_string(self): 22 | return self.docid + '\t' + str(self.pv) + ' ' + str(self.cl) + ' ' + str('%.2f'%self.readtime) + ' ' + str(self.cl/(self.pv+0.00001)) + ' ' + str(self.coldstart_pv) + ' ' + str(self.coldstart_cl) + ' ' + str(self.coldstart_cl/(self.coldstart_pv+0.00001)) 23 | 24 | 25 | def get_doc_hot_by_thread(docid_list): 26 | docid_hot_list = [] 27 | r = redis.Redis(host='***', port=1680, password='**', charset='gbk') 28 | redis_result_list = [] 29 | try: 30 | redis_result_list = r.mget(docid_list) 31 | if redis_result_list is not None and len(redis_result_list) == len(docid_list): 32 | for i in range(len(redis_result_list)): 33 | if redis_result_list[i] is None or len(redis_result_list[i]) == 0: 34 | continue 35 | docid = docid_list[i] 36 | tMemory_o = TMemoryBuffer(redis_result_list[i]) 37 | tBinaryProtocol_o = TBinaryProtocol(tMemory_o) 38 | hot_feature = HotFeature() 39 | hot_feature.read(tBinaryProtocol_o) 40 | pv = hot_feature.app_show_num if hot_feature.app_show_num is not None else 0 41 | cl = hot_feature.app_read_num if hot_feature.app_read_num is not None else 0 42 | readtime = hot_feature.app_read_duration_double if hot_feature.app_read_duration_double is not None else 0 43 | coldstart_pv = hot_feature.coldstart_show_num if hot_feature.coldstart_show_num is not None else 0 44 | coldstart_cl = hot_feature.coldstart_read_num if hot_feature.coldstart_read_num is not None else 0 45 | 46 | docid_hot_list.append((docid, pv, cl, readtime, coldstart_pv, coldstart_cl)) 47 | except: 48 | traceback.print_exc() 49 | return docid_hot_list 50 | 51 | 52 | def get_doc_hot(docid_list): 53 | docid_hot_dict = {} 54 | pool = ThreadPool(processes=10) 55 | step = 200 56 | tmp = [docid_list[i:i+step] for i in range(0, len(docid_list), step)] 57 | map_results = pool.map(get_doc_hot_by_thread, tmp) 58 | for r in map_results: 59 | for docid, pv, cl, readtime, coldstart_pv, coldstart_cl in r: 60 | docid_hot_dict[docid] = Hot(docid, pv, cl, readtime, coldstart_pv, coldstart_cl) 61 | return docid_hot_dict 62 | 63 | 64 | def read_data(): 65 | icount = 0 66 | texts = [] 67 | for line in file(sys.argv[1]): 68 | texts.append(line.strip()) 69 | icount += 1 70 | if icount % trunk == 0: 71 | yield texts 72 | texts = [] 73 | yield texts 74 | 75 | 76 | def parallel_deal(): 77 | global trunk 78 | texts = read_data() 79 | cpus = 10 80 | ichunk = 0 81 | fw = open(sys.argv[2], 'w') 82 | for t in texts: 83 | pool = Pool(cpus) 84 | step = 2000 85 | results = [] 86 | 
for i in range(0, len(t), step):
87 |             sub_t = t[i : min(i + step, len(t))]
88 |             results.append(pool.apply_async(get_doc_hot, (sub_t,)))
89 |         pool.close()
90 |         pool.join()
91 |         ichunk += 1
92 |         print "finished samples:", trunk * (ichunk - 1) + len(t)
93 |         for i in xrange(len(results)):
94 |             for docid, hot in results[i].get().items():
95 |                 fw.write(hot.to_string() + '\n')
96 |     fw.close()
97 | 
98 | 
99 | if __name__ == "__main__":
100 |     parallel_deal()
101 | 
--------------------------------------------------------------------------------
/data_tools/Redis/get_doc_hot/run.sh:
--------------------------------------------------------------------------------
1 | python get_doc_hot.py ./data/docid ./data/docid_hot
2 | 
--------------------------------------------------------------------------------
/data_tools/Redis/get_doc_hot/ttypes.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/data_tools/Redis/get_doc_hot/ttypes.pyc
--------------------------------------------------------------------------------
/job request/ability.md:
--------------------------------------------------------------------------------
# Engineering skills
- fundamentals: shell, awk, python, java, C++
- big-data processing: hive, hadoop, spark, spark streaming
- caching / messaging: redis, couchbase, kafka
- online services: spring boot, python flask, nginx
- data mining and model training: spark mllib, sklearn, tensorflow
- online model serving: tensorflow serving, docker, python + tornado
- backend result dashboards: html/css/jquery, echarts/pyecharts
- code management: git
--------------------------------------------------------------------------------
/job request/mianshi.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/job request/mianshi.md
--------------------------------------------------------------------------------
/job request/ziliao.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/job request/ziliao.md
--------------------------------------------------------------------------------
/rank/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/.DS_Store
--------------------------------------------------------------------------------
/rank/Basic-DeepFM-model/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/Basic-DeepFM-model/.DS_Store
--------------------------------------------------------------------------------
/rank/Basic-DeepFM-model/DataReader.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | 
3 | class FeatureDictionary(object):
4 |     def __init__(self,trainfile=None,testfile=None,
5 |                  dfTrain=None,dfTest=None,numeric_cols=[],
6 |                  ignore_cols=[]):
7 |         assert not ((trainfile is None) and (dfTrain is None)), "trainfile or dfTrain at least one is set"
8 |         assert not ((trainfile is not None) and (dfTrain is not None)), "only one can be set"
9 |         assert not ((testfile is None) and (dfTest is None)), "testfile or dfTest at least one is set"
10 |         assert not ((testfile is not 
None) and (dfTest is not None)), "only one can be set" 11 | 12 | self.trainfile = trainfile 13 | self.testfile = testfile 14 | self.dfTrain = dfTrain 15 | self.dfTest = dfTest 16 | self.numeric_cols = numeric_cols 17 | self.ignore_cols = ignore_cols 18 | self.gen_feat_dict() 19 | 20 | 21 | 22 | 23 | def gen_feat_dict(self): 24 | if self.dfTrain is None: 25 | dfTrain = pd.read_csv(self.trainfile) 26 | 27 | else: 28 | dfTrain = self.dfTrain 29 | 30 | if self.dfTest is None: 31 | dfTest = pd.read_csv(self.testfile) 32 | 33 | else: 34 | dfTest = self.dfTest 35 | 36 | df = pd.concat([dfTrain,dfTest]) 37 | 38 | self.feat_dict = {} 39 | tc = 0 40 | for col in df.columns: 41 | if col in self.ignore_cols: 42 | continue 43 | if col in self.numeric_cols: 44 | self.feat_dict[col] = tc 45 | tc += 1 46 | 47 | else: 48 | us = df[col].unique() 49 | print(us) 50 | self.feat_dict[col] = dict(zip(us,range(tc,len(us)+tc))) 51 | tc += len(us) 52 | 53 | self.feat_dim = tc 54 | 55 | 56 | class DataParser(object): 57 | def __init__(self,feat_dict): 58 | self.feat_dict = feat_dict 59 | 60 | def parse(self,infile=None,df=None,has_label=False): 61 | assert not ((infile is None) and (df is None)), "infile or df at least one is set" 62 | assert not ((infile is not None) and (df is not None)), "only one can be set" 63 | 64 | 65 | if infile is None: 66 | dfi = df.copy() 67 | else: 68 | dfi = pd.read_csv(infile) 69 | 70 | if has_label: 71 | y = dfi['target'].values.tolist() 72 | dfi.drop(['id','target'],axis=1,inplace=True) 73 | else: 74 | ids = dfi['id'].values.tolist() 75 | dfi.drop(['id'],axis=1,inplace=True) 76 | # dfi for feature index 77 | # dfv for feature value which can be either binary (1/0) or float (e.g., 10.24) 78 | dfv = dfi.copy() 79 | for col in dfi.columns: 80 | if col in self.feat_dict.ignore_cols: 81 | dfi.drop(col,axis=1,inplace=True) 82 | dfv.drop(col,axis=1,inplace=True) 83 | continue 84 | if col in self.feat_dict.numeric_cols: 85 | dfi[col] = self.feat_dict.feat_dict[col] 86 | else: 87 | dfi[col] = dfi[col].map(self.feat_dict.feat_dict[col]) 88 | dfv[col] = 1. 
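# Added note: after this loop each sample is encoded as two parallel lists.
# With a hypothetical feat_dict such as {"age": 0, "gender": {"M": 1, "F": 2}},
# a row (age=30, gender="F") becomes dfi -> [0, 2] and dfv -> [30.0, 1.0]:
# numeric columns keep their fixed index and raw value, while categorical
# columns contribute the index of the observed level and a value of 1.0.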
89 | 90 | xi = dfi.values.tolist() 91 | xv = dfv.values.tolist() 92 | 93 | if has_label: 94 | return xi,xv,y 95 | else: 96 | return xi,xv,ids 97 | 98 | 99 | -------------------------------------------------------------------------------- /rank/Basic-DeepFM-model/DeepFM.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | import tensorflow.compat.v1 as tf 4 | tf.disable_v2_behavior() 5 | from time import time 6 | from sklearn.base import BaseEstimator, TransformerMixin 7 | from sklearn.metrics import roc_auc_score 8 | 9 | class DeepFM(BaseEstimator, TransformerMixin): 10 | 11 | def __init__(self, feature_size, field_size, 12 | embedding_size=8, dropout_fm=[1.0, 1.0], 13 | deep_layers=[32, 32], dropout_deep=[0.5, 0.5, 0.5], 14 | deep_layer_activation=tf.nn.relu, 15 | epoch=10, batch_size=256, 16 | learning_rate=0.001, optimizer="adam", 17 | batch_norm=0, batch_norm_decay=0.995, 18 | verbose=False, random_seed=2016, 19 | use_fm=True, use_deep=True, 20 | loss_type="logloss", eval_metric=roc_auc_score, 21 | l2_reg=0.0, greater_is_better=True): 22 | assert (use_fm or use_deep) 23 | assert loss_type in ["logloss", "mse"], \ 24 | "loss_type can be either 'logloss' for classification task or 'mse' for regression task" 25 | 26 | self.feature_size = feature_size 27 | self.field_size = field_size 28 | self.embedding_size = embedding_size 29 | 30 | self.dropout_fm = dropout_fm 31 | self.deep_layers = deep_layers 32 | self.dropout_dep = dropout_deep 33 | self.deep_layers_activation = deep_layer_activation 34 | self.use_fm = use_fm 35 | self.use_deep = use_deep 36 | self.l2_reg = l2_reg 37 | 38 | self.epoch = epoch 39 | self.batch_size = batch_size 40 | self.learning_rate = learning_rate 41 | self.optimizer_type = optimizer 42 | 43 | self.batch_norm = batch_norm 44 | self.batch_norm_decay = batch_norm_decay 45 | 46 | self.verbose = verbose 47 | self.random_seed = random_seed 48 | self.loss_type = loss_type 49 | self.eval_metric = eval_metric 50 | self.greater_is_better = greater_is_better 51 | self.train_result,self.valid_result = [],[] 52 | 53 | self._init_graph() 54 | 55 | def _init_graph(self): 56 | self.graph = tf.Graph() 57 | with self.graph.as_default(): 58 | tf.set_random_seed(self.random_seed) 59 | 60 | self.feat_index = tf.placeholder(tf.int32, 61 | shape=[None,None], 62 | name='feat_index') 63 | self.feat_value = tf.placeholder(tf.float32, 64 | shape=[None,None], 65 | name='feat_value') 66 | 67 | self.label = tf.placeholder(tf.float32,shape=[None,1],name='label') 68 | self.dropout_keep_fm = tf.placeholder(tf.float32,shape=[None],name='dropout_keep_fm') 69 | self.dropout_keep_deep = tf.placeholder(tf.float32,shape=[None],name='dropout_deep_deep') 70 | self.train_phase = tf.placeholder(tf.bool,name='train_phase') 71 | 72 | self.weights = self._initialize_weights() 73 | 74 | # model 75 | self.embeddings = tf.nn.embedding_lookup(self.weights['feature_embeddings'],self.feat_index) # N * F * K 76 | feat_value = tf.reshape(self.feat_value,shape=[-1,self.field_size,1]) 77 | self.embeddings = tf.multiply(self.embeddings,feat_value) 78 | 79 | 80 | # first order term 81 | self.y_first_order = tf.nn.embedding_lookup(self.weights['feature_bias'],self.feat_index) 82 | self.y_first_order = tf.reduce_sum(tf.multiply(self.y_first_order,feat_value),2) 83 | self.y_first_order = tf.nn.dropout(self.y_first_order,self.dropout_keep_fm[0]) 84 | 85 | # second order term 86 | # sum-square-part 87 | self.summed_features_emb = 
tf.reduce_sum(self.embeddings,1) # None * k 88 | self.summed_features_emb_square = tf.square(self.summed_features_emb) # None * K 89 | 90 | # squre-sum-part 91 | self.squared_features_emb = tf.square(self.embeddings) 92 | self.squared_sum_features_emb = tf.reduce_sum(self.squared_features_emb, 1) # None * K 93 | 94 | #second order 95 | self.y_second_order = 0.5 * tf.subtract(self.summed_features_emb_square,self.squared_sum_features_emb) 96 | self.y_second_order = tf.nn.dropout(self.y_second_order,self.dropout_keep_fm[1]) 97 | 98 | 99 | # Deep component 100 | self.y_deep = tf.reshape(self.embeddings,shape=[-1,self.field_size * self.embedding_size]) 101 | self.y_deep = tf.nn.dropout(self.y_deep,self.dropout_keep_deep[0]) 102 | 103 | for i in range(0,len(self.deep_layers)): 104 | self.y_deep = tf.add(tf.matmul(self.y_deep,self.weights["layer_%d" %i]), self.weights["bias_%d"%i]) 105 | self.y_deep = self.deep_layers_activation(self.y_deep) 106 | self.y_deep = tf.nn.dropout(self.y_deep,self.dropout_keep_deep[i+1]) 107 | 108 | 109 | #----DeepFM--------- 110 | if self.use_fm and self.use_deep: 111 | concat_input = tf.concat([self.y_first_order, self.y_second_order, self.y_deep], axis=1) 112 | elif self.use_fm: 113 | concat_input = tf.concat([self.y_first_order, self.y_second_order], axis=1) 114 | elif self.use_deep: 115 | concat_input = self.y_deep 116 | 117 | self.out = tf.add(tf.matmul(concat_input,self.weights['concat_projection']),self.weights['concat_bias']) 118 | 119 | # loss 120 | if self.loss_type == "logloss": 121 | self.out = tf.nn.sigmoid(self.out) 122 | self.loss = tf.losses.log_loss(self.label, self.out) 123 | elif self.loss_type == "mse": 124 | self.loss = tf.nn.l2_loss(tf.subtract(self.label, self.out)) 125 | # l2 regularization on weights 126 | if self.l2_reg > 0: 127 | self.loss += tf.contrib.layers.l2_regularizer( 128 | self.l2_reg)(self.weights["concat_projection"]) 129 | if self.use_deep: 130 | for i in range(len(self.deep_layers)): 131 | self.loss += tf.contrib.layers.l2_regularizer( 132 | self.l2_reg)(self.weights["layer_%d" % i]) 133 | 134 | 135 | if self.optimizer_type == "adam": 136 | self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate, beta1=0.9, beta2=0.999, 137 | epsilon=1e-8).minimize(self.loss) 138 | elif self.optimizer_type == "adagrad": 139 | self.optimizer = tf.train.AdagradOptimizer(learning_rate=self.learning_rate, 140 | initial_accumulator_value=1e-8).minimize(self.loss) 141 | elif self.optimizer_type == "gd": 142 | self.optimizer = tf.train.GradientDescentOptimizer(learning_rate=self.learning_rate).minimize(self.loss) 143 | elif self.optimizer_type == "momentum": 144 | self.optimizer = tf.train.MomentumOptimizer(learning_rate=self.learning_rate, momentum=0.95).minimize( 145 | self.loss) 146 | 147 | 148 | #init 149 | self.saver = tf.train.Saver() 150 | init = tf.global_variables_initializer() 151 | self.sess = tf.Session() 152 | self.sess.run(init) 153 | 154 | # number of params 155 | total_parameters = 0 156 | for variable in self.weights.values(): 157 | shape = variable.get_shape() 158 | variable_parameters = 1 159 | for dim in shape: 160 | variable_parameters *= dim.value 161 | total_parameters += variable_parameters 162 | if self.verbose > 0: 163 | print("#params: %d" % total_parameters) 164 | 165 | 166 | 167 | 168 | 169 | def _initialize_weights(self): 170 | weights = dict() 171 | 172 | #embeddings 173 | weights['feature_embeddings'] = tf.Variable( 174 | tf.random_normal([self.feature_size,self.embedding_size],0.0,0.01), 175 | 
name='feature_embeddings') 176 | weights['feature_bias'] = tf.Variable(tf.random_normal([self.feature_size,1],0.0,1.0),name='feature_bias') 177 | 178 | 179 | #deep layers 180 | num_layer = len(self.deep_layers) 181 | input_size = self.field_size * self.embedding_size 182 | glorot = np.sqrt(2.0/(input_size + self.deep_layers[0])) 183 | 184 | weights['layer_0'] = tf.Variable( 185 | np.random.normal(loc=0,scale=glorot,size=(input_size,self.deep_layers[0])),dtype=np.float32 186 | ) 187 | weights['bias_0'] = tf.Variable( 188 | np.random.normal(loc=0,scale=glorot,size=(1,self.deep_layers[0])),dtype=np.float32 189 | ) 190 | 191 | 192 | for i in range(1,num_layer): 193 | glorot = np.sqrt(2.0 / (self.deep_layers[i - 1] + self.deep_layers[i])) 194 | weights["layer_%d" % i] = tf.Variable( 195 | np.random.normal(loc=0, scale=glorot, size=(self.deep_layers[i - 1], self.deep_layers[i])), 196 | dtype=np.float32) # layers[i-1] * layers[i] 197 | weights["bias_%d" % i] = tf.Variable( 198 | np.random.normal(loc=0, scale=glorot, size=(1, self.deep_layers[i])), 199 | dtype=np.float32) # 1 * layer[i] 200 | 201 | 202 | # final concat projection layer 203 | 204 | if self.use_fm and self.use_deep: 205 | input_size = self.field_size + self.embedding_size + self.deep_layers[-1] 206 | elif self.use_fm: 207 | input_size = self.field_size + self.embedding_size 208 | elif self.use_deep: 209 | input_size = self.deep_layers[-1] 210 | 211 | glorot = np.sqrt(2.0/(input_size + 1)) 212 | weights['concat_projection'] = tf.Variable(np.random.normal(loc=0,scale=glorot,size=(input_size,1)),dtype=np.float32) 213 | weights['concat_bias'] = tf.Variable(tf.constant(0.01),dtype=np.float32) 214 | 215 | 216 | return weights 217 | 218 | 219 | def get_batch(self,Xi,Xv,y,batch_size,index): 220 | start = index * batch_size 221 | end = (index + 1) * batch_size 222 | end = end if end < len(y) else len(y) 223 | return Xi[start:end],Xv[start:end],[[y_] for y_ in y[start:end]] 224 | 225 | # shuffle three lists simutaneously 226 | def shuffle_in_unison_scary(self, a, b, c): 227 | rng_state = np.random.get_state() 228 | np.random.shuffle(a) 229 | np.random.set_state(rng_state) 230 | np.random.shuffle(b) 231 | np.random.set_state(rng_state) 232 | np.random.shuffle(c) 233 | 234 | 235 | def evaluate(self, Xi, Xv, y): 236 | """ 237 | :param Xi: list of list of feature indices of each sample in the dataset 238 | :param Xv: list of list of feature values of each sample in the dataset 239 | :param y: label of each sample in the dataset 240 | :return: metric of the evaluation 241 | """ 242 | y_pred = self.predict(Xi, Xv) 243 | return self.eval_metric(y, y_pred) 244 | 245 | def predict(self, Xi, Xv): 246 | """ 247 | :param Xi: list of list of feature indices of each sample in the dataset 248 | :param Xv: list of list of feature values of each sample in the dataset 249 | :return: predicted probability of each sample 250 | """ 251 | # dummy y 252 | dummy_y = [1] * len(Xi) 253 | batch_index = 0 254 | Xi_batch, Xv_batch, y_batch = self.get_batch(Xi, Xv, dummy_y, self.batch_size, batch_index) 255 | y_pred = None 256 | while len(Xi_batch) > 0: 257 | num_batch = len(y_batch) 258 | feed_dict = {self.feat_index: Xi_batch, 259 | self.feat_value: Xv_batch, 260 | self.label: y_batch, 261 | self.dropout_keep_fm: [1.0] * len(self.dropout_fm), 262 | self.dropout_keep_deep: [1.0] * len(self.dropout_dep), 263 | self.train_phase: False} 264 | batch_out = self.sess.run(self.out, feed_dict=feed_dict) 265 | 266 | if batch_index == 0: 267 | y_pred = np.reshape(batch_out, 
(num_batch,)) 268 | else: 269 | y_pred = np.concatenate((y_pred, np.reshape(batch_out, (num_batch,)))) 270 | 271 | batch_index += 1 272 | Xi_batch, Xv_batch, y_batch = self.get_batch(Xi, Xv, dummy_y, self.batch_size, batch_index) 273 | 274 | return y_pred 275 | 276 | 277 | def fit_on_batch(self,Xi,Xv,y): 278 | feed_dict = {self.feat_index:Xi, 279 | self.feat_value:Xv, 280 | self.label:y, 281 | self.dropout_keep_fm:self.dropout_fm, 282 | self.dropout_keep_deep:self.dropout_dep, 283 | self.train_phase:True} 284 | 285 | loss,opt = self.sess.run([self.loss,self.optimizer],feed_dict=feed_dict) 286 | 287 | return loss 288 | 289 | def fit(self, Xi_train, Xv_train, y_train, 290 | Xi_valid=None, Xv_valid=None, y_valid=None, 291 | early_stopping=False, refit=False): 292 | """ 293 | :param Xi_train: [[ind1_1, ind1_2, ...], [ind2_1, ind2_2, ...], ..., [indi_1, indi_2, ..., indi_j, ...], ...] 294 | indi_j is the feature index of feature field j of sample i in the training set 295 | :param Xv_train: [[val1_1, val1_2, ...], [val2_1, val2_2, ...], ..., [vali_1, vali_2, ..., vali_j, ...], ...] 296 | vali_j is the feature value of feature field j of sample i in the training set 297 | vali_j can be either binary (1/0, for binary/categorical features) or float (e.g., 10.24, for numerical features) 298 | :param y_train: label of each sample in the training set 299 | :param Xi_valid: list of list of feature indices of each sample in the validation set 300 | :param Xv_valid: list of list of feature values of each sample in the validation set 301 | :param y_valid: label of each sample in the validation set 302 | :param early_stopping: perform early stopping or not 303 | :param refit: refit the model on the train+valid dataset or not 304 | :return: None 305 | """ 306 | has_valid = Xv_valid is not None 307 | for epoch in range(self.epoch): 308 | t1 = time() 309 | self.shuffle_in_unison_scary(Xi_train, Xv_train, y_train) 310 | total_batch = int(len(y_train) / self.batch_size) 311 | for i in range(total_batch): 312 | Xi_batch, Xv_batch, y_batch = self.get_batch(Xi_train, Xv_train, y_train, self.batch_size, i) 313 | self.fit_on_batch(Xi_batch, Xv_batch, y_batch) 314 | 315 | # evaluate training and validation datasets 316 | train_result = self.evaluate(Xi_train, Xv_train, y_train) 317 | self.train_result.append(train_result) 318 | if has_valid: 319 | valid_result = self.evaluate(Xi_valid, Xv_valid, y_valid) 320 | self.valid_result.append(valid_result) 321 | if self.verbose > 0 and epoch % self.verbose == 0: 322 | if has_valid: 323 | print("[%d] train-result=%.4f, valid-result=%.4f [%.1f s]" 324 | % (epoch + 1, train_result, valid_result, time() - t1)) 325 | else: 326 | print("[%d] train-result=%.4f [%.1f s]" 327 | % (epoch + 1, train_result, time() - t1)) 328 | if has_valid and early_stopping and self.training_termination(self.valid_result): 329 | break 330 | 331 | # fit a few more epoch on train+valid until result reaches the best_train_score 332 | if has_valid and refit: 333 | if self.greater_is_better: 334 | best_valid_score = max(self.valid_result) 335 | else: 336 | best_valid_score = min(self.valid_result) 337 | best_epoch = self.valid_result.index(best_valid_score) 338 | best_train_score = self.train_result[best_epoch] 339 | Xi_train = Xi_train + Xi_valid 340 | Xv_train = Xv_train + Xv_valid 341 | y_train = y_train + y_valid 342 | for epoch in range(100): 343 | self.shuffle_in_unison_scary(Xi_train, Xv_train, y_train) 344 | total_batch = int(len(y_train) / self.batch_size) 345 | for i in range(total_batch): 346 | 
Xi_batch, Xv_batch, y_batch = self.get_batch(Xi_train, Xv_train, y_train, 347 | self.batch_size, i) 348 | self.fit_on_batch(Xi_batch, Xv_batch, y_batch) 349 | # check 350 | train_result = self.evaluate(Xi_train, Xv_train, y_train) 351 | if abs(train_result - best_train_score) < 0.001 or \ 352 | (self.greater_is_better and train_result > best_train_score) or \ 353 | ((not self.greater_is_better) and train_result < best_train_score): 354 | break 355 | 356 | 357 | def training_termination(self, valid_result): 358 | if len(valid_result) > 5: 359 | if self.greater_is_better: 360 | if valid_result[-1] < valid_result[-2] and \ 361 | valid_result[-2] < valid_result[-3] and \ 362 | valid_result[-3] < valid_result[-4] and \ 363 | valid_result[-4] < valid_result[-5]: 364 | return True 365 | else: 366 | if valid_result[-1] > valid_result[-2] and \ 367 | valid_result[-2] > valid_result[-3] and \ 368 | valid_result[-3] > valid_result[-4] and \ 369 | valid_result[-4] > valid_result[-5]: 370 | return True 371 | return False 372 | 373 | 374 | 375 | 376 | 377 | 378 | 379 | 380 | 381 | 382 | 383 | 384 | 385 | -------------------------------------------------------------------------------- /rank/Basic-DeepFM-model/__pycache__/DataReader.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/Basic-DeepFM-model/__pycache__/DataReader.cpython-37.pyc -------------------------------------------------------------------------------- /rank/Basic-DeepFM-model/__pycache__/DeepFM.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/Basic-DeepFM-model/__pycache__/DeepFM.cpython-37.pyc -------------------------------------------------------------------------------- /rank/Basic-DeepFM-model/__pycache__/config.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/Basic-DeepFM-model/__pycache__/config.cpython-37.pyc -------------------------------------------------------------------------------- /rank/Basic-DeepFM-model/__pycache__/metrics.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/Basic-DeepFM-model/__pycache__/metrics.cpython-37.pyc -------------------------------------------------------------------------------- /rank/Basic-DeepFM-model/config.py: -------------------------------------------------------------------------------- 1 | TRAIN_FILE = "/Users/wzk/Documents/tensorflow_practice/recommendation/Basic-DeepFM-model/data/train.csv" 2 | TEST_FILE = "/Users/wzk/Documents/tensorflow_practice/recommendation/Basic-DeepFM-model/data/test.csv" 3 | 4 | SUB_DIR = "output" 5 | 6 | 7 | NUM_SPLITS = 3 8 | RANDOM_SEED = 2017 9 | 10 | # types of columns of the dataset dataframe 11 | CATEGORICAL_COLS = [ 12 | # 'ps_ind_02_cat', 'ps_ind_04_cat', 'ps_ind_05_cat', 13 | # 'ps_car_01_cat', 'ps_car_02_cat', 'ps_car_03_cat', 14 | # 'ps_car_04_cat', 'ps_car_05_cat', 'ps_car_06_cat', 15 | # 'ps_car_07_cat', 'ps_car_08_cat', 'ps_car_09_cat', 16 | # 'ps_car_10_cat', 'ps_car_11_cat', 17 | ] 18 | 19 | NUMERIC_COLS = [ 20 | # # binary 21 | # 
"ps_ind_06_bin", "ps_ind_07_bin", "ps_ind_08_bin", 22 | # "ps_ind_09_bin", "ps_ind_10_bin", "ps_ind_11_bin", 23 | # "ps_ind_12_bin", "ps_ind_13_bin", "ps_ind_16_bin", 24 | # "ps_ind_17_bin", "ps_ind_18_bin", 25 | # "ps_calc_15_bin", "ps_calc_16_bin", "ps_calc_17_bin", 26 | # "ps_calc_18_bin", "ps_calc_19_bin", "ps_calc_20_bin", 27 | # numeric 28 | "ps_reg_01", "ps_reg_02", "ps_reg_03", 29 | "ps_car_12", "ps_car_13", "ps_car_14", "ps_car_15", 30 | 31 | # feature engineering 32 | "missing_feat", "ps_car_13_x_ps_reg_03", 33 | ] 34 | 35 | IGNORE_COLS = [ 36 | "id", "target", 37 | "ps_calc_01", "ps_calc_02", "ps_calc_03", "ps_calc_04", 38 | "ps_calc_05", "ps_calc_06", "ps_calc_07", "ps_calc_08", 39 | "ps_calc_09", "ps_calc_10", "ps_calc_11", "ps_calc_12", 40 | "ps_calc_13", "ps_calc_14", 41 | "ps_calc_15_bin", "ps_calc_16_bin", "ps_calc_17_bin", 42 | "ps_calc_18_bin", "ps_calc_19_bin", "ps_calc_20_bin" 43 | ] 44 | -------------------------------------------------------------------------------- /rank/Basic-DeepFM-model/fig/DNN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/Basic-DeepFM-model/fig/DNN.png -------------------------------------------------------------------------------- /rank/Basic-DeepFM-model/fig/DeepFM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/Basic-DeepFM-model/fig/DeepFM.png -------------------------------------------------------------------------------- /rank/Basic-DeepFM-model/fig/FM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/Basic-DeepFM-model/fig/FM.png -------------------------------------------------------------------------------- /rank/Basic-DeepFM-model/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import pandas as pd 4 | import tensorflow as tf 5 | from sklearn.metrics import make_scorer 6 | from sklearn.model_selection import StratifiedKFold 7 | from DataReader import FeatureDictionary, DataParser 8 | from matplotlib import pyplot as plt 9 | 10 | import config 11 | from metrics import gini_norm 12 | from DeepFM import DeepFM 13 | 14 | def load_data(): 15 | dfTrain = pd.read_csv(config.TRAIN_FILE) 16 | dfTest = pd.read_csv(config.TEST_FILE) 17 | 18 | def preprocess(df): 19 | cols = [c for c in df.columns if c not in ['id','target']] 20 | #df['missing_feat'] = np.sum(df[df[cols]==-1].values,axis=1) 21 | df["missing_feat"] = np.sum((df[cols] == -1).values, axis=1) 22 | df['ps_car_13_x_ps_reg_03'] = df['ps_car_13'] * df['ps_reg_03'] 23 | return df 24 | 25 | dfTrain = preprocess(dfTrain) 26 | dfTest = preprocess(dfTest) 27 | 28 | cols = [c for c in dfTrain.columns if c not in ['id','target']] 29 | cols = [c for c in cols if (not c in config.IGNORE_COLS)] 30 | 31 | X_train = dfTrain[cols].values 32 | y_train = dfTrain['target'].values 33 | 34 | X_test = dfTest[cols].values 35 | ids_test = dfTest['id'].values 36 | 37 | cat_features_indices = [i for i,c in enumerate(cols) if c in config.CATEGORICAL_COLS] 38 | 39 | return dfTrain,dfTest,X_train,y_train,X_test,ids_test,cat_features_indices 40 | 41 | def run_base_model_dfm(dfTrain,dfTest,folds,dfm_params): 42 
| fd = FeatureDictionary(dfTrain=dfTrain, 43 | dfTest=dfTest, 44 | numeric_cols=config.NUMERIC_COLS, 45 | ignore_cols = config.IGNORE_COLS) 46 | data_parser = DataParser(feat_dict= fd) 47 | # Xi_train :列的序号 48 | # Xv_train :列的对应的值 49 | Xi_train,Xv_train,y_train = data_parser.parse(df=dfTrain,has_label=True) 50 | Xi_test,Xv_test,ids_test = data_parser.parse(df=dfTest) 51 | 52 | print(dfTrain.dtypes) 53 | 54 | dfm_params['feature_size'] = fd.feat_dim 55 | dfm_params['field_size'] = len(Xi_train[0]) 56 | 57 | y_train_meta = np.zeros((dfTrain.shape[0],1),dtype=float) 58 | y_test_meta = np.zeros((dfTest.shape[0],1),dtype=float) 59 | 60 | _get = lambda x,l:[x[i] for i in l] 61 | 62 | gini_results_cv = np.zeros(len(folds),dtype=float) 63 | gini_results_epoch_train = np.zeros((len(folds),dfm_params['epoch']),dtype=float) 64 | gini_results_epoch_valid = np.zeros((len(folds),dfm_params['epoch']),dtype=float) 65 | 66 | for i, (train_idx, valid_idx) in enumerate(folds): 67 | Xi_train_, Xv_train_, y_train_ = _get(Xi_train, train_idx), _get(Xv_train, train_idx), _get(y_train, train_idx) 68 | Xi_valid_, Xv_valid_, y_valid_ = _get(Xi_train, valid_idx), _get(Xv_train, valid_idx), _get(y_train, valid_idx) 69 | 70 | dfm = DeepFM(**dfm_params) 71 | dfm.fit(Xi_train_, Xv_train_, y_train_, Xi_valid_, Xv_valid_, y_valid_) 72 | 73 | y_train_meta[valid_idx,0] = dfm.predict(Xi_valid_, Xv_valid_) 74 | y_test_meta[:,0] += dfm.predict(Xi_test, Xv_test) 75 | 76 | gini_results_cv[i] = gini_norm(y_valid_, y_train_meta[valid_idx]) 77 | gini_results_epoch_train[i] = dfm.train_result 78 | gini_results_epoch_valid[i] = dfm.valid_result 79 | 80 | y_test_meta /= float(len(folds)) 81 | 82 | # save result 83 | if dfm_params["use_fm"] and dfm_params["use_deep"]: 84 | clf_str = "DeepFM" 85 | elif dfm_params["use_fm"]: 86 | clf_str = "FM" 87 | elif dfm_params["use_deep"]: 88 | clf_str = "DNN" 89 | print("%s: %.5f (%.5f)"%(clf_str, gini_results_cv.mean(), gini_results_cv.std())) 90 | filename = "%s_Mean%.5f_Std%.5f.csv"%(clf_str, gini_results_cv.mean(), gini_results_cv.std()) 91 | _make_submission(ids_test, y_test_meta, filename) 92 | 93 | _plot_fig(gini_results_epoch_train, gini_results_epoch_valid, clf_str) 94 | 95 | return y_train_meta, y_test_meta 96 | 97 | def _make_submission(ids, y_pred, filename="submission.csv"): 98 | pd.DataFrame({"id": ids, "target": y_pred.flatten()}).to_csv( 99 | os.path.join(config.SUB_DIR, filename), index=False, float_format="%.5f") 100 | 101 | 102 | def _plot_fig(train_results, valid_results, model_name): 103 | colors = ["red", "blue", "green"] 104 | xs = np.arange(1, train_results.shape[1]+1) 105 | plt.figure() 106 | legends = [] 107 | for i in range(train_results.shape[0]): 108 | plt.plot(xs, train_results[i], color=colors[i], linestyle="solid", marker="o") 109 | plt.plot(xs, valid_results[i], color=colors[i], linestyle="dashed", marker="o") 110 | legends.append("train-%d"%(i+1)) 111 | legends.append("valid-%d"%(i+1)) 112 | plt.xlabel("Epoch") 113 | plt.ylabel("Normalized Gini") 114 | plt.title("%s"%model_name) 115 | plt.legend(legends) 116 | plt.savefig("fig/%s.png"%model_name) 117 | plt.close() 118 | 119 | 120 | 121 | 122 | 123 | dfm_params = { 124 | "use_fm":True, 125 | "use_deep":True, 126 | "embedding_size":8, 127 | "dropout_fm":[1.0,1.0], 128 | "deep_layers":[32,32], 129 | "dropout_deep":[0.5,0.5,0.5], 130 | "deep_layer_activation":tf.nn.relu, 131 | "epoch":30, 132 | "batch_size":1024, 133 | "learning_rate":0.001, 134 | "optimizer":"adam", 135 | "batch_norm":1, 136 | 
"batch_norm_decay":0.995, 137 | "l2_reg":0.01, 138 | "verbose":True, 139 | "eval_metric":gini_norm, 140 | "random_seed":config.RANDOM_SEED 141 | } 142 | 143 | # load data 144 | dfTrain, dfTest, X_train, y_train, X_test, ids_test, cat_features_indices = load_data() 145 | 146 | # folds 147 | folds = list(StratifiedKFold(n_splits=config.NUM_SPLITS, shuffle=True, 148 | random_state=config.RANDOM_SEED).split(X_train, y_train)) 149 | 150 | #y_train_dfm,y_test_dfm = run_base_model_dfm(dfTrain,dfTest,folds,dfm_params) 151 | y_train_dfm, y_test_dfm = run_base_model_dfm(dfTrain, dfTest, folds, dfm_params) 152 | 153 | 154 | # ------------------ FM Model ------------------ 155 | fm_params = dfm_params.copy() 156 | fm_params["use_deep"] = False 157 | y_train_fm, y_test_fm = run_base_model_dfm(dfTrain, dfTest, folds, fm_params) 158 | 159 | 160 | # ------------------ DNN Model ------------------ 161 | dnn_params = dfm_params.copy() 162 | dnn_params["use_fm"] = False 163 | y_train_dnn, y_test_dnn = run_base_model_dfm(dfTrain, dfTest, folds, dnn_params) -------------------------------------------------------------------------------- /rank/Basic-DeepFM-model/metrics.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | 4 | def gini(actual, pred): 5 | assert (len(actual) == len(pred)) 6 | all = np.asarray(np.c_[actual, pred, np.arange(len(actual))], dtype=np.float) 7 | all = all[np.lexsort((all[:, 2], -1 * all[:, 1]))] 8 | totalLosses = all[:, 0].sum() 9 | giniSum = all[:, 0].cumsum().sum() / totalLosses 10 | 11 | giniSum -= (len(actual) + 1) / 2. 12 | return giniSum / len(actual) 13 | 14 | def gini_norm(actual, pred): 15 | return gini(actual, pred) / gini(actual, actual) 16 | -------------------------------------------------------------------------------- /rank/Basic-DeepFM-model/output/DNN_Mean-0.31183_Std0.29369.csv: -------------------------------------------------------------------------------- 1 | id,target 2 | 0,0.54321 3 | 1,0.54492 4 | 2,0.54194 5 | 3,0.54175 6 | 4,0.54266 7 | 5,0.54154 8 | 6,0.54395 9 | 8,0.54214 10 | 10,0.54383 11 | 11,0.54348 12 | 12,0.54175 13 | 14,0.54253 14 | 15,0.54449 15 | 18,0.54221 16 | 21,0.54521 17 | 23,0.54488 18 | 24,0.54286 19 | 25,0.54416 20 | 27,0.54511 21 | 29,0.54365 22 | 30,0.54272 23 | 31,0.54500 24 | 32,0.54485 25 | 33,0.54332 26 | 37,0.54277 27 | 38,0.54376 28 | 39,0.54478 29 | 40,0.54178 30 | 41,0.54429 31 | 42,0.54348 32 | 44,0.54377 33 | 45,0.54288 34 | 47,0.54235 35 | 49,0.54258 36 | 51,0.54283 37 | 52,0.54266 38 | 53,0.54156 39 | 54,0.54426 40 | 55,0.54256 41 | 56,0.54520 42 | 57,0.54370 43 | 59,0.54359 44 | 60,0.54405 45 | 62,0.54316 46 | 63,0.54492 47 | 67,0.54511 48 | 68,0.54221 49 | 69,0.54548 50 | 70,0.54249 51 | 71,0.54415 52 | 73,0.54462 53 | 75,0.54333 54 | 76,0.54298 55 | 81,0.54271 56 | 82,0.54458 57 | 83,0.54240 58 | 86,0.54130 59 | 87,0.54291 60 | 88,0.54318 61 | 91,0.54448 62 | 92,0.54372 63 | 94,0.54307 64 | 97,0.54280 65 | 100,0.54605 66 | 102,0.54389 67 | 103,0.54320 68 | 105,0.54275 69 | 106,0.54410 70 | 108,0.54228 71 | 113,0.54418 72 | 114,0.54378 73 | 115,0.54324 74 | 118,0.54449 75 | 122,0.54158 76 | 124,0.54290 77 | 126,0.54196 78 | 128,0.54516 79 | 129,0.54435 80 | 130,0.54562 81 | 131,0.54449 82 | 132,0.54567 83 | 133,0.54219 84 | 134,0.54506 85 | 135,0.54226 86 | 136,0.54301 87 | 139,0.54165 88 | 140,0.54528 89 | 141,0.54275 90 | 146,0.54209 91 | 148,0.54334 92 | 151,0.54195 93 | 152,0.54450 94 | 154,0.54214 95 | 157,0.54393 96 | 158,0.54286 97 | 159,0.54520 98 | 
161,0.54310 99 | 164,0.54332 100 | 165,0.54339 101 | 167,0.54313 102 | -------------------------------------------------------------------------------- /rank/Basic-DeepFM-model/output/DeepFM_Mean-0.11470_Std0.37335.csv: -------------------------------------------------------------------------------- 1 | id,target 2 | 0,0.46803 3 | 1,0.55377 4 | 2,0.53959 5 | 3,0.48240 6 | 4,0.42640 7 | 5,0.53783 8 | 6,0.43397 9 | 8,0.54862 10 | 10,0.39992 11 | 11,0.48496 12 | 12,0.56211 13 | 14,0.48791 14 | 15,0.40421 15 | 18,0.38874 16 | 21,0.48275 17 | 23,0.36596 18 | 24,0.54895 19 | 25,0.55286 20 | 27,0.46398 21 | 29,0.25796 22 | 30,0.52880 23 | 31,0.53623 24 | 32,0.38785 25 | 33,0.49019 26 | 37,0.53059 27 | 38,0.32213 28 | 39,0.48938 29 | 40,0.44188 30 | 41,0.39470 31 | 42,0.43526 32 | 44,0.38037 33 | 45,0.44053 34 | 47,0.47693 35 | 49,0.43951 36 | 51,0.52558 37 | 52,0.56112 38 | 53,0.63015 39 | 54,0.28074 40 | 55,0.50253 41 | 56,0.36943 42 | 57,0.41124 43 | 59,0.47449 44 | 60,0.41512 45 | 62,0.45376 46 | 63,0.56464 47 | 67,0.48383 48 | 68,0.44448 49 | 69,0.43281 50 | 70,0.41257 51 | 71,0.36101 52 | 73,0.24134 53 | 75,0.48104 54 | 76,0.41155 55 | 81,0.52558 56 | 82,0.40699 57 | 83,0.35711 58 | 86,0.36253 59 | 87,0.42458 60 | 88,0.57573 61 | 91,0.50545 62 | 92,0.57203 63 | 94,0.53472 64 | 97,0.47725 65 | 100,0.42449 66 | 102,0.49121 67 | 103,0.48863 68 | 105,0.59440 69 | 106,0.40794 70 | 108,0.49273 71 | 113,0.33953 72 | 114,0.50476 73 | 115,0.53934 74 | 118,0.48991 75 | 122,0.50319 76 | 124,0.41910 77 | 126,0.41064 78 | 128,0.36258 79 | 129,0.31102 80 | 130,0.45700 81 | 131,0.55222 82 | 132,0.47241 83 | 133,0.47101 84 | 134,0.45344 85 | 135,0.55308 86 | 136,0.50106 87 | 139,0.42091 88 | 140,0.44550 89 | 141,0.42207 90 | 146,0.46423 91 | 148,0.52868 92 | 151,0.44960 93 | 152,0.26475 94 | 154,0.56421 95 | 157,0.58842 96 | 158,0.42789 97 | 159,0.43978 98 | 161,0.62290 99 | 164,0.54502 100 | 165,0.38185 101 | 167,0.53922 102 | -------------------------------------------------------------------------------- /rank/Basic-DeepFM-model/output/DeepFM_Mean0.01434_Std0.10176.csv: -------------------------------------------------------------------------------- 1 | id,target 2 | 0,0.32278 3 | 1,0.41663 4 | 2,0.44417 5 | 3,0.47512 6 | 4,0.52361 7 | 5,0.33677 8 | 6,0.44370 9 | 8,0.30100 10 | 10,0.48097 11 | 11,0.52027 12 | 12,0.26543 13 | 14,0.40398 14 | 15,0.46376 15 | 18,0.38902 16 | 21,0.35526 17 | 23,0.41269 18 | 24,0.37623 19 | 25,0.30560 20 | 27,0.41068 21 | 29,0.49968 22 | 30,0.48046 23 | 31,0.53911 24 | 32,0.37760 25 | 33,0.42462 26 | 37,0.43910 27 | 38,0.43226 28 | 39,0.40951 29 | 40,0.42573 30 | 41,0.38593 31 | 42,0.45659 32 | 44,0.42400 33 | 45,0.46563 34 | 47,0.41856 35 | 49,0.43669 36 | 51,0.39470 37 | 52,0.35710 38 | 53,0.35468 39 | 54,0.58721 40 | 55,0.34572 41 | 56,0.49496 42 | 57,0.52123 43 | 59,0.43579 44 | 60,0.37308 45 | 62,0.36949 46 | 63,0.36458 47 | 67,0.40002 48 | 68,0.40630 49 | 69,0.51984 50 | 70,0.43685 51 | 71,0.34467 52 | 73,0.49609 53 | 75,0.42494 54 | 76,0.36640 55 | 81,0.41558 56 | 82,0.49456 57 | 83,0.41528 58 | 86,0.38741 59 | 87,0.42377 60 | 88,0.49288 61 | 91,0.43845 62 | 92,0.50188 63 | 94,0.38807 64 | 97,0.43247 65 | 100,0.37401 66 | 102,0.36822 67 | 103,0.39734 68 | 105,0.38886 69 | 106,0.40349 70 | 108,0.29820 71 | 113,0.38590 72 | 114,0.53072 73 | 115,0.37515 74 | 118,0.34776 75 | 122,0.35378 76 | 124,0.35501 77 | 126,0.36031 78 | 128,0.36464 79 | 129,0.48796 80 | 130,0.40816 81 | 131,0.44641 82 | 132,0.40488 83 | 133,0.39336 84 | 134,0.51089 85 | 135,0.49477 86 | 136,0.35754 
87 | 139,0.46074 88 | 140,0.38236 89 | 141,0.37077 90 | 146,0.29805 91 | 148,0.43685 92 | 151,0.45538 93 | 152,0.35027 94 | 154,0.35761 95 | 157,0.36037 96 | 158,0.39519 97 | 159,0.33552 98 | 161,0.41159 99 | 164,0.42803 100 | 165,0.44548 101 | 167,0.39931 102 | -------------------------------------------------------------------------------- /rank/Basic-DeepFM-model/output/DeepFM_Mean0.05735_Std0.20027.csv: -------------------------------------------------------------------------------- 1 | id,target 2 | 0,0.72139 3 | 1,0.51760 4 | 2,0.59032 5 | 3,0.63660 6 | 4,0.50603 7 | 5,0.57058 8 | 6,0.72299 9 | 8,0.62921 10 | 10,0.64393 11 | 11,0.62246 12 | 12,0.64539 13 | 14,0.62271 14 | 15,0.63971 15 | 18,0.74351 16 | 21,0.56603 17 | 23,0.65027 18 | 24,0.62978 19 | 25,0.56364 20 | 27,0.55366 21 | 29,0.64651 22 | 30,0.63995 23 | 31,0.51301 24 | 32,0.65243 25 | 33,0.62960 26 | 37,0.61379 27 | 38,0.62845 28 | 39,0.56194 29 | 40,0.55361 30 | 41,0.65380 31 | 42,0.56262 32 | 44,0.52620 33 | 45,0.56058 34 | 47,0.67995 35 | 49,0.58040 36 | 51,0.57256 37 | 52,0.57186 38 | 53,0.74692 39 | 54,0.63829 40 | 55,0.61376 41 | 56,0.57716 42 | 57,0.66004 43 | 59,0.60760 44 | 60,0.68578 45 | 62,0.68983 46 | 63,0.62641 47 | 67,0.59588 48 | 68,0.59095 49 | 69,0.56658 50 | 70,0.60620 51 | 71,0.53494 52 | 73,0.73047 53 | 75,0.56699 54 | 76,0.68507 55 | 81,0.59263 56 | 82,0.45351 57 | 83,0.65228 58 | 86,0.67729 59 | 87,0.63932 60 | 88,0.62208 61 | 91,0.50822 62 | 92,0.60571 63 | 94,0.61354 64 | 97,0.62548 65 | 100,0.69225 66 | 102,0.50505 67 | 103,0.61700 68 | 105,0.65031 69 | 106,0.66246 70 | 108,0.67469 71 | 113,0.66512 72 | 114,0.53249 73 | 115,0.55344 74 | 118,0.68072 75 | 122,0.53538 76 | 124,0.65328 77 | 126,0.64717 78 | 128,0.73029 79 | 129,0.63653 80 | 130,0.63030 81 | 131,0.50802 82 | 132,0.58770 83 | 133,0.62624 84 | 134,0.44326 85 | 135,0.63895 86 | 136,0.56856 87 | 139,0.53739 88 | 140,0.63811 89 | 141,0.70656 90 | 146,0.57495 91 | 148,0.62791 92 | 151,0.60073 93 | 152,0.73494 94 | 154,0.60894 95 | 157,0.60582 96 | 158,0.54721 97 | 159,0.70589 98 | 161,0.63762 99 | 164,0.53981 100 | 165,0.65285 101 | 167,0.52954 102 | -------------------------------------------------------------------------------- /rank/Basic-DeepFM-model/output/DeepFM_Mean0.26137_Std0.00210.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/Basic-DeepFM-model/output/DeepFM_Mean0.26137_Std0.00210.csv -------------------------------------------------------------------------------- /rank/Basic-DeepFM-model/output/FM_Mean0.23297_Std0.05576.csv: -------------------------------------------------------------------------------- 1 | id,target 2 | 0,0.37706 3 | 1,0.40747 4 | 2,0.28335 5 | 3,0.29426 6 | 4,0.25722 7 | 5,0.28061 8 | 6,0.36010 9 | 8,0.26813 10 | 10,0.50419 11 | 11,0.29652 12 | 12,0.22183 13 | 14,0.28447 14 | 15,0.44019 15 | 18,0.43666 16 | 21,0.32927 17 | 23,0.28054 18 | 24,0.25594 19 | 25,0.27155 20 | 27,0.26363 21 | 29,0.34340 22 | 30,0.37857 23 | 31,0.30758 24 | 32,0.41682 25 | 33,0.26732 26 | 37,0.39802 27 | 38,0.32117 28 | 39,0.39406 29 | 40,0.24067 30 | 41,0.39323 31 | 42,0.40359 32 | 44,0.28283 33 | 45,0.36268 34 | 47,0.31174 35 | 49,0.35913 36 | 51,0.27528 37 | 52,0.28072 38 | 53,0.35339 39 | 54,0.45116 40 | 55,0.33479 41 | 56,0.47107 42 | 57,0.34473 43 | 59,0.34868 44 | 60,0.45001 45 | 62,0.35572 46 | 63,0.39236 47 | 67,0.36394 48 | 68,0.27234 49 | 69,0.51613 50 | 70,0.33188 51 | 71,0.24334 
52 | 73,0.36806 53 | 75,0.41980 54 | 76,0.37788 55 | 81,0.31707 56 | 82,0.33174 57 | 83,0.35205 58 | 86,0.34927 59 | 87,0.45646 60 | 88,0.27697 61 | 91,0.34399 62 | 92,0.42113 63 | 94,0.35314 64 | 97,0.29256 65 | 100,0.44001 66 | 102,0.30431 67 | 103,0.25371 68 | 105,0.41161 69 | 106,0.39540 70 | 108,0.36266 71 | 113,0.36232 72 | 114,0.37745 73 | 115,0.28234 74 | 118,0.37840 75 | 122,0.22426 76 | 124,0.30503 77 | 126,0.35986 78 | 128,0.30551 79 | 129,0.32311 80 | 130,0.35530 81 | 131,0.33789 82 | 132,0.39140 83 | 133,0.30195 84 | 134,0.31456 85 | 135,0.41466 86 | 136,0.24149 87 | 139,0.23444 88 | 140,0.36823 89 | 141,0.36059 90 | 146,0.25876 91 | 148,0.48031 92 | 151,0.31372 93 | 152,0.50250 94 | 154,0.26625 95 | 157,0.28990 96 | 158,0.24773 97 | 159,0.48179 98 | 161,0.46381 99 | 164,0.34177 100 | 165,0.48971 101 | 167,0.30779 102 | -------------------------------------------------------------------------------- /rank/GBDT+LR-Demo/GBDT_LR.py: -------------------------------------------------------------------------------- 1 | import lightgbm as lgb 2 | 3 | import pandas as pd 4 | import numpy as np 5 | 6 | from sklearn.metrics import mean_squared_error 7 | from sklearn.linear_model import LogisticRegression 8 | 9 | print('Load data...') 10 | df_train = pd.read_csv('data/train.csv') 11 | df_test = pd.read_csv('data/test.csv') 12 | 13 | NUMERIC_COLS = [ 14 | "ps_reg_01", "ps_reg_02", "ps_reg_03", 15 | "ps_car_12", "ps_car_13", "ps_car_14", "ps_car_15", 16 | ] 17 | 18 | print(df_test.head(10)) 19 | 20 | y_train = df_train['target'] # training label 21 | y_test = df_test['target'] # testing label 22 | X_train = df_train[NUMERIC_COLS] # training dataset 23 | X_test = df_test[NUMERIC_COLS] # testing dataset 24 | 25 | # create dataset for lightgbm 26 | lgb_train = lgb.Dataset(X_train, y_train) 27 | lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train) 28 | 29 | params = { 30 | 'task': 'train', 31 | 'boosting_type': 'gbdt', 32 | 'objective': 'binary', 33 | 'metric': {'binary_logloss'}, 34 | 'num_leaves': 64, 35 | 'num_trees': 100, 36 | 'learning_rate': 0.01, 37 | 'feature_fraction': 0.9, 38 | 'bagging_fraction': 0.8, 39 | 'bagging_freq': 5, 40 | 'verbose': 0 41 | } 42 | 43 | # number of leaves,will be used in feature transformation 44 | num_leaf = 64 45 | 46 | print('Start training...') 47 | # train 48 | gbm = lgb.train(params, 49 | lgb_train, 50 | num_boost_round=100, 51 | valid_sets=lgb_train) 52 | 53 | print('Save model...') 54 | # save model to file 55 | gbm.save_model('model.txt') 56 | 57 | print('Start predicting...') 58 | # predict and get data on leaves, training data 59 | y_pred = gbm.predict(X_train, pred_leaf=True) 60 | 61 | print(np.array(y_pred).shape) 62 | print(y_pred[:10]) 63 | 64 | print('Writing transformed training data') 65 | transformed_training_matrix = np.zeros([len(y_pred), len(y_pred[0]) * num_leaf], 66 | dtype=np.int64) # N * num_tress * num_leafs 67 | for i in range(0, len(y_pred)): 68 | temp = np.arange(len(y_pred[0])) * num_leaf + np.array(y_pred[i]) 69 | transformed_training_matrix[i][temp] += 1 70 | 71 | 72 | y_pred = gbm.predict(X_test, pred_leaf=True) 73 | print('Writing transformed testing data') 74 | transformed_testing_matrix = np.zeros([len(y_pred), len(y_pred[0]) * num_leaf], dtype=np.int64) 75 | for i in range(0, len(y_pred)): 76 | temp = np.arange(len(y_pred[0])) * num_leaf + np.array(y_pred[i]) 77 | transformed_testing_matrix[i][temp] += 1 78 | 79 | 80 | lm = LogisticRegression(penalty='l2',C=0.05) # logestic model construction 81 | 
lm.fit(transformed_training_matrix,y_train) # fitting the data 82 | y_pred_test = lm.predict_proba(transformed_testing_matrix) # Give the probabilty on each label 83 | 84 | print(y_pred_test) 85 | 86 | NE = (-1) / len(y_pred_test) * sum(((1+y_test)/2 * np.log(y_pred_test[:,1]) + (1-y_test)/2 * np.log(1 - y_pred_test[:,1]))) 87 | print("Normalized Cross Entropy " + str(NE)) -------------------------------------------------------------------------------- /rank/fm.py: -------------------------------------------------------------------------------- 1 | class FM(object): 2 | """ 3 | Factorization Machine with FTRL optimization 4 | """ 5 | def __init__(self, config): 6 | """ 7 | :param config: configuration of hyperparameters 8 | type of dict 9 | """ 10 | # number of latent factors 11 | self.k = config['k'] 12 | self.lr = config['lr'] 13 | self.batch_size = config['batch_size'] 14 | self.reg_l1 = config['reg_l1'] 15 | self.reg_l2 = config['reg_l2'] 16 | # num of features 17 | self.p = feature_length 18 | 19 | def add_placeholders(self): 20 | self.X = tf.sparse_placeholder('float32', [None, self.p]) 21 | self.y = tf.placeholder('int64', [None,]) 22 | self.keep_prob = tf.placeholder('float32') 23 | 24 | def inference(self): 25 | """ 26 | forward propagation 27 | :return: labels for each sample 28 | """ 29 | with tf.variable_scope('linear_layer'): 30 | b = tf.get_variable('bias', shape=[2], 31 | initializer=tf.zeros_initializer()) 32 | w1 = tf.get_variable('w1', shape=[self.p, 2], 33 | initializer=tf.truncated_normal_initializer(mean=0,stddev=1e-2)) 34 | # shape of [None, 2] 35 | self.linear_terms = tf.add(tf.sparse_tensor_dense_matmul (self.X, w1), b) 36 | 37 | with tf.variable_scope('interaction_layer'): 38 | v = tf.get_variable('v', shape=[self.p, self.k], 39 | initializer=tf.truncated_normal_initializer(mean=0, stddev=0.01)) 40 | # shape of [None, 1] 41 | self.interaction_terms = tf.multiply(0.5, 42 | tf.reduce_mean( 43 | tf.subtract( 44 | tf.pow(tf.sparse_tensor_dense_matmul(self.X, v), 2), 45 | tf.sparse_tensor_dense_matmul(tf.pow(self.X, 2), tf.pow(v, 2))), 46 | 1, keep_dims=True)) 47 | # shape of [None, 2] 48 | self.y_out = tf.add(self.linear_terms, self.interaction_terms) 49 | self.y_out_prob = tf.nn.softmax(self.y_out) 50 | 51 | def add_loss(self): 52 | cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.y, logits=self.y_out) 53 | mean_loss = tf.reduce_mean(cross_entropy) 54 | self.loss = mean_loss 55 | tf.summary.scalar('loss', self.loss) 56 | 57 | def add_accuracy(self): 58 | # accuracy 59 | self.correct_prediction = tf.equal(tf.cast(tf.argmax(model.y_out,1), tf.int64), model.y) 60 | self.accuracy = tf.reduce_mean(tf.cast(self.correct_prediction, tf.float32)) 61 | # add summary to accuracy 62 | tf.summary.scalar('accuracy', self.accuracy) 63 | 64 | def train(self): 65 | # Applies exponential decay to learning rate 66 | self.global_step = tf.Variable(0, trainable=False) 67 | # define optimizer 68 | optimizer = tf.train.FtrlOptimizer(self.lr, l1_regularization_strength=self.reg_l1, 69 | l2_regularization_strength=self.reg_l2) 70 | extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) 71 | with tf.control_dependencies(extra_update_ops): 72 | self.train_op = optimizer.minimize(self.loss, global_step=self.global_step) 73 | 74 | def build_graph(self): 75 | """build graph for model""" 76 | self.add_placeholders() 77 | self.inference() 78 | self.add_loss() 79 | self.add_accuracy() 80 | self.train() 81 | 82 | 
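# Usage sketch (added; not part of the original file). The FM class above
# assumes a module-level `feature_length`, reads a global `model` inside
# add_accuracy(), and relies on TF1-style placeholders, so it also needs
# `import tensorflow as tf` (TF 1.x) at the top. A minimal, hypothetical
# driver under those assumptions:
#
#   feature_length = 10000                        # total number of features
#   config = {'k': 8, 'lr': 0.01, 'batch_size': 128,
#             'reg_l1': 2e-2, 'reg_l2': 0.0}
#   model = FM(config)                            # `model` is read globally
#   model.build_graph()                           # placeholders, FM terms,
#                                                 # FTRL train_op, summaries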
-------------------------------------------------------------------------------- /rank/rank_model.md: --------------------------------------------------------------------------------
1 | # Factorization-based ranking
2 | ## FM
3 | Both FM and tree models can learn crossed feature combinations automatically, but tree models only suit continuous features or sparse data with a small value space; a tree model also cannot learn feature combinations that appear rarely or never in the training data, because a tree merely memorizes history and generalizes poorly. FM, by contrast, extracts feature combinations through inner products of latent vectors, so it can learn even combinations that are rare or absent in the training data
4 |
5 | FM can solve both the feature-combination problem and the high-dimensional sparse-matrix problem
6 | ![figure](https://img-blog.csdnimg.cn/20200225162835212.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM0MjE5OTU5,size_16,color_FFFFFF,t_70)
7 | ![figure](https://img-blog.csdnimg.cn/20200225162909827.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM0MjE5OTU5,size_16,color_FFFFFF,t_70)
8 | ## FFM
9 | In FM, each feature has a single latent vector; FFM instead assumes each feature has one latent vector per field.
10 | ![figure](https://img-blog.csdnimg.cn/20200225164600582.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM0MjE5OTU5,size_16,color_FFFFFF,t_70)
11 | ![figure](https://img-blog.csdnimg.cn/20200225164804771.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM0MjE5OTU5,size_16,color_FFFFFF,t_70)
12 | FFM has 10 crossed-feature terms here, as shown in the figure below
13 | ![figure](https://img-blog.csdnimg.cn/20200225164814121.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM0MjE5OTU5,size_16,color_FFFFFF,t_70)
14 | where red marks the field id and blue the feature id
15 |
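The O(kn) trick behind FM's second-order term can be sanity-checked in a few lines. The sketch below is added for illustration (the shapes `n`, `k` and all values are made up); it verifies that the naive pairwise form equals the sum-of-squares rewrite that this repo's fm.py and DeepFM.py implement:

```python
import numpy as np

n, k = 6, 3                                   # n features, k latent factors
rng = np.random.default_rng(0)
V = rng.normal(scale=0.01, size=(n, k))       # one latent vector per feature
x = rng.integers(0, 2, size=n).astype(float)  # a sparse-ish input row

# naive pairwise form: sum_{i<j} <v_i, v_j> * x_i * x_j  -- O(k * n^2)
pairwise = sum(V[i] @ V[j] * x[i] * x[j]
               for i in range(n) for j in range(i + 1, n))

# FM rewrite: 0.5 * sum_f [ (sum_i v_if x_i)^2 - sum_i v_if^2 x_i^2 ]  -- O(k * n)
fast = 0.5 * np.sum((V.T @ x) ** 2 - (V ** 2).T @ (x ** 2))

assert np.isclose(pairwise, fast)
```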
16 | # Tree-model ranking
17 | ## GBDT+LR
18 | Facebook adopted this model, with at most 500 trees and at most 12 nodes per tree; there are roughly three ways to combine the features
19 | 1. one-hot for discrete features + GBDT for continuous features
20 | 2. one-hot for low-frequency discrete features + GBDT for continuous / high-frequency discrete features
21 | 3. one-hot for raw features + GBDT for ID-type features + GBDT for non-ID features
22 | ![figure](https://img-blog.csdnimg.cn/20200225193555470.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM0MjE5OTU5,size_16,color_FFFFFF,t_70)
23 | [How to prepare features for LR] For a linear model to learn the non-linear relation between the raw features and the target, the raw features usually need non-linear transformations. Common ones include feature clustering, discretizing continuous features (equal-frequency or equal-width binning, or using a tree model's split gain to find the best cut points), and feature crossing (sums, products, category combinations, and so on)
24 |
25 | [Why GBDT rather than RF to build the trees]: much practice shows GBDT outperforms RF here, and in GBDT the earlier trees split mainly on features that discriminate for the majority of samples, while the later trees focus on the minority of samples whose residuals are still large after the first N trees. Preferring globally discriminative features first, and only then features that discriminate for a few samples, is the more reasonable order, which is presumably why GBDT is used
26 |
27 | - GBDT is only a memory of history; it has no generalization ability
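The leaf-index encoding that feeds the LR can be condensed to a few lines. This sketch is illustrative only (toy data and made-up hyper-parameters); the full demo lives in GBDT+LR-Demo/GBDT_LR.py in this repo:

```python
import numpy as np
import lightgbm as lgb
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import OneHotEncoder

rng = np.random.default_rng(0)
X = rng.normal(size=(1000, 8))                # toy features
y = rng.integers(0, 2, size=1000)             # toy binary labels

gbm = lgb.LGBMClassifier(n_estimators=100, num_leaves=64).fit(X, y)
leaves = gbm.predict(X, pred_leaf=True)       # (n_samples, n_trees) leaf ids

# one-hot the leaf id of every tree: each sample becomes a sparse indicator
# vector of the paths it fell into, which the linear model then weighs
enc = OneHotEncoder()
lr = LogisticRegression(penalty='l2', C=0.05, max_iter=1000)
lr.fit(enc.fit_transform(leaves), y)
```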
28 | # Deep-model ranking
29 | For the development history, see my article [Recommendation algorithms — CTR prediction](https://blog.csdn.net/qq_34219959/article/details/103822973)
30 | Here I only draw the structures and note some interview points
31 | - LR crosses raw features manually
32 | - FM automatically learns second-order crosses between xi and xj
33 | - PNN does second-order crossing with inner and outer products
34 | - NFM and AFM learn second-order crosses via the Bi-Interaction approach
35 | - Higher order: DCN crosses features to arbitrary order without adding network parameters
36 | - DIN applies an activation-unit step after the embedding layer to learn the distribution of user interests before feeding the DNN
37 | ## FNN
38 | ![figure](https://img-blog.csdnimg.cn/20200225194754116.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM0MjE5OTU5,size_16,color_FFFFFF,t_70)
39 | - uses a DNN to refine higher-order features
40 | - the deep part of Wide & Deep has exactly this structure
41 | ## Wide Deep
42 | ![figure](https://img-blog.csdnimg.cn/20200225222559707.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM0MjE5OTU5,size_16,color_FFFFFF,t_70)
43 | Feature-engineered discrete features: the linear model
44 | Discrete + continuous: the DNN
45 | ## Deepfm
46 | ![figure](https://img-blog.csdnimg.cn/20200225222644220.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM0MjE5OTU5,size_16,color_FFFFFF,t_70)
47 | Discrete + continuous: the linear model
48 | Discrete: FM
49 | Discrete + continuous: the DNN
50 | ## PNN
51 | ![figure](https://img-blog.csdnimg.cn/2020022522430542.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM0MjE5OTU5,size_16,color_FFFFFF,t_70)
52 | - z is a copy of the embedding layer; p comes in two variants, IPNN and OPNN
53 | - embedding size: M, field count: N
54 | - IPNN uses the inner product, OPNN uses (outer-product) matrices
55 |
56 | ## NFM
57 | ![figure](https://img-blog.csdnimg.cn/2020022522462527.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM0MjE5OTU5,size_16,color_FFFFFF,t_70)
58 | - the "Bi-Interaction Layer" sounds fancy, but it is simply the computation of FM's second-order term
59 | ## AFM
60 | ![figure](https://img-blog.csdnimg.cn/20200225224657238.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM0MjE5OTU5,size_16,color_FFFFFF,t_70)
61 | - attention amounts to a weighting step, so the prediction formula becomes ![figure](https://img-blog.csdnimg.cn/20200301220346890.png)
62 | the circle-with-a-dot symbol denotes the element-wise product
63 | ![figure](https://img-blog.csdnimg.cn/20200301220619511.png)
64 | - the last two parts are AFM's real innovation, namely the attention net
65 | ![figure](https://img-blog.csdnimg.cn/20200301221135722.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM0MjE5OTU5,size_16,color_FFFFFF,t_70)
66 | - no DNN on top
67 | ## DCN
68 | - proposes a new cross network that explicitly applies feature crossing at every layer, effectively learning bounded-degree predictive cross features without manual feature engineering or exhaustive search
69 | ![figure](https://img-blog.csdnimg.cn/20200225224718538.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM0MjE5OTU5,size_16,color_FFFFFF,t_70)
70 | Cross layer:
71 | with L cross-network layers, the crossing degree is L + 1
72 | ![figure](https://img-blog.csdnimg.cn/20200301211911362.png)
73 | ![figure](https://img-blog.csdnimg.cn/20200301211816986.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM0MjE5OTU5,size_16,color_FFFFFF,t_70)
74 | ## MLR
75 | fits the non-linear classification surface of a high-dimensional space with a piecewise-linear scheme
76 | ![figure](https://img-blog.csdnimg.cn/20200301221547838.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM0MjE5OTU5,size_16,color_FFFFFF,t_70)
77 | - MLR introduces L1 and L2 when modeling, so the model is highly sparse and both learning and online prediction perform better
78 | ## DIN
79 |
80 | ![figure](https://img-blog.csdnimg.cn/20200225224752423.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM0MjE5OTU5,size_16,color_FFFFFF,t_70)
81 | - user features, user-behavior features, ad features, context features
82 | - after embedding, a weighted sum is used, i.e. attention
83 | ![figure](https://img-blog.csdnimg.cn/20200301224310165.png)
84 | - the evaluation metric is GAUC
85 | ![figure](https://img-blog.csdnimg.cn/2020030122442342.png)
86 | each user's AUC is weighted by that user's number of impressions or clicks
87 | - the Dice activation function.
88 | PReLU and ReLU assume the split point is always 0; Dice gives each yi a probability pi, computed in two main steps: standardize yi, then apply a sigmoid transform
89 | ![figure](https://img-blog.csdnimg.cn/20200301225707515.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM0MjE5OTU5,size_16,color_FFFFFF,t_70)
90 | - adaptive regularization: the regularization strength is adapted to how often each feature id occurs; frequent ids get weaker regularization, rare ids get stronger regularization
91 | ## DIEN
92 | ![figure](https://img-blog.csdnimg.cn/20200225224827644.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM0MjE5OTU5,size_16,color_FFFFFF,t_70)
93 | - DIN ignores the temporal order within the user history; DIEN uses GRUs to model the user history as a time series
94 | - the user history is certainly a time series: feed it into an RNN, and the last state can be taken to contain all historical information. The author therefore models user interest with a two-layer GRU.
95 | - the embedding vectors of items the user has interacted with are fed into the first GRU layer, whose outputs are the user's interest at each time step. This layer is called the Interest Extraction Layer
96 | - the outputs of the first layer are fed into the second GRU layer, and an attention score (computed from the first layer's output vectors and the candidate item) controls the update gate of the second GRU. This layer is called the Interest Evolving Layer.
97 | - the last state of the Interest Evolving Layer serves as the vector representation of user interest and is fed into the MLP together with the ad and context features to predict the click-through rate.
98 | ## DSIN
99 | [DSIN](https://zhuanlan.zhihu.com/p/97015090)
100 | ![figure](https://img-blog.csdnimg.cn/20200225224933683.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM0MjE5OTU5,size_16,color_FFFFFF,t_70)
101 | the main contribution is dividing the user's historical click behavior into sessions, learning a session embedding for each session with a Transformer, and finally modeling the session sequence with a BiLSTM
102 | - the Session Division Layer divides the user's historical behavior into sessions
103 | a gap of more than 30 minutes marks a session boundary.
104 | - the Session Interest Extractor Layer learns the representation of each session
105 | behaviors within the same session are highly correlated, while a few casual behaviors inside a session drift away from what the session expresses overall. To capture the correlation between behaviors in the same session while reducing the influence of unrelated behaviors,
106 | DSIN models each session with **multi-head self-attention**. To capture the order between different sessions, DSIN uses **Bias Encoding**
107 | Bias Encoding:
108 | ![figure](https://img-blog.csdnimg.cn/20200302104939957.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM0MjE5OTU5,size_16,color_FFFFFF,t_70)
109 | multi-head self-attention:
110 | the k-th session is divided into H heads
111 | ![](https://img-blog.csdnimg.cn/20200302105442214.png)
112 | - the Session Interest Interacting Layer learns how session interests evolve
113 | a bidirectional LSTM models the evolution between sessions
114 | - the Session Interest Activating Layer learns the relevance between the current item and the historical click sessions
115 | an attention mechanism captures the relevance between the item and each session: the closer a user session is to the target item, the larger the weight it should receive
116 | ## MIND
117 | models a user's diverse interests for the recall stage
118 | if expressing a user's diverse interests with a single vector is hard, why not use a set of vectors? Concretely, if we can cluster the embeddings of the user's historical behaviors, each resulting cluster represents one group of the user's interests
119 |
120 | Capsule Network
121 | a capsule takes a set of vectors as input, applies an affine transformation to them, computes a weighted sum, and feeds the weighted sum through a non-linear activation function to obtain a single output vector
122 | with K capsules, we get K output vectors
123 | ![figure](https://img-blog.csdnimg.cn/20200404164212641.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM0MjE5OTU5,size_16,color_FFFFFF,t_70)
124 | ![figure](https://img-blog.csdnimg.cn/20200404165952426.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM0MjE5OTU5,size_16,color_FFFFFF,t_70)
125 |
126 | ## MIMN
127 | - **why UIC**: (background) storing user behavior sequences takes too much space, and RNN-style models are slow. (improvement) a separate module, UIC, carries out the modeling of the user behavior sequence. The UIC server stores each user's latest behavioral interests, and its core lies in the update strategy: the user state is updated only on concrete trigger events, not on each request.
128 | - one part is NTM's basic memory-read and memory-write operations; the other is a memory induction unit that uses multi-channel GRUs to extract higher-order information. The right side of the network is the traditional, classic embedding+MLP structure
129 | ## DMR
130 | DMR (Deep Match to Rank)
131 | represents U2I relevance with a User-to-Item subnetwork and an Item-to-Item subnetwork, further improving the model's expressive power
132 | ![figure](https://img-blog.csdnimg.cn/20200325092025809.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM0MjE5OTU5,size_16,color_FFFFFF,t_70)
-------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/FFM_model.py: --------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import pandas as pd
3 | import numpy as np
4 | import os
5 |
6 |
7 | input_x_size = 20
8 | field_size = 2
9 |
10 | vector_dimension = 3
11 |
12 | total_plan_train_steps = 1000
13 | # use SGD: one gradient-descent update per sample
14 | batch_size = 1
15 |
16 | all_data_size = 1000
17 |
18 | lr = 0.01
19 |
20 | MODEL_SAVE_PATH = "TFModel"
21 | MODEL_NAME = "FFM"
22 |
23 | def createTwoDimensionWeight(input_x_size,field_size,vector_dimension):
24 | weights = tf.truncated_normal([input_x_size,field_size,vector_dimension])
25 |
26 | tf_weights = tf.Variable(weights)
27 |
28 | return tf_weights
29 |
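# Added note: createTwoDimensionWeight above returns the FFM latent weights
# with shape (input_x_size, field_size, vector_dimension), i.e. n * f * k:
# each feature keeps a separate k-dimensional vector per field, and
# inference() below pairs feature i's vector for field(j) with feature j's
# vector for field(i) when it scores the (i, j) cross term.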
30 | def createOneDimensionWeight(input_x_size):
31 | weights = tf.truncated_normal([input_x_size])
32 | tf_weights = tf.Variable(weights)
33 | return tf_weights
34 |
35 | def createZeroDimensionWeight():
36 | weights = tf.truncated_normal([1])
37 | tf_weights = tf.Variable(weights)
38 | return tf_weights
39 |
40 | def inference(input_x,input_x_field,zeroWeights,oneDimWeights,thirdWeight):
41 | """Compute the output of the FFM regression model."""
42 |
43 | secondValue = tf.reduce_sum(tf.multiply(oneDimWeights,input_x,name='secondValue'))
44 |
45 | firstTwoValue = tf.add(zeroWeights, secondValue, name="firstTwoValue")
46 |
47 | thirdValue = tf.constant(0.0,dtype=tf.float32)  # accumulator for the pairwise term
48 | input_shape = input_x_size
49 |
50 | for i in range(input_shape):
51 | featureIndex1 = i
52 | fieldIndex1 = int(input_x_field[i])
53 | for j in range(i+1,input_shape):
54 | featureIndex2 = j
55 | fieldIndex2 = int(input_x_field[j])
56 | vectorLeft = tf.convert_to_tensor([[featureIndex1,fieldIndex2,d] for d in range(vector_dimension)])
57 | weightLeft = tf.gather_nd(thirdWeight,vectorLeft)
58 | weightLeftAfterCut = tf.squeeze(weightLeft)
59 |
60 | vectorRight = tf.convert_to_tensor([[featureIndex2,fieldIndex1,d] for d in range(vector_dimension)])
61 | weightRight = tf.gather_nd(thirdWeight,vectorRight)
62 | weightRightAfterCut = tf.squeeze(weightRight)
63 |
64 | tempValue = tf.reduce_sum(tf.multiply(weightLeftAfterCut,weightRightAfterCut))
65 |
66 | indices2 = [i]
67 | indices3 = [j]
68 |
69 | xi = tf.squeeze(tf.gather_nd(input_x, indices2))
70 | xj = tf.squeeze(tf.gather_nd(input_x, indices3))
71 |
72 | product = tf.reduce_sum(tf.multiply(xi, xj))
73 |
74 | secondItemVal = tf.multiply(tempValue, product)
75 |
76 | thirdValue = thirdValue + secondItemVal  # fix: the original tf.assign(...) op was never run, so the pairwise term was silently dropped
77 |
78 | return tf.add(firstTwoValue,thirdValue)
79 |
80 | def gen_data():
81 | labels = [-1,1]
82 | y = [np.random.choice(labels,1)[0] for _ in range(all_data_size)]
83 | x_field = [i // 10 for i in range(input_x_size)]
84 | x = np.random.randint(0,2,size=(all_data_size,input_x_size))
85 | return x,y,x_field
86 |
87 |
88 | if __name__ == '__main__':
89 | global_step = tf.Variable(0,trainable=False)
90 | trainx,trainy,trainx_field = gen_data()
91 | #
92 | input_x = tf.placeholder(tf.float32,[input_x_size ])
93 | input_y = tf.placeholder(tf.float32)
94 | #
95 |
96 | lambda_w = tf.constant(0.001, name='lambda_w')
97 | lambda_v = tf.constant(0.001, name='lambda_v')
98 |
99 | zeroWeights = createZeroDimensionWeight()
100 |
101 | oneDimWeights = createOneDimensionWeight(input_x_size)
102 |
103 | thirdWeight = createTwoDimensionWeight(input_x_size, # create the weight tensor for the second-order terms
104 | field_size,
105 | vector_dimension) # n * f * k
106 |
107 | y_ = inference(input_x, trainx_field,zeroWeights,oneDimWeights,thirdWeight)
108 |
109 | l2_norm = tf.reduce_sum(
110 | tf.add(
111 | tf.multiply(lambda_w, tf.pow(oneDimWeights, 2)),
112 | tf.reduce_sum(tf.multiply(lambda_v, tf.pow(thirdWeight, 2)),axis=[1,2])
113 | )
114 | )
115 |
116 | loss = tf.log(1 + tf.exp(-input_y * y_)) + l2_norm
117 |
118 | train_step = tf.train.GradientDescentOptimizer(learning_rate=lr).minimize(loss)
119 |
120 | saver = tf.train.Saver()
121 | with tf.Session() as sess:
122 | sess.run(tf.global_variables_initializer())
123 | for i in range(total_plan_train_steps):
124 | for t in range(all_data_size):
125 | input_x_batch = trainx[t]
126 | input_y_batch = trainy[t]
127 | predict_loss,_, steps = sess.run([loss,train_step, global_step],
128 | feed_dict={input_x: input_x_batch, input_y: input_y_batch})
129 |
130 | print("After {step} training step(s) 
/rank/recommendation-FFM-Demo/TFModel/FFM-0.data-00000-of-00001:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM-0.data-00000-of-00001
--------------------------------------------------------------------------------
/rank/recommendation-FFM-Demo/TFModel/FFM-0.index:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM-0.index
--------------------------------------------------------------------------------
/rank/recommendation-FFM-Demo/TFModel/FFM-0.meta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM-0.meta
--------------------------------------------------------------------------------
/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523526908.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523526908.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523527022.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523527022.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523527136.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523527136.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523527252.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523527252.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523527416.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523527416.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530263.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530263.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530409.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530409.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530500.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530500.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530509.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530509.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530517.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530517.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530526.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530526.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530538.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530538.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530548.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530548.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530556.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530556.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530568.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530568.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530579.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530579.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530589.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530589.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530598.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530598.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530606.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530606.meituan-sxwdeMacBook-Pro-4.local 
-------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530618.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530618.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530632.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530632.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530643.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530643.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530653.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530653.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530660.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530660.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530668.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530668.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530675.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530675.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530686.meituan-sxwdeMacBook-Pro-4.local: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530686.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530695.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530695.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530703.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530703.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530710.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530710.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530718.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530718.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530726.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530726.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530736.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530736.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530744.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530744.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530751.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530751.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530759.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530759.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530766.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530766.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530774.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530774.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530781.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530781.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530789.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530789.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530798.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530798.meituan-sxwdeMacBook-Pro-4.local 
-------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530808.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530808.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530820.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530820.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530827.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530827.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530835.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530835.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530844.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530844.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530852.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530852.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530860.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530860.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530868.meituan-sxwdeMacBook-Pro-4.local: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530868.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530875.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530875.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530883.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530883.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530891.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530891.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530898.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530898.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530906.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530906.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530913.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530913.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530921.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530921.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530930.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530930.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530938.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530938.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530945.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530945.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530953.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530953.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530961.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530961.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530968.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530968.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530976.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530976.meituan-sxwdeMacBook-Pro-4.local 
-------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530984.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523530984.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537511.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537511.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537521.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537521.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537530.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537530.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537538.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537538.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537547.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537547.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537556.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537556.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537565.meituan-sxwdeMacBook-Pro-4.local: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537565.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537574.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537574.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537583.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537583.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537591.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537591.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537600.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537600.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537608.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537608.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537616.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537616.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537624.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537624.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537632.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537632.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537641.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537641.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537652.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537652.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537662.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537662.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537672.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537672.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537682.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537682.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537691.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537691.meituan-sxwdeMacBook-Pro-4.local 
-------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537700.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537700.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537709.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537709.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537719.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537719.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537728.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537728.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537736.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537736.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537745.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537745.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537754.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537754.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537763.meituan-sxwdeMacBook-Pro-4.local: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537763.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537772.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537772.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537781.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537781.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537790.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537790.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537799.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537799.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537807.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537807.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537815.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537815.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537825.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537825.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537834.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537834.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537843.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537843.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537852.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537852.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537861.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537861.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537871.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537871.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537880.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537880.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537888.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537888.meituan-sxwdeMacBook-Pro-4.local 
--------------------------------------------------------------------------------
/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537897.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537897.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537906.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537906.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537915.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537915.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537925.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FFM-Demo/TFModel/FFM/events.out.tfevents.1523537925.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/rank/recommendation-FFM-Demo/TFModel/checkpoint:
--------------------------------------------------------------------------------
1 | model_checkpoint_path: "FFM-0"
2 | all_model_checkpoint_paths: "FFM-0"
3 | 
--------------------------------------------------------------------------------
/rank/recommendation-FM-demo/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FM-demo/.DS_Store
--------------------------------------------------------------------------------
/rank/recommendation-FM-demo/FM_model.py:
--------------------------------------------------------------------------------
1 | from scipy.sparse import csr_matrix
2 | import numpy as np
3 | import pandas as pd
4 | import tensorflow.compat.v1 as tf
5 | tf.disable_v2_behavior()
6 | from tqdm import tqdm
7 | 
8 | 
9 | def vectorize_dic(dic, ix=None, p=None, n=0, g=0):
10 |     """
11 |     Build a sparse one-hot design matrix from a dict of feature lists.
12 |     dic -- dictionary of feature lists; keys are the names of the features
13 |     ix -- dict mapping feature values to column indices (default None)
14 |     p -- dimension of the feature space (number of columns in the sparse matrix) (default None)
15 |     n -- number of samples, g -- number of feature groups
16 |     """
17 |     if ix is None:
18 |         ix = dict()
19 | 
20 |     nz = n * g  # one nonzero entry per (sample, feature group)
21 |     col_ix = np.empty(nz, dtype=int)
22 | 
23 |     i = 0
24 |     for k, lis in dic.items():
25 |         for t in range(len(lis)):
26 |             key = str(lis[t]) + str(k)
27 |             # Give each distinct feature value a stable column index; the
28 |             # original ix.get(key, 0) + 1 was an occurrence counter, so
29 |             # repeated values landed in different, colliding columns.
30 |             if key not in ix:
31 |                 ix[key] = len(ix)
32 |             col_ix[i + t * g] = ix[key]
33 |         i += 1
34 | 
35 |     row_ix = np.repeat(np.arange(0, n), g)
36 |     data = np.ones(nz)
37 |     if p is None:
38 |         p = len(ix)
39 | 
40 |     ixx = np.where(col_ix < p)
41 |     return csr_matrix((data[ixx], (row_ix[ixx], col_ix[ixx])), shape=(n, p)), ix
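# --- Illustrative aside (not part of the original file) ------------------------
# A toy call showing the one-hot layout vectorize_dic() produces with the fix
# above; the values are hypothetical, and dict order is insertion order on
# Python 3.7+. Each row gets one active "users" column and one "items" column:
#
#   m, ix_toy = vectorize_dic({'users': [1, 2, 1], 'items': [10, 10, 20]}, n=3, g=2)
#   m.todense()
#   # matrix([[1., 0., 1., 0.],    columns: 1users, 2users, 10items, 20items
#   #         [0., 1., 1., 0.],
#   #         [1., 0., 0., 1.]])
# --------------------------------------------------------------------------------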
42 | 
43 | 
44 | def batcher(X_, y_=None, batch_size=-1):
45 |     n_samples = X_.shape[0]
46 | 
47 |     if batch_size == -1:
48 |         batch_size = n_samples
49 |     if batch_size < 1:
50 |         raise ValueError('Parameter batch_size={} is unsupported'.format(batch_size))
51 | 
52 |     for i in range(0, n_samples, batch_size):
53 |         upper_bound = min(i + batch_size, n_samples)
54 |         ret_x = X_[i:upper_bound]
55 |         ret_y = None
56 |         if y_ is not None:
57 |             ret_y = y_[i:upper_bound]
58 |         yield (ret_x, ret_y)
59 | 
60 | 
61 | cols = ['user', 'item', 'rating', 'timestamp']
62 | 
63 | # MovieLens 100k splits; the paths are relative to this demo directory instead
64 | # of the original author's absolute /Users/wzk/... paths.
65 | train = pd.read_csv('data/ua.base', delimiter='\t', names=cols)
66 | test = pd.read_csv('data/ua.test', delimiter='\t', names=cols)
67 | 
68 | x_train, ix = vectorize_dic({'users': train['user'].values,
69 |                              'items': train['item'].values}, n=len(train.index), g=2)
70 | 
71 | x_test, ix = vectorize_dic({'users': test['user'].values,
72 |                             'items': test['item'].values}, ix, x_train.shape[1], n=len(test.index), g=2)
73 | 
74 | y_train = train['rating'].values
75 | y_test = test['rating'].values
76 | 
77 | x_train = x_train.todense()
78 | x_test = x_test.todense()
79 | 
80 | print(x_train.shape)
81 | print(x_test.shape)
82 | 
83 | n, p = x_train.shape
84 | 
85 | k = 10  # dimension of the factorized interaction vectors
86 | 
87 | x = tf.placeholder('float', [None, p])
88 | y = tf.placeholder('float', [None, 1])
89 | 
90 | w0 = tf.Variable(tf.zeros([1]))  # global bias
91 | w = tf.Variable(tf.zeros([p]))   # first-order weights
92 | v = tf.Variable(tf.random_normal([k, p], mean=0, stddev=0.01))  # latent factors
93 | 
94 | linear_terms = tf.add(w0, tf.reduce_sum(tf.multiply(w, x), 1, keepdims=True))  # n * 1
95 | 
96 | # O(n*k) reformulation of the pairwise term:
97 | # sum_{i<j} <v_i, v_j> x_i x_j = 0.5 * sum_f [(x v^T)_f^2 - (x^2 (v^2)^T)_f]
98 | pair_interactions = 0.5 * tf.reduce_sum(
99 |     tf.subtract(
100 |         tf.pow(tf.matmul(x, tf.transpose(v)), 2),
101 |         tf.matmul(tf.pow(x, 2), tf.transpose(tf.pow(v, 2)))
102 |     ), axis=1, keepdims=True)
103 | 
104 | y_hat = tf.add(linear_terms, pair_interactions)
105 | 
106 | lambda_w = tf.constant(0.001, name='lambda_w')
107 | lambda_v = tf.constant(0.001, name='lambda_v')
108 | 
109 | l2_norm = tf.reduce_sum(
110 |     tf.add(
111 |         tf.multiply(lambda_w, tf.pow(w, 2)),
112 |         tf.multiply(lambda_v, tf.pow(v, 2))
113 |     )
114 | )
115 | 
116 | error = tf.reduce_mean(tf.square(y - y_hat))
117 | loss = tf.add(error, l2_norm)
118 | 
119 | train_op = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(loss)
120 | 
121 | epochs = 10
122 | batch_size = 1000
123 | 
124 | # Launch the graph
125 | init = tf.global_variables_initializer()
126 | with tf.Session() as sess:
127 |     sess.run(init)
128 | 
129 |     for epoch in tqdm(range(epochs), unit='epoch'):
130 |         perm = np.random.permutation(x_train.shape[0])
131 |         # iterate over batches
132 |         for bX, bY in batcher(x_train[perm], y_train[perm], batch_size):
133 |             _, t = sess.run([train_op, loss], feed_dict={x: bX.reshape(-1, p), y: bY.reshape(-1, 1)})
134 |         print(t)  # loss on the last batch of this epoch
135 | 
136 |     errors = []
137 |     for bX, bY in batcher(x_test, y_test):
138 |         errors.append(sess.run(error, feed_dict={x: bX.reshape(-1, p), y: bY.reshape(-1, 1)}))
139 |     RMSE = np.sqrt(np.array(errors).mean())
140 |     print(RMSE)
141 | 
--------------------------------------------------------------------------------
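The pair_interactions line above relies on the standard FM identity. The snippet below is a quick stand-alone numerical check of it; the names and sizes are made up, and v_chk is laid out [n, k] (one latent vector per row) rather than the [k, p] used in the file.

    import numpy as np

    rng = np.random.RandomState(0)
    n_feat, k_dim = 7, 3
    x_chk = rng.rand(n_feat)         # feature values
    v_chk = rng.rand(n_feat, k_dim)  # one latent vector per feature

    # Brute force: sum over pairs i < j of <v_i, v_j> * x_i * x_j.
    brute = sum(v_chk[i].dot(v_chk[j]) * x_chk[i] * x_chk[j]
                for i in range(n_feat) for j in range(i + 1, n_feat))

    # O(n*k) form: 0.5 * sum_f [ (sum_i v_if x_i)^2 - sum_i v_if^2 x_i^2 ].
    fast = 0.5 * np.sum(v_chk.T.dot(x_chk) ** 2 - (v_chk ** 2).T.dot(x_chk ** 2))

    assert np.isclose(brute, fast)   # both sides agree up to float error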
batches 140 | for bX, bY in batcher(x_train[perm], y_train[perm], batch_size): 141 | _,t = sess.run([train_op,loss], feed_dict={x: bX.reshape(-1, p), y: bY.reshape(-1, 1)}) 142 | print(t) 143 | 144 | 145 | errors = [] 146 | for bX, bY in batcher(x_test, y_test): 147 | errors.append(sess.run(error, feed_dict={x: bX.reshape(-1, p), y: bY.reshape(-1, 1)})) 148 | print(errors) 149 | RMSE = np.sqrt(np.array(errors).mean()) 150 | print (RMSE) 151 | 152 | 153 | 154 | 155 | 156 | 157 | -------------------------------------------------------------------------------- /rank/recommendation-FM-demo/data/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/recommendation-FM-demo/data/.DS_Store -------------------------------------------------------------------------------- /rank/推荐系统已读论文/Attentional Factorization Machines Learning the Weight of Feature Interactions via Attention Networks.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/推荐系统已读论文/Attentional Factorization Machines Learning the Weight of Feature Interactions via Attention Networks.pdf -------------------------------------------------------------------------------- /rank/推荐系统已读论文/DRN A Deep Reinforcement Learning Framework for News Recommendation.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/推荐系统已读论文/DRN A Deep Reinforcement Learning Framework for News Recommendation.pdf -------------------------------------------------------------------------------- /rank/推荐系统已读论文/Deep & Cross Network for Ad Click Predictions.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/推荐系统已读论文/Deep & Cross Network for Ad Click Predictions.pdf -------------------------------------------------------------------------------- /rank/推荐系统已读论文/Deep Reinforcement Learning for List-wise Recommendation.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/推荐系统已读论文/Deep Reinforcement Learning for List-wise Recommendation.pdf -------------------------------------------------------------------------------- /rank/推荐系统已读论文/Deep interest network.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/推荐系统已读论文/Deep interest network.pdf -------------------------------------------------------------------------------- /rank/推荐系统已读论文/Entire Space Multi-Task Model An E ective Approach for Estimating Post-Click Conversion Rate.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/推荐系统已读论文/Entire Space Multi-Task Model An E ective Approach for Estimating Post-Click Conversion Rate.pdf -------------------------------------------------------------------------------- /rank/推荐系统已读论文/MLR.pdf: 
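A tiny sanity check of `vectorize_dic` on toy values (my example, not part of the original demo): with the fresh-index scheme every row carries exactly `g` ones, one per feature group.

```python
import numpy as np

# toy call, assuming vectorize_dic as defined above
X, ix = vectorize_dic({'users': np.array([1, 2, 1]),
                       'items': np.array([10, 20, 30])}, n=3, g=2)
print(X.todense())  # 3 x 5 matrix; each row one-hot encodes a (user, item) pair
```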
--------------------------------------------------------------------------------
/rank/推荐系统已读论文/Attentional Factorization Machines Learning the Weight of Feature Interactions via Attention Networks.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/推荐系统已读论文/Attentional Factorization Machines Learning the Weight of Feature Interactions via Attention Networks.pdf
--------------------------------------------------------------------------------
/rank/推荐系统已读论文/DRN A Deep Reinforcement Learning Framework for News Recommendation.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/推荐系统已读论文/DRN A Deep Reinforcement Learning Framework for News Recommendation.pdf
--------------------------------------------------------------------------------
/rank/推荐系统已读论文/Deep & Cross Network for Ad Click Predictions.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/推荐系统已读论文/Deep & Cross Network for Ad Click Predictions.pdf
--------------------------------------------------------------------------------
/rank/推荐系统已读论文/Deep Reinforcement Learning for List-wise Recommendation.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/推荐系统已读论文/Deep Reinforcement Learning for List-wise Recommendation.pdf
--------------------------------------------------------------------------------
/rank/推荐系统已读论文/Deep interest network.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/推荐系统已读论文/Deep interest network.pdf
--------------------------------------------------------------------------------
/rank/推荐系统已读论文/Entire Space Multi-Task Model An E ective Approach for Estimating Post-Click Conversion Rate.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/推荐系统已读论文/Entire Space Multi-Task Model An E ective Approach for Estimating Post-Click Conversion Rate.pdf
--------------------------------------------------------------------------------
/rank/推荐系统已读论文/MLR.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/推荐系统已读论文/MLR.pdf
--------------------------------------------------------------------------------
/rank/推荐系统已读论文/Neural Factorization Machines for Sparse Predictive Analytics.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/推荐系统已读论文/Neural Factorization Machines for Sparse Predictive Analytics.pdf
--------------------------------------------------------------------------------
/rank/推荐系统已读论文/Product-based Neural Networks for User Response Prediction.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/推荐系统已读论文/Product-based Neural Networks for User Response Prediction.pdf
--------------------------------------------------------------------------------
/rank/推荐系统已读论文/deepfm.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/推荐系统已读论文/deepfm.pdf
--------------------------------------------------------------------------------
/rank/推荐系统已读论文/facebook-GBDT-LR.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/推荐系统已读论文/facebook-GBDT-LR.pdf
--------------------------------------------------------------------------------
/rank/推荐系统已读论文/linucb.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/推荐系统已读论文/linucb.pdf
--------------------------------------------------------------------------------
/rank/推荐系统已读论文/wide&deep.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/rank/推荐系统已读论文/wide&deep.pdf
--------------------------------------------------------------------------------
/recall/code/MostPopular.py:
--------------------------------------------------------------------------------
# 1. MostPopular algorithm
def MostPopular(train, profile, N):
    '''
    :params: train, training data
    :params: profile, user registration info (gender/age/country)
    :params: N, size of the top-N recommendation list
    :return: GetRecommendation, function returning recommendations for a user
    '''
    # global item popularity
    items = {}
    for user in train:
        for item in train[user]:
            if item not in items:
                items[item] = 0
            items[item] += 1
    items = list(sorted(items.items(), key=lambda x: x[1], reverse=True))

    # recommendation interface
    def GetRecommendation(user):
        seen_items = set(train[user]) if user in train else set()
        recs = [x for x in items if x[0] not in seen_items][:N]
        return recs

    return GetRecommendation

# 2. GenderMostPopular algorithm
def GenderMostPopular(train, profile, N):
    '''
    :params: train, training data
    :params: profile, user registration info
    :params: N, size of the top-N recommendation list
    :return: GetRecommendation, function returning recommendations for a user
    '''
    mitems, fitems = {}, {}  # per-gender popularity counters (male / female)
    for user in train:
        if profile[user]['gender'] == 'm':
            tmp = mitems
        elif profile[user]['gender'] == 'f':
            tmp = fitems
        else:
            # unknown gender contributes to neither counter
            continue
        for item in train[user]:
            if item not in tmp:
                tmp[item] = 0
            tmp[item] += 1
    mitems = list(sorted(mitems.items(), key=lambda x: x[1], reverse=True))
    fitems = list(sorted(fitems.items(), key=lambda x: x[1], reverse=True))

    mostPopular = MostPopular(train, profile, N)

    # recommendation interface
    def GetRecommendation(user):
        seen_items = set(train[user]) if user in train else set()
        if profile[user]['gender'] == 'm':
            recs = [x for x in mitems if x[0] not in seen_items][:N]
        elif profile[user]['gender'] == 'f':
            recs = [x for x in fitems if x[0] not in seen_items][:N]
        else:  # no gender info: fall back to MostPopular
            recs = mostPopular(user)
        return recs

    return GetRecommendation

# 3. AgeMostPopular algorithm
def AgeMostPopular(train, profile, N):
    '''
    :params: train, training data
    :params: profile, user registration info
    :params: N, size of the top-N recommendation list
    :return: GetRecommendation, function returning recommendations for a user
    '''
    # bucket ages into 10-year bins
    ages = []
    for user in profile:
        if profile[user]['age'] >= 0:
            ages.append(profile[user]['age'])
    maxAge, minAge = max(ages), min(ages)
    items = [{} for _ in range(int(maxAge // 10 + 1))]

    # per-age-bucket popularity
    for user in train:
        if profile[user]['age'] >= 0:
            age = profile[user]['age'] // 10
            for item in train[user]:
                if item not in items[age]:
                    items[age][item] = 0
                items[age][item] += 1
    for i in range(len(items)):
        items[i] = list(sorted(items[i].items(), key=lambda x: x[1], reverse=True))

    mostPopular = MostPopular(train, profile, N)

    # recommendation interface
    def GetRecommendation(user):
        seen_items = set(train[user]) if user in train else set()
        if profile[user]['age'] >= 0:
            age = profile[user]['age'] // 10
            # out-of-range or empty age bucket: fall back to the global list
            if age >= len(items) or len(items[age]) == 0:
                recs = mostPopular(user)
            else:
                recs = [x for x in items[age] if x[0] not in seen_items][:N]
        else:  # no age info: fall back to the global list
            recs = mostPopular(user)
        return recs

    return GetRecommendation

# 4. CountryMostPopular algorithm
def CountryMostPopular(train, profile, N):
    '''
    :params: train, training data
    :params: profile, user registration info
    :params: N, size of the top-N recommendation list
    :return: GetRecommendation, function returning recommendations for a user
    '''
    # per-country popularity
    items = {}
    for user in train:
        country = profile[user]['country']
        if country not in items:
            items[country] = {}
        for item in train[user]:
            if item not in items[country]:
                items[country][item] = 0
            items[country][item] += 1
    for country in items:
        items[country] = list(sorted(items[country].items(), key=lambda x: x[1], reverse=True))

    mostPopular = MostPopular(train, profile, N)

    # recommendation interface
    def GetRecommendation(user):
        seen_items = set(train[user]) if user in train else set()
        country = profile[user]['country']
        if country in items:
            recs = [x for x in items[country] if x[0] not in seen_items][:N]
        else:  # country unseen in training: fall back to the global list
            recs = mostPopular(user)
        return recs

    return GetRecommendation

# 5. DemographicMostPopular algorithm
def DemographicMostPopular(train, profile, N):
    '''
    :params: train, training data
    :params: profile, user registration info
    :params: N, size of the top-N recommendation list
    :return: GetRecommendation, function returning recommendations for a user
    '''
    # nested gender -> age bucket -> country popularity counters;
    # users whose combination is missing fall back to MostPopular at query time
    items = {}
    for user in train:
        gender = profile[user]['gender']
        if gender not in items:
            items[gender] = {}
        age = profile[user]['age'] // 10
        if age not in items[gender]:
            items[gender][age] = {}
        country = profile[user]['country']
        if country not in items[gender][age]:
            items[gender][age][country] = {}
        for item in train[user]:
            if item not in items[gender][age][country]:
                items[gender][age][country][item] = 0
            items[gender][age][country][item] += 1
    for gender in items:
        for age in items[gender]:
            for country in items[gender][age]:
                items[gender][age][country] = list(sorted(items[gender][age][country].items(),
                                                          key=lambda x: x[1], reverse=True))

    mostPopular = MostPopular(train, profile, N)

    # recommendation interface
    def GetRecommendation(user):
        seen_items = set(train[user]) if user in train else set()
        gender = profile[user]['gender']
        age = profile[user]['age'] // 10  # look up the same 10-year bucket used above
        country = profile[user]['country']
        if gender not in items or age not in items[gender] or country not in items[gender][age]:
            recs = mostPopular(user)
        else:
            recs = [x for x in items[gender][age][country] if x[0] not in seen_items][:N]
        return recs

    return GetRecommendation
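A minimal usage sketch of the shared interface pattern above (toy data and hypothetical profile fields, for illustration only):

```python
train = {'u1': ['a'], 'u2': ['a', 'b'], 'u3': ['b', 'c']}
profile = {'u1': {'gender': 'm', 'age': 23, 'country': 'US'},
           'u2': {'gender': 'f', 'age': 31, 'country': 'US'},
           'u3': {'gender': 'm', 'age': 27, 'country': 'DE'}}

rec = GenderMostPopular(train, profile, N=2)
print(rec('u1'))  # items popular among male users that u1 has not seen, e.g. [('b', 1), ('c', 1)]
```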
--------------------------------------------------------------------------------
/recall/code/TagBased.py:
--------------------------------------------------------------------------------
import math  # needed by ExpandTagBased below

# 1. Recommendation based on a user's popular tags
def SimpleTagBased(train, N):
    '''
    :params: train, training data: user -> item -> list of tags
    :params: N, size of the top-N recommendation list
    :return: GetRecommendation, recommendation interface
    '''
    # count user_tags (user -> tag usage) and tag_items (tag -> item taggings)
    user_tags, tag_items = {}, {}
    for user in train:
        user_tags[user] = {}
        for item in train[user]:
            for tag in train[user][item]:
                if tag not in user_tags[user]:
                    user_tags[user][tag] = 0
                user_tags[user][tag] += 1
                if tag not in tag_items:
                    tag_items[tag] = {}
                if item not in tag_items[tag]:
                    tag_items[tag][item] = 0
                tag_items[tag][item] += 1

    def GetRecommendation(user):
        # score unseen items and recommend the top N
        if user not in user_tags:
            return []
        seen_items = set(train[user])
        item_score = {}
        for tag in user_tags[user]:
            for item in tag_items[tag]:
                if item in seen_items:
                    continue
                if item not in item_score:
                    item_score[item] = 0
                item_score[item] += user_tags[user][tag] * tag_items[tag][item]
        item_score = list(sorted(item_score.items(), key=lambda x: x[1], reverse=True))
        return item_score[:N]

    return GetRecommendation

# 2. Improvement 1: add a penalty for popular tags
def TagBasedTFIDF(train, N):
    '''
    :params: train, training data: user -> item -> list of tags
    :params: N, size of the top-N recommendation list
    :return: GetRecommendation, recommendation interface
    '''
    # count user_tags and tag_items
    user_tags, tag_items = {}, {}
    # tag popularity: number of distinct users who used the tag
    tag_pop = {}
    for user in train:
        user_tags[user] = {}
        for item in train[user]:
            for tag in train[user][item]:
                if tag not in user_tags[user]:
                    user_tags[user][tag] = 0
                user_tags[user][tag] += 1
                if tag not in tag_items:
                    tag_items[tag] = {}
                if item not in tag_items[tag]:
                    tag_items[tag][item] = 0
                tag_items[tag][item] += 1
                if tag not in tag_pop:
                    tag_pop[tag] = set()
                tag_pop[tag].add(user)
    tag_pop = {k: len(v) for k, v in tag_pop.items()}

    def GetRecommendation(user):
        # score unseen items and recommend the top N
        if user not in user_tags:
            return []
        seen_items = set(train[user])
        item_score = {}
        for tag in user_tags[user]:
            for item in tag_items[tag]:
                if item in seen_items:
                    continue
                if item not in item_score:
                    item_score[item] = 0
                item_score[item] += user_tags[user][tag] * tag_items[tag][item] / tag_pop[tag]
        item_score = list(sorted(item_score.items(), key=lambda x: x[1], reverse=True))
        return item_score[:N]

    return GetRecommendation

# 3. Improvement 2: also add a penalty for popular items
def TagBasedTFIDF_Improved(train, N):
    '''
    :params: train, training data: user -> item -> list of tags
    :params: N, size of the top-N recommendation list
    :return: GetRecommendation, recommendation interface
    '''
    # count user_tags and tag_items
    user_tags, tag_items = {}, {}
    # tag popularity (distinct users per tag) and item popularity (users per item)
    tag_pop, item_pop = {}, {}
    for user in train:
        user_tags[user] = {}
        for item in train[user]:
            if item not in item_pop:
                item_pop[item] = 0
            item_pop[item] += 1
            for tag in train[user][item]:
                if tag not in user_tags[user]:
                    user_tags[user][tag] = 0
                user_tags[user][tag] += 1
                if tag not in tag_items:
                    tag_items[tag] = {}
                if item not in tag_items[tag]:
                    tag_items[tag][item] = 0
                tag_items[tag][item] += 1
                if tag not in tag_pop:
                    tag_pop[tag] = set()
                tag_pop[tag].add(user)
    tag_pop = {k: len(v) for k, v in tag_pop.items()}

    def GetRecommendation(user):
        # score unseen items and recommend the top N
        if user not in user_tags:
            return []
        seen_items = set(train[user])
        item_score = {}
        for tag in user_tags[user]:
            for item in tag_items[tag]:
                if item in seen_items:
                    continue
                if item not in item_score:
                    item_score[item] = 0
                item_score[item] += user_tags[user][tag] * tag_items[tag][item] / tag_pop[tag] / item_pop[item]
        item_score = list(sorted(item_score.items(), key=lambda x: x[1], reverse=True))
        return item_score[:N]

    return GetRecommendation
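For reference, the scores the three variants compute, written out with notation following the code ($n_{u,t}$: times user $u$ used tag $t$; $n_{t,i}$: times item $i$ was tagged with $t$; $\mathrm{pop}(t)$: distinct users of tag $t$; $\mathrm{pop}(i)$: users who interacted with $i$):

$$\mathrm{score}(u,i)=\sum_{t} n_{u,t}\, n_{t,i}\;\;(\text{SimpleTagBased}),\qquad \sum_{t}\frac{n_{u,t}\, n_{t,i}}{\mathrm{pop}(t)}\;\;(\text{TagBasedTFIDF}),\qquad \sum_{t}\frac{n_{u,t}\, n_{t,i}}{\mathrm{pop}(t)\,\mathrm{pop}(i)}\;\;(\text{TagBasedTFIDF\_Improved})$$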
# 4. Recommendation with tag expansion
def ExpandTagBased(train, N, M=20):
    '''
    :params: train, training data: user -> item -> list of tags
    :params: N, size of the top-N recommendation list
    :params: M, users with fewer than M tags are padded up to the top-M expanded tags
    :return: GetRecommendation, recommendation interface
    '''

    # 1. tag-tag similarity from co-occurrence on the same item
    item_tag = {}
    for user in train:
        for item in train[user]:
            if item not in item_tag:
                item_tag[item] = set()
            for tag in train[user][item]:
                item_tag[item].add(tag)
    tag_sim, tag_cnt = {}, {}
    for item in item_tag:
        for u in item_tag[item]:
            if u not in tag_cnt:
                tag_cnt[u] = 0
            tag_cnt[u] += 1
            if u not in tag_sim:
                tag_sim[u] = {}
            for v in item_tag[item]:
                if u == v:
                    continue
                if v not in tag_sim[u]:
                    tag_sim[u][v] = 0
                tag_sim[u][v] += 1
    for u in tag_sim:
        for v in tag_sim[u]:
            tag_sim[u][v] /= math.sqrt(tag_cnt[u] * tag_cnt[v])

    # 2. expand each user's tag set
    user_tags = {}
    for user in train:
        if user not in user_tags:
            user_tags[user] = {}
        for item in train[user]:
            for tag in train[user][item]:
                if tag not in user_tags[user]:
                    user_tags[user][tag] = 0
                user_tags[user][tag] += 1
    expand_tags = {}
    for user in user_tags:
        if len(user_tags[user]) >= M:
            expand_tags[user] = user_tags[user]
            continue
        # fewer than M tags: expand through similar tags
        expand_tags[user] = {}
        seen_tags = set(user_tags[user])
        for tag in user_tags[user]:
            for t in tag_sim[tag]:
                if t in seen_tags:
                    continue
                if t not in expand_tags[user]:
                    expand_tags[user][t] = 0
                expand_tags[user][t] += user_tags[user][tag] * tag_sim[tag][t]
        expand_tags[user].update(user_tags[user])
        expand_tags[user] = dict(list(sorted(expand_tags[user].items(),
                                             key=lambda x: x[1], reverse=True))[:M])

    # 3. run SimpleTagBased scoring over the expanded tags
    tag_items = {}
    for user in train:
        for item in train[user]:
            for tag in train[user][item]:
                if tag not in tag_items:
                    tag_items[tag] = {}
                if item not in tag_items[tag]:
                    tag_items[tag][item] = 0
                tag_items[tag][item] += 1

    def GetRecommendation(user):
        # score unseen items and recommend the top N
        if user not in user_tags:
            return []
        seen_items = set(train[user])
        item_score = {}
        for tag in expand_tags[user]:
            for item in tag_items[tag]:
                if item in seen_items:
                    continue
                if item not in item_score:
                    item_score[item] = 0
                item_score[item] += expand_tags[user][tag] * tag_items[tag][item]
        item_score = list(sorted(item_score.items(), key=lambda x: x[1], reverse=True))
        return item_score[:N]

    return GetRecommendation
--------------------------------------------------------------------------------
/recall/code/baseline.py:
--------------------------------------------------------------------------------
import random

# Random recommendation
def Random(train, K, N):
    '''
    :params: train, training data
    :params: K, unused (kept for a uniform interface)
    :params: N, size of the top-N recommendation list
    :return: GetRecommendation, recommendation interface
    '''
    items = {}
    for user in train:
        for item in train[user]:
            items[item] = 1

    def GetRecommendation(user):
        # recommend N random unseen items
        user_items = set(train[user])
        rec_items = {k: items[k] for k in items if k not in user_items}
        rec_items = list(rec_items.items())
        random.shuffle(rec_items)
        return rec_items[:N]

    return GetRecommendation

# Most-popular recommendation
def MostPopular(train, K, N):
    '''
    :params: train, training data
    :params: K, unused (kept for a uniform interface)
    :params: N, size of the top-N recommendation list
    :return: GetRecommendation, recommendation interface
    '''
    items = {}
    for user in train:
        for item in train[user]:
            if item not in items:
                items[item] = 0
            items[item] += 1

    def GetRecommendation(user):
        # recommend the N most popular unseen items
        user_items = set(train[user])
        rec_items = {k: items[k] for k in items if k not in user_items}
        rec_items = list(sorted(rec_items.items(), key=lambda x: x[1], reverse=True))
        return rec_items[:N]

    return GetRecommendation
--------------------------------------------------------------------------------
/recall/code/data.py:
--------------------------------------------------------------------------------
# imports
import random
import math
import time
from tqdm import tqdm

# decorator that reports a function's run time
def timmer(func):
    def wrapper(*args, **kwargs):
        start_time = time.time()
        res = func(*args, **kwargs)
        stop_time = time.time()
        print('Func %s, run time: %s' % (func.__name__, stop_time - start_time))
        return res
    return wrapper

class Dataset():

    def __init__(self, fp):
        # fp: data file path
        self.data = self.loadData(fp)

    @timmer
    def loadData(self, fp):
        data = []
        for l in open(fp):
            data.append(tuple(map(int, l.strip().split('::')[:2])))
        return data

    @timmer
    def splitData(self, M, k, seed=1):
        '''
        :params: data, all loaded (user, item) pairs
        :params: M, number of folds; results are averaged over the M folds
        :params: k, index of the current fold, k in [0, M)
        :params: seed, random seed; must be identical across different k
        :return: train, test
        '''
        train, test = [], []
        random.seed(seed)
        for user, item in self.data:
            # differs from the book: M-1 seems more reasonable here,
            # because randint is inclusive on both ends
            if random.randint(0, M-1) == k:
                test.append((user, item))
            else:
                train.append((user, item))

        # convert to dict form: user -> list(items)
        def convert_dict(data):
            data_dict = {}
            for user, item in data:
                if user not in data_dict:
                    data_dict[user] = set()
                data_dict[user].add(item)
            data_dict = {k: list(data_dict[k]) for k in data_dict}
            return data_dict

        return convert_dict(train), convert_dict(test)
--------------------------------------------------------------------------------
/recall/code/experiment.py:
--------------------------------------------------------------------------------
class Experiment():

    def __init__(self, M, K, N, fp='../dataset/ml-1m/ratings.dat', rt='UserCF'):
        '''
        :params: M, number of experiment repetitions
        :params: K, number of top-K similar users
        :params: N, size of the top-N recommendation list
        :params: fp, data file path
        :params: rt, recommendation algorithm type
        '''
        self.M = M
        self.K = K
        self.N = N
        self.fp = fp
        self.rt = rt
        # assumes Random, MostPopular, UserCF and UserIIF are defined or imported
        self.alg = {'Random': Random, 'MostPopular': MostPopular, \
                    'UserCF': UserCF, 'UserIIF': UserIIF}

    # a single run
    @timmer
    def worker(self, train, test):
        '''
        :params: train, training data
        :params: test, test data
        :return: metric values
        '''
        getRecommendation = self.alg[self.rt](train, self.K, self.N)
        metric = Metric(train, test, getRecommendation)
        return metric.eval()

    # average over multiple runs
    @timmer
    def run(self):
        metrics = {'Precision': 0, 'Recall': 0,
                   'Coverage': 0, 'Popularity': 0}
        dataset = Dataset(self.fp)
        for ii in range(self.M):
            train, test = dataset.splitData(self.M, ii)
            print('Experiment {}:'.format(ii))
            metric = self.worker(train, test)
            metrics = {k: metrics[k]+metric[k] for k in metrics}
        metrics = {k: metrics[k] / self.M for k in metrics}
        print('Average Result (M={}, K={}, N={}): {}'.format(\
            self.M, self.K, self.N, metrics))
--------------------------------------------------------------------------------
/recall/code/graph.py:
--------------------------------------------------------------------------------
import numpy as np
from scipy.sparse import csc_matrix, eye
from scipy.sparse import linalg


def PersonalRank(train, alpha, N):
    '''
    :params: train, training data
    :params: alpha, probability of continuing the random walk
    :params: N, size of the top-N recommendation list
    :return: GetRecommendation, recommendation interface
    '''

    # build node indices: users first, then items
    items = []
    for user in train:
        items.extend(train[user])
    id2item = list(set(items))
    users = {u: i for i, u in enumerate(train.keys())}
    items = {u: i + len(users) for i, u in enumerate(id2item)}
    n_nodes = len(users) + len(items)

    # build the transition matrix (note: normalize by out-degree!)
    item_user = {}
    for user in train:
        for item in train[user]:
            if item not in item_user:
                item_user[item] = []
            item_user[item].append(user)

    data, row, col = [], [], []
    for u in train:
        for v in train[u]:
            data.append(1 / len(train[u]))
            row.append(users[u])
            col.append(items[v])
    for u in item_user:
        for v in item_user[u]:
            data.append(1 / len(item_user[u]))
            row.append(items[u])
            col.append(users[v])

    # node-by-node matrix over the bipartite user-item graph
    M = csc_matrix((data, (row, col)), shape=(n_nodes, n_nodes))

    # recommendation interface
    def GetRecommendation(user):
        seen_items = set(train[user])
        # solve the matrix equation r = (1-a) r0 + a (M.T) r
        r0 = [0] * n_nodes
        r0[users[user]] = 1
        r0 = csc_matrix(r0).T  # column vector
        r = (1 - alpha) * linalg.inv((eye(n_nodes) - alpha * M.T).tocsc()) * r0
        r = r.T.toarray()[0][len(users):]
        idx = np.argsort(-r)[:N]
        recs = [(id2item[ii], r[ii]) for ii in idx]
        return recs

    return GetRecommendation
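The closed-form solve inside `GetRecommendation` above follows from the stationary condition of the random walk ($\alpha$ is the continue probability, $r_0$ the one-hot start vector, $M$ the out-degree-normalized transition matrix):

$$ r=(1-\alpha)\,r_0+\alpha M^{T} r \;\Longrightarrow\; (I-\alpha M^{T})\,r=(1-\alpha)\,r_0 \;\Longrightarrow\; r=(1-\alpha)\,(I-\alpha M^{T})^{-1} r_0 $$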
--------------------------------------------------------------------------------
/recall/code/itemcf.py:
--------------------------------------------------------------------------------
import math

# Recommendation based on item cosine similarity
def ItemCF(train, K, N):
    '''
    :params: train, training data
    :params: K, number of top-K similar items
    :params: N, size of the top-N recommendation list
    :return: GetRecommendation, recommendation interface
    '''
    # item-item similarity from co-occurrence counts
    sim = {}
    num = {}
    for user in train:
        items = train[user]
        for i in range(len(items)):
            u = items[i]
            if u not in num:
                num[u] = 0
            num[u] += 1
            if u not in sim:
                sim[u] = {}
            for j in range(len(items)):
                if j == i: continue
                v = items[j]
                if v not in sim[u]:
                    sim[u][v] = 0
                sim[u][v] += 1
                # ItemIUF changes only this line (penalizes active users):
                # sim[u][v] += 1 / math.log(1 + len(items))

    for u in sim:
        for v in sim[u]:
            sim[u][v] /= math.sqrt(num[u] * num[v])

    # sort neighbors by similarity
    sorted_item_sim = {k: list(sorted(v.items(), key=lambda x: x[1], reverse=True))
                       for k, v in sim.items()}

    # recommendation interface
    def GetRecommendation(user):
        items = {}
        seen_items = set(train[user])
        for item in train[user]:
            for u, _ in sorted_item_sim[item][:K]:
                if u not in seen_items:
                    if u not in items:
                        items[u] = 0
                    items[u] += sim[item][u]
        recs = list(sorted(items.items(), key=lambda x: x[1], reverse=True))[:N]
        return recs

    return GetRecommendation
--------------------------------------------------------------------------------
/recall/code/itemcf_norm.py:
--------------------------------------------------------------------------------
import math

# Recommendation based on row-normalized item cosine similarity
def ItemCF_Norm(train, K, N):
    '''
    :params: train, training data
    :params: K, number of top-K similar items
    :params: N, size of the top-N recommendation list
    :return: GetRecommendation, recommendation interface
    '''
    # item-item similarity from co-occurrence counts
    sim = {}
    num = {}
    for user in train:
        items = train[user]
        for i in range(len(items)):
            u = items[i]
            if u not in num:
                num[u] = 0
            num[u] += 1
            if u not in sim:
                sim[u] = {}
            for j in range(len(items)):
                if j == i: continue
                v = items[j]
                if v not in sim[u]:
                    sim[u][v] = 0
                sim[u][v] += 1
    for u in sim:
        for v in sim[u]:
            sim[u][v] /= math.sqrt(num[u] * num[v])

    # normalize each row of the similarity matrix
    for u in sim:
        s = 0
        for v in sim[u]:
            s += sim[u][v]
        if s > 0:
            for v in sim[u]:
                sim[u][v] /= s

    # sort neighbors by similarity
    sorted_item_sim = {k: list(sorted(v.items(), key=lambda x: x[1], reverse=True))
                       for k, v in sim.items()}

    # recommendation interface
    def GetRecommendation(user):
        items = {}
        seen_items = set(train[user])
        for item in train[user]:
            for u, _ in sorted_item_sim[item][:K]:
                if u not in seen_items:
                    if u not in items:
                        items[u] = 0
                    items[u] += sim[item][u]
        recs = list(sorted(items.items(), key=lambda x: x[1], reverse=True))[:N]
        return recs

    return GetRecommendation
--------------------------------------------------------------------------------
/recall/code/lfm.py:
--------------------------------------------------------------------------------
import numpy as np
from tqdm import trange


def LFM(train, ratio, K, lr, step, lmbda, N):
    '''
    :params: train, training data
    :params: ratio, negative/positive sampling ratio
    :params: K, number of latent factors
    :params: lr, initial learning rate
    :params: step, number of iterations
    :params: lmbda, regularization coefficient
    :params: N, size of the top-N recommendation list
    :return: GetRecommendation, recommendation interface
    '''

    all_items = {}
    for user in train:
        for item in train[user]:
            if item not in all_items:
                all_items[item] = 0
            all_items[item] += 1
    all_items = list(all_items.items())
    items = [x[0] for x in all_items]
    pops = [x[1] for x in all_items]
    # normalize popularity into a probability distribution for sampling
    pops = np.array(pops, dtype=float) / np.sum(pops)

    # negative sampling (note: sample proportionally to popularity!)
    def nSample(data, ratio):
        new_data = {}
        # positives
        for user in data:
            if user not in new_data:
                new_data[user] = {}
            for item in data[user]:
                new_data[user][item] = 1
        # negatives: oversample 3x, drop seen items, keep pos_num * ratio
        for user in new_data:
            seen = set(new_data[user])
            pos_num = len(seen)
            item = np.random.choice(items, int(pos_num * ratio * 3), p=pops)
            item = [x for x in item if x not in seen][:int(pos_num * ratio)]
            new_data[user].update({x: 0 for x in item})

        return new_data

    # training: SGD on the squared error with L2 regularization
    P, Q = {}, {}
    for user in train:
        P[user] = np.random.random(K)
    for item in items:
        Q[item] = np.random.random(K)

    for s in trange(step):
        data = nSample(train, ratio)
        for user in data:
            for item in data[user]:
                eui = data[user][item] - (P[user] * Q[item]).sum()
                P[user] += lr * (Q[item] * eui - lmbda * P[user])
                Q[item] += lr * (P[user] * eui - lmbda * Q[item])
        lr *= 0.9  # decay the learning rate

    # recommendation interface
    def GetRecommendation(user):
        seen_items = set(train[user])
        recs = {}
        for item in items:
            if item not in seen_items:
                recs[item] = (P[user] * Q[item]).sum()
        recs = list(sorted(recs.items(), key=lambda x: x[1], reverse=True))[:N]
        return recs

    return GetRecommendation
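The SGD step in the training loop above implements the usual regularized matrix-factorization updates ($\eta$ is the learning rate `lr`, $\lambda$ is `lmbda`):

$$ \hat r_{ui}=p_u^{T} q_i,\qquad e_{ui}=r_{ui}-\hat r_{ui} $$
$$ p_u \leftarrow p_u+\eta\,(e_{ui}\, q_i-\lambda\, p_u),\qquad q_i \leftarrow q_i+\eta\,(e_{ui}\, p_u-\lambda\, q_i) $$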
--------------------------------------------------------------------------------
/recall/code/metric.py:
--------------------------------------------------------------------------------
import math


class Metric():

    def __init__(self, train, test, GetRecommendation):
        '''
        :params: train, training data
        :params: test, test data
        :params: GetRecommendation, interface returning recommended items for a user
        '''
        self.train = train
        self.test = test
        self.GetRecommendation = GetRecommendation
        self.recs = self.getRec()

    # recommend for every user in the test set
    def getRec(self):
        recs = {}
        for user in self.test:
            rank = self.GetRecommendation(user)
            recs[user] = rank
        return recs

    # precision
    def precision(self):
        all, hit = 0, 0
        for user in self.test:
            test_items = set(self.test[user])
            rank = self.recs[user]
            for item, score in rank:
                if item in test_items:
                    hit += 1
            all += len(rank)
        return round(hit / all * 100, 2)

    # recall
    def recall(self):
        all, hit = 0, 0
        for user in self.test:
            test_items = set(self.test[user])
            rank = self.recs[user]
            for item, score in rank:
                if item in test_items:
                    hit += 1
            all += len(test_items)
        return round(hit / all * 100, 2)

    # coverage
    def coverage(self):
        all_item, recom_item = set(), set()
        for user in self.test:
            for item in self.train[user]:
                all_item.add(item)
            rank = self.recs[user]
            for item, score in rank:
                recom_item.add(item)
        return round(len(recom_item) / len(all_item) * 100, 2)

    # novelty (average popularity of recommended items)
    def popularity(self):
        # item popularity
        item_pop = {}
        for user in self.train:
            for item in self.train[user]:
                if item not in item_pop:
                    item_pop[item] = 0
                item_pop[item] += 1

        num, pop = 0, 0
        for user in self.test:
            rank = self.recs[user]
            for item, score in rank:
                # take the log so the metric is not dominated by
                # long-tail popularity effects
                pop += math.log(1 + item_pop[item])
                num += 1
        return round(pop / num, 6)

    def eval(self):
        metric = {'Precision': self.precision(),
                  'Recall': self.recall(),
                  'Coverage': self.coverage(),
                  'Popularity': self.popularity()}
        print('Metric:', metric)
        return metric
--------------------------------------------------------------------------------
/recall/code/time.py:
--------------------------------------------------------------------------------
import math
import time

# 1. Recommend the recently most popular items
def RecentPopular(train, K, N, alpha=1.0, t0=int(time.time())):
    '''
    :params: train, training data: user -> list of (item, timestamp)
    :params: K, unused (kept for a uniform interface)
    :params: N, size of the top-N recommendation list
    :params: alpha, time decay factor
    :params: t0, current timestamp
    :return: GetRecommendation, recommendation interface
    '''

    item_score = {}
    for user in train:
        for item, t in train[user]:
            if item not in item_score:
                item_score[item] = 0
            # +1 in the denominator guards against division by zero when t == t0
            item_score[item] += 1.0 / (1 + alpha * (t0 - t))

    item_score = list(sorted(item_score.items(), key=lambda x: x[1], reverse=True))

    def GetRecommendation(user):
        # recommend the N highest-scored unseen items;
        # train[user] holds (item, t) pairs, so collect the item ids
        seen_items = set(x[0] for x in train[user])
        rec_items = [x for x in item_score if x[0] not in seen_items]
        return rec_items[:N]

    return GetRecommendation

# 2. Time-context-aware ItemCF
def TItemCF(train, K, N, alpha=1.0, beta=1.0, t0=int(time.time())):
    '''
    :params: train, training data: user -> list of (item, timestamp)
    :params: K, number of top-K similar items
    :params: N, size of the top-N recommendation list
    :params: alpha, time decay factor for item similarity
    :params: beta, time decay factor for recommendation scoring
    :params: t0, current timestamp
    :return: GetRecommendation, recommendation interface
    '''
    # item similarity matrix with time decay
    sim = {}
    num = {}
    for user in train:
        items = train[user]
        for i in range(len(items)):
            u, t1 = items[i]
            if u not in num:
                num[u] = 0
            num[u] += 1
            if u not in sim:
                sim[u] = {}
            for j in range(len(items)):
                if j == i: continue
                v, t2 = items[j]
                if v not in sim[u]:
                    sim[u][v] = 0
                sim[u][v] += 1.0 / (alpha * (abs(t1 - t2) + 1))
    for u in sim:
        for v in sim[u]:
            sim[u][v] /= math.sqrt(num[u] * num[v])

    # sort neighbors by similarity
    sorted_item_sim = {k: list(sorted(v.items(), key=lambda x: x[1], reverse=True))
                       for k, v in sim.items()}

    # recommendation interface
    def GetRecommendation(user):
        items = {}
        # collect item ids only, not the (item, t) tuples
        seen_items = set(x[0] for x in train[user])
        for item, t in train[user]:
            for u, _ in sorted_item_sim[item][:K]:
                if u not in seen_items:
                    if u not in items:
                        items[u] = 0
                    items[u] += sim[item][u] / (1 + beta * (t0 - t))
        recs = list(sorted(items.items(), key=lambda x: x[1], reverse=True))[:N]
        return recs

    return GetRecommendation

# 3. Time-context-aware UserCF
def TUserCF(train, K, N, alpha=1.0, beta=1.0, t0=int(time.time())):
    '''
    :params: train, training data: user -> list of (item, timestamp)
    :params: K, number of top-K similar users
    :params: N, size of the top-N recommendation list
    :params: alpha, time decay factor for user similarity
    :params: beta, time decay factor for recommendation scoring
    :params: t0, current timestamp
    :return: GetRecommendation, recommendation interface
    '''
    # item -> users inverted index
    item_users = {}
    for user in train:
        for item, t in train[user]:
            if item not in item_users:
                item_users[item] = []
            item_users[item].append((user, t))

    # user similarity matrix with time decay
    sim = {}
    num = {}
    for item in item_users:
        users = item_users[item]
        for i in range(len(users)):
            u, t1 = users[i]
            if u not in num:
                num[u] = 0
            num[u] += 1
            if u not in sim:
                sim[u] = {}
            for j in range(len(users)):
                if j == i: continue
                v, t2 = users[j]
                if v not in sim[u]:
                    sim[u][v] = 0
                sim[u][v] += 1.0 / (alpha * (abs(t1 - t2) + 1))
    for u in sim:
        for v in sim[u]:
            sim[u][v] /= math.sqrt(num[u] * num[v])

    # sort neighbors by similarity
    sorted_user_sim = {k: list(sorted(v.items(), key=lambda x: x[1], reverse=True))
                       for k, v in sim.items()}

    # recommendation interface
    def GetRecommendation(user):
        items = {}
        seen_items = set(x[0] for x in train[user])
        recs = []
        if user in sorted_user_sim:
            for u, _ in sorted_user_sim[user][:K]:
                # keep the timestamp: the score decays with the time
                # of the similar user's action on the item
                for item, t in train[u]:
                    if item not in seen_items:
                        if item not in items:
                            items[item] = 0
                        items[item] += sim[user][u] / (1 + beta * (t0 - t))
            recs = list(sorted(items.items(), key=lambda x: x[1], reverse=True))[:N]
        return recs

    return GetRecommendation
--------------------------------------------------------------------------------
/recall/code/usercf.py:
--------------------------------------------------------------------------------
import math

# Recommendation based on user cosine similarity
def UserCF(train, K, N):
    '''
    :params: train, training data
    :params: K, number of top-K similar users
    :params: N, size of the top-N recommendation list
    :return: GetRecommendation, recommendation interface
    '''
    # item -> users inverted index
    item_users = {}
    for user in train:
        for item in train[user]:
            if item not in item_users:
                item_users[item] = []
            item_users[item].append(user)

    # user similarity matrix
    sim = {}
    num = {}
    for item in item_users:
        users = item_users[item]
        for i in range(len(users)):
            u = users[i]
            if u not in num:
                num[u] = 0
            num[u] += 1
            if u not in sim:
                sim[u] = {}
            for j in range(len(users)):
                if j == i: continue
                v = users[j]
                if v not in sim[u]:
                    sim[u][v] = 0
                sim[u][v] += 1
                # UserIIF changes only this line (penalizes popular items):
                # sim[u][v] += 1 / math.log(1 + len(users))
    for u in sim:
        for v in sim[u]:
            sim[u][v] /= math.sqrt(num[u] * num[v])

    # sort neighbors by similarity
    sorted_user_sim = {k: list(sorted(v.items(), key=lambda x: x[1], reverse=True))
                       for k, v in sim.items()}

    # recommendation interface
    def GetRecommendation(user):
        items = {}
        seen_items = set(train[user])
        for u, _ in sorted_user_sim[user][:K]:
            for item in train[u]:
                # skip items the user has already seen
                if item not in seen_items:
                    if item not in items:
                        items[item] = 0
                    items[item] += sim[user][u]
        recs = list(sorted(items.items(), key=lambda x: x[1], reverse=True))[:N]
        return recs

    return GetRecommendation
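For reference, the similarities the two variants in `usercf.py` compute ($N(u)$: items of user $u$; $N(i)$: users of item $i$):

$$ w_{uv}=\frac{|N(u)\cap N(v)|}{\sqrt{|N(u)|\,|N(v)|}}\;\;(\text{UserCF}),\qquad w_{uv}=\frac{\sum_{i\in N(u)\cap N(v)}\frac{1}{\log(1+|N(i)|)}}{\sqrt{|N(u)|\,|N(v)|}}\;\;(\text{UserIIF}) $$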
--------------------------------------------------------------------------------
/recall/recall.md:
--------------------------------------------------------------------------------
# Recall
## Content-based recall
### word2vec
- Know the two network structures, skip-gram and CBOW
![skip-gram and CBOW](https://img-blog.csdnimg.cn/20200225161740893.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM0MjE5OTU5,size_16,color_FFFFFF,t_70)
- Know the two training optimizations: Hierarchical Softmax and Negative Sampling
(1) Hierarchical Softmax
A Huffman tree over the vocabulary: the more frequent a word, the closer it sits to the root, dropping the complexity from n to log2(n).
![Hierarchical Softmax](https://img-blog.csdnimg.cn/20200225161832322.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM0MjE5OTU5,size_16,color_FFFFFF,t_70)
(2) Negative Sampling
Each step simply samples `neg` words other than the center word as negatives, which turns training into a binary classification problem. The sampled negatives should lean toward popular words.
### LDA
We assume every word of a document is generated by "picking a topic with some probability, then picking a word from that topic with some probability". Document-to-topic follows a multinomial distribution, and topic-to-word follows a multinomial distribution.
**Core idea: LDA identifies topics, i.e., it factors the document-word matrix into a document-topic matrix (distribution) and a topic-word matrix (distribution).**

For each document in the corpus, LDA defines the following generative process:
1. for the document, draw a topic from its topic distribution;
2. draw a word from the word distribution of the drawn topic;
3. repeat until every word of the document has been generated.
[Blog post](https://www.jianshu.com/p/fa97454c9ffd)



## Behavior-based recall
### ItemCF
- Penalize overly active users.
![ItemCF similarity with active-user penalty](https://img-blog.csdnimg.cn/20200225130520304.png)
- UserCF recommendations reflect the hot spots of a small group with similar interests, while ItemCF recommendations maintain the user's own historical interests.
- UserCF fits scenarios like news, where popularity and freshness dominate; ItemCF fits books, e-commerce, movies and similar scenarios.
### UserCF
- Penalize popular items.
- When computing behavior similarity, build an item->user inverted index: any two users listed under the same item necessarily overlap on that item, so it suffices to scan all items and count the co-occurring user pairs under each, which cuts the time complexity dramatically.

**CF is cheap and timely but weak at global discovery: it only extends from historical similarity, and it suffers from cold start.**
### Swing
Swing, an algorithm original to Alibaba, matches over the graph structure to compute item-item similarity: if several users who clicked item s all co-clicked exactly one other item, then that item and s must be strongly related.

### Association rules
Mine the items users bought for frequently occurring itemsets or sequences, and keep the frequent N-itemsets or sequences of associated items that pass a support threshold. If a user has bought part of such an itemset or sequence, recommend the remaining items ranked by some criterion, which can include support, confidence and lift.

Common association-rule algorithms are Apriori, FP Tree and PrefixSpan. If they are unfamiliar, see my other posts:
[Apriori](https://blog.csdn.net/qq_34219959/article/details/102381162)
[FpGrowth](https://blog.csdn.net/qq_34219959/article/details/102390588)
[PrefixSpan sequence-pattern mining](https://blog.csdn.net/qq_34219959/article/details/97015246)
### Clustering-based CF
Common clustering algorithms for recommendation are K-Means, BIRCH, DBSCAN and spectral clustering.

A few notes on DBSCAN (see the sketch after this list):
- Main advantages:
(1) it can cluster dense data of arbitrary shape, whereas K-Means-style algorithms generally only suit convex data;
(2) it discovers outliers while clustering and is insensitive to them;
(3) its results carry no initialization bias, whereas K-Means results depend strongly on initial values.
- Main drawbacks:
(1) when density is uneven or cluster spacing varies a lot, cluster quality is poor and DBSCAN is usually a bad fit;
(2) convergence is slow on large samples; this can be mitigated by bounding the size of the KD-tree or ball tree used for the nearest-neighbor search;
(3) tuning is somewhat more complex than for K-Means-style algorithms: the distance threshold ϵ and the neighborhood-size threshold MinPts must be tuned jointly, and different combinations change the final clustering noticeably.
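A minimal sketch of that joint ϵ/MinPts tuning point, using scikit-learn's DBSCAN (assumed available; random toy data):

```python
import numpy as np
from sklearn.cluster import DBSCAN

X = np.random.rand(200, 2)           # toy 2-D points
for eps in (0.05, 0.1, 0.2):         # eps and min_samples must be tuned jointly
    labels = DBSCAN(eps=eps, min_samples=5).fit_predict(X)
    n_clusters = len(set(labels)) - (1 if -1 in labels else 0)
    print(eps, n_clusters, int((labels == -1).sum()))  # clusters found, outlier count
```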
## Matrix factorization
### Latent factor model (LFM)
- Approximate the real user-item matrix with the product of two low-rank matrices.
![LFM factorization](https://img-blog.csdnimg.cn/20200225130819331.png)
![LFM objective](https://img-blog.csdnimg.cn/20200225131153230.png)
![LFM gradients](https://img-blog.csdnimg.cn/20200225131412990.png)
![LFM updates](https://img-blog.csdnimg.cn/20200225131401259.png)
- How should labels be produced?
Datasets usually contain positive feedback only (label 1), so negatives (label 0) must be sampled. Sampling is done per user, and the negative items for each user should follow two rules:
(1) for each user, keep positives and negatives balanced (similar counts);
(2) for each user, sample negative items that are popular yet have no interaction from that user.
- Offline space complexity: neighborhood methods maintain an offline similarity table. With M users and N items, UserCF needs $O(M*M)$ space and ItemCF needs $O(N*N)$, while LFM with F latent factors needs $O(F*(M+N))$.
- LFM is a poor fit for systems with a very large item count; to use it anyway, a faster algorithm must first compute a small candidate list for the user, which LFM then re-ranks. On top of that, LFM is too slow at generating one user's list to compute online in real time, so the recommendations for all users must be precomputed offline and stored in a database. LFM therefore cannot recommend in real time: after a user's new behavior, the recommendation list does not change.
## Graph-based recall
### PersonalRank
![PersonalRank bipartite graph](https://img-blog.csdnimg.cn/2020022513152310.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM0MjE5OTU5,size_16,color_FFFFFF,t_70)
![PersonalRank iteration](https://img-blog.csdnimg.cn/20200225131613954.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM0MjE5OTU5,size_16,color_FFFFFF,t_70)
## Graph embedding
- deepwalk
- line
Compared with DeepWalk's purely random walk sequence generation, LINE applies to directed, undirected and edge-weighted graphs, and by bringing first- and second-order proximity into the objective, the learned node embeddings are distributed more evenly and smoothly, avoiding DeepWalk's tendency to let node embeddings cluster.
- node2vec
![node2vec](https://img-blog.csdnimg.cn/2020030222532484.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM0MjE5OTU5,size_16,color_FFFFFF,t_70)
- SDNE
Where node2vec improves the walk strategy, SDNE mainly redesigns the objective to address the local and global structure of the embedding network; and unlike LINE, which learns local and global structure separately, SDNE optimizes the whole at once, which favors a globally better embedding.
- Alibaba EGES
Alibaba brings in side information, with a weight per side-information source, to attack embedding's thorny cold-start problem.
![EGES](https://img-blog.csdnimg.cn/20200302233300405.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM0MjE5OTU5,size_16,color_FFFFFF,t_70)
### GNN-based recall models
A knowledge graph is a rather special concrete instance of a graph neural network; but because a KG encodes static knowledge rather than users' direct behavior data, it sits farther from the application, which is probably the main reason the two perform so differently in recommendation.
![GNN recall](https://img-blog.csdnimg.cn/20200229234345769.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM0MjE5OTU5,size_16,color_FFFFFF,t_70)
Because information propagates through the graph, GNNs should be especially useful for cold-start and data-sparse scenarios. Since GNN-based recommendation needs global information, computation speed is the problem: GraphSAGE cuts the computation and speeds it up with tricks such as sampling from neighboring nodes, and many later efficiency improvements derive from that work; PinSage (by the same group), building on GraphSAGE, goes further with large-scale distributed computation, extending graph computation to practical scale: it handles Pinterest's giant graph of 3 billion nodes and 18 billion edges, with good production results.
## item2vec (embedding)
Idea: embed users and items separately.
Pros: issues like the per-channel truncation hyperparameters of multi-channel recall dissolve naturally.
Cons: head bias in the recalled content (counter it by downsampling head domains in the training data to reduce their dominance, and by encouraging diversity on the model side, among other methods).
Note: when the recall stage uses a model, it should adopt the same optimization objective as the ranking model; in particular, if ranking optimizes multiple objectives, the recall model should optimize the same multiple objectives.
### FM-based recall
[FM-based recall](https://zhuanlan.zhihu.com/p/58160982?from_voters_page=true)
1. Offline training
What we actually want is every feature together with its trained embedding vector.
2. Mapping functions
For user features, item features and context features: the user vectors go into an online store such as Redis, and the item vectors go into a Faiss database (Faiss is Facebook's open-source library for efficient embedding matching).
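A small numpy sketch of step 2 (my construction following the linked post; the names and dimensions are made up): sum the FM embeddings of the user-side features into one user vector, stack the item vectors into a matrix (in production this lives in Faiss), and retrieve by inner product.

```python
import numpy as np

k = 8                                              # FM latent dimension (assumed)
emb = {f: np.random.rand(k)                        # stand-in for trained FM embeddings
       for f in ['gender=m', 'city=bj', 'item=10', 'item=20']}

user_vec = emb['gender=m'] + emb['city=bj']        # collapse user-side features
item_mat = np.stack([emb['item=10'], emb['item=20']])
scores = item_mat @ user_vec                       # inner-product retrieval
print(np.argsort(-scores))                         # item ranking for this user
```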
### Two-tower model
![two-tower structure](https://img-blog.csdnimg.cn/20200303153108766.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM0MjE5OTU5,size_16,color_FFFFFF,t_70)
![two-tower training](https://img-blog.csdnimg.cn/20200302203528282.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM0MjE5OTU5,size_16,color_FFFFFF,t_70)
### Airbnb's embedding strategy
1. Click sessions: slice the click sequences, then embed each sequence with word2vec. Session rules: an item only counts as valid if the dwell time exceeds 30 seconds, and a session is cut after 30 minutes without any action.
2. Booking sessions: purchase behavior is very sparse (small in total, few actions per user), so the word2vec treatment does not apply; instead, similar users and similar listings are aggregated by attribute-based rules.
### User-behavior-sequence recall
The input is the sequence of items the user acted on, represented by item IDs alone or fused with the items' side information, merged by some method into one embedding that represents the user's interest.
GRU (an RNN variant) is probably the model that aggregates behavior sequences best while staying fairly simple.
![sequence recall](https://img-blog.csdnimg.cn/20200229230814739.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM0MjE5OTU5,size_16,color_FFFFFF,t_70)
Users usually have multiple interests, which leads to multi-interest extraction.
### Multi-interest extraction
Compared with behavior-sequence recall, the input is identical but the output differs: instead of a single user embedding, output several user-interest embeddings, clustering different items into different interest groups; capsule networks and Memory Networks are the usual clustering machinery.
## Knowledge-graph fusion
For recommendation there are two broad KG fusion modes: the knowledge-graph embedding mode (KGE) and the graph-path mode.
- KG embedding mode: use TransE to turn nodes and edges into embeddings, and use distances to expand the information content of items. (poor interpretability)
- Graph-path mode: hand-defined association and propagation patterns over the knowledge in the KG, building propagation paths through intermediate attributes. (poor performance)
## Tree-based deep match (TDM)
![TDM](https://img-blog.csdnimg.cn/2020030214313682.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM0MjE5OTU5,size_16,color_FFFFFF,t_70)
- Retrieval is top-down beam search.
- Selecting the top-K nodes per level: as in the red box of the figure above, the inputs are the user's history features and the node's embedding; to pick the top-K of a level, every node of that level goes through the left-hand model to get a predicted score, and the top nodes by score are kept (this involves sampling negatives).
- Tree initialization: first sort the items by category information so that same-category items sit together, then recursively split each category's items evenly into two children until every set is a singleton, giving a top-down initial tree. Sample the training data the deep model needs from this tree and train the model; after training, every tree node has an embedding, and a new tree is rebuilt by K-Means clustering over those node embeddings; finally, the deep network is retrained on the newly generated tree.
--------------------------------------------------------------------------------
/recall/向量召回快速检索方法.md:
--------------------------------------------------------------------------------
# Introduction
Why fast vector retrieval? In practice the main recall channel at every company is vector-based, but at industrial data scale exact nearest-neighbor search is too hard, so research has moved to trading accuracy for search time: Approximate Nearest Neighbor Search (ANNS).
This note covers the principles of the common fast retrieval methods and how they behave.
# Linear scan
Compare the query sample against every candidate one by one and keep the k closest samples; time complexity O(n). For small sample counts this is simple and stable and already works decently, but at larger data scale the time cost becomes unacceptable.

# KDTree
## Construction
A kd-tree is a binary tree whose core idea is to keep splitting the k-dimensional feature space (say the features have 768 dimensions: for each of the dimensions 0, 1, 2, ..., 767, recursively split at the median). Every node is a hyperrectangle; samples below the split point go to the left subtree, samples above it to the right subtree.
## Search
At query time: (1) start from the root and descend the kd-tree recursively; if the query point's coordinate on the current split dimension is smaller than the split point's, move to the left subtree, otherwise to the right, until a leaf is reached; (2) take this leaf as the "current nearest point", backtrack upward recursively, and check whether the sibling subtree of each node holds a closer point, updating the "current nearest point" if so, otherwise keep backtracking; repeat (2) while the root has not been reached; (3) when backtracking reaches the root, the search ends.
## Properties
kd-trees are most efficient below about 20 dimensions, and generally suit k-NN search where the number of training instances far exceeds the space's dimensionality; as the dimensionality approaches the instance count, efficiency decays rapidly, nearly down to linear scan.
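A quick look at this behavior with scikit-learn's `KDTree` (assumed available; toy data kept low-dimensional, where kd-trees actually pay off per the note above):

```python
import numpy as np
from sklearn.neighbors import KDTree

X = np.random.rand(10000, 16)        # 10k points, 16 dims
tree = KDTree(X)                     # build: recursive median splits
dist, ind = tree.query(X[:1], k=5)   # 5 nearest neighbors of the first point
print(ind, dist)
```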
# BallTree
## Construction
A KD tree splits the data along coordinate axes; BallTree splits the data on a series of nested hyperspheres, i.e., it partitions regions with hyperspheres instead of hyperrectangles.

Concretely, BallTree recursively assigns the data to nodes defined by a centroid C and a radius r, so that every point of a node lies inside the hypersphere defined by C and r. The triangle inequality is then used to reduce the number of candidate points during neighbor search.
![BallTree construction](https://img-blog.csdnimg.cn/20201205223011249.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM0MjE5OTU5,size_16,color_FFFFFF,t_70)

![BallTree pruning](https://img-blog.csdnimg.cn/20201205223242248.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM0MjE5OTU5,size_16,color_FFFFFF,t_70)
## Search
1. From the root, recursively traverse every subspace that could still contain the final nearest neighbors.
2. If the subspace's radius R(pi) plus r exceeds the distance from its center pi to the query point q, the spheres must intersect; recurse into the subspace samples that satisfy this condition, and the qualifying points found there are the nearest neighbors we want.
## Properties
Although construction costs more than a KDtree, it remains very efficient on high-dimensional and even very-high-dimensional data.
# Annoy
Annoy, short for "Approximate Nearest Neighbors Oh Yeah", is a fast similarity search algorithm suited to real applications. Like a kd-tree, Annoy builds a binary tree so that each point's lookup costs O(log n); unlike a kd-tree, Annoy does not split along single dimensions of the k-dimensional features.

Each space split in Annoy can be seen as a KMeans step with 2 clusters: after convergence, a perpendicular bisector (the black line in the figure) of the segment joining the two cluster centers splits the data space in two.
## Construction
![Annoy split](https://img-blog.csdnimg.cn/20201205224305625.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM0MjE5OTU5,size_16,color_FFFFFF,t_70)
![Annoy recursion](https://img-blog.csdnimg.cn/20201205224318260.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM0MjE5OTU5,size_16,color_FFFFFF,t_70)
The final binary tree has a structure like the one below: the bottom leaves record the original data points, and the internal nodes record the information of the splitting hyperplanes.
![Annoy tree](https://img-blog.csdnimg.cn/20201205224346338.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM0MjE5OTU5,size_16,color_FFFFFF,t_70)

## Search
Querying is similar to a kd-tree: first descend recursively from the root to a leaf, then backtrack upward.
## Properties
Annoy's interface generally has two parameters to tune: the top-k neighbors to return and the number of trees. More trees generally mean higher precision but also higher memory cost; the trade-off must be weighed.

# NSW
NSW (Navigable Small World graphs) is a graph-based storage structure.
![NSW graph](https://img-blog.csdnimg.cn/20201205225210437.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM0MjE5OTU5,size_16,color_FFFFFF,t_70)
Naive search: the naive idea many people have is to connect certain points with edges, forming a search graph, and store it. To find the point closest to the pink query point, start from any black point and compute its distance to the query; the points connected to that black point are called its "friends"; then compute the distance of all of that point's "friends" to the query, pick the friend closest to the query as the next entry point, and continue searching the same way. If the current black point is closer to the query than all of its "friends", stop: that black point is the nearest point we were looking for.

The naive idea is called naive precisely because its flaws are many. First, point K in the figure can never be found by a query, because K has no friends; what then? Second, if we want the two points closest to the query but those two near points are not connected to each other (like points L and E), efficiency suffers badly (if L and E were connected, the method above would easily find both); what then? Finally, does point D really need that many "friends"? And how should friendship be decided anyway?

Three rules address this. For the K problem: at construction time, every data vector node must have friends. For the L and E problem: at construction time, all vectors that are close (similar) beyond some degree must be mutual friends. For the D problem: trading off the time complexity of building the graph, each node's number of "friends" is kept as small as possible.

## Construction
Graph theory has a well-suited partition method that fixes the flaws of the naive idea: Delaunay triangulation, which achieves: 1. every point in the graph has "friends"; 2. points near each other are mutual "friends"; 3. the total number of edges (segments) in the graph is minimal. The effect is shown below.
![Delaunay triangulation](https://img-blog.csdnimg.cn/20201205230828875.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM0MjE5OTU5,size_16,color_FFFFFF,t_70)
NSW does not build its graph by Delaunay triangulation, however. One reason is that Delaunay triangulation has too high a construction complexity; in plain words, building it costs too much. The other is that search on a Delaunay graph is not necessarily the most efficient: if the entry point is far from the query, many hops are needed to reach the neighborhood, so an "expressway mechanism" is wanted (some distant points connected by direct segments for fast approach). Ideally, the algorithm should satisfy the three rules above while also having low complexity and built-in expressways.
![NSW with expressways](https://img-blog.csdnimg.cn/20201205231020768.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM0MjE5OTU5,size_16,color_FFFFFF,t_70)
NSW's naive construction algorithm: insert points into the graph one by one; when inserting a brand-new point, use the naive search above (deciding the next entry point from the distances of the "friends" to the point being inserted) to find the m nearest existing points (m is user-set), and connect the new point to those m points. Done.

Because the construction inserts points in random order, "expressways" are very likely to be created early in the build.

## Search
The greedy descent goes as follows (a compact code sketch follows this list):
1. Compute the distance from the query $q$ to every vertex in the current vertex's friend list, and pick the vertex with the smallest distance.
2. If the distance between the query and the selected vertex is smaller than the distance between the query and the current element, move to the selected vertex; it becomes the new current vertex.
3. The algorithm stops when it reaches a local minimum: a vertex whose friend list contains no vertex closer to the query than the vertex itself.
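A compact sketch of that greedy search (my toy implementation over an adjacency list; `dist` is any distance function and `vectors` maps node id to vector):

```python
def nsw_search(graph, vectors, dist, q, entry):
    """Greedy NSW descent: follow the closest friend until a local minimum."""
    cur = entry
    while True:
        friends = graph[cur]                 # friend list of the current vertex
        if not friends:
            return cur
        best = min(friends, key=lambda v: dist(vectors[v], q))
        if dist(vectors[best], q) < dist(vectors[cur], q):
            cur = best                       # move to the closer friend
        else:
            return cur                       # no friend is closer: local minimum
```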
# HNSW
HNSW optimizes further by adding a skip-list structure on top of NSW. The bottom layer holds all data points, and each point has a 50% probability of entering the sorted list one layer up. This keeps the top layers as "expressways" while the bottom layer does the fine-grained search. Through the layered structure, edges are assigned to layers by characteristic radius, making each vertex's average degree across all layers constant, which reduces NSW's polylogarithmic computational complexity to logarithmic.
## Construction
![HNSW layers](https://img-blog.csdnimg.cn/20201205225029284.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM0MjE5OTU5,size_16,color_FFFFFF,t_70)

Layer 0 holds all points of the dataset. You choose a constant ml, and the layer a point can rise to is computed by the formula floor(-ln(uniform(0,1)) x ml), where x is multiplication, floor() means rounding down, uniform(0,1) draws a value from the uniform distribution, and ln() takes the natural logarithm.
## Search
1. The algorithm greedily traverses the elements of the upper layer until it reaches a local minimum.
2. The search then switches to the layer below (which has shorter links), restarts from the element that was the local minimum in the previous layer, and the process repeats.
3. By adopting the layered structure and assigning edges to layers by characteristic radius, NSW's polylogarithmic complexity is reduced to logarithmic.
--------------------------------------------------------------------------------
/recall/工程经验.md:
--------------------------------------------------------------------------------
# Engineering notes
## Negative downsampling
The problem downsampling brings is drift in the predicted CTR: for example, a true CTR of 0.1% climbs to about 10% after downsampling negatives at 0.01. Since accurate bidding and ROI estimation require CTR values that are calibrated and physically meaningful, the CTR model must provide them.
![CTR calibration formula](https://img-blog.csdnimg.cn/2020030316484180.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM0MjE5OTU5,size_16,color_FFFFFF,t_70)
Here q is the corrected CTR, p the model's predicted CTR, and w the negative sampling rate (the standard form is q = p / (p + (1 - p) / w)); the formula follows from a simple odds conversion, which interested readers can derive by hand.
## Online learning with FTRL
![FTRL](https://img-blog.csdnimg.cn/20200303164911944.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM0MjE5OTU5,size_16,color_FFFFFF,t_70)
FTRL trains and updates each dimension of w separately, each dimension with its own learning rate, which accounts for the uneven distribution of the training samples across features (very good for sparse features).
- predict: the L1-norm-plus-strategy makes the trained w very sparse, saving memory when predicting with w (a dimension becomes exactly 0 when it is small enough).
- training: first downsample to reduce the number of negatives, then compensate the negatives with weights during training; also, drop features that appear very rarely in the online training data.
## Model freshness and update strategy
1. Taking GBDT+LR as an example: the GBDT part is refreshed every few days, while the LR part is updated in near real time.
2. Two-tower models: train and store the embeddings offline, and implement the LR or a shallow NN online.
3. PMML, for end-to-end training plus end-to-end deployment: PMML, the "Predictive Model Markup Language", is a general markup language expressing the structures and parameters of different models as XML.
4. TensorFlow Serving: TensorFlow's built-in model serialization saves a trained model's parameters and structure to a file path; the most convenient serving route is a Docker-based model-serving API.
## Online evaluation
![interleaving evaluation](https://img-blog.csdnimg.cn/20200303150054555.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM0MjE5OTU5,size_16,color_FFFFFF,t_70)

The existence of position bias must be taken into account, to avoid algorithm A's videos always ranking first; algorithms A and B must therefore take the leading position alternately, with equal probability.
--------------------------------------------------------------------------------
/推荐算法.xmind:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/推荐算法.xmind
--------------------------------------------------------------------------------
/用户冷启动.xmind:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/用户冷启动.xmind
--------------------------------------------------------------------------------
/用户画像.xmind:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iwtbs/recommend-algorithm/613b6ba55a165c3665eda6f9fc87a16311063249/用户画像.xmind
--------------------------------------------------------------------------------