├── .idea
├── TensorFlow_Practice.iml
├── misc.xml
├── modules.xml
├── vcs.xml
└── workspace.xml
├── README.md
├── ctr_of_recommendation
├── AFM_Demo
│ ├── AFM.py
│ ├── DataReader.py
│ ├── __pycache__
│ │ ├── AFM.cpython-36.pyc
│ │ ├── DataReader.cpython-36.pyc
│ │ └── config.cpython-36.pyc
│ ├── config.py
│ ├── data
│ │ ├── test.csv
│ │ └── train.csv
│ └── main.py
├── DCN_Demo
│ ├── DCN.py
│ ├── DataLoader.py
│ ├── __pycache__
│ │ ├── DCN.cpython-36.pyc
│ │ ├── DataLoader.cpython-36.pyc
│ │ └── config.cpython-36.pyc
│ ├── config.py
│ ├── data
│ │ ├── test.csv
│ │ └── train.csv
│ └── main.py
├── DIEN_Demo
│ ├── GruCell.py
│ ├── data_iterator.py
│ ├── model.py
│ ├── rnn.py
│ ├── source_code
│ │ ├── Dice.py
│ │ ├── data_iterator.py
│ │ ├── generate_voc.py
│ │ ├── local_aggretor.py
│ │ ├── model.py
│ │ ├── process_data.py
│ │ ├── rnn.py
│ │ ├── shuffle.py
│ │ ├── split_by_user.py
│ │ ├── train.py
│ │ └── utils.py
│ ├── train.py
│ ├── utils.py
│ └── vecAttGruCell.py
├── DIN_Demo
│ ├── .idea
│ │ ├── Basic-DIN-Demo.iml
│ │ ├── misc.xml
│ │ ├── modules.xml
│ │ └── workspace.xml
│ ├── Dice.py
│ ├── README.md
│ ├── build_dataset.py
│ ├── convert_pd.py
│ ├── input.py
│ ├── model.py
│ ├── remap_id.py
│ ├── train.py
│ └── utils
│ │ ├── 0_download_raw.sh
│ │ ├── 1_convert_pd.py
│ │ ├── 2_remap_id.py
│ │ └── auc.png
├── DSIN_Demo
│ ├── config.py
│ ├── dsin.py
│ ├── gen_dsin_input.py
│ ├── gen_sampled_data.py
│ ├── gen_sessions.py
│ └── train_dsin.py
├── DeepFM_model
│ ├── .ipynb_checkpoints
│ │ └── DeepFM-StepByStep-checkpoint.ipynb
│ ├── DataReader.py
│ ├── DeepFM-StepByStep.ipynb
│ ├── DeepFM.py
│ ├── config.py
│ ├── data
│ │ ├── test.csv
│ │ └── train.csv
│ ├── fig
│ │ ├── DNN.png
│ │ ├── DeepFM.png
│ │ └── FM.png
│ ├── main.py
│ ├── metrics.py
│ └── output
│ │ ├── DNN_Mean-0.31183_Std0.29369.csv
│ │ ├── DeepFM_Mean-0.11470_Std0.37335.csv
│ │ ├── DeepFM_Mean0.01434_Std0.10176.csv
│ │ ├── DeepFM_Mean0.05735_Std0.20027.csv
│ │ ├── DeepFM_Mean0.26137_Std0.00210.csv
│ │ └── FM_Mean0.23297_Std0.05576.csv
├── FFM_Demo
│ ├── FFM_model.py
│ └── TFModel
│ │ ├── FFM-0.data-00000-of-00001
│ │ ├── FFM-0.index
│ │ ├── FFM-0.meta
│ │ ├── FFM
│ │ ├── events.out.tfevents.1523526908.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523527022.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523527136.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523527252.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523527416.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530263.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530409.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530500.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530509.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530517.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530526.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530538.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530548.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530556.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530568.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530579.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530589.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530598.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530606.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530618.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530632.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530643.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530653.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530660.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530668.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530675.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530686.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530695.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530703.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530710.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530718.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530726.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530736.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530744.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530751.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530759.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530766.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530774.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530781.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530789.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530798.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530808.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530820.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530827.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530835.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530844.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530852.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530860.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530868.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530875.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530883.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530891.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530898.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530906.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530913.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530921.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530930.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530938.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530945.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530953.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530961.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530968.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530976.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523530984.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523537511.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523537521.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523537530.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523537538.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523537547.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523537556.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523537565.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523537574.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523537583.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523537591.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523537600.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523537608.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523537616.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523537624.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523537632.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523537641.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523537652.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523537662.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523537672.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523537682.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523537691.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523537700.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523537709.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523537719.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523537728.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523537736.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523537745.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523537754.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523537763.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523537772.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523537781.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523537790.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523537799.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523537807.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523537815.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523537825.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523537834.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523537843.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523537852.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523537861.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523537871.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523537880.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523537888.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523537897.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523537906.meituan-sxwdeMacBook-Pro-4.local
│ │ ├── events.out.tfevents.1523537915.meituan-sxwdeMacBook-Pro-4.local
│ │ └── events.out.tfevents.1523537925.meituan-sxwdeMacBook-Pro-4.local
│ │ └── checkpoint
├── FM_demo
│ ├── FM_model.py
│ └── data
│ │ ├── ua.base
│ │ └── ua.test
├── FNN_demo
│ ├── FNN.py
│ ├── __init__.py
│ └── preprocess.py
├── GBDT+LR-Demo
│ ├── .idea
│ │ ├── GBDT+LR-Demo.iml
│ │ ├── misc.xml
│ │ ├── modules.xml
│ │ └── workspace.xml
│ ├── GBDT_LR.py
│ ├── data
│ │ ├── test.csv
│ │ └── train.csv
│ └── model.txt
├── MLR(LS-PLM)_Demo
│ ├── .idea
│ │ ├── Basic-MLR-Demo.iml
│ │ ├── misc.xml
│ │ ├── modules.xml
│ │ └── workspace.xml
│ ├── __pycache__
│ │ └── data.cpython-37.pyc
│ ├── data.py
│ ├── data
│ │ ├── adult
│ │ ├── adult.data.txt
│ │ ├── adult.names
│ │ ├── adult.test.txt
│ │ └── old.adult.names
│ ├── lr.py
│ ├── mlr.py
│ └── plotResult.py
├── NFM_Demo
│ ├── DataReader.py
│ ├── NFM.py
│ ├── __pycache__
│ │ ├── DataReader.cpython-36.pyc
│ │ ├── NFM.cpython-36.pyc
│ │ └── config.cpython-36.pyc
│ ├── config.py
│ ├── data
│ │ ├── test.csv
│ │ └── train.csv
│ └── main.py
├── PNN_Demo
│ ├── .idea
│ │ ├── Basic-PNN-Demo.iml
│ │ ├── misc.xml
│ │ ├── modules.xml
│ │ └── workspace.xml
│ ├── DataReader.py
│ ├── PNN.py
│ ├── __pycache__
│ │ ├── DataReader.cpython-36.pyc
│ │ ├── DataReader.cpython-37.pyc
│ │ ├── PNN.cpython-36.pyc
│ │ ├── PNN.cpython-37.pyc
│ │ ├── config.cpython-36.pyc
│ │ └── config.cpython-37.pyc
│ ├── config.py
│ ├── data
│ │ ├── test.csv
│ │ └── train.csv
│ ├── main.py
│ └── sfsfs.py
└── Wide&Deep_Demo
│ ├── .gitignore
│ ├── .idea
│ ├── Wide&Deep.iml
│ ├── misc.xml
│ ├── modules.xml
│ └── workspace.xml
│ ├── README.md
│ ├── data
│ ├── Index
│ ├── adult.names
│ ├── data_download.py
│ └── old.adult.names
│ ├── wide_component.py
│ └── wide_deep.py
├── related_papers
├── 2016--Wide & Deep Learning for Recommender Systems.pdf
├── 2016-PNN-Product-based Neural Networks for User Response Prediction.pdf
├── 2017-Google-Deep & Cross Network for Ad Click Predictions.pdf
├── 2017-阿里-Deep Interest Network for Click-Through Rate Prediction.pdf
├── 2017-阿里-MLR-Learning Piece-wise Linear Models from Large Scale Data for Ad Click Prediction.pdf
├── An overview of gradient descent optimization algorithms.pdf
├── Attentional Factorization Machines- Learning the Weight of Feature Interactions via Attention Networks.pdf
├── Deep Neural Networks for YouTube Recommendations.pdf
├── DeepFM.pdf
├── FFM.pdf
├── FM.pdf
├── NFM-Neural Factorization Machines for Sparse Predictive Analytics.pdf
├── README.md
└── (GBDT+LR)Practical Lessons from Predicting Clicks on Ads at Facebook.pdf
└── test.txt
/.idea/TensorFlow_Practice.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # TensorFlow Practice For Recommendation System
2 |
3 | # 1. 介绍
4 | 机器学习与深度学习系列(ctr预估):
5 | 仓库主要分享推荐系统相关论文和一些关于推荐的传统模型和深度模型学习实践代码的Demo,持续更新中。
6 |
7 | # 2. 目录
8 | ## 2.1 广告CTR预估模型(ctr_of_recommendation:已更新)
9 |
10 | | shallow model | deep model |
11 | | :----------: | :---------: |
12 | | GBDT + LR | FNN |
13 | | MLR(LS-PLM) | PNN |
14 | | FM | Wide & Deep |
15 | | FFM | DeepFM |
16 | | AFM | NFM |
17 | | AutoInt | DCN |
18 | | ... | DIEN |
19 | | | DSIN |
20 |
21 |
22 |
23 |
24 | ## 2.2 推荐系统
25 | - [x] Deep Neural Network for Youtube Recommendations
26 |
27 | ## 2.3 深度学习
28 | - [x] LSTM原理与实践
29 |
30 | ## 2.4 学习笔记(related_papers:更新中)
31 | - [ ] Batch Normalization
32 |
33 | - [ ] TensorLayer
34 |
35 | - [x] 推荐相关已读论文
36 |
37 |
38 |
39 | **模型中的推荐相关数据集下载**:微云链接:https://share.weiyun.com/2zOvtF2s 密码:8nddnd
40 |
41 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/AFM_Demo/DataReader.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 |
class FeatureDictionary(object):
    """Builds a global feature -> index mapping over train + test data.

    Each numeric column gets a single shared index; every distinct value of a
    categorical column gets its own index.  `feat_dim` is the total number of
    indices allocated, i.e. the one-hot feature space size.
    """

    def __init__(self, trainfile=None, testfile=None,
                 dfTrain=None, dfTest=None, numeric_cols=None,
                 ignore_cols=None):
        """Exactly one of (trainfile, dfTrain) and one of (testfile, dfTest)
        must be provided; file paths are loaded with pandas.read_csv.

        numeric_cols / ignore_cols default to empty lists (None defaults are
        used instead of mutable [] defaults to avoid shared-state bugs).
        """
        assert not ((trainfile is None) and (dfTrain is None)), "trainfile or dfTrain at least one is set"
        assert not ((trainfile is not None) and (dfTrain is not None)), "only one can be set"
        assert not ((testfile is None) and (dfTest is None)), "testfile or dfTest at least one is set"
        assert not ((testfile is not None) and (dfTest is not None)), "only one can be set"

        self.trainfile = trainfile
        self.testfile = testfile
        self.dfTrain = dfTrain
        self.dfTest = dfTest
        self.numeric_cols = numeric_cols if numeric_cols is not None else []
        self.ignore_cols = ignore_cols if ignore_cols is not None else []
        self.gen_feat_dict()

    def gen_feat_dict(self):
        """Populate self.feat_dict and self.feat_dim from train + test columns."""
        dfTrain = self.dfTrain if self.dfTrain is not None else pd.read_csv(self.trainfile)
        dfTest = self.dfTest if self.dfTest is not None else pd.read_csv(self.testfile)

        # Scan both splits together so categories that appear only in the
        # test set still receive an index.
        df = pd.concat([dfTrain, dfTest])

        self.feat_dict = {}
        tc = 0
        for col in df.columns:
            if col in self.ignore_cols:
                continue
            if col in self.numeric_cols:
                # One shared index per numeric column.
                self.feat_dict[col] = tc
                tc += 1
            else:
                # One index per distinct categorical value.
                # (Removed a leftover debug print of the unique values here.)
                us = df[col].unique()
                self.feat_dict[col] = dict(zip(us, range(tc, len(us) + tc)))
                tc += len(us)

        self.feat_dim = tc
54 |
55 |
class DataParser(object):
    """Turns raw samples into parallel feature-index / feature-value lists."""

    def __init__(self, feat_dict):
        # feat_dict: a fitted FeatureDictionary (provides feat_dict,
        # numeric_cols and ignore_cols).
        self.feat_dict = feat_dict

    def parse(self, infile=None, df=None, has_label=False):
        """Parse a CSV path or DataFrame into (indices, values, y-or-ids)."""
        assert not ((infile is None) and (df is None)), "infile or df at least one is set"
        assert not ((infile is not None) and (df is not None)), "only one can be set"

        frame = pd.read_csv(infile) if df is None else df.copy()

        if has_label:
            extra = frame['target'].values.tolist()
            frame.drop(['id', 'target'], axis=1, inplace=True)
        else:
            extra = frame['id'].values.tolist()
            frame.drop(['id'], axis=1, inplace=True)

        # frame       -> per-sample feature indices
        # value_frame -> per-sample feature values
        #                (1/0 indicators for categoricals, raw floats for numerics)
        value_frame = frame.copy()
        for col in list(frame.columns):
            if col in self.feat_dict.ignore_cols:
                frame.drop(col, axis=1, inplace=True)
                value_frame.drop(col, axis=1, inplace=True)
            elif col in self.feat_dict.numeric_cols:
                # Numeric features share one global index; the value column
                # keeps the raw number untouched.
                frame[col] = self.feat_dict.feat_dict[col]
            else:
                frame[col] = frame[col].map(self.feat_dict.feat_dict[col])
                value_frame[col] = 1.

        xi = frame.values.tolist()
        xv = value_frame.values.tolist()
        return xi, xv, extra
97 |
98 |
99 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/AFM_Demo/__pycache__/AFM.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/AFM_Demo/__pycache__/AFM.cpython-36.pyc
--------------------------------------------------------------------------------
/ctr_of_recommendation/AFM_Demo/__pycache__/DataReader.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/AFM_Demo/__pycache__/DataReader.cpython-36.pyc
--------------------------------------------------------------------------------
/ctr_of_recommendation/AFM_Demo/__pycache__/config.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/AFM_Demo/__pycache__/config.cpython-36.pyc
--------------------------------------------------------------------------------
/ctr_of_recommendation/AFM_Demo/config.py:
--------------------------------------------------------------------------------
# Configuration for the AFM demo: file locations, CV settings and the
# column groups used by DataReader (Porto Seguro-style column names).
TRAIN_FILE = "data/train.csv"
TEST_FILE = "data/test.csv"

# Directory where output/submission files are written.
SUB_DIR = "output"


# Cross-validation: number of StratifiedKFold splits and the shared RNG seed.
NUM_SPLITS = 3
RANDOM_SEED = 2017

# types of columns of the dataset dataframe
# (all categorical columns are currently commented out, so every non-numeric,
# non-ignored column is treated as categorical by DataReader)
CATEGORICAL_COLS = [
    # 'ps_ind_02_cat', 'ps_ind_04_cat', 'ps_ind_05_cat',
    # 'ps_car_01_cat', 'ps_car_02_cat', 'ps_car_03_cat',
    # 'ps_car_04_cat', 'ps_car_05_cat', 'ps_car_06_cat',
    # 'ps_car_07_cat', 'ps_car_08_cat', 'ps_car_09_cat',
    # 'ps_car_10_cat', 'ps_car_11_cat',
]

# Columns kept as raw numeric values (each gets a single feature index).
NUMERIC_COLS = [
    # # binary
    # "ps_ind_06_bin", "ps_ind_07_bin", "ps_ind_08_bin",
    # "ps_ind_09_bin", "ps_ind_10_bin", "ps_ind_11_bin",
    # "ps_ind_12_bin", "ps_ind_13_bin", "ps_ind_16_bin",
    # "ps_ind_17_bin", "ps_ind_18_bin",
    # "ps_calc_15_bin", "ps_calc_16_bin", "ps_calc_17_bin",
    # "ps_calc_18_bin", "ps_calc_19_bin", "ps_calc_20_bin",
    # numeric
    "ps_reg_01", "ps_reg_02", "ps_reg_03",
    "ps_car_12", "ps_car_13", "ps_car_14", "ps_car_15",

    # feature engineering (added in main.load_data)
    "missing_feat", "ps_car_13_x_ps_reg_03",
]

# Columns excluded from the feature dictionary entirely.
IGNORE_COLS = [
    "id", "target",
    "ps_calc_01", "ps_calc_02", "ps_calc_03", "ps_calc_04",
    "ps_calc_05", "ps_calc_06", "ps_calc_07", "ps_calc_08",
    "ps_calc_09", "ps_calc_10", "ps_calc_11", "ps_calc_12",
    "ps_calc_13", "ps_calc_14",
    "ps_calc_15_bin", "ps_calc_16_bin", "ps_calc_17_bin",
    "ps_calc_18_bin", "ps_calc_19_bin", "ps_calc_20_bin"
]
44 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/AFM_Demo/main.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import pandas as pd
4 | import tensorflow as tf
5 | from sklearn.metrics import make_scorer
6 | from sklearn.model_selection import StratifiedKFold
7 | from DataReader import FeatureDictionary, DataParser
8 | from matplotlib import pyplot as plt
9 | import config
10 | from AFM import AFM
11 |
def load_data():
    """Read the train/test CSVs, add engineered features, and split out arrays.

    Returns (dfTrain, dfTest, X_train, y_train, X_test, ids_test,
    cat_features_indices).
    """
    def _augment(frame):
        # missing_feat: per-row count of -1 sentinel values; plus one
        # hand-crafted interaction feature.
        feature_cols = [c for c in frame.columns if c not in ('id', 'target')]
        frame['missing_feat'] = np.sum((frame[feature_cols] == -1).values, axis=1)
        frame['ps_car_13_x_ps_reg_03'] = frame['ps_car_13'] * frame['ps_reg_03']
        return frame

    dfTrain = _augment(pd.read_csv(config.TRAIN_FILE))
    dfTest = _augment(pd.read_csv(config.TEST_FILE))

    # Feature columns: everything except id/target and the ignored set.
    cols = [c for c in dfTrain.columns
            if c not in ('id', 'target') and c not in config.IGNORE_COLS]

    X_train = dfTrain[cols].values
    y_train = dfTrain['target'].values
    X_test = dfTest[cols].values
    ids_test = dfTest['id'].values

    cat_features_indices = [i for i, c in enumerate(cols) if c in config.CATEGORICAL_COLS]

    return dfTrain, dfTest, X_train, y_train, X_test, ids_test, cat_features_indices
38 |
def run_base_model_nfm(dfTrain,dfTest,folds,pnn_params):
    """Train one AFM model per cross-validation fold.

    Parameters:
        dfTrain, dfTest: DataFrames already augmented by load_data().
        folds: list of (train_idx, valid_idx) arrays from StratifiedKFold.
        pnn_params: AFM hyper-parameter dict; mutated in place to add the
            data-derived 'feature_size' and 'field_size' entries.

    NOTE(review): this function has no return statement (returns None), yet
    the module-level caller unpacks two values from it, which raises a
    TypeError at runtime — confirm what it was meant to return.
    """
    fd = FeatureDictionary(dfTrain=dfTrain,
                           dfTest=dfTest,
                           numeric_cols=config.NUMERIC_COLS,
                           ignore_cols = config.IGNORE_COLS)
    data_parser = DataParser(feat_dict= fd)
    # Xi_train: per-sample lists of feature indices
    # Xv_train: per-sample lists of the corresponding feature values
    Xi_train,Xv_train,y_train = data_parser.parse(df=dfTrain,has_label=True)
    Xi_test,Xv_test,ids_test = data_parser.parse(df=dfTest)

    print(dfTrain.dtypes)

    # Model dimensions derived from the parsed data.
    pnn_params['feature_size'] = fd.feat_dim
    pnn_params['field_size'] = len(Xi_train[0])


    # Select the elements of list x at positions l.
    _get = lambda x,l:[x[i] for i in l]



    for i, (train_idx, valid_idx) in enumerate(folds):
        Xi_train_, Xv_train_, y_train_ = _get(Xi_train, train_idx), _get(Xv_train, train_idx), _get(y_train, train_idx)
        Xi_valid_, Xv_valid_, y_valid_ = _get(Xi_train, valid_idx), _get(Xv_train, valid_idx), _get(y_train, valid_idx)

        afm = AFM(**pnn_params)
        afm.fit(Xi_train_, Xv_train_, y_train_, Xi_valid_, Xv_valid_, y_valid_)
66 |
67 |
# AFM hyper-parameters; 'feature_size' and 'field_size' are filled in later,
# inside run_base_model_nfm(), from the parsed data.
pnn_params = {
    "embedding_size":8,
    "attention_size":10,
    "deep_layers":[32,32],
    "dropout_deep":[0.5,0.5,0.5],
    "deep_layer_activation":tf.nn.relu,
    "epoch":30,
    "batch_size":1024,
    "learning_rate":0.001,
    "optimizer":"adam",
    "batch_norm":1,
    "batch_norm_decay":0.995,
    "verbose":True,
    "random_seed":config.RANDOM_SEED,
    "deep_init_size":50,
    "use_inner":False

}

# load data
dfTrain, dfTest, X_train, y_train, X_test, ids_test, cat_features_indices = load_data()

# folds: stratified so each split preserves the target's class ratio
folds = list(StratifiedKFold(n_splits=config.NUM_SPLITS, shuffle=True,
                             random_state=config.RANDOM_SEED).split(X_train, y_train))

#y_train_pnn,y_test_pnn = run_base_model_pnn(dfTrain,dfTest,folds,pnn_params)
# Bug fix: run_base_model_nfm() has no return statement, so unpacking two
# values from its None result raised a TypeError; call it for its training
# side effects instead.
run_base_model_nfm(dfTrain, dfTest, folds, pnn_params)
96 |
97 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/DCN_Demo/DataLoader.py:
--------------------------------------------------------------------------------
1 |
2 | import numpy as np
3 | import pandas as pd
4 |
5 |
class FeatureDictionary(object):
    """Builds a global categorical-value -> embedding-index mapping.

    NOTE(review): despite their names, `trainfile`/`testfile` are consumed as
    pandas DataFrames (see pd.concat below), and the only caller in this repo
    passes DataFrames positionally — confirm before passing file paths.
    """

    def __init__(self, trainfile=None, testfile=None,
                 numeric_cols=None,
                 ignore_cols=None,
                 cate_cols=None):
        """Store the column groups and immediately build the dictionary.

        Column-list arguments default to empty lists (None defaults are used
        instead of mutable [] defaults to avoid shared-state bugs).
        """
        self.trainfile = trainfile
        self.testfile = testfile
        self.cate_cols = cate_cols if cate_cols is not None else []
        self.numeric_cols = numeric_cols if numeric_cols is not None else []
        self.ignore_cols = ignore_cols if ignore_cols is not None else []
        self.gen_feat_dict()

    def gen_feat_dict(self):
        """Populate self.feat_dict / self.feat_dim from train + test columns."""
        # Scan both splits together so categories that appear only in the
        # test set still receive an index.
        df = pd.concat([self.trainfile, self.testfile])
        self.feat_dict = {}
        self.feat_len = {}  # kept for interface compatibility (never populated here)
        tc = 0
        for col in df.columns:
            # Numeric and ignored columns take no embedding slots in this model.
            if col in self.ignore_cols or col in self.numeric_cols:
                continue
            us = df[col].unique()
            self.feat_dict[col] = dict(zip(us, range(tc, len(us) + tc)))
            tc += len(us)
        self.feat_dim = tc
33 |
34 |
35 |
36 |
class DataParser(object):
    """Converts raw rows into categorical index/value lists plus raw numeric values."""

    def __init__(self, feat_dict):
        # feat_dict: a fitted FeatureDictionary (provides feat_dict,
        # numeric_cols and ignore_cols).
        self.feat_dict = feat_dict

    def parse(self, infile=None, df=None, has_label=False):
        """Parse a CSV path or DataFrame into (cate_Xi, cate_Xv, numeric_Xv, y-or-ids)."""
        assert not ((infile is None) and (df is None)), "infile or df at least one is set"
        assert not ((infile is not None) and (df is not None)), "only one can be set"

        frame = pd.read_csv(infile) if df is None else df.copy()

        if has_label:
            extra = frame["target"].values.tolist()
            frame.drop(["id", "target"], axis=1, inplace=True)
        else:
            extra = frame["id"].values.tolist()
            frame.drop(["id"], axis=1, inplace=True)

        # Numeric columns keep their raw values and are returned separately
        # from the categorical index/value pair.
        numeric_Xv = frame[self.feat_dict.numeric_cols].values.tolist()
        frame.drop(self.feat_dict.numeric_cols, axis=1, inplace=True)

        # frame       -> per-sample categorical feature indices
        # value_frame -> per-sample categorical feature values (constant 1.0 indicators)
        value_frame = frame.copy()
        for col in list(frame.columns):
            if col in self.feat_dict.ignore_cols:
                frame.drop(col, axis=1, inplace=True)
                value_frame.drop(col, axis=1, inplace=True)
            else:
                frame[col] = frame[col].map(self.feat_dict.feat_dict[col])
                value_frame[col] = 1.

        cate_Xi = frame.values.tolist()
        cate_Xv = value_frame.values.tolist()
        return cate_Xi, cate_Xv, numeric_Xv, extra
--------------------------------------------------------------------------------
/ctr_of_recommendation/DCN_Demo/__pycache__/DCN.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/DCN_Demo/__pycache__/DCN.cpython-36.pyc
--------------------------------------------------------------------------------
/ctr_of_recommendation/DCN_Demo/__pycache__/DataLoader.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/DCN_Demo/__pycache__/DataLoader.cpython-36.pyc
--------------------------------------------------------------------------------
/ctr_of_recommendation/DCN_Demo/__pycache__/config.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/DCN_Demo/__pycache__/config.cpython-36.pyc
--------------------------------------------------------------------------------
/ctr_of_recommendation/DCN_Demo/config.py:
--------------------------------------------------------------------------------
# Configuration for the DCN demo: file locations, CV settings and the
# column groups used by DataLoader (Porto Seguro-style column names).
TRAIN_FILE = "data/train.csv"
TEST_FILE = "data/test.csv"

# Directory where output/submission files are written.
SUB_DIR = "output"


# Cross-validation: number of StratifiedKFold splits and the shared RNG seed.
NUM_SPLITS = 3
RANDOM_SEED = 2017

# types of columns of the dataset dataframe
CATEGORICAL_COLS = [
    'ps_ind_02_cat', 'ps_ind_04_cat', 'ps_ind_05_cat',
    'ps_car_01_cat', 'ps_car_02_cat', 'ps_car_03_cat',
    'ps_car_04_cat', 'ps_car_05_cat', 'ps_car_06_cat',
    'ps_car_07_cat', 'ps_car_08_cat', 'ps_car_09_cat',
    'ps_car_10_cat', 'ps_car_11_cat',
]

# Columns fed to the model as raw numeric values (no embedding).
NUMERIC_COLS = [
    # # binary
    # "ps_ind_06_bin", "ps_ind_07_bin", "ps_ind_08_bin",
    # "ps_ind_09_bin", "ps_ind_10_bin", "ps_ind_11_bin",
    # "ps_ind_12_bin", "ps_ind_13_bin", "ps_ind_16_bin",
    # "ps_ind_17_bin", "ps_ind_18_bin",
    # "ps_calc_15_bin", "ps_calc_16_bin", "ps_calc_17_bin",
    # "ps_calc_18_bin", "ps_calc_19_bin", "ps_calc_20_bin",
    # numeric
    "ps_reg_01", "ps_reg_02", "ps_reg_03",
    "ps_car_12", "ps_car_13", "ps_car_14", "ps_car_15",

    # feature engineering (added in main.load_data)
    "missing_feat", "ps_car_13_x_ps_reg_03",
]

# Columns excluded from the feature dictionary entirely.
IGNORE_COLS = [
    "id", "target",
    "ps_calc_01", "ps_calc_02", "ps_calc_03", "ps_calc_04",
    "ps_calc_05", "ps_calc_06", "ps_calc_07", "ps_calc_08",
    "ps_calc_09", "ps_calc_10", "ps_calc_11", "ps_calc_12",
    "ps_calc_13", "ps_calc_14",
    "ps_calc_15_bin", "ps_calc_16_bin", "ps_calc_17_bin",
    "ps_calc_18_bin", "ps_calc_19_bin", "ps_calc_20_bin"
]
44 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/DCN_Demo/main.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 | import pandas as pd
4 | import numpy as np
5 |
6 | import config
7 |
8 | from sklearn.model_selection import StratifiedKFold
9 | from DataLoader import FeatureDictionary, DataParser
10 |
11 | from DCN import DCN
12 |
13 |
14 |
def load_data():
    """Read the train/test CSVs, add engineered features, and split out arrays.

    Returns (dfTrain, dfTest, X_train, y_train, X_test, ids_test).
    """
    def _augment(frame):
        # missing_feat: per-row count of -1 sentinel values; plus one
        # hand-crafted interaction feature.
        feature_cols = [c for c in frame.columns if c not in ("id", "target")]
        frame["missing_feat"] = np.sum((frame[feature_cols] == -1).values, axis=1)
        frame["ps_car_13_x_ps_reg_03"] = frame["ps_car_13"] * frame["ps_reg_03"]
        return frame

    dfTrain = _augment(pd.read_csv(config.TRAIN_FILE))
    dfTest = _augment(pd.read_csv(config.TEST_FILE))

    # Feature columns: everything except id/target and the ignored set.
    cols = [c for c in dfTrain.columns
            if c not in ("id", "target") and c not in config.IGNORE_COLS]

    X_train = dfTrain[cols].values
    y_train = dfTrain["target"].values
    X_test = dfTest[cols].values
    ids_test = dfTest["id"].values

    return dfTrain, dfTest, X_train, y_train, X_test, ids_test
37 |
38 |
def run_base_model_dcn(dfTrain, dfTest, folds, dcn_params):
    """Fit one DCN model per cross-validation fold.

    Parameters:
        dfTrain, dfTest: DataFrames already augmented by load_data().
        folds: list of (train_idx, valid_idx) arrays from StratifiedKFold.
        dcn_params: DCN hyper-parameter dict; mutated in place to add the
            data-derived 'cate_feature_size', 'field_size' and
            'numeric_feature_size' entries.

    Returns None; the trained models are used only for their side effects.
    """

    fd = FeatureDictionary(dfTrain,dfTest,numeric_cols=config.NUMERIC_COLS,
                           ignore_cols=config.IGNORE_COLS,
                           cate_cols = config.CATEGORICAL_COLS)

    # Debug output: total categorical index count and the full mapping.
    print(fd.feat_dim)
    print(fd.feat_dict)

    data_parser = DataParser(feat_dict=fd)
    # cate_Xi: categorical feature indices; cate_Xv: their (1.0) values;
    # numeric_Xv: raw numeric feature values.
    cate_Xi_train, cate_Xv_train, numeric_Xv_train,y_train = data_parser.parse(df=dfTrain, has_label=True)
    cate_Xi_test, cate_Xv_test, numeric_Xv_test,ids_test = data_parser.parse(df=dfTest)

    # Model dimensions derived from the parsed data.
    dcn_params["cate_feature_size"] = fd.feat_dim
    dcn_params["field_size"] = len(cate_Xi_train[0])
    dcn_params['numeric_feature_size'] = len(config.NUMERIC_COLS)

    # Select the elements of list x at positions l.
    _get = lambda x, l: [x[i] for i in l]

    for i, (train_idx, valid_idx) in enumerate(folds):
        cate_Xi_train_, cate_Xv_train_, numeric_Xv_train_,y_train_ = _get(cate_Xi_train, train_idx), _get(cate_Xv_train, train_idx),_get(numeric_Xv_train, train_idx), _get(y_train, train_idx)
        cate_Xi_valid_, cate_Xv_valid_, numeric_Xv_valid_,y_valid_ = _get(cate_Xi_train, valid_idx), _get(cate_Xv_train, valid_idx),_get(numeric_Xv_train, valid_idx), _get(y_train, valid_idx)

        dcn = DCN(**dcn_params)

        dcn.fit(cate_Xi_train_, cate_Xv_train_, numeric_Xv_train_,y_train_, cate_Xi_valid_, cate_Xv_valid_, numeric_Xv_valid_,y_valid_)
65 |
# One-off helpers (commented out) that subsample the CSVs in place to speed
# up local experiments.
#dfTrain = pd.read_csv(config.TRAIN_FILE,nrows=10000,index_col=None).to_csv(config.TRAIN_FILE,index=False)
#dfTest = pd.read_csv(config.TEST_FILE,nrows=2000,index_col=None).to_csv(config.TEST_FILE,index=False)

dfTrain, dfTest, X_train, y_train, X_test, ids_test = load_data()
print('load_data_over')
# Stratified folds: each split preserves the target's class ratio.
folds = list(StratifiedKFold(n_splits=config.NUM_SPLITS, shuffle=True,
                             random_state=config.RANDOM_SEED).split(X_train, y_train))
print('process_data_over')

# DCN hyper-parameters; the data-derived sizes ('cate_feature_size',
# 'field_size', 'numeric_feature_size') are filled in by run_base_model_dcn().
dcn_params = {

    "embedding_size": 8,
    "deep_layers": [32, 32],
    "dropout_deep": [0.5, 0.5, 0.5],
    "deep_layers_activation": tf.nn.relu,
    "epoch": 30,
    "batch_size": 1024,
    "learning_rate": 0.001,
    "optimizer_type": "adam",
    "batch_norm": 1,
    "batch_norm_decay": 0.995,
    "l2_reg": 0.01,
    "verbose": True,
    "random_seed": config.RANDOM_SEED,
    "cross_layer_num":3
}
print('start train')
run_base_model_dcn(dfTrain, dfTest, folds, dcn_params)
94 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/DIEN_Demo/GruCell.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tensorflow.python.ops.rnn_cell import *
3 | from tensorflow.python.ops.rnn_cell_impl import _Linear
4 |
5 | from tensorflow.python.ops import math_ops
6 | from tensorflow.python.ops import init_ops
7 | from tensorflow.python.ops import array_ops
8 | from tensorflow.python.ops import variable_scope as vs
9 |
class GRUCell(RNNCell):
    """Standard GRU cell re-implemented on top of TF1's private ``_Linear``.

    Mirrors tf.nn.rnn_cell.GRUCell; kept in this repo so DIEN's augmented
    cell (vecAttGruCell.py) shares the same structure.
    """

    def __init__(self,
                 num_units,
                 activation=None,
                 reuse=None,
                 kernel_initializer=None,
                 bias_initializer=None):
        # num_units: hidden-state width; activation defaults to tanh.
        super(GRUCell, self).__init__(_reuse=reuse)
        self._num_units = num_units
        self._activation = activation or math_ops.tanh
        self._kernel_initializer = kernel_initializer
        self._bias_initializer = bias_initializer
        # The gate/candidate layers are built lazily on the first call so
        # their variables land inside the caller's variable scope.
        self._gate_linear = None
        self._candidate_linear = None

    @property
    def state_size(self):
        return self._num_units

    @property
    def output_size(self):
        return self._num_units

    def call(self, inputs, state):
        """Gated recurrent unit (GRU) with nunits cells."""
        if self._gate_linear is None:
            bias_ones = self._bias_initializer
            if self._bias_initializer is None:
                # Gate bias of 1.0 keeps reset/update gates open early on.
                bias_ones = init_ops.constant_initializer(1.0, dtype=inputs.dtype)
            with vs.variable_scope("gates"):  # Reset gate and update gate.
                self._gate_linear = _Linear(
                    [inputs, state],
                    2 * self._num_units,
                    True,
                    bias_initializer=bias_ones,
                    kernel_initializer=self._kernel_initializer)

        # r = reset gate, u = update gate, both in (0, 1).
        value = math_ops.sigmoid(self._gate_linear([inputs, state]))
        r, u = array_ops.split(value=value, num_or_size_splits=2, axis=1)

        r_state = r * state
        if self._candidate_linear is None:
            with vs.variable_scope("candidate"):
                self._candidate_linear = _Linear(
                    [inputs, r_state],
                    self._num_units,
                    True,
                    bias_initializer=self._bias_initializer,
                    kernel_initializer=self._kernel_initializer)
        # Candidate state from the input and the reset-gated old state.
        c = self._activation(self._candidate_linear([inputs, r_state]))
        # Convex combination of previous state and candidate.
        new_h = u * state + (1 - u) * c
        return new_h, new_h
--------------------------------------------------------------------------------
/ctr_of_recommendation/DIEN_Demo/source_code/Dice.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
def dice(_x, axis=-1, epsilon=0.000000001, name=''):
    """Dice activation: a PReLU whose gate is the sigmoid of the
    batch-normalised input (training-mode statistics only)."""
    with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
        # Learnable slope for the suppressed side, one per feature.
        alphas = tf.get_variable('alpha' + name, _x.get_shape()[-1],
                                 initializer=tf.constant_initializer(0.0),
                                 dtype=tf.float32)
        input_shape = list(_x.get_shape())

    # Normalise over every axis except ``axis``.
    stat_axes = list(range(len(input_shape)))
    del stat_axes[axis]
    bcast_shape = [1] * len(input_shape)
    bcast_shape[axis] = input_shape[axis]

    # Statistics of the current batch (no moving averages kept).
    mean = tf.reshape(tf.reduce_mean(_x, axis=stat_axes), bcast_shape)
    variance = tf.reduce_mean(tf.square(_x - mean) + epsilon, axis=stat_axes)
    stdev = tf.reshape(tf.sqrt(variance), bcast_shape)
    x_p = tf.sigmoid((_x - mean) / (stdev + epsilon))

    # Gate between identity (x_p -> 1) and the alpha-scaled branch.
    return alphas * (1.0 - x_p) * _x + x_p * _x
27 |
def parametric_relu(_x):
    """PReLU: identity for positive inputs, learnable per-feature slope for negatives."""
    alphas = tf.get_variable('alpha', _x.get_shape()[-1],
                             initializer=tf.constant_initializer(0.0),
                             dtype=tf.float32)
    # (x - |x|)/2 equals min(x, 0), so negatives are scaled by alpha.
    return tf.nn.relu(_x) + alphas * (_x - abs(_x)) * 0.5
36 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/DIEN_Demo/source_code/generate_voc.py:
--------------------------------------------------------------------------------
# Build frequency-ordered id vocabularies from the DIEN training split.
#
# NOTE(review): ported from Python 2 (cPickle / iteritems / text-mode
# pickle files).  The history columns are joined with the ASCII \x02
# control character in the DIEN preprocessing scripts; the original
# ``split("")`` calls had lost that invisible literal (an empty separator
# raises ValueError), so it is restored explicitly here.
import pickle

SEP = "\x02"  # separator between ids inside the history columns

f_train = open("local_train_splitByUser", "r")
uid_dict = {}
mid_dict = {}
cat_dict = {}

for line in f_train:
    arr = line.strip("\n").split("\t")
    uid = arr[1]
    mid = arr[2]
    cat = arr[3]
    mid_list = arr[4]
    cat_list = arr[5]
    # Count every occurrence so the vocabularies can be frequency-ordered.
    uid_dict[uid] = uid_dict.get(uid, 0) + 1
    mid_dict[mid] = mid_dict.get(mid, 0) + 1
    cat_dict[cat] = cat_dict.get(cat, 0) + 1
    if len(mid_list) == 0:
        continue
    for m in mid_list.split(SEP):
        mid_dict[m] = mid_dict.get(m, 0) + 1
    for c in cat_list.split(SEP):
        cat_dict[c] = cat_dict.get(c, 0) + 1
f_train.close()

sorted_uid_dict = sorted(uid_dict.items(), key=lambda x: x[1], reverse=True)
sorted_mid_dict = sorted(mid_dict.items(), key=lambda x: x[1], reverse=True)
sorted_cat_dict = sorted(cat_dict.items(), key=lambda x: x[1], reverse=True)

# Most frequent id gets the smallest index; mid/cat reserve index 0 for
# an out-of-vocabulary default token.
uid_voc = {}
index = 0
for key, value in sorted_uid_dict:
    uid_voc[key] = index
    index += 1

mid_voc = {"default_mid": 0}
index = 1
for key, value in sorted_mid_dict:
    mid_voc[key] = index
    index += 1

cat_voc = {"default_cat": 0}
index = 1
for key, value in sorted_cat_dict:
    cat_voc[key] = index
    index += 1

# Pickle files must be opened in binary mode under Python 3.
pickle.dump(uid_voc, open("uid_voc.pkl", "wb"))
pickle.dump(mid_voc, open("mid_voc.pkl", "wb"))
pickle.dump(cat_voc, open("cat_voc.pkl", "wb"))
66 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/DIEN_Demo/source_code/local_aggretor.py:
--------------------------------------------------------------------------------
# Aggregate per-line click records into per-example behaviour histories.
#
# NOTE(review): ported from Python 2 ``print >> fo``.  History ids are
# joined with the ASCII \x02 control character; the original concatenated
# ``mid + ""`` where the invisible \x02 literal had been lost, so it is
# restored explicitly here.
import sys
import hashlib
import random

SEP = "\x02"  # separator between ids inside the history columns

fin = open("jointed-new-split-info", "r")
ftrain = open("local_train", "w")
ftest = open("local_test", "w")

last_user = "0"
common_fea = ""
line_idx = 0
for line in fin:
    items = line.strip().split("\t")
    ds = items[0]        # split tag: 20180118 -> train, otherwise test
    clk = int(items[1])  # click label
    user = items[2]
    movie_id = items[3]
    dt = items[5]
    cat1 = items[6]

    fo = ftrain if ds == "20180118" else ftest

    if user != last_user:
        # New user: start a fresh behaviour history.
        movie_id_list = []
        cate1_list = []
    else:
        # Emit one example per record once the user has >= 1 history item.
        history_clk_num = len(movie_id_list)
        mid_str = SEP.join(movie_id_list)
        cat_str = SEP.join(cate1_list)
        if history_clk_num >= 1:
            print(items[1] + "\t" + user + "\t" + movie_id + "\t" + cat1 +
                  "\t" + mid_str + "\t" + cat_str, file=fo)
    last_user = user
    if clk:
        # Only clicked items become part of the behaviour history.
        movie_id_list.append(movie_id)
        cate1_list.append(cat1)
    line_idx += 1
--------------------------------------------------------------------------------
/ctr_of_recommendation/DIEN_Demo/source_code/process_data.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import random
3 | import time
4 |
def process_meta(file):
    """Extract (asin, leaf category) pairs from the raw meta dump.

    Each input line is a Python dict literal; the last entry of the first
    ``categories`` path is taken as the item's category.  Writes
    ``item-info`` as tab-separated ``asin<TAB>category`` lines.
    """
    # NOTE(review): eval() on external data is unsafe in general; kept
    # because the Amazon dumps are dict literals, not valid JSON.
    with open(file, "r") as fi, open("item-info", "w") as fo:
        for line in fi:
            obj = eval(line)
            cat = obj["categories"][0][-1]
            print(obj["asin"] + "\t" + cat, file=fo)
12 |
def process_reviews(file):
    """Flatten raw review records into ``reviews-info``.

    Each output line is ``userID<TAB>itemID<TAB>rating<TAB>timestamp``.
    """
    with open(file, "r") as fi, open("reviews-info", "w") as fo:
        for line in fi:
            obj = eval(line)  # dict literal per line (dumps are not JSON)
            userID = obj["reviewerID"]
            itemID = obj["asin"]
            rating = obj["overall"]
            # Renamed from ``time`` to avoid shadowing the time module.
            ts = obj["unixReviewTime"]
            print(userID + "\t" + itemID + "\t" + str(rating) + "\t" + str(ts), file=fo)
24 |
def manual_join():
    """Join reviews with item categories and add negative samples.

    Reads ``reviews-info`` and ``item-info`` from the working directory
    and writes ``jointed-new``: for every (time-sorted) review one
    negatively-sampled line (label 0, random different item) followed by
    the positive line (label 1, true item).
    """
    user_map = {}   # userID -> [(raw line, timestamp)]
    item_list = []  # one entry per review: popularity-biased sampling pool
    with open("reviews-info", "r") as f_rev:
        for line in f_rev:
            line = line.strip()
            items = line.split("\t")
            user_map.setdefault(items[0], []).append(("\t".join(items), float(items[-1])))
            item_list.append(items[1])

    meta_map = {}   # asin -> category (first occurrence wins)
    with open("item-info", "r") as f_meta:
        for line in f_meta:
            arr = line.strip().split("\t")
            if arr[0] not in meta_map:
                meta_map[arr[0]] = arr[1]

    with open("jointed-new", "w") as fo:
        for key in user_map:
            sorted_user_bh = sorted(user_map[key], key=lambda x: x[1])
            for line, _t in sorted_user_bh:
                items = line.split("\t")
                asin = items[1]
                j = 0
                while True:
                    # Resample until the negative differs from the positive.
                    # NOTE(review): loops forever if only one distinct item
                    # exists in the pool.
                    asin_neg_index = random.randint(0, len(item_list) - 1)
                    asin_neg = item_list[asin_neg_index]
                    if asin_neg == asin:
                        continue
                    items[1] = asin_neg
                    print("0" + "\t" + "\t".join(items) + "\t" + meta_map[asin_neg], file=fo)
                    j += 1
                    if j == 1:  # negative sampling frequency
                        break
                if asin in meta_map:
                    print("1" + "\t" + line + "\t" + meta_map[asin], file=fo)
                else:
                    print("1" + "\t" + line + "\t" + "default_cat", file=fo)
66 |
67 |
def split_test():
    """Tag each line of ``jointed-new`` as train or test.

    The last two lines of every user's history (one positive plus its
    negative sample) are tagged 20190119 (test); everything earlier is
    tagged 20180118 (train).  Input is assumed to be grouped by user.
    """
    with open("jointed-new", "r") as fi, open("jointed-new-split-info", "w") as fo:
        # First pass: count lines per user.
        user_count = {}
        for line in fi:
            user = line.strip().split("\t")[1]
            user_count[user] = user_count.get(user, 0) + 1
        fi.seek(0)

        # Second pass: tag lines, resetting the per-user counter whenever
        # the user changes (this merges the original duplicated branches).
        i = 0
        last_user = "A26ZDKC53OP6JD"  # sentinel unlikely to be the first user
        for line in fi:
            line = line.strip()
            user = line.split("\t")[1]
            if user != last_user:
                last_user = user
                i = 0
            if i < user_count[user] - 2:  # 1 positive + negative samples
                print("20180118" + "\t" + line, file=fo)
            else:
                print("20190119" + "\t" + line, file=fo)
            i += 1
97 |
98 | process_meta(sys.argv[1])
99 | process_reviews(sys.argv[2])
100 | manual_join()
101 | split_test()
102 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/DIEN_Demo/source_code/shuffle.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import random
4 |
5 | import tempfile
6 | from subprocess import call
7 |
8 |
def main(file, temporary=False):
    """Shuffle the lines of ``file``.

    Writes the shuffled lines to ``file + '.shuf'`` (or to an anonymous
    temporary file next to ``file`` when ``temporary`` is True) and
    returns the output file object (closed unless ``temporary``).
    """
    # Stage the input through a temp file first (keeps the original
    # two-pass behaviour).  FIX: the temp directory is no longer
    # hard-coded to a developer's home directory, and the raw fd from
    # mkstemp is closed instead of leaked.
    tf_os, tpath = tempfile.mkstemp()
    os.close(tf_os)
    with open(tpath, "w") as tmp, open(file, "r") as src:
        for l in src:
            print(l.strip("\n"), file=tmp)

    with open(tpath, "r") as tmp:
        lines = tmp.readlines()
    random.shuffle(lines)

    if temporary:
        path, filename = os.path.split(os.path.realpath(file))
        # FIX: text mode required; TemporaryFile defaults to binary.
        fd = tempfile.TemporaryFile(prefix=filename + '.shuf', dir=path, mode="w+")
    else:
        fd = open(file + '.shuf', 'w')

    for l in lines:
        print(l.strip("\n"), file=fd)

    if temporary:
        fd.seek(0)
    else:
        fd.close()

    os.remove(tpath)

    return fd


if __name__ == '__main__':
    main(sys.argv[1])
43 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/DIEN_Demo/source_code/split_by_user.py:
--------------------------------------------------------------------------------
# Split the (noclk, clk) line pairs of local_test into train/test by
# sending roughly 10% of pairs to the test split.
# NOTE(review): ported from Python 2 ``print >> fo``.
import random

fi = open("local_test", "r")
ftrain = open("local_train_splitByUser", "w")
ftest = open("local_test_splitByUser", "w")

while True:
    rand_int = random.randint(1, 10)
    noclk_line = fi.readline().strip()
    clk_line = fi.readline().strip()
    if noclk_line == "" or clk_line == "":
        break
    # Keep the negative/positive pair together in whichever split it lands.
    if rand_int == 2:
        print(noclk_line, file=ftest)
        print(clk_line, file=ftest)
    else:
        print(noclk_line, file=ftrain)
        print(clk_line, file=ftrain)
19 |
20 |
21 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/DIEN_Demo/utils.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
def prelu(_x, scope=''):
    """Parametric ReLU: identity on positives, learnable per-feature slope on negatives."""
    with tf.variable_scope(name_or_scope=scope, default_name="prelu"):
        _alpha = tf.get_variable("prelu_" + scope, shape=_x.get_shape()[-1],
                                 dtype=_x.dtype, initializer=tf.constant_initializer(0.1))
    positive_part = tf.maximum(0.0, _x)
    negative_part = _alpha * tf.minimum(0.0, _x)
    return positive_part + negative_part
9 |
def dice(_x, axis=-1, epsilon=0.000000001, name=""):
    """Dice activation (DIN/DIEN): sigmoid-gated PReLU using batch statistics."""
    with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
        # Learnable per-feature slope for the suppressed side.
        alphas = tf.get_variable('alpha' + name, _x.get_shape()[-1],
                                 initializer=tf.constant_initializer(0.0),
                                 dtype=tf.float32)
        shape = list(_x.get_shape())

        # Average over every axis except the feature axis.
        stat_axes = list(range(len(shape)))
        del stat_axes[axis]
        bcast = [1] * len(shape)
        bcast[axis] = shape[axis]

        # Train-mode statistics of the current batch (no moving averages).
        mean = tf.reshape(tf.reduce_mean(_x, axis=stat_axes), bcast)
        variance = tf.reduce_mean(tf.square(_x - mean) + epsilon, axis=stat_axes)
        stdev = tf.reshape(tf.sqrt(variance), bcast)
        x_p = tf.sigmoid((_x - mean) / (stdev + epsilon))

        # Gate between identity (x_p -> 1) and the alpha-scaled branch.
        return alphas * (1.0 - x_p) * _x + x_p * _x
30 |
31 |
def din_fcn_attention(query, facts, attention_size, mask, stag='null', mode='SUM', softmax_stag=1, time_major=False, return_alphas=False, forCnn=False):
    """DIN-style attention between a query item and a sequence of facts.

    query:  (B, H_q) candidate-item embedding.
    facts:  (B, T, H) behaviour sequence (or a tuple of Bi-RNN outputs).
    mask:   (B, T) 1/0 mask of valid positions.
    mode:   'SUM' returns the weighted sum (B, 1, H); otherwise returns
            the facts rescaled by their attention weights (B, T, H).
    Returns the attended output, plus the scores when return_alphas is set.
    """
    if isinstance(facts, tuple):
        # In case of Bi-RNN, concatenate the forward and the backward RNN outputs.
        facts = tf.concat(facts, 2)
    if len(facts.get_shape().as_list()) == 2:
        facts = tf.expand_dims(facts, 1)

    if time_major:
        # (T,B,D) => (B,T,D).  BUG FIX: ``tf.array_ops`` is not a public
        # attribute of the tensorflow module; ``tf.transpose`` is the
        # correct call (the original raised AttributeError here).
        facts = tf.transpose(facts, [1, 0, 2])

    mask = tf.equal(mask, tf.ones_like(mask))
    facts_size = facts.get_shape().as_list()[-1]  # Hidden size for rnn layer
    # Project the query into the facts' hidden space.
    query = tf.layers.dense(query, facts_size, activation=None, name='f1' + stag)
    query = prelu(query)

    # Broadcast the query across time and build DIN interaction features.
    queries = tf.tile(query, [1, tf.shape(facts)[1]])  # Batch * Time * Hidden size
    queries = tf.reshape(queries, tf.shape(facts))
    din_all = tf.concat([queries, facts, queries - facts, queries * facts], axis=-1)  # Batch * Time * (4 * Hidden size)
    d_layer_1_all = tf.layers.dense(din_all, 80, activation=tf.nn.sigmoid, name='f1_att' + stag)
    d_layer_2_all = tf.layers.dense(d_layer_1_all, 40, activation=tf.nn.sigmoid, name='f2_att' + stag)
    d_layer_3_all = tf.layers.dense(d_layer_2_all, 1, activation=None, name='f3_att' + stag)  # Batch * Time * 1

    d_layer_3_all = tf.reshape(d_layer_3_all, [-1, 1, tf.shape(facts)[1]])  # Batch * 1 * time
    scores = d_layer_3_all

    # Padded positions get a large negative score so softmax sends them to ~0.
    key_masks = tf.expand_dims(mask, 1)  # Batch * 1 * Time
    paddings = tf.ones_like(scores) * (-2 ** 32 + 1)

    if not forCnn:
        scores = tf.where(key_masks, scores, paddings)  # [B, 1, T]

    # Activation
    if softmax_stag:
        scores = tf.nn.softmax(scores)  # [B, 1, T]

    # Weighted sum
    if mode == 'SUM':
        output = tf.matmul(scores, facts)  # Batch * 1 * Hidden Size
    else:
        scores = tf.reshape(scores, [-1, tf.shape(facts)[1]])  # Batch * Time
        output = facts * tf.expand_dims(scores, -1)  # Batch * Time * Hidden Size
        output = tf.reshape(output, tf.shape(facts))
    if return_alphas:
        return output, scores
    else:
        return output
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/DIEN_Demo/vecAttGruCell.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tensorflow.python.ops.rnn_cell import *
3 | from tensorflow.python.ops.rnn_cell_impl import _Linear
4 |
5 | from tensorflow.python.ops import math_ops
6 | from tensorflow.python.ops import init_ops
7 | from tensorflow.python.ops import array_ops
8 | from tensorflow.python.ops import variable_scope as vs
9 |
10 |
class VecAttGRUCell(RNNCell):
    """GRU cell whose update gate is rescaled by an attention score (AUGRU).

    Used by DIEN's interest-evolution layer: the attention score shrinks
    the update gate so low-relevance steps barely change the state.
    """

    def __init__(self,
                 num_units,
                 activation=None,
                 reuse=None,
                 kernel_initializer=None,
                 bias_initializer=None):
        # num_units: hidden-state width; activation defaults to tanh.
        super(VecAttGRUCell, self).__init__(_reuse=reuse)
        self._num_units = num_units
        self._activation = activation or math_ops.tanh
        self._kernel_initializer = kernel_initializer
        self._bias_initializer = bias_initializer
        # Gate/candidate layers are built lazily on the first call so
        # their variables land inside the caller's variable scope.
        self._gate_linear = None
        self._candidate_linear = None

    @property
    def state_size(self):
        return self._num_units

    @property
    def output_size(self):
        return self._num_units

    # __call__ is overridden (instead of relying on the base class) so the
    # extra att_score argument can be forwarded to call().
    def __call__(self, inputs, state, att_score):
        return self.call(inputs, state, att_score)

    def call(self, inputs, state, att_score=None):
        """One AUGRU step; att_score in [0, 1] scales the update gate."""
        if self._gate_linear is None:
            bias_ones = self._bias_initializer
            if self._bias_initializer is None:
                # Gate bias of 1.0 keeps reset/update gates open early on.
                bias_ones = init_ops.constant_initializer(1.0, dtype=inputs.dtype)
            with vs.variable_scope("gates"):
                self._gate_linear = _Linear(
                    [inputs, state],
                    2 * self._num_units,
                    True,
                    bias_initializer=bias_ones,
                    kernel_initializer=self._kernel_initializer
                )

        # r = reset gate, u = update gate, both in (0, 1).
        value = math_ops.sigmoid(self._gate_linear([inputs, state]))
        r, u = array_ops.split(value, num_or_size_splits=2, axis=1)
        r_state = r * state
        if self._candidate_linear is None:
            with vs.variable_scope("candidate"):
                self._candidate_linear = _Linear(
                    [inputs, r_state],
                    self._num_units,
                    True,
                    bias_initializer=self._bias_initializer,
                    kernel_initializer=self._kernel_initializer
                )

        c = self._activation(self._candidate_linear([inputs, r_state]))
        # Attention rescales the update gate: low att_score -> u ~ u,
        # high att_score -> u ~ 0, i.e. the state moves towards c.
        u = (1.0 - att_score) * u
        new_h = u * state + (1 - u) * c
        return new_h, new_h
69 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/DIN_Demo/.idea/Basic-DIN-Demo.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/DIN_Demo/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/DIN_Demo/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/DIN_Demo/Dice.py:
--------------------------------------------------------------------------------
1 | # 激活函数dice
2 | import tensorflow as tf
3 |
def dice(_x, axis=-1, epsilon=0.0000001, name=''):
    """Dice activation: sigmoid(BN(x)) gates between identity and alpha*x.

    The gate probability x_p comes from (non-centred, non-scaled) batch
    normalisation; ``alphas`` is a learnable per-feature slope for the
    suppressed side, initialised to 0 (so it starts as a soft ReLU).
    ``axis``/``epsilon`` are kept for signature compatibility with the
    manual-statistics variants used elsewhere in this repo.
    """
    # Learnable per-feature slope (one scalar per unit of the last axis).
    alphas = tf.get_variable('alpha' + name, _x.get_shape()[-1],
                             initializer=tf.constant_initializer(0.0),
                             dtype=tf.float32)

    # FIX (dead code removed): the original also computed the batch
    # mean/std by hand, but those tensors were never used — x_p is
    # derived from tf.layers.batch_normalization below.
    x_normed = tf.layers.batch_normalization(_x, center=False, scale=False)  # a simple way to use BN to calculate x_p
    x_p = tf.sigmoid(x_normed)

    return alphas * (1.0 - x_p) * _x + x_p * _x
--------------------------------------------------------------------------------
/ctr_of_recommendation/DIN_Demo/README.md:
--------------------------------------------------------------------------------
1 | # DeepInterestNetwork
2 | Deep Interest Network for Click-Through Rate Prediction
3 |
4 | ## Introduction
5 | This is an implementation of the paper [Deep Interest Network for Click-Through Rate Prediction](https://arxiv.org/abs/1706.06978) Guorui Zhou, Chengru Song, Xiaoqiang Zhu, Han Zhu, Ying Fan, Na Mou, Xiao Ma, Yanghui Yan, Xingya Dai, Junqi Jin, Han Li, Kun Gai
6 |
7 | Thanks to Jinze Bai and Chang Zhou.
8 |
9 | Bibtex:
10 | ```sh
11 | @article{Zhou2017Deep,
12 | title={Deep Interest Network for Click-Through Rate Prediction},
13 | author={Zhou, Guorui and Song, Chengru and Zhu, Xiaoqiang and Ma, Xiao and Yan, Yanghui and Dai, Xingya and Zhu, Han and Jin, Junqi and Li, Han and Gai, Kun},
14 | year={2017},
15 | }
16 | ```
17 |
18 | ## Requirements
19 | * Python >= 3.6.1
20 | * NumPy >= 1.12.1
21 | * Pandas >= 0.20.1
22 | * TensorFlow >= 1.4.0 (Probably earlier version should work too, though I didn't test it)
23 | * GPU with memory >= 10G
24 |
25 | ## Download dataset and preprocess
26 | * Step 1: Download the amazon product dataset of electronics category, which has 498,196 products and 7,824,482 records, and extract it to `raw_data/` folder.
27 | ```sh
28 | mkdir raw_data/;
29 | cd utils;
30 | bash 0_download_raw.sh;
31 | ```
32 | * Step 2: Convert raw data to pandas dataframe, and remap categorical id.
33 | ```sh
34 | python 1_convert_pd.py;
35 | python 2_remap_id.py
36 | ```
37 |
38 | ## Training and Evaluation
This implementation contains not only the DIN method but also all of the competitors' methods, including Wide&Deep, PNN, and DeepFM. The training procedure for all methods is as follows:
40 | * Step 1: Choose a method and enter the folder.
41 | ```
42 | cd din;
43 | ```
Alternatively, you could also run the other competitors' methods directly via `cd deepFM`, `cd pnn`, or `cd wide_deep`,
45 | and follow the same instructions below.
46 |
47 | * Step 2: Building the dataset adapted to current method.
48 | ```
49 | python build_dataset.py
50 | ```
51 | * Step 3: Start training and evaluating using default arguments in background mode.
52 | ```
53 | python train.py >log.txt 2>&1 &
54 | ```
55 | * Step 4: Check training and evaluating progress.
56 | ```
57 | tail -f log.txt
58 | tensorboard --logdir=save_path
59 | ```
60 |
61 | ## Dice
62 | There is also an implementation of Dice in folder 'din', you can try dice following the code annotation in `din/model.py` or replacing model.py with model\_dice.py
63 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/DIN_Demo/build_dataset.py:
--------------------------------------------------------------------------------
import random
import pickle

# Fixed seed so negative sampling and shuffling are reproducible.
random.seed(1234)

# remap.pkl is produced by remap_id.py.
with open('data/remap.pkl', 'rb') as f:
    reviews_df = pickle.load(f)
    cate_list = pickle.load(f)
    user_count, item_count, cate_count, example_count = pickle.load(f)

train_set = []
test_set = []
# Build one example per prefix of each user's time-ordered positive
# history; the final item of the history becomes the test example.
for reviewerID, hist in reviews_df.groupby('reviewerID'):
    pos_list = hist['asin'].tolist()
    def gen_neg():
        # Rejection-sample an item id the user never interacted with.
        neg = pos_list[0]
        while neg in pos_list:
            neg = random.randint(0, item_count-1)
        return neg
    neg_list = [gen_neg() for i in range(len(pos_list))]

    for i in range(1, len(pos_list)):
        hist = pos_list[:i]  # behaviour history = items before position i
        if i != len(pos_list) - 1:
            # Paired positive/negative training samples share the history.
            train_set.append((reviewerID, hist, pos_list[i], 1))
            train_set.append((reviewerID, hist, neg_list[i], 0))
        else:
            # The test sample carries (positive, negative) together.
            label = (pos_list[i], neg_list[i])
            test_set.append((reviewerID, hist, label))

random.shuffle(train_set)
random.shuffle(test_set)

# Every user contributes exactly one test example.
assert len(test_set) == user_count
# assert(len(test_set) + len(train_set) // 2 == reviews_df.shape[0])

with open('dataset.pkl', 'wb') as f:
    pickle.dump(train_set, f, pickle.HIGHEST_PROTOCOL)
    pickle.dump(test_set, f, pickle.HIGHEST_PROTOCOL)
    pickle.dump(cate_list, f, pickle.HIGHEST_PROTOCOL)
    pickle.dump((user_count, item_count, cate_count), f, pickle.HIGHEST_PROTOCOL)
42 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/DIN_Demo/convert_pd.py:
--------------------------------------------------------------------------------
1 | import pickle
2 | import pandas as pd
3 |
4 |
def to_df(file_path):
    """Load a file of per-line Python dict literals into a DataFrame.

    Each line of ``file_path`` must be a dict literal; rows are indexed
    by line number.  The Amazon dumps use single-quoted keys, so
    ``json.loads`` cannot parse them directly.

    SECURITY NOTE(review): ``eval`` executes arbitrary expressions — only
    run this on trusted input files.
    """
    with open(file_path, 'r') as fin:
        records = {i: eval(line) for i, line in enumerate(fin)}
    return pd.DataFrame.from_dict(records, orient='index')
14 |
15 |
# Convert the raw review and meta dumps to pickled DataFrames; the meta
# frame is restricted to items that actually appear in the reviews.
reviews_df = to_df('data/reviews_Electronics_5.json')
with open('data/reviews.pkl', 'wb') as f:
    pickle.dump(reviews_df, f, pickle.HIGHEST_PROTOCOL)

meta_df = to_df('data/meta_Electronics.json')
meta_df = meta_df[meta_df['asin'].isin(reviews_df['asin'].unique())]
meta_df = meta_df.reset_index(drop=True)
with open('data/meta.pkl', 'wb') as f:
    pickle.dump(meta_df, f, pickle.HIGHEST_PROTOCOL)
25 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/DIN_Demo/input.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
class DataInput:
    """Mini-batch iterator over training tuples (user, hist, item, label).

    Yields (step, (u, i, y, hist_i, sl)) where hist_i is a zero-padded
    int64 matrix of behaviour histories and sl holds the true lengths.
    """

    def __init__(self, data, batch_size):
        self.batch_size = batch_size
        self.data = data
        # Number of batches; the last one may be short.
        self.epoch_size = len(self.data) // self.batch_size
        if self.epoch_size * self.batch_size < len(self.data):
            self.epoch_size += 1
        self.i = 0

    def __iter__(self):
        return self

    def __next__(self):
        if self.i == self.epoch_size:
            raise StopIteration

        lo = self.i * self.batch_size
        hi = min(lo + self.batch_size, len(self.data))
        batch = self.data[lo:hi]
        self.i += 1

        users = [rec[0] for rec in batch]
        items = [rec[2] for rec in batch]
        labels = [rec[3] for rec in batch]
        lengths = [len(rec[1]) for rec in batch]

        # Pad every history with zeros up to the longest in this batch.
        hist_i = np.zeros([len(batch), max(lengths)], np.int64)
        for row, rec in enumerate(batch):
            for col, item_id in enumerate(rec[1]):
                hist_i[row][col] = item_id

        return self.i, (users, items, labels, hist_i, lengths)
42 |
class DataInputTest:
    """Mini-batch iterator over test tuples (user, hist, (pos_item, neg_item)).

    Yields (step, (u, i, j, hist_i, sl)) where i/j are the positive and
    negative candidates and hist_i is the zero-padded history matrix.
    """

    def __init__(self, data, batch_size):
        self.batch_size = batch_size
        self.data = data
        # Number of batches; the last one may be short.
        self.epoch_size = len(self.data) // self.batch_size
        if self.epoch_size * self.batch_size < len(self.data):
            self.epoch_size += 1
        self.i = 0

    def __iter__(self):
        return self

    def __next__(self):
        if self.i == self.epoch_size:
            raise StopIteration

        lo = self.i * self.batch_size
        batch = self.data[lo:min(lo + self.batch_size, len(self.data))]
        self.i += 1

        users, pos_items, neg_items, lengths = [], [], [], []
        for rec in batch:
            users.append(rec[0])
            pos_items.append(rec[2][0])
            neg_items.append(rec[2][1])
            lengths.append(len(rec[1]))

        # Pad every history with zeros up to the longest in this batch.
        hist_i = np.zeros([len(batch), max(lengths)], np.int64)
        for row, rec in enumerate(batch):
            for col, item_id in enumerate(rec[1]):
                hist_i[row][col] = item_id

        return self.i, (users, pos_items, neg_items, hist_i, lengths)
80 |
81 |
82 |
83 |
84 |
85 |
86 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/DIN_Demo/remap_id.py:
--------------------------------------------------------------------------------
1 | import random
2 | import pickle
3 | import numpy as np
4 |
random.seed(1234)

# Load the frames produced by convert_pd.py and keep only the columns
# needed for id remapping.
with open('data/reviews.pkl', 'rb') as f:
    reviews_df = pickle.load(f)
    reviews_df = reviews_df[['reviewerID', 'asin', 'unixReviewTime']]
with open('data/meta.pkl', 'rb') as f:
    meta_df = pickle.load(f)
    meta_df = meta_df[['asin', 'categories']]
    # Keep only the leaf node of the last category path.
    meta_df['categories'] = meta_df['categories'].map(lambda x: x[-1][-1])
15 |
def build_map(df, col_name):
    """Densely re-index ``df[col_name]`` in place.

    Returns (mapping, keys): keys are the sorted unique original values;
    mapping sends each of them to its index in keys.  The column is
    rewritten with those integer indices.
    """
    keys = sorted(df[col_name].unique().tolist())
    mapping = {value: index for index, value in enumerate(keys)}
    df[col_name] = df[col_name].map(lambda x: mapping[x])
    return mapping, keys
21 |
22 |
# Remap every id space (items, categories, users) to dense integer ids.
asin_map, asin_key = build_map(meta_df, 'asin')
cate_map, cate_key = build_map(meta_df, 'categories')
revi_map, revi_key = build_map(reviews_df, 'reviewerID')

user_count, item_count, cate_count, example_count = \
    len(revi_map), len(asin_map), len(cate_map), reviews_df.shape[0]
print('user_count: %d\titem_count: %d\tcate_count: %d\texample_count: %d' %
      (user_count, item_count, cate_count, example_count))

# Sort items by their new id so row i of meta_df describes item i.
meta_df = meta_df.sort_values('asin')
meta_df = meta_df.reset_index(drop=True)
reviews_df['asin'] = reviews_df['asin'].map(lambda x: asin_map[x])
# Order reviews chronologically within each user.
reviews_df = reviews_df.sort_values(['reviewerID', 'unixReviewTime'])
reviews_df = reviews_df.reset_index(drop=True)
reviews_df = reviews_df[['reviewerID', 'asin', 'unixReviewTime']]

# cate_list[i] is the category id of item i.
cate_list = [meta_df['categories'][i] for i in range(len(asin_map))]
cate_list = np.array(cate_list, dtype=np.int32)

with open('data/remap.pkl', 'wb') as f:
    pickle.dump(reviews_df, f, pickle.HIGHEST_PROTOCOL) # uid, iid
    pickle.dump(cate_list, f, pickle.HIGHEST_PROTOCOL) # cid of iid line
    pickle.dump((user_count, item_count, cate_count, example_count),
                f, pickle.HIGHEST_PROTOCOL)
    pickle.dump((asin_key, cate_key, revi_key), f, pickle.HIGHEST_PROTOCOL)
48 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/DIN_Demo/train.py:
--------------------------------------------------------------------------------
1 | import pickle
2 |
3 | import numpy as np
4 | import tensorflow as tf
5 | import random
6 |
7 | from model import Model
8 | from input import DataInput, DataInputTest
9 |
10 | import time
11 |
12 | import sys
13 |
14 |
# Fixed seeds for reproducible shuffling and initialisation.
random.seed(1234)
np.random.seed(1234)
tf.set_random_seed(1234)

train_batch_size = 32
test_batch_size = 512

# dataset.pkl is produced by build_dataset.py.
with open('dataset.pkl', 'rb') as f:
    train_set = pickle.load(f)
    test_set = pickle.load(f)
    cate_list = pickle.load(f)
    user_count, item_count, cate_count = pickle.load(f)

print(user_count, item_count, cate_count)

# cate_list maps an item id to its category id.
print(len(cate_list))

print(test_set[:5])

# Best grouped AUC seen so far; updated by _eval() when checkpointing.
best_auc = 0.0
36 |
def calc_auc(raw_arr):
    """Compute AUC from [fp_increment, tp_increment, score] records.

    Records are sorted by ascending score and the ROC area is accumulated
    with the trapezoid rule.  Returns -0.5 when essentially every record
    belongs to one class, and None when either class has zero mass.
    """
    ordered = sorted(raw_arr, key=lambda rec: rec[2])

    area = 0.0
    prev_fp, prev_tp = 0.0, 0.0
    cum_fp, cum_tp = 0.0, 0.0
    for rec in ordered:
        cum_fp += rec[0]
        cum_tp += rec[1]
        # Trapezoid between the previous and current ROC points.
        area += (cum_fp - prev_fp) * (cum_tp + prev_tp)
        prev_fp, prev_tp = cum_fp, cum_tp

    # Degenerate input: effectively a single class.
    threshold = len(ordered) - 1e-3
    if cum_tp > threshold or cum_fp > threshold:
        return -0.5

    if cum_tp * cum_fp > 0.0:  # normal auc
        return (1.0 - area / (2.0 * cum_tp * cum_fp))
    else:
        return None
57 |
58 |
59 | def _auc_arr(score):
60 | score_p = score[:,0]
61 | score_n = score[:,1]
62 |
63 | score_arr = []
64 | for s in score_p.tolist():
65 | score_arr.append([0,1,s])
66 | for s in score_n.tolist():
67 | score_arr.append([1,0,s])
68 | return score_arr
69 |
def _eval(sess, model):
    """Evaluate on the full test set.

    Returns (test_gauc, Auc): the per-user AUC weighted by batch size, and
    the global AUC over all scores. Also checkpoints the model whenever a
    new best GAUC is reached.
    """
    weighted_auc = 0.0
    scores = []
    for _, uij in DataInputTest(test_set, test_batch_size):
        batch_auc, batch_scores = model.eval(sess, uij)
        scores.extend(_auc_arr(batch_scores))
        weighted_auc += batch_auc * len(uij[0])

    test_gauc = weighted_auc / len(test_set)
    Auc = calc_auc(scores)

    global best_auc
    if test_gauc > best_auc:
        best_auc = test_gauc
        model.save(sess, 'save_path/ckpt')
    return test_gauc, Auc
87 |
88 |
89 |
with tf.Session() as sess:
    # Build the DIN model and initialize all TF variables.
    model = Model(user_count,item_count,cate_count,cate_list)
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())

    lr = 1.0  # SGD learning rate; dropped to 0.1 after 336000 global steps

    start_time = time.time()

    for _ in range(50):  # 50 training epochs

        random.shuffle(train_set)

        epoch_size = round(len(train_set)/ train_batch_size)  # NOTE(review): computed but never used

        loss_sum = 0.0

        for _,uij in DataInput(train_set,train_batch_size):
            loss = model.train(sess,uij,lr)
            loss_sum += loss

            # NOTE(review): evaluating every 10 steps is expensive and its
            # result is discarded unless the step also hits the 1000-step
            # branch below — confirm this is intentional.
            if model.global_step.eval() % 10 == 0:
                test_gauc,Auc = _eval(sess,model)

            # Every 1000 steps: evaluate, report, and reset the loss window.
            if model.global_step.eval() % 1000 == 0:
                test_gauc, Auc = _eval(sess, model)
                print('Epoch %d Global_step %d\tTrain_loss: %.4f\tEval_GAUC: %.4f\tEval_AUC: %.4f' %
                      (model.global_epoch_step.eval(), model.global_step.eval(),
                       loss_sum / 1000, test_gauc, Auc))
                sys.stdout.flush()
                loss_sum = 0.0

            # One-off learning-rate decay late in training.
            if model.global_step.eval() % 336000 == 0:
                lr = 0.1

        print('Epoch %d DONE\tCost time: %.2f' %
              (model.global_epoch_step.eval(), time.time() - start_time))
        sys.stdout.flush()
        model.global_epoch_step_op.eval()  # increment the epoch counter variable

    print('best test_gauc:', best_auc)
    sys.stdout.flush()
132 |
133 |
134 |
135 |
136 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/DIN_Demo/utils/0_download_raw.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Download the Amazon Electronics 5-core reviews and product metadata used
# to build the DIN dataset, then decompress them in ../raw_data.
set -e  # abort on the first failed command instead of continuing blindly

mkdir -p ../raw_data  # ensure the target directory exists before cd'ing into it
cd ../raw_data
wget -c http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/reviews_Electronics_5.json.gz
gzip -d reviews_Electronics_5.json.gz
wget -c http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Electronics.json.gz
gzip -d meta_Electronics.json.gz
--------------------------------------------------------------------------------
/ctr_of_recommendation/DIN_Demo/utils/1_convert_pd.py:
--------------------------------------------------------------------------------
import ast
import pickle

import pandas as pd
3 |
def to_df(file_path):
    """Read a file containing one dict literal per line into a DataFrame.

    The raw Amazon dumps use Python-style single-quoted dicts, so plain
    json.loads cannot parse them. The original code used eval(), which
    executes arbitrary code from the (downloaded, untrusted) file;
    ast.literal_eval accepts the same literals but cannot run code.

    Args:
        file_path: path to the line-delimited literal dump.

    Returns:
        A DataFrame with one row per line, indexed 0..n-1.
    """
    with open(file_path, 'r') as fin:
        records = {}
        for i, line in enumerate(fin):
            records[i] = ast.literal_eval(line)
    return pd.DataFrame.from_dict(records, orient='index')
13 |
# Parse the raw review dump and cache it as a pickle for the later steps.
reviews_df = to_df('../raw_data/reviews_Electronics_5.json')
with open('../raw_data/reviews.pkl', 'wb') as f:
    pickle.dump(reviews_df, f, pickle.HIGHEST_PROTOCOL)

# Parse the product metadata, keep only products that actually appear in
# the reviews, and cache it as well.
meta_df = to_df('../raw_data/meta_Electronics.json')
meta_df = meta_df[meta_df['asin'].isin(reviews_df['asin'].unique())]
meta_df = meta_df.reset_index(drop=True)
with open('../raw_data/meta.pkl', 'wb') as f:
    pickle.dump(meta_df, f, pickle.HIGHEST_PROTOCOL)
23 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/DIN_Demo/utils/2_remap_id.py:
--------------------------------------------------------------------------------
import random
import pickle
import numpy as np

random.seed(1234)

# Load the frames pickled by 1_convert_pd.py, keeping only the columns
# needed for id remapping.
with open('../raw_data/reviews.pkl', 'rb') as f:
    reviews_df = pickle.load(f)
    reviews_df = reviews_df[['reviewerID', 'asin', 'unixReviewTime']]
with open('../raw_data/meta.pkl', 'rb') as f:
    meta_df = pickle.load(f)
    meta_df = meta_df[['asin', 'categories']]
# 'categories' holds nested category paths; keep the last entry of the
# last path as the item's single category.
meta_df['categories'] = meta_df['categories'].map(lambda x: x[-1][-1])
14 |
15 |
def build_map(df, col_name):
    """Replace df[col_name] with dense integer ids, in sorted-value order.

    Returns (mapping, keys): mapping is {original value -> id} and keys is
    the sorted list of original values, so keys[id] recovers the value.
    Mutates df in place.
    """
    keys = sorted(df[col_name].unique().tolist())
    mapping = {value: idx for idx, value in enumerate(keys)}
    df[col_name] = df[col_name].map(mapping)
    return mapping, keys
21 |
# Remap item (asin), category and reviewer ids to dense integer ids.
asin_map, asin_key = build_map(meta_df, 'asin')
cate_map, cate_key = build_map(meta_df, 'categories')
revi_map, revi_key = build_map(reviews_df, 'reviewerID')

user_count, item_count, cate_count, example_count =\
    len(revi_map), len(asin_map), len(cate_map), reviews_df.shape[0]
print('user_count: %d\titem_count: %d\tcate_count: %d\texample_count: %d' %
      (user_count, item_count, cate_count, example_count))

# Order items by their new id and reviews by (user, time), so each user's
# history comes out chronological.
meta_df = meta_df.sort_values('asin')
meta_df = meta_df.reset_index(drop=True)
reviews_df['asin'] = reviews_df['asin'].map(lambda x: asin_map[x])
reviews_df = reviews_df.sort_values(['reviewerID', 'unixReviewTime'])
reviews_df = reviews_df.reset_index(drop=True)
reviews_df = reviews_df[['reviewerID', 'asin', 'unixReviewTime']]

# cate_list[i] is the category id of item i (meta_df is sorted by item id).
cate_list = [meta_df['categories'][i] for i in range(len(asin_map))]
cate_list = np.array(cate_list, dtype=np.int32)


# Persist everything; readers must unpickle in this exact order.
with open('../raw_data/remap.pkl', 'wb') as f:
    pickle.dump(reviews_df, f, pickle.HIGHEST_PROTOCOL)  # uid, iid
    pickle.dump(cate_list, f, pickle.HIGHEST_PROTOCOL)  # cid of iid line
    pickle.dump((user_count, item_count, cate_count, example_count),
                f, pickle.HIGHEST_PROTOCOL)
    pickle.dump((asin_key, cate_key, revi_key), f, pickle.HIGHEST_PROTOCOL)
48 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/DIN_Demo/utils/auc.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/DIN_Demo/utils/auc.png
--------------------------------------------------------------------------------
/ctr_of_recommendation/DSIN_Demo/config.py:
--------------------------------------------------------------------------------
# Fraction of users sampled from the raw dataset (see gen_sampled_data.py).
FRAC = 0.25

# Maximum behavior-sequence length for the DIN-style input
# (presumably a truncation limit — confirm in gen_dsin_input.py).
DIN_SESS_MAX_LEN = 50

# DSIN: number of sessions kept per user, and max behaviors per session.
DSIN_SESS_COUNT = 5
DSIN_SESS_MAX_LEN = 10
--------------------------------------------------------------------------------
/ctr_of_recommendation/DSIN_Demo/gen_sampled_data.py:
--------------------------------------------------------------------------------
# coding: utf-8
"""Down-sample users from the raw dataset and label-encode ad/log features."""
import os

import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder

from config import FRAC

if __name__ == "__main__":

    user = pd.read_csv('../data/user_profile.csv')
    sample = pd.read_csv('../data/raw_sample.csv')

    if not os.path.exists('../sampled_data/'):
        os.mkdir('../sampled_data/')

    user_path = '../sampled_data/user_profile_' + str(FRAC) + '.pkl'
    sample_path = '../sampled_data/raw_sample_' + str(FRAC) + '.pkl'

    # BUG FIX: the existence check previously looked for '..._<FRAC>_.pkl'
    # (trailing underscore) while the files were written as '..._<FRAC>.pkl',
    # so the cached sample was never reused. The paths now agree, and they
    # match what gen_sessions.py reads back.
    if os.path.exists(user_path) and os.path.exists(sample_path):
        user_sub = pd.read_pickle(user_path)
        sample_sub = pd.read_pickle(sample_path)
    else:
        # Sample a fraction of users, then keep only their impressions.
        if FRAC < 1.0:
            user_sub = user.sample(frac=FRAC, random_state=1024)
        else:
            user_sub = user
        sample_sub = sample.loc[sample.user.isin(user_sub.userid.unique())]
        pd.to_pickle(user_sub, user_path)
        pd.to_pickle(sample_sub, sample_path)

    # Cache the page-view ('pv') subset of the large behavior log.
    if os.path.exists('../data/behavior_log_pv.pkl'):
        log = pd.read_pickle('../data/behavior_log_pv.pkl')
    else:
        log = pd.read_csv('../data/behavior_log.csv')
        log = log.loc[log['btag'] == 'pv']
        pd.to_pickle(log, '../data/behavior_log_pv.pkl')

    # Keep only log rows belonging to the sampled users.
    userset = user_sub.userid.unique()
    log = log.loc[log.user.isin(userset)]

    ad = pd.read_csv('../data/ad_feature.csv')
    ad['brand'] = ad['brand'].fillna(-1)

    # Encode category ids consistently across the ad table and the behavior
    # log: fit on the union so transform never sees an unknown label. The +1
    # shifts ids so 0 stays free (presumably for padding — confirm against
    # the model's embedding setup).
    lbe = LabelEncoder()
    unique_cate_id = np.concatenate(
        (ad['cate_id'].unique(), log['cate'].unique()))
    lbe.fit(unique_cate_id)
    ad['cate_id'] = lbe.transform(ad['cate_id']) + 1
    log['cate'] = lbe.transform(log['cate']) + 1

    # Same treatment for brands.
    lbe = LabelEncoder()
    unique_brand = np.concatenate(
        (ad['brand'].unique(), log['brand'].unique()))
    lbe.fit(unique_brand)
    ad['brand'] = lbe.transform(ad['brand']) + 1
    log['brand'] = lbe.transform(log['brand']) + 1

    log = log.loc[log.user.isin(sample_sub.user.unique())]
    log.drop(columns=['btag'], inplace=True)
    log = log.loc[log['time_stamp'] > 0]  # drop rows with invalid timestamps

    pd.to_pickle(ad, '../sampled_data/ad_feature_enc_' + str(FRAC) + '.pkl')
    pd.to_pickle(
        log, '../sampled_data/behavior_log_pv_user_filter_enc_' + str(FRAC) + '.pkl')

    print("0_gen_sampled_data done")
81 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/DSIN_Demo/gen_sessions.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | import gc
3 |
4 | import pandas as pd
5 | from joblib import Parallel, delayed
6 |
7 | from config import FRAC
8 |
9 |
def gen_session_list_dsin(uid, t):
    """Split one user's behavior log into sessions for DSIN.

    A new session starts whenever two consecutive behaviors are more than
    30 minutes apart; only sessions with more than 2 behaviors are kept.
    Each behavior is a (cate, brand, time_stamp) tuple. Sorts t in place.
    """
    t.sort_values('time_stamp', inplace=True, ascending=True)
    prev_ts = 1483574401  # pd.to_datetime("2017-01-05 00:00:01")
    sessions = []
    current = []
    for _, row in t.iterrows():
        ts = row['time_stamp']
        if ts - prev_ts > 30 * 60:
            # Gap over 30 minutes: close the running session.
            if len(current) > 2:  # keep only sessions with >2 behaviors
                sessions.append(current[:])
            current = []
        current.append((row['cate'], row['brand'], ts))
        prev_ts = ts
    if len(current) > 2:
        sessions.append(current[:])
    return uid, sessions
32 |
33 |
def gen_session_list_din(uid, t):
    """Collect one user's entire history as a single session for DIN.

    Returns (uid, [session]) where session is a chronological list of
    (cate, brand, time_stamp) tuples; users with fewer than 3 behaviors
    yield (uid, []). Sorts t in place.
    """
    t.sort_values('time_stamp', inplace=True, ascending=True)
    behaviors = [(row['cate'], row['brand'], row['time_stamp'])
                 for _, row in t.iterrows()]
    session_list = [behaviors[:]] if len(behaviors) > 2 else []
    return uid, session_list
49 |
50 |
def applyParallel(df_grouped, func, n_jobs, backend='multiprocessing'):
    """Run func(name, group) over all groups in parallel via joblib and
    collect the (key, value) results into a dict."""  # backend='threading'
    tasks = (delayed(func)(name, group) for name, group in df_grouped)
    results = Parallel(n_jobs=n_jobs, verbose=4, backend=backend)(tasks)
    return dict(results)
57 |
58 |
def gen_user_hist_sessions(model, FRAC=0.25):
    """Build and pickle per-user behavior sessions for one model variant.

    Processes users in batches of 150000 to bound memory, grouping the
    filtered behavior log by user and splitting it with the model-specific
    session function.

    Args:
        model: 'din' or 'dsin'; selects the session-splitting strategy.
        FRAC: sampling fraction used in the input/output file names.

    Raises:
        ValueError: if model is neither 'din' nor 'dsin'.
    """
    if model not in ['din', 'dsin']:
        # BUG FIX: the message previously said "dmsn" instead of "dsin".
        raise ValueError('model must be din or dsin')

    print("gen " + model + " hist sess", FRAC)
    name = '../sampled_data/behavior_log_pv_user_filter_enc_' + str(FRAC) + '.pkl'
    data = pd.read_pickle(name)
    data = data.loc[data.time_stamp >= 1493769600]  # 0503-0513
    # 0504~1493856000
    # 0503 1493769600

    user = pd.read_pickle('../sampled_data/user_profile_' + str(FRAC) + '.pkl')

    n_samples = user.shape[0]
    print(n_samples)
    batch_size = 150000  # users per iteration, to bound memory
    iters = (n_samples - 1) // batch_size + 1

    print("total", iters, "iters", "batch_size", batch_size)
    for i in range(0, iters):
        target_user = user['userid'].values[i * batch_size:(i + 1) * batch_size]
        sub_data = data.loc[data.user.isin(target_user)]
        print(i, 'iter start')
        df_grouped = sub_data.groupby('user')
        if model == 'din':
            user_hist_session = applyParallel(
                df_grouped, gen_session_list_din, n_jobs=20, backend='loky')
        else:
            user_hist_session = applyParallel(
                df_grouped, gen_session_list_dsin, n_jobs=20, backend='multiprocessing')
        pd.to_pickle(user_hist_session, '../sampled_data/user_hist_session_' +
                     str(FRAC) + '_' + model + '_' + str(i) + '.pkl')
        print(i, 'pickled')
        # Free the batch before the next iteration.
        del user_hist_session
        gc.collect()
        print(i, 'del')

    print("1_gen " + model + " hist sess done")
97 |
98 |
if __name__ == "__main__":
    # Build per-user history sessions for both model variants at the
    # configured sampling fraction.
    gen_user_hist_sessions('din', FRAC)
    gen_user_hist_sessions('dsin', FRAC)
102 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/DSIN_Demo/train_dsin.py:
--------------------------------------------------------------------------------
import os

import pandas as pd
import tensorflow as tf
from sklearn.metrics import log_loss, roc_auc_score
from tensorflow.python.keras import backend as K

from config import DSIN_SESS_COUNT, DSIN_SESS_MAX_LEN, FRAC
from dsin import DSIN



if __name__ == '__main__':
    SESS_COUNT = DSIN_SESS_COUNT
    SESS_MAX_LEN = DSIN_SESS_MAX_LEN

    # Names and dimensions of the sparse and dense features; a dimension is
    # the total number of distinct values the feature can take.
    fd = pd.read_pickle('../model_input/dsin_fd_' +
                        str(FRAC) + '_' + str(SESS_COUNT) + '.pkl')

    # Model input tensors.
    model_input = pd.read_pickle(
        '../model_input/dsin_input_' + str(FRAC) + '_' + str(SESS_COUNT) + '.pkl')

    # Model labels.
    label = pd.read_pickle('../model_input/dsin_label_' +
                           str(FRAC) + '_' + str(SESS_COUNT) + '.pkl')

    sample_sub = pd.read_pickle(
        '../sampled_data/raw_sample_' + str(FRAC) + '.pkl')

    # Split train/test by impression timestamp.
    sample_sub['idx'] = list(range(sample_sub.shape[0]))
    train_idx = sample_sub.loc[sample_sub.time_stamp <
                               1494633600, 'idx'].values
    test_idx = sample_sub.loc[sample_sub.time_stamp >=
                              1494633600, 'idx'].values

    train_input = [i[train_idx] for i in model_input]
    test_input = [i[test_idx] for i in model_input]

    train_label = label[train_idx]
    test_label = label[test_idx]

    sess_count = SESS_COUNT
    sess_len_max = SESS_MAX_LEN
    BATCH_SIZE = 4096

    # Features fed as per-session behavior sequences.
    sess_feature = ['cate_id', 'brand']
    TEST_BATCH_SIZE = 2 ** 16

    model = DSIN(fd, sess_feature, embedding_size=4, sess_max_count=sess_count,
                 sess_len_max=sess_len_max, dnn_hidden_units=(200, 80), att_head_num=8,
                 att_embedding_size=1, bias_encoding=False)

    model.compile('adagrad', 'binary_crossentropy',
                  metrics=['binary_crossentropy', ])

    hist_ = model.fit(train_input, train_label, batch_size=BATCH_SIZE,
                      epochs=1, initial_epoch=0, verbose=1, )

    pred_ans = model.predict(test_input, TEST_BATCH_SIZE)

    print()
    print("test LogLoss", round(log_loss(test_label, pred_ans), 4), "test AUC",
          round(roc_auc_score(test_label, pred_ans), 4))
--------------------------------------------------------------------------------
/ctr_of_recommendation/DeepFM_model/DataReader.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 |
class FeatureDictionary(object):
    """Builds a global mapping from raw columns to dense feature indices.

    A numeric column occupies a single index; a categorical column gets one
    index per distinct value (computed over train and test combined).
    `feat_dim` is the total number of indices.
    """

    def __init__(self, trainfile=None, testfile=None,
                 dfTrain=None, dfTest=None, numeric_cols=None,
                 ignore_cols=None):
        """Exactly one of (trainfile, dfTrain) and one of (testfile, dfTest)
        must be provided; the mapping is built immediately."""
        assert not ((trainfile is None) and (dfTrain is None)), "trainfile or dfTrain at least one is set"
        assert not ((trainfile is not None) and (dfTrain is not None)), "only one can be set"
        assert not ((testfile is None) and (dfTest is None)), "testfile or dfTest at least one is set"
        assert not ((testfile is not None) and (dfTest is not None)), "only one can be set"

        self.trainfile = trainfile
        self.testfile = testfile
        self.dfTrain = dfTrain
        self.dfTest = dfTest
        # Mutable-default fix: the original used numeric_cols=[] / ignore_cols=[],
        # which are shared across all calls; None defaults avoid that pitfall.
        self.numeric_cols = numeric_cols if numeric_cols is not None else []
        self.ignore_cols = ignore_cols if ignore_cols is not None else []
        self.gen_feat_dict()

    def gen_feat_dict(self):
        """Populate self.feat_dict and self.feat_dim from train+test data."""
        dfTrain = pd.read_csv(self.trainfile) if self.dfTrain is None else self.dfTrain
        dfTest = pd.read_csv(self.testfile) if self.dfTest is None else self.dfTest
        df = pd.concat([dfTrain, dfTest])

        self.feat_dict = {}
        tc = 0  # running feature-index counter
        for col in df.columns:
            if col in self.ignore_cols:
                continue
            if col in self.numeric_cols:
                # One index for the whole numeric column.
                self.feat_dict[col] = tc
                tc += 1
            else:
                # One index per distinct categorical value.
                # (Removed a leftover debug print of the unique values here.)
                us = df[col].unique()
                self.feat_dict[col] = dict(zip(us, range(tc, len(us) + tc)))
                tc += len(us)

        self.feat_dim = tc
54 |
55 |
class DataParser(object):
    """Converts a raw dataframe into parallel (feature-index, feature-value)
    lists using a prebuilt FeatureDictionary.

    Numeric columns keep their raw value against a fixed index; categorical
    columns map the value to its index and use 1.0 as the value (one-hot).
    """

    def __init__(self, feat_dict):
        self.feat_dict = feat_dict

    def parse(self, infile=None, df=None, has_label=False):
        """Parse a CSV path or a dataframe into (xi, xv, y-or-ids)."""
        assert not ((infile is None) and (df is None)), "infile or df at least one is set"
        assert not ((infile is not None) and (df is not None)), "only one can be set"

        dfi = df.copy() if infile is None else pd.read_csv(infile)

        if has_label:
            y = dfi['target'].values.tolist()
            dfi.drop(['id', 'target'], axis=1, inplace=True)
        else:
            ids = dfi['id'].values.tolist()
            dfi.drop(['id'], axis=1, inplace=True)

        # dfi carries feature indices; dfv carries the matching values,
        # which are binary (1/0) or float (e.g., 10.24).
        dfv = dfi.copy()
        for col in dfi.columns:
            if col in self.feat_dict.ignore_cols:
                dfi.drop(col, axis=1, inplace=True)
                dfv.drop(col, axis=1, inplace=True)
            elif col in self.feat_dict.numeric_cols:
                # Fixed index; the raw value in dfv stays untouched.
                dfi[col] = self.feat_dict.feat_dict[col]
            else:
                # Per-value index; value becomes 1.0.
                dfi[col] = dfi[col].map(self.feat_dict.feat_dict[col])
                dfv[col] = 1.

        xi = dfi.values.tolist()
        xv = dfv.values.tolist()

        return (xi, xv, y) if has_label else (xi, xv, ids)
97 |
98 |
99 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/DeepFM_model/config.py:
--------------------------------------------------------------------------------
# Input CSVs for training and testing.
TRAIN_FILE = "data/train.csv"
TEST_FILE = "data/test.csv"

# Directory where result CSVs are written (presumably by main.py — confirm).
SUB_DIR = "output"


# Number of cross-validation folds and the RNG seed for splitting.
NUM_SPLITS = 3
RANDOM_SEED = 2017

# types of columns of the dataset dataframe
CATEGORICAL_COLS = [
    # 'ps_ind_02_cat', 'ps_ind_04_cat', 'ps_ind_05_cat',
    # 'ps_car_01_cat', 'ps_car_02_cat', 'ps_car_03_cat',
    # 'ps_car_04_cat', 'ps_car_05_cat', 'ps_car_06_cat',
    # 'ps_car_07_cat', 'ps_car_08_cat', 'ps_car_09_cat',
    # 'ps_car_10_cat', 'ps_car_11_cat',
]

NUMERIC_COLS = [
    # # binary
    # "ps_ind_06_bin", "ps_ind_07_bin", "ps_ind_08_bin",
    # "ps_ind_09_bin", "ps_ind_10_bin", "ps_ind_11_bin",
    # "ps_ind_12_bin", "ps_ind_13_bin", "ps_ind_16_bin",
    # "ps_ind_17_bin", "ps_ind_18_bin",
    # "ps_calc_15_bin", "ps_calc_16_bin", "ps_calc_17_bin",
    # "ps_calc_18_bin", "ps_calc_19_bin", "ps_calc_20_bin",
    # numeric
    "ps_reg_01", "ps_reg_02", "ps_reg_03",
    "ps_car_12", "ps_car_13", "ps_car_14", "ps_car_15",

    # feature engineering
    "missing_feat", "ps_car_13_x_ps_reg_03",
]

# Columns excluded from the feature dictionary (ids, labels, calc features).
IGNORE_COLS = [
    "id", "target",
    "ps_calc_01", "ps_calc_02", "ps_calc_03", "ps_calc_04",
    "ps_calc_05", "ps_calc_06", "ps_calc_07", "ps_calc_08",
    "ps_calc_09", "ps_calc_10", "ps_calc_11", "ps_calc_12",
    "ps_calc_13", "ps_calc_14",
    "ps_calc_15_bin", "ps_calc_16_bin", "ps_calc_17_bin",
    "ps_calc_18_bin", "ps_calc_19_bin", "ps_calc_20_bin"
]
44 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/DeepFM_model/fig/DNN.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/DeepFM_model/fig/DNN.png
--------------------------------------------------------------------------------
/ctr_of_recommendation/DeepFM_model/fig/DeepFM.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/DeepFM_model/fig/DeepFM.png
--------------------------------------------------------------------------------
/ctr_of_recommendation/DeepFM_model/fig/FM.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/DeepFM_model/fig/FM.png
--------------------------------------------------------------------------------
/ctr_of_recommendation/DeepFM_model/metrics.py:
--------------------------------------------------------------------------------
1 |
2 | import numpy as np
3 |
def gini(actual, pred):
    """Gini coefficient of `pred`'s ranking of `actual`.

    Rows are sorted by descending prediction (ties broken by original
    position), then the normalized cumulative sum of actual values is
    compared against the diagonal.

    Args:
        actual: sequence of true values (e.g. 0/1 labels).
        pred: sequence of predicted scores, same length.

    Returns:
        The (unnormalized) Gini coefficient as a float.
    """
    assert (len(actual) == len(pred))
    # BUG FIX: dtype=np.float — the np.float alias was removed in NumPy 1.24;
    # the builtin float is the documented replacement. Also renamed the local
    # 'all', which shadowed the builtin.
    data = np.asarray(np.c_[actual, pred, np.arange(len(actual))], dtype=float)
    data = data[np.lexsort((data[:, 2], -1 * data[:, 1]))]
    total_losses = data[:, 0].sum()
    gini_sum = data[:, 0].cumsum().sum() / total_losses

    gini_sum -= (len(actual) + 1) / 2.
    return gini_sum / len(actual)
13 |
def gini_norm(actual, pred):
    """Gini coefficient of `pred` normalized by that of a perfect ranking."""
    perfect = gini(actual, actual)
    return gini(actual, pred) / perfect
16 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/DeepFM_model/output/DNN_Mean-0.31183_Std0.29369.csv:
--------------------------------------------------------------------------------
1 | id,target
2 | 0,0.54321
3 | 1,0.54492
4 | 2,0.54194
5 | 3,0.54175
6 | 4,0.54266
7 | 5,0.54154
8 | 6,0.54395
9 | 8,0.54214
10 | 10,0.54383
11 | 11,0.54348
12 | 12,0.54175
13 | 14,0.54253
14 | 15,0.54449
15 | 18,0.54221
16 | 21,0.54521
17 | 23,0.54488
18 | 24,0.54286
19 | 25,0.54416
20 | 27,0.54511
21 | 29,0.54365
22 | 30,0.54272
23 | 31,0.54500
24 | 32,0.54485
25 | 33,0.54332
26 | 37,0.54277
27 | 38,0.54376
28 | 39,0.54478
29 | 40,0.54178
30 | 41,0.54429
31 | 42,0.54348
32 | 44,0.54377
33 | 45,0.54288
34 | 47,0.54235
35 | 49,0.54258
36 | 51,0.54283
37 | 52,0.54266
38 | 53,0.54156
39 | 54,0.54426
40 | 55,0.54256
41 | 56,0.54520
42 | 57,0.54370
43 | 59,0.54359
44 | 60,0.54405
45 | 62,0.54316
46 | 63,0.54492
47 | 67,0.54511
48 | 68,0.54221
49 | 69,0.54548
50 | 70,0.54249
51 | 71,0.54415
52 | 73,0.54462
53 | 75,0.54333
54 | 76,0.54298
55 | 81,0.54271
56 | 82,0.54458
57 | 83,0.54240
58 | 86,0.54130
59 | 87,0.54291
60 | 88,0.54318
61 | 91,0.54448
62 | 92,0.54372
63 | 94,0.54307
64 | 97,0.54280
65 | 100,0.54605
66 | 102,0.54389
67 | 103,0.54320
68 | 105,0.54275
69 | 106,0.54410
70 | 108,0.54228
71 | 113,0.54418
72 | 114,0.54378
73 | 115,0.54324
74 | 118,0.54449
75 | 122,0.54158
76 | 124,0.54290
77 | 126,0.54196
78 | 128,0.54516
79 | 129,0.54435
80 | 130,0.54562
81 | 131,0.54449
82 | 132,0.54567
83 | 133,0.54219
84 | 134,0.54506
85 | 135,0.54226
86 | 136,0.54301
87 | 139,0.54165
88 | 140,0.54528
89 | 141,0.54275
90 | 146,0.54209
91 | 148,0.54334
92 | 151,0.54195
93 | 152,0.54450
94 | 154,0.54214
95 | 157,0.54393
96 | 158,0.54286
97 | 159,0.54520
98 | 161,0.54310
99 | 164,0.54332
100 | 165,0.54339
101 | 167,0.54313
102 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/DeepFM_model/output/DeepFM_Mean-0.11470_Std0.37335.csv:
--------------------------------------------------------------------------------
1 | id,target
2 | 0,0.46803
3 | 1,0.55377
4 | 2,0.53959
5 | 3,0.48240
6 | 4,0.42640
7 | 5,0.53783
8 | 6,0.43397
9 | 8,0.54862
10 | 10,0.39992
11 | 11,0.48496
12 | 12,0.56211
13 | 14,0.48791
14 | 15,0.40421
15 | 18,0.38874
16 | 21,0.48275
17 | 23,0.36596
18 | 24,0.54895
19 | 25,0.55286
20 | 27,0.46398
21 | 29,0.25796
22 | 30,0.52880
23 | 31,0.53623
24 | 32,0.38785
25 | 33,0.49019
26 | 37,0.53059
27 | 38,0.32213
28 | 39,0.48938
29 | 40,0.44188
30 | 41,0.39470
31 | 42,0.43526
32 | 44,0.38037
33 | 45,0.44053
34 | 47,0.47693
35 | 49,0.43951
36 | 51,0.52558
37 | 52,0.56112
38 | 53,0.63015
39 | 54,0.28074
40 | 55,0.50253
41 | 56,0.36943
42 | 57,0.41124
43 | 59,0.47449
44 | 60,0.41512
45 | 62,0.45376
46 | 63,0.56464
47 | 67,0.48383
48 | 68,0.44448
49 | 69,0.43281
50 | 70,0.41257
51 | 71,0.36101
52 | 73,0.24134
53 | 75,0.48104
54 | 76,0.41155
55 | 81,0.52558
56 | 82,0.40699
57 | 83,0.35711
58 | 86,0.36253
59 | 87,0.42458
60 | 88,0.57573
61 | 91,0.50545
62 | 92,0.57203
63 | 94,0.53472
64 | 97,0.47725
65 | 100,0.42449
66 | 102,0.49121
67 | 103,0.48863
68 | 105,0.59440
69 | 106,0.40794
70 | 108,0.49273
71 | 113,0.33953
72 | 114,0.50476
73 | 115,0.53934
74 | 118,0.48991
75 | 122,0.50319
76 | 124,0.41910
77 | 126,0.41064
78 | 128,0.36258
79 | 129,0.31102
80 | 130,0.45700
81 | 131,0.55222
82 | 132,0.47241
83 | 133,0.47101
84 | 134,0.45344
85 | 135,0.55308
86 | 136,0.50106
87 | 139,0.42091
88 | 140,0.44550
89 | 141,0.42207
90 | 146,0.46423
91 | 148,0.52868
92 | 151,0.44960
93 | 152,0.26475
94 | 154,0.56421
95 | 157,0.58842
96 | 158,0.42789
97 | 159,0.43978
98 | 161,0.62290
99 | 164,0.54502
100 | 165,0.38185
101 | 167,0.53922
102 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/DeepFM_model/output/DeepFM_Mean0.01434_Std0.10176.csv:
--------------------------------------------------------------------------------
1 | id,target
2 | 0,0.32278
3 | 1,0.41663
4 | 2,0.44417
5 | 3,0.47512
6 | 4,0.52361
7 | 5,0.33677
8 | 6,0.44370
9 | 8,0.30100
10 | 10,0.48097
11 | 11,0.52027
12 | 12,0.26543
13 | 14,0.40398
14 | 15,0.46376
15 | 18,0.38902
16 | 21,0.35526
17 | 23,0.41269
18 | 24,0.37623
19 | 25,0.30560
20 | 27,0.41068
21 | 29,0.49968
22 | 30,0.48046
23 | 31,0.53911
24 | 32,0.37760
25 | 33,0.42462
26 | 37,0.43910
27 | 38,0.43226
28 | 39,0.40951
29 | 40,0.42573
30 | 41,0.38593
31 | 42,0.45659
32 | 44,0.42400
33 | 45,0.46563
34 | 47,0.41856
35 | 49,0.43669
36 | 51,0.39470
37 | 52,0.35710
38 | 53,0.35468
39 | 54,0.58721
40 | 55,0.34572
41 | 56,0.49496
42 | 57,0.52123
43 | 59,0.43579
44 | 60,0.37308
45 | 62,0.36949
46 | 63,0.36458
47 | 67,0.40002
48 | 68,0.40630
49 | 69,0.51984
50 | 70,0.43685
51 | 71,0.34467
52 | 73,0.49609
53 | 75,0.42494
54 | 76,0.36640
55 | 81,0.41558
56 | 82,0.49456
57 | 83,0.41528
58 | 86,0.38741
59 | 87,0.42377
60 | 88,0.49288
61 | 91,0.43845
62 | 92,0.50188
63 | 94,0.38807
64 | 97,0.43247
65 | 100,0.37401
66 | 102,0.36822
67 | 103,0.39734
68 | 105,0.38886
69 | 106,0.40349
70 | 108,0.29820
71 | 113,0.38590
72 | 114,0.53072
73 | 115,0.37515
74 | 118,0.34776
75 | 122,0.35378
76 | 124,0.35501
77 | 126,0.36031
78 | 128,0.36464
79 | 129,0.48796
80 | 130,0.40816
81 | 131,0.44641
82 | 132,0.40488
83 | 133,0.39336
84 | 134,0.51089
85 | 135,0.49477
86 | 136,0.35754
87 | 139,0.46074
88 | 140,0.38236
89 | 141,0.37077
90 | 146,0.29805
91 | 148,0.43685
92 | 151,0.45538
93 | 152,0.35027
94 | 154,0.35761
95 | 157,0.36037
96 | 158,0.39519
97 | 159,0.33552
98 | 161,0.41159
99 | 164,0.42803
100 | 165,0.44548
101 | 167,0.39931
102 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/DeepFM_model/output/DeepFM_Mean0.05735_Std0.20027.csv:
--------------------------------------------------------------------------------
1 | id,target
2 | 0,0.72139
3 | 1,0.51760
4 | 2,0.59032
5 | 3,0.63660
6 | 4,0.50603
7 | 5,0.57058
8 | 6,0.72299
9 | 8,0.62921
10 | 10,0.64393
11 | 11,0.62246
12 | 12,0.64539
13 | 14,0.62271
14 | 15,0.63971
15 | 18,0.74351
16 | 21,0.56603
17 | 23,0.65027
18 | 24,0.62978
19 | 25,0.56364
20 | 27,0.55366
21 | 29,0.64651
22 | 30,0.63995
23 | 31,0.51301
24 | 32,0.65243
25 | 33,0.62960
26 | 37,0.61379
27 | 38,0.62845
28 | 39,0.56194
29 | 40,0.55361
30 | 41,0.65380
31 | 42,0.56262
32 | 44,0.52620
33 | 45,0.56058
34 | 47,0.67995
35 | 49,0.58040
36 | 51,0.57256
37 | 52,0.57186
38 | 53,0.74692
39 | 54,0.63829
40 | 55,0.61376
41 | 56,0.57716
42 | 57,0.66004
43 | 59,0.60760
44 | 60,0.68578
45 | 62,0.68983
46 | 63,0.62641
47 | 67,0.59588
48 | 68,0.59095
49 | 69,0.56658
50 | 70,0.60620
51 | 71,0.53494
52 | 73,0.73047
53 | 75,0.56699
54 | 76,0.68507
55 | 81,0.59263
56 | 82,0.45351
57 | 83,0.65228
58 | 86,0.67729
59 | 87,0.63932
60 | 88,0.62208
61 | 91,0.50822
62 | 92,0.60571
63 | 94,0.61354
64 | 97,0.62548
65 | 100,0.69225
66 | 102,0.50505
67 | 103,0.61700
68 | 105,0.65031
69 | 106,0.66246
70 | 108,0.67469
71 | 113,0.66512
72 | 114,0.53249
73 | 115,0.55344
74 | 118,0.68072
75 | 122,0.53538
76 | 124,0.65328
77 | 126,0.64717
78 | 128,0.73029
79 | 129,0.63653
80 | 130,0.63030
81 | 131,0.50802
82 | 132,0.58770
83 | 133,0.62624
84 | 134,0.44326
85 | 135,0.63895
86 | 136,0.56856
87 | 139,0.53739
88 | 140,0.63811
89 | 141,0.70656
90 | 146,0.57495
91 | 148,0.62791
92 | 151,0.60073
93 | 152,0.73494
94 | 154,0.60894
95 | 157,0.60582
96 | 158,0.54721
97 | 159,0.70589
98 | 161,0.63762
99 | 164,0.53981
100 | 165,0.65285
101 | 167,0.52954
102 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/DeepFM_model/output/DeepFM_Mean0.26137_Std0.00210.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/DeepFM_model/output/DeepFM_Mean0.26137_Std0.00210.csv
--------------------------------------------------------------------------------
/ctr_of_recommendation/DeepFM_model/output/FM_Mean0.23297_Std0.05576.csv:
--------------------------------------------------------------------------------
1 | id,target
2 | 0,0.37706
3 | 1,0.40747
4 | 2,0.28335
5 | 3,0.29426
6 | 4,0.25722
7 | 5,0.28061
8 | 6,0.36010
9 | 8,0.26813
10 | 10,0.50419
11 | 11,0.29652
12 | 12,0.22183
13 | 14,0.28447
14 | 15,0.44019
15 | 18,0.43666
16 | 21,0.32927
17 | 23,0.28054
18 | 24,0.25594
19 | 25,0.27155
20 | 27,0.26363
21 | 29,0.34340
22 | 30,0.37857
23 | 31,0.30758
24 | 32,0.41682
25 | 33,0.26732
26 | 37,0.39802
27 | 38,0.32117
28 | 39,0.39406
29 | 40,0.24067
30 | 41,0.39323
31 | 42,0.40359
32 | 44,0.28283
33 | 45,0.36268
34 | 47,0.31174
35 | 49,0.35913
36 | 51,0.27528
37 | 52,0.28072
38 | 53,0.35339
39 | 54,0.45116
40 | 55,0.33479
41 | 56,0.47107
42 | 57,0.34473
43 | 59,0.34868
44 | 60,0.45001
45 | 62,0.35572
46 | 63,0.39236
47 | 67,0.36394
48 | 68,0.27234
49 | 69,0.51613
50 | 70,0.33188
51 | 71,0.24334
52 | 73,0.36806
53 | 75,0.41980
54 | 76,0.37788
55 | 81,0.31707
56 | 82,0.33174
57 | 83,0.35205
58 | 86,0.34927
59 | 87,0.45646
60 | 88,0.27697
61 | 91,0.34399
62 | 92,0.42113
63 | 94,0.35314
64 | 97,0.29256
65 | 100,0.44001
66 | 102,0.30431
67 | 103,0.25371
68 | 105,0.41161
69 | 106,0.39540
70 | 108,0.36266
71 | 113,0.36232
72 | 114,0.37745
73 | 115,0.28234
74 | 118,0.37840
75 | 122,0.22426
76 | 124,0.30503
77 | 126,0.35986
78 | 128,0.30551
79 | 129,0.32311
80 | 130,0.35530
81 | 131,0.33789
82 | 132,0.39140
83 | 133,0.30195
84 | 134,0.31456
85 | 135,0.41466
86 | 136,0.24149
87 | 139,0.23444
88 | 140,0.36823
89 | 141,0.36059
90 | 146,0.25876
91 | 148,0.48031
92 | 151,0.31372
93 | 152,0.50250
94 | 154,0.26625
95 | 157,0.28990
96 | 158,0.24773
97 | 159,0.48179
98 | 161,0.46381
99 | 164,0.34177
100 | 165,0.48971
101 | 167,0.30779
102 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/FFM_model.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import pandas as pd
3 | import numpy as np
4 | import os
5 |
6 |
input_x_size = 20   # n: number of input features
field_size = 2      # f: number of feature fields

vector_dimension = 3  # k: dimension of each latent (field-aware) vector

total_plan_train_steps = 1000  # number of passes over the training data
# SGD: one sample per step; parameters are updated after every sample.
batch_size = 1

all_data_size = 1000  # number of synthetic samples to generate

lr = 0.01  # learning rate for gradient descent

MODEL_SAVE_PATH = "TFModel"  # directory for checkpoints and summaries
MODEL_NAME = "FFM"
22 |
def createTwoDimensionWeight(input_x_size, field_size, vector_dimension):
    """Create the pairwise-interaction weight tensor of shape (n, f, k),
    initialized from a truncated normal distribution."""
    initial = tf.truncated_normal([input_x_size, field_size, vector_dimension])
    return tf.Variable(initial)
29 |
def createOneDimensionWeight(input_x_size):
    """Create the first-order (linear) weight vector of length n,
    initialized from a truncated normal distribution."""
    return tf.Variable(tf.truncated_normal([input_x_size]))
34 |
def createZeroDimensionWeight():
    """Create the scalar bias term (kept as a length-1 variable),
    initialized from a truncated normal distribution."""
    return tf.Variable(tf.truncated_normal([1]))
39 |
def inference(input_x, input_x_field, zeroWeights, oneDimWeights, thirdWeight):
    """Compute the FFM model output for a single sample.

    Args:
        input_x: placeholder of shape [input_x_size], the feature vector x.
        input_x_field: Python list mapping feature index -> field index.
        zeroWeights: bias variable, shape [1].
        oneDimWeights: linear weights, shape [input_x_size].
        thirdWeight: pairwise latent weights, shape (n, f, k).

    Returns:
        Scalar tensor b + <w, x> + sum_{i<j} <v_{i,f_j}, v_{j,f_i}> x_i x_j.
    """
    # Linear part: b + <w, x>.
    secondValue = tf.reduce_sum(tf.multiply(oneDimWeights, input_x, name='secondValue'))
    firstTwoValue = tf.add(zeroWeights, secondValue, name="firstTwoValue")

    # BUG FIX: the original accumulated the pairwise term into a tf.Variable
    # via tf.assign(), but the returned assign op was never run, so the
    # interaction term always evaluated to 0. Accumulate into a plain tensor
    # built at graph-construction time instead.
    thirdValue = tf.constant(0.0, dtype=tf.float32)

    for i in range(input_x_size):
        fieldIndex1 = int(input_x_field[i])
        for j in range(i + 1, input_x_size):
            fieldIndex2 = int(input_x_field[j])

            # v_{i, f_j}: latent vector of feature i w.r.t. the field of feature j.
            vectorLeft = tf.convert_to_tensor(
                [[i, fieldIndex2, k] for k in range(vector_dimension)])
            weightLeftAfterCut = tf.squeeze(tf.gather_nd(thirdWeight, vectorLeft))

            # v_{j, f_i}: latent vector of feature j w.r.t. the field of feature i.
            vectorRight = tf.convert_to_tensor(
                [[j, fieldIndex1, k] for k in range(vector_dimension)])
            weightRightAfterCut = tf.squeeze(tf.gather_nd(thirdWeight, vectorRight))

            # <v_{i,f_j}, v_{j,f_i}>
            tempValue = tf.reduce_sum(tf.multiply(weightLeftAfterCut, weightRightAfterCut))

            # x_i * x_j
            xi = tf.squeeze(tf.gather_nd(input_x, [i]))
            xj = tf.squeeze(tf.gather_nd(input_x, [j]))
            product = tf.multiply(xi, xj)

            thirdValue = tf.add(thirdValue, tf.multiply(tempValue, product))

    return tf.add(firstTwoValue, thirdValue)
79 |
def gen_data():
    """Generate a synthetic training set.

    Returns:
        x: int array of shape (all_data_size, input_x_size) with 0/1 entries.
        y: list of labels drawn uniformly from {-1, +1}.
        x_field: field index for each feature (features 0-9 -> field 0,
                 features 10-19 -> field 1).
    """
    y = [np.random.choice([-1, 1], 1)[0] for _ in range(all_data_size)]
    x_field = [idx // 10 for idx in range(input_x_size)]
    x = np.random.randint(0, 2, size=(all_data_size, input_x_size))
    return x, y, x_field
86 |
87 |
if __name__ == '__main__':
    global_step = tf.Variable(0, trainable=False)
    trainx, trainy, trainx_field = gen_data()

    # Placeholders for one sample (batch_size == 1).
    input_x = tf.placeholder(tf.float32, [input_x_size])
    input_y = tf.placeholder(tf.float32)

    # L2 regularization strengths for the linear and pairwise weights.
    lambda_w = tf.constant(0.001, name='lambda_w')
    lambda_v = tf.constant(0.001, name='lambda_v')

    zeroWeights = createZeroDimensionWeight()                # bias b

    oneDimWeights = createOneDimensionWeight(input_x_size)   # linear weights w

    thirdWeight = createTwoDimensionWeight(input_x_size,     # pairwise latent weights
                                           field_size,
                                           vector_dimension)  # n * f * k

    y_ = inference(input_x, trainx_field, zeroWeights, oneDimWeights, thirdWeight)

    l2_norm = tf.reduce_sum(
        tf.add(
            tf.multiply(lambda_w, tf.pow(oneDimWeights, 2)),
            tf.reduce_sum(tf.multiply(lambda_v, tf.pow(thirdWeight, 2)), axis=[1, 2])
        )
    )

    # BUG FIX: the logistic loss for labels in {-1, +1} is log(1 + exp(-y * f(x))).
    # The original used exp(+y * f(x)), which penalizes CORRECT predictions.
    loss = tf.log(1 + tf.exp(-input_y * y_)) + l2_norm

    # BUG FIX: pass global_step so it is incremented on every update; the
    # original left it stuck at 0, so every checkpoint overwrote "FFM-0".
    train_step = tf.train.GradientDescentOptimizer(learning_rate=lr).minimize(
        loss, global_step=global_step)

    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(total_plan_train_steps):
            for t in range(all_data_size):
                input_x_batch = trainx[t]
                input_y_batch = trainy[t]
                predict_loss, _, steps = sess.run(
                    [loss, train_step, global_step],
                    feed_dict={input_x: input_x_batch, input_y: input_y_batch})

                print("After {step} training step(s) , loss on training batch is {predict_loss} "
                      .format(step=steps, predict_loss=predict_loss))

                saver.save(sess, os.path.join(MODEL_SAVE_PATH, MODEL_NAME), global_step=steps)
                writer = tf.summary.FileWriter(os.path.join(MODEL_SAVE_PATH, MODEL_NAME), tf.get_default_graph())
                writer.close()
136 | #
137 |
138 |
139 |
140 |
141 |
142 |
143 |
144 |
145 |
146 |
147 |
148 |
149 |
150 |
151 |
152 |
153 |
154 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM-0.data-00000-of-00001:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM-0.data-00000-of-00001
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM-0.index:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM-0.index
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM-0.meta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM-0.meta
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523526908.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523526908.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523527022.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523527022.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523527136.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523527136.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523527252.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523527252.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523527416.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523527416.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530263.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530263.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530409.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530409.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530500.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530500.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530509.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530509.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530517.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530517.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530526.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530526.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530538.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530538.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530548.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530548.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530556.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530556.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530568.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530568.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530579.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530579.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530589.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530589.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530598.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530598.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530606.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530606.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530618.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530618.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530632.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530632.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530643.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530643.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530653.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530653.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530660.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530660.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530668.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530668.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530675.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530675.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530686.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530686.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530695.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530695.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530703.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530703.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530710.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530710.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530718.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530718.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530726.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530726.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530736.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530736.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530744.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530744.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530751.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530751.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530759.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530759.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530766.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530766.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530774.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530774.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530781.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530781.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530789.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530789.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530798.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530798.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530808.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530808.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530820.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530820.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530827.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530827.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530835.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530835.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530844.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530844.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530852.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530852.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530860.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530860.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530868.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530868.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530875.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530875.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530883.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530883.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530891.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530891.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530898.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530898.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530906.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530906.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530913.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530913.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530921.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530921.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530930.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530930.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530938.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530938.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530945.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530945.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530953.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530953.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530961.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530961.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530968.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530968.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530976.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530976.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530984.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530984.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537511.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537511.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537521.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537521.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537530.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537530.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537538.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537538.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537547.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537547.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537556.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537556.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537565.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537565.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537574.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537574.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537583.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537583.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537591.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537591.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537600.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537600.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537608.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537608.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537616.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537616.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537624.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537624.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537632.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537632.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537641.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537641.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537652.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537652.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537662.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537662.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537672.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537672.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537682.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537682.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537691.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537691.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537700.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537700.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537709.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537709.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537719.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537719.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537728.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537728.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537736.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537736.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537745.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537745.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537754.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537754.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537763.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537763.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537772.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537772.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537781.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537781.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537790.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537790.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537799.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537799.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537807.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537807.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537815.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537815.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537825.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537825.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537834.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537834.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537843.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537843.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537852.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537852.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537861.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537861.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537871.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537871.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537880.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537880.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537888.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537888.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537897.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537897.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537906.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537906.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537915.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537915.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537925.meituan-sxwdeMacBook-Pro-4.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537925.meituan-sxwdeMacBook-Pro-4.local
--------------------------------------------------------------------------------
/ctr_of_recommendation/FFM_Demo/TFModel/checkpoint:
--------------------------------------------------------------------------------
1 | model_checkpoint_path: "FFM-0"
2 | all_model_checkpoint_paths: "FFM-0"
3 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/FM_demo/FM_model.py:
--------------------------------------------------------------------------------
1 | from itertools import count
2 | from collections import defaultdict
3 | from scipy.sparse import csr
4 | import numpy as np
5 | import pandas as pd
6 | import numpy as np
7 | from sklearn.feature_extraction import DictVectorizer
8 | import tensorflow as tf
9 | from tqdm import tqdm_notebook as tqdm
10 |
11 |
def vectorize_dic(dic,ix=None,p=None,n=0,g=0):
    """
    Build a one-hot sparse design matrix from parallel feature lists.

    dic -- dictionary of feature lists. Keys are the names of features;
           every list must have length n.
    ix -- mapping {str(value)+str(feature_name): column index}. Pass the
          map returned by a previous call to keep column assignments
          consistent across datasets (default None -> start a fresh map).
    p -- dimension of feature space (number of columns in the sparse
         matrix). Entries whose column index falls outside [0, p) are
         dropped, which silently ignores unseen test-time features
         (default None -> len(ix) after processing).
    n -- number of samples (rows).
    g -- number of feature groups, i.e. len(dic).

    Returns (csr_matrix of shape (n, p), ix).
    """
    # Local import avoids the deprecated scipy.sparse.csr submodule namespace.
    from scipy.sparse import csr_matrix

    if ix is None:
        ix = dict()

    # One nonzero entry per (sample, feature group).
    nz = n * g

    col_ix = np.empty(nz,dtype = int)

    for i, (k, lis) in enumerate(dic.items()):
        for t in range(len(lis)):
            key = str(lis[t]) + str(k)
            # BUGFIX: assign each distinct feature value its own stable
            # column index. The original stored an occurrence *count*
            # (ix.get(key, 0) + 1), which collided different features onto
            # the same columns and pushed frequent features past p.
            if key not in ix:
                ix[key] = len(ix)
            col_ix[i + t * g] = ix[key]

    row_ix = np.repeat(np.arange(0,n),g)
    data = np.ones(nz)
    if p is None:
        p = len(ix)

    # Keep only entries that fit the requested feature space.
    ixx = np.where(col_ix < p)
    return csr_matrix((data[ixx],(row_ix[ixx],col_ix[ixx])),shape=(n,p)),ix
39 |
40 |
def batcher(X_, y_=None, batch_size=-1):
    """Yield successive (x, y) mini-batch tuples from X_ (and optionally y_).

    batch_size == -1 means "one batch containing everything"; any other
    value < 1 is rejected. When y_ is None, the second tuple element is None.
    """
    total = X_.shape[0]

    if batch_size == -1:
        batch_size = total
    if batch_size < 1:
        raise ValueError('Parameter batch_size={} is unsupported'.format(batch_size))

    start = 0
    while start < total:
        stop = min(start + batch_size, total)
        batch_x = X_[start:stop]
        batch_y = y_[start:stop] if y_ is not None else None
        yield (batch_x, batch_y)
        start += batch_size
56 |
57 |
# ---------------------------------------------------------------------------
# Script: trains a rank-k Factorization Machine (FM) regressor on the
# MovieLens-100k "ua" split with TensorFlow 1.x graph mode, then reports
# per-batch test error. Requires data/ua.base and data/ua.test on disk.
# ---------------------------------------------------------------------------

# MovieLens-100k rating columns (tab-separated files, no header row).
cols = ['user','item','rating','timestamp']

train = pd.read_csv('data/ua.base',delimiter='\t',names = cols)
test = pd.read_csv('data/ua.test',delimiter='\t',names = cols)

# One-hot encode (user, item) pairs into a sparse design matrix:
# n = number of samples, g = 2 feature groups (users + items).
x_train,ix = vectorize_dic({'users':train['user'].values,
                            'items':train['item'].values},n=len(train.index),g=2)


# Reuse the training-set index map `ix` and force p = x_train.shape[1] so
# the test matrix lives in the same column space as the training matrix.
x_test,ix = vectorize_dic({'users':test['user'].values,
                           'items':test['item'].values},ix,x_train.shape[1],n=len(test.index),g=2)


print(x_train)
y_train = train['rating'].values
y_test = test['rating'].values

# Densify for feed_dict; fine at MovieLens-100k scale, memory-heavy beyond.
x_train = x_train.todense()
x_test = x_test.todense()

print(x_train)

print(x_train.shape)
print (x_test.shape)


n,p = x_train.shape

# k: dimensionality of the FM pairwise-interaction factors.
k = 10

# Placeholders for a mini-batch of inputs (batch, p) and targets (batch, 1).
x = tf.placeholder('float',[None,p])

y = tf.placeholder('float',[None,1])

# FM parameters: global bias w0, linear weights w (p,), factor matrix v (k, p).
w0 = tf.Variable(tf.zeros([1]))
w = tf.Variable(tf.zeros([p]))

v = tf.Variable(tf.random_normal([k,p],mean=0,stddev=0.01))

#y_hat = tf.Variable(tf.zeros([n,1]))

# Linear term: w0 + <w, x_i> per sample, shape (batch, 1).
linear_terms = tf.add(w0,tf.reduce_sum(tf.multiply(w,x),1,keep_dims=True)) # n * 1
# Pairwise term via the O(k*p) FM identity (Rendle 2010):
#   0.5 * sum_f [ (x v_f)^2 - (x^2)(v_f^2) ]
pair_interactions = 0.5 * tf.reduce_sum(
    tf.subtract(
        tf.pow(
            tf.matmul(x,tf.transpose(v)),2),
        tf.matmul(tf.pow(x,2),tf.transpose(tf.pow(v,2)))
    ),axis = 1 , keep_dims=True)


y_hat = tf.add(linear_terms,pair_interactions)

# L2 regularization strengths for w and v.
lambda_w = tf.constant(0.001,name='lambda_w')
lambda_v = tf.constant(0.001,name='lambda_v')

# NOTE(review): w**2 has shape (p,) while v**2 has shape (k, p); tf.add
# broadcasts the w penalty across all k rows, so it is effectively counted
# k times in the sum — confirm this weighting is intended.
l2_norm = tf.reduce_sum(
    tf.add(
        tf.multiply(lambda_w,tf.pow(w,2)),
        tf.multiply(lambda_v,tf.pow(v,2))
    )
)

# Objective: mean squared error plus the L2 penalty.
error = tf.reduce_mean(tf.square(y-y_hat))
loss = tf.add(error,l2_norm)


train_op = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(loss)


epochs = 10
batch_size = 1000

# Launch the graph
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)

    for epoch in tqdm(range(epochs), unit='epoch'):
        # Reshuffle samples each epoch before slicing into mini-batches.
        perm = np.random.permutation(x_train.shape[0])
        # iterate over batches
        for bX, bY in batcher(x_train[perm], y_train[perm], batch_size):
            _,t = sess.run([train_op,loss], feed_dict={x: bX.reshape(-1, p), y: bY.reshape(-1, 1)})
            print(t)


    # Evaluation: per-batch MSE on the test set (batch_size=-1 -> one batch),
    # then overall RMSE across batches.
    errors = []
    for bX, bY in batcher(x_test, y_test):
        errors.append(sess.run(error, feed_dict={x: bX.reshape(-1, p), y: bY.reshape(-1, 1)}))
        print(errors)
    RMSE = np.sqrt(np.array(errors).mean())
    print (RMSE)
149 |
150 |
151 |
152 |
153 |
154 |
155 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/FNN_demo/FNN.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | from const import *
4 | from utils import tf_estimator_model, add_layer_summary, build_estimator_helper
5 | from model.FNN.preprocess import build_features
6 |
@tf_estimator_model
def model_fn(features, labels, mode, params):
    """FNN model_fn: a DNN whose bottom layer is initialised from a
    pre-trained FM's latent embeddings and linear weights.

    Args:
        features: feature dict handed over by the Estimator input_fn.
        labels: targets (consumed by the tf_estimator_model decorator).
        mode: tf.estimator.ModeKeys value; gates batch-norm/dropout.
        params: expects 'hidden_units' (list of layer widths) and
            'dropout_rate'.

    Returns:
        The raw 1-unit output tensor; loss/train-op wiring is added by the
        tf_estimator_model decorator.
    """
    feature_columns= build_features()

    input = tf.feature_column.input_layer(features, feature_columns)

    with tf.variable_scope('init_fm_embedding'):
        # method1: load from census_checkpoint directly.
        # The FM interaction matrix 'fm_interaction/v' and linear weight
        # 'linear/w' are read from the checkpoint and used as (trainable)
        # initial values of new variables.
        embeddings = tf.Variable( tf.contrib.framework.load_variable(
            './census_checkpoint/FM',
            'fm_interaction/v'
        ) )
        weight = tf.Variable( tf.contrib.framework.load_variable(
            './census_checkpoint/FM',
            'linear/w'
        ) )
        # Project the input through both FM matrices and sum the results to
        # form the dense input of the upper DNN.
        dense = tf.add(tf.matmul(input, embeddings), tf.matmul(input, weight))
        add_layer_summary('input', dense)

    with tf.variable_scope( 'Dense' ):
        # Stack of relu -> batch-norm -> dropout layers; batch-norm and
        # dropout are only active when mode == TRAIN.
        for i, unit in enumerate( params['hidden_units'] ):
            dense = tf.layers.dense( dense, units=unit, activation='relu', name='dense{}'.format( i ) )
            dense = tf.layers.batch_normalization( dense, center=True, scale=True, trainable=True,
                                                   training=(mode == tf.estimator.ModeKeys.TRAIN) )
            dense = tf.layers.dropout( dense, rate=params['dropout_rate'],
                                       training=(mode == tf.estimator.ModeKeys.TRAIN) )
            add_layer_summary( dense.name, dense )

    with tf.variable_scope('output'):
        # Single raw logit per example.
        y = tf.layers.dense(dense, units= 1, name = 'output')
        tf.summary.histogram(y.name, y)

    return y
40 |
# Estimator factory: binds model_fn and its hyper-parameters to the 'census'
# dataset (the plumbing lives in utils.build_estimator_helper).
build_estimator = build_estimator_helper(
    model_fn = {
        'census':model_fn
    },
    params = {
        'census': {
            'dropout_rate':0.2,
            'learning_rate': 0.01,
            'hidden_units':[24,12,1]
        }
    }
)
53 |
54 |
# check name of all the tensor in the census_checkpoint

if __name__ == '__main__':
    # Utility entry point: dump tensor names/values from the pre-trained FM
    # checkpoint so the variable names referenced in model_fn can be verified.
    print ('checking name of all the tensor in the FNN pretrain census_checkpoint')
    from tensorflow.python.tools.inspect_checkpoint import print_tensors_in_checkpoint_file
    latest_ckp = tf.train.latest_checkpoint('./census_checkpoint/FM')
    # NOTE(review): some TF1 versions require tensor_name as a positional
    # argument of print_tensors_in_checkpoint_file — confirm against the
    # installed TF version.
    print_tensors_in_checkpoint_file( latest_ckp, all_tensors=True )
    print_tensors_in_checkpoint_file(latest_ckp, all_tensors=False, tensor_name='fm_interaction/v' )
--------------------------------------------------------------------------------
/ctr_of_recommendation/FNN_demo/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FNN_demo/__init__.py
--------------------------------------------------------------------------------
/ctr_of_recommendation/FNN_demo/preprocess.py:
--------------------------------------------------------------------------------
1 | from const import *
2 | import tensorflow as tf
3 |
def build_features():
    """Build the sparse feature columns for the FNN census model.

    Hashed categorical columns (one-hot encoded) come from EMB_CONFIGS and
    bucketized numeric columns from BUCKET_CONFIGS (both defined in const).

    Returns:
        list of tf.feature_column objects.
    """
    sparse_columns = []

    # Categorical fields: hash into a fixed bucket space, then one-hot.
    for name, conf in EMB_CONFIGS.items():
        hashed = tf.feature_column.categorical_column_with_hash_bucket(name, hash_bucket_size = conf['hash_size'])
        sparse_columns.append(tf.feature_column.indicator_column(hashed))

    # Continuous fields: discretise using the configured bin boundaries.
    for name, conf in BUCKET_CONFIGS.items():
        sparse_columns.append(
            tf.feature_column.bucketized_column(
                tf.feature_column.numeric_column(name), boundaries = conf['bin']
            )
        )

    return sparse_columns
18 |
19 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/GBDT+LR-Demo/.idea/GBDT+LR-Demo.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/GBDT+LR-Demo/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/GBDT+LR-Demo/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/GBDT+LR-Demo/GBDT_LR.py:
--------------------------------------------------------------------------------
import lightgbm as lgb

import pandas as pd
import numpy as np

from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LogisticRegression

# GBDT + LR demo: train a LightGBM model, encode every sample by the leaf it
# lands in for each tree (one-hot over trees x leaves), then fit a logistic
# regression on that transformed representation.

print('Load data...')
df_train = pd.read_csv('data/train.csv')
df_test = pd.read_csv('data/test.csv')

NUMERIC_COLS = [
    "ps_reg_01", "ps_reg_02", "ps_reg_03",
    "ps_car_12", "ps_car_13", "ps_car_14", "ps_car_15",
]

print(df_test.head(10))

y_train = df_train['target']  # training label
y_test = df_test['target']  # testing label
X_train = df_train[NUMERIC_COLS]  # training dataset
X_test = df_test[NUMERIC_COLS]  # testing dataset

# create dataset for lightgbm
lgb_train = lgb.Dataset(X_train, y_train)
lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)

params = {
    'task': 'train',
    'boosting_type': 'gbdt',
    'objective': 'binary',
    'metric': {'binary_logloss'},
    'num_leaves': 64,
    'num_trees': 100,
    'learning_rate': 0.01,
    'feature_fraction': 0.9,
    'bagging_fraction': 0.8,
    'bagging_freq': 5,
    'verbose': 0
}

# number of leaves, will be used in feature transformation
num_leaf = 64

print('Start training...')
# train
gbm = lgb.train(params,
                lgb_train,
                num_boost_round=100,
                valid_sets=lgb_train)

print('Save model...')
# save model to file
gbm.save_model('model.txt')

print('Start predicting...')
# predict and get data on leaves, training data.
# With pred_leaf=True, predict returns the index of the leaf each sample
# reaches in every tree: shape (n_samples, n_trees).
y_pred = gbm.predict(X_train, pred_leaf=True)

print(np.array(y_pred).shape)
print(y_pred[:10])

print('Writing transformed training data')
# One-hot encode the leaf indices: each tree owns a num_leaf-wide slot and
# exactly one position per tree is set.
transformed_training_matrix = np.zeros([len(y_pred), len(y_pred[0]) * num_leaf],
                                       dtype=np.int64)  # N * num_tress * num_leafs
for i in range(0, len(y_pred)):
    # temp: flat column index of the active leaf in every tree's slot.
    temp = np.arange(len(y_pred[0])) * num_leaf + np.array(y_pred[i])
    transformed_training_matrix[i][temp] += 1


# Same leaf one-hot transformation for the test set.
y_pred = gbm.predict(X_test, pred_leaf=True)
print('Writing transformed testing data')
transformed_testing_matrix = np.zeros([len(y_pred), len(y_pred[0]) * num_leaf], dtype=np.int64)
for i in range(0, len(y_pred)):
    temp = np.arange(len(y_pred[0])) * num_leaf + np.array(y_pred[i])
    transformed_testing_matrix[i][temp] += 1


lm = LogisticRegression(penalty='l2',C=0.05)  # logistic model construction
lm.fit(transformed_training_matrix,y_train)  # fitting the data
y_pred_test = lm.predict_proba(transformed_testing_matrix)  # Give the probability on each label

print(y_pred_test)

# Cross-entropy in the style of the Facebook GBDT+LR paper's Normalized
# Entropy, except only the numerator is computed (no background-CTR
# denominator), so the value comes out larger than the paper's NE.
NE = (-1) / len(y_pred_test) * sum(((1+y_test)/2 * np.log(y_pred_test[:,1]) + (1-y_test)/2 * np.log(1 - y_pred_test[:,1])))
print("Normalized Cross Entropy " + str(NE))
# This NE has only the numerator without the denominator, so it is larger.
# NE comes out around 2.22.
--------------------------------------------------------------------------------
/ctr_of_recommendation/MLR(LS-PLM)_Demo/.idea/Basic-MLR-Demo.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/MLR(LS-PLM)_Demo/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/MLR(LS-PLM)_Demo/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/MLR(LS-PLM)_Demo/__pycache__/data.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/MLR(LS-PLM)_Demo/__pycache__/data.cpython-37.pyc
--------------------------------------------------------------------------------
/ctr_of_recommendation/MLR(LS-PLM)_Demo/data.py:
--------------------------------------------------------------------------------
# Data preprocessing
'''
The data contains both continuous and categorical features, so it needs some
simple preprocessing first: categorical features are converted to one-hot and
continuous features are standardized.
One thing to note: the categorical values appearing in the train and test sets
may differ, so the two sets are concatenated first, converted to one-hot
together, and then split apart again.
'''
6 |
7 | from random import random
8 | import pandas as pd
9 | from sklearn.preprocessing import StandardScaler
10 |
def get_data():
    """Load the adult census data and return standardized train/test splits.

    Categorical columns are one-hot encoded jointly over train+test so both
    splits share the same dummy columns; continuous columns are z-scored with
    statistics fitted on the training split only.

    Returns:
        (train_x, train_y, test_x, test_y)
    """
    train_data = pd.read_table("data/adult.data.txt",header=None,delimiter=',')
    test_data = pd.read_table("data/adult.test.txt",header=None,delimiter=',')

    all_columns = ['age','workclass','fnlwgt','education','education-num',
                   'marital-status','occupation','relationship','race','sex',
                   'capital-gain','capital-loss','hours-per-week','native-country','label','type']

    continus_columns = ['age','fnlwgt','education-num','capital-gain','capital-loss','hours-per-week']
    dummy_columns = ['workclass','education','marital-status','occupation','relationship','race','sex','native-country']

    # Tag each split so the rows can be separated again after the joint
    # one-hot encoding.
    train_data['type'] = 1
    test_data['type'] = 2

    combined = pd.concat([train_data,test_data],axis=0)
    combined.columns = all_columns
    combined = pd.get_dummies(combined,columns=dummy_columns)

    # Split back into train/test and drop the helper column.
    test_data = combined[combined['type']==2].drop(['type'],axis=1)
    train_data = combined[combined['type']==1].drop(['type'],axis=1)

    # In adult.test the positive label carries a trailing period ('>50K.'),
    # in adult.data it does not.
    train_data['label'] = train_data['label'].map(lambda x: 1 if x.strip() == '>50K' else 0)
    test_data['label'] = test_data['label'].map(lambda x: 1 if x.strip() == '>50K.' else 0)

    # Standardize each continuous column: fit on train, apply to test.
    for col in continus_columns:
        scaler = StandardScaler()
        train_data[col] = scaler.fit_transform(train_data[[col]])
        test_data[col] = scaler.transform(test_data[[col]])

    train_y = train_data['label']
    train_x = train_data.drop(['label'],axis=1)
    test_y = test_data['label']
    test_x = test_data.drop(['label'],axis=1)

    return train_x,train_y,test_x,test_y
50 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/MLR(LS-PLM)_Demo/data/adult:
--------------------------------------------------------------------------------
1 | Index of adult
2 |
3 | 02 Dec 1996 140 Index
4 | 10 Aug 1996 3974305 adult.data
5 | 10 Aug 1996 4267 adult.names
6 | 10 Aug 1996 2003153 adult.test
7 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/MLR(LS-PLM)_Demo/data/adult.names:
--------------------------------------------------------------------------------
1 | | This data was extracted from the census bureau database found at
2 | | http://www.census.gov/ftp/pub/DES/www/welcome.html
3 | | Donor: Ronny Kohavi and Barry Becker,
4 | | Data Mining and Visualization
5 | | Silicon Graphics.
6 | | e-mail: ronnyk@sgi.com for questions.
7 | | Split into train-test using MLC++ GenCVFiles (2/3, 1/3 random).
8 | | 48842 instances, mix of continuous and discrete (train=32561, test=16281)
9 | | 45222 if instances with unknown values are removed (train=30162, test=15060)
10 | | Duplicate or conflicting instances : 6
11 | | Class probabilities for adult.all file
12 | | Probability for the label '>50K' : 23.93% / 24.78% (without unknowns)
13 | | Probability for the label '<=50K' : 76.07% / 75.22% (without unknowns)
14 | |
15 | | Extraction was done by Barry Becker from the 1994 Census database. A set of
16 | | reasonably clean records was extracted using the following conditions:
17 | | ((AAGE>16) && (AGI>100) && (AFNLWGT>1)&& (HRSWK>0))
18 | |
19 | | Prediction task is to determine whether a person makes over 50K
20 | | a year.
21 | |
22 | | First cited in:
23 | | @inproceedings{kohavi-nbtree,
24 | | author={Ron Kohavi},
25 | | title={Scaling Up the Accuracy of Naive-Bayes Classifiers: a
26 | | Decision-Tree Hybrid},
27 | | booktitle={Proceedings of the Second International Conference on
28 | | Knowledge Discovery and Data Mining},
29 | | year = 1996,
30 | | pages={to appear}}
31 | |
32 | | Error Accuracy reported as follows, after removal of unknowns from
33 | | train/test sets):
34 | | C4.5 : 84.46+-0.30
35 | | Naive-Bayes: 83.88+-0.30
36 | | NBTree : 85.90+-0.28
37 | |
38 | |
39 | | Following algorithms were later run with the following error rates,
40 | | all after removal of unknowns and using the original train/test split.
41 | | All these numbers are straight runs using MLC++ with default values.
42 | |
43 | | Algorithm Error
44 | | -- ---------------- -----
45 | | 1 C4.5 15.54
46 | | 2 C4.5-auto 14.46
47 | | 3 C4.5 rules 14.94
48 | | 4 Voted ID3 (0.6) 15.64
49 | | 5 Voted ID3 (0.8) 16.47
50 | | 6 T2 16.84
51 | | 7 1R 19.54
52 | | 8 NBTree 14.10
53 | | 9 CN2 16.00
54 | | 10 HOODG 14.82
55 | | 11 FSS Naive Bayes 14.05
56 | | 12 IDTM (Decision table) 14.46
57 | | 13 Naive-Bayes 16.12
58 | | 14 Nearest-neighbor (1) 21.42
59 | | 15 Nearest-neighbor (3) 20.35
60 | | 16 OC1 15.04
61 | | 17 Pebls Crashed. Unknown why (bounds WERE increased)
62 | |
63 | | Conversion of original data as follows:
64 | | 1. Discretized agrossincome into two ranges with threshold 50,000.
65 | | 2. Convert U.S. to US to avoid periods.
66 | | 3. Convert Unknown to "?"
67 | | 4. Run MLC++ GenCVFiles to generate data,test.
68 | |
69 | | Description of fnlwgt (final weight)
70 | |
71 | | The weights on the CPS files are controlled to independent estimates of the
72 | | civilian noninstitutional population of the US. These are prepared monthly
73 | | for us by Population Division here at the Census Bureau. We use 3 sets of
74 | | controls.
75 | | These are:
76 | | 1. A single cell estimate of the population 16+ for each state.
77 | | 2. Controls for Hispanic Origin by age and sex.
78 | | 3. Controls by Race, age and sex.
79 | |
80 | | We use all three sets of controls in our weighting program and "rake" through
81 | | them 6 times so that by the end we come back to all the controls we used.
82 | |
83 | | The term estimate refers to population totals derived from CPS by creating
84 | | "weighted tallies" of any specified socio-economic characteristics of the
85 | | population.
86 | |
87 | | People with similar demographic characteristics should have
88 | | similar weights. There is one important caveat to remember
89 | | about this statement. That is that since the CPS sample is
90 | | actually a collection of 51 state samples, each with its own
91 | | probability of selection, the statement only applies within
92 | | state.
93 |
94 |
95 | >50K, <=50K.
96 |
97 | age: continuous.
98 | workclass: Private, Self-emp-not-inc, Self-emp-inc, Federal-gov, Local-gov, State-gov, Without-pay, Never-worked.
99 | fnlwgt: continuous.
100 | education: Bachelors, Some-college, 11th, HS-grad, Prof-school, Assoc-acdm, Assoc-voc, 9th, 7th-8th, 12th, Masters, 1st-4th, 10th, Doctorate, 5th-6th, Preschool.
101 | education-num: continuous.
102 | marital-status: Married-civ-spouse, Divorced, Never-married, Separated, Widowed, Married-spouse-absent, Married-AF-spouse.
103 | occupation: Tech-support, Craft-repair, Other-service, Sales, Exec-managerial, Prof-specialty, Handlers-cleaners, Machine-op-inspct, Adm-clerical, Farming-fishing, Transport-moving, Priv-house-serv, Protective-serv, Armed-Forces.
104 | relationship: Wife, Own-child, Husband, Not-in-family, Other-relative, Unmarried.
105 | race: White, Asian-Pac-Islander, Amer-Indian-Eskimo, Other, Black.
106 | sex: Female, Male.
107 | capital-gain: continuous.
108 | capital-loss: continuous.
109 | hours-per-week: continuous.
110 | native-country: United-States, Cambodia, England, Puerto-Rico, Canada, Germany, Outlying-US(Guam-USVI-etc), India, Japan, Greece, South, China, Cuba, Iran, Honduras, Philippines, Italy, Poland, Jamaica, Vietnam, Mexico, Portugal, Ireland, France, Dominican-Republic, Laos, Ecuador, Taiwan, Haiti, Columbia, Hungary, Guatemala, Nicaragua, Scotland, Thailand, Yugoslavia, El-Salvador, Trinadad&Tobago, Peru, Hong, Holand-Netherlands.
111 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/MLR(LS-PLM)_Demo/data/old.adult.names:
--------------------------------------------------------------------------------
1 | 1. Title of Database: adult
2 | 2. Sources:
3 | (a) Original owners of database (name/phone/snail address/email address)
4 | US Census Bureau.
5 | (b) Donor of database (name/phone/snail address/email address)
6 | Ronny Kohavi and Barry Becker,
7 | Data Mining and Visualization
8 | Silicon Graphics.
9 | e-mail: ronnyk@sgi.com
10 | (c) Date received (databases may change over time without name change!)
11 | 05/19/96
12 | 3. Past Usage:
13 | (a) Complete reference of article where it was described/used
14 | @inproceedings{kohavi-nbtree,
15 | author={Ron Kohavi},
16 | title={Scaling Up the Accuracy of Naive-Bayes Classifiers: a
17 | Decision-Tree Hybrid},
18 | booktitle={Proceedings of the Second International Conference on
19 | Knowledge Discovery and Data Mining},
20 | year = 1996,
21 | pages={to appear}}
22 | (b) Indication of what attribute(s) were being predicted
23 | Salary greater or less than 50,000.
24 | (b) Indication of study's results (i.e. Is it a good domain to use?)
25 | Hard domain with a nice number of records.
26 | The following results obtained using MLC++ with default settings
27 | for the algorithms mentioned below.
28 |
29 | Algorithm Error
30 | -- ---------------- -----
31 | 1 C4.5 15.54
32 | 2 C4.5-auto 14.46
33 | 3 C4.5 rules 14.94
34 | 4 Voted ID3 (0.6) 15.64
35 | 5 Voted ID3 (0.8) 16.47
36 | 6 T2 16.84
37 | 7 1R 19.54
38 | 8 NBTree 14.10
39 | 9 CN2 16.00
40 | 10 HOODG 14.82
41 | 11 FSS Naive Bayes 14.05
42 | 12 IDTM (Decision table) 14.46
43 | 13 Naive-Bayes 16.12
44 | 14 Nearest-neighbor (1) 21.42
45 | 15 Nearest-neighbor (3) 20.35
46 | 16 OC1 15.04
47 | 17 Pebls Crashed. Unknown why (bounds WERE increased)
48 |
49 | 4. Relevant Information Paragraph:
50 | Extraction was done by Barry Becker from the 1994 Census database. A set
51 | of reasonably clean records was extracted using the following conditions:
52 | ((AAGE>16) && (AGI>100) && (AFNLWGT>1)&& (HRSWK>0))
53 |
54 | 5. Number of Instances
55 | 48842 instances, mix of continuous and discrete (train=32561, test=16281)
56 | 45222 if instances with unknown values are removed (train=30162, test=15060)
57 | Split into train-test using MLC++ GenCVFiles (2/3, 1/3 random).
58 |
59 | 6. Number of Attributes
60 | 6 continuous, 8 nominal attributes.
61 |
62 | 7. Attribute Information:
63 |
64 | age: continuous.
65 | workclass: Private, Self-emp-not-inc, Self-emp-inc, Federal-gov, Local-gov, State-gov, Without-pay, Never-worked.
66 | fnlwgt: continuous.
67 | education: Bachelors, Some-college, 11th, HS-grad, Prof-school, Assoc-acdm, Assoc-voc, 9th, 7th-8th, 12th, Masters, 1st-4th, 10th, Doctorate, 5th-6th, Preschool.
68 | education-num: continuous.
69 | marital-status: Married-civ-spouse, Divorced, Never-married, Separated, Widowed, Married-spouse-absent, Married-AF-spouse.
70 | occupation: Tech-support, Craft-repair, Other-service, Sales, Exec-managerial, Prof-specialty, Handlers-cleaners, Machine-op-inspct, Adm-clerical, Farming-fishing, Transport-moving, Priv-house-serv, Protective-serv, Armed-Forces.
71 | relationship: Wife, Own-child, Husband, Not-in-family, Other-relative, Unmarried.
72 | race: White, Asian-Pac-Islander, Amer-Indian-Eskimo, Other, Black.
73 | sex: Female, Male.
74 | capital-gain: continuous.
75 | capital-loss: continuous.
76 | hours-per-week: continuous.
77 | native-country: United-States, Cambodia, England, Puerto-Rico, Canada, Germany, Outlying-US(Guam-USVI-etc), India, Japan, Greece, South, China, Cuba, Iran, Honduras, Philippines, Italy, Poland, Jamaica, Vietnam, Mexico, Portugal, Ireland, France, Dominican-Republic, Laos, Ecuador, Taiwan, Haiti, Columbia, Hungary, Guatemala, Nicaragua, Scotland, Thailand, Yugoslavia, El-Salvador, Trinadad&Tobago, Peru, Hong, Holand-Netherlands.
78 | class: >50K, <=50K
79 |
80 | 8. Missing Attribute Values:
81 |
82 | 7% have missing values.
83 |
84 | 9. Class Distribution:
85 |
86 | Probability for the label '>50K' : 23.93% / 24.78% (without unknowns)
87 | Probability for the label '<=50K' : 76.07% / 75.22% (without unknowns)
88 |
89 |
90 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/MLR(LS-PLM)_Demo/lr.py:
--------------------------------------------------------------------------------
import tensorflow as tf
import time
from sklearn.metrics import roc_auc_score
from data import get_data
import pandas as pd

# Plain logistic-regression baseline for the MLR demo, trained with FTRL on
# the one-hot adult census data (108 input features).

x=tf.placeholder(tf.float32,shape=[None,108])
y=tf.placeholder(tf.float32,shape=[None])

m=1
learning_rate=0.3
w=tf.Variable(tf.random_normal([108,m], 0.0, 0.5),name='u')

# Raw (pre-sigmoid) score per example; with m=1 this is just x . w.
logits=tf.reduce_sum(tf.matmul(x,w),1)
# Predicted click probability.
pred=tf.nn.sigmoid(logits)

# BUG FIX: sigmoid_cross_entropy_with_logits expects *logits*; the original
# passed the already-sigmoided prediction, applying the sigmoid twice.
cost1=tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=y))

cost=tf.add_n([cost1])
train_op = tf.train.FtrlOptimizer(learning_rate).minimize(cost)


init_op = tf.group(tf.global_variables_initializer(),tf.local_variables_initializer())
sess = tf.Session()
sess.run(init_op)
train_x,train_y,test_x,test_y = get_data()

result = []
time_s=time.time()
for epoch in range(0,10000):
    # One full-batch FTRL step on the training split.
    f_dict = {x: train_x, y: train_y}
    _, cost_, predict_= sess.run([train_op, cost, pred],feed_dict=f_dict)
    auc=roc_auc_score(train_y, predict_)
    time_t=time.time()
    if epoch % 100 == 0:
        f_dict = {x: test_x, y: test_y}
        # BUG FIX: evaluation must not run train_op, otherwise the model is
        # trained on the test set (data leakage).
        cost_, predict_test = sess.run([cost, pred], feed_dict=f_dict)
        test_auc = roc_auc_score(test_y, predict_test)
        print("%d %ld cost:%f,train_auc:%f,test_auc:%f" % (epoch, (time_t-time_s),cost_,auc,test_auc))
        result.append([epoch, (time_t - time_s), auc, test_auc])

pd.DataFrame(result, columns=['epoch', 'time', 'train_auc', 'test_auc']).to_csv("data/lr.csv")
--------------------------------------------------------------------------------
/ctr_of_recommendation/MLR(LS-PLM)_Demo/mlr.py:
--------------------------------------------------------------------------------
import tensorflow as tf
import time
from sklearn.metrics import roc_auc_score
from data import get_data
import pandas as pd

# Mixed Logistic Regression (LS-PLM): a softmax gate over m regions combined
# with a per-region sigmoid, summed into a single click probability.

x = tf.placeholder(tf.float32,shape=[None,108])
y = tf.placeholder(tf.float32,shape=[None])


m = 2
learning_rate = 0.3
u = tf.Variable(tf.random_normal([108,m],0.0,0.5),name='u')
w = tf.Variable(tf.random_normal([108,m],0.0,0.5),name='w')

# Gating network: probability of each of the m regions.
U = tf.matmul(x,u)
p1 = tf.nn.softmax(U)

# Per-region logistic predictions.
W = tf.matmul(x,w)
p2 = tf.nn.sigmoid(W)

# Mixture prediction: sum_m p(region) * p(click|region) — already in (0,1).
pred = tf.reduce_sum(tf.multiply(p1,p2),1)

# BUG FIX: pred is a probability, not a logit, so feeding it to
# sigmoid_cross_entropy_with_logits applied a second sigmoid. Use the
# explicit log loss on the clipped probability instead.
clipped = tf.clip_by_value(pred, 1e-10, 1.0 - 1e-10)
cost1 = -tf.reduce_mean(y * tf.log(clipped) + (1 - y) * tf.log(1 - clipped))
cost=tf.add_n([cost1])
train_op = tf.train.FtrlOptimizer(learning_rate).minimize(cost)
train_x,train_y,test_x,test_y = get_data()
time_s=time.time()
result = []
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(0, 10000):
        # One full-batch FTRL step on the training split.
        f_dict = {x: train_x, y: train_y}

        _, cost_, predict_ = sess.run([train_op, cost, pred], feed_dict=f_dict)

        auc = roc_auc_score(train_y, predict_)
        time_t = time.time()
        if epoch % 100 == 0:
            f_dict = {x: test_x, y: test_y}
            # BUG FIX: do not run train_op on the test feed — the original
            # trained the model on the test set (data leakage).
            cost_, predict_test = sess.run([cost, pred], feed_dict=f_dict)
            test_auc = roc_auc_score(test_y, predict_test)
            print("%d %ld cost:%f,train_auc:%f,test_auc:%f" % (epoch, (time_t - time_s), cost_, auc, test_auc))
            result.append([epoch,(time_t - time_s),auc,test_auc])

pd.DataFrame(result,columns=['epoch','time','train_auc','test_auc']).to_csv("data/mlr_"+str(m)+'.csv')
--------------------------------------------------------------------------------
/ctr_of_recommendation/MLR(LS-PLM)_Demo/plotResult.py:
--------------------------------------------------------------------------------
import matplotlib.pyplot as plt
import pandas as pd

# Compare the test-AUC curves of MLR (m = 5/10/15/20) against the LR baseline
# from the CSVs written by mlr.py / lr.py.

mlr5 = pd.read_csv("data/mlr_5.csv",index_col=0)

mlr10 = pd.read_csv("data/mlr_10.csv",index_col=0)

mlr15 = pd.read_csv("data/mlr_15.csv",index_col=0)

mlr20 = pd.read_csv("data/mlr_20.csv",index_col=0)

lr = pd.read_csv("data/lr.csv",index_col=0)


epoch = mlr5['epoch']
train_auc5 = mlr5['test_auc']
train_auc10 = mlr10['test_auc']
train_auc15 = mlr15['test_auc']
train_auc20 = mlr20['test_auc']
# BUG FIX: the chart is titled/labelled as test AUC, but the LR curve was
# read from the 'train_auc' column — use 'test_auc' like the MLR curves.
train_auclr = lr['test_auc']

l1,= plt.plot(epoch,train_auc5,label='mlr-5')
l2,= plt.plot(epoch,train_auc10,label='mlr-10')
l3, = plt.plot(epoch,train_auc15,label='mlr-15')
l4, = plt.plot(epoch,train_auc20,label='mlr-20')
l5, = plt.plot(epoch,train_auclr,label='lr')
plt.xlabel('epoch')
plt.ylabel('auc')
plt.title('mlr,lr test_auc')
plt.grid()
plt.legend(handles = [l1, l2,l3,l4,l5], labels = ['mlr-5', 'mlr-10','mlr-15','mlr-20','lr'], loc = 'best')
plt.savefig('data/test_zhexian.png')
--------------------------------------------------------------------------------
/ctr_of_recommendation/NFM_Demo/DataReader.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 |
class FeatureDictionary(object):
    """Builds a global feature index over the union of train and test data.

    Numeric columns get a single index each; categorical columns get one
    index per distinct value. The resulting mapping (feat_dict) and the total
    index count (feat_dim) are consumed by DataParser.
    """

    def __init__(self,trainfile=None,testfile=None,
                 dfTrain=None,dfTest=None,numeric_cols=None,
                 ignore_cols=None):
        """For each split, exactly one of the file path or the DataFrame
        must be provided.

        Args:
            trainfile/testfile: CSV paths of the train/test data.
            dfTrain/dfTest: already-loaded DataFrames.
            numeric_cols: columns treated as continuous (one index each).
            ignore_cols: columns excluded from the feature space.
        """
        assert not ((trainfile is None) and (dfTrain is None)), "trainfile or dfTrain at least one is set"
        assert not ((trainfile is not None) and (dfTrain is not None)), "only one can be set"
        assert not ((testfile is None) and (dfTest is None)), "testfile or dfTest at least one is set"
        assert not ((testfile is not None) and (dfTest is not None)), "only one can be set"

        self.trainfile = trainfile
        self.testfile = testfile
        self.dfTrain = dfTrain
        self.dfTest = dfTest
        # BUG FIX: the original used mutable default arguments ([]), which
        # are shared across calls/instances; default to None and normalise.
        self.numeric_cols = numeric_cols if numeric_cols is not None else []
        self.ignore_cols = ignore_cols if ignore_cols is not None else []
        self.gen_feat_dict()

    def gen_feat_dict(self):
        """Scan train+test columns and assign each non-ignored column a
        unique index (numeric) or index range (categorical)."""
        if self.dfTrain is None:
            dfTrain = pd.read_csv(self.trainfile)
        else:
            dfTrain = self.dfTrain

        if self.dfTest is None:
            dfTest = pd.read_csv(self.testfile)
        else:
            dfTest = self.dfTest

        df = pd.concat([dfTrain,dfTest])

        self.feat_dict = {}
        tc = 0
        for col in df.columns:
            if col in self.ignore_cols:
                continue
            if col in self.numeric_cols:
                # One index for the whole numeric column.
                self.feat_dict[col] = tc
                tc += 1
            else:
                # One index per distinct categorical value.
                # (Removed a leftover debug print of the unique values.)
                us = df[col].unique()
                self.feat_dict[col] = dict(zip(us,range(tc,len(us)+tc)))
                tc += len(us)

        self.feat_dim = tc
54 |
55 |
class DataParser(object):
    """Turns raw rows into parallel (index, value) lists using a
    FeatureDictionary.

    For every sample it emits the feature indices (xi) and matching feature
    values (xv): the raw value for numeric columns, 1.0 for the one-hot
    categorical columns.
    """

    def __init__(self,feat_dict):
        # feat_dict: a FeatureDictionary (exposes feat_dict, numeric_cols,
        # ignore_cols).
        self.feat_dict = feat_dict

    def parse(self,infile=None,df=None,has_label=False):
        """Parse a CSV file or a DataFrame into (xi, xv, labels-or-ids).

        Exactly one of infile/df must be given. When has_label is True the
        'target' column is returned as labels; otherwise the 'id' column is
        returned.
        """
        assert not ((infile is None) and (df is None)), "infile or df at least one is set"
        assert not ((infile is not None) and (df is not None)), "only one can be set"

        dfi = pd.read_csv(infile) if infile is not None else df.copy()

        if has_label:
            y = dfi['target'].values.tolist()
            dfi.drop(['id','target'],axis=1,inplace=True)
        else:
            ids = dfi['id'].values.tolist()
            dfi.drop(['id'],axis=1,inplace=True)

        # dfi holds the feature indices, dfv the matching feature values
        # (raw numbers for numeric columns, 1.0 for categorical ones).
        dfv = dfi.copy()
        for col in dfi.columns:
            if col in self.feat_dict.ignore_cols:
                dfi.drop(col,axis=1,inplace=True)
                dfv.drop(col,axis=1,inplace=True)
            elif col in self.feat_dict.numeric_cols:
                # Numeric: one fixed index; the value stays as-is in dfv.
                dfi[col] = self.feat_dict.feat_dict[col]
            else:
                # Categorical: look up the value's index; value becomes 1.
                dfi[col] = dfi[col].map(self.feat_dict.feat_dict[col])
                dfv[col] = 1.

        xi = dfi.values.tolist()
        xv = dfv.values.tolist()

        return (xi,xv,y) if has_label else (xi,xv,ids)
97 |
98 |
99 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/NFM_Demo/__pycache__/DataReader.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/NFM_Demo/__pycache__/DataReader.cpython-36.pyc
--------------------------------------------------------------------------------
/ctr_of_recommendation/NFM_Demo/__pycache__/NFM.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/NFM_Demo/__pycache__/NFM.cpython-36.pyc
--------------------------------------------------------------------------------
/ctr_of_recommendation/NFM_Demo/__pycache__/config.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/NFM_Demo/__pycache__/config.cpython-36.pyc
--------------------------------------------------------------------------------
/ctr_of_recommendation/NFM_Demo/config.py:
--------------------------------------------------------------------------------
# Paths to the train/test CSVs (Porto Seguro-style dataset), relative to the demo dir.
TRAIN_FILE = "data/train.csv"
TEST_FILE = "data/test.csv"

# Directory where output/submission files are written.
SUB_DIR = "output"


# Number of folds for StratifiedKFold cross-validation.
NUM_SPLITS = 3
# Seed shared by the fold split and the model for reproducibility.
RANDOM_SEED = 2017

# types of columns of the dataset dataframe
# (all categorical columns are currently disabled -- every used column is numeric)
CATEGORICAL_COLS = [
    # 'ps_ind_02_cat', 'ps_ind_04_cat', 'ps_ind_05_cat',
    # 'ps_car_01_cat', 'ps_car_02_cat', 'ps_car_03_cat',
    # 'ps_car_04_cat', 'ps_car_05_cat', 'ps_car_06_cat',
    # 'ps_car_07_cat', 'ps_car_08_cat', 'ps_car_09_cat',
    # 'ps_car_10_cat', 'ps_car_11_cat',
]

NUMERIC_COLS = [
    # # binary
    # "ps_ind_06_bin", "ps_ind_07_bin", "ps_ind_08_bin",
    # "ps_ind_09_bin", "ps_ind_10_bin", "ps_ind_11_bin",
    # "ps_ind_12_bin", "ps_ind_13_bin", "ps_ind_16_bin",
    # "ps_ind_17_bin", "ps_ind_18_bin",
    # "ps_calc_15_bin", "ps_calc_16_bin", "ps_calc_17_bin",
    # "ps_calc_18_bin", "ps_calc_19_bin", "ps_calc_20_bin",
    # numeric
    "ps_reg_01", "ps_reg_02", "ps_reg_03",
    "ps_car_12", "ps_car_13", "ps_car_14", "ps_car_15",

    # feature engineering (added in main.load_data's preprocess step)
    "missing_feat", "ps_car_13_x_ps_reg_03",
]

# Columns excluded from the feature dictionary entirely (ids, labels, and
# the 'calc' columns, which are commonly dropped for this dataset).
IGNORE_COLS = [
    "id", "target",
    "ps_calc_01", "ps_calc_02", "ps_calc_03", "ps_calc_04",
    "ps_calc_05", "ps_calc_06", "ps_calc_07", "ps_calc_08",
    "ps_calc_09", "ps_calc_10", "ps_calc_11", "ps_calc_12",
    "ps_calc_13", "ps_calc_14",
    "ps_calc_15_bin", "ps_calc_16_bin", "ps_calc_17_bin",
    "ps_calc_18_bin", "ps_calc_19_bin", "ps_calc_20_bin"
]
44 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/NFM_Demo/main.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import pandas as pd
4 | import tensorflow as tf
5 | from sklearn.metrics import make_scorer
6 | from sklearn.model_selection import StratifiedKFold
7 | from DataReader import FeatureDictionary, DataParser
8 | from matplotlib import pyplot as plt
9 | import config
10 | from NFM import NFM
11 |
def load_data():
    """Read the train/test CSVs, add engineered features, and split into arrays.

    Returns:
        (dfTrain, dfTest, X_train, y_train, X_test, ids_test,
         cat_features_indices)
    """
    dfTrain = pd.read_csv(config.TRAIN_FILE)
    dfTest = pd.read_csv(config.TEST_FILE)

    def preprocess(df):
        # Count the -1 sentinels per row as a "missing" feature, then add
        # one hand-crafted interaction feature.
        feature_cols = [c for c in df.columns if c not in ('id', 'target')]
        df["missing_feat"] = (df[feature_cols] == -1).values.sum(axis=1)
        df['ps_car_13_x_ps_reg_03'] = df['ps_car_13'] * df['ps_reg_03']
        return df

    dfTrain = preprocess(dfTrain)
    dfTest = preprocess(dfTest)

    # Usable feature columns: everything except id/target and the ignore list.
    cols = [c for c in dfTrain.columns
            if c not in ('id', 'target') and c not in config.IGNORE_COLS]

    X_train = dfTrain[cols].values
    y_train = dfTrain['target'].values
    X_test = dfTest[cols].values
    ids_test = dfTest['id'].values

    cat_features_indices = [i for i, c in enumerate(cols)
                            if c in config.CATEGORICAL_COLS]

    return dfTrain, dfTest, X_train, y_train, X_test, ids_test, cat_features_indices
38 |
def run_base_model_nfm(dfTrain, dfTest, folds, pnn_params):
    """Train an NFM model with K-fold CV; return (train OOF preds, test preds).

    Bug fix: the original returned None, which made the caller's two-value
    unpacking (`y_train_pnn, y_test_pnn = run_base_model_nfm(...)`) raise a
    TypeError, and silently discarded every fold's predictions.

    Args:
        dfTrain/dfTest: preprocessed dataframes from load_data().
        folds: list of (train_idx, valid_idx) pairs.
        pnn_params: NFM hyper-parameters; mutated in place to add
            'feature_size' and 'field_size'.
    """
    fd = FeatureDictionary(dfTrain=dfTrain,
                           dfTest=dfTest,
                           numeric_cols=config.NUMERIC_COLS,
                           ignore_cols=config.IGNORE_COLS)
    data_parser = DataParser(feat_dict=fd)
    # Xi_*: per-row lists of global feature indices.
    # Xv_*: the matching feature values (raw numerics, 1.0 for categoricals).
    Xi_train, Xv_train, y_train = data_parser.parse(df=dfTrain, has_label=True)
    Xi_test, Xv_test, ids_test = data_parser.parse(df=dfTest)

    print(dfTrain.dtypes)

    pnn_params['feature_size'] = fd.feat_dim
    pnn_params['field_size'] = len(Xi_train[0])

    # Select rows of a python list by an index array.
    _get = lambda x, l: [x[i] for i in l]

    # Out-of-fold predictions on train; test predictions averaged over folds.
    y_train_meta = np.zeros((len(Xi_train), 1), dtype=float)
    y_test_meta = np.zeros((len(Xi_test), 1), dtype=float)

    for i, (train_idx, valid_idx) in enumerate(folds):
        Xi_train_, Xv_train_, y_train_ = _get(Xi_train, train_idx), _get(Xv_train, train_idx), _get(y_train, train_idx)
        Xi_valid_, Xv_valid_, y_valid_ = _get(Xi_train, valid_idx), _get(Xv_train, valid_idx), _get(y_train, valid_idx)

        nfm = NFM(**pnn_params)
        nfm.fit(Xi_train_, Xv_train_, y_train_, Xi_valid_, Xv_valid_, y_valid_)

        # NOTE(review): assumes NFM exposes predict(Xi, Xv) like the sibling
        # DeepFM demo in this repo -- confirm against NFM.py.
        y_train_meta[valid_idx, 0] = nfm.predict(Xi_valid_, Xv_valid_)
        y_test_meta[:, 0] += nfm.predict(Xi_test, Xv_test)

    y_test_meta /= float(len(folds))

    return y_train_meta, y_test_meta
66 |
67 |
68 |
69 |
70 |
71 |
72 |
# Hyper-parameters forwarded to NFM(**pnn_params). 'feature_size' and
# 'field_size' are filled in later by run_base_model_nfm once the data
# has been parsed.
pnn_params = {
    "embedding_size":8,
    "deep_layers":[32,32],
    "dropout_deep":[0.5,0.5,0.5],
    "deep_layer_activation":tf.nn.relu,
    "epoch":30,
    "batch_size":1024,
    "learning_rate":0.001,
    "optimizer":"adam",
    "batch_norm":1,
    "batch_norm_decay":0.995,
    "verbose":True,
    "random_seed":config.RANDOM_SEED,
    "deep_init_size":50,
    "use_inner":False

}

# load data
dfTrain, dfTest, X_train, y_train, X_test, ids_test, cat_features_indices = load_data()

# Stratified K-fold indices, fixed by RANDOM_SEED so model runs are comparable.
folds = list(StratifiedKFold(n_splits=config.NUM_SPLITS, shuffle=True,
                             random_state=config.RANDOM_SEED).split(X_train, y_train))

#y_train_pnn,y_test_pnn = run_base_model_pnn(dfTrain,dfTest,folds,pnn_params)
# NOTE(review): run_base_model_nfm as written returns None, so this two-value
# unpacking raises TypeError -- the function should return (train, test) preds.
y_train_pnn, y_test_pnn = run_base_model_nfm(dfTrain, dfTest, folds, pnn_params)
101 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/PNN_Demo/.idea/Basic-PNN-Demo.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/PNN_Demo/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/PNN_Demo/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/PNN_Demo/DataReader.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 |
class FeatureDictionary(object):
    """Maps every usable column to global feature indices.

    A numeric column gets a single index; a categorical column gets one index
    per distinct value (computed over train+test combined). ``feat_dim`` is
    the total number of indices allocated.

    Fix: removed a leftover debug ``print(us)`` that dumped the unique values
    of every categorical column to stdout.
    """

    def __init__(self, trainfile=None, testfile=None,
                 dfTrain=None, dfTest=None, numeric_cols=[],
                 ignore_cols=[]):
        # Exactly one of (trainfile, dfTrain) and one of (testfile, dfTest)
        # must be supplied.
        assert not ((trainfile is None) and (dfTrain is None)), "trainfile or dfTrain at least one is set"
        assert not ((trainfile is not None) and (dfTrain is not None)), "only one can be set"
        assert not ((testfile is None) and (dfTest is None)), "testfile or dfTest at least one is set"
        assert not ((testfile is not None) and (dfTest is not None)), "only one can be set"

        self.trainfile = trainfile
        self.testfile = testfile
        self.dfTrain = dfTrain
        self.dfTest = dfTest
        self.numeric_cols = numeric_cols
        self.ignore_cols = ignore_cols
        self.gen_feat_dict()

    def gen_feat_dict(self):
        """Populate ``self.feat_dict`` and ``self.feat_dim``."""
        dfTrain = pd.read_csv(self.trainfile) if self.dfTrain is None else self.dfTrain
        dfTest = pd.read_csv(self.testfile) if self.dfTest is None else self.dfTest

        # Index over train+test so unseen test categories are covered too.
        df = pd.concat([dfTrain, dfTest])

        self.feat_dict = {}
        tc = 0  # running count of allocated feature indices
        for col in df.columns:
            if col in self.ignore_cols:
                continue
            if col in self.numeric_cols:
                # One shared index for the whole numeric column.
                self.feat_dict[col] = tc
                tc += 1
            else:
                # One index per distinct categorical value.
                us = df[col].unique()
                self.feat_dict[col] = dict(zip(us, range(tc, len(us) + tc)))
                tc += len(us)

        self.feat_dim = tc
54 |
55 |
class DataParser(object):
    """Turns a dataframe into parallel index/value lists via a FeatureDictionary."""

    def __init__(self, feat_dict):
        self.feat_dict = feat_dict

    def parse(self, infile=None, df=None, has_label=False):
        """Parse one dataframe (or CSV path) into (Xi, Xv, y-or-ids).

        Xi holds each field's global feature index; Xv holds its value --
        the raw number for numeric columns, 1.0 for categorical ones.
        Exactly one of ``infile``/``df`` must be given.
        """
        assert not ((infile is None) and (df is None)), "infile or df at least one is set"
        assert not ((infile is not None) and (df is not None)), "only one can be set"

        dfi = pd.read_csv(infile) if df is None else df.copy()

        if has_label:
            y = dfi['target'].values.tolist()
            dfi.drop(['id', 'target'], axis=1, inplace=True)
        else:
            ids = dfi['id'].values.tolist()
            dfi.drop(['id'], axis=1, inplace=True)

        # dfi carries feature indices, dfv the matching feature values.
        dfv = dfi.copy()
        for col in dfi.columns:
            if col in self.feat_dict.ignore_cols:
                dfi.drop(col, axis=1, inplace=True)
                dfv.drop(col, axis=1, inplace=True)
            elif col in self.feat_dict.numeric_cols:
                # Numeric field: single shared index, value kept as-is.
                dfi[col] = self.feat_dict.feat_dict[col]
            else:
                # Categorical field: per-value index, value set to 1.
                dfi[col] = dfi[col].map(self.feat_dict.feat_dict[col])
                dfv[col] = 1.

        xi = dfi.values.tolist()
        xv = dfv.values.tolist()

        return (xi, xv, y) if has_label else (xi, xv, ids)
97 |
98 |
99 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/PNN_Demo/__pycache__/DataReader.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/PNN_Demo/__pycache__/DataReader.cpython-36.pyc
--------------------------------------------------------------------------------
/ctr_of_recommendation/PNN_Demo/__pycache__/DataReader.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/PNN_Demo/__pycache__/DataReader.cpython-37.pyc
--------------------------------------------------------------------------------
/ctr_of_recommendation/PNN_Demo/__pycache__/PNN.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/PNN_Demo/__pycache__/PNN.cpython-36.pyc
--------------------------------------------------------------------------------
/ctr_of_recommendation/PNN_Demo/__pycache__/PNN.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/PNN_Demo/__pycache__/PNN.cpython-37.pyc
--------------------------------------------------------------------------------
/ctr_of_recommendation/PNN_Demo/__pycache__/config.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/PNN_Demo/__pycache__/config.cpython-36.pyc
--------------------------------------------------------------------------------
/ctr_of_recommendation/PNN_Demo/__pycache__/config.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/PNN_Demo/__pycache__/config.cpython-37.pyc
--------------------------------------------------------------------------------
/ctr_of_recommendation/PNN_Demo/config.py:
--------------------------------------------------------------------------------
# Paths to the train/test CSVs (Porto Seguro-style dataset), relative to the demo dir.
TRAIN_FILE = "data/train.csv"
TEST_FILE = "data/test.csv"

# Directory where output/submission files are written.
SUB_DIR = "output"


# Number of folds for StratifiedKFold cross-validation.
NUM_SPLITS = 3
# Seed shared by the fold split and the model for reproducibility.
RANDOM_SEED = 2017

# types of columns of the dataset dataframe
# (all categorical columns are currently disabled -- every used column is numeric)
CATEGORICAL_COLS = [
    # 'ps_ind_02_cat', 'ps_ind_04_cat', 'ps_ind_05_cat',
    # 'ps_car_01_cat', 'ps_car_02_cat', 'ps_car_03_cat',
    # 'ps_car_04_cat', 'ps_car_05_cat', 'ps_car_06_cat',
    # 'ps_car_07_cat', 'ps_car_08_cat', 'ps_car_09_cat',
    # 'ps_car_10_cat', 'ps_car_11_cat',
]

NUMERIC_COLS = [
    # # binary
    # "ps_ind_06_bin", "ps_ind_07_bin", "ps_ind_08_bin",
    # "ps_ind_09_bin", "ps_ind_10_bin", "ps_ind_11_bin",
    # "ps_ind_12_bin", "ps_ind_13_bin", "ps_ind_16_bin",
    # "ps_ind_17_bin", "ps_ind_18_bin",
    # "ps_calc_15_bin", "ps_calc_16_bin", "ps_calc_17_bin",
    # "ps_calc_18_bin", "ps_calc_19_bin", "ps_calc_20_bin",
    # numeric
    "ps_reg_01", "ps_reg_02", "ps_reg_03",
    "ps_car_12", "ps_car_13", "ps_car_14", "ps_car_15",

    # feature engineering (added in main.load_data's preprocess step)
    "missing_feat", "ps_car_13_x_ps_reg_03",
]

# Columns excluded from the feature dictionary entirely (ids, labels, and
# the 'calc' columns, which are commonly dropped for this dataset).
IGNORE_COLS = [
    "id", "target",
    "ps_calc_01", "ps_calc_02", "ps_calc_03", "ps_calc_04",
    "ps_calc_05", "ps_calc_06", "ps_calc_07", "ps_calc_08",
    "ps_calc_09", "ps_calc_10", "ps_calc_11", "ps_calc_12",
    "ps_calc_13", "ps_calc_14",
    "ps_calc_15_bin", "ps_calc_16_bin", "ps_calc_17_bin",
    "ps_calc_18_bin", "ps_calc_19_bin", "ps_calc_20_bin"
]
44 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/PNN_Demo/main.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import pandas as pd
4 | import tensorflow as tf
5 | from sklearn.metrics import make_scorer
6 | from sklearn.model_selection import StratifiedKFold
7 | from DataReader import FeatureDictionary, DataParser
8 | from matplotlib import pyplot as plt
9 |
10 | import config
11 | from PNN import PNN
12 |
def load_data():
    """Read the train/test CSVs, add engineered features, and split into arrays.

    Returns:
        (dfTrain, dfTest, X_train, y_train, X_test, ids_test,
         cat_features_indices)
    """
    dfTrain = pd.read_csv(config.TRAIN_FILE)
    dfTest = pd.read_csv(config.TEST_FILE)

    def preprocess(df):
        # Count the -1 sentinels per row as a "missing" feature, then add
        # one hand-crafted interaction feature.
        feature_cols = [c for c in df.columns if c not in ('id', 'target')]
        df["missing_feat"] = (df[feature_cols] == -1).values.sum(axis=1)
        df['ps_car_13_x_ps_reg_03'] = df['ps_car_13'] * df['ps_reg_03']
        return df

    dfTrain = preprocess(dfTrain)
    dfTest = preprocess(dfTest)

    # Usable feature columns: everything except id/target and the ignore list.
    cols = [c for c in dfTrain.columns
            if c not in ('id', 'target') and c not in config.IGNORE_COLS]

    X_train = dfTrain[cols].values
    y_train = dfTrain['target'].values
    X_test = dfTest[cols].values
    ids_test = dfTest['id'].values

    cat_features_indices = [i for i, c in enumerate(cols)
                            if c in config.CATEGORICAL_COLS]

    return dfTrain, dfTest, X_train, y_train, X_test, ids_test, cat_features_indices
39 |
def run_base_model_pnn(dfTrain, dfTest, folds, pnn_params):
    """Train a PNN model with K-fold CV; return (train OOF preds, test preds).

    Bug fix: the original returned None, which made the caller's two-value
    unpacking (`y_train_pnn, y_test_pnn = run_base_model_pnn(...)`) raise a
    TypeError, and silently discarded every fold's predictions.

    Args:
        dfTrain/dfTest: preprocessed dataframes from load_data().
        folds: list of (train_idx, valid_idx) pairs.
        pnn_params: PNN hyper-parameters; mutated in place to add
            'feature_size' and 'field_size'.
    """
    fd = FeatureDictionary(dfTrain=dfTrain,
                           dfTest=dfTest,
                           numeric_cols=config.NUMERIC_COLS,
                           ignore_cols=config.IGNORE_COLS)
    data_parser = DataParser(feat_dict=fd)
    # Xi_*: per-row lists of global feature indices.
    # Xv_*: the matching feature values (raw numerics, 1.0 for categoricals).
    Xi_train, Xv_train, y_train = data_parser.parse(df=dfTrain, has_label=True)
    Xi_test, Xv_test, ids_test = data_parser.parse(df=dfTest)

    print(dfTrain.dtypes)

    pnn_params['feature_size'] = fd.feat_dim
    pnn_params['field_size'] = len(Xi_train[0])

    # Select rows of a python list by an index array.
    _get = lambda x, l: [x[i] for i in l]

    # Out-of-fold predictions on train; test predictions averaged over folds.
    y_train_meta = np.zeros((len(Xi_train), 1), dtype=float)
    y_test_meta = np.zeros((len(Xi_test), 1), dtype=float)

    for i, (train_idx, valid_idx) in enumerate(folds):
        Xi_train_, Xv_train_, y_train_ = _get(Xi_train, train_idx), _get(Xv_train, train_idx), _get(y_train, train_idx)
        Xi_valid_, Xv_valid_, y_valid_ = _get(Xi_train, valid_idx), _get(Xv_train, valid_idx), _get(y_train, valid_idx)

        pnn = PNN(**pnn_params)
        pnn.fit(Xi_train_, Xv_train_, y_train_, Xi_valid_, Xv_valid_, y_valid_)

        # NOTE(review): assumes PNN exposes predict(Xi, Xv) like the sibling
        # DeepFM demo in this repo -- confirm against PNN.py.
        y_train_meta[valid_idx, 0] = pnn.predict(Xi_valid_, Xv_valid_)
        y_test_meta[:, 0] += pnn.predict(Xi_test, Xv_test)

    y_test_meta /= float(len(folds))

    return y_train_meta, y_test_meta
67 |
68 |
69 |
70 |
71 |
72 |
73 |
# Hyper-parameters forwarded to PNN(**pnn_params). 'feature_size' and
# 'field_size' are filled in later by run_base_model_pnn once the data
# has been parsed.
pnn_params = {
    "embedding_size":8,
    "deep_layers":[32,32],
    "dropout_deep":[0.5,0.5,0.5],
    "deep_layer_activation":tf.nn.relu,
    "epoch":30,
    "batch_size":1024,
    "learning_rate":0.001,
    "optimizer":"adam",
    "batch_norm":1,
    "batch_norm_decay":0.995,
    "verbose":True,
    "random_seed":config.RANDOM_SEED,
    "deep_init_size":50,
    "use_inner":False

}

# load data
dfTrain, dfTest, X_train, y_train, X_test, ids_test, cat_features_indices = load_data()

# Stratified K-fold indices, fixed by RANDOM_SEED so model runs are comparable.
folds = list(StratifiedKFold(n_splits=config.NUM_SPLITS, shuffle=True,
                             random_state=config.RANDOM_SEED).split(X_train, y_train))

#y_train_pnn,y_test_pnn = run_base_model_pnn(dfTrain,dfTest,folds,pnn_params)
# NOTE(review): run_base_model_pnn as written returns None, so this two-value
# unpacking raises TypeError -- the function should return (train, test) preds.
y_train_pnn, y_test_pnn = run_base_model_pnn(dfTrain, dfTest, folds, pnn_params)
101 |
102 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/PNN_Demo/sfsfs.py:
--------------------------------------------------------------------------------
import tensorflow as tf


# Scratch script: demonstrates a batched outer product using tf.matmul.

t1 = tf.convert_to_tensor([[2,2],
                           [2,3]])

# Reshape each row into a column vector (2x2x1) and a row vector (2x1x2);
# batched matmul then yields one 2x2 outer product per input row.
t1_1 = tf.reshape(t1,shape=[2,2,1])
t1_2 = tf.reshape(t1,shape=[2,1,2])

t = tf.matmul(t1_1,t1_2)

# NOTE(review): tf.Session is TF1-only; under TF2 this requires
# tf.compat.v1 (with eager execution disabled).
with tf.Session() as sess:
    print(sess.run(t))
--------------------------------------------------------------------------------
/ctr_of_recommendation/Wide&Deep_Demo/.gitignore:
--------------------------------------------------------------------------------
1 | model/
2 | data/adult.data
3 | data/adult.test
4 | /absl_example.py
5 | /origin_wide_deep.py
6 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/Wide&Deep_Demo/.idea/Wide&Deep.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/Wide&Deep_Demo/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/Wide&Deep_Demo/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/Wide&Deep_Demo/README.md:
--------------------------------------------------------------------------------
1 | # 所用数据集
2 | 预测某人年收入超过5万美元的概率。
3 |
下载: `python data/data_download.py`
5 |
6 | # Wide Linear Model
7 | `wide_component.py`
8 |
9 | # Wide & Deep Model
10 | `wide_deep.py`
--------------------------------------------------------------------------------
/ctr_of_recommendation/Wide&Deep_Demo/data/Index:
--------------------------------------------------------------------------------
1 | Index of adult
2 |
3 | 02 Dec 1996 140 Index
4 | 10 Aug 1996 3974305 adult.data
5 | 10 Aug 1996 4267 adult.names
6 | 10 Aug 1996 2003153 adult.test
7 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/Wide&Deep_Demo/data/adult.names:
--------------------------------------------------------------------------------
1 | | This data was extracted from the census bureau database found at
2 | | http://www.census.gov/ftp/pub/DES/www/welcome.html
3 | | Donor: Ronny Kohavi and Barry Becker,
4 | | Data Mining and Visualization
5 | | Silicon Graphics.
6 | | e-mail: ronnyk@sgi.com for questions.
7 | | Split into train-test using MLC++ GenCVFiles (2/3, 1/3 random).
8 | | 48842 instances, mix of continuous and discrete (train=32561, test=16281)
9 | | 45222 if instances with unknown values are removed (train=30162, test=15060)
10 | | Duplicate or conflicting instances : 6
11 | | Class probabilities for adult.all file
12 | | Probability for the label '>50K' : 23.93% / 24.78% (without unknowns)
13 | | Probability for the label '<=50K' : 76.07% / 75.22% (without unknowns)
14 | |
15 | | Extraction was done by Barry Becker from the 1994 Census database. A set of
16 | | reasonably clean records was extracted using the following conditions:
17 | | ((AAGE>16) && (AGI>100) && (AFNLWGT>1)&& (HRSWK>0))
18 | |
19 | | Prediction task is to determine whether a person makes over 50K
20 | | a year.
21 | |
22 | | First cited in:
23 | | @inproceedings{kohavi-nbtree,
24 | | author={Ron Kohavi},
25 | | title={Scaling Up the Accuracy of Naive-Bayes Classifiers: a
26 | | Decision-Tree Hybrid},
27 | | booktitle={Proceedings of the Second International Conference on
28 | | Knowledge Discovery and Data Mining},
29 | | year = 1996,
30 | | pages={to appear}}
31 | |
32 | | Error Accuracy reported as follows, after removal of unknowns from
33 | | train/test sets):
34 | | C4.5 : 84.46+-0.30
35 | | Naive-Bayes: 83.88+-0.30
36 | | NBTree : 85.90+-0.28
37 | |
38 | |
39 | | Following algorithms were later run with the following error rates,
40 | | all after removal of unknowns and using the original train/test split.
41 | | All these numbers are straight runs using MLC++ with default values.
42 | |
43 | | Algorithm Error
44 | | -- ---------------- -----
45 | | 1 C4.5 15.54
46 | | 2 C4.5-auto 14.46
47 | | 3 C4.5 rules 14.94
48 | | 4 Voted ID3 (0.6) 15.64
49 | | 5 Voted ID3 (0.8) 16.47
50 | | 6 T2 16.84
51 | | 7 1R 19.54
52 | | 8 NBTree 14.10
53 | | 9 CN2 16.00
54 | | 10 HOODG 14.82
55 | | 11 FSS Naive Bayes 14.05
56 | | 12 IDTM (Decision table) 14.46
57 | | 13 Naive-Bayes 16.12
58 | | 14 Nearest-neighbor (1) 21.42
59 | | 15 Nearest-neighbor (3) 20.35
60 | | 16 OC1 15.04
61 | | 17 Pebls Crashed. Unknown why (bounds WERE increased)
62 | |
63 | | Conversion of original data as follows:
64 | | 1. Discretized agrossincome into two ranges with threshold 50,000.
65 | | 2. Convert U.S. to US to avoid periods.
66 | | 3. Convert Unknown to "?"
67 | | 4. Run MLC++ GenCVFiles to generate data,test.
68 | |
69 | | Description of fnlwgt (final weight)
70 | |
71 | | The weights on the CPS files are controlled to independent estimates of the
72 | | civilian noninstitutional population of the US. These are prepared monthly
73 | | for us by Population Division here at the Census Bureau. We use 3 sets of
74 | | controls.
75 | | These are:
76 | | 1. A single cell estimate of the population 16+ for each state.
77 | | 2. Controls for Hispanic Origin by age and sex.
78 | | 3. Controls by Race, age and sex.
79 | |
80 | | We use all three sets of controls in our weighting program and "rake" through
81 | | them 6 times so that by the end we come back to all the controls we used.
82 | |
83 | | The term estimate refers to population totals derived from CPS by creating
84 | | "weighted tallies" of any specified socio-economic characteristics of the
85 | | population.
86 | |
87 | | People with similar demographic characteristics should have
88 | | similar weights. There is one important caveat to remember
89 | | about this statement. That is that since the CPS sample is
90 | | actually a collection of 51 state samples, each with its own
91 | | probability of selection, the statement only applies within
92 | | state.
93 |
94 |
95 | >50K, <=50K.
96 |
97 | age: continuous.
98 | workclass: Private, Self-emp-not-inc, Self-emp-inc, Federal-gov, Local-gov, State-gov, Without-pay, Never-worked.
99 | fnlwgt: continuous.
100 | education: Bachelors, Some-college, 11th, HS-grad, Prof-school, Assoc-acdm, Assoc-voc, 9th, 7th-8th, 12th, Masters, 1st-4th, 10th, Doctorate, 5th-6th, Preschool.
101 | education-num: continuous.
102 | marital-status: Married-civ-spouse, Divorced, Never-married, Separated, Widowed, Married-spouse-absent, Married-AF-spouse.
103 | occupation: Tech-support, Craft-repair, Other-service, Sales, Exec-managerial, Prof-specialty, Handlers-cleaners, Machine-op-inspct, Adm-clerical, Farming-fishing, Transport-moving, Priv-house-serv, Protective-serv, Armed-Forces.
104 | relationship: Wife, Own-child, Husband, Not-in-family, Other-relative, Unmarried.
105 | race: White, Asian-Pac-Islander, Amer-Indian-Eskimo, Other, Black.
106 | sex: Female, Male.
107 | capital-gain: continuous.
108 | capital-loss: continuous.
109 | hours-per-week: continuous.
110 | native-country: United-States, Cambodia, England, Puerto-Rico, Canada, Germany, Outlying-US(Guam-USVI-etc), India, Japan, Greece, South, China, Cuba, Iran, Honduras, Philippines, Italy, Poland, Jamaica, Vietnam, Mexico, Portugal, Ireland, France, Dominican-Republic, Laos, Ecuador, Taiwan, Haiti, Columbia, Hungary, Guatemala, Nicaragua, Scotland, Thailand, Yugoslavia, El-Salvador, Trinadad&Tobago, Peru, Hong, Holand-Netherlands.
111 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/Wide&Deep_Demo/data/data_download.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Download and clean the Census Income Dataset."""
16 |
17 | from __future__ import absolute_import
18 | from __future__ import division
19 | from __future__ import print_function
20 |
21 | import argparse
22 | import os
23 | import sys
24 |
25 | from six.moves import urllib
26 | import tensorflow as tf
27 |
# Source URLs for the UCI Census Income ("adult") dataset.
DATA_URL = 'https://archive.ics.uci.edu/ml/machine-learning-databases/adult'
TRAINING_FILE = 'adult.data'
TRAINING_URL = '%s/%s' % (DATA_URL, TRAINING_FILE)
EVAL_FILE = 'adult.test'
EVAL_URL = '%s/%s' % (DATA_URL, EVAL_FILE)

parser = argparse.ArgumentParser()

# Destination directory for the downloaded train/eval files.
parser.add_argument(
    '--data_dir', type=str, default='./',
    help='Directory to download census data')
39 |
40 |
def _download_and_clean_file(filename, url):
    """Downloads data from url, and makes changes to match the CSV format.

    Per-line normalization: strip whitespace, collapse ', ' separators to
    ',', skip blank or non-CSV lines, and drop the trailing '.' (the eval
    split appends one to each label).
    """
    # Download to a temp path first, then rewrite cleaned lines to `filename`.
    temp_file, _ = urllib.request.urlretrieve(url)
    with tf.gfile.Open(temp_file, 'r') as temp_eval_file:
        with tf.gfile.Open(filename, 'w') as eval_file:
            for line in temp_eval_file:
                line = line.strip()
                line = line.replace(', ', ',')
                if not line or ',' not in line:
                    continue
                if line[-1] == '.':
                    line = line[:-1]
                line += '\n'
                eval_file.write(line)
    # Remove the temporary download.
    tf.gfile.Remove(temp_file)
56 |
57 |
def main(_):
    """Download both dataset splits into FLAGS.data_dir, creating it if needed."""
    if not tf.gfile.Exists(FLAGS.data_dir):
        tf.gfile.MkDir(FLAGS.data_dir)

    training_file_path = os.path.join(FLAGS.data_dir, TRAINING_FILE)
    _download_and_clean_file(training_file_path, TRAINING_URL)

    eval_file_path = os.path.join(FLAGS.data_dir, EVAL_FILE)
    _download_and_clean_file(eval_file_path, EVAL_URL)
67 |
68 |
if __name__ == '__main__':
    # Parse known flags; anything unrecognized is forwarded to tf.app.run.
    FLAGS, unparsed = parser.parse_known_args()
    tf.app.run(argv=[sys.argv[0]] + unparsed)
--------------------------------------------------------------------------------
/ctr_of_recommendation/Wide&Deep_Demo/data/old.adult.names:
--------------------------------------------------------------------------------
1 | 1. Title of Database: adult
2 | 2. Sources:
3 | (a) Original owners of database (name/phone/snail address/email address)
4 | US Census Bureau.
5 | (b) Donor of database (name/phone/snail address/email address)
6 | Ronny Kohavi and Barry Becker,
7 | Data Mining and Visualization
8 | Silicon Graphics.
9 | e-mail: ronnyk@sgi.com
10 | (c) Date received (databases may change over time without name change!)
11 | 05/19/96
12 | 3. Past Usage:
13 | (a) Complete reference of article where it was described/used
14 | @inproceedings{kohavi-nbtree,
15 | author={Ron Kohavi},
16 | title={Scaling Up the Accuracy of Naive-Bayes Classifiers: a
17 | Decision-Tree Hybrid},
18 | booktitle={Proceedings of the Second International Conference on
19 | Knowledge Discovery and Data Mining},
20 | year = 1996,
21 | pages={to appear}}
22 | (b) Indication of what attribute(s) were being predicted
23 | Salary greater or less than 50,000.
24 | (b) Indication of study's results (i.e. Is it a good domain to use?)
25 | Hard domain with a nice number of records.
26 | The following results obtained using MLC++ with default settings
27 | for the algorithms mentioned below.
28 |
29 | Algorithm Error
30 | -- ---------------- -----
31 | 1 C4.5 15.54
32 | 2 C4.5-auto 14.46
33 | 3 C4.5 rules 14.94
34 | 4 Voted ID3 (0.6) 15.64
35 | 5 Voted ID3 (0.8) 16.47
36 | 6 T2 16.84
37 | 7 1R 19.54
38 | 8 NBTree 14.10
39 | 9 CN2 16.00
40 | 10 HOODG 14.82
41 | 11 FSS Naive Bayes 14.05
42 | 12 IDTM (Decision table) 14.46
43 | 13 Naive-Bayes 16.12
44 | 14 Nearest-neighbor (1) 21.42
45 | 15 Nearest-neighbor (3) 20.35
46 | 16 OC1 15.04
47 | 17 Pebls Crashed. Unknown why (bounds WERE increased)
48 |
49 | 4. Relevant Information Paragraph:
50 | Extraction was done by Barry Becker from the 1994 Census database. A set
51 | of reasonably clean records was extracted using the following conditions:
52 | ((AAGE>16) && (AGI>100) && (AFNLWGT>1)&& (HRSWK>0))
53 |
54 | 5. Number of Instances
55 | 48842 instances, mix of continuous and discrete (train=32561, test=16281)
56 | 45222 if instances with unknown values are removed (train=30162, test=15060)
57 | Split into train-test using MLC++ GenCVFiles (2/3, 1/3 random).
58 |
59 | 6. Number of Attributes
60 | 6 continuous, 8 nominal attributes.
61 |
62 | 7. Attribute Information:
63 |
64 | age: continuous.
65 | workclass: Private, Self-emp-not-inc, Self-emp-inc, Federal-gov, Local-gov, State-gov, Without-pay, Never-worked.
66 | fnlwgt: continuous.
67 | education: Bachelors, Some-college, 11th, HS-grad, Prof-school, Assoc-acdm, Assoc-voc, 9th, 7th-8th, 12th, Masters, 1st-4th, 10th, Doctorate, 5th-6th, Preschool.
68 | education-num: continuous.
69 | marital-status: Married-civ-spouse, Divorced, Never-married, Separated, Widowed, Married-spouse-absent, Married-AF-spouse.
70 | occupation: Tech-support, Craft-repair, Other-service, Sales, Exec-managerial, Prof-specialty, Handlers-cleaners, Machine-op-inspct, Adm-clerical, Farming-fishing, Transport-moving, Priv-house-serv, Protective-serv, Armed-Forces.
71 | relationship: Wife, Own-child, Husband, Not-in-family, Other-relative, Unmarried.
72 | race: White, Asian-Pac-Islander, Amer-Indian-Eskimo, Other, Black.
73 | sex: Female, Male.
74 | capital-gain: continuous.
75 | capital-loss: continuous.
76 | hours-per-week: continuous.
77 | native-country: United-States, Cambodia, England, Puerto-Rico, Canada, Germany, Outlying-US(Guam-USVI-etc), India, Japan, Greece, South, China, Cuba, Iran, Honduras, Philippines, Italy, Poland, Jamaica, Vietnam, Mexico, Portugal, Ireland, France, Dominican-Republic, Laos, Ecuador, Taiwan, Haiti, Columbia, Hungary, Guatemala, Nicaragua, Scotland, Thailand, Yugoslavia, El-Salvador, Trinadad&Tobago, Peru, Hong, Holand-Netherlands.
78 | class: >50K, <=50K
79 |
80 | 8. Missing Attribute Values:
81 |
82 | 7% have missing values.
83 |
84 | 9. Class Distribution:
85 |
86 | Probability for the label '>50K' : 23.93% / 24.78% (without unknowns)
87 | Probability for the label '<=50K' : 76.07% / 75.22% (without unknowns)
88 |
89 |
90 |
--------------------------------------------------------------------------------
/ctr_of_recommendation/Wide&Deep_Demo/wide_deep.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from absl import flags
3 | from absl import app
4 |
5 |
6 | # 1. 最基本的特征:
7 |
8 | # Continuous columns. Wide和Deep组件都会用到。
9 | age = tf.feature_column.numeric_column('age')
10 | education_num = tf.feature_column.numeric_column('education_num')
11 | capital_gain = tf.feature_column.numeric_column('capital_gain')
12 | capital_loss = tf.feature_column.numeric_column('capital_loss')
13 | hours_per_week = tf.feature_column.numeric_column('hours_per_week')
14 |
15 | # 离散特征
16 | education = tf.feature_column.categorical_column_with_vocabulary_list(
17 | 'education', [
18 | 'Bachelors', 'HS-grad', '11th', 'Masters', '9th', 'Some-college',
19 | 'Assoc-acdm', 'Assoc-voc', '7th-8th', 'Doctorate', 'Prof-school',
20 | '5th-6th', '10th', '1st-4th', 'Preschool', '12th'])
21 |
22 | marital_status = tf.feature_column.categorical_column_with_vocabulary_list(
23 | 'marital_status', [
24 | 'Married-civ-spouse', 'Divorced', 'Married-spouse-absent',
25 | 'Never-married', 'Separated', 'Married-AF-spouse', 'Widowed'])
26 |
27 | relationship = tf.feature_column.categorical_column_with_vocabulary_list(
28 | 'relationship', [
29 | 'Husband', 'Not-in-family', 'Wife', 'Own-child', 'Unmarried',
30 | 'Other-relative'])
31 |
32 | workclass = tf.feature_column.categorical_column_with_vocabulary_list(
33 | 'workclass', [
34 | 'Self-emp-not-inc', 'Private', 'State-gov', 'Federal-gov',
35 | 'Local-gov', '?', 'Self-emp-inc', 'Without-pay', 'Never-worked'])
36 |
37 | # 展示一下这个API
38 | occupation = tf.feature_column.categorical_column_with_hash_bucket(
39 | 'occupation', hash_bucket_size=1000
40 | )
41 |
42 | # Transformations
43 | age_buckets = tf.feature_column.bucketized_column(
44 | age, boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65]
45 | )
46 |
# 2. The Wide Model: Linear Model with CrossedFeatureColumns
"""
The wide model is a linear model with a wide set of *sparse and crossed feature* columns.
Of the continuous features, only the bucketized age_buckets is used on the wide side;
the raw continuous features are left to the deep side.
"""
base_columns = [
    # All categorical (sparse) features.
    education, marital_status, relationship, workclass, occupation,
    age_buckets,
]

# Crossed columns let the linear model memorize co-occurrences
# (e.g. a specific education AND occupation combination).
crossed_columns = [
    tf.feature_column.crossed_column(
        ['education', 'occupation'], hash_bucket_size=1000),
    tf.feature_column.crossed_column(
        [age_buckets, 'education', 'occupation'], hash_bucket_size=1000
    )
]

# 3. The Deep Model: Neural Network with Embeddings
"""
1. Sparse features -> embedding vectors -> concatenated with the continuous
   features -> fed into the hidden layers.
2. Embedding values are initialized randomly.
3. An alternative for categorical features is a one-hot or multi-hot
   representation, but that only suits low-cardinality columns; embeddings
   are the more general approach.
4. embedding_column -> dense embedding; indicator_column -> multi-hot.
"""
deep_columns = [
    age,
    education_num,
    capital_gain,
    capital_loss,
    hours_per_week,
    tf.feature_column.indicator_column(workclass),
    tf.feature_column.indicator_column(education),
    tf.feature_column.indicator_column(marital_status),
    tf.feature_column.indicator_column(relationship),

    # To show an example of embedding
    tf.feature_column.embedding_column(occupation, dimension=8)
]
87 |
# Directory where checkpoints and summaries are written.
model_dir = './model/wide_deep'

# 4. Combine Wide & Deep: joint training of the linear and DNN parts.
model = tf.estimator.DNNLinearCombinedClassifier(
    model_dir = model_dir,
    linear_feature_columns=base_columns + crossed_columns,
    dnn_feature_columns=deep_columns,
    dnn_hidden_units=[100,50]
)

# 5. Train & Evaluate.
# Column order of the Adult/Census CSV; must match the file layout exactly.
_CSV_COLUMNS = [
    'age', 'workclass', 'fnlwgt', 'education', 'education_num',
    'marital_status', 'occupation', 'relationship', 'race', 'gender',
    'capital_gain', 'capital_loss', 'hours_per_week', 'native_country',
    'income_bracket'
]
# Per-column defaults: [0] -> int column, [''] -> string column.
_CSV_COLUMN_DEFAULTS = [[0], [''], [0], [''], [0], [''], [''], [''], [''], [''],
                        [0], [0], [0], [''], ['']]
# Known dataset sizes, used to size the shuffle buffer below.
_NUM_EXAMPLES = {
    'train': 32561,
    'validation': 16281,
}
111 |
def input_fn(data_file, num_epochs, shuffle, batch_size):
    """Create an input function for the Estimator.

    Args:
        data_file: path to a CSV file laid out per _CSV_COLUMNS.
        num_epochs: how many times to repeat the dataset.
        shuffle: whether to shuffle examples before batching.
        batch_size: number of examples per batch.

    Returns:
        A (features, labels) pair of tensors for one batch; labels are
        booleans (income_bracket == '>50K').
    """
    assert tf.gfile.Exists(data_file), "{0} not found.".format(data_file)

    def _parse_line(line):
        print("Parsing", data_file)
        # tf.decode_csv turns one CSV line into a list of tensors, one per
        # column; record_defaults supplies the fill value (and dtype) per column.
        fields = tf.decode_csv(line, record_defaults=_CSV_COLUMN_DEFAULTS)
        feature_dict = dict(zip(_CSV_COLUMNS, fields))
        label = feature_dict.pop('income_bracket')
        # tf.equal(x, y) yields an element-wise boolean tensor (x == y).
        return feature_dict, tf.equal(label, '>50K')

    dataset = tf.data.TextLineDataset(data_file)
    dataset = dataset.map(_parse_line, num_parallel_calls=5)

    if shuffle:
        # Buffer spans the whole corpus for a full shuffle.
        shuffle_buffer = _NUM_EXAMPLES['train'] + _NUM_EXAMPLES['validation']
        dataset = dataset.shuffle(buffer_size=shuffle_buffer)

    dataset = dataset.repeat(num_epochs).batch(batch_size)

    return dataset.make_one_shot_iterator().get_next()
136 |
# Train + Eval: alternate training with periodic evaluation.
train_epochs = 6
epochs_per_eval = 2
batch_size = 40
train_file = './data/adult.data'
test_file = './data/adult.test'

for cycle in range(train_epochs // epochs_per_eval):
    # Train for epochs_per_eval epochs, then evaluate one pass over the test set.
    model.train(input_fn=lambda: input_fn(train_file, epochs_per_eval, True, batch_size))
    results = model.evaluate(input_fn=lambda: input_fn(
        test_file, 1, False, batch_size))

    # Display Eval results
    print("Results at epoch {0}".format((cycle + 1) * epochs_per_eval))
    print('-' * 30)

    for metric, value in sorted(results.items()):
        print("{0:20}: {1:.4f}".format(metric, value))
155 |
156 |
157 |
158 |
--------------------------------------------------------------------------------
/related_papers/2016--Wide & Deep Learning for Recommender Systems.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/related_papers/2016--Wide & Deep Learning for Recommender Systems.pdf
--------------------------------------------------------------------------------
/related_papers/2016-PNN-Product-based Neural Networks for User Response Prediction.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/related_papers/2016-PNN-Product-based Neural Networks for User Response Prediction.pdf
--------------------------------------------------------------------------------
/related_papers/2017-Google-Deep & Cross Network for Ad Click Predictions.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/related_papers/2017-Google-Deep & Cross Network for Ad Click Predictions.pdf
--------------------------------------------------------------------------------
/related_papers/2017-阿里-Deep Interest Network for Click-Through Rate Prediction.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/related_papers/2017-阿里-Deep Interest Network for Click-Through Rate Prediction.pdf
--------------------------------------------------------------------------------
/related_papers/2017-阿里-MLR-Learning Piece-wise Linear Models from Large Scale Data for Ad Click Prediction.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/related_papers/2017-阿里-MLR-Learning Piece-wise Linear Models from Large Scale Data for Ad Click Prediction.pdf
--------------------------------------------------------------------------------
/related_papers/An overview of gradient descent optimization algorithms.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/related_papers/An overview of gradient descent optimization algorithms.pdf
--------------------------------------------------------------------------------
/related_papers/Attentional Factorization Machines- Learning the Weight of Feature Interactions via Attention Networks.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/related_papers/Attentional Factorization Machines- Learning the Weight of Feature Interactions via Attention Networks.pdf
--------------------------------------------------------------------------------
/related_papers/Deep Neural Networks for YouTube Recommendations.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/related_papers/Deep Neural Networks for YouTube Recommendations.pdf
--------------------------------------------------------------------------------
/related_papers/DeepFM.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/related_papers/DeepFM.pdf
--------------------------------------------------------------------------------
/related_papers/FFM.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/related_papers/FFM.pdf
--------------------------------------------------------------------------------
/related_papers/FM.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/related_papers/FM.pdf
--------------------------------------------------------------------------------
/related_papers/NFM-Neural Factorization Machines for Sparse Predictive Analytics.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/related_papers/NFM-Neural Factorization Machines for Sparse Predictive Analytics.pdf
--------------------------------------------------------------------------------
/related_papers/README.md:
--------------------------------------------------------------------------------
1 | # 已读论文汇总
2 |
3 | 机器学习与深度学习系列~(ctr预估):
4 | 这里是本人已经读过的相关论文汇总,加上了自己当时的笔记分享给大家。
5 |
6 |
7 |
--------------------------------------------------------------------------------
/related_papers/(GBDT+LR)Practical Lessons from Predicting Clicks on Ads at Facebook.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/related_papers/(GBDT+LR)Practical Lessons from Predicting Clicks on Ads at Facebook.pdf
--------------------------------------------------------------------------------
/test.txt:
--------------------------------------------------------------------------------
1 | Mysterious code
2 | GG
--------------------------------------------------------------------------------