├── .idea ├── TensorFlow_Practice.iml ├── misc.xml ├── modules.xml ├── vcs.xml └── workspace.xml ├── README.md ├── ctr_of_recommendation ├── AFM_Demo │ ├── AFM.py │ ├── DataReader.py │ ├── __pycache__ │ │ ├── AFM.cpython-36.pyc │ │ ├── DataReader.cpython-36.pyc │ │ └── config.cpython-36.pyc │ ├── config.py │ ├── data │ │ ├── test.csv │ │ └── train.csv │ └── main.py ├── DCN_Demo │ ├── DCN.py │ ├── DataLoader.py │ ├── __pycache__ │ │ ├── DCN.cpython-36.pyc │ │ ├── DataLoader.cpython-36.pyc │ │ └── config.cpython-36.pyc │ ├── config.py │ ├── data │ │ ├── test.csv │ │ └── train.csv │ └── main.py ├── DIEN_Demo │ ├── GruCell.py │ ├── data_iterator.py │ ├── model.py │ ├── rnn.py │ ├── source_code │ │ ├── Dice.py │ │ ├── data_iterator.py │ │ ├── generate_voc.py │ │ ├── local_aggretor.py │ │ ├── model.py │ │ ├── process_data.py │ │ ├── rnn.py │ │ ├── shuffle.py │ │ ├── split_by_user.py │ │ ├── train.py │ │ └── utils.py │ ├── train.py │ ├── utils.py │ └── vecAttGruCell.py ├── DIN_Demo │ ├── .idea │ │ ├── Basic-DIN-Demo.iml │ │ ├── misc.xml │ │ ├── modules.xml │ │ └── workspace.xml │ ├── Dice.py │ ├── README.md │ ├── build_dataset.py │ ├── convert_pd.py │ ├── input.py │ ├── model.py │ ├── remap_id.py │ ├── train.py │ └── utils │ │ ├── 0_download_raw.sh │ │ ├── 1_convert_pd.py │ │ ├── 2_remap_id.py │ │ └── auc.png ├── DSIN_Demo │ ├── config.py │ ├── dsin.py │ ├── gen_dsin_input.py │ ├── gen_sampled_data.py │ ├── gen_sessions.py │ └── train_dsin.py ├── DeepFM_model │ ├── .ipynb_checkpoints │ │ └── DeepFM-StepByStep-checkpoint.ipynb │ ├── DataReader.py │ ├── DeepFM-StepByStep.ipynb │ ├── DeepFM.py │ ├── config.py │ ├── data │ │ ├── test.csv │ │ └── train.csv │ ├── fig │ │ ├── DNN.png │ │ ├── DeepFM.png │ │ └── FM.png │ ├── main.py │ ├── metrics.py │ └── output │ │ ├── DNN_Mean-0.31183_Std0.29369.csv │ │ ├── DeepFM_Mean-0.11470_Std0.37335.csv │ │ ├── DeepFM_Mean0.01434_Std0.10176.csv │ │ ├── DeepFM_Mean0.05735_Std0.20027.csv │ │ ├── DeepFM_Mean0.26137_Std0.00210.csv │ │ └── 
FM_Mean0.23297_Std0.05576.csv ├── FFM_Demo │ ├── FFM_model.py │ └── TFModel │ │ ├── FFM-0.data-00000-of-00001 │ │ ├── FFM-0.index │ │ ├── FFM-0.meta │ │ ├── FFM │ │ ├── events.out.tfevents.1523526908.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523527022.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523527136.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523527252.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523527416.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530263.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530409.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530500.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530509.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530517.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530526.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530538.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530548.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530556.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530568.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530579.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530589.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530598.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530606.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530618.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530632.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530643.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530653.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530660.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530668.meituan-sxwdeMacBook-Pro-4.local │ │ ├── 
events.out.tfevents.1523530675.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530686.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530695.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530703.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530710.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530718.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530726.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530736.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530744.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530751.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530759.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530766.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530774.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530781.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530789.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530798.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530808.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530820.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530827.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530835.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530844.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530852.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530860.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530868.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530875.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530883.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530891.meituan-sxwdeMacBook-Pro-4.local │ │ ├── 
events.out.tfevents.1523530898.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530906.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530913.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530921.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530930.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530938.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530945.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530953.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530961.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530968.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530976.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523530984.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523537511.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523537521.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523537530.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523537538.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523537547.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523537556.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523537565.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523537574.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523537583.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523537591.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523537600.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523537608.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523537616.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523537624.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523537632.meituan-sxwdeMacBook-Pro-4.local │ │ ├── 
events.out.tfevents.1523537641.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523537652.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523537662.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523537672.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523537682.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523537691.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523537700.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523537709.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523537719.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523537728.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523537736.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523537745.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523537754.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523537763.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523537772.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523537781.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523537790.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523537799.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523537807.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523537815.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523537825.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523537834.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523537843.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523537852.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523537861.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523537871.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523537880.meituan-sxwdeMacBook-Pro-4.local │ │ ├── 
events.out.tfevents.1523537888.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523537897.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523537906.meituan-sxwdeMacBook-Pro-4.local │ │ ├── events.out.tfevents.1523537915.meituan-sxwdeMacBook-Pro-4.local │ │ └── events.out.tfevents.1523537925.meituan-sxwdeMacBook-Pro-4.local │ │ └── checkpoint ├── FM_demo │ ├── FM_model.py │ └── data │ │ ├── ua.base │ │ └── ua.test ├── FNN_demo │ ├── FNN.py │ ├── __init__.py │ └── preprocess.py ├── GBDT+LR-Demo │ ├── .idea │ │ ├── GBDT+LR-Demo.iml │ │ ├── misc.xml │ │ ├── modules.xml │ │ └── workspace.xml │ ├── GBDT_LR.py │ ├── data │ │ ├── test.csv │ │ └── train.csv │ └── model.txt ├── MLR(LS-PLM)_Demo │ ├── .idea │ │ ├── Basic-MLR-Demo.iml │ │ ├── misc.xml │ │ ├── modules.xml │ │ └── workspace.xml │ ├── __pycache__ │ │ └── data.cpython-37.pyc │ ├── data.py │ ├── data │ │ ├── adult │ │ ├── adult.data.txt │ │ ├── adult.names │ │ ├── adult.test.txt │ │ └── old.adult.names │ ├── lr.py │ ├── mlr.py │ └── plotResult.py ├── NFM_Demo │ ├── DataReader.py │ ├── NFM.py │ ├── __pycache__ │ │ ├── DataReader.cpython-36.pyc │ │ ├── NFM.cpython-36.pyc │ │ └── config.cpython-36.pyc │ ├── config.py │ ├── data │ │ ├── test.csv │ │ └── train.csv │ └── main.py ├── PNN_Demo │ ├── .idea │ │ ├── Basic-PNN-Demo.iml │ │ ├── misc.xml │ │ ├── modules.xml │ │ └── workspace.xml │ ├── DataReader.py │ ├── PNN.py │ ├── __pycache__ │ │ ├── DataReader.cpython-36.pyc │ │ ├── DataReader.cpython-37.pyc │ │ ├── PNN.cpython-36.pyc │ │ ├── PNN.cpython-37.pyc │ │ ├── config.cpython-36.pyc │ │ └── config.cpython-37.pyc │ ├── config.py │ ├── data │ │ ├── test.csv │ │ └── train.csv │ ├── main.py │ └── sfsfs.py └── Wide&Deep_Demo │ ├── .gitignore │ ├── .idea │ ├── Wide&Deep.iml │ ├── misc.xml │ ├── modules.xml │ └── workspace.xml │ ├── README.md │ ├── data │ ├── Index │ ├── adult.names │ ├── data_download.py │ └── old.adult.names │ ├── wide_component.py │ └── wide_deep.py ├── related_papers ├── 
2016--Wide & Deep Learning for Recommender Systems.pdf ├── 2016-PNN-Product-based Neural Networks for User Response Prediction.pdf ├── 2017-Google-Deep & Cross Network for Ad Click Predictions.pdf ├── 2017-阿里-Deep Interest Network for Click-Through Rate Prediction.pdf ├── 2017-阿里-MLR-Learning Piece-wise Linear Models from Large Scale Data for Ad Click Prediction.pdf ├── An overview of gradient descent optimization algorithms.pdf ├── Attentional Factorization Machines- Learning the Weight of Feature Interactions via Attention Networks.pdf ├── Deep Neural Networks for YouTube Recommendations.pdf ├── DeepFM.pdf ├── FFM.pdf ├── FM.pdf ├── NFM-Neural Factorization Machines for Sparse Predictive Analytics.pdf ├── README.md └── (GBDT+LR)Practical Lessons from Predicting Clicks on Ads at Facebook.pdf └── test.txt /.idea/TensorFlow_Practice.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 12 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # TensorFlow Practice For Recommendation System 2 | 3 | # 1. 介绍 4 | 机器学习与深度学习系列(ctr预估): 5 | 仓库主要分享推荐系统相关论文和一些关于推荐的传统模型和深度模型学习实践代码的Demo,持续更新中。 6 | 7 | # 2. 
class FeatureDictionary(object):
    """Assign a global integer index to every feature of the train+test data.

    Columns listed in ``ignore_cols`` are skipped. Each column listed in
    ``numeric_cols`` receives a single index. Every other column is treated
    as categorical and receives one index per distinct value found in the
    concatenation of the train and test frames.

    Exactly one of ``trainfile``/``dfTrain`` and exactly one of
    ``testfile``/``dfTest`` must be given (checked with ``assert``).

    Attributes set by :meth:`gen_feat_dict`:
        feat_dict: ``{numeric column: index}`` and
            ``{categorical column: {value: index}}``.
        feat_dim: total number of indices handed out.
    """

    def __init__(self, trainfile=None, testfile=None,
                 dfTrain=None, dfTest=None, numeric_cols=None,
                 ignore_cols=None):
        assert not ((trainfile is None) and (dfTrain is None)), "trainfile or dfTrain at least one is set"
        assert not ((trainfile is not None) and (dfTrain is not None)), "only one can be set"
        assert not ((testfile is None) and (dfTest is None)), "testfile or dfTest at least one is set"
        assert not ((testfile is not None) and (dfTest is not None)), "only one can be set"

        self.trainfile = trainfile
        self.testfile = testfile
        self.dfTrain = dfTrain
        self.dfTest = dfTest
        # A fresh list per instance: the previous ``[]`` defaults were a single
        # list object shared by every instance created without these arguments.
        # Caller-supplied collections are still stored by reference, as before.
        self.numeric_cols = [] if numeric_cols is None else numeric_cols
        self.ignore_cols = [] if ignore_cols is None else ignore_cols
        self.gen_feat_dict()

    def gen_feat_dict(self):
        """Build ``self.feat_dict`` and ``self.feat_dim`` from train + test."""
        dfTrain = pd.read_csv(self.trainfile) if self.dfTrain is None else self.dfTrain
        dfTest = pd.read_csv(self.testfile) if self.dfTest is None else self.dfTest

        # Index over the union of both frames so every value seen at test time
        # already has a slot.
        df = pd.concat([dfTrain, dfTest])

        self.feat_dict = {}
        tc = 0  # next free feature index
        for col in df.columns:
            if col in self.ignore_cols:
                continue
            if col in self.numeric_cols:
                # A numeric column occupies exactly one index.
                self.feat_dict[col] = tc
                tc += 1
            else:
                # A categorical column occupies one index per distinct value.
                # (The debug ``print`` of the unique values was removed: it
                # dumped an array per column on every construction.)
                us = df[col].unique()
                self.feat_dict[col] = dict(zip(us, range(tc, len(us) + tc)))
                tc += len(us)

        self.feat_dim = tc


class DataParser(object):
    """Convert a raw frame into parallel index/value lists.

    ``feat_dict`` is a :class:`FeatureDictionary` instance; its ``feat_dict``,
    ``numeric_cols`` and ``ignore_cols`` attributes are read by :meth:`parse`.
    """

    def __init__(self, feat_dict):
        self.feat_dict = feat_dict

    def parse(self, infile=None, df=None, has_label=False):
        """Parse a CSV file or a DataFrame into feature indices and values.

        Args:
            infile: path of a CSV file to read (mutually exclusive with ``df``).
            df: DataFrame to parse; it is copied, never modified.
            has_label: when True the frame must contain ``'id'`` and
                ``'target'``; otherwise it must contain ``'id'``.

        Returns:
            ``(xi, xv, y)`` when ``has_label`` is True, else ``(xi, xv, ids)``.
            ``xi`` holds one list of feature indices per row and ``xv`` the
            matching values: the raw value for numeric columns and ``1.`` for
            categorical ones. ``y`` is the ``'target'`` column and ``ids`` the
            ``'id'`` column, as plain lists.

        Note:
            Categorical values absent from the dictionary become NaN in ``xi``
            (``Series.map`` semantics).
        """
        assert not ((infile is None) and (df is None)), "infile or df at least one is set"
        assert not ((infile is not None) and (df is not None)), "only one can be set"

        dfi = df.copy() if infile is None else pd.read_csv(infile)

        if has_label:
            y = dfi['target'].values.tolist()
            dfi.drop(['id', 'target'], axis=1, inplace=True)
        else:
            ids = dfi['id'].values.tolist()
            dfi.drop(['id'], axis=1, inplace=True)

        # Drop ignored columns up front instead of dropping them in place while
        # iterating over the columns.
        ignored = [col for col in dfi.columns if col in self.feat_dict.ignore_cols]
        dfi.drop(ignored, axis=1, inplace=True)

        # dfi for feature index
        # dfv for feature value which can be either binary (1/0) or float (e.g., 10.24)
        dfv = dfi.copy()
        for col in dfi.columns:
            if col in self.feat_dict.numeric_cols:
                # Every row shares the column's single index; dfv keeps the raw value.
                dfi[col] = self.feat_dict.feat_dict[col]
            else:
                dfi[col] = dfi[col].map(self.feat_dict.feat_dict[col])
                dfv[col] = 1.

        xi = dfi.values.tolist()
        xv = dfv.values.tolist()

        if has_label:
            return xi, xv, y
        else:
            return xi, xv, ids
def load_data():
    """Read the train/test CSVs named in ``config`` and add engineered columns.

    Two columns are added to both frames: ``missing_feat`` (number of feature
    cells equal to -1 in the row) and ``ps_car_13_x_ps_reg_03`` (product of
    the two named columns).

    Returns:
        ``(dfTrain, dfTest, X_train, y_train, X_test, ids_test,
        cat_features_indices)``. ``X_train``/``X_test`` are the value arrays of
        every column that is neither ``id``/``target`` nor listed in
        ``config.IGNORE_COLS``; ``cat_features_indices`` are the positions,
        within those columns, of the ones listed in ``config.CATEGORICAL_COLS``.
    """
    dfTrain = pd.read_csv(config.TRAIN_FILE)
    dfTest = pd.read_csv(config.TEST_FILE)

    def preprocess(df):
        cols = [c for c in df.columns if c not in ['id', 'target']]
        # Count the cells equal to -1 in each row.
        df["missing_feat"] = np.sum((df[cols] == -1).values, axis=1)
        df['ps_car_13_x_ps_reg_03'] = df['ps_car_13'] * df['ps_reg_03']
        return df

    dfTrain = preprocess(dfTrain)
    dfTest = preprocess(dfTest)

    cols = [c for c in dfTrain.columns if c not in ['id', 'target']]
    cols = [c for c in cols if (not c in config.IGNORE_COLS)]

    X_train = dfTrain[cols].values
    y_train = dfTrain['target'].values

    X_test = dfTest[cols].values
    ids_test = dfTest['id'].values

    cat_features_indices = [i for i, c in enumerate(cols) if c in config.CATEGORICAL_COLS]

    return dfTrain, dfTest, X_train, y_train, X_test, ids_test, cat_features_indices


def run_base_model_nfm(dfTrain, dfTest, folds, pnn_params):
    """Train one AFM model per cross-validation fold.

    Despite the ``nfm``/``pnn`` names (kept so existing callers keep working),
    the model built here is ``AFM``.

    Args:
        dfTrain: training frame; must contain ``id`` and ``target``.
        dfTest: test frame; must contain ``id``.
        folds: iterable of ``(train_idx, valid_idx)`` index pairs.
        pnn_params: keyword arguments for ``AFM``. ``feature_size`` and
            ``field_size`` are written into this dict in place.

    Returns:
        None. The function only fits the models; it produces no predictions.
    """
    fd = FeatureDictionary(dfTrain=dfTrain,
                           dfTest=dfTest,
                           numeric_cols=config.NUMERIC_COLS,
                           ignore_cols=config.IGNORE_COLS)
    data_parser = DataParser(feat_dict=fd)
    # Xi_*: feature (column) indices per row
    # Xv_*: the corresponding feature values per row
    Xi_train, Xv_train, y_train = data_parser.parse(df=dfTrain, has_label=True)
    Xi_test, Xv_test, ids_test = data_parser.parse(df=dfTest)

    print(dfTrain.dtypes)

    pnn_params['feature_size'] = fd.feat_dim
    pnn_params['field_size'] = len(Xi_train[0])

    _get = lambda x, l: [x[i] for i in l]

    for i, (train_idx, valid_idx) in enumerate(folds):
        Xi_train_, Xv_train_, y_train_ = _get(Xi_train, train_idx), _get(Xv_train, train_idx), _get(y_train, train_idx)
        Xi_valid_, Xv_valid_, y_valid_ = _get(Xi_train, valid_idx), _get(Xv_train, valid_idx), _get(y_train, valid_idx)

        # A fresh model per fold, validated on that fold's held-out rows.
        afm = AFM(**pnn_params)
        afm.fit(Xi_train_, Xv_train_, y_train_, Xi_valid_, Xv_valid_, y_valid_)


pnn_params = {
    "embedding_size": 8,
    "attention_size": 10,
    "deep_layers": [32, 32],
    "dropout_deep": [0.5, 0.5, 0.5],
    "deep_layer_activation": tf.nn.relu,
    "epoch": 30,
    "batch_size": 1024,
    "learning_rate": 0.001,
    "optimizer": "adam",
    "batch_norm": 1,
    "batch_norm_decay": 0.995,
    "verbose": True,
    "random_seed": config.RANDOM_SEED,
    "deep_init_size": 50,
    "use_inner": False
}

# load data
dfTrain, dfTest, X_train, y_train, X_test, ids_test, cat_features_indices = load_data()

# folds
folds = list(StratifiedKFold(n_splits=config.NUM_SPLITS, shuffle=True,
                             random_state=config.RANDOM_SEED).split(X_train, y_train))

# run_base_model_nfm returns None, so its result must not be unpacked: the old
# ``y_train_pnn, y_test_pnn = ...`` raised TypeError once every fold had
# finished training.
run_base_model_nfm(dfTrain, dfTest, folds, pnn_params)
pd.read_csv(infile) 49 | if has_label: 50 | y = dfi["target"].values.tolist() 51 | dfi.drop(["id", "target"], axis=1, inplace=True) 52 | else: 53 | ids = dfi["id"].values.tolist() 54 | dfi.drop(["id"], axis=1, inplace=True) 55 | # dfi for feature index 56 | # dfv for feature value which can be either binary (1/0) or float (e.g., 10.24) 57 | 58 | numeric_Xv = dfi[self.feat_dict.numeric_cols].values.tolist() 59 | dfi.drop(self.feat_dict.numeric_cols,axis=1,inplace=True) 60 | 61 | dfv = dfi.copy() 62 | for col in dfi.columns: 63 | if col in self.feat_dict.ignore_cols: 64 | dfi.drop(col, axis=1, inplace=True) 65 | dfv.drop(col, axis=1, inplace=True) 66 | continue 67 | else: 68 | dfi[col] = dfi[col].map(self.feat_dict.feat_dict[col]) 69 | dfv[col] = 1. 70 | 71 | # list of list of feature indices of each sample in the dataset 72 | cate_Xi = dfi.values.tolist() 73 | # list of list of feature values of each sample in the dataset 74 | cate_Xv = dfv.values.tolist() 75 | if has_label: 76 | return cate_Xi, cate_Xv,numeric_Xv,y 77 | else: 78 | return cate_Xi, cate_Xv,numeric_Xv,ids -------------------------------------------------------------------------------- /ctr_of_recommendation/DCN_Demo/__pycache__/DCN.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/DCN_Demo/__pycache__/DCN.cpython-36.pyc -------------------------------------------------------------------------------- /ctr_of_recommendation/DCN_Demo/__pycache__/DataLoader.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/DCN_Demo/__pycache__/DataLoader.cpython-36.pyc -------------------------------------------------------------------------------- 
/ctr_of_recommendation/DCN_Demo/__pycache__/config.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/DCN_Demo/__pycache__/config.cpython-36.pyc -------------------------------------------------------------------------------- /ctr_of_recommendation/DCN_Demo/config.py: -------------------------------------------------------------------------------- 1 | TRAIN_FILE = "data/train.csv" 2 | TEST_FILE = "data/test.csv" 3 | 4 | SUB_DIR = "output" 5 | 6 | 7 | NUM_SPLITS = 3 8 | RANDOM_SEED = 2017 9 | 10 | # types of columns of the dataset dataframe 11 | CATEGORICAL_COLS = [ 12 | 'ps_ind_02_cat', 'ps_ind_04_cat', 'ps_ind_05_cat', 13 | 'ps_car_01_cat', 'ps_car_02_cat', 'ps_car_03_cat', 14 | 'ps_car_04_cat', 'ps_car_05_cat', 'ps_car_06_cat', 15 | 'ps_car_07_cat', 'ps_car_08_cat', 'ps_car_09_cat', 16 | 'ps_car_10_cat', 'ps_car_11_cat', 17 | ] 18 | 19 | NUMERIC_COLS = [ 20 | # # binary 21 | # "ps_ind_06_bin", "ps_ind_07_bin", "ps_ind_08_bin", 22 | # "ps_ind_09_bin", "ps_ind_10_bin", "ps_ind_11_bin", 23 | # "ps_ind_12_bin", "ps_ind_13_bin", "ps_ind_16_bin", 24 | # "ps_ind_17_bin", "ps_ind_18_bin", 25 | # "ps_calc_15_bin", "ps_calc_16_bin", "ps_calc_17_bin", 26 | # "ps_calc_18_bin", "ps_calc_19_bin", "ps_calc_20_bin", 27 | # numeric 28 | "ps_reg_01", "ps_reg_02", "ps_reg_03", 29 | "ps_car_12", "ps_car_13", "ps_car_14", "ps_car_15", 30 | 31 | # feature engineering 32 | "missing_feat", "ps_car_13_x_ps_reg_03", 33 | ] 34 | 35 | IGNORE_COLS = [ 36 | "id", "target", 37 | "ps_calc_01", "ps_calc_02", "ps_calc_03", "ps_calc_04", 38 | "ps_calc_05", "ps_calc_06", "ps_calc_07", "ps_calc_08", 39 | "ps_calc_09", "ps_calc_10", "ps_calc_11", "ps_calc_12", 40 | "ps_calc_13", "ps_calc_14", 41 | "ps_calc_15_bin", "ps_calc_16_bin", "ps_calc_17_bin", 42 | "ps_calc_18_bin", "ps_calc_19_bin", "ps_calc_20_bin" 43 | ] 44 | 
def load_data():
    """Load the CSVs named in ``config`` and append two engineered columns.

    Returns ``(dfTrain, dfTest, X_train, y_train, X_test, ids_test)``, where the
    ``X_*`` arrays hold every column that is neither ``id``/``target`` nor
    listed in ``config.IGNORE_COLS``.
    """
    def preprocess(frame):
        feature_names = [name for name in frame.columns if name not in ["id", "target"]]
        # number of cells equal to -1 in each row
        is_missing = (frame[feature_names] == -1).values
        frame["missing_feat"] = np.sum(is_missing, axis=1)
        frame["ps_car_13_x_ps_reg_03"] = frame["ps_car_13"] * frame["ps_reg_03"]
        return frame

    dfTrain = preprocess(pd.read_csv(config.TRAIN_FILE))
    dfTest = preprocess(pd.read_csv(config.TEST_FILE))

    kept = [
        name for name in dfTrain.columns
        if name not in ["id", "target"] and name not in config.IGNORE_COLS
    ]

    train_matrix = dfTrain[kept].values
    train_labels = dfTrain["target"].values
    test_matrix = dfTest[kept].values
    test_ids = dfTest["id"].values

    return dfTrain, dfTest, train_matrix, train_labels, test_matrix, test_ids


def run_base_model_dcn(dfTrain, dfTest, folds, dcn_params):
    """Fit one DCN model per ``(train_idx, valid_idx)`` pair in ``folds``.

    ``dcn_params`` is updated in place with ``cate_feature_size``,
    ``field_size`` and ``numeric_feature_size`` before the models are built.
    Nothing is returned.
    """
    fd = FeatureDictionary(dfTrain, dfTest,
                           numeric_cols=config.NUMERIC_COLS,
                           ignore_cols=config.IGNORE_COLS,
                           cate_cols=config.CATEGORICAL_COLS)

    print(fd.feat_dim)
    print(fd.feat_dict)

    parser = DataParser(feat_dict=fd)
    cate_Xi_train, cate_Xv_train, numeric_Xv_train, y_train = parser.parse(df=dfTrain, has_label=True)
    cate_Xi_test, cate_Xv_test, numeric_Xv_test, ids_test = parser.parse(df=dfTest)

    dcn_params["cate_feature_size"] = fd.feat_dim
    dcn_params["field_size"] = len(cate_Xi_train[0])
    dcn_params['numeric_feature_size'] = len(config.NUMERIC_COLS)

    def pick(rows, indices):
        # rows selected by position, in the order given by ``indices``
        return [rows[k] for k in indices]

    # Order matches the positional arguments fit() is called with.
    parts = (cate_Xi_train, cate_Xv_train, numeric_Xv_train, y_train)

    for train_idx, valid_idx in folds:
        train_args = [pick(part, train_idx) for part in parts]
        valid_args = [pick(part, valid_idx) for part in parts]

        model = DCN(**dcn_params)
        model.fit(*(train_args + valid_args))


# To work on a small sample, truncate the CSVs once beforehand, e.g.
# pd.read_csv(config.TRAIN_FILE, nrows=10000).to_csv(config.TRAIN_FILE, index=False)

dfTrain, dfTest, X_train, y_train, X_test, ids_test = load_data()
print('load_data_over')

_kfold = StratifiedKFold(n_splits=config.NUM_SPLITS, shuffle=True,
                         random_state=config.RANDOM_SEED)
folds = list(_kfold.split(X_train, y_train))
print('process_data_over')

dcn_params = dict(
    embedding_size=8,
    deep_layers=[32, 32],
    dropout_deep=[0.5, 0.5, 0.5],
    deep_layers_activation=tf.nn.relu,
    epoch=30,
    batch_size=1024,
    learning_rate=0.001,
    optimizer_type="adam",
    batch_norm=1,
    batch_norm_decay=0.995,
    l2_reg=0.01,
    verbose=True,
    random_seed=config.RANDOM_SEED,
    cross_layer_num=3,
)
print('start train')
run_base_model_dcn(dfTrain, dfTest, folds, dcn_params)
class GRUCell(RNNCell):
    """GRU cell whose gate and candidate projections are created lazily.

    Both ``_Linear`` layers are built on the first ``call`` and cached on the
    instance, so later calls reuse them.
    """

    def __init__(self,
                 num_units,
                 activation=None,
                 reuse=None,
                 kernel_initializer=None,
                 bias_initializer=None):
        super(GRUCell, self).__init__(_reuse=reuse)
        self._num_units = num_units
        # tanh is used when the caller supplies no activation.
        self._activation = activation if activation else math_ops.tanh
        self._kernel_initializer = kernel_initializer
        self._bias_initializer = bias_initializer
        # Created on first call().
        self._gate_linear = None
        self._candidate_linear = None

    @property
    def state_size(self):
        return self._num_units

    @property
    def output_size(self):
        return self._num_units

    def call(self, inputs, state):
        """Gated recurrent unit (GRU) with nunits cells."""
        if self._gate_linear is None:
            # Gate bias falls back to a constant 1.0 initializer.
            gate_bias_init = self._bias_initializer
            if gate_bias_init is None:
                gate_bias_init = init_ops.constant_initializer(
                    1.0, dtype=inputs.dtype)
            with vs.variable_scope("gates"):  # Reset gate and update gate.
                self._gate_linear = _Linear(
                    [inputs, state],
                    2 * self._num_units,
                    True,
                    bias_initializer=gate_bias_init,
                    kernel_initializer=self._kernel_initializer)

        gates = math_ops.sigmoid(self._gate_linear([inputs, state]))
        reset_gate, update_gate = array_ops.split(
            value=gates, num_or_size_splits=2, axis=1)

        reset_state = reset_gate * state
        if self._candidate_linear is None:
            with vs.variable_scope("candidate"):
                self._candidate_linear = _Linear(
                    [inputs, reset_state],
                    self._num_units,
                    True,
                    bias_initializer=self._bias_initializer,
                    kernel_initializer=self._kernel_initializer)
        candidate = self._activation(
            self._candidate_linear([inputs, reset_state]))
        # Blend the previous state with the candidate using the update gate.
        next_state = update_gate * state + (1 - update_gate) * candidate
        return next_state, next_state
def parametric_relu(_x):
    """Parametric ReLU with one learnable slope per entry of the last axis.

    The slope variable is named ``'alpha'`` and starts at 0.0.
    """
    slope = tf.get_variable('alpha', _x.get_shape()[-1],
                            initializer=tf.constant_initializer(0.0),
                            dtype=tf.float32)
    positive_part = tf.nn.relu(_x)
    # (_x - |_x|) * 0.5 is _x where _x < 0 and 0 elsewhere.
    negative_part = slope * (_x - abs(_x)) * 0.5

    return positive_part + negative_part
# Builds per-sample click histories from "jointed-new-split-info" and writes
# them to "local_train" / "local_test" (Python 2 script: uses `print >>`).
import sys
import hashlib
import random

fin = open("jointed-new-split-info", "r")
ftrain = open("local_train", "w")
ftest = open("local_test", "w")

last_user = "0"
common_fea = ""
line_idx = 0
for line in fin:
    # Tab-separated fields; indices 0,1,2,3,5,6 are read below.
    items = line.strip().split("\t")
    ds = items[0]
    clk = int(items[1])
    user = items[2]
    movie_id = items[3]
    dt = items[5]
    cat1 = items[6]

    # Rows tagged "20180118" go to the training file, all others to the test file.
    if ds=="20180118":
        fo = ftrain
    else:
        fo = ftest
    if user != last_user:
        # New user: start with an empty click history.
        movie_id_list = []
        cate1_list = []
        #print >> fo, items[1] + "\t" + user + "\t" + movie_id + "\t" + cat1 +"\t" + "" + "\t" + ""
    else:
        history_clk_num = len(movie_id_list)
        cat_str = ""
        mid_str = ""
        # NOTE(review): the separator literals below appear empty here, yet the
        # `[:-1]` trimming that follows only makes sense for a one-character
        # delimiter — confirm against the original file (a control character
        # may have been lost).
        for c1 in cate1_list:
            cat_str += c1 + ""
        for mid in movie_id_list:
            mid_str += mid + ""
        if len(cat_str) > 0: cat_str = cat_str[:-1]
        if len(mid_str) > 0: mid_str = mid_str[:-1]
        # Only emit a sample once the user has at least one clicked item in history.
        if history_clk_num >= 1:    # 8 is the average length of user behavior
            print >> fo, items[1] + "\t" + user + "\t" + movie_id + "\t" + cat1 +"\t" + mid_str + "\t" + cat_str
    last_user = user
    # Only clicked rows extend the history.
    if clk:
        movie_id_list.append(movie_id)
        cate1_list.append(cat1)
    line_idx += 1
def manual_join():
    """Join reviews with item categories and add one negative sample per review.

    Reads ``reviews-info`` (tab-separated, user first, item second, timestamp
    last) and ``item-info`` (item, category), then writes ``jointed-new``.
    For every review, ordered by timestamp within each user, it writes:

    * a ``0``-labelled row whose item is replaced by a randomly drawn other
      item from the review log, and
    * a ``1``-labelled row with the original review.

    Each row ends with the item's category, or ``default_cat`` when the item
    has no metadata. Previously only the positive row had this fallback and a
    negative item without metadata raised ``KeyError``.
    """
    user_map = {}
    item_list = []
    with open("reviews-info", "r") as f_rev:
        for line in f_rev:
            line = line.strip()
            items = line.split("\t")
            if items[0] not in user_map:
                user_map[items[0]] = []
            # Keep the raw row plus its timestamp for per-user sorting.
            user_map[items[0]].append(("\t".join(items), float(items[-1])))
            item_list.append(items[1])

    meta_map = {}
    with open("item-info", "r") as f_meta:
        for line in f_meta:
            arr = line.strip().split("\t")
            # First category seen for an item wins.
            if arr[0] not in meta_map:
                meta_map[arr[0]] = arr[1]

    negatives_per_positive = 1  # negative sampling frequency
    # With fewer than two distinct items no negative can ever be drawn, and
    # the rejection loop below would never terminate.
    can_sample_negative = len(set(item_list)) > 1

    # `with` guarantees the output is flushed before split_test() reads it.
    with open("jointed-new", "w") as fo:
        for key in user_map:
            sorted_user_bh = sorted(user_map[key], key=lambda x: x[1])
            for line, t in sorted_user_bh:
                items = line.split("\t")
                asin = items[1]
                j = 0
                while can_sample_negative and j < negatives_per_positive:
                    asin_neg_index = random.randint(0, len(item_list) - 1)
                    asin_neg = item_list[asin_neg_index]
                    if asin_neg == asin:
                        continue
                    items[1] = asin_neg
                    fo.write("0" + "\t" + "\t".join(items) + "\t"
                             + meta_map.get(asin_neg, "default_cat") + "\n")
                    j += 1
                fo.write("1" + "\t" + line + "\t"
                         + meta_map.get(asin, "default_cat") + "\n")
def main(file, temporary=False):
    """Write a randomly shuffled copy of ``file``, one line per record.

    Args:
        file: path of the text file to shuffle.
        temporary: when true, the shuffled lines go to an anonymous temporary
            file created next to ``file``; otherwise to ``file + '.shuf'``.

    Returns:
        The output file object. In temporary mode it is open and rewound to
        the start; otherwise it is already closed.

    The earlier version first copied the input into a scratch file inside a
    hard-coded home directory, which failed on any other machine and leaked
    the descriptor; the lines are now read directly.
    """
    with open(file, "r") as src:
        lines = [l.strip("\n") for l in src]
    random.shuffle(lines)

    if temporary:
        path, filename = os.path.split(os.path.realpath(file))
        # Text mode so the same str writes work on Python 2 and 3.
        fd = tempfile.TemporaryFile(mode='w+', prefix=filename + '.shuf', dir=path)
    else:
        fd = open(file + '.shuf', 'w')

    try:
        for s in lines:
            fd.write(s + "\n")
    except Exception:
        # Do not leak the handle if a write fails midway.
        fd.close()
        raise

    if temporary:
        fd.seek(0)
    else:
        fd.close()

    return fd
def prelu(_x, scope=''):
    """Parametric ReLU: identity for positive inputs, learned slope otherwise.

    The slope variable is named ``"prelu_" + scope``, has one entry per
    element of the last axis of ``_x`` and is initialised to 0.1.
    """
    with tf.variable_scope(name_or_scope=scope, default_name="prelu"):
        slope = tf.get_variable("prelu_" + scope,
                                shape=_x.get_shape()[-1],
                                dtype=_x.dtype,
                                initializer=tf.constant_initializer(0.1))
        positive_part = tf.maximum(0.0, _x)
        negative_part = tf.minimum(0.0, _x)
        return positive_part + slope * negative_part
def din_fcn_attention(query, facts, attention_size, mask, stag='null', mode='SUM', softmax_stag=1, time_major=False, return_alphas=False, forCnn=False):
    """Score each step of ``facts`` against ``query`` with a small MLP.

    Args:
        query: tensor projected by a dense layer to the last dimension of ``facts``.
        facts: sequence tensor, or a tuple of tensors that is concatenated on axis 2.
        attention_size: not used in this function body.
        mask: compared against ones to obtain a boolean keep-mask.
        stag: suffix appended to the dense layer names.
        mode: ``'SUM'`` returns ``scores @ facts``; any other value returns
            ``facts`` scaled element-wise by the scores.
        softmax_stag: when truthy, softmax is applied to the scores.
        time_major: when true, the first two axes of ``facts`` are swapped.
        return_alphas: when true, ``(output, scores)`` is returned.
        forCnn: when true, the mask is not applied to the scores.

    Returns:
        ``output`` or ``(output, scores)`` depending on ``return_alphas``.
    """
    if isinstance(facts, tuple):
        # In case of Bi-RNN, concatenate the forward and the backward RNN outputs.
        facts = tf.concat(facts, 2)
    if len(facts.get_shape().as_list()) == 2:
        # Rank-2 input gets a length-1 axis inserted at position 1.
        facts = tf.expand_dims(facts, 1)

    if time_major:
        # (T,B,D) => (B,T,D)
        facts = tf.array_ops.transpose(facts, [1, 0, 2])


    # Boolean mask: True where the incoming mask equals 1.
    mask = tf.equal(mask,tf.ones_like(mask))
    facts_size = facts.get_shape().as_list()[-1] # Hidden size for rnn layer
    query = tf.layers.dense(query,facts_size,activation=None,name='f1'+stag)
    query = prelu(query)

    # Repeat the query once per time step, then reshape it to the shape of facts.
    queries = tf.tile(query,[1,tf.shape(facts)[1]]) # Batch * Time * Hidden size
    queries = tf.reshape(queries,tf.shape(facts))
    din_all = tf.concat([queries,facts,queries-facts,queries*facts],axis=-1) # Batch * Time * (4 * Hidden size)
    d_layer_1_all = tf.layers.dense(din_all, 80, activation=tf.nn.sigmoid, name='f1_att' + stag)
    d_layer_2_all = tf.layers.dense(d_layer_1_all, 40, activation=tf.nn.sigmoid, name='f2_att' + stag)
    d_layer_3_all = tf.layers.dense(d_layer_2_all, 1, activation=None, name='f3_att' + stag) # Batch * Time * 1

    d_layer_3_all = tf.reshape(d_layer_3_all,[-1,1,tf.shape(facts)[1]]) # Batch * 1 * time
    scores = d_layer_3_all

    key_masks = tf.expand_dims(mask,1) # Batch * 1 * Time
    # Large negative fill value so masked positions vanish after softmax.
    paddings = tf.ones_like(scores) * (-2 ** 32 + 1)

    if not forCnn:
        scores = tf.where(key_masks, scores, paddings) # [B, 1, T]; positions where the mask is False are filled with paddings

    # Activation
    if softmax_stag:
        scores = tf.nn.softmax(scores) # [B, 1, T]

    # Weighted sum
    if mode == 'SUM':
        output = tf.matmul(scores,facts) # Batch * 1 * Hidden Size
    else:
        scores = tf.reshape(scores,[-1,tf.shape(facts)[1]]) # Batch * Time
        output = facts * tf.expand_dims(scores,-1) # Batch * Time * Hidden Size
        output = tf.reshape(output,tf.shape(facts))
    if return_alphas:
        return output,scores
    else:
        return output
# Dice activation function
import tensorflow as tf

def dice(_x, axis=-1, epsilon=0.0000001, name=''):
    """Dice activation: a PReLU whose gate comes from batch-normalised input.

    Args:
        _x: input tensor.
        axis: feature axis that is kept; statistics are reduced over all
            other axes.
        epsilon: small constant added for numerical stability.
        name: suffix for the learnable ``alpha`` variable name.

    Returns:
        ``alphas * (1 - p) * _x + p * _x`` with ``p = sigmoid(normalised _x)``.

    The earlier version computed the batch mean/std and then discarded them,
    normalising with ``tf.layers.batch_normalization`` at its default
    ``training=False``. In that mode the layer uses its moving statistics
    instead of the current batch, so the input was not batch-normalised. The
    batch statistics are now used directly, as in the DIEN ``dice``
    implementations.
    """
    # alpha is a trainable parameter as well
    alphas = tf.get_variable('alpha' + name, _x.get_shape()[-1],
                             initializer=tf.constant_initializer(0.0),
                             dtype=tf.float32)

    input_shape = list(_x.get_shape())
    reduction_axes = list(range(len(input_shape)))

    # Reduce over every axis except the feature axis.
    del reduction_axes[axis]

    broadcast_shape = [1] * len(input_shape)
    broadcast_shape[axis] = input_shape[axis]

    # Statistics of the current batch.
    mean = tf.reduce_mean(_x, axis=reduction_axes)
    brodcast_mean = tf.reshape(mean, broadcast_shape)
    std = tf.reduce_mean(tf.square(_x - brodcast_mean) + epsilon, axis=reduction_axes)
    std = tf.sqrt(std)
    brodcast_std = tf.reshape(std, broadcast_shape)
    x_normed = (_x - brodcast_mean) / (brodcast_std + epsilon)
    x_p = tf.sigmoid(x_normed)

    return alphas * (1.0 - x_p) * _x + x_p * _x
8 | 9 | Bibtex: 10 | ```sh 11 | @article{Zhou2017Deep, 12 | title={Deep Interest Network for Click-Through Rate Prediction}, 13 | author={Zhou, Guorui and Song, Chengru and Zhu, Xiaoqiang and Ma, Xiao and Yan, Yanghui and Dai, Xingya and Zhu, Han and Jin, Junqi and Li, Han and Gai, Kun}, 14 | year={2017}, 15 | } 16 | ``` 17 | 18 | ## Requirements 19 | * Python >= 3.6.1 20 | * NumPy >= 1.12.1 21 | * Pandas >= 0.20.1 22 | * TensorFlow >= 1.4.0 (Earlier versions will probably work too, though I didn't test them) 23 | * GPU with memory >= 10G 24 | 25 | ## Download dataset and preprocess 26 | * Step 1: Download the Amazon product dataset for the Electronics category, which has 498,196 products and 7,824,482 records, and extract it to the `raw_data/` folder. 27 | ```sh 28 | mkdir raw_data/; 29 | cd utils; 30 | bash 0_download_raw.sh; 31 | ``` 32 | * Step 2: Convert the raw data to pandas DataFrames and remap the categorical ids. 33 | ```sh 34 | python 1_convert_pd.py; 35 | python 2_remap_id.py 36 | ``` 37 | 38 | ## Training and Evaluation 39 | This implementation contains not only the DIN method but also the competing methods, including Wide&Deep, PNN, and DeepFM. The training procedure is the same for every method: 40 | * Step 1: Choose a method and enter the folder. 41 | ``` 42 | cd din; 43 | ``` 44 | Alternatively, you can run the competing methods directly with `cd deepFM`, `cd pnn`, or `cd wide_deep`, 45 | and follow the same instructions below. 46 | 47 | * Step 2: Build the dataset adapted to the current method. 48 | ``` 49 | python build_dataset.py 50 | ``` 51 | * Step 3: Start training and evaluating using default arguments in background mode. 52 | ``` 53 | python train.py >log.txt 2>&1 & 54 | ``` 55 | * Step 4: Check training and evaluation progress. 
# Builds train/test samples with one random negative per positive and pickles
# them to 'dataset.pkl'.
import random
import pickle

# Fixed seed so negative sampling and shuffling are reproducible.
random.seed(1234)

# NOTE(review): pickle.load executes arbitrary code from the file; only use
# with a trusted 'data/remap.pkl'.
with open('data/remap.pkl', 'rb') as f:
    reviews_df = pickle.load(f)
    cate_list = pickle.load(f)
    user_count, item_count, cate_count, example_count = pickle.load(f)

train_set = []
test_set = []
for reviewerID, hist in reviews_df.groupby('reviewerID'):
    pos_list = hist['asin'].tolist()
    def gen_neg():
        # Draw random item ids until one is not among this user's items.
        neg = pos_list[0]
        while neg in pos_list:
            neg = random.randint(0, item_count-1)
        return neg
    neg_list = [gen_neg() for i in range(len(pos_list))]

    for i in range(1, len(pos_list)):
        # History is every item before position i (rebinds the groupby `hist`).
        hist = pos_list[:i]
        if i != len(pos_list) - 1:
            # Training samples: (user, history, item, label).
            train_set.append((reviewerID, hist, pos_list[i], 1))
            train_set.append((reviewerID, hist, neg_list[i], 0))
        else:
            # The last position becomes the test sample: (user, history, (pos, neg)).
            label = (pos_list[i], neg_list[i])
            test_set.append((reviewerID, hist, label))

random.shuffle(train_set)
random.shuffle(test_set)

# Holds only if every user contributes exactly one test sample, i.e. has at
# least two items.
assert len(test_set) == user_count
# assert(len(test_set) + len(train_set) // 2 == reviews_df.shape[0])

with open('dataset.pkl', 'wb') as f:
    pickle.dump(train_set, f, pickle.HIGHEST_PROTOCOL)
    pickle.dump(test_set, f, pickle.HIGHEST_PROTOCOL)
    pickle.dump(cate_list, f, pickle.HIGHEST_PROTOCOL)
    pickle.dump((user_count, item_count, cate_count), f, pickle.HIGHEST_PROTOCOL)
class DataInput:
    """Single-pass iterator yielding padded mini-batches of training samples.

    Each sample is read as ``(user, history, item, label)``. Iteration yields
    ``(batch_number, (users, items, labels, history_matrix, history_lengths))``
    where ``batch_number`` starts at 1 and ``history_matrix`` is an int64
    array zero-padded to the longest history in the batch.
    """

    def __init__(self, data, batch_size):
        self.batch_size = batch_size
        self.data = data
        # Number of batches, counting a trailing partial batch.
        batches = len(self.data) // self.batch_size
        if batches * self.batch_size < len(self.data):
            batches += 1
        self.epoch_size = batches
        self.i = 0

    def __iter__(self):
        return self

    def __next__(self):
        if self.i == self.epoch_size:
            raise StopIteration

        start = self.i * self.batch_size
        stop = min(start + self.batch_size, len(self.data))
        batch = self.data[start:stop]
        self.i += 1

        users = [sample[0] for sample in batch]
        items = [sample[2] for sample in batch]
        labels = [sample[3] for sample in batch]
        lengths = [len(sample[1]) for sample in batch]

        # Left-aligned histories, zero-padded on the right.
        history = np.zeros([len(batch), max(lengths)], np.int64)
        for row, sample in enumerate(batch):
            for col, item_id in enumerate(sample[1]):
                history[row][col] = item_id

        return self.i, (users, items, labels, history, lengths)
def build_map(df, col_name):
    """Replace the values of ``df[col_name]`` with dense integer ids, in place.

    Ids follow the sorted order of the distinct values.

    Returns:
        ``(mapping, keys)``: the value-to-id dict and the sorted distinct values.
    """
    ordered_values = df[col_name].unique().tolist()
    ordered_values.sort()
    index_of = {value: position
                for position, value in enumerate(ordered_values)}
    df[col_name] = df[col_name].map(lambda value: index_of[value])
    return index_of, ordered_values
example_count = \ 28 | len(revi_map), len(asin_map), len(cate_map), reviews_df.shape[0] 29 | print('user_count: %d\titem_count: %d\tcate_count: %d\texample_count: %d' % 30 | (user_count, item_count, cate_count, example_count)) 31 | 32 | meta_df = meta_df.sort_values('asin') 33 | meta_df = meta_df.reset_index(drop=True) 34 | reviews_df['asin'] = reviews_df['asin'].map(lambda x: asin_map[x]) 35 | reviews_df = reviews_df.sort_values(['reviewerID', 'unixReviewTime']) 36 | reviews_df = reviews_df.reset_index(drop=True) 37 | reviews_df = reviews_df[['reviewerID', 'asin', 'unixReviewTime']] 38 | 39 | cate_list = [meta_df['categories'][i] for i in range(len(asin_map))] 40 | cate_list = np.array(cate_list, dtype=np.int32) 41 | 42 | with open('data/remap.pkl', 'wb') as f: 43 | pickle.dump(reviews_df, f, pickle.HIGHEST_PROTOCOL) # uid, iid 44 | pickle.dump(cate_list, f, pickle.HIGHEST_PROTOCOL) # cid of iid line 45 | pickle.dump((user_count, item_count, cate_count, example_count), 46 | f, pickle.HIGHEST_PROTOCOL) 47 | pickle.dump((asin_key, cate_key, revi_key), f, pickle.HIGHEST_PROTOCOL) 48 | -------------------------------------------------------------------------------- /ctr_of_recommendation/DIN_Demo/train.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | import random 6 | 7 | from model import Model 8 | from input import DataInput, DataInputTest 9 | 10 | import time 11 | 12 | import sys 13 | 14 | 15 | random.seed(1234) 16 | np.random.seed(1234) 17 | tf.set_random_seed(1234) 18 | 19 | train_batch_size = 32 20 | test_batch_size = 512 21 | 22 | with open('dataset.pkl', 'rb') as f: 23 | train_set = pickle.load(f) 24 | test_set = pickle.load(f) 25 | cate_list = pickle.load(f) 26 | user_count, item_count, cate_count = pickle.load(f) 27 | 28 | print(user_count, item_count, cate_count) 29 | 30 | # catelist是item到cate的转换关系 31 | print(len(cate_list)) 32 | 33 | 
def calc_auc(raw_arr):
    """Compute AUC from ``[neg_flag, pos_flag, score]`` records.

    Records are processed in ascending score order and the area is
    accumulated with the trapezoid rule over the running negative/positive
    counts.

    Returns:
        ``-0.5`` when (almost) every record is positive or every record is
        negative, the AUC when both classes are present, ``None`` otherwise.
    """
    ordered = sorted(raw_arr, key=lambda rec: rec[2])

    area = 0.0
    prev_neg, prev_pos = 0.0, 0.0
    neg_total, pos_total = 0.0, 0.0
    for rec in ordered:
        neg_total += rec[0]
        pos_total += rec[1]
        # Trapezoid between the previous and the current cumulative point.
        area += (neg_total - prev_neg) * (pos_total + prev_pos)
        prev_neg, prev_pos = neg_total, pos_total

    # A single class covering all records makes the AUC undefined.
    limit = len(ordered) - 1e-3
    if pos_total > limit or neg_total > limit:
        return -0.5

    if not pos_total * neg_total > 0.0:
        return None
    return (1.0 - area / (2.0 * pos_total * neg_total))  # normal auc
def to_df(file_path):
    """Load a file holding one dict literal per line into a DataFrame.

    Each non-blank line is parsed as a Python literal and becomes one row;
    rows are indexed 0..n-1 in file order.

    The lines are parsed with ``ast.literal_eval`` rather than ``eval``: the
    input is a downloaded data file, and ``literal_eval`` only accepts
    literals, so a crafted line cannot execute code.

    Args:
        file_path: path of the text file to read.

    Returns:
        pandas.DataFrame built with ``from_dict(..., orient='index')``.

    Raises:
        ValueError / SyntaxError: if a line is not a valid Python literal.
    """
    import ast  # local import: only this function needs it

    records = {}
    with open(file_path, 'r') as fin:
        for line in fin:
            text = line.strip()
            if not text:
                continue  # tolerate blank or trailing empty lines
            records[len(records)] = ast.literal_eval(text)
    return pd.DataFrame.from_dict(records, orient='index')
def build_map(df, col_name):
    """Re-encode ``df[col_name]`` as dense integer ids, in place.

    The distinct values of the column are sorted and numbered from 0; the
    column is then overwritten with those numbers.

    Returns:
        ``(mapping, keys)`` where ``mapping`` is the value -> id dict and
        ``keys`` is the sorted list of distinct values (``keys[id]`` is the
        original value).
    """
    ordered_values = df[col_name].unique().tolist()
    ordered_values.sort()
    value_to_id = {value: position for position, value in enumerate(ordered_values)}
    df[col_name] = df[col_name].map(value_to_id.__getitem__)
    return value_to_id, ordered_values
if __name__ == "__main__":
    # Step 0 of the DSIN pipeline: sample users, filter the click samples and
    # the page-view behavior log down to those users, and label-encode the
    # category / brand ids shared by the ad table and the log.

    if not os.path.exists('../sampled_data/'):
        os.mkdir('../sampled_data/')

    # Cache files.  These are the names this script writes and the names the
    # later pipeline steps read; the cache check previously looked for a
    # different suffix ('_.pkl') and therefore never found them.
    user_sub_path = '../sampled_data/user_profile_' + str(FRAC) + '.pkl'
    sample_sub_path = '../sampled_data/raw_sample_' + str(FRAC) + '.pkl'

    if os.path.exists(user_sub_path) and os.path.exists(sample_sub_path):
        user_sub = pd.read_pickle(user_sub_path)
        sample_sub = pd.read_pickle(sample_sub_path)
    else:
        # The raw CSVs are only needed when the sampled pickles are missing.
        user = pd.read_csv('../data/user_profile.csv')
        sample = pd.read_csv('../data/raw_sample.csv')

        if FRAC < 1.0:
            user_sub = user.sample(frac=FRAC, random_state=1024)
        else:
            user_sub = user
        sample_sub = sample.loc[sample.user.isin(user_sub.userid.unique())]
        pd.to_pickle(user_sub, user_sub_path)
        pd.to_pickle(sample_sub, sample_sub_path)

    # Page-view rows of the behavior log, cached after the first extraction.
    if os.path.exists('../data/behavior_log_pv.pkl'):
        log = pd.read_pickle('../data/behavior_log_pv.pkl')
    else:
        log = pd.read_csv('../data/behavior_log.csv')
        log = log.loc[log['btag'] == 'pv']
        pd.to_pickle(log, '../data/behavior_log_pv.pkl')

    userset = user_sub.userid.unique()
    # Copy so the column assignments below write to an independent frame
    # instead of a slice of the full log.
    log = log.loc[log.user.isin(userset)].copy()
    # pd.to_pickle(log, '../sampled_data/behavior_log_pv_user_filter_' + str(FRAC) + '_.pkl')

    ad = pd.read_csv('../data/ad_feature.csv')
    ad['brand'] = ad['brand'].fillna(-1)

    # Encode category ids over the union of both tables so the same raw id
    # maps to the same code in `ad` and `log`; codes start at 1.
    lbe = LabelEncoder()
    unique_cate_id = np.concatenate(
        (ad['cate_id'].unique(), log['cate'].unique()))
    lbe.fit(unique_cate_id)
    ad['cate_id'] = lbe.transform(ad['cate_id']) + 1
    log['cate'] = lbe.transform(log['cate']) + 1

    # Same treatment for brand ids.
    lbe = LabelEncoder()
    unique_brand = np.concatenate(
        (ad['brand'].unique(), log['brand'].unique()))
    lbe.fit(unique_brand)
    ad['brand'] = lbe.transform(ad['brand']) + 1
    log['brand'] = lbe.transform(log['brand']) + 1

    log = log.loc[log.user.isin(sample_sub.user.unique())]
    log = log.drop(columns=['btag'])
    log = log.loc[log['time_stamp'] > 0]

    pd.to_pickle(ad, '../sampled_data/ad_feature_enc_' + str(FRAC) + '.pkl')
    pd.to_pickle(
        log, '../sampled_data/behavior_log_pv_user_filter_enc_' + str(FRAC) + '.pkl')

    print("0_gen_sampled_data done")
def gen_session_list_dsin(uid, t):
    """Split one user's behavior rows into time-ordered sessions.

    Rows are ordered by ``time_stamp``; a new session starts whenever the gap
    to the previous row exceeds 30 minutes.  Only sessions with more than two
    behaviors are kept.  The caller's frame is not modified.

    Args:
        uid: user id, returned unchanged.
        t: DataFrame with ``time_stamp``, ``cate`` and ``brand`` columns.

    Returns:
        ``(uid, session_list)`` where each session is a list of
        ``(cate, brand, time_stamp)`` tuples.
    """
    t = t.sort_values('time_stamp', ascending=True)
    last_time = 1483574401  # pd.to_datetime("2017-01-05 00:00:01")
    session_list = []
    session = []
    for _, row in t.iterrows():
        time_stamp = row['time_stamp']
        delta = time_stamp - last_time
        cate_id = row['cate']
        brand = row['brand']
        # Session begin when current behavior and the last behavior are
        # separated by more than 30 minutes.
        if delta > 30 * 60:
            if len(session) > 2:  # Only use sessions that have >2 behaviors
                session_list.append(session[:])
            session = []

        session.append((cate_id, brand, time_stamp))
        last_time = time_stamp
    if len(session) > 2:
        session_list.append(session[:])
    return uid, session_list


def gen_session_list_din(uid, t):
    """Collect all of one user's behavior rows as a single session.

    Rows are ordered by ``time_stamp``; the single session is kept only when
    it has more than two behaviors.  The caller's frame is not modified.

    Returns:
        ``(uid, session_list)`` with zero or one session of
        ``(cate, brand, time_stamp)`` tuples.
    """
    t = t.sort_values('time_stamp', ascending=True)
    session_list = []
    session = []
    for _, row in t.iterrows():
        session.append((row['cate'], row['brand'], row['time_stamp']))

    if len(session) > 2:
        session_list.append(session[:])
    return uid, session_list


def applyParallel(df_grouped, func, n_jobs, backend='multiprocessing'):
    """Run ``func(name, group)`` over every group with joblib.

    ``func`` must return a ``(key, value)`` pair; the pairs are collected into
    a dict.
    """  # backend='threading'
    results = Parallel(n_jobs=n_jobs, verbose=4, backend=backend)(
        delayed(func)(name, group) for name, group in df_grouped)

    return {k: v for k, v in results}


def gen_user_hist_sessions(model, FRAC=0.25, n_jobs=20, batch_size=150000):
    """Generate and pickle per-user history sessions for ``model``.

    Reads the encoded behavior log and the sampled user profile for ``FRAC``
    from ``../sampled_data/``, keeps log rows with ``time_stamp`` at or after
    1493769600, and processes the users in batches.  Each batch is written to
    ``../sampled_data/user_hist_session_<FRAC>_<model>_<i>.pkl``.

    Args:
        model: ``'din'`` (one session per user) or ``'dsin'`` (sessions split
            on 30-minute gaps).
        FRAC: sampling fraction used in the input/output file names.
        n_jobs: number of joblib workers.
        batch_size: number of users handled per output file.

    Raises:
        ValueError: if ``model`` is neither ``'din'`` nor ``'dsin'``.
    """
    if model not in ['din', 'dsin']:
        raise ValueError('model must be din or dsin')

    print("gen " + model + " hist sess", FRAC)
    name = '../sampled_data/behavior_log_pv_user_filter_enc_' + str(FRAC) + '.pkl'
    data = pd.read_pickle(name)
    data = data.loc[data.time_stamp >= 1493769600]  # 0503-0513
    # 0504~1493856000
    # 0503 1493769600

    user = pd.read_pickle('../sampled_data/user_profile_' + str(FRAC) + '.pkl')

    n_samples = user.shape[0]
    print(n_samples)
    iters = (n_samples - 1) // batch_size + 1

    print("total", iters, "iters", "batch_size", batch_size)
    for i in range(0, iters):
        target_user = user['userid'].values[i * batch_size:(i + 1) * batch_size]
        sub_data = data.loc[data.user.isin(target_user)]
        print(i, 'iter start')
        df_grouped = sub_data.groupby('user')
        if model == 'din':
            user_hist_session = applyParallel(
                df_grouped, gen_session_list_din, n_jobs=n_jobs, backend='loky')
        else:
            user_hist_session = applyParallel(
                df_grouped, gen_session_list_dsin, n_jobs=n_jobs, backend='multiprocessing')
        pd.to_pickle(user_hist_session, '../sampled_data/user_hist_session_' +
                     str(FRAC) + '_' + model + '_' + str(i) + '.pkl')
        print(i, 'pickled')
        # Release the batch before building the next one.
        del user_hist_session
        gc.collect()
        print(i, 'del')

    print("1_gen " + model + " hist sess done")


if __name__ == "__main__":
    gen_user_hist_sessions('din', FRAC)
    gen_user_hist_sessions('dsin', FRAC)
from sklearn.metrics import log_loss, roc_auc_score 6 | from tensorflow.python.keras import backend as K 7 | 8 | from config import DSIN_SESS_COUNT, DSIN_SESS_MAX_LEN, FRAC 9 | from dsin import DSIN 10 | 11 | 12 | 13 | if __name__ == '__main__': 14 | SESS_COUNT = DSIN_SESS_COUNT 15 | SESS_MAX_LEN = DSIN_SESS_MAX_LEN 16 | 17 | # 离散特征和连续特征的名称和维度,维度指不同取值总共的个数 18 | fd = pd.read_pickle('../model_input/dsin_fd_' + 19 | str(FRAC) + '_' + str(SESS_COUNT) + '.pkl') 20 | 21 | # 模型的输入 22 | model_input = pd.read_pickle( 23 | '../model_input/dsin_input_' + str(FRAC) + '_' + str(SESS_COUNT) + '.pkl') 24 | 25 | # 模型的label 26 | label = pd.read_pickle('../model_input/dsin_label_' + 27 | str(FRAC) + '_' + str(SESS_COUNT) + '.pkl') 28 | 29 | sample_sub = pd.read_pickle( 30 | '../sampled_data/raw_sample_' + str(FRAC) + '.pkl') 31 | 32 | # 划分训练集和测试集 33 | sample_sub['idx'] = list(range(sample_sub.shape[0])) 34 | train_idx = sample_sub.loc[sample_sub.time_stamp < 35 | 1494633600, 'idx'].values 36 | test_idx = sample_sub.loc[sample_sub.time_stamp >= 37 | 1494633600, 'idx'].values 38 | 39 | train_input = [i[train_idx] for i in model_input] 40 | test_input = [i[test_idx] for i in model_input] 41 | 42 | train_label = label[train_idx] 43 | test_label = label[test_idx] 44 | 45 | sess_count = SESS_COUNT 46 | sess_len_max = SESS_MAX_LEN 47 | BATCH_SIZE = 4096 48 | 49 | sess_feature = ['cate_id', 'brand'] 50 | TEST_BATCH_SIZE = 2 ** 16 51 | 52 | model = DSIN(fd, sess_feature, embedding_size=4, sess_max_count=sess_count, 53 | sess_len_max=sess_len_max, dnn_hidden_units=(200, 80), att_head_num=8, 54 | att_embedding_size=1, bias_encoding=False) 55 | 56 | model.compile('adagrad', 'binary_crossentropy', 57 | metrics=['binary_crossentropy', ]) 58 | 59 | hist_ = model.fit(train_input, train_label, batch_size=BATCH_SIZE, 60 | epochs=1, initial_epoch=0, verbose=1, ) 61 | 62 | pred_ans = model.predict(test_input, TEST_BATCH_SIZE) 63 | 64 | print() 65 | print("test LogLoss", round(log_loss(test_label, 
class FeatureDictionary(object):
    """Assigns a global feature index to every column/value of the dataset.

    Exactly one of ``trainfile``/``dfTrain`` and exactly one of
    ``testfile``/``dfTest`` must be given.  After construction:

    * ``feat_dict[col]`` is a single int for columns in ``numeric_cols`` and a
      ``{value: index}`` dict for every other non-ignored column;
    * ``feat_dim`` is the total number of indices handed out.
    """

    def __init__(self, trainfile=None, testfile=None,
                 dfTrain=None, dfTest=None, numeric_cols=None,
                 ignore_cols=None):
        assert not ((trainfile is None) and (dfTrain is None)), "trainfile or dfTrain at least one is set"
        assert not ((trainfile is not None) and (dfTrain is not None)), "only one can be set"
        assert not ((testfile is None) and (dfTest is None)), "testfile or dfTest at least one is set"
        assert not ((testfile is not None) and (dfTest is not None)), "only one can be set"

        self.trainfile = trainfile
        self.testfile = testfile
        self.dfTrain = dfTrain
        self.dfTest = dfTest
        # A fresh list per instance: a shared ``[]`` default would be stored
        # on, and mutable through, every instance created without the argument.
        self.numeric_cols = [] if numeric_cols is None else numeric_cols
        self.ignore_cols = [] if ignore_cols is None else ignore_cols
        self.gen_feat_dict()

    def gen_feat_dict(self):
        """Build ``self.feat_dict`` and ``self.feat_dim`` from train + test."""
        dfTrain = pd.read_csv(self.trainfile) if self.dfTrain is None else self.dfTrain
        dfTest = pd.read_csv(self.testfile) if self.dfTest is None else self.dfTest

        # Index over the union so values seen only in the test set get a slot.
        df = pd.concat([dfTrain, dfTest])

        self.feat_dict = {}
        tc = 0  # next free feature index
        for col in df.columns:
            if col in self.ignore_cols:
                continue
            if col in self.numeric_cols:
                # A numeric column occupies one index; its value is the weight.
                self.feat_dict[col] = tc
                tc += 1
            else:
                # A categorical column occupies one index per distinct value.
                us = df[col].unique()
                self.feat_dict[col] = dict(zip(us, range(tc, len(us) + tc)))
                tc += len(us)

        self.feat_dim = tc


class DataParser(object):
    """Converts raw rows into parallel feature-index / feature-value lists."""

    def __init__(self, feat_dict):
        # feat_dict is a FeatureDictionary (its .feat_dict, .numeric_cols and
        # .ignore_cols attributes are read by parse()).
        self.feat_dict = feat_dict

    def parse(self, infile=None, df=None, has_label=False):
        """Parse a CSV file or DataFrame into index and value matrices.

        Exactly one of ``infile`` / ``df`` must be given; ``df`` is copied and
        left untouched.

        Returns:
            ``(xi, xv, y)`` when ``has_label`` is true, otherwise
            ``(xi, xv, ids)``.  ``xi`` holds the feature index of every kept
            column per row; ``xv`` holds the raw value for numeric columns and
            ``1.`` for categorical ones.  ``y`` is the ``target`` column and
            ``ids`` the ``id`` column.
        """
        assert not ((infile is None) and (df is None)), "infile or df at least one is set"
        assert not ((infile is not None) and (df is not None)), "only one can be set"

        if infile is None:
            dfi = df.copy()
        else:
            dfi = pd.read_csv(infile)

        if has_label:
            y = dfi['target'].values.tolist()
            dfi.drop(['id', 'target'], axis=1, inplace=True)
        else:
            ids = dfi['id'].values.tolist()
            dfi.drop(['id'], axis=1, inplace=True)
        # dfi for feature index
        # dfv for feature value which can be either binary (1/0) or float (e.g., 10.24)
        dfv = dfi.copy()
        # Iterate over a snapshot of the column names: columns are dropped
        # from dfi inside the loop.
        for col in list(dfi.columns):
            if col in self.feat_dict.ignore_cols:
                dfi.drop(col, axis=1, inplace=True)
                dfv.drop(col, axis=1, inplace=True)
                continue
            if col in self.feat_dict.numeric_cols:
                dfi[col] = self.feat_dict.feat_dict[col]
            else:
                dfi[col] = dfi[col].map(self.feat_dict.feat_dict[col])
                dfv[col] = 1.

        xi = dfi.values.tolist()
        xv = dfv.values.tolist()

        if has_label:
            return xi, xv, y
        else:
            return xi, xv, ids
"ps_ind_18_bin", 25 | # "ps_calc_15_bin", "ps_calc_16_bin", "ps_calc_17_bin", 26 | # "ps_calc_18_bin", "ps_calc_19_bin", "ps_calc_20_bin", 27 | # numeric 28 | "ps_reg_01", "ps_reg_02", "ps_reg_03", 29 | "ps_car_12", "ps_car_13", "ps_car_14", "ps_car_15", 30 | 31 | # feature engineering 32 | "missing_feat", "ps_car_13_x_ps_reg_03", 33 | ] 34 | 35 | IGNORE_COLS = [ 36 | "id", "target", 37 | "ps_calc_01", "ps_calc_02", "ps_calc_03", "ps_calc_04", 38 | "ps_calc_05", "ps_calc_06", "ps_calc_07", "ps_calc_08", 39 | "ps_calc_09", "ps_calc_10", "ps_calc_11", "ps_calc_12", 40 | "ps_calc_13", "ps_calc_14", 41 | "ps_calc_15_bin", "ps_calc_16_bin", "ps_calc_17_bin", 42 | "ps_calc_18_bin", "ps_calc_19_bin", "ps_calc_20_bin" 43 | ] 44 | -------------------------------------------------------------------------------- /ctr_of_recommendation/DeepFM_model/fig/DNN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/DeepFM_model/fig/DNN.png -------------------------------------------------------------------------------- /ctr_of_recommendation/DeepFM_model/fig/DeepFM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/DeepFM_model/fig/DeepFM.png -------------------------------------------------------------------------------- /ctr_of_recommendation/DeepFM_model/fig/FM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/DeepFM_model/fig/FM.png -------------------------------------------------------------------------------- /ctr_of_recommendation/DeepFM_model/metrics.py: 
def gini(actual, pred):
    """Gini coefficient of ``pred`` as a ranking of ``actual``.

    Rows are ordered by descending prediction (ties broken by original
    position); the cumulative share of ``actual`` along that order is summed
    and compared with the ``(n + 1) / 2`` baseline subtracted below.

    Args:
        actual: sequence of true values.
        pred: sequence of predictions, same length as ``actual``.

    Returns:
        The unnormalized Gini value as a float.
    """
    assert (len(actual) == len(pred))
    # dtype=float: the ``np.float`` alias no longer exists in current NumPy.
    table = np.asarray(np.c_[actual, pred, np.arange(len(actual))], dtype=float)
    # lexsort uses the LAST key as primary: descending pred, then row index.
    table = table[np.lexsort((table[:, 2], -1 * table[:, 1]))]
    total_losses = table[:, 0].sum()
    gini_sum = table[:, 0].cumsum().sum() / total_losses

    gini_sum -= (len(actual) + 1) / 2.
    return gini_sum / len(actual)


def gini_norm(actual, pred):
    """Gini of ``pred`` divided by the Gini of a perfect ranking of ``actual``."""
    return gini(actual, pred) / gini(actual, actual)
122,0.54158 76 | 124,0.54290 77 | 126,0.54196 78 | 128,0.54516 79 | 129,0.54435 80 | 130,0.54562 81 | 131,0.54449 82 | 132,0.54567 83 | 133,0.54219 84 | 134,0.54506 85 | 135,0.54226 86 | 136,0.54301 87 | 139,0.54165 88 | 140,0.54528 89 | 141,0.54275 90 | 146,0.54209 91 | 148,0.54334 92 | 151,0.54195 93 | 152,0.54450 94 | 154,0.54214 95 | 157,0.54393 96 | 158,0.54286 97 | 159,0.54520 98 | 161,0.54310 99 | 164,0.54332 100 | 165,0.54339 101 | 167,0.54313 102 | -------------------------------------------------------------------------------- /ctr_of_recommendation/DeepFM_model/output/DeepFM_Mean-0.11470_Std0.37335.csv: -------------------------------------------------------------------------------- 1 | id,target 2 | 0,0.46803 3 | 1,0.55377 4 | 2,0.53959 5 | 3,0.48240 6 | 4,0.42640 7 | 5,0.53783 8 | 6,0.43397 9 | 8,0.54862 10 | 10,0.39992 11 | 11,0.48496 12 | 12,0.56211 13 | 14,0.48791 14 | 15,0.40421 15 | 18,0.38874 16 | 21,0.48275 17 | 23,0.36596 18 | 24,0.54895 19 | 25,0.55286 20 | 27,0.46398 21 | 29,0.25796 22 | 30,0.52880 23 | 31,0.53623 24 | 32,0.38785 25 | 33,0.49019 26 | 37,0.53059 27 | 38,0.32213 28 | 39,0.48938 29 | 40,0.44188 30 | 41,0.39470 31 | 42,0.43526 32 | 44,0.38037 33 | 45,0.44053 34 | 47,0.47693 35 | 49,0.43951 36 | 51,0.52558 37 | 52,0.56112 38 | 53,0.63015 39 | 54,0.28074 40 | 55,0.50253 41 | 56,0.36943 42 | 57,0.41124 43 | 59,0.47449 44 | 60,0.41512 45 | 62,0.45376 46 | 63,0.56464 47 | 67,0.48383 48 | 68,0.44448 49 | 69,0.43281 50 | 70,0.41257 51 | 71,0.36101 52 | 73,0.24134 53 | 75,0.48104 54 | 76,0.41155 55 | 81,0.52558 56 | 82,0.40699 57 | 83,0.35711 58 | 86,0.36253 59 | 87,0.42458 60 | 88,0.57573 61 | 91,0.50545 62 | 92,0.57203 63 | 94,0.53472 64 | 97,0.47725 65 | 100,0.42449 66 | 102,0.49121 67 | 103,0.48863 68 | 105,0.59440 69 | 106,0.40794 70 | 108,0.49273 71 | 113,0.33953 72 | 114,0.50476 73 | 115,0.53934 74 | 118,0.48991 75 | 122,0.50319 76 | 124,0.41910 77 | 126,0.41064 78 | 128,0.36258 79 | 129,0.31102 80 | 130,0.45700 81 | 131,0.55222 
82 | 132,0.47241 83 | 133,0.47101 84 | 134,0.45344 85 | 135,0.55308 86 | 136,0.50106 87 | 139,0.42091 88 | 140,0.44550 89 | 141,0.42207 90 | 146,0.46423 91 | 148,0.52868 92 | 151,0.44960 93 | 152,0.26475 94 | 154,0.56421 95 | 157,0.58842 96 | 158,0.42789 97 | 159,0.43978 98 | 161,0.62290 99 | 164,0.54502 100 | 165,0.38185 101 | 167,0.53922 102 | -------------------------------------------------------------------------------- /ctr_of_recommendation/DeepFM_model/output/DeepFM_Mean0.01434_Std0.10176.csv: -------------------------------------------------------------------------------- 1 | id,target 2 | 0,0.32278 3 | 1,0.41663 4 | 2,0.44417 5 | 3,0.47512 6 | 4,0.52361 7 | 5,0.33677 8 | 6,0.44370 9 | 8,0.30100 10 | 10,0.48097 11 | 11,0.52027 12 | 12,0.26543 13 | 14,0.40398 14 | 15,0.46376 15 | 18,0.38902 16 | 21,0.35526 17 | 23,0.41269 18 | 24,0.37623 19 | 25,0.30560 20 | 27,0.41068 21 | 29,0.49968 22 | 30,0.48046 23 | 31,0.53911 24 | 32,0.37760 25 | 33,0.42462 26 | 37,0.43910 27 | 38,0.43226 28 | 39,0.40951 29 | 40,0.42573 30 | 41,0.38593 31 | 42,0.45659 32 | 44,0.42400 33 | 45,0.46563 34 | 47,0.41856 35 | 49,0.43669 36 | 51,0.39470 37 | 52,0.35710 38 | 53,0.35468 39 | 54,0.58721 40 | 55,0.34572 41 | 56,0.49496 42 | 57,0.52123 43 | 59,0.43579 44 | 60,0.37308 45 | 62,0.36949 46 | 63,0.36458 47 | 67,0.40002 48 | 68,0.40630 49 | 69,0.51984 50 | 70,0.43685 51 | 71,0.34467 52 | 73,0.49609 53 | 75,0.42494 54 | 76,0.36640 55 | 81,0.41558 56 | 82,0.49456 57 | 83,0.41528 58 | 86,0.38741 59 | 87,0.42377 60 | 88,0.49288 61 | 91,0.43845 62 | 92,0.50188 63 | 94,0.38807 64 | 97,0.43247 65 | 100,0.37401 66 | 102,0.36822 67 | 103,0.39734 68 | 105,0.38886 69 | 106,0.40349 70 | 108,0.29820 71 | 113,0.38590 72 | 114,0.53072 73 | 115,0.37515 74 | 118,0.34776 75 | 122,0.35378 76 | 124,0.35501 77 | 126,0.36031 78 | 128,0.36464 79 | 129,0.48796 80 | 130,0.40816 81 | 131,0.44641 82 | 132,0.40488 83 | 133,0.39336 84 | 134,0.51089 85 | 135,0.49477 86 | 136,0.35754 87 | 139,0.46074 88 | 
140,0.38236 89 | 141,0.37077 90 | 146,0.29805 91 | 148,0.43685 92 | 151,0.45538 93 | 152,0.35027 94 | 154,0.35761 95 | 157,0.36037 96 | 158,0.39519 97 | 159,0.33552 98 | 161,0.41159 99 | 164,0.42803 100 | 165,0.44548 101 | 167,0.39931 102 | -------------------------------------------------------------------------------- /ctr_of_recommendation/DeepFM_model/output/DeepFM_Mean0.05735_Std0.20027.csv: -------------------------------------------------------------------------------- 1 | id,target 2 | 0,0.72139 3 | 1,0.51760 4 | 2,0.59032 5 | 3,0.63660 6 | 4,0.50603 7 | 5,0.57058 8 | 6,0.72299 9 | 8,0.62921 10 | 10,0.64393 11 | 11,0.62246 12 | 12,0.64539 13 | 14,0.62271 14 | 15,0.63971 15 | 18,0.74351 16 | 21,0.56603 17 | 23,0.65027 18 | 24,0.62978 19 | 25,0.56364 20 | 27,0.55366 21 | 29,0.64651 22 | 30,0.63995 23 | 31,0.51301 24 | 32,0.65243 25 | 33,0.62960 26 | 37,0.61379 27 | 38,0.62845 28 | 39,0.56194 29 | 40,0.55361 30 | 41,0.65380 31 | 42,0.56262 32 | 44,0.52620 33 | 45,0.56058 34 | 47,0.67995 35 | 49,0.58040 36 | 51,0.57256 37 | 52,0.57186 38 | 53,0.74692 39 | 54,0.63829 40 | 55,0.61376 41 | 56,0.57716 42 | 57,0.66004 43 | 59,0.60760 44 | 60,0.68578 45 | 62,0.68983 46 | 63,0.62641 47 | 67,0.59588 48 | 68,0.59095 49 | 69,0.56658 50 | 70,0.60620 51 | 71,0.53494 52 | 73,0.73047 53 | 75,0.56699 54 | 76,0.68507 55 | 81,0.59263 56 | 82,0.45351 57 | 83,0.65228 58 | 86,0.67729 59 | 87,0.63932 60 | 88,0.62208 61 | 91,0.50822 62 | 92,0.60571 63 | 94,0.61354 64 | 97,0.62548 65 | 100,0.69225 66 | 102,0.50505 67 | 103,0.61700 68 | 105,0.65031 69 | 106,0.66246 70 | 108,0.67469 71 | 113,0.66512 72 | 114,0.53249 73 | 115,0.55344 74 | 118,0.68072 75 | 122,0.53538 76 | 124,0.65328 77 | 126,0.64717 78 | 128,0.73029 79 | 129,0.63653 80 | 130,0.63030 81 | 131,0.50802 82 | 132,0.58770 83 | 133,0.62624 84 | 134,0.44326 85 | 135,0.63895 86 | 136,0.56856 87 | 139,0.53739 88 | 140,0.63811 89 | 141,0.70656 90 | 146,0.57495 91 | 148,0.62791 92 | 151,0.60073 93 | 152,0.73494 94 | 154,0.60894 95 
| 157,0.60582 96 | 158,0.54721 97 | 159,0.70589 98 | 161,0.63762 99 | 164,0.53981 100 | 165,0.65285 101 | 167,0.52954 102 | -------------------------------------------------------------------------------- /ctr_of_recommendation/DeepFM_model/output/DeepFM_Mean0.26137_Std0.00210.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/DeepFM_model/output/DeepFM_Mean0.26137_Std0.00210.csv -------------------------------------------------------------------------------- /ctr_of_recommendation/DeepFM_model/output/FM_Mean0.23297_Std0.05576.csv: -------------------------------------------------------------------------------- 1 | id,target 2 | 0,0.37706 3 | 1,0.40747 4 | 2,0.28335 5 | 3,0.29426 6 | 4,0.25722 7 | 5,0.28061 8 | 6,0.36010 9 | 8,0.26813 10 | 10,0.50419 11 | 11,0.29652 12 | 12,0.22183 13 | 14,0.28447 14 | 15,0.44019 15 | 18,0.43666 16 | 21,0.32927 17 | 23,0.28054 18 | 24,0.25594 19 | 25,0.27155 20 | 27,0.26363 21 | 29,0.34340 22 | 30,0.37857 23 | 31,0.30758 24 | 32,0.41682 25 | 33,0.26732 26 | 37,0.39802 27 | 38,0.32117 28 | 39,0.39406 29 | 40,0.24067 30 | 41,0.39323 31 | 42,0.40359 32 | 44,0.28283 33 | 45,0.36268 34 | 47,0.31174 35 | 49,0.35913 36 | 51,0.27528 37 | 52,0.28072 38 | 53,0.35339 39 | 54,0.45116 40 | 55,0.33479 41 | 56,0.47107 42 | 57,0.34473 43 | 59,0.34868 44 | 60,0.45001 45 | 62,0.35572 46 | 63,0.39236 47 | 67,0.36394 48 | 68,0.27234 49 | 69,0.51613 50 | 70,0.33188 51 | 71,0.24334 52 | 73,0.36806 53 | 75,0.41980 54 | 76,0.37788 55 | 81,0.31707 56 | 82,0.33174 57 | 83,0.35205 58 | 86,0.34927 59 | 87,0.45646 60 | 88,0.27697 61 | 91,0.34399 62 | 92,0.42113 63 | 94,0.35314 64 | 97,0.29256 65 | 100,0.44001 66 | 102,0.30431 67 | 103,0.25371 68 | 105,0.41161 69 | 106,0.39540 70 | 108,0.36266 71 | 113,0.36232 72 | 114,0.37745 73 | 115,0.28234 74 | 118,0.37840 75 | 122,0.22426 76 | 124,0.30503 77 | 
input_x_size = 20
field_size = 2

vector_dimension = 3

total_plan_train_steps = 1000
# Plain SGD: one gradient-descent update per individual sample.
batch_size = 1

all_data_size = 1000

lr = 0.01

MODEL_SAVE_PATH = "TFModel"
MODEL_NAME = "FFM"


def createTwoDimensionWeight(input_x_size, field_size, vector_dimension):
    """Create the pairwise-term weights, shape ``[n_features, n_fields, k]``."""
    weights = tf.truncated_normal([input_x_size, field_size, vector_dimension])

    tf_weights = tf.Variable(weights)

    return tf_weights


def createOneDimensionWeight(input_x_size):
    """Create the linear-term weights, shape ``[n_features]``."""
    weights = tf.truncated_normal([input_x_size])
    tf_weights = tf.Variable(weights)
    return tf_weights


def createZeroDimensionWeight():
    """Create the bias weight, shape ``[1]``."""
    weights = tf.truncated_normal([1])
    tf_weights = tf.Variable(weights)
    return tf_weights


def inference(input_x, input_x_field, zeroWeights, oneDimWeights, thirdWeight):
    """Build the FFM output for one sample.

    output = bias + sum_i w_i * x_i
             + sum_{i<j} <v[i, field(j)], v[j, field(i)]> * x_i * x_j

    The pairwise sum is accumulated as a tensor.  It used to be accumulated
    through ``tf.assign`` ops on a variable that nothing ever ran, so the
    pairwise term stayed at its initial 0 and ``thirdWeight`` was never
    trained.

    Args:
        input_x: float tensor holding ``input_x_size`` feature values.
        input_x_field: sequence giving the field index of every feature.
        zeroWeights, oneDimWeights, thirdWeight: bias, linear and pairwise
            weights as created by the ``create*Weight`` helpers.

    Returns:
        The model output tensor.
    """
    secondValue = tf.reduce_sum(tf.multiply(oneDimWeights, input_x, name='secondValue'))

    firstTwoValue = tf.add(zeroWeights, secondValue, name="firstTwoValue")

    thirdValue = tf.constant(0.0, dtype=tf.float32)
    input_shape = input_x_size

    for i in range(input_shape):
        featureIndex1 = i
        fieldIndex1 = int(input_x_field[i])
        for j in range(i + 1, input_shape):
            featureIndex2 = j
            fieldIndex2 = int(input_x_field[j])

            # Latent vector of feature i with respect to the field of feature j.
            vectorLeft = tf.convert_to_tensor(
                [[featureIndex1, fieldIndex2, d] for d in range(vector_dimension)])
            weightLeft = tf.gather_nd(thirdWeight, vectorLeft)
            weightLeftAfterCut = tf.squeeze(weightLeft)

            # Latent vector of feature j with respect to the field of feature i.
            vectorRight = tf.convert_to_tensor(
                [[featureIndex2, fieldIndex1, d] for d in range(vector_dimension)])
            weightRight = tf.gather_nd(thirdWeight, vectorRight)
            weightRightAfterCut = tf.squeeze(weightRight)

            tempValue = tf.reduce_sum(tf.multiply(weightLeftAfterCut, weightRightAfterCut))

            indices2 = [i]
            indices3 = [j]

            xi = tf.squeeze(tf.gather_nd(input_x, indices2))
            xj = tf.squeeze(tf.gather_nd(input_x, indices3))

            product = tf.reduce_sum(tf.multiply(xi, xj))

            secondItemVal = tf.multiply(tempValue, product)

            thirdValue = tf.add(thirdValue, secondItemVal)

    return tf.add(firstTwoValue, thirdValue)


def gen_data():
    """Generate random training data.

    Returns:
        ``(x, y, x_field)``: ``x`` is an ``all_data_size x input_x_size``
        array of 0/1 values, ``y`` a list of ``all_data_size`` labels drawn
        from {-1, 1}, and ``x_field`` assigns feature ``i`` to field
        ``i // 10``.
    """
    labels = [-1, 1]
    y = [np.random.choice(labels, 1)[0] for _ in range(all_data_size)]
    x_field = [i // 10 for i in range(input_x_size)]
    x = np.random.randint(0, 2, size=(all_data_size, input_x_size))
    return x, y, x_field


if __name__ == '__main__':
    global_step = tf.Variable(0, trainable=False)
    trainx, trainy, trainx_field = gen_data()

    input_x = tf.placeholder(tf.float32, [input_x_size])
    input_y = tf.placeholder(tf.float32)

    lambda_w = tf.constant(0.001, name='lambda_w')
    lambda_v = tf.constant(0.001, name='lambda_v')

    zeroWeights = createZeroDimensionWeight()

    oneDimWeights = createOneDimensionWeight(input_x_size)

    thirdWeight = createTwoDimensionWeight(input_x_size,  # weights of the pairwise term
                                           field_size,
                                           vector_dimension)  # n * f * k

    y_ = inference(input_x, trainx_field, zeroWeights, oneDimWeights, thirdWeight)

    l2_norm = tf.reduce_sum(
        tf.add(
            tf.multiply(lambda_w, tf.pow(oneDimWeights, 2)),
            tf.reduce_sum(tf.multiply(lambda_v, tf.pow(thirdWeight, 2)), axis=[1, 2])
        )
    )

    # Logistic loss for labels in {-1, +1}: log(1 + exp(-y * y_hat)).
    # Without the minus sign, minimizing pushes predictions toward the
    # opposite sign of the label.
    loss = tf.log(1 + tf.exp(-input_y * y_)) + l2_norm

    # Pass global_step so the optimizer increments it; otherwise it stays 0.
    train_step = tf.train.GradientDescentOptimizer(learning_rate=lr).minimize(
        loss, global_step=global_step)

    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # The graph is fixed once built, so one summary file is enough.
        writer = tf.summary.FileWriter(os.path.join(MODEL_SAVE_PATH, MODEL_NAME), tf.get_default_graph())
        writer.close()

        for i in range(total_plan_train_steps):
            for t in range(all_data_size):
                input_x_batch = trainx[t]
                input_y_batch = trainy[t]
                predict_loss, _, steps = sess.run([loss, train_step, global_step],
                                                  feed_dict={input_x: input_x_batch, input_y: input_y_batch})

            # Report and checkpoint once per pass over the data.
            print("After {step} training step(s) , loss on training batch is {predict_loss} "
                  .format(step=steps, predict_loss=predict_loss))

            saver.save(sess, os.path.join(MODEL_SAVE_PATH, MODEL_NAME), global_step=steps)
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM-0.index -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM-0.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM-0.meta -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523526908.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523526908.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523527022.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523527022.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523527136.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523527136.meituan-sxwdeMacBook-Pro-4.local 
-------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523527252.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523527252.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523527416.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523527416.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530263.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530263.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530409.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530409.meituan-sxwdeMacBook-Pro-4.local 
-------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530500.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530500.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530509.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530509.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530517.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530517.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530526.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530526.meituan-sxwdeMacBook-Pro-4.local 
-------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530538.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530538.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530548.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530548.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530556.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530556.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530568.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530568.meituan-sxwdeMacBook-Pro-4.local 
-------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530579.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530579.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530589.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530589.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530598.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530598.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530606.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530606.meituan-sxwdeMacBook-Pro-4.local 
-------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530618.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530618.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530632.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530632.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530643.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530643.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530653.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530653.meituan-sxwdeMacBook-Pro-4.local 
-------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530660.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530660.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530668.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530668.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530675.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530675.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530686.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530686.meituan-sxwdeMacBook-Pro-4.local 
-------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530695.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530695.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530703.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530703.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530710.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530710.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530718.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530718.meituan-sxwdeMacBook-Pro-4.local 
-------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530726.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530726.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530736.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530736.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530744.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530744.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530751.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530751.meituan-sxwdeMacBook-Pro-4.local 
-------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530759.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530759.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530766.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530766.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530774.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530774.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530781.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530781.meituan-sxwdeMacBook-Pro-4.local 
-------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530789.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530789.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530798.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530798.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530808.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530808.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530820.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530820.meituan-sxwdeMacBook-Pro-4.local 
-------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530827.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530827.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530835.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530835.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530844.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530844.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530852.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530852.meituan-sxwdeMacBook-Pro-4.local 
-------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530860.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530860.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530868.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530868.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530875.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530875.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530883.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530883.meituan-sxwdeMacBook-Pro-4.local 
-------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530891.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530891.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530898.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530898.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530906.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530906.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530913.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530913.meituan-sxwdeMacBook-Pro-4.local 
-------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530921.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530921.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530930.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530930.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530938.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530938.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530945.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530945.meituan-sxwdeMacBook-Pro-4.local 
-------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530953.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530953.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530961.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530961.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530968.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530968.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530976.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530976.meituan-sxwdeMacBook-Pro-4.local 
-------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530984.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523530984.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537511.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537511.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537521.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537521.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537530.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537530.meituan-sxwdeMacBook-Pro-4.local 
-------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537538.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537538.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537547.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537547.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537556.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537556.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537565.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537565.meituan-sxwdeMacBook-Pro-4.local 
-------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537574.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537574.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537583.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537583.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537591.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537591.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537600.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537600.meituan-sxwdeMacBook-Pro-4.local 
-------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537608.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537608.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537616.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537616.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537624.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537624.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537632.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537632.meituan-sxwdeMacBook-Pro-4.local 
-------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537641.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537641.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537652.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537652.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537662.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537662.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537672.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537672.meituan-sxwdeMacBook-Pro-4.local 
-------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537682.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537682.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537691.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537691.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537700.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537700.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537709.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537709.meituan-sxwdeMacBook-Pro-4.local 
-------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537719.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537719.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537728.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537728.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537736.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537736.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537745.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537745.meituan-sxwdeMacBook-Pro-4.local 
-------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537754.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537754.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537763.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537763.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537772.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537772.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537781.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537781.meituan-sxwdeMacBook-Pro-4.local 
-------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537790.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537790.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537799.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537799.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537807.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537807.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537815.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537815.meituan-sxwdeMacBook-Pro-4.local 
-------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537825.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537825.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537834.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537834.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537843.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537843.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537852.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537852.meituan-sxwdeMacBook-Pro-4.local 
-------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537861.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537861.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537871.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537871.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537880.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537880.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537888.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537888.meituan-sxwdeMacBook-Pro-4.local 
-------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537897.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537897.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537906.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537906.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537915.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537915.meituan-sxwdeMacBook-Pro-4.local -------------------------------------------------------------------------------- /ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537925.meituan-sxwdeMacBook-Pro-4.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/FFM_Demo/TFModel/FFM/events.out.tfevents.1523537925.meituan-sxwdeMacBook-Pro-4.local 
def vectorize_dic(dic,ix=None,p=None,n=0,g=0):
    """
    Build a sparse one-hot design matrix from parallel feature lists.

    dic -- dictionary of feature lists. Keys are the name of features; each
           list holds one value per sample
    ix -- mapping from "<value><feature name>" to a column index (default None,
          meaning start from an empty mapping). It is updated in place, so the
          mapping returned for the training set can be passed back in for the
          test set
    p -- dimension of feature space (number of columns in the sparse matrix)
         (default None, meaning len(ix) after all values have been indexed)
    n -- number of samples (rows)
    g -- number of feature groups, i.e. non-zero entries per row

    Returns a tuple (csr matrix of shape (n, p), ix). Entries whose column
    index is >= p (features unseen when p was fixed) are dropped.
    """
    # Public import path; scipy.sparse.csr is a private/deprecated namespace.
    from scipy.sparse import csr_matrix

    if ix is None:
        ix = dict()

    nz = n * g

    col_ix = np.empty(nz, dtype=int)

    for i, (k, lis) in enumerate(dic.items()):
        for t in range(len(lis)):
            key = str(lis[t]) + str(k)
            # Assign the next free column the first time a feature value is
            # seen and reuse it afterwards. (The previous code incremented a
            # per-key counter here, so the stored "index" was an occurrence
            # count rather than a column id.)
            if key not in ix:
                ix[key] = len(ix)
            # Row t owns slots [t*g, t*g + g); feature group i fills slot i.
            col_ix[i + t * g] = ix[key]

    row_ix = np.repeat(np.arange(0, n), g)
    data = np.ones(nz)
    if p is None:
        p = len(ix)

    # Keep only entries that fit inside the requested feature space.
    ixx = np.where(col_ix < p)
    return csr_matrix((data[ixx], (row_ix[ixx], col_ix[ixx])), shape=(n, p)), ix
cols = ['user','item','rating','timestamp']

# Tab-separated rating files without a header row.
train = pd.read_csv('data/ua.base',delimiter='\t',names = cols)
test = pd.read_csv('data/ua.test',delimiter='\t',names = cols)

# One-hot encode the (user, item) pair of every rating.
x_train,ix = vectorize_dic({'users':train['user'].values,
                            'items':train['item'].values},n=len(train.index),g=2)

# The test matrix reuses the training index and is forced to the same width.
x_test,ix = vectorize_dic({'users':test['user'].values,
                           'items':test['item'].values},ix,x_train.shape[1],n=len(test.index),g=2)


print(x_train)
y_train = train['rating'].values
y_test = test['rating'].values

x_train = x_train.todense()
x_test = x_test.todense()

print(x_train)

print(x_train.shape)
print (x_test.shape)


n,p = x_train.shape

# Number of latent factors per feature.
k = 10

x = tf.placeholder('float',[None,p])

y = tf.placeholder('float',[None,1])

# Model parameters: global bias, per-feature weights, and factor matrix (k x p).
w0 = tf.Variable(tf.zeros([1]))
w = tf.Variable(tf.zeros([p]))

v = tf.Variable(tf.random_normal([k,p],mean=0,stddev=0.01))

# NOTE(review): `keep_dims` is the older spelling of `keepdims`; kept as-is so
# the script still runs on the TensorFlow 1.x version it was written for.
linear_terms = tf.add(w0,tf.reduce_sum(tf.multiply(w,x),1,keep_dims=True)) # n * 1
# Pairwise term: 0.5 * sum_f ((x . v_f)^2 - (x^2 . v_f^2))
pair_interactions = 0.5 * tf.reduce_sum(
    tf.subtract(
        tf.pow(
            tf.matmul(x,tf.transpose(v)),2),
        tf.matmul(tf.pow(x,2),tf.transpose(tf.pow(v,2)))
    ),axis = 1 , keep_dims=True)


y_hat = tf.add(linear_terms,pair_interactions)

lambda_w = tf.constant(0.001,name='lambda_w')
lambda_v = tf.constant(0.001,name='lambda_v')

# Reduce each penalty separately. Adding w^2 (shape [p]) to v^2 (shape [k, p])
# before reducing broadcasts w^2 across the k rows and counts it k times.
l2_norm = tf.add(
    tf.multiply(lambda_w,tf.reduce_sum(tf.pow(w,2))),
    tf.multiply(lambda_v,tf.reduce_sum(tf.pow(v,2)))
)

error = tf.reduce_mean(tf.square(y-y_hat))
loss = tf.add(error,l2_norm)


train_op = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(loss)


epochs = 10
batch_size = 1000

# Launch the graph
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)

    for epoch in tqdm(range(epochs), unit='epoch'):
        # Reshuffle the training rows every epoch.
        perm = np.random.permutation(x_train.shape[0])
        # iterate over batches
        for bX, bY in batcher(x_train[perm], y_train[perm], batch_size):
            _,t = sess.run([train_op,loss], feed_dict={x: bX.reshape(-1, p), y: bY.reshape(-1, 1)})
            print(t)


    # Evaluate on the held-out set; batcher's default batch size yields one batch.
    errors = []
    for bX, bY in batcher(x_test, y_test):
        errors.append(sess.run(error, feed_dict={x: bX.reshape(-1, p), y: bY.reshape(-1, 1)}))
        print(errors)
    RMSE = np.sqrt(np.array(errors).mean())
    print (RMSE)
# Estimator factory built by the project helper: one model function and one
# hyper-parameter set are registered under each dataset name.
# NOTE(review): what build_estimator_helper does with these mappings is defined
# in utils and is not visible here.
build_estimator = build_estimator_helper(
    model_fn=dict(census=model_fn),
    params=dict(
        census=dict(
            dropout_rate=0.2,
            learning_rate=0.01,
            hidden_units=[24, 12, 1],
        ),
    ),
)


# check name of all the tensor in the census_checkpoint

if __name__ == '__main__':
    print ('checking name of all the tensor in the FNN pretrain census_checkpoint')
    from tensorflow.python.tools.inspect_checkpoint import print_tensors_in_checkpoint_file

    fm_checkpoint = tf.train.latest_checkpoint('./census_checkpoint/FM')
    # First dump every tensor, then only the FM embedding matrix.
    print_tensors_in_checkpoint_file(fm_checkpoint, all_tensors=True)
    print_tensors_in_checkpoint_file(fm_checkpoint, all_tensors=False, tensor_name='fm_interaction/v')
def build_features():
    """
    Assemble the list of sparse feature columns fed to the model.

    Every key of EMB_CONFIGS becomes a hash-bucketed categorical column
    (bucket count taken from its 'hash_size' entry) wrapped in an indicator
    column. Every key of BUCKET_CONFIGS becomes a numeric column bucketized at
    the boundaries in its 'bin' entry.

    Returns the hashed columns followed by the bucketized columns.
    """
    hashed_columns = [
        tf.feature_column.indicator_column(
            tf.feature_column.categorical_column_with_hash_bucket(
                name, hash_bucket_size=cfg['hash_size']))
        for name, cfg in EMB_CONFIGS.items()
    ]

    bucketized_columns = [
        tf.feature_column.bucketized_column(
            tf.feature_column.numeric_column(name), boundaries=cfg['bin'])
        for name, cfg in BUCKET_CONFIGS.items()
    ]

    return hashed_columns + bucketized_columns
y_train = df_train['target'] # training label 21 | y_test = df_test['target'] # testing label 22 | X_train = df_train[NUMERIC_COLS] # training dataset 23 | X_test = df_test[NUMERIC_COLS] # testing dataset 24 | 25 | # create dataset for lightgbm 26 | lgb_train = lgb.Dataset(X_train, y_train) 27 | lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train) 28 | 29 | params = { 30 | 'task': 'train', 31 | 'boosting_type': 'gbdt', 32 | 'objective': 'binary', 33 | 'metric': {'binary_logloss'}, 34 | 'num_leaves': 64, 35 | 'num_trees': 100, 36 | 'learning_rate': 0.01, 37 | 'feature_fraction': 0.9, 38 | 'bagging_fraction': 0.8, 39 | 'bagging_freq': 5, 40 | 'verbose': 0 41 | } 42 | 43 | # number of leaves,will be used in feature transformation 44 | num_leaf = 64 45 | 46 | print('Start training...') 47 | # train 48 | gbm = lgb.train(params, 49 | lgb_train, 50 | num_boost_round=100, 51 | valid_sets=lgb_train) 52 | 53 | print('Save model...') 54 | # save model to file 55 | gbm.save_model('model.txt') 56 | 57 | print('Start predicting...') 58 | # predict and get data on leaves, training data 59 | y_pred = gbm.predict(X_train, pred_leaf=True) 60 | 61 | print(np.array(y_pred).shape) 62 | print(y_pred[:10]) 63 | 64 | print('Writing transformed training data') 65 | transformed_training_matrix = np.zeros([len(y_pred), len(y_pred[0]) * num_leaf], 66 | dtype=np.int64) # N * num_tress * num_leafs 67 | for i in range(0, len(y_pred)): 68 | temp = np.arange(len(y_pred[0])) * num_leaf + np.array(y_pred[i]) 69 | transformed_training_matrix[i][temp] += 1 70 | 71 | 72 | y_pred = gbm.predict(X_test, pred_leaf=True) 73 | print('Writing transformed testing data') 74 | transformed_testing_matrix = np.zeros([len(y_pred), len(y_pred[0]) * num_leaf], dtype=np.int64) 75 | for i in range(0, len(y_pred)): 76 | temp = np.arange(len(y_pred[0])) * num_leaf + np.array(y_pred[i]) 77 | transformed_testing_matrix[i][temp] += 1 78 | 79 | 80 | lm = LogisticRegression(penalty='l2',C=0.05) # logestic model 
construction 81 | lm.fit(transformed_training_matrix,y_train) # fitting the data 82 | y_pred_test = lm.predict_proba(transformed_testing_matrix) # Give the probabilty on each label 83 | 84 | print(y_pred_test) 85 | 86 | NE = (-1) / len(y_pred_test) * sum(((1+y_test)/2 * np.log(y_pred_test[:,1]) + (1-y_test)/2 * np.log(1 - y_pred_test[:,1]))) 87 | print("Normalized Cross Entropy " + str(NE)) 88 | # 此NE中只有分子没有分母所有较大 89 | # NE为2.22左右 -------------------------------------------------------------------------------- /ctr_of_recommendation/MLR(LS-PLM)_Demo/.idea/Basic-MLR-Demo.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 12 | -------------------------------------------------------------------------------- /ctr_of_recommendation/MLR(LS-PLM)_Demo/.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | -------------------------------------------------------------------------------- /ctr_of_recommendation/MLR(LS-PLM)_Demo/.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /ctr_of_recommendation/MLR(LS-PLM)_Demo/__pycache__/data.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/MLR(LS-PLM)_Demo/__pycache__/data.cpython-37.pyc -------------------------------------------------------------------------------- /ctr_of_recommendation/MLR(LS-PLM)_Demo/data.py: -------------------------------------------------------------------------------- 1 | # 数据预处理 2 | ''' 3 | 数据中存在连续特征和离散特征,所以先要对数据进行一个简单的处理,处理包括将离散特征转换为one-hot以及对连续特征进行标准化。 4 | 有一个需要注意的地方,训练集和测试集中离散特征出现的个数可能不一样,因此需要先将数据合并,然后转换成one-hot,最后再分开, 5 | ''' 6 | 7 | from 
random import random 8 | import pandas as pd 9 | from sklearn.preprocessing import StandardScaler 10 | 11 | def get_data(): 12 | train_data = pd.read_table("data/adult.data.txt",header=None,delimiter=',') 13 | test_data = pd.read_table("data/adult.test.txt",header=None,delimiter=',') 14 | 15 | all_columns = ['age','workclass','fnlwgt','education','education-num', 16 | 'marital-status','occupation','relationship','race','sex', 17 | 'capital-gain','capital-loss','hours-per-week','native-country','label','type'] 18 | 19 | continus_columns = ['age','fnlwgt','education-num','capital-gain','capital-loss','hours-per-week'] 20 | dummy_columns = ['workclass','education','marital-status','occupation','relationship','race','sex','native-country'] 21 | 22 | train_data['type'] = 1 23 | test_data['type'] = 2 24 | 25 | all_data = pd.concat([train_data,test_data],axis=0) 26 | all_data.columns = all_columns 27 | 28 | all_data = pd.get_dummies(all_data,columns=dummy_columns) 29 | 30 | 31 | test_data = all_data[all_data['type']==2].drop(['type'],axis=1) 32 | train_data = all_data[all_data['type']==1].drop(['type'],axis=1) 33 | 34 | train_data['label'] = train_data['label'].map(lambda x: 1 if x.strip() == '>50K' else 0) 35 | test_data['label'] = test_data['label'].map(lambda x: 1 if x.strip() == '>50K.' 
else 0) 36 | 37 | 38 | for col in continus_columns: 39 | ss = StandardScaler() 40 | train_data[col] = ss.fit_transform(train_data[[col]]) 41 | test_data[col] = ss.transform(test_data[[col]]) 42 | 43 | 44 | train_y = train_data['label'] 45 | train_x = train_data.drop(['label'],axis=1) 46 | test_y = test_data['label'] 47 | test_x = test_data.drop(['label'],axis=1) 48 | 49 | return train_x,train_y,test_x,test_y 50 | -------------------------------------------------------------------------------- /ctr_of_recommendation/MLR(LS-PLM)_Demo/data/adult: -------------------------------------------------------------------------------- 1 | Index of adult 2 | 3 | 02 Dec 1996 140 Index 4 | 10 Aug 1996 3974305 adult.data 5 | 10 Aug 1996 4267 adult.names 6 | 10 Aug 1996 2003153 adult.test 7 | -------------------------------------------------------------------------------- /ctr_of_recommendation/MLR(LS-PLM)_Demo/data/adult.names: -------------------------------------------------------------------------------- 1 | | This data was extracted from the census bureau database found at 2 | | http://www.census.gov/ftp/pub/DES/www/welcome.html 3 | | Donor: Ronny Kohavi and Barry Becker, 4 | | Data Mining and Visualization 5 | | Silicon Graphics. 6 | | e-mail: ronnyk@sgi.com for questions. 7 | | Split into train-test using MLC++ GenCVFiles (2/3, 1/3 random). 8 | | 48842 instances, mix of continuous and discrete (train=32561, test=16281) 9 | | 45222 if instances with unknown values are removed (train=30162, test=15060) 10 | | Duplicate or conflicting instances : 6 11 | | Class probabilities for adult.all file 12 | | Probability for the label '>50K' : 23.93% / 24.78% (without unknowns) 13 | | Probability for the label '<=50K' : 76.07% / 75.22% (without unknowns) 14 | | 15 | | Extraction was done by Barry Becker from the 1994 Census database. 
A set of 16 | | reasonably clean records was extracted using the following conditions: 17 | | ((AAGE>16) && (AGI>100) && (AFNLWGT>1)&& (HRSWK>0)) 18 | | 19 | | Prediction task is to determine whether a person makes over 50K 20 | | a year. 21 | | 22 | | First cited in: 23 | | @inproceedings{kohavi-nbtree, 24 | | author={Ron Kohavi}, 25 | | title={Scaling Up the Accuracy of Naive-Bayes Classifiers: a 26 | | Decision-Tree Hybrid}, 27 | | booktitle={Proceedings of the Second International Conference on 28 | | Knowledge Discovery and Data Mining}, 29 | | year = 1996, 30 | | pages={to appear}} 31 | | 32 | | Error Accuracy reported as follows, after removal of unknowns from 33 | | train/test sets): 34 | | C4.5 : 84.46+-0.30 35 | | Naive-Bayes: 83.88+-0.30 36 | | NBTree : 85.90+-0.28 37 | | 38 | | 39 | | Following algorithms were later run with the following error rates, 40 | | all after removal of unknowns and using the original train/test split. 41 | | All these numbers are straight runs using MLC++ with default values. 42 | | 43 | | Algorithm Error 44 | | -- ---------------- ----- 45 | | 1 C4.5 15.54 46 | | 2 C4.5-auto 14.46 47 | | 3 C4.5 rules 14.94 48 | | 4 Voted ID3 (0.6) 15.64 49 | | 5 Voted ID3 (0.8) 16.47 50 | | 6 T2 16.84 51 | | 7 1R 19.54 52 | | 8 NBTree 14.10 53 | | 9 CN2 16.00 54 | | 10 HOODG 14.82 55 | | 11 FSS Naive Bayes 14.05 56 | | 12 IDTM (Decision table) 14.46 57 | | 13 Naive-Bayes 16.12 58 | | 14 Nearest-neighbor (1) 21.42 59 | | 15 Nearest-neighbor (3) 20.35 60 | | 16 OC1 15.04 61 | | 17 Pebls Crashed. Unknown why (bounds WERE increased) 62 | | 63 | | Conversion of original data as follows: 64 | | 1. Discretized agrossincome into two ranges with threshold 50,000. 65 | | 2. Convert U.S. to US to avoid periods. 66 | | 3. Convert Unknown to "?" 67 | | 4. Run MLC++ GenCVFiles to generate data,test. 
68 | | 69 | | Description of fnlwgt (final weight) 70 | | 71 | | The weights on the CPS files are controlled to independent estimates of the 72 | | civilian noninstitutional population of the US. These are prepared monthly 73 | | for us by Population Division here at the Census Bureau. We use 3 sets of 74 | | controls. 75 | | These are: 76 | | 1. A single cell estimate of the population 16+ for each state. 77 | | 2. Controls for Hispanic Origin by age and sex. 78 | | 3. Controls by Race, age and sex. 79 | | 80 | | We use all three sets of controls in our weighting program and "rake" through 81 | | them 6 times so that by the end we come back to all the controls we used. 82 | | 83 | | The term estimate refers to population totals derived from CPS by creating 84 | | "weighted tallies" of any specified socio-economic characteristics of the 85 | | population. 86 | | 87 | | People with similar demographic characteristics should have 88 | | similar weights. There is one important caveat to remember 89 | | about this statement. That is that since the CPS sample is 90 | | actually a collection of 51 state samples, each with its own 91 | | probability of selection, the statement only applies within 92 | | state. 93 | 94 | 95 | >50K, <=50K. 96 | 97 | age: continuous. 98 | workclass: Private, Self-emp-not-inc, Self-emp-inc, Federal-gov, Local-gov, State-gov, Without-pay, Never-worked. 99 | fnlwgt: continuous. 100 | education: Bachelors, Some-college, 11th, HS-grad, Prof-school, Assoc-acdm, Assoc-voc, 9th, 7th-8th, 12th, Masters, 1st-4th, 10th, Doctorate, 5th-6th, Preschool. 101 | education-num: continuous. 102 | marital-status: Married-civ-spouse, Divorced, Never-married, Separated, Widowed, Married-spouse-absent, Married-AF-spouse. 103 | occupation: Tech-support, Craft-repair, Other-service, Sales, Exec-managerial, Prof-specialty, Handlers-cleaners, Machine-op-inspct, Adm-clerical, Farming-fishing, Transport-moving, Priv-house-serv, Protective-serv, Armed-Forces. 
104 | relationship: Wife, Own-child, Husband, Not-in-family, Other-relative, Unmarried. 105 | race: White, Asian-Pac-Islander, Amer-Indian-Eskimo, Other, Black. 106 | sex: Female, Male. 107 | capital-gain: continuous. 108 | capital-loss: continuous. 109 | hours-per-week: continuous. 110 | native-country: United-States, Cambodia, England, Puerto-Rico, Canada, Germany, Outlying-US(Guam-USVI-etc), India, Japan, Greece, South, China, Cuba, Iran, Honduras, Philippines, Italy, Poland, Jamaica, Vietnam, Mexico, Portugal, Ireland, France, Dominican-Republic, Laos, Ecuador, Taiwan, Haiti, Columbia, Hungary, Guatemala, Nicaragua, Scotland, Thailand, Yugoslavia, El-Salvador, Trinadad&Tobago, Peru, Hong, Holand-Netherlands. 111 | -------------------------------------------------------------------------------- /ctr_of_recommendation/MLR(LS-PLM)_Demo/data/old.adult.names: -------------------------------------------------------------------------------- 1 | 1. Title of Database: adult 2 | 2. Sources: 3 | (a) Original owners of database (name/phone/snail address/email address) 4 | US Census Bureau. 5 | (b) Donor of database (name/phone/snail address/email address) 6 | Ronny Kohavi and Barry Becker, 7 | Data Mining and Visualization 8 | Silicon Graphics. 9 | e-mail: ronnyk@sgi.com 10 | (c) Date received (databases may change over time without name change!) 11 | 05/19/96 12 | 3. Past Usage: 13 | (a) Complete reference of article where it was described/used 14 | @inproceedings{kohavi-nbtree, 15 | author={Ron Kohavi}, 16 | title={Scaling Up the Accuracy of Naive-Bayes Classifiers: a 17 | Decision-Tree Hybrid}, 18 | booktitle={Proceedings of the Second International Conference on 19 | Knowledge Discovery and Data Mining}, 20 | year = 1996, 21 | pages={to appear}} 22 | (b) Indication of what attribute(s) were being predicted 23 | Salary greater or less than 50,000. 24 | (b) Indication of study's results (i.e. Is it a good domain to use?) 25 | Hard domain with a nice number of records. 
26 | The following results obtained using MLC++ with default settings 27 | for the algorithms mentioned below. 28 | 29 | Algorithm Error 30 | -- ---------------- ----- 31 | 1 C4.5 15.54 32 | 2 C4.5-auto 14.46 33 | 3 C4.5 rules 14.94 34 | 4 Voted ID3 (0.6) 15.64 35 | 5 Voted ID3 (0.8) 16.47 36 | 6 T2 16.84 37 | 7 1R 19.54 38 | 8 NBTree 14.10 39 | 9 CN2 16.00 40 | 10 HOODG 14.82 41 | 11 FSS Naive Bayes 14.05 42 | 12 IDTM (Decision table) 14.46 43 | 13 Naive-Bayes 16.12 44 | 14 Nearest-neighbor (1) 21.42 45 | 15 Nearest-neighbor (3) 20.35 46 | 16 OC1 15.04 47 | 17 Pebls Crashed. Unknown why (bounds WERE increased) 48 | 49 | 4. Relevant Information Paragraph: 50 | Extraction was done by Barry Becker from the 1994 Census database. A set 51 | of reasonably clean records was extracted using the following conditions: 52 | ((AAGE>16) && (AGI>100) && (AFNLWGT>1)&& (HRSWK>0)) 53 | 54 | 5. Number of Instances 55 | 48842 instances, mix of continuous and discrete (train=32561, test=16281) 56 | 45222 if instances with unknown values are removed (train=30162, test=15060) 57 | Split into train-test using MLC++ GenCVFiles (2/3, 1/3 random). 58 | 59 | 6. Number of Attributes 60 | 6 continuous, 8 nominal attributes. 61 | 62 | 7. Attribute Information: 63 | 64 | age: continuous. 65 | workclass: Private, Self-emp-not-inc, Self-emp-inc, Federal-gov, Local-gov, State-gov, Without-pay, Never-worked. 66 | fnlwgt: continuous. 67 | education: Bachelors, Some-college, 11th, HS-grad, Prof-school, Assoc-acdm, Assoc-voc, 9th, 7th-8th, 12th, Masters, 1st-4th, 10th, Doctorate, 5th-6th, Preschool. 68 | education-num: continuous. 69 | marital-status: Married-civ-spouse, Divorced, Never-married, Separated, Widowed, Married-spouse-absent, Married-AF-spouse. 70 | occupation: Tech-support, Craft-repair, Other-service, Sales, Exec-managerial, Prof-specialty, Handlers-cleaners, Machine-op-inspct, Adm-clerical, Farming-fishing, Transport-moving, Priv-house-serv, Protective-serv, Armed-Forces. 
71 | relationship: Wife, Own-child, Husband, Not-in-family, Other-relative, Unmarried. 72 | race: White, Asian-Pac-Islander, Amer-Indian-Eskimo, Other, Black. 73 | sex: Female, Male. 74 | capital-gain: continuous. 75 | capital-loss: continuous. 76 | hours-per-week: continuous. 77 | native-country: United-States, Cambodia, England, Puerto-Rico, Canada, Germany, Outlying-US(Guam-USVI-etc), India, Japan, Greece, South, China, Cuba, Iran, Honduras, Philippines, Italy, Poland, Jamaica, Vietnam, Mexico, Portugal, Ireland, France, Dominican-Republic, Laos, Ecuador, Taiwan, Haiti, Columbia, Hungary, Guatemala, Nicaragua, Scotland, Thailand, Yugoslavia, El-Salvador, Trinadad&Tobago, Peru, Hong, Holand-Netherlands. 78 | class: >50K, <=50K 79 | 80 | 8. Missing Attribute Values: 81 | 82 | 7% have missing values. 83 | 84 | 9. Class Distribution: 85 | 86 | Probability for the label '>50K' : 23.93% / 24.78% (without unknowns) 87 | Probability for the label '<=50K' : 76.07% / 75.22% (without unknowns) 88 | 89 | 90 | -------------------------------------------------------------------------------- /ctr_of_recommendation/MLR(LS-PLM)_Demo/lr.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import time 3 | from sklearn.metrics import roc_auc_score 4 | from data import get_data 5 | import pandas as pd 6 | 7 | 8 | 9 | x=tf.placeholder(tf.float32,shape=[None,108]) 10 | y=tf.placeholder(tf.float32,shape=[None]) 11 | 12 | m=1 13 | learning_rate=0.3 14 | w=tf.Variable(tf.random_normal([108,m], 0.0, 0.5),name='u') 15 | 16 | 17 | W=tf.matmul(x,w) 18 | p2=tf.reduce_sum(tf.nn.sigmoid(W),1) 19 | 20 | pred=p2 21 | 22 | cost1=tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=pred, labels=y)) 23 | 24 | cost=tf.add_n([cost1]) 25 | train_op = tf.train.FtrlOptimizer(learning_rate).minimize(cost) 26 | 27 | 28 | init_op = tf.group(tf.global_variables_initializer(),tf.local_variables_initializer()) 29 | sess = tf.Session() 30 
| sess.run(init_op) 31 | train_x,train_y,test_x,test_y = get_data() 32 | 33 | result = [] 34 | time_s=time.time() 35 | for epoch in range(0,10000): 36 | f_dict = {x: train_x, y: train_y} 37 | _, cost_, predict_= sess.run([train_op, cost, pred],feed_dict=f_dict) 38 | auc=roc_auc_score(train_y, predict_) 39 | time_t=time.time() 40 | if epoch % 100 == 0: 41 | f_dict = {x: test_x, y: test_y} 42 | _, cost_, predict_test = sess.run([train_op, cost, pred], feed_dict=f_dict) 43 | test_auc = roc_auc_score(test_y, predict_test) 44 | print("%d %ld cost:%f,train_auc:%f,test_auc:%f" % (epoch, (time_t-time_s),cost_,auc,test_auc)) 45 | result.append([epoch, (time_t - time_s), auc, test_auc]) 46 | 47 | pd.DataFrame(result, columns=['epoch', 'time', 'train_auc', 'test_auc']).to_csv("data/lr.csv") -------------------------------------------------------------------------------- /ctr_of_recommendation/MLR(LS-PLM)_Demo/mlr.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import time 3 | from sklearn.metrics import roc_auc_score 4 | from data import get_data 5 | import pandas as pd 6 | 7 | 8 | x = tf.placeholder(tf.float32,shape=[None,108]) 9 | y = tf.placeholder(tf.float32,shape=[None]) 10 | 11 | 12 | m = 2 13 | learning_rate = 0.3 14 | u = tf.Variable(tf.random_normal([108,m],0.0,0.5),name='u') 15 | w = tf.Variable(tf.random_normal([108,m],0.0,0.5),name='w') 16 | 17 | U = tf.matmul(x,u) 18 | p1 = tf.nn.softmax(U) 19 | 20 | W = tf.matmul(x,w) 21 | p2 = tf.nn.sigmoid(W) 22 | 23 | pred = tf.reduce_sum(tf.multiply(p1,p2),1) 24 | 25 | cost1=tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=pred, labels=y)) 26 | cost=tf.add_n([cost1]) 27 | train_op = tf.train.FtrlOptimizer(learning_rate).minimize(cost) 28 | train_x,train_y,test_x,test_y = get_data() 29 | time_s=time.time() 30 | result = [] 31 | with tf.Session() as sess: 32 | sess.run(tf.global_variables_initializer()) 33 | for epoch in range(0, 10000): 34 | 
f_dict = {x: train_x, y: train_y} 35 | 36 | _, cost_, predict_ = sess.run([train_op, cost, pred], feed_dict=f_dict) 37 | 38 | auc = roc_auc_score(train_y, predict_) 39 | time_t = time.time() 40 | if epoch % 100 == 0: 41 | f_dict = {x: test_x, y: test_y} 42 | _, cost_, predict_test = sess.run([train_op, cost, pred], feed_dict=f_dict) 43 | test_auc = roc_auc_score(test_y, predict_test) 44 | print("%d %ld cost:%f,train_auc:%f,test_auc:%f" % (epoch, (time_t - time_s), cost_, auc, test_auc)) 45 | result.append([epoch,(time_t - time_s),auc,test_auc]) 46 | 47 | pd.DataFrame(result,columns=['epoch','time','train_auc','test_auc']).to_csv("data/mlr_"+str(m)+'.csv') -------------------------------------------------------------------------------- /ctr_of_recommendation/MLR(LS-PLM)_Demo/plotResult.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import pandas as pd 3 | 4 | mlr5 = pd.read_csv("data/mlr_5.csv",index_col=0) 5 | 6 | mlr10 = pd.read_csv("data/mlr_10.csv",index_col=0) 7 | 8 | mlr15 = pd.read_csv("data/mlr_15.csv",index_col=0) 9 | 10 | mlr20 = pd.read_csv("data/mlr_20.csv",index_col=0) 11 | 12 | lr = pd.read_csv("data/lr.csv",index_col=0) 13 | 14 | 15 | 16 | 17 | epoch = mlr5['epoch'] 18 | train_auc5 = mlr5['test_auc'] 19 | train_auc10 = mlr10['test_auc'] 20 | train_auc15 = mlr15['test_auc'] 21 | train_auc20 = mlr20['test_auc'] 22 | train_auclr = lr['train_auc'] 23 | 24 | l1,= plt.plot(epoch,train_auc5,label='mlr-5') 25 | l2,= plt.plot(epoch,train_auc10,label='mlr-10') 26 | l3, = plt.plot(epoch,train_auc15,label='mlr-15') 27 | l4, = plt.plot(epoch,train_auc20,label='mlr-20') 28 | l5, = plt.plot(epoch,train_auclr,label='lr') 29 | plt.xlabel('epoch') 30 | plt.ylabel('auc') 31 | plt.title('mlr,lr test_auc') 32 | plt.grid() 33 | plt.legend(handles = [l1, l2,l3,l4,l5], labels = ['mlr-5', 'mlr-10','mlr-15','mlr-20','lr'], loc = 'best') 34 | plt.savefig('data/test_zhexian.png') 35 | 
-------------------------------------------------------------------------------- /ctr_of_recommendation/NFM_Demo/DataReader.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | class FeatureDictionary(object): 4 | def __init__(self,trainfile=None,testfile=None, 5 | dfTrain=None,dfTest=None,numeric_cols=[], 6 | ignore_cols=[]): 7 | assert not ((trainfile is None) and (dfTrain is None)), "trainfile or dfTrain at least one is set" 8 | assert not ((trainfile is not None) and (dfTrain is not None)), "only one can be set" 9 | assert not ((testfile is None) and (dfTest is None)), "testfile or dfTest at least one is set" 10 | assert not ((testfile is not None) and (dfTest is not None)), "only one can be set" 11 | 12 | self.trainfile = trainfile 13 | self.testfile = testfile 14 | self.dfTrain = dfTrain 15 | self.dfTest = dfTest 16 | self.numeric_cols = numeric_cols 17 | self.ignore_cols = ignore_cols 18 | self.gen_feat_dict() 19 | 20 | 21 | 22 | 23 | def gen_feat_dict(self): 24 | if self.dfTrain is None: 25 | dfTrain = pd.read_csv(self.trainfile) 26 | 27 | else: 28 | dfTrain = self.dfTrain 29 | 30 | if self.dfTest is None: 31 | dfTest = pd.read_csv(self.testfile) 32 | 33 | else: 34 | dfTest = self.dfTest 35 | 36 | df = pd.concat([dfTrain,dfTest]) 37 | 38 | self.feat_dict = {} 39 | tc = 0 40 | for col in df.columns: 41 | if col in self.ignore_cols: 42 | continue 43 | if col in self.numeric_cols: 44 | self.feat_dict[col] = tc 45 | tc += 1 46 | 47 | else: 48 | us = df[col].unique() 49 | print(us) 50 | self.feat_dict[col] = dict(zip(us,range(tc,len(us)+tc))) 51 | tc += len(us) 52 | 53 | self.feat_dim = tc 54 | 55 | 56 | class DataParser(object): 57 | def __init__(self,feat_dict): 58 | self.feat_dict = feat_dict 59 | 60 | def parse(self,infile=None,df=None,has_label=False): 61 | assert not ((infile is None) and (df is None)), "infile or df at least one is set" 62 | assert not ((infile is not None) and (df is not 
None)), "only one can be set" 63 | 64 | 65 | if infile is None: 66 | dfi = df.copy() 67 | else: 68 | dfi = pd.read_csv(infile) 69 | 70 | if has_label: 71 | y = dfi['target'].values.tolist() 72 | dfi.drop(['id','target'],axis=1,inplace=True) 73 | else: 74 | ids = dfi['id'].values.tolist() 75 | dfi.drop(['id'],axis=1,inplace=True) 76 | # dfi for feature index 77 | # dfv for feature value which can be either binary (1/0) or float (e.g., 10.24) 78 | dfv = dfi.copy() 79 | for col in dfi.columns: 80 | if col in self.feat_dict.ignore_cols: 81 | dfi.drop(col,axis=1,inplace=True) 82 | dfv.drop(col,axis=1,inplace=True) 83 | continue 84 | if col in self.feat_dict.numeric_cols: 85 | dfi[col] = self.feat_dict.feat_dict[col] 86 | else: 87 | dfi[col] = dfi[col].map(self.feat_dict.feat_dict[col]) 88 | dfv[col] = 1. 89 | 90 | xi = dfi.values.tolist() 91 | xv = dfv.values.tolist() 92 | 93 | if has_label: 94 | return xi,xv,y 95 | else: 96 | return xi,xv,ids 97 | 98 | 99 | -------------------------------------------------------------------------------- /ctr_of_recommendation/NFM_Demo/__pycache__/DataReader.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/NFM_Demo/__pycache__/DataReader.cpython-36.pyc -------------------------------------------------------------------------------- /ctr_of_recommendation/NFM_Demo/__pycache__/NFM.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/NFM_Demo/__pycache__/NFM.cpython-36.pyc -------------------------------------------------------------------------------- /ctr_of_recommendation/NFM_Demo/__pycache__/config.cpython-36.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/NFM_Demo/__pycache__/config.cpython-36.pyc -------------------------------------------------------------------------------- /ctr_of_recommendation/NFM_Demo/config.py: -------------------------------------------------------------------------------- 1 | TRAIN_FILE = "data/train.csv" 2 | TEST_FILE = "data/test.csv" 3 | 4 | SUB_DIR = "output" 5 | 6 | 7 | NUM_SPLITS = 3 8 | RANDOM_SEED = 2017 9 | 10 | # types of columns of the dataset dataframe 11 | CATEGORICAL_COLS = [ 12 | # 'ps_ind_02_cat', 'ps_ind_04_cat', 'ps_ind_05_cat', 13 | # 'ps_car_01_cat', 'ps_car_02_cat', 'ps_car_03_cat', 14 | # 'ps_car_04_cat', 'ps_car_05_cat', 'ps_car_06_cat', 15 | # 'ps_car_07_cat', 'ps_car_08_cat', 'ps_car_09_cat', 16 | # 'ps_car_10_cat', 'ps_car_11_cat', 17 | ] 18 | 19 | NUMERIC_COLS = [ 20 | # # binary 21 | # "ps_ind_06_bin", "ps_ind_07_bin", "ps_ind_08_bin", 22 | # "ps_ind_09_bin", "ps_ind_10_bin", "ps_ind_11_bin", 23 | # "ps_ind_12_bin", "ps_ind_13_bin", "ps_ind_16_bin", 24 | # "ps_ind_17_bin", "ps_ind_18_bin", 25 | # "ps_calc_15_bin", "ps_calc_16_bin", "ps_calc_17_bin", 26 | # "ps_calc_18_bin", "ps_calc_19_bin", "ps_calc_20_bin", 27 | # numeric 28 | "ps_reg_01", "ps_reg_02", "ps_reg_03", 29 | "ps_car_12", "ps_car_13", "ps_car_14", "ps_car_15", 30 | 31 | # feature engineering 32 | "missing_feat", "ps_car_13_x_ps_reg_03", 33 | ] 34 | 35 | IGNORE_COLS = [ 36 | "id", "target", 37 | "ps_calc_01", "ps_calc_02", "ps_calc_03", "ps_calc_04", 38 | "ps_calc_05", "ps_calc_06", "ps_calc_07", "ps_calc_08", 39 | "ps_calc_09", "ps_calc_10", "ps_calc_11", "ps_calc_12", 40 | "ps_calc_13", "ps_calc_14", 41 | "ps_calc_15_bin", "ps_calc_16_bin", "ps_calc_17_bin", 42 | "ps_calc_18_bin", "ps_calc_19_bin", "ps_calc_20_bin" 43 | ] 44 | -------------------------------------------------------------------------------- /ctr_of_recommendation/NFM_Demo/main.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import pandas as pd 4 | import tensorflow as tf 5 | from sklearn.metrics import make_scorer 6 | from sklearn.model_selection import StratifiedKFold 7 | from DataReader import FeatureDictionary, DataParser 8 | from matplotlib import pyplot as plt 9 | import config 10 | from NFM import NFM 11 | 12 | def load_data(): 13 | dfTrain = pd.read_csv(config.TRAIN_FILE) 14 | dfTest = pd.read_csv(config.TEST_FILE) 15 | 16 | def preprocess(df): 17 | cols = [c for c in df.columns if c not in ['id','target']] 18 | #df['missing_feat'] = np.sum(df[df[cols]==-1].values,axis=1) 19 | df["missing_feat"] = np.sum((df[cols] == -1).values, axis=1) 20 | df['ps_car_13_x_ps_reg_03'] = df['ps_car_13'] * df['ps_reg_03'] 21 | return df 22 | 23 | dfTrain = preprocess(dfTrain) 24 | dfTest = preprocess(dfTest) 25 | 26 | cols = [c for c in dfTrain.columns if c not in ['id','target']] 27 | cols = [c for c in cols if (not c in config.IGNORE_COLS)] 28 | 29 | X_train = dfTrain[cols].values 30 | y_train = dfTrain['target'].values 31 | 32 | X_test = dfTest[cols].values 33 | ids_test = dfTest['id'].values 34 | 35 | cat_features_indices = [i for i,c in enumerate(cols) if c in config.CATEGORICAL_COLS] 36 | 37 | return dfTrain,dfTest,X_train,y_train,X_test,ids_test,cat_features_indices 38 | 39 | def run_base_model_nfm(dfTrain,dfTest,folds,pnn_params): 40 | fd = FeatureDictionary(dfTrain=dfTrain, 41 | dfTest=dfTest, 42 | numeric_cols=config.NUMERIC_COLS, 43 | ignore_cols = config.IGNORE_COLS) 44 | data_parser = DataParser(feat_dict= fd) 45 | # Xi_train :列的序号 46 | # Xv_train :列的对应的值 47 | Xi_train,Xv_train,y_train = data_parser.parse(df=dfTrain,has_label=True) 48 | Xi_test,Xv_test,ids_test = data_parser.parse(df=dfTest) 49 | 50 | print(dfTrain.dtypes) 51 | 52 | pnn_params['feature_size'] = fd.feat_dim 53 | pnn_params['field_size'] = len(Xi_train[0]) 54 | 55 | 56 | _get = lambda x,l:[x[i] for 
i in l] 57 | 58 | 59 | 60 | for i, (train_idx, valid_idx) in enumerate(folds): 61 | Xi_train_, Xv_train_, y_train_ = _get(Xi_train, train_idx), _get(Xv_train, train_idx), _get(y_train, train_idx) 62 | Xi_valid_, Xv_valid_, y_valid_ = _get(Xi_train, valid_idx), _get(Xv_train, valid_idx), _get(y_train, valid_idx) 63 | 64 | nfm = NFM(**pnn_params) 65 | nfm.fit(Xi_train_, Xv_train_, y_train_, Xi_valid_, Xv_valid_, y_valid_) 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | pnn_params = { 74 | "embedding_size":8, 75 | "deep_layers":[32,32], 76 | "dropout_deep":[0.5,0.5,0.5], 77 | "deep_layer_activation":tf.nn.relu, 78 | "epoch":30, 79 | "batch_size":1024, 80 | "learning_rate":0.001, 81 | "optimizer":"adam", 82 | "batch_norm":1, 83 | "batch_norm_decay":0.995, 84 | "verbose":True, 85 | "random_seed":config.RANDOM_SEED, 86 | "deep_init_size":50, 87 | "use_inner":False 88 | 89 | } 90 | 91 | # load data 92 | dfTrain, dfTest, X_train, y_train, X_test, ids_test, cat_features_indices = load_data() 93 | 94 | # folds 95 | folds = list(StratifiedKFold(n_splits=config.NUM_SPLITS, shuffle=True, 96 | random_state=config.RANDOM_SEED).split(X_train, y_train)) 97 | 98 | #y_train_pnn,y_test_pnn = run_base_model_pnn(dfTrain,dfTest,folds,pnn_params) 99 | y_train_pnn, y_test_pnn = run_base_model_nfm(dfTrain, dfTest, folds, pnn_params) 100 | 101 | -------------------------------------------------------------------------------- /ctr_of_recommendation/PNN_Demo/.idea/Basic-PNN-Demo.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 12 | -------------------------------------------------------------------------------- /ctr_of_recommendation/PNN_Demo/.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | -------------------------------------------------------------------------------- /ctr_of_recommendation/PNN_Demo/.idea/modules.xml: 
import pandas as pd


class FeatureDictionary(object):
    """Assigns a global integer index to every feature of a train/test pair.

    Each numeric column receives one index. Every other (categorical) column
    receives one index per distinct value found in the concatenation of the
    train and test frames. Columns listed in ``ignore_cols`` get no index.

    Attributes set by the constructor:
        feat_dict: maps column name -> int (numeric column) or
            column name -> {value: int} (categorical column).
        feat_dim: total number of indices handed out.
    """

    def __init__(self, trainfile=None, testfile=None,
                 dfTrain=None, dfTest=None, numeric_cols=None,
                 ignore_cols=None):
        """Store the inputs and build the feature dictionary.

        Args:
            trainfile: CSV path of the training data (exclusive with dfTrain).
            testfile: CSV path of the test data (exclusive with dfTest).
            dfTrain: training DataFrame (exclusive with trainfile).
            dfTest: test DataFrame (exclusive with testfile).
            numeric_cols: names of columns treated as numeric; defaults to none.
            ignore_cols: names of columns left out of the dictionary;
                defaults to none.

        Raises:
            AssertionError: if neither or both of a file/frame pair are given.
        """
        assert not ((trainfile is None) and (dfTrain is None)), "trainfile or dfTrain at least one is set"
        assert not ((trainfile is not None) and (dfTrain is not None)), "only one can be set"
        assert not ((testfile is None) and (dfTest is None)), "testfile or dfTest at least one is set"
        assert not ((testfile is not None) and (dfTest is not None)), "only one can be set"

        self.trainfile = trainfile
        self.testfile = testfile
        self.dfTrain = dfTrain
        self.dfTest = dfTest
        # A fresh list per instance: a literal [] default would be one shared
        # object that every instance stores and could mutate.
        self.numeric_cols = [] if numeric_cols is None else numeric_cols
        self.ignore_cols = [] if ignore_cols is None else ignore_cols
        self.gen_feat_dict()

    def gen_feat_dict(self):
        """Populate ``self.feat_dict`` and ``self.feat_dim``."""
        if self.dfTrain is None:
            dfTrain = pd.read_csv(self.trainfile)
        else:
            dfTrain = self.dfTrain

        if self.dfTest is None:
            dfTest = pd.read_csv(self.testfile)
        else:
            dfTest = self.dfTest

        # Index over train and test together so that a category value that
        # occurs in only one of them still receives an index.
        df = pd.concat([dfTrain, dfTest])

        self.feat_dict = {}
        tc = 0  # next free feature index
        for col in df.columns:
            if col in self.ignore_cols:
                continue
            if col in self.numeric_cols:
                # A numeric column occupies a single index.
                self.feat_dict[col] = tc
                tc += 1
            else:
                # A categorical column occupies one index per distinct value.
                us = df[col].unique()
                self.feat_dict[col] = dict(zip(us, range(tc, len(us) + tc)))
                tc += len(us)

        self.feat_dim = tc


class DataParser(object):
    """Converts a DataFrame into parallel feature-index / feature-value lists."""

    def __init__(self, feat_dict):
        """Keep the FeatureDictionary whose indices ``parse`` will apply.

        Args:
            feat_dict: an object exposing ``feat_dict``, ``numeric_cols`` and
                ``ignore_cols`` (a FeatureDictionary).
        """
        self.feat_dict = feat_dict

    def parse(self, infile=None, df=None, has_label=False):
        """Encode the rows of ``infile`` or ``df``.

        The input frame is copied (or freshly read), so the caller's
        DataFrame is not modified.

        Args:
            infile: CSV path to read (exclusive with df).
            df: DataFrame to encode (exclusive with infile).
            has_label: when True the frame must contain 'id' and 'target';
                'target' is returned as the third element. When False only
                'id' is required and is returned as the third element.

        Returns:
            (xi, xv, y) when has_label is True, else (xi, xv, ids).
            xi holds, per row, the feature index of every kept column;
            xv holds the matching value: the raw value for numeric columns
            and 1.0 for categorical columns.

        Raises:
            AssertionError: if neither or both of infile/df are given.
        """
        assert not ((infile is None) and (df is None)), "infile or df at least one is set"
        assert not ((infile is not None) and (df is not None)), "only one can be set"

        if infile is None:
            dfi = df.copy()
        else:
            dfi = pd.read_csv(infile)

        if has_label:
            y = dfi['target'].values.tolist()
            dfi.drop(['id', 'target'], axis=1, inplace=True)
        else:
            ids = dfi['id'].values.tolist()
            dfi.drop(['id'], axis=1, inplace=True)
        # dfi for feature index
        # dfv for feature value which can be either binary (1/0) or float (e.g., 10.24)
        dfv = dfi.copy()
        # Iterate over a snapshot of the column names: columns are dropped
        # from dfi/dfv inside the loop.
        for col in list(dfi.columns):
            if col in self.feat_dict.ignore_cols:
                dfi.drop(col, axis=1, inplace=True)
                dfv.drop(col, axis=1, inplace=True)
                continue
            if col in self.feat_dict.numeric_cols:
                # Same index for every row; dfv keeps the raw numeric value.
                dfi[col] = self.feat_dict.feat_dict[col]
            else:
                # Per-value index; the "value" of a one-hot feature is 1.
                dfi[col] = dfi[col].map(self.feat_dict.feat_dict[col])
                dfv[col] = 1.

        xi = dfi.values.tolist()
        xv = dfv.values.tolist()

        if has_label:
            return xi, xv, y
        else:
            return xi, xv, ids
/ctr_of_recommendation/PNN_Demo/__pycache__/PNN.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/PNN_Demo/__pycache__/PNN.cpython-36.pyc -------------------------------------------------------------------------------- /ctr_of_recommendation/PNN_Demo/__pycache__/PNN.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/PNN_Demo/__pycache__/PNN.cpython-37.pyc -------------------------------------------------------------------------------- /ctr_of_recommendation/PNN_Demo/__pycache__/config.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/PNN_Demo/__pycache__/config.cpython-36.pyc -------------------------------------------------------------------------------- /ctr_of_recommendation/PNN_Demo/__pycache__/config.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/ctr_of_recommendation/PNN_Demo/__pycache__/config.cpython-37.pyc -------------------------------------------------------------------------------- /ctr_of_recommendation/PNN_Demo/config.py: -------------------------------------------------------------------------------- 1 | TRAIN_FILE = "data/train.csv" 2 | TEST_FILE = "data/test.csv" 3 | 4 | SUB_DIR = "output" 5 | 6 | 7 | NUM_SPLITS = 3 8 | RANDOM_SEED = 2017 9 | 10 | # types of columns of the dataset dataframe 11 | CATEGORICAL_COLS = [ 12 | # 'ps_ind_02_cat', 'ps_ind_04_cat', 'ps_ind_05_cat', 13 | # 
'ps_car_01_cat', 'ps_car_02_cat', 'ps_car_03_cat', 14 | # 'ps_car_04_cat', 'ps_car_05_cat', 'ps_car_06_cat', 15 | # 'ps_car_07_cat', 'ps_car_08_cat', 'ps_car_09_cat', 16 | # 'ps_car_10_cat', 'ps_car_11_cat', 17 | ] 18 | 19 | NUMERIC_COLS = [ 20 | # # binary 21 | # "ps_ind_06_bin", "ps_ind_07_bin", "ps_ind_08_bin", 22 | # "ps_ind_09_bin", "ps_ind_10_bin", "ps_ind_11_bin", 23 | # "ps_ind_12_bin", "ps_ind_13_bin", "ps_ind_16_bin", 24 | # "ps_ind_17_bin", "ps_ind_18_bin", 25 | # "ps_calc_15_bin", "ps_calc_16_bin", "ps_calc_17_bin", 26 | # "ps_calc_18_bin", "ps_calc_19_bin", "ps_calc_20_bin", 27 | # numeric 28 | "ps_reg_01", "ps_reg_02", "ps_reg_03", 29 | "ps_car_12", "ps_car_13", "ps_car_14", "ps_car_15", 30 | 31 | # feature engineering 32 | "missing_feat", "ps_car_13_x_ps_reg_03", 33 | ] 34 | 35 | IGNORE_COLS = [ 36 | "id", "target", 37 | "ps_calc_01", "ps_calc_02", "ps_calc_03", "ps_calc_04", 38 | "ps_calc_05", "ps_calc_06", "ps_calc_07", "ps_calc_08", 39 | "ps_calc_09", "ps_calc_10", "ps_calc_11", "ps_calc_12", 40 | "ps_calc_13", "ps_calc_14", 41 | "ps_calc_15_bin", "ps_calc_16_bin", "ps_calc_17_bin", 42 | "ps_calc_18_bin", "ps_calc_19_bin", "ps_calc_20_bin" 43 | ] 44 | -------------------------------------------------------------------------------- /ctr_of_recommendation/PNN_Demo/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import pandas as pd 4 | import tensorflow as tf 5 | from sklearn.metrics import make_scorer 6 | from sklearn.model_selection import StratifiedKFold 7 | from DataReader import FeatureDictionary, DataParser 8 | from matplotlib import pyplot as plt 9 | 10 | import config 11 | from PNN import PNN 12 | 13 | def load_data(): 14 | dfTrain = pd.read_csv(config.TRAIN_FILE) 15 | dfTest = pd.read_csv(config.TEST_FILE) 16 | 17 | def preprocess(df): 18 | cols = [c for c in df.columns if c not in ['id','target']] 19 | #df['missing_feat'] = 
np.sum(df[df[cols]==-1].values,axis=1) 20 | df["missing_feat"] = np.sum((df[cols] == -1).values, axis=1) 21 | df['ps_car_13_x_ps_reg_03'] = df['ps_car_13'] * df['ps_reg_03'] 22 | return df 23 | 24 | dfTrain = preprocess(dfTrain) 25 | dfTest = preprocess(dfTest) 26 | 27 | cols = [c for c in dfTrain.columns if c not in ['id','target']] 28 | cols = [c for c in cols if (not c in config.IGNORE_COLS)] 29 | 30 | X_train = dfTrain[cols].values 31 | y_train = dfTrain['target'].values 32 | 33 | X_test = dfTest[cols].values 34 | ids_test = dfTest['id'].values 35 | 36 | cat_features_indices = [i for i,c in enumerate(cols) if c in config.CATEGORICAL_COLS] 37 | 38 | return dfTrain,dfTest,X_train,y_train,X_test,ids_test,cat_features_indices 39 | 40 | def run_base_model_pnn(dfTrain,dfTest,folds,pnn_params): 41 | fd = FeatureDictionary(dfTrain=dfTrain, 42 | dfTest=dfTest, 43 | numeric_cols=config.NUMERIC_COLS, 44 | ignore_cols = config.IGNORE_COLS) 45 | data_parser = DataParser(feat_dict= fd) 46 | # Xi_train :列的序号 47 | # Xv_train :列的对应的值 48 | Xi_train,Xv_train,y_train = data_parser.parse(df=dfTrain,has_label=True) 49 | Xi_test,Xv_test,ids_test = data_parser.parse(df=dfTest) 50 | 51 | print(dfTrain.dtypes) 52 | 53 | pnn_params['feature_size'] = fd.feat_dim 54 | pnn_params['field_size'] = len(Xi_train[0]) 55 | 56 | 57 | _get = lambda x,l:[x[i] for i in l] 58 | 59 | 60 | 61 | for i, (train_idx, valid_idx) in enumerate(folds): 62 | Xi_train_, Xv_train_, y_train_ = _get(Xi_train, train_idx), _get(Xv_train, train_idx), _get(y_train, train_idx) 63 | Xi_valid_, Xv_valid_, y_valid_ = _get(Xi_train, valid_idx), _get(Xv_train, valid_idx), _get(y_train, valid_idx) 64 | 65 | pnn = PNN(**pnn_params) 66 | pnn.fit(Xi_train_, Xv_train_, y_train_, Xi_valid_, Xv_valid_, y_valid_) 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | pnn_params = { 75 | "embedding_size":8, 76 | "deep_layers":[32,32], 77 | "dropout_deep":[0.5,0.5,0.5], 78 | "deep_layer_activation":tf.nn.relu, 79 | "epoch":30, 80 | "batch_size":1024, 
81 | "learning_rate":0.001, 82 | "optimizer":"adam", 83 | "batch_norm":1, 84 | "batch_norm_decay":0.995, 85 | "verbose":True, 86 | "random_seed":config.RANDOM_SEED, 87 | "deep_init_size":50, 88 | "use_inner":False 89 | 90 | } 91 | 92 | # load data 93 | dfTrain, dfTest, X_train, y_train, X_test, ids_test, cat_features_indices = load_data() 94 | 95 | # folds 96 | folds = list(StratifiedKFold(n_splits=config.NUM_SPLITS, shuffle=True, 97 | random_state=config.RANDOM_SEED).split(X_train, y_train)) 98 | 99 | #y_train_pnn,y_test_pnn = run_base_model_pnn(dfTrain,dfTest,folds,pnn_params) 100 | y_train_pnn, y_test_pnn = run_base_model_pnn(dfTrain, dfTest, folds, pnn_params) 101 | 102 | -------------------------------------------------------------------------------- /ctr_of_recommendation/PNN_Demo/sfsfs.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | 5 | 6 | t1 = tf.convert_to_tensor([[2,2], 7 | [2,3]]) 8 | 9 | t1_1 = tf.reshape(t1,shape=[2,2,1]) 10 | t1_2 = tf.reshape(t1,shape=[2,1,2]) 11 | 12 | t = tf.matmul(t1_1,t1_2) 13 | 14 | with tf.Session() as sess: 15 | print(sess.run(t)) -------------------------------------------------------------------------------- /ctr_of_recommendation/Wide&Deep_Demo/.gitignore: -------------------------------------------------------------------------------- 1 | model/ 2 | data/adult.data 3 | data/adult.test 4 | /absl_example.py 5 | /origin_wide_deep.py 6 | -------------------------------------------------------------------------------- /ctr_of_recommendation/Wide&Deep_Demo/.idea/Wide&Deep.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 12 | -------------------------------------------------------------------------------- /ctr_of_recommendation/Wide&Deep_Demo/.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | 
-------------------------------------------------------------------------------- /ctr_of_recommendation/Wide&Deep_Demo/.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /ctr_of_recommendation/Wide&Deep_Demo/README.md: -------------------------------------------------------------------------------- 1 | # 所用数据集 2 | 预测某人年收入超过5万美元的概率。 3 | 4 | 下载: `python data_download.py` 5 | 6 | # Wide Linear Model 7 | `wide_component.py` 8 | 9 | # Wide & Deep Model 10 | `wide_deep.py` -------------------------------------------------------------------------------- /ctr_of_recommendation/Wide&Deep_Demo/data/Index: -------------------------------------------------------------------------------- 1 | Index of adult 2 | 3 | 02 Dec 1996 140 Index 4 | 10 Aug 1996 3974305 adult.data 5 | 10 Aug 1996 4267 adult.names 6 | 10 Aug 1996 2003153 adult.test 7 | -------------------------------------------------------------------------------- /ctr_of_recommendation/Wide&Deep_Demo/data/adult.names: -------------------------------------------------------------------------------- 1 | | This data was extracted from the census bureau database found at 2 | | http://www.census.gov/ftp/pub/DES/www/welcome.html 3 | | Donor: Ronny Kohavi and Barry Becker, 4 | | Data Mining and Visualization 5 | | Silicon Graphics. 6 | | e-mail: ronnyk@sgi.com for questions. 7 | | Split into train-test using MLC++ GenCVFiles (2/3, 1/3 random). 
8 | | 48842 instances, mix of continuous and discrete (train=32561, test=16281) 9 | | 45222 if instances with unknown values are removed (train=30162, test=15060) 10 | | Duplicate or conflicting instances : 6 11 | | Class probabilities for adult.all file 12 | | Probability for the label '>50K' : 23.93% / 24.78% (without unknowns) 13 | | Probability for the label '<=50K' : 76.07% / 75.22% (without unknowns) 14 | | 15 | | Extraction was done by Barry Becker from the 1994 Census database. A set of 16 | | reasonably clean records was extracted using the following conditions: 17 | | ((AAGE>16) && (AGI>100) && (AFNLWGT>1)&& (HRSWK>0)) 18 | | 19 | | Prediction task is to determine whether a person makes over 50K 20 | | a year. 21 | | 22 | | First cited in: 23 | | @inproceedings{kohavi-nbtree, 24 | | author={Ron Kohavi}, 25 | | title={Scaling Up the Accuracy of Naive-Bayes Classifiers: a 26 | | Decision-Tree Hybrid}, 27 | | booktitle={Proceedings of the Second International Conference on 28 | | Knowledge Discovery and Data Mining}, 29 | | year = 1996, 30 | | pages={to appear}} 31 | | 32 | | Error Accuracy reported as follows, after removal of unknowns from 33 | | train/test sets): 34 | | C4.5 : 84.46+-0.30 35 | | Naive-Bayes: 83.88+-0.30 36 | | NBTree : 85.90+-0.28 37 | | 38 | | 39 | | Following algorithms were later run with the following error rates, 40 | | all after removal of unknowns and using the original train/test split. 41 | | All these numbers are straight runs using MLC++ with default values. 
42 | | 43 | | Algorithm Error 44 | | -- ---------------- ----- 45 | | 1 C4.5 15.54 46 | | 2 C4.5-auto 14.46 47 | | 3 C4.5 rules 14.94 48 | | 4 Voted ID3 (0.6) 15.64 49 | | 5 Voted ID3 (0.8) 16.47 50 | | 6 T2 16.84 51 | | 7 1R 19.54 52 | | 8 NBTree 14.10 53 | | 9 CN2 16.00 54 | | 10 HOODG 14.82 55 | | 11 FSS Naive Bayes 14.05 56 | | 12 IDTM (Decision table) 14.46 57 | | 13 Naive-Bayes 16.12 58 | | 14 Nearest-neighbor (1) 21.42 59 | | 15 Nearest-neighbor (3) 20.35 60 | | 16 OC1 15.04 61 | | 17 Pebls Crashed. Unknown why (bounds WERE increased) 62 | | 63 | | Conversion of original data as follows: 64 | | 1. Discretized agrossincome into two ranges with threshold 50,000. 65 | | 2. Convert U.S. to US to avoid periods. 66 | | 3. Convert Unknown to "?" 67 | | 4. Run MLC++ GenCVFiles to generate data,test. 68 | | 69 | | Description of fnlwgt (final weight) 70 | | 71 | | The weights on the CPS files are controlled to independent estimates of the 72 | | civilian noninstitutional population of the US. These are prepared monthly 73 | | for us by Population Division here at the Census Bureau. We use 3 sets of 74 | | controls. 75 | | These are: 76 | | 1. A single cell estimate of the population 16+ for each state. 77 | | 2. Controls for Hispanic Origin by age and sex. 78 | | 3. Controls by Race, age and sex. 79 | | 80 | | We use all three sets of controls in our weighting program and "rake" through 81 | | them 6 times so that by the end we come back to all the controls we used. 82 | | 83 | | The term estimate refers to population totals derived from CPS by creating 84 | | "weighted tallies" of any specified socio-economic characteristics of the 85 | | population. 86 | | 87 | | People with similar demographic characteristics should have 88 | | similar weights. There is one important caveat to remember 89 | | about this statement. 
That is that since the CPS sample is 90 | | actually a collection of 51 state samples, each with its own 91 | | probability of selection, the statement only applies within 92 | | state. 93 | 94 | 95 | >50K, <=50K. 96 | 97 | age: continuous. 98 | workclass: Private, Self-emp-not-inc, Self-emp-inc, Federal-gov, Local-gov, State-gov, Without-pay, Never-worked. 99 | fnlwgt: continuous. 100 | education: Bachelors, Some-college, 11th, HS-grad, Prof-school, Assoc-acdm, Assoc-voc, 9th, 7th-8th, 12th, Masters, 1st-4th, 10th, Doctorate, 5th-6th, Preschool. 101 | education-num: continuous. 102 | marital-status: Married-civ-spouse, Divorced, Never-married, Separated, Widowed, Married-spouse-absent, Married-AF-spouse. 103 | occupation: Tech-support, Craft-repair, Other-service, Sales, Exec-managerial, Prof-specialty, Handlers-cleaners, Machine-op-inspct, Adm-clerical, Farming-fishing, Transport-moving, Priv-house-serv, Protective-serv, Armed-Forces. 104 | relationship: Wife, Own-child, Husband, Not-in-family, Other-relative, Unmarried. 105 | race: White, Asian-Pac-Islander, Amer-Indian-Eskimo, Other, Black. 106 | sex: Female, Male. 107 | capital-gain: continuous. 108 | capital-loss: continuous. 109 | hours-per-week: continuous. 110 | native-country: United-States, Cambodia, England, Puerto-Rico, Canada, Germany, Outlying-US(Guam-USVI-etc), India, Japan, Greece, South, China, Cuba, Iran, Honduras, Philippines, Italy, Poland, Jamaica, Vietnam, Mexico, Portugal, Ireland, France, Dominican-Republic, Laos, Ecuador, Taiwan, Haiti, Columbia, Hungary, Guatemala, Nicaragua, Scotland, Thailand, Yugoslavia, El-Salvador, Trinadad&Tobago, Peru, Hong, Holand-Netherlands. 111 | -------------------------------------------------------------------------------- /ctr_of_recommendation/Wide&Deep_Demo/data/data_download.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Download and clean the Census Income Dataset."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import os
import sys

from six.moves import urllib
import tensorflow as tf

# Remote location of the raw files and the names they are stored under.
DATA_URL = 'https://archive.ics.uci.edu/ml/machine-learning-databases/adult'
TRAINING_FILE = 'adult.data'
TRAINING_URL = '%s/%s' % (DATA_URL, TRAINING_FILE)
EVAL_FILE = 'adult.test'
EVAL_URL = '%s/%s' % (DATA_URL, EVAL_FILE)

parser = argparse.ArgumentParser()

parser.add_argument(
    '--data_dir', type=str, default='./',
    help='Directory to download census data')


def _download_and_clean_file(filename, url):
    """Downloads data from url, and makes changes to match the CSV format.

    The raw file is fetched to a temporary location and rewritten to
    `filename` line by line:
      * surrounding whitespace is stripped;
      * every ', ' is collapsed to ',';
      * lines that are empty or contain no comma are dropped;
      * one trailing '.' is removed.

    Args:
      filename: path the cleaned file is written to.
      url: address of the raw file.
    """
    temp_file, _ = urllib.request.urlretrieve(url)
    try:
        with tf.gfile.Open(temp_file, 'r') as temp_eval_file:
            with tf.gfile.Open(filename, 'w') as eval_file:
                for line in temp_eval_file:
                    line = line.strip()
                    line = line.replace(', ', ',')
                    if not line or ',' not in line:
                        continue
                    if line[-1] == '.':
                        line = line[:-1]
                    line += '\n'
                    eval_file.write(line)
    finally:
        # Delete the temporary download even when reading or writing fails,
        # so a failed run does not leave the raw file behind.
        tf.gfile.Remove(temp_file)


def main(_): 59 | if not tf.gfile.Exists(FLAGS.data_dir): 60 | tf.gfile.MkDir(FLAGS.data_dir) 61 | 62 | training_file_path = os.path.join(FLAGS.data_dir, TRAINING_FILE) 63 | _download_and_clean_file(training_file_path, TRAINING_URL) 64 | 65 | eval_file_path = os.path.join(FLAGS.data_dir, EVAL_FILE) 66 | _download_and_clean_file(eval_file_path, EVAL_URL) 67 | 68 | 69 | if __name__ == '__main__': 70 | FLAGS, unparsed = parser.parse_known_args() 71 | tf.app.run(argv=[sys.argv[0]] + unparsed) -------------------------------------------------------------------------------- /ctr_of_recommendation/Wide&Deep_Demo/data/old.adult.names: -------------------------------------------------------------------------------- 1 | 1. Title of Database: adult 2 | 2. Sources: 3 | (a) Original owners of database (name/phone/snail address/email address) 4 | US Census Bureau. 5 | (b) Donor of database (name/phone/snail address/email address) 6 | Ronny Kohavi and Barry Becker, 7 | Data Mining and Visualization 8 | Silicon Graphics. 9 | e-mail: ronnyk@sgi.com 10 | (c) Date received (databases may change over time without name change!) 11 | 05/19/96 12 | 3. Past Usage: 13 | (a) Complete reference of article where it was described/used 14 | @inproceedings{kohavi-nbtree, 15 | author={Ron Kohavi}, 16 | title={Scaling Up the Accuracy of Naive-Bayes Classifiers: a 17 | Decision-Tree Hybrid}, 18 | booktitle={Proceedings of the Second International Conference on 19 | Knowledge Discovery and Data Mining}, 20 | year = 1996, 21 | pages={to appear}} 22 | (b) Indication of what attribute(s) were being predicted 23 | Salary greater or less than 50,000. 24 | (b) Indication of study's results (i.e. Is it a good domain to use?) 25 | Hard domain with a nice number of records. 26 | The following results obtained using MLC++ with default settings 27 | for the algorithms mentioned below. 
28 | 29 | Algorithm Error 30 | -- ---------------- ----- 31 | 1 C4.5 15.54 32 | 2 C4.5-auto 14.46 33 | 3 C4.5 rules 14.94 34 | 4 Voted ID3 (0.6) 15.64 35 | 5 Voted ID3 (0.8) 16.47 36 | 6 T2 16.84 37 | 7 1R 19.54 38 | 8 NBTree 14.10 39 | 9 CN2 16.00 40 | 10 HOODG 14.82 41 | 11 FSS Naive Bayes 14.05 42 | 12 IDTM (Decision table) 14.46 43 | 13 Naive-Bayes 16.12 44 | 14 Nearest-neighbor (1) 21.42 45 | 15 Nearest-neighbor (3) 20.35 46 | 16 OC1 15.04 47 | 17 Pebls Crashed. Unknown why (bounds WERE increased) 48 | 49 | 4. Relevant Information Paragraph: 50 | Extraction was done by Barry Becker from the 1994 Census database. A set 51 | of reasonably clean records was extracted using the following conditions: 52 | ((AAGE>16) && (AGI>100) && (AFNLWGT>1)&& (HRSWK>0)) 53 | 54 | 5. Number of Instances 55 | 48842 instances, mix of continuous and discrete (train=32561, test=16281) 56 | 45222 if instances with unknown values are removed (train=30162, test=15060) 57 | Split into train-test using MLC++ GenCVFiles (2/3, 1/3 random). 58 | 59 | 6. Number of Attributes 60 | 6 continuous, 8 nominal attributes. 61 | 62 | 7. Attribute Information: 63 | 64 | age: continuous. 65 | workclass: Private, Self-emp-not-inc, Self-emp-inc, Federal-gov, Local-gov, State-gov, Without-pay, Never-worked. 66 | fnlwgt: continuous. 67 | education: Bachelors, Some-college, 11th, HS-grad, Prof-school, Assoc-acdm, Assoc-voc, 9th, 7th-8th, 12th, Masters, 1st-4th, 10th, Doctorate, 5th-6th, Preschool. 68 | education-num: continuous. 69 | marital-status: Married-civ-spouse, Divorced, Never-married, Separated, Widowed, Married-spouse-absent, Married-AF-spouse. 70 | occupation: Tech-support, Craft-repair, Other-service, Sales, Exec-managerial, Prof-specialty, Handlers-cleaners, Machine-op-inspct, Adm-clerical, Farming-fishing, Transport-moving, Priv-house-serv, Protective-serv, Armed-Forces. 71 | relationship: Wife, Own-child, Husband, Not-in-family, Other-relative, Unmarried. 
72 | race: White, Asian-Pac-Islander, Amer-Indian-Eskimo, Other, Black. 73 | sex: Female, Male. 74 | capital-gain: continuous. 75 | capital-loss: continuous. 76 | hours-per-week: continuous. 77 | native-country: United-States, Cambodia, England, Puerto-Rico, Canada, Germany, Outlying-US(Guam-USVI-etc), India, Japan, Greece, South, China, Cuba, Iran, Honduras, Philippines, Italy, Poland, Jamaica, Vietnam, Mexico, Portugal, Ireland, France, Dominican-Republic, Laos, Ecuador, Taiwan, Haiti, Columbia, Hungary, Guatemala, Nicaragua, Scotland, Thailand, Yugoslavia, El-Salvador, Trinadad&Tobago, Peru, Hong, Holand-Netherlands. 78 | class: >50K, <=50K 79 | 80 | 8. Missing Attribute Values: 81 | 82 | 7% have missing values. 83 | 84 | 9. Class Distribution: 85 | 86 | Probability for the label '>50K' : 23.93% / 24.78% (without unknowns) 87 | Probability for the label '<=50K' : 76.07% / 75.22% (without unknowns) 88 | 89 | 90 | -------------------------------------------------------------------------------- /ctr_of_recommendation/Wide&Deep_Demo/wide_deep.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from absl import flags 3 | from absl import app 4 | 5 | 6 | # 1. 最基本的特征: 7 | 8 | # Continuous columns. 
Wide和Deep组件都会用到。 9 | age = tf.feature_column.numeric_column('age') 10 | education_num = tf.feature_column.numeric_column('education_num') 11 | capital_gain = tf.feature_column.numeric_column('capital_gain') 12 | capital_loss = tf.feature_column.numeric_column('capital_loss') 13 | hours_per_week = tf.feature_column.numeric_column('hours_per_week') 14 | 15 | # 离散特征 16 | education = tf.feature_column.categorical_column_with_vocabulary_list( 17 | 'education', [ 18 | 'Bachelors', 'HS-grad', '11th', 'Masters', '9th', 'Some-college', 19 | 'Assoc-acdm', 'Assoc-voc', '7th-8th', 'Doctorate', 'Prof-school', 20 | '5th-6th', '10th', '1st-4th', 'Preschool', '12th']) 21 | 22 | marital_status = tf.feature_column.categorical_column_with_vocabulary_list( 23 | 'marital_status', [ 24 | 'Married-civ-spouse', 'Divorced', 'Married-spouse-absent', 25 | 'Never-married', 'Separated', 'Married-AF-spouse', 'Widowed']) 26 | 27 | relationship = tf.feature_column.categorical_column_with_vocabulary_list( 28 | 'relationship', [ 29 | 'Husband', 'Not-in-family', 'Wife', 'Own-child', 'Unmarried', 30 | 'Other-relative']) 31 | 32 | workclass = tf.feature_column.categorical_column_with_vocabulary_list( 33 | 'workclass', [ 34 | 'Self-emp-not-inc', 'Private', 'State-gov', 'Federal-gov', 35 | 'Local-gov', '?', 'Self-emp-inc', 'Without-pay', 'Never-worked']) 36 | 37 | # 展示一下这个API 38 | occupation = tf.feature_column.categorical_column_with_hash_bucket( 39 | 'occupation', hash_bucket_size=1000 40 | ) 41 | 42 | # Transformations 43 | age_buckets = tf.feature_column.bucketized_column( 44 | age, boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65] 45 | ) 46 | 47 | # 2. 
The Wide Model: Linear Model with CrossedFeatureColumns 48 | """ 49 | The wide model is a linear model with a wide set of *sparse and crossed feature* columns 50 | Wide部分用了一个规范化后的连续特征age_buckets,其他的连续特征没有使用 51 | """ 52 | base_columns = [ 53 | # 全是离散特征 54 | education, marital_status, relationship, workclass, occupation, 55 | age_buckets, 56 | ] 57 | 58 | crossed_columns = [ 59 | tf.feature_column.crossed_column( 60 | ['education', 'occupation'], hash_bucket_size=1000), 61 | tf.feature_column.crossed_column( 62 | [age_buckets, 'education', 'occupation'], hash_bucket_size=1000 63 | ) 64 | ] 65 | 66 | # 3. The Deep Model: Neural Network with Embeddings 67 | """ 68 | 1. Sparse Features -> Embedding vector -> 串联(Embedding vector, 连续特征) -> 输入到Hidden Layer 69 | 2. Embedding Values随机初始化 70 | 3. 另外一种处理离散特征的方法是:one-hot or multi-hot representation. 但是仅仅适用于维度较低的,embedding是更加通用的做法 71 | 4. embedding_column(embedding);indicator_column(multi-hot); 72 | """ 73 | deep_columns = [ 74 | age, 75 | education_num, 76 | capital_gain, 77 | capital_loss, 78 | hours_per_week, 79 | tf.feature_column.indicator_column(workclass), 80 | tf.feature_column.indicator_column(education), 81 | tf.feature_column.indicator_column(marital_status), 82 | tf.feature_column.indicator_column(relationship), 83 | 84 | # To show an example of embedding 85 | tf.feature_column.embedding_column(occupation, dimension=8) 86 | ] 87 | 88 | model_dir = './model/wide_deep' 89 | 90 | # 4. Combine Wide & Deep:wide基础上组合Deep 91 | model = tf.estimator.DNNLinearCombinedClassifier( 92 | model_dir = model_dir, 93 | linear_feature_columns=base_columns + crossed_columns, 94 | dnn_feature_columns=deep_columns, 95 | dnn_hidden_units=[100,50] 96 | ) 97 | 98 | # 5. 
Train & Evaluate:训练和评估 99 | _CSV_COLUMNS = [ 100 | 'age', 'workclass', 'fnlwgt', 'education', 'education_num', 101 | 'marital_status', 'occupation', 'relationship', 'race', 'gender', 102 | 'capital_gain', 'capital_loss', 'hours_per_week', 'native_country', 103 | 'income_bracket' 104 | ] 105 | _CSV_COLUMN_DEFAULTS = [[0], [''], [0], [''], [0], [''], [''], [''], [''], [''], 106 | [0], [0], [0], [''], ['']] 107 | _NUM_EXAMPLES = { 108 | 'train': 32561, 109 | 'validation': 16281, 110 | } 111 | 112 | def input_fn(data_file, num_epochs, shuffle, batch_size): 113 | """为Estimator创建一个input function""" 114 | assert tf.gfile.Exists(data_file), "{0} not found.".format(data_file) 115 | 116 | def parse_csv(line): 117 | print("Parsing", data_file) 118 | # tf.decode_csv会把csv文件转换成很a list of Tensor,一列一个。record_defaults用于指明每一列的缺失值用什么填充 119 | columns = tf.decode_csv(line, record_defaults=_CSV_COLUMN_DEFAULTS) 120 | features = dict(zip(_CSV_COLUMNS, columns)) 121 | labels = features.pop('income_bracket') 122 | return features, tf.equal(labels, '>50K') # tf.equal(x, y) 返回一个bool类型Tensor, 表示x == y, element-wise 123 | 124 | dataset = tf.data.TextLineDataset(data_file) \ 125 | .map(parse_csv, num_parallel_calls=5) 126 | 127 | if shuffle: 128 | dataset = dataset.shuffle(buffer_size=_NUM_EXAMPLES['train'] + _NUM_EXAMPLES['validation']) 129 | 130 | dataset = dataset.repeat(num_epochs) 131 | dataset = dataset.batch(batch_size) 132 | 133 | iterator = dataset.make_one_shot_iterator() 134 | batch_features, batch_labels = iterator.get_next() 135 | return batch_features, batch_labels 136 | 137 | # Train + Eval 138 | train_epochs = 6 139 | epochs_per_eval = 2 140 | batch_size = 40 141 | train_file = './data/adult.data' 142 | test_file = './data/adult.test' 143 | 144 | for n in range(train_epochs // epochs_per_eval): 145 | model.train(input_fn=lambda: input_fn(train_file, epochs_per_eval, True, batch_size)) 146 | results = model.evaluate(input_fn=lambda: input_fn( 147 | test_file, 1, False, 
batch_size)) 148 | 149 | # Display Eval results 150 | print("Results at epoch {0}".format((n+1) * epochs_per_eval)) 151 | print('-'*30) 152 | 153 | for key in sorted(results): 154 | print("{0:20}: {1:.4f}".format(key, results[key])) 155 | 156 | 157 | 158 | -------------------------------------------------------------------------------- /related_papers/2016--Wide & Deep Learning for Recommender Systems.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/related_papers/2016--Wide & Deep Learning for Recommender Systems.pdf -------------------------------------------------------------------------------- /related_papers/2016-PNN-Product-based Neural Networks for User Response Prediction.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/related_papers/2016-PNN-Product-based Neural Networks for User Response Prediction.pdf -------------------------------------------------------------------------------- /related_papers/2017-Google-Deep & Cross Network for Ad Click Predictions.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/related_papers/2017-Google-Deep & Cross Network for Ad Click Predictions.pdf -------------------------------------------------------------------------------- /related_papers/2017-阿里-Deep Interest Network for Click-Through Rate Prediction.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/related_papers/2017-阿里-Deep Interest Network for Click-Through Rate Prediction.pdf 
-------------------------------------------------------------------------------- /related_papers/2017-阿里-MLR-Learning Piece-wise Linear Models from Large Scale Data for Ad Click Prediction.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/related_papers/2017-阿里-MLR-Learning Piece-wise Linear Models from Large Scale Data for Ad Click Prediction.pdf -------------------------------------------------------------------------------- /related_papers/An overview of gradient descent optimization algorithms.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/related_papers/An overview of gradient descent optimization algorithms.pdf -------------------------------------------------------------------------------- /related_papers/Attentional Factorization Machines- Learning the Weight of Feature Interactions via Attention Networks.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/related_papers/Attentional Factorization Machines- Learning the Weight of Feature Interactions via Attention Networks.pdf -------------------------------------------------------------------------------- /related_papers/Deep Neural Networks for YouTube Recommendations.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/related_papers/Deep Neural Networks for YouTube Recommendations.pdf -------------------------------------------------------------------------------- /related_papers/DeepFM.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/related_papers/DeepFM.pdf -------------------------------------------------------------------------------- /related_papers/FFM.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/related_papers/FFM.pdf -------------------------------------------------------------------------------- /related_papers/FM.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/related_papers/FM.pdf -------------------------------------------------------------------------------- /related_papers/NFM-Neural Factorization Machines for Sparse Predictive Analytics.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/related_papers/NFM-Neural Factorization Machines for Sparse Predictive Analytics.pdf -------------------------------------------------------------------------------- /related_papers/README.md: -------------------------------------------------------------------------------- 1 | # 已读论文汇总 2 | 3 | 机器学习与深度学习系列(CTR 预估): 4 | 这里汇总了本人已经读过的相关论文,并附上自己当时的笔记,分享给大家。 5 | 6 | 7 | -------------------------------------------------------------------------------- /related_papers/(GBDT+LR)Practical Lessons from Predicting Clicks on Ads at Facebook.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jesse-csj/TensorFlow_Practice/1c578bce4a941654e862c0c6a635b51859a39d7b/related_papers/(GBDT+LR)Practical Lessons from Predicting Clicks on Ads at
Facebook.pdf -------------------------------------------------------------------------------- /test.txt: -------------------------------------------------------------------------------- 1 | Mysterious code 2 | GG --------------------------------------------------------------------------------