├── .gitignore
├── image
│   ├── saddle_point.png
│   ├── 20191215145026134.png
│   ├── image-20210705202821879.png
│   ├── image-20210708141144900.png
│   ├── image-20210708142307912.png
│   ├── image-20210708142402038.png
│   └── image-20210708170933328.png
├── model
│   ├── __init__.py
│   ├── evaluation.py
│   ├── FocalLoss.py
│   ├── get_data.py
│   ├── lr_cosine.py
│   ├── deepfm.py
│   ├── mmoe.py
│   └── ple.py
└── readme.md

/.gitignore:
--------------------------------------------------------------------------------
__pycache__
data
user_data
--------------------------------------------------------------------------------
/image/saddle_point.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SummerRaining/multi_task-learning/HEAD/image/saddle_point.png
--------------------------------------------------------------------------------
/image/20191215145026134.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SummerRaining/multi_task-learning/HEAD/image/20191215145026134.png
--------------------------------------------------------------------------------
/model/__init__.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Fri Jul  9 21:08:26 2021

@author: tunan
"""
--------------------------------------------------------------------------------
/image/image-20210705202821879.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SummerRaining/multi_task-learning/HEAD/image/image-20210705202821879.png
--------------------------------------------------------------------------------
/image/image-20210708141144900.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SummerRaining/multi_task-learning/HEAD/image/image-20210708141144900.png
--------------------------------------------------------------------------------
/image/image-20210708142307912.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SummerRaining/multi_task-learning/HEAD/image/image-20210708142307912.png
--------------------------------------------------------------------------------
/image/image-20210708142402038.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SummerRaining/multi_task-learning/HEAD/image/image-20210708142402038.png
--------------------------------------------------------------------------------
/image/image-20210708170933328.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SummerRaining/multi_task-learning/HEAD/image/image-20210708170933328.png
--------------------------------------------------------------------------------
/model/evaluation.py:
--------------------------------------------------------------------------------
# coding: utf-8
from collections import defaultdict

import numpy as np
from sklearn.metrics import roc_auc_score


def uAUC(labels, preds, user_id_list):
    """Calculate the user-averaged AUC (uAUC)."""
    user_pred = defaultdict(list)
    user_truth = defaultdict(list)
    for idx, truth in enumerate(labels):
        user_id = user_id_list[idx]
        user_pred[user_id].append(preds[idx])
        user_truth[user_id].append(truth)

    user_flag = defaultdict(lambda: False)
    for user_id in set(user_id_list):
        truths = user_truth[user_id]
        flag = False
        # flag stays False when a user's samples are all positive or all negative,
        # since AUC is undefined in that case
        for i in range(len(truths) - 1):
            if truths[i] != truths[i + 1]:
                flag = True
                break
        user_flag[user_id] = flag

    total_auc = 0.0
    size = 0.0
    for user_id in user_flag:
        if user_flag[user_id]:
            auc = roc_auc_score(np.asarray(user_truth[user_id]), np.asarray(user_pred[user_id]))
            total_auc += auc
            size += 1.0
    user_auc = float(total_auc) / size
    return user_auc


def compute_weighted_score(score_dict, weight_dict):
    '''Compute the weighted uAUC over several actions.
    Input:
        score_dict: dict mapping each action to its uAUC
        weight_dict: dict mapping each action to its weight
    Output:
        score: weighted uAUC, float
    '''
    score = 0.0
    weight_sum = 0.0
    for action in score_dict:
        weight = float(weight_dict[action])
        score += weight * score_dict[action]
        weight_sum += weight
    score /= float(weight_sum)
    score = round(score, 6)
    return score


def evaluate_deepctr(val_labels, val_pred_ans, userid_list):
    eval_dict = {}
    target = ["read_comment", "like", "click_avatar", "forward"]
    for i, action in enumerate(target):
        eval_dict[action] = uAUC(val_labels[i], val_pred_ans[i], userid_list)
    print(eval_dict)
    weight_dict = {"read_comment": 4, "like": 3, "click_avatar": 2, "favorite": 1, "forward": 1,
                   "comment": 1, "follow": 1}
    weight_auc = compute_weighted_score(eval_dict, weight_dict)
    print("Weighted uAUC: ", weight_auc)
    return weight_auc
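

# A minimal usage sketch with hypothetical toy data (not part of the original
# file): uAUC averages per-user AUCs, so it only counts users whose labels
# contain both classes.
if __name__ == '__main__':
    labels = [1, 0, 1, 1]
    preds = [0.9, 0.2, 0.6, 0.8]
    user_ids = ['u1', 'u1', 'u1', 'u2']   # u2 has only positives and is skipped
    print(uAUC(labels, preds, user_ids))  # AUC for u1 alone -> 1.0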
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
## Recommendation Model Notes

### Experiment Summary

I ran experiments with DeepFM, MMOE, PLE, and focal loss with learning-rate annealing, training and evaluating each on the WeChat Big Data Challenge dataset. None of the models had their hyperparameters tuned; they were only run end-to-end, so this comparison says nothing about the relative merits of these models.

| Model | read_comment AUC on the validation set |
| -------------------------------- | --------------------------- |
| DeepFM | 0.9233 |
| MMOE | 0.9260 |
| PLE | 0.9224 |
| DeepFM with focal loss and LR annealing | 0.9307 |

To reproduce the experiments:

1. Download the WeChat Big Data Challenge dataset and place it under the data folder. Competition link: https://algo.weixin.qq.com/.
2. Create a user_data folder, then run get_data.py; it applies feature engineering to the dataset and saves the result with pickle.
3. Run deepfm.py, mmoe.py, ple.py, and FocalLoss.py to obtain the results.



### DeepFM

**FM**. What: the factorization machine, used to capture interaction information between sparse features.

Why: high-cardinality categorical variables are usually one-hot encoded, and adding second-order feature interactions inflates the feature count much further, from n to n(n-1), producing a huge number of sparse features.

How: FM adds second-order features on top of logistic regression. Each feature i learns a latent vector $v_i$, and the interaction between any two features is modeled as $\langle v_i,v_j\rangle x_ix_j$. The output is the linear part plus all second-order interactions, followed by a sigmoid activation:

$$ y_{fm} = w_0 + \sum_{i=1}^{n} w_ix_i + \sum_{i<j} \langle v_i,v_j\rangle x_ix_j $$

$$ output = sigmoid(y_{fm}) $$

Advantages:

1. FM learns nd parameters, where d is the latent dimension, far fewer than the n(n-1) parameters of explicit pairwise interactions.
2. It can learn interaction features that never appear in the training set. For a conventional second-order feature, if $x_ix_j$ is always 0 in training, its weight $w_{ij}$ can never be learned; FM instead computes it as $\langle v_i,v_j\rangle$, so each parameter is trained on more data.
3. The second-order term can be decomposed as 0.5 times ("sum then square" minus "square then sum"), reducing the time complexity from O(n²d) to O(nd); see the sketch below.
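
A minimal numpy sketch of the O(nd) decomposition in item 3 (`V` and `x` are made-up toy values):

```python
import numpy as np

n, d = 6, 4                    # n features, d-dimensional latent vectors
V = np.random.randn(n, d)      # hypothetical latent vectors v_i
x = np.random.randn(n)         # hypothetical feature values

# naive O(n^2 d): sum over all pairs i < j of <v_i, v_j> * x_i * x_j
naive = sum(V[i] @ V[j] * x[i] * x[j]
            for i in range(n) for j in range(i + 1, n))

# FM trick, O(nd): 0.5 * ((sum then square) - (square then sum))
xv = V * x[:, None]
fast = 0.5 * (xv.sum(axis=0) ** 2 - (xv ** 2).sum(axis=0)).sum()

assert np.allclose(naive, fast)
```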

**DeepFM**:

Why: FM applies only a simple linear computation to the first-order information of the input features, so a DNN part is added alongside FM to apply more complex non-linear transformations to the input. (Note: the DNN part does not capture explicit high-order interactions between features.)

How:

1. The linear part and the second-order FM part are the same as in FM.

2. The latent vectors learned by FM are concatenated and passed through several fully connected layers, ending in a single output unit.

3. The linear output, the second-order FM output, and the DNN output are summed, and a sigmoid activation produces the model output, as sketched below.

$$ y_{deepfm} = sigmoid(y_{fm} + y_{dnn}) $$

$$ y_{dnn} = dense(concat([v_1, ..., v_n])) $$
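
A minimal Keras-style sketch of step 3, assuming `linear_part` and `fm_part` are already-computed (batch, 1) tensors (the full implementation lives in model/deepfm.py):

```python
from tensorflow.keras.layers import Activation, Add, Concatenate, Dense

def combine_deepfm(linear_part, fm_part, embeddings, hidden_units=(64, 64)):
    # embeddings: list of (batch, embed_dim) latent vectors, one per feature
    dnn = Concatenate(axis=-1)(embeddings)
    for units in hidden_units:
        dnn = Dense(units, activation='relu')(dnn)
    dnn_out = Dense(1)(dnn)                       # y_dnn
    # y = sigmoid(y_linear + y_fm + y_dnn)
    return Activation('sigmoid')(Add()([linear_part, fm_part, dnn_out]))
```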

### MMOE

Structure in brief:

1. The categorical and continuous inputs are embedded separately and concatenated, then passed through a few fully connected layers to obtain a fixed-dimensional vector x.
2. x is fed through m sub-networks (expert networks) to produce m expert vectors; each sub-network is a fully connected layer of the same size. For each task a gate network is learned: it takes x as input, outputs a softmax probability vector, and that vector is used to compute a weighted average of the m experts (see the sketch below).
3. The weighted average of the experts is the input to the corresponding task tower. Each tower takes that weighted average, applies a fully connected layer down to a one-dimensional output, and performs the classification or regression task.
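
A minimal sketch of the gate-weighted expert mixing in step 2, equivalent in spirit to the matmul inside `Mmoe_layer` in model/mmoe.py:

```python
import tensorflow as tf

def mix_experts(x, expert_layers, gate_layer):
    # expert_layers: list of Dense(expert_dim); gate_layer: Dense(n_expert, softmax)
    experts = tf.stack([e(x) for e in expert_layers], axis=1)  # (bs, n_expert, expert_dim)
    gate = tf.expand_dims(gate_layer(x), axis=-1)              # (bs, n_expert, 1)
    return tf.reduce_sum(experts * gate, axis=1)               # (bs, expert_dim)
```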

### PLE

Structure in brief:

1. The idea behind PLE is to solve the seesaw problem in multi-task training. In multi-task models such as MMOE, the gradient of every task's loss updates all of the parameters. But the features different tasks need can conflict, or even be opposites, so task A's performance may rise while task B's drops.
2. PLE gives each task its own expert module, while all tasks additionally share one module. For example: task A has two experts (e1, e2), task B has three experts (e3, e4, e5), and the two tasks share two experts (e6, e7). Task A's tower input is the weighted average of the four experts (e1, e2, e6, e7), with weights learned by the gate network gate1(x); likewise, task B's tower input is the weighted average of the five experts (e3, e4, e5, e6, e7), with weights from gate2(x). Each task's output is produced by a fully connected layer on top of its tower; the sketch after this list shows the per-task mixing.
3. Advantage: when task A's loss updates the parameters, it only touches A's own module (e1, e2) and the shared module, not B's module. This avoids the conflicts that arise when the two tasks learn simultaneously.
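
A minimal sketch of the per-task mixing in step 2, paraphrasing `PleLayer` in model/ple.py:

```python
import tensorflow as tf

def ple_towers(x, task_experts, shared_experts, gates):
    # task_experts: one list of Dense(expert_dim) layers per task
    # shared_experts: list of Dense(expert_dim); gates: one softmax Dense per task
    shared = [e(x) for e in shared_experts]
    towers = []
    for experts, gate in zip(task_experts, gates):
        own = [e(x) for e in experts]
        stacked = tf.stack(shared + own, axis=1)           # (bs, n_shared + n_own, dim)
        g = tf.expand_dims(gate(x), axis=-1)               # (bs, n_shared + n_own, 1)
        towers.append(tf.reduce_sum(stacked * g, axis=1))  # (bs, dim)
    return towers
```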

### Focal Loss

1. Idea: rebalance the weight between easy and hard samples by shrinking the loss of samples that are already predicted accurately.
2. Formula: $$loss = -(1-p_t)^{\gamma}\log(p_t),\quad p_t=\begin{cases} p, & y_t = 1\\ 1-p, & y_t = 0 \end{cases}$$ When a sample is misclassified, $p_t$ is close to 0, $1-p_t$ is close to 1, the weight is roughly 1, and the loss is almost unaffected. When a sample is classified correctly, $1-p_t$ is close to 0, the weight approaches 0, and the loss of the correctly classified sample is reduced; the snippet below makes this concrete.
3. In object detection, negative samples are extremely numerous and easy to classify, while positive samples are rare and hard. Focal loss therefore automatically down-weights the negative samples.
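
A small numeric check of the weighting (toy probabilities, gamma = 2, no alpha term):

```python
import numpy as np

def focal_term(p, y, gamma=2.0):
    p_t = p if y == 1 else 1.0 - p
    return -(1.0 - p_t) ** gamma * np.log(p_t)

# an easy positive (p = 0.95) contributes far less than a hard one (p = 0.1)
print(focal_term(0.95, 1))  # ~0.00013
print(focal_term(0.10, 1))  # ~1.87
# plain cross-entropy for comparison: -log(0.95) ~ 0.05, -log(0.10) ~ 2.30
```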
--------------------------------------------------------------------------------
/model/FocalLoss.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Thu Jul  1 17:30:45 2021

@author: shujie.wang
"""

import tensorflow.keras.backend as K
import tensorflow as tf

def binary_focal_loss(gamma=2, alpha=0.25):
    """
    Binary form of focal loss.

    focal_loss(p_t) = -alpha_t * (1 - p_t)**gamma * log(p_t)
    where p = sigmoid(x), p_t = p or 1 - p depending on if the label is 1 or 0, respectively.
    References:
        https://arxiv.org/pdf/1708.02002.pdf
    Usage:
        model.compile(loss=[binary_focal_loss(alpha=.25, gamma=2)], metrics=["accuracy"], optimizer=adam)
    """
    alpha = tf.constant(alpha, dtype=tf.float32)
    gamma = tf.constant(gamma, dtype=tf.float32)

    def binary_focal_loss_fixed(y_true, y_pred):
        """
        y_true shape must be (None, 1)
        y_pred must be computed after the sigmoid
        """
        y_true = tf.cast(y_true, tf.float32)
        # positives are weighted by (1 - alpha), negatives by alpha
        alpha_t = y_true*(1-alpha) + (K.ones_like(y_true)-y_true)*alpha

        p_t = y_true*y_pred + (K.ones_like(y_true)-y_true)*(K.ones_like(y_true)-y_pred) + K.epsilon()
        focal_loss = - alpha_t * K.pow((K.ones_like(y_true)-p_t),gamma) * K.log(p_t)
        return K.mean(focal_loss)
    return binary_focal_loss_fixed

if __name__ == '__main__':
    import pickle as pkl
    import numpy as np
    import gc
    from deepfm import build_FM
    from tensorflow.keras import optimizers
    from lr_cosine import CosineAnnealing

    target = ["read_comment", "like", "click_avatar", "forward"]
    sparse_features = ['userid', 'feedid', 'authorid', 'bgm_song_id', 'bgm_singer_id']
    varlen_features = ['manual_tag_list','manual_keyword_list']
    dense_features = ['videoplayseconds']
    # 1. load the data
    with open('../user_data/data.pkl','rb') as f:
        train,val,test,encoder = pkl.load(f)
    train_num = len(train)

    # 2. build the input feature settings
    sparse_max_len = {f:len(encoder[f]) + 1 for f in sparse_features}
    varlens_max_len = {f:len(encoder[f]) + 1 for f in varlen_features}
    feature_names = sparse_features+varlen_features+dense_features

    # 3. generate input data for the model: a dict mapping feature name to values
    train_model_input = {name: train[name] if name not in varlen_features else np.stack(train[name]) for name in feature_names}
    val_model_input = {name: val[name] if name not in varlen_features else np.stack(val[name]) for name in feature_names}
    test_model_input = {name: test[name] if name not in varlen_features else np.stack(test[name]) for name in feature_names}

    train_labels = train['read_comment'].values
    val_labels = val['read_comment'].values

    del train,val  # drop what is no longer needed to free memory
    gc.collect()

    model = build_FM(sparse_features,dense_features,sparse_max_len,embed_dim = 16,
                 dnn_hidden_units=(64,64),varlens_cols = varlen_features,varlens_max_len = varlens_max_len,
                 dropout = 0.1,embedding_reg_l2 = 1e-6,dnn_reg_l2 = 0.0)

    loss = binary_focal_loss(gamma=2, alpha=0.1)
    reduce_lr = CosineAnnealing(eta_max = 1,eta_min = 0,
                                num_step_per_epoch=(train_num//10240)+1,lr_list = [1,2,2])
    adam = optimizers.Adam(learning_rate=0.01, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
    model.compile(adam, loss = loss ,metrics = [tf.keras.metrics.AUC()],)

    history = model.fit(train_model_input, train_labels,validation_data = (val_model_input,val_labels),
                        batch_size=10240, epochs=5, verbose=1,callbacks=[reduce_lr],)
--------------------------------------------------------------------------------
/model/get_data.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Tue Jun 15 23:35:55 2021

@author: tunan
"""

import copy
import pickle as pkl

import numpy as np
import pandas as pd
from tensorflow.python.keras.preprocessing.sequence import pad_sequences

def split(x):
    if not isinstance(x,str):
        return []
    key_ans = x.strip().split(';')
    for key in key_ans:
        if key not in key2index:
            # Notice : input value 0 is a special "padding", so we do not use 0 to encode a valid feature for sequence input
            key2index[key] = len(key2index) + 1
    return list(map(lambda x: key2index[x], key_ans))

def preprocess(sample,dense_features):
    '''
    Feature engineering: log-transform the numeric features; shift the id features by +1; fill missing values with 0.
    '''
    sample[dense_features] = sample[dense_features].fillna(0.0)
    sample[dense_features] = np.log(sample[dense_features] + 1.0)

    sample[["authorid", "bgm_song_id", "bgm_singer_id"]] += 1  # 0 is reserved for unknown
    sample[["authorid", "bgm_song_id", "bgm_singer_id"]] = \
        sample[["authorid", "bgm_song_id", "bgm_singer_id"]].fillna(0)
    sample[["authorid", "bgm_song_id", "bgm_singer_id"]] = \
        sample[["authorid", "bgm_song_id", "bgm_singer_id"]].astype(int)
    return sample


if __name__ == "__main__":
    target = ["read_comment", "like", "click_avatar", "forward"]
    sparse_features = ['userid', 'feedid', 'authorid', 'bgm_song_id', 'bgm_singer_id']
    varlen_features = ['manual_tag_list','manual_keyword_list']
    dense_features = ['videoplayseconds']
    data = pd.read_csv('../data/wechat_algo_data1/user_action.csv')
    test = pd.read_csv('../data/wechat_algo_data1/test_a.csv')  # data to predict
    test['date_'] = 15
    data = pd.concat([data,test])

    # 1. merge features into data
    feed = pd.read_csv('../data/wechat_algo_data1/feed_info.csv')  # feed (video) features
    feed = feed[['feedid', 'authorid', 'videoplayseconds', 'bgm_song_id', 'bgm_singer_id','manual_tag_list','manual_keyword_list']]
    data = data.merge(feed, how='left',on='feedid')  # join authorid, bgm_song_id, etc. onto the behavior data
    data = preprocess(data,dense_features)  # feature engineering
    data = data[dense_features+sparse_features+varlen_features+['date_']+target]

    # 2. encode the variable-length features
    encoder = {}
    global key2index
    for f in ['manual_keyword_list','manual_tag_list']:
        key2index = {}
        f_list = list(map(split, data[f].values))
        f_length = np.array(list(map(len, f_list)))
        max_len = max(f_length)
        print(f'{f} max length is {max_len}')
        # Notice : padding=`post`
        data[f] = list(pad_sequences(f_list, maxlen=max_len, padding='post', ))
        encoder[f] = copy.copy(key2index)

    # 3. encode the sparse features
    for featid in sparse_features:
        print(f"encode {featid} feature id")
        encoder[featid] = {uid:ucode+1 for ucode,uid in enumerate(data[featid].unique())}
        data[featid] = data[featid].apply(lambda x: encoder[featid].get(x,0))

    print('data.shape', data.shape)
    print('data.columns', data.columns.tolist())
    print('unique date_: ', data['date_'].unique())
    data = data.sample(frac = 1.0)

    train = data[data['date_'] < 14].drop(['date_'],axis = 1)
    val = data[data['date_'] == 14].drop(['date_'],axis = 1)  # day-14 samples form the validation set
    test = data[data['date_'] == 15].drop(['date_'],axis = 1)
    with open('../user_data/data.pkl','wb') as f:
        pkl.dump([train,val,test,encoder],f)
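
# A toy illustration of the variable-length encoding above (hypothetical values):
# two 'manual_tag_list' cells "1;5;3" and "5" pass through split(), giving
# key2index = {'1': 1, '5': 2, '3': 3} and sequences [[1, 2, 3], [2]];
# pad_sequences(..., padding='post') then yields [[1, 2, 3], [2, 0, 0]],
# where 0 is the padding id reserved inside split().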
--------------------------------------------------------------------------------
/model/lr_cosine.py:
--------------------------------------------------------------------------------
import keras
from keras import callbacks, layers
import keras.backend as K
import matplotlib.pyplot as plt
import numpy as np

class CosineAnnealing(callbacks.Callback):
    """Cosine annealing with warm restarts, following eq. (5) of
    "Decoupled Weight Decay Regularization".

    # Arguments
        eta_max: float, eta_max in eq(5).
        eta_min: float, eta_min in eq(5).
        num_step_per_epoch: int, number of batches per epoch.
        lr_list: list of ints, restart period lengths in epochs
            (Ti in eq(5) is derived from these; T_cur is tracked internally).
        verbose: 0 or 1.
    """

    def __init__(self, eta_max=1, eta_min=0, num_step_per_epoch = 100,lr_list = [],verbose=0, **kwargs):

        super(CosineAnnealing, self).__init__()

        global lr_log

        self.lr_list = lr_list
        lr_log = []
        self.eta_max = eta_max
        self.eta_min = eta_min
        self.verbose = verbose

        self.iteration = 0
        self.cur_epoch = 0
        self.num_start = 0
        self.total_epoch = lr_list[self.num_start]
        self.num_step_per_epoch = num_step_per_epoch
        self.total_iteration = self.total_epoch*num_step_per_epoch

    def on_train_begin(self, logs=None):
        self.lr = K.get_value(self.model.optimizer.lr)
        # allow training to be split over several fit() calls
        eta_t = self.eta_min + (self.eta_max - self.eta_min) * 0.5 * (1 + np.cos(np.pi * self.iteration / self.total_iteration))
        new_lr = self.lr * eta_t
        K.set_value(self.model.optimizer.lr, new_lr)

    def on_train_end(self, logs=None):
        K.set_value(self.model.optimizer.lr, self.lr)

    def on_epoch_end(self, epoch, logs=None):
        self.cur_epoch += 1
        if self.cur_epoch == self.total_epoch:
            self.cur_epoch = 0
            self.num_start += 1
            self.total_epoch = self.lr_list[min(self.num_start,len(self.lr_list)-1)]

            self.iteration = 0
            self.total_iteration = self.total_epoch*self.num_step_per_epoch

    def on_batch_end(self, epoch, logs=None):
        self.iteration += 1
        logs = logs or {}
        logs['lr'] = K.get_value(self.model.optimizer.lr)

        eta_t = self.eta_min + (self.eta_max - self.eta_min) * 0.5 * (1 + np.cos(np.pi * self.iteration / self.total_iteration))
        new_lr = self.lr * eta_t
        K.set_value(self.model.optimizer.lr, new_lr)
        if self.verbose > 0:
            print('\nEpoch %05d: CosineAnnealing '
                  'learning rate to %s.' % (epoch + 1, new_lr))
        lr_log.append(logs['lr'])

if __name__ == '__main__':
    # build a toy regression dataset
    num_train, num_test = 2000, 100
    num_features = 200

    true_w, true_b = np.ones((num_features, 1)) * 0.01, 0.05

    features = np.random.normal(0, 1, (num_train + num_test, num_features))
    noises = np.random.normal(0, 1, (num_train + num_test, 1)) * 0.01
    labels = np.dot(features, true_w) + true_b + noises

    train_data, test_data = features[:num_train, :], features[num_train:, :]
    train_labels, test_labels = labels[:num_train], labels[num_train:]

    # define the model
    model = keras.models.Sequential([
        layers.Dense(units=128, activation='relu', input_dim=200),
        layers.Dense(128, activation='relu', kernel_regularizer=keras.regularizers.l2(0.00)),
        layers.Dense(1)
    ])

    model.summary()
    model.compile(optimizer='adam',loss='mse',metrics=['mse'])
    # eta_max / eta_min: eta decays between them and is multiplied onto the base lr.
    # num_step_per_epoch: how many batches are trained per epoch.
    # lr_list: the restart period lengths; here one period of 2 epochs, then 4, 8, 16, 32 epochs.

    lr_list = [2,4,8,16,32]
    reduce_lr = CosineAnnealing(eta_max=1, eta_min=0, num_step_per_epoch=(2000 // 16), lr_list = lr_list)
    # for e in range(62):
    #     model.fit(train_data, train_labels, batch_size=16, epochs=1, validation_data=(test_data, test_labels), callbacks=[reduce_lr])
    model.fit(train_data, train_labels, batch_size=16, epochs=62, validation_data=(test_data, test_labels), callbacks=[reduce_lr])
    plt.plot(lr_log)
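
# Concretely, with base lr 0.01 and lr_list = [2, 4, 8, 16, 32], the recorded
# lr_log holds five cosine arcs: within each period the lr starts near 0.01,
# follows half a cosine down toward 0, and jumps back up at each restart;
# plt.plot(lr_log) above visualizes exactly this shape.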
--------------------------------------------------------------------------------
/model/deepfm.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Thu Jul  1 17:27:12 2021

@author: shujie.wang
"""

import gc
import pickle as pkl

import numpy as np
import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras import optimizers, regularizers
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model
from tensorflow.python.keras.layers import Layer

class MyMeanPool(Layer):
    """Mean pooling over the unmasked positions of a sequence."""
    def __init__(self, axis, **kwargs):
        super(MyMeanPool, self).__init__(**kwargs)
        self.axis = axis

    def call(self, x, mask):
        mask = tf.expand_dims(tf.cast(mask,tf.float32),axis = -1)
        x = x * mask
        return K.sum(x, axis=self.axis) / (K.sum(mask, axis=self.axis) + 1e-9)

def secondary_fm(W):
    # sum first, then square
    frs_part = Add()(W)
    frs_part = Multiply()([frs_part,frs_part])
    # square first, then sum
    scd_part = Add()([Multiply()([_x,_x]) for _x in W])
    # subtract and multiply by 0.5
    fm_part = Subtract()([frs_part,scd_part])
    fm_part = Lambda(lambda x:K.sum(x,axis = 1,keepdims = True)*0.5)(fm_part)
    return fm_part


def build_FM(sparse_cols,dense_cols,sparse_max_len,embed_dim = 16,
             dnn_hidden_units=(128, 128),varlens_cols = [],varlens_max_len = {},
             dropout = 0,embedding_reg_l2 = 1e-6,dnn_reg_l2 = 0.0):
    '''
    sparse_cols, dense_cols: names of the categorical and continuous features.
    sparse_max_len: dict mapping each categorical feature to its vocabulary size.
    varlens_cols: names of the variable-length categorical features.
    varlens_max_len: dict mapping each variable-length feature to its vocabulary size.
    '''

    # inputs, split into sparse, variable-length, and dense parts
    sparse_inputs = {f:Input([1],name = f) for f in sparse_cols}
    dense_inputs = {f:Input([1],name = f) for f in dense_cols}
    varlens_inputs = {f:Input([None,1],name = f) for f in varlens_cols}

    input_embed = {}
    # embed each categorical feature to k dims to obtain its latent vector v_i
    for f in sparse_cols:
        _input = sparse_inputs[f]
        embedding = Embedding(sparse_max_len[f], embed_dim,
                              embeddings_regularizer=tf.keras.regularizers.l2(embedding_reg_l2))
        input_embed[f] = Flatten()(embedding(_input))  # (bs, k)

    # variable-length (multi-label) categorical features: embed and mean-pool
    for f in varlens_inputs:
        _input = varlens_inputs[f]
        mask = Masking(mask_value = 0).compute_mask(_input)
        embedding = Embedding(varlens_max_len[f], embed_dim,
                              embeddings_regularizer=tf.keras.regularizers.l2(1e-6))
        _embed = Reshape([-1,embed_dim])(embedding(_input))
        out_embed = MyMeanPool(axis=1)(_embed,mask)
        input_embed[f] = out_embed

    # continuous features: project to the embedding dimension
    for f in dense_inputs:
        _input = dense_inputs[f]
        _embed = Dense(embed_dim,use_bias = False,activation = 'linear')(_input)
        input_embed[f] = _embed

    feature_name = sparse_cols+varlens_cols+dense_cols
    fm_embed = [input_embed[f] for f in feature_name]
    fm_part = secondary_fm(fm_embed)

    # concatenate all embeddings into the DNN input
    dnn_feature = Concatenate(axis = -1)(fm_embed)
    for num in dnn_hidden_units:
        dnn_feature = Dropout(dropout)(Dense(num,activation='relu',
                            kernel_regularizer=regularizers.l2(dnn_reg_l2))(dnn_feature))

    dnn_output = Dense(1,activation = 'linear', kernel_regularizer=regularizers.l2(dnn_reg_l2),
                       use_bias = True)(dnn_feature)
    output = Activation('sigmoid')(Add()([fm_part,dnn_output]))
    inputs = [sparse_inputs[f] for f in sparse_inputs]+[varlens_inputs[f] for f in varlens_inputs]\
        +[dense_inputs[f] for f in dense_inputs]
    model = Model(inputs,output)
    return model

if __name__ == '__main__':
    target = ["read_comment", "like", "click_avatar", "forward"]
    sparse_features = ['userid', 'feedid', 'authorid', 'bgm_song_id', 'bgm_singer_id']
    varlen_features = ['manual_tag_list','manual_keyword_list']
    dense_features = ['videoplayseconds']
    # 1. load the data
    with open('../user_data/data.pkl','rb') as f:
        train,val,test,encoder = pkl.load(f)
    train_num = len(train)

    # 2. build the input feature settings
    sparse_max_len = {f:len(encoder[f]) + 1 for f in sparse_features}
    varlens_max_len = {f:len(encoder[f]) + 1 for f in varlen_features}
    feature_names = sparse_features+varlen_features+dense_features

    # 3. generate input data for the model: a dict mapping feature name to values
    train_model_input = {name: train[name] if name not in varlen_features else np.stack(train[name]) for name in feature_names}
    val_model_input = {name: val[name] if name not in varlen_features else np.stack(val[name]) for name in feature_names}
    test_model_input = {name: test[name] if name not in varlen_features else np.stack(test[name]) for name in feature_names}

    train_labels = train['read_comment'].values
    val_labels = val['read_comment'].values

    del train,val  # drop what is no longer needed to free memory
    gc.collect()

    model = build_FM(sparse_features,dense_features,sparse_max_len,embed_dim = 16,
                 dnn_hidden_units=(64,64),varlens_cols = varlen_features,varlens_max_len = varlens_max_len,
                 dropout = 0.1,embedding_reg_l2 = 1e-6,dnn_reg_l2 = 0.0)

    adam = optimizers.Adam(learning_rate=0.01, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
    model.compile(adam, loss = 'binary_crossentropy' ,metrics = [tf.keras.metrics.AUC()],)

    history = model.fit(train_model_input, train_labels,validation_data = (val_model_input,val_labels),
                        batch_size=10240, epochs=4, verbose=1)
--------------------------------------------------------------------------------
/model/mmoe.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Thu Jul  1 17:28:27 2021

@author: shujie.wang
"""

import gc
import pickle as pkl

import numpy as np
import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras import optimizers, regularizers
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model
from tensorflow.python.keras.layers import Layer

class MyMeanPool(Layer):
    """Mean pooling over the unmasked positions of a sequence."""
    def __init__(self, axis, **kwargs):
        super(MyMeanPool, self).__init__(**kwargs)
        self.axis = axis

    def call(self, x, mask):
        mask = tf.expand_dims(tf.cast(mask,tf.float32),axis = -1)
        x = x * mask
        return K.sum(x, axis=self.axis) / (K.sum(mask, axis=self.axis) + 1e-9)

class Mmoe_layer(tf.keras.layers.Layer):
    def __init__(self,expert_dim,n_expert,n_task):
        super(Mmoe_layer, self).__init__()
        self.n_task = n_task
        self.expert_layer = [Dense(expert_dim,activation = 'relu') for i in range(n_expert)]
        self.gate_layers = [Dense(n_expert,activation = 'softmax') for i in range(n_task)]

    def call(self,x):
        # expert networks
        E_net = [expert(x) for expert in self.expert_layer]
        E_net = Concatenate(axis = 1)([e[:,tf.newaxis,:] for e in E_net])  # (bs, n_expert, n_dims)
        # one gate network per task
        gate_net = [gate(x) for gate in self.gate_layers]  # n_task tensors of shape (bs, n_expert)

        # each tower input is the gate-weighted combination of all experts
        towers = []
        for i in range(self.n_task):
            g = tf.expand_dims(gate_net[i],axis = -1)  # (bs, n_expert, 1)
            _tower = tf.matmul(E_net, g,transpose_a=True)
            towers.append(Flatten()(_tower))  # (bs, expert_dim)

        return towers

def build_mmoe(sparse_cols,dense_cols,sparse_max_len,embed_dim,expert_dim,
               varlens_cols,varlens_max_len,n_expert,n_task,target = [],
               dnn_hidden_units = (64,),dnn_reg_l2 = 1e-5,drop_rate = 0.1,
               embedding_reg_l2 = 1e-6):

    # inputs, split into sparse, variable-length, and dense parts
    sparse_inputs = {f:Input([1],name = f) for f in sparse_cols}
    dense_inputs = {f:Input([1],name = f) for f in dense_cols}
    varlens_inputs = {f:Input([None,1],name = f) for f in varlens_cols}

    input_embed = {}
    # embed each categorical feature to k dims
    for f in sparse_cols:
        _input = sparse_inputs[f]
        embedding = Embedding(sparse_max_len[f], embed_dim,
                              embeddings_regularizer=tf.keras.regularizers.l2(embedding_reg_l2))
        input_embed[f] = Flatten()(embedding(_input))  # (bs, k)

    # variable-length (multi-label) categorical features: embed and mean-pool
    for f in varlens_inputs:
        _input = varlens_inputs[f]
        mask = Masking(mask_value = 0).compute_mask(_input)
        embedding = Embedding(varlens_max_len[f], embed_dim,
                              embeddings_regularizer=tf.keras.regularizers.l2(1e-6))
        _embed = Reshape([-1,embed_dim])(embedding(_input))
        out_embed = MyMeanPool(axis=1)(_embed,mask)
        input_embed[f] = out_embed

    input_embed.update(dense_inputs)  # add the continuous features
    input_embed = Concatenate(axis = -1)([input_embed[f] for f in input_embed])
    for num in dnn_hidden_units:
        input_embed = Dropout(drop_rate)(Dense(num,activation = 'relu',
                            kernel_regularizer=regularizers.l2(dnn_reg_l2))(input_embed))

    # MMOE layer
    towers = Mmoe_layer(expert_dim,n_expert,n_task)(input_embed)
    outputs = [Dense(1,activation = 'sigmoid', kernel_regularizer=regularizers.l2(dnn_reg_l2),
                     name = f,use_bias = True)(_t) for _t,f in zip(towers,target)]
    inputs = [sparse_inputs[f] for f in sparse_inputs]+[varlens_inputs[f] for f in varlens_inputs]\
        +[dense_inputs[f] for f in dense_inputs]
    model = Model(inputs,outputs)
    return model

if __name__ == '__main__':
    target = ["read_comment", "like", "click_avatar", "forward"]
    sparse_features = ['userid', 'feedid', 'authorid', 'bgm_song_id', 'bgm_singer_id']
    varlen_features = ['manual_tag_list','manual_keyword_list']
    dense_features = ['videoplayseconds']
    # 1. load the data
    with open('../user_data/data.pkl','rb') as f:
        train,val,test,encoder = pkl.load(f)
    train_num = len(train)

    # 2. build the input feature settings
    sparse_max_len = {f:len(encoder[f]) + 1 for f in sparse_features}
    varlens_max_len = {f:len(encoder[f]) + 1 for f in varlen_features}
    feature_names = sparse_features+varlen_features+dense_features

    # 3. generate input data for the model: a dict mapping feature name to values
    train_model_input = {name: train[name] if name not in varlen_features else np.stack(train[name]) for name in feature_names}
    val_model_input = {name: val[name] if name not in varlen_features else np.stack(val[name]) for name in feature_names}
    test_model_input = {name: test[name] if name not in varlen_features else np.stack(test[name]) for name in feature_names}

    train_labels = [train[y].values for y in target]
    val_labels = [val[y].values for y in target]

    del train,val  # drop what is no longer needed to free memory
    gc.collect()

    # 4. define the model, train, predict, and evaluate
    model = build_mmoe(sparse_features,dense_features,sparse_max_len,embed_dim = 16,expert_dim = 32,
                       n_task = 4,n_expert = 4,varlens_cols = varlen_features,varlens_max_len = varlens_max_len,
                       dnn_hidden_units = (64,64),target = target,dnn_reg_l2 = 1e-5,drop_rate = 0.1)

    adam = optimizers.Adam(learning_rate=0.01, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
    model.compile(adam, loss = 'binary_crossentropy' ,metrics = [tf.keras.metrics.AUC()],)

    history = model.fit(train_model_input, train_labels,validation_data = (val_model_input,val_labels),
                        batch_size=10240, epochs=4, verbose=1)
--------------------------------------------------------------------------------
/model/ple.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Thu Jul  1 17:29:13 2021

@author: shujie.wang
"""

import gc
import pickle as pkl

import numpy as np
import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras import optimizers, regularizers
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model
from tensorflow.python.keras.layers import Layer

class MyMeanPool(Layer):
    """Mean pooling over the unmasked positions of a sequence."""
    def __init__(self, axis, **kwargs):
        super(MyMeanPool, self).__init__(**kwargs)
        self.axis = axis

    def call(self, x, mask):
        mask = tf.expand_dims(tf.cast(mask,tf.float32),axis = -1)
        x = x * mask
        return K.sum(x, axis=self.axis) / (K.sum(mask, axis=self.axis) + 1e-9)

class PleLayer(tf.keras.layers.Layer):
    '''
    n_experts: list, number of experts per task; [2,3] means the first task uses 2 experts and the second uses 3.
    n_expert_share: int, number of experts in the shared part.
    expert_dim: int, output dimension of each expert network.
    n_task: int, number of tasks.
    '''
    def __init__(self,n_task,n_experts,expert_dim,n_expert_share,dnn_reg_l2 = 1e-5):
        super(PleLayer, self).__init__()
        self.n_task = n_task

        # task-specific expert networks plus one shared group
        self.E_layer = []
        for i in range(n_task):
            sub_exp = [Dense(expert_dim,activation = 'relu') for j in range(n_experts[i])]
            self.E_layer.append(sub_exp)

        self.share_layer = [Dense(expert_dim,activation = 'relu') for j in range(n_expert_share)]
        # gate networks
        self.gate_layers = [Dense(n_expert_share+n_experts[i],kernel_regularizer=regularizers.l2(dnn_reg_l2),
                                  activation = 'softmax') for i in range(n_task)]

    def call(self,x):
        # task-specific and shared expert outputs
        E_net = [[expert(x) for expert in sub_expert] for sub_expert in self.E_layer]
        share_net = [expert(x) for expert in self.share_layer]

        # weight the shared and task-specific expert outputs by the gate
        towers = []
        for i in range(self.n_task):
            g = self.gate_layers[i](x)
            g = tf.expand_dims(g,axis = -1)  # (bs, n_expert_share + n_experts[i], 1)
            _e = share_net+E_net[i]
            _e = Concatenate(axis = 1)([expert[:,tf.newaxis,:] for expert in _e])  # (bs, n_expert_share + n_experts[i], expert_dim)
            _tower = tf.matmul(_e, g,transpose_a=True)
            towers.append(Flatten()(_tower))  # (bs, expert_dim)
        return towers

def build_ple(sparse_cols,dense_cols,sparse_max_len,embed_dim,expert_dim = 4,
              varlens_cols = [],varlens_max_len = [],dnn_hidden_units = (64,64),
              n_task = 2,n_experts = [2,2],n_expert_share = 4,dnn_reg_l2 = 1e-6,
              drop_rate = 0.0,embedding_reg_l2 = 1e-6,targets = []):

    # inputs, split into sparse, variable-length, and dense parts
    sparse_inputs = {f:Input([1],name = f) for f in sparse_cols}
    dense_inputs = {f:Input([1],name = f) for f in dense_cols}
    varlens_inputs = {f:Input([None,1],name = f) for f in varlens_cols}

    input_embed = {}
    # embed each categorical feature to k dims
    for f in sparse_cols:
        _input = sparse_inputs[f]
        embedding = Embedding(sparse_max_len[f], embed_dim,
                              embeddings_regularizer=tf.keras.regularizers.l2(embedding_reg_l2))
        input_embed[f] = Flatten()(embedding(_input))  # (bs, k)

    # variable-length (multi-label) categorical features: embed and mean-pool
    for f in varlens_inputs:
        _input = varlens_inputs[f]
        mask = Masking(mask_value = 0).compute_mask(_input)
        embedding = Embedding(varlens_max_len[f], embed_dim,
                              embeddings_regularizer=tf.keras.regularizers.l2(1e-6))
        _embed = Reshape([-1,embed_dim])(embedding(_input))
        out_embed = MyMeanPool(axis=1)(_embed,mask)
        input_embed[f] = out_embed

    input_embed.update(dense_inputs)  # add the continuous features
    input_embed = Concatenate(axis = -1)([input_embed[f] for f in input_embed])

    for num in dnn_hidden_units:
        input_embed = Dropout(drop_rate)(Dense(num,activation = 'relu',
                            kernel_regularizer=regularizers.l2(dnn_reg_l2))(input_embed))
    # PLE layer
    towers = PleLayer(n_task,n_experts,expert_dim,n_expert_share)(input_embed)
    outputs = [Dense(1,activation = 'sigmoid',kernel_regularizer=regularizers.l2(dnn_reg_l2),
                     name = f,use_bias = True)(_t) for f,_t in zip(targets,towers)]
    inputs = [sparse_inputs[f] for f in sparse_inputs]+[varlens_inputs[f] for f in varlens_inputs]\
        +[dense_inputs[f] for f in dense_inputs]
    model = Model(inputs,outputs)
    return model

if __name__ == '__main__':
    target = ["read_comment", "like", "click_avatar", "forward"]
    sparse_features = ['userid', 'feedid', 'authorid', 'bgm_song_id', 'bgm_singer_id']
    varlen_features = ['manual_tag_list','manual_keyword_list']
    dense_features = ['videoplayseconds']
    # 1. load the data
    with open('../user_data/data.pkl','rb') as f:
        train,val,test,encoder = pkl.load(f)
    train_num = len(train)

    # 2. build the input feature settings
    sparse_max_len = {f:len(encoder[f]) + 1 for f in sparse_features}
    varlens_max_len = {f:len(encoder[f]) + 1 for f in varlen_features}
    feature_names = sparse_features+varlen_features+dense_features

    # 3. generate input data for the model: a dict mapping feature name to values
    train_model_input = {name: train[name] if name not in varlen_features else np.stack(train[name]) for name in feature_names}
    val_model_input = {name: val[name] if name not in varlen_features else np.stack(val[name]) for name in feature_names}
    test_model_input = {name: test[name] if name not in varlen_features else np.stack(test[name]) for name in feature_names}

    train_labels = [train[y].values for y in target]
    val_labels = [val[y].values for y in target]

    del train,val  # drop what is no longer needed to free memory
    gc.collect()

    # 4. define the model, train, predict, and evaluate
    model = build_ple(sparse_features,dense_features,sparse_max_len,embed_dim = 16,expert_dim = 32,
                      varlens_cols = varlen_features,varlens_max_len = varlens_max_len,dnn_hidden_units = (64,),
                      n_task = 4,n_experts = [4,4,4,4],n_expert_share = 8,dnn_reg_l2 = 1e-6,
                      drop_rate = 0.1,embedding_reg_l2 = 1e-6,targets = target)

    adam = optimizers.Adam(learning_rate=0.01, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
    model.compile(adam, loss = 'binary_crossentropy' ,metrics = [tf.keras.metrics.AUC()],)

    history = model.fit(train_model_input, train_labels,validation_data = (val_model_input,val_labels),
                        batch_size=10240, epochs=4, verbose=1)
--------------------------------------------------------------------------------