├── LTR
│   ├── __init__.py
│   ├── pics
│   │   ├── pic1.jpg
│   │   ├── pic2.jpg
│   │   ├── pic3.jpg
│   │   └── pic4.jpg
│   ├── README.md
│   ├── train_ltr_tower.py
│   ├── config.py
│   ├── PairWiseTower.py
│   └── build_feature.py
├── pics
│   ├── 1.jpg
│   ├── 2.jpg
│   ├── 3.jpg
│   ├── 4.jpg
│   ├── 5.jpg
│   ├── 6.jpg
│   ├── 7.jpeg
│   ├── 8.png
│   ├── 9.jpeg
│   ├── 10.jpeg
│   ├── 11.jpeg
│   └── 12.jpeg
├── data_config.py
├── README.md
├── common_utils.py
├── config.py
├── ParallelTowers.py
├── train_main.py
├── MultiInterestSenetTT.py
├── tool_layers.py
└── model_layers.py

/LTR/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/pics/1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hrwleo/recall_two_towers/HEAD/pics/1.jpg
--------------------------------------------------------------------------------
/pics/2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hrwleo/recall_two_towers/HEAD/pics/2.jpg
--------------------------------------------------------------------------------
/pics/3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hrwleo/recall_two_towers/HEAD/pics/3.jpg
--------------------------------------------------------------------------------
/pics/4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hrwleo/recall_two_towers/HEAD/pics/4.jpg
--------------------------------------------------------------------------------
/pics/5.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hrwleo/recall_two_towers/HEAD/pics/5.jpg
--------------------------------------------------------------------------------
/pics/6.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hrwleo/recall_two_towers/HEAD/pics/6.jpg
--------------------------------------------------------------------------------
/pics/7.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hrwleo/recall_two_towers/HEAD/pics/7.jpeg
--------------------------------------------------------------------------------
/pics/8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hrwleo/recall_two_towers/HEAD/pics/8.png
--------------------------------------------------------------------------------
/pics/9.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hrwleo/recall_two_towers/HEAD/pics/9.jpeg
--------------------------------------------------------------------------------
/pics/10.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hrwleo/recall_two_towers/HEAD/pics/10.jpeg
--------------------------------------------------------------------------------
/pics/11.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hrwleo/recall_two_towers/HEAD/pics/11.jpeg
--------------------------------------------------------------------------------
/pics/12.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hrwleo/recall_two_towers/HEAD/pics/12.jpeg
--------------------------------------------------------------------------------
/LTR/pics/pic1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hrwleo/recall_two_towers/HEAD/LTR/pics/pic1.jpg
--------------------------------------------------------------------------------
/LTR/pics/pic2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hrwleo/recall_two_towers/HEAD/LTR/pics/pic2.jpg
--------------------------------------------------------------------------------
/LTR/pics/pic3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hrwleo/recall_two_towers/HEAD/LTR/pics/pic3.jpg
--------------------------------------------------------------------------------
/LTR/pics/pic4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hrwleo/recall_two_towers/HEAD/LTR/pics/pic4.jpg
--------------------------------------------------------------------------------
/LTR/README.md:
--------------------------------------------------------------------------------
# Reference implementation of a Learning-to-Rank based two-tower recall model

### 2023.03.01 (reproduced by stefan)

![figure](./pics/pic1.jpg)

![figure](./pics/pic2.jpg)

![figure](./pics/pic3.jpg)

![figure](./pics/pic4.jpg)
--------------------------------------------------------------------------------
/data_config.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# coding: utf-8
# author: stefan 2022-02-28

import tensorflow as tf

recall_config = {
    # user


    # prop


    # label
    "is_click": tf.io.FixedLenFeature([], tf.int64),

}


data_config = {
    "recall": recall_config,
}
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Two-tower model practice notes

## Negative sampling



## Model structure optimizations
- base two-tower

![figure](./pics/1.jpg)

- two-tower (ResNet)

![figure](./pics/4.jpg)

- two-tower + SENet

![figure](./pics/2.jpg)

- Multi-Interest SENet Two-Towers (MISTT)

![figure](./pics/3.jpg)

- Tencent parallel two-tower (Parallel Towers)

![figure](./pics/5.jpg)


![figure](./pics/6.jpg)

--------------------------------------------------------------------------------
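Every tower variant listed above shares the same scoring head: the two tower embeddings are combined with an inner product, scaled by a temperature, and squashed with a sigmoid. A minimal sketch of that shared pattern in plain TensorFlow (names are illustrative, not the repo's API):

import tensorflow as tf

def score(user_emb, item_emb, temperature=0.05):
    # inner product of the two tower outputs, scaled by a temperature
    inner = tf.reduce_sum(tf.multiply(user_emb, item_emb), axis=1, keepdims=True)
    return tf.sigmoid(inner / temperature)

u = tf.random.normal([4, 64])  # a batch of user tower outputs
i = tf.random.normal([4, 64])  # a batch of item tower outputs
print(score(u, i).shape)       # (4, 1)

A smaller temperature sharpens the sigmoid around zero, which is why the configs below expose it as a tunable flag.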
/common_utils.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# coding: utf-8
# author: stefan 2022-02-28


import tensorflow as tf
import os


def parse_exmp(example_proto, feature_description):
    feature_dict = tf.io.parse_single_example(example_proto, feature_description)
    label = feature_dict.pop("is_click")
    return feature_dict, label


def get_file_list(data):
    if isinstance(data, str) and os.path.isdir(data):
        files = [data + '/' + x for x in os.listdir(data)]
    else:
        files = data

    return files


def read_data(path, shuffle_buffer_size=20000, batch_size=2048, if_shuffle=False, feat_desc=None):
    file_names = get_file_list(path)
    dataset = tf.data.Dataset.list_files(file_names)
    dataset = dataset.interleave(
        lambda filename: tf.data.TFRecordDataset(filename),  # read each listed file, not the whole list again
        cycle_length=8
    )
    if if_shuffle:
        dataset = dataset.shuffle(shuffle_buffer_size)
    dataset = dataset.map(lambda x: parse_exmp(x, feat_desc), num_parallel_calls=8)
    dataset = dataset.batch(batch_size)
    return dataset
--------------------------------------------------------------------------------
/LTR/train_ltr_tower.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# coding: utf-8

# import sys
from data_process.common_utils import read_data_ltr
from data_process.data_config import data_config
from recall.LTR import config
from recall.LTR.PairWiseTower import build_ltr_tower
import tensorflow as tf

FLAGS = config.FLAGS

# read data
train_set = read_data_ltr(path=FLAGS.train_data, class_num=FLAGS.class_num, batch_size=FLAGS.batch_size, if_shuffle=True,
                          feat_desc=data_config['recall-ltr'])

test_set = read_data_ltr(path=FLAGS.test_data, class_num=FLAGS.class_num, batch_size=FLAGS.batch_size, if_shuffle=False,
                         feat_desc=data_config['recall-ltr'])

# define model
all_model, user_model, item_model = build_ltr_tower(FLAGS.temperature, FLAGS.city_dict, FLAGS.shangquan_dict, FLAGS.comm_dict, FLAGS.price_dict,
                                                    FLAGS.area_dict, FLAGS.tower_num_layer, FLAGS.tower_num_layer_units.split(','))

tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=FLAGS.online_logs, embeddings_freq=1,
                                                      embeddings_data=train_set)

all_model.fit(
    x=train_set,
    epochs=FLAGS.epoch,
    callbacks=[tensorboard_callback]
)

# save models
item_model.save(FLAGS.item_model_pb, save_format='tf')  # save the item model for fetching item embeddings offline

user_model.save(FLAGS.user_model_pb, save_format='tf')  # save the user model as a pb model for online prediction
--------------------------------------------------------------------------------
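How the two SavedModels are meant to be consumed downstream, as a hedged sketch (paths follow the flags below; loading may need custom_objects for the custom layers, and the matmul stands in for a real ANN index):

import tensorflow as tf

user_model = tf.keras.models.load_model("./user_model_pb")   # online, query-side tower
item_model = tf.keras.models.load_model("./item_model_pb")   # offline, item-side tower

# offline: item_embs = item_model.predict(item_feature_dict)       # (num_items, emb_dim)
# online:  user_emb = user_model.predict(user_feature_dict)        # (1, emb_dim)
#          scores = tf.matmul(user_emb, item_embs, transpose_b=True)
#          top_k = tf.math.top_k(scores, k=100)                    # recall candidates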
flags.DEFINE_string("tower_num_layer_units", "256,128,64", "hidden units of layers") 32 | flags.DEFINE_integer("class_num", 6, "class_num") 33 | 34 | flags.DEFINE_integer("epoch", 20 , "Training epochs") # 40 35 | flags.DEFINE_integer("batch_size", 1024, "Training batch size") # 40960 36 | 37 | 38 | 39 | FLAGS = flags.FLAGS 40 | 41 | -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | # author: stefan 2022-02-28 4 | 5 | import tensorflow as tf 6 | import datetime 7 | 8 | """ 9 | recall 10 | 模型相关参数配置 11 | """ 12 | 13 | flags = tf.compat.v1.flags 14 | 15 | flags.DEFINE_boolean("if_use_senet", False, "plus senet module.") 16 | flags.DEFINE_boolean("if_use_MISTT", False, "plus MISTT module.") 17 | flags.DEFINE_boolean("if_use_twoResNet", False, "plus ResNet module.") 18 | flags.DEFINE_boolean("if_use_parallel", True, "plus parallel module.") 19 | 20 | flags.DEFINE_string("item_model_pb", "./item_model_pb", "Base directory for the item model.") 21 | flags.DEFINE_string("user_model_pb", "./user_model_pb", "Base directory for the user model.") 22 | flags.DEFINE_string("item_model_weights", "./item_model_weights", "Base directory for the item model weights.") 23 | 24 | flags.DEFINE_string("city_dict", "../demo_data/city_dict", "Path to the city_dict.") 25 | flags.DEFINE_string("shangquan_dict", "../demo_data/shangquan_dict", "Path to the shangquan_dict.") 26 | flags.DEFINE_string("comm_dict", "../demo_data/comm_dict", "Path to the comm_dict.") 27 | flags.DEFINE_string("price_dict", "../demo_data/price_dict", "Path to the price_dict.") 28 | flags.DEFINE_string("area_dict", "../demo_data/area_dict", "Path to the area_dict.") 29 | 30 | flags.DEFINE_string("train_data", "../demo_data/part-r-00003-Copy1", "Path to the train data") 31 | flags.DEFINE_string("eval_data", "../demo_data/part-r-00003-Copy1", "Path to the evaluation data.") 32 | 33 | flags.DEFINE_string("online_logs", "../online_logs", "Path to the log.") 34 | 35 | flags.DEFINE_integer("batch_size", 1024, "Training batch size") # 40960 36 | flags.DEFINE_integer("epoch", 1, "Training epochs") # 40 37 | flags.DEFINE_float("temperature", 0.001, "temperature") 38 | flags.DEFINE_integer("tower_num_layer", 3, "num of layers") 39 | flags.DEFINE_string("tower_num_layer_units", "256,128,64", "hidden units of layers") 40 | flags.DEFINE_string("cin_size", "64,64", "a list of the number of layers") 41 | 42 | FLAGS = flags.FLAGS 43 | -------------------------------------------------------------------------------- /ParallelTowers.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | # author: stefan 2022-03-03 4 | 5 | """ 6 | 参考腾讯并联双塔模型架构 尝试复现模型 7 | 8 | 主要创新思路在于: 9 | 1、尝试通过"并联"多个双塔结构(MLP、DCN、FM、FFM、CIN)增加双塔模型的"宽度"来缓解双塔内积的瓶颈从而提升效果; 10 | 2、对"并联"的多个双塔引入 LR 进行带权融合,LR 权重最终融入到 userembedding 中,使得最终的模型仍然保持的内积形式。 11 | """ 12 | 13 | from tensorflow.keras.models import Model 14 | from tensorflow.keras.optimizers import Adam 15 | from layers.tool_layers import * 16 | from layers.model_layers import MyDense, parallel_layer 17 | 18 | 19 | # build model 20 | def parallel_towers(temperature, city_dict, shangquan_dict, comm_dict, price_dict, area_dict, tower_num_layer, 21 | tower_num_layer_units): 22 | # ********************************** 输入层 **********************************# 23 | # define input 24 | 25 | 26 | # common emb 
/ParallelTowers.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# coding: utf-8
# author: stefan 2022-03-03

"""
Attempted reproduction of Tencent's parallel two-tower model architecture.

The main ideas are:
1. "Parallelize" several two-tower structures (MLP, DCN, FM, FFM, CIN) to widen the two-tower model, easing the
   bottleneck of the single inner product and improving effectiveness;
2. Fuse the parallel towers with an LR layer whose weights are then folded into the user embedding, so the final
   model still keeps the pure inner-product form.
"""

from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from layers.tool_layers import *
from layers.model_layers import MyDense, parallel_layer


# build model
def parallel_towers(temperature, city_dict, shangquan_dict, comm_dict, price_dict, area_dict, tower_num_layer,
                    tower_num_layer_units):
    # ********************************** input layer **********************************#
    # define input


    # common emb (only one city's data at the moment, adjust dim accordingly)


    # user feature


    # item features


    # ********************************** representation layer **********************************#
    user_mlp_inputs = concatenate([...],
                                  axis=-1, name='user_mlp_inputs')
    user_fm_inputs = tf.stack([...],
                              axis=1, name='user_fm_inputs')
    user_dcn_inputs = concatenate([...], axis=-1, name="user_dcn_inputs")
    user_cin_inputs = tf.stack([...], axis=1, name="user_cin_inputs")

    # compute the parallel outputs of the user tower
    user_mlp_dcn_out, user_fm_out, user_cin_out = parallel_layer(tower_num_layer, tower_num_layer_units,
                                                                 user_mlp_inputs, user_fm_inputs, user_dcn_inputs,
                                                                 user_cin_inputs)

    item_mlp_inputs = concatenate([...],
                                  axis=-1, name="item_mlp_inputs")
    item_fm_inputs = tf.stack([...], axis=1,
                              name="item_fm_inputs")

    item_dcn_inputs = concatenate([...], axis=-1, name="item_dcn_inputs")
    item_cin_inputs = tf.stack([...], axis=1, name="item_cin_inputs")

    # compute the parallel outputs of the item tower
    item_mlp_dcn_out, item_fm_out, item_cin_out = parallel_layer(tower_num_layer, tower_num_layer_units,
                                                                 item_mlp_inputs, item_fm_inputs, item_dcn_inputs,
                                                                 item_cin_inputs)

    # ********************************** matching layer **********************************#
    # take the Hadamard product per parallel branch, so the two sides interact at the top layer
    user_item_mlp_dcn_hdm = tf.multiply(user_mlp_dcn_out, item_mlp_dcn_out)
    user_item_fm_hdm = tf.multiply(user_fm_out, item_fm_out)
    user_item_cin_hdm = tf.multiply(user_cin_out, item_cin_out)

    # use an LR layer to learn the fusion weights of the parallel towers
    my_dense = MyDense(1)
    concat_inputs = concatenate([user_item_mlp_dcn_hdm, user_item_fm_hdm, user_item_cin_hdm], axis=-1)
    out = my_dense(concat_inputs)
    lr_weights = my_dense.weights[0]
    lr_weights = tf.reshape(lr_weights, [1, lr_weights.shape[0]])

    user_input = [...]
    item_input = [...]

    # embedding outputs of each tower
    user_parallel_out = concatenate([user_mlp_dcn_out, user_fm_out, user_cin_out], axis=-1, name="user_tower_out")
    item_parallel_out = concatenate([item_mlp_dcn_out, item_fm_out, item_cin_out], axis=-1, name="item_tower")

    user_output = tf.multiply(user_parallel_out, lr_weights, name="user_tower")  # fold the LR weights into the user embedding in advance
    item_output = item_parallel_out

    user_model = Model(inputs=user_input, outputs=user_output)
    item_model = Model(inputs=item_input, outputs=item_output)
    all_model = Model(inputs=user_input + item_input, outputs=out)  # the model is evaluated on `out`

    all_model.compile(
        loss='binary_crossentropy',
        optimizer=Adam(1e-3),
        metrics=[tf.keras.metrics.AUC()]
    )

    all_model.summary()

    return all_model, user_model, item_model
--------------------------------------------------------------------------------
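Why folding the LR weights into the user embedding preserves the inner-product form: for the ranking score, w · (u ⊙ i) = (w ⊙ u) · i, so multiplying the user vector by the LR weights once offline yields the same ordering as applying the LR on top of the Hadamard product (the sigmoid and bias only shift scores monotonically). A quick numeric check, as a standalone sketch:

import tensorflow as tf

u = tf.random.normal([1, 8])   # user parallel output
v = tf.random.normal([1, 8])   # item parallel output
w = tf.random.normal([8, 1])   # LR weights

lr_score = tf.matmul(tf.multiply(u, v), w)                  # LR over the Hadamard product
fused_u = tf.multiply(u, tf.reshape(w, [1, -1]))            # fold the weights into the user embedding
dot_score = tf.reduce_sum(tf.multiply(fused_u, v), axis=1)  # plain inner product
print(float(lr_score[0, 0]), float(dot_score[0]))           # identical up to float error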
/train_main.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# coding: utf-8

# import sys
# sys.path.insert(0, r'/code/Stefan/909_recall/dm-recommend-tf2/')  # online: add the search directory to sys.path

from data_process.common_utils import *
from data_process.data_config import *
from recall import config
from recall import TwoTowers, TwoTowersSENet, MultiInterestSenetTT, TwoResNet, ParallelTowers

FLAGS = config.FLAGS

# read data
train_set = read_data(path=FLAGS.train_data, batch_size=FLAGS.batch_size, if_shuffle=True,
                      feat_desc=data_config["909-recall"])
test_set = read_data(path=FLAGS.eval_data, batch_size=FLAGS.batch_size, feat_desc=data_config["909-recall"])

# define models
if FLAGS.if_use_senet:
    all_model, user_model, item_model = TwoTowersSENet.two_towers(FLAGS.temperature, FLAGS.city_dict,
                                                                  FLAGS.shangquan_dict,
                                                                  FLAGS.comm_dict, FLAGS.tower_num_layer,
                                                                  FLAGS.tower_num_layer_units.split(','))
elif FLAGS.if_use_MISTT:
    all_model, user_model, item_model = MultiInterestSenetTT.buildMISTT(FLAGS.temperature, FLAGS.city_dict,
                                                                        FLAGS.shangquan_dict,
                                                                        FLAGS.comm_dict, FLAGS.tower_num_layer,
                                                                        FLAGS.tower_num_layer_units.split(','))
elif FLAGS.if_use_twoResNet:
    all_model, user_model, item_model = TwoResNet.two_towers(FLAGS.temperature, FLAGS.city_dict,
                                                             FLAGS.shangquan_dict,
                                                             FLAGS.comm_dict, FLAGS.tower_num_layer,
                                                             FLAGS.tower_num_layer_units.split(','))
elif FLAGS.if_use_parallel:
    all_model, user_model, item_model = ParallelTowers.parallel_towers(FLAGS.temperature, FLAGS.city_dict,
                                                                       FLAGS.shangquan_dict, FLAGS.comm_dict,
                                                                       FLAGS.price_dict, FLAGS.area_dict,
                                                                       FLAGS.tower_num_layer,
                                                                       FLAGS.tower_num_layer_units.split(','))
else:
    all_model, user_model, item_model = TwoTowers.two_towers(FLAGS.temperature, FLAGS.city_dict,
                                                             FLAGS.shangquan_dict,
                                                             FLAGS.comm_dict, FLAGS.tower_num_layer,
                                                             FLAGS.tower_num_layer_units.split(','))

# define callbacks
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=FLAGS.online_logs, embeddings_freq=1,
                                                      embeddings_data=train_set)
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_auc', patience=8)  # early stopping to prevent overfitting
plateau = tf.keras.callbacks.ReduceLROnPlateau(monitor="val_auc", verbose=1, mode='max', factor=0.5,
                                               patience=2)  # reduce the learning rate when the metric stops improving

batch_print_callback = tf.keras.callbacks.LambdaCallback(
    on_batch_begin=lambda batch, logs: print(batch))

# run train
all_model.fit(
    x=train_set,
    epochs=FLAGS.epoch,
    validation_data=test_set,
    callbacks=[tensorboard_callback, early_stopping, plateau, batch_print_callback]
)

# save models
item_model.save_weights(FLAGS.item_model_weights)  # save the item model weights for fetching item embeddings offline

item_model.save(FLAGS.item_model_pb, save_format='tf')  # save the item model as a pb model

user_model.save(FLAGS.user_model_pb, save_format='tf')  # save the user model as a pb model for online prediction
--------------------------------------------------------------------------------
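A sketch of the offline step the save comments point at: rebuild the item tower, restore its trained weights, and batch-export embeddings for the item corpus (the dataset and file names here are placeholders, not the repo's API):

import numpy as np

# rebuild the same architecture, then restore the trained weights
# _, _, item_model = ParallelTowers.parallel_towers(...)
# item_model.load_weights(FLAGS.item_model_weights)

# embs = item_model.predict(item_feature_dataset, batch_size=4096)  # (num_items, emb_dim)
# np.save("item_embeddings.npy", embs)  # feed these into an ANN index for recall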
/MultiInterestSenetTT.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# coding: utf-8
# author: stefan 2022-03-02

"""
Multi-Interest SENet Two-Towers (MISTT)

Based on SENet, make the user-side and item-side embeddings multi-interest. On the user tower, for example, one can
configure several SENet modules, each with its own DNN structure, to strengthen the embedding of a different aspect
of interest. The item side can do the same; or, if the item information is fairly simple, it can keep producing a
single item embedding, as long as its dimension lines up with the user-side multi-interest embeddings.
"""

from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from layers.tool_layers import *
from layers.model_layers import Tower, SENetLayer


# build model
def buildMISTT(temperature, city_dict, shangquan_dict, comm_dict, tower_num_layer, tower_num_layer_units):
    # define input
    ...


    user_feature_judge_1 = SENetLayer(last_shape=int(user_emb_feature_pooling.shape[-1]), reduction=16,
                                      name='user_embedding_senet_1')(user_emb_feature_pooling)

    user_feature_judge_2 = SENetLayer(last_shape=int(user_emb_feature_pooling.shape[-1]), reduction=16,
                                      name='user_embedding_senet_2')(user_emb_feature_pooling)

    user_embedding_senet_1 = []
    user_embedding_senet_2 = []
    for i in range(user_feature_judge_1.shape[1]):  # the two SENet weight tensors share the field dimension, so either one works here
        x_1 = tf.slice(user_feature_judge_1, [0, i], [-1, 1])  # take the i-th activation from SENet-1's activation tensor
        x_2 = tf.slice(user_feature_judge_2, [0, i], [-1, 1])
        emb_with_judge_1 = tf.multiply(user_emb_feature_list[i], x_1)
        emb_with_judge_2 = tf.multiply(user_emb_feature_list[i], x_2)
        user_embedding_senet_1.append(emb_with_judge_1)
        user_embedding_senet_2.append(emb_with_judge_2)

    user_feature_1 = concatenate(user_embedding_senet_1, axis=1, name='user_feature_1')
    user_feature_2 = concatenate(user_embedding_senet_2, axis=1, name='user_feature_2')

    # compute the two user tower outputs
    user_tower_out_1 = Tower(layer_num=tower_num_layer, layer_units=tower_num_layer_units,
                             activation=tf.nn.leaky_relu, name='user_tower_1')(user_feature_1)

    user_tower_out_2 = Tower(layer_num=tower_num_layer, layer_units=tower_num_layer_units,
                             activation=tf.nn.leaky_relu, name='user_tower_2')(user_feature_2)

    user_tower_out = Add(name="user_tower")([user_tower_out_1, user_tower_out_2])

    # item feature
    ...

    item_embed_features_judge_1 = SENetLayer(last_shape=int(item_embed_features_pooling.shape[-1]), reduction=16,
                                             name='item_embedding_senet_1')(item_embed_features_pooling)

    item_embed_features_judge_2 = SENetLayer(last_shape=int(item_embed_features_pooling.shape[-1]), reduction=16,
                                             name='item_embedding_senet_2')(item_embed_features_pooling)

    item_embedding_senet_1 = []
    item_embedding_senet_2 = []
    for i in range(item_embed_features_judge_1.shape[1]):
        x_1 = tf.slice(item_embed_features_judge_1, [0, i], [-1, 1])  # take the i-th activation from the SENet activation tensor
        x_2 = tf.slice(item_embed_features_judge_2, [0, i], [-1, 1])
        emb_with_judge_1 = tf.multiply(item_embedding_features_list[i], x_1)
        emb_with_judge_2 = tf.multiply(item_embedding_features_list[i], x_2)
        item_embedding_senet_1.append(emb_with_judge_1)
        item_embedding_senet_2.append(emb_with_judge_2)

    item_feature_1 = concatenate(item_embedding_senet_1 + [item_prop_age, item_age_power2], axis=1,
                                 name='item_feature_1')
    item_feature_2 = concatenate(item_embedding_senet_2 + [item_prop_age, item_age_power2], axis=1,
                                 name='item_feature_2')

    # compute the two item tower outputs
    item_tower_out_1 = Tower(layer_num=tower_num_layer, layer_units=tower_num_layer_units,
                             activation=tf.nn.leaky_relu, name='item_tower_1')(item_feature_1)
    item_tower_out_2 = Tower(layer_num=tower_num_layer, layer_units=tower_num_layer_units,
                             activation=tf.nn.leaky_relu, name='item_tower_2')(item_feature_2)

    item_tower_out = Add(name="item_tower")([item_tower_out_1, item_tower_out_2])

    # inner product, scaled by a temperature coefficient
    inner_product = tf.reduce_sum(tf.multiply(user_tower_out, item_tower_out), axis=1, keepdims=True)
    out_with_temperature = tf.keras.activations.sigmoid(inner_product / temperature)
    out = Reshape((1,))(out_with_temperature)

    user_input = [...]
    item_input = [...]

    user_output = user_tower_out
    item_output = item_tower_out

    user_model = Model(inputs=user_input, outputs=user_output)
    item_model = Model(inputs=item_input, outputs=item_output)
    all_model = Model(inputs=user_input + item_input, outputs=out)  # the model is evaluated on `out`

    all_model.compile(
        loss='binary_crossentropy',
        optimizer=Adam(1e-3),
        metrics=[tf.keras.metrics.AUC()]
    )

    all_model.summary()

    return all_model, user_model, item_model
--------------------------------------------------------------------------------
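The per-field gating that MISTT applies twice, in isolation: a squeeze-and-excitation block produces one scalar weight per field, and each field embedding is rescaled by its weight. A minimal sketch with plain Keras layers (shapes are illustrative; the repo pools its embeddings upstream, and its SENetLayer behaves like the two Dense layers here):

import tensorflow as tf

field_embs = [tf.random.normal([32, 16]) for _ in range(4)]  # 4 fields, batch 32, dim 16
pooled = tf.stack([tf.reduce_mean(e, axis=1) for e in field_embs], axis=1)  # (32, 4), one summary per field

squeeze = tf.keras.layers.Dense(2, activation='relu')(pooled)            # reduction
weights = tf.keras.layers.Dense(4, activation='hard_sigmoid')(squeeze)   # (32, 4), one gate per field

gated = [field_embs[i] * weights[:, i:i + 1] for i in range(4)]          # rescale each field embedding
print(gated[0].shape)  # (32, 16)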
/LTR/PairWiseTower.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# coding: utf-8
# author: stefan 2023-03-01

"""
Two-tower recall based on Learning to Rank
Pairwise two-tower model
"""
from keras import Model
from keras.layers import Reshape, concatenate, Dense

from layers.model_layers import Tower
from layers.tool_layers import L2_norm_layer, MySoftmax
from recall.LTR.build_feature import build_feature_column
import tensorflow as tf


def build_ltr_tower(temperature, city_dict, shangquan_dict, comm_dict, price_dict, area_dict, tower_num_layer,
                    tower_num_layer_units):
    # inputs and features
    feature_columns = build_feature_column(city_dict, shangquan_dict, comm_dict, price_dict, area_dict)

    # user tower
    # compute the user tower output
    user_tower_out = Tower(layer_num=tower_num_layer, layer_units=tower_num_layer_units,
                           activation=tf.nn.leaky_relu, name='user_tower')(feature_columns['user_ft'])
    user_tower_out_norm = L2_norm_layer(axis=-1, name='user_tower_norm')(user_tower_out)

    # item tower (shared parameters) initialization
    item_model = Tower(layer_num=tower_num_layer, layer_units=tower_num_layer_units,
                       activation=tf.nn.leaky_relu, name='item_tower')

    # output of the pos house tower
    house_pos_out = item_model(feature_columns['pos_house_ft'])
    house_pos_out_norm = L2_norm_layer(axis=-1, name='pos_tower')(house_pos_out)

    # output of the neg1 house tower
    house_neg_1_out = item_model(feature_columns['neg_house_1_ft'])
    house_neg_1_out_norm = L2_norm_layer(axis=-1, name='neg_tower_1')(house_neg_1_out)

    # output of the neg2 house tower
    house_neg_2_out = item_model(feature_columns['neg_house_2_ft'])
    house_neg_2_out_norm = L2_norm_layer(axis=-1, name='neg_tower_2')(house_neg_2_out)

    # output of the neg3 house tower
    house_neg_3_out = item_model(feature_columns['neg_house_3_ft'])
    house_neg_3_out_norm = L2_norm_layer(axis=-1, name='neg_tower_3')(house_neg_3_out)

    # output of the neg4 house tower
    house_neg_4_out = item_model(feature_columns['neg_house_4_ft'])
    house_neg_4_out_norm = L2_norm_layer(axis=-1, name='neg_tower_4')(house_neg_4_out)

    # output of the neg5 house tower
    house_neg_5_out = item_model(feature_columns['neg_house_5_ft'])
    house_neg_5_out_norm = L2_norm_layer(axis=-1, name='neg_tower_5')(house_neg_5_out)

    # user x pos_house inner product
    user_pos_inner_product = tf.reduce_sum(tf.multiply(user_tower_out_norm, house_pos_out_norm), axis=1, keepdims=True)
    out_with_temperature = tf.keras.activations.sigmoid(user_pos_inner_product / temperature)
    pos_out = Reshape((1,))(out_with_temperature)

    # user x neg_house inner products
    user_neg1_inner_product = tf.reduce_sum(tf.multiply(user_tower_out_norm, house_neg_1_out_norm), axis=1, keepdims=True)
    neg1_out = Reshape((1,))(tf.keras.activations.sigmoid(user_neg1_inner_product / temperature))

    user_neg2_inner_product = tf.reduce_sum(tf.multiply(user_tower_out_norm, house_neg_2_out_norm), axis=1, keepdims=True)
    neg2_out = Reshape((1,))(tf.keras.activations.sigmoid(user_neg2_inner_product / temperature))

    user_neg3_inner_product = tf.reduce_sum(tf.multiply(user_tower_out_norm, house_neg_3_out_norm), axis=1, keepdims=True)
    neg3_out = Reshape((1,))(tf.keras.activations.sigmoid(user_neg3_inner_product / temperature))

    user_neg4_inner_product = tf.reduce_sum(tf.multiply(user_tower_out_norm, house_neg_4_out_norm), axis=1, keepdims=True)
    neg4_out = Reshape((1,))(tf.keras.activations.sigmoid(user_neg4_inner_product / temperature))

    user_neg5_inner_product = tf.reduce_sum(tf.multiply(user_tower_out_norm, house_neg_5_out_norm), axis=1, keepdims=True)
    neg5_out = Reshape((1,))(tf.keras.activations.sigmoid(user_neg5_inner_product / temperature))

    # softmax: the similarity between the user embedding and the pos embedding is the label probability the model should predict
    softmax_inputs = {
        'pos': pos_out, 'neg1': neg1_out, 'neg2': neg2_out, 'neg3': neg3_out, 'neg4': neg4_out, 'neg5': neg5_out
    }
    out_logit = MySoftmax()(softmax_inputs)

    user_input = feature_columns['user_inputs']
    item_input = feature_columns['pos_house_inputs']

    user_output = user_tower_out_norm
    item_output = house_pos_out_norm

    user_model = Model(inputs=user_input, outputs=user_output)
    item_model = Model(inputs=item_input, outputs=item_output)
    all_model = Model(inputs=feature_columns['total_inputs'], outputs=out_logit)

    all_model.compile(
        loss=softmaxloss,
        optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
        metrics=[myLtrAcc],
        # run_eagerly=True
    )

    all_model.summary()

    return all_model, user_model, item_model


def myLtrAcc(y_true, y_pred):
    # the prediction counts as correct when the pos sample gets the largest probability
    pred_max_index = tf.equal(tf.argmax(y_pred, axis=-1), 0)
    correct_count = tf.reduce_sum(tf.cast(pred_max_index, tf.float32))
    return correct_count / tf.cast(tf.size(pred_max_index), 'float32')


def softmaxloss(y_true, y_pred):
    pos_pred = tf.cast(tf.slice(y_pred, [0, 0], [-1, 1]), 'float32')
    return tf.reduce_mean(-tf.math.log(pos_pred))


--------------------------------------------------------------------------------
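MySoftmax plus softmaxloss together implement the standard softmax-over-candidates objective: with one positive and five negatives, the loss is -log of the softmax probability assigned to the positive slot. An equivalent, more compact formulation for reference (a sketch, not the repo's code path; note the repo squashes the temperature-scaled scores with a sigmoid before the softmax):

import tensorflow as tf

def pairwise_softmax_loss(pos_score, neg_scores):
    # pos_score: (batch, 1); neg_scores: (batch, num_neg)
    logits = tf.concat([pos_score, neg_scores], axis=1)  # positive sits in slot 0
    log_probs = tf.nn.log_softmax(logits, axis=1)
    return -tf.reduce_mean(log_probs[:, 0])              # -log p(positive)

pos = tf.random.normal([8, 1])
negs = tf.random.normal([8, 5])
print(float(pairwise_softmax_loss(pos, negs)))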
/tool_layers.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# coding: utf-8
# author: stefan 2022-02-28
import os.path

import numpy as np
import pandas as pd
import tensorflow as tf
from keras.layers import *

"""Custom utility layers:
embedding, crossDot, crossMulti, one-hot, dice, etc.
"""


class HashBucketsEmbedding(Layer):
    def __init__(self,
                 num_buckets,
                 emb_size,
                 **kwargs):
        super(HashBucketsEmbedding, self).__init__(**kwargs)
        self.num_buckets = num_buckets
        self.emb_size = emb_size

    def build(self, input_shape):
        super(HashBucketsEmbedding, self).build(input_shape)
        self.embedding_layer = Embedding(input_dim=self.num_buckets + 1,
                                         output_dim=self.emb_size,
                                         name='embedding')

    def call(self, input):
        emb_input = []
        for i in range(input.shape[1]):
            x = tf.as_string(tf.slice(input, [0, i], [-1, 1]))
            emb_input.append(x)
        emb_input = tf.concat(emb_input, 1)
        emb_input = tf.strings.to_hash_bucket_strong(emb_input, self.num_buckets, [1, 2])  # hash
        out = self.embedding_layer(emb_input)
        return out

    def get_config(self):
        config = super().get_config()
        config.update({
            "num_buckets": self.num_buckets,
            "emb_size": self.emb_size,
        })
        return config


# dictionary lookup backed by TensorFlow's built-in static hash table
class VocabLayer(Layer):
    def __init__(self, vocab_path, vocab_name, in_type=tf.int64, out_type=tf.int64, sep='\t', **kwargs):
        super(VocabLayer, self).__init__(**kwargs)
        self.vocab_path = vocab_path
        self.vocab_name = vocab_name
        self.in_type = in_type
        self.out_type = out_type
        self.sep = sep

    def build(self, input_shape):
        super(VocabLayer, self).build(input_shape)
        if os.path.isdir(self.vocab_path):
            tmp = []
            for fp in os.listdir(self.vocab_path):
                f = pd.read_csv(os.path.join(self.vocab_path, fp), sep=self.sep, names=['key', 'value'])
                tmp.append(f)
            self.vocab = pd.concat(tmp, axis=0, ignore_index=True)
        else:
            self.vocab = pd.read_csv(self.vocab_path, names=['key', 'value'], sep=self.sep, header=None)

        # self.vocab['key'] = self.vocab['key'].apply(lambda x: int(x))
        self.table = tf.lookup.StaticHashTable(initializer=tf.lookup.KeyValueTensorInitializer(
            keys=tf.constant(self.vocab['key'].values, dtype=self.in_type),
            values=tf.constant(self.vocab['value'].values, dtype=self.out_type), ),
            default_value=tf.constant(0, dtype=self.out_type), name=self.vocab_name)

    def call(self, input):
        token_ids = self.table.lookup(input)
        return token_ids

    def get_config(self):
        config = super().get_config()
        config.update({
            "vocab_path": self.vocab_path,
            "vocab_name": self.vocab_name,
            "in_type": self.in_type,
            "out_type": self.out_type,
            "sep": self.sep,
        })
        return config
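# Usage sketch for the two id-handling layers above (ids and the dict path are
# illustrative; the path follows the demo_data flags used elsewhere in this repo):
#   ids = tf.constant([[21, 35, 0]], dtype=tf.int64)
#   tokens = VocabLayer("../demo_data/city_dict", "city_token")(ids)       # vocab lookup, OOV maps to 0
#   embs = HashBucketsEmbedding(num_buckets=10000, emb_size=16)(tokens)    # -> (1, 3, 16)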
class L2_norm_layer(Layer):
    def __init__(self, axis, **kwargs):
        super(L2_norm_layer, self).__init__(**kwargs)
        self.axis = axis

    def call(self, inputs):
        return tf.nn.l2_normalize(inputs, axis=self.axis)


class Power_layer(Layer):
    def __init__(self, y, **kwargs):
        super(Power_layer, self).__init__(**kwargs)
        self.y = tf.constant([y], dtype=tf.float32)

    def call(self, inputs):
        return tf.math.pow(inputs, self.y)


class CrossDotLayer(Layer):
    def __init__(self, axes, **kwargs):
        super(CrossDotLayer, self).__init__(**kwargs)
        self.axes = axes
        self.supports_masking = True

    def call(self, emb1, emb2, mask=None):
        return Dot(self.axes)([emb1, emb2])

    def compute_mask(self, inputs, mask=None):
        return None  # the mask stops at this layer and is not propagated downstream


class CrossMultiplyLayer(Layer):
    def __init__(self, **kwargs):
        super(CrossMultiplyLayer, self).__init__(**kwargs)
        self.supports_masking = True

    def call(self, emb1, emb2, mask=None):
        return Multiply()([emb1, emb2])

    def compute_mask(self, inputs, mask=None):
        return None


class OneHotEncodingLayer(Layer):
    def __init__(self, num_classes, **kwargs):
        super(OneHotEncodingLayer, self).__init__(**kwargs)
        self.num_classes = num_classes

    def call(self, inputs):
        return tf.one_hot(inputs, self.num_classes)


class Dice(Layer):
    def __init__(self):
        super(Dice, self).__init__()
        self.bn = BatchNormalization(center=False, scale=False)
        self.alpha = self.add_weight(shape=(), dtype=tf.float32, name='alpha')

    def call(self, x):
        x_normed = self.bn(x)
        x_p = tf.sigmoid(x_normed)

        return self.alpha * (1.0 - x_p) * x + x_p * x


class MySoftmax(Layer):
    def __init__(self):
        super(MySoftmax, self).__init__()

    def call(self, inputs):
        pos_out = inputs['pos']
        neg1_out = inputs['neg1']
        neg2_out = inputs['neg2']
        neg3_out = inputs['neg3']
        neg4_out = inputs['neg4']
        neg5_out = inputs['neg5']
        sum_e_xj = tf.exp(pos_out) + tf.exp(neg1_out) + tf.exp(neg2_out) + tf.exp(neg3_out) + tf.exp(neg4_out) + tf.exp(neg5_out)
        return concatenate([tf.exp(pos_out) / sum_e_xj,
                            tf.exp(neg1_out) / sum_e_xj,
                            tf.exp(neg2_out) / sum_e_xj,
                            tf.exp(neg3_out) / sum_e_xj,
                            tf.exp(neg4_out) / sum_e_xj,
                            tf.exp(neg5_out) / sum_e_xj,
                            ], axis=-1, name='softmax_pred')
--------------------------------------------------------------------------------
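MySoftmax is the softmax over the six candidate scores written out by hand; a quick eager-mode check against tf.nn.softmax (a sketch with random scores, importing the layer as it is laid out in this repo):

import tensorflow as tf
from layers.tool_layers import MySoftmax

keys = ['pos', 'neg1', 'neg2', 'neg3', 'neg4', 'neg5']
scores = {k: tf.random.uniform([4, 1]) for k in keys}
by_hand = MySoftmax()(scores)
builtin = tf.nn.softmax(tf.concat([scores[k] for k in keys], axis=1))
print(tf.reduce_max(tf.abs(by_hand - builtin)).numpy())  # ~0.0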
/LTR/build_feature.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# coding: utf-8
# author: stefan 2023-02-21


from layers.tool_layers import *
from layers.model_layers import GlobalAveragePooling1DSef


def build_feature_column(city_dict, shangquan_dict, comm_dict, price_dict, area_dict):
    # define input
    # user
    user_city_seq = tf.keras.Input(shape=(5,), name='user_city_seq', dtype=tf.int64)
    user_shangquan_seq = tf.keras.Input(shape=(5,), name='user_shangquan_seq', dtype=tf.int64)
    user_comm_seq = tf.keras.Input(shape=(5,), name='user_comm_seq', dtype=tf.int64)
    user_price_seq = tf.keras.Input(shape=(5,), name='user_price_seq', dtype=tf.int64)
    user_area_seq = tf.keras.Input(shape=(5,), name='user_area_seq', dtype=tf.int64)

    # house pos ft
    city_id = tf.keras.Input(shape=(1,), name='city_id', dtype=tf.int64)
    comm_id = tf.keras.Input(shape=(1,), name='comm_id', dtype=tf.int64)


    # house neg n1
    city_id_n1 = tf.keras.Input(shape=(1,), name='city_id_n1', dtype=tf.int64)
    comm_id_n1 = tf.keras.Input(shape=(1,), name='comm_id_n1', dtype=tf.int64)


    # house neg n2
    city_id_n2 = tf.keras.Input(shape=(1,), name='city_id_n2', dtype=tf.int64)
    comm_id_n2 = tf.keras.Input(shape=(1,), name='comm_id_n2', dtype=tf.int64)


    # house neg n3
    city_id_n3 = tf.keras.Input(shape=(1,), name='city_id_n3', dtype=tf.int64)
    comm_id_n3 = tf.keras.Input(shape=(1,), name='comm_id_n3', dtype=tf.int64)


    # house neg n4
    city_id_n4 = tf.keras.Input(shape=(1,), name='city_id_n4', dtype=tf.int64)
    comm_id_n4 = tf.keras.Input(shape=(1,), name='comm_id_n4', dtype=tf.int64)


    # house neg n5
    city_id_n5 = tf.keras.Input(shape=(1,), name='city_id_n5', dtype=tf.int64)
    comm_id_n5 = tf.keras.Input(shape=(1,), name='comm_id_n5', dtype=tf.int64)


    # common emb: region-type features interact at the bottom layer
    city_Embedding = Embedding(input_dim=400, output_dim=16, mask_zero=True, name="city_emb")
    comm_Embedding = Embedding(input_dim=400000, output_dim=32, mask_zero=True, name="comm_emb")


    # user feature
    user_city_id_token = VocabLayer(city_dict, 'city_token')(user_city_seq)
    user_city_emb_seq = city_Embedding(user_city_id_token)  # look up emb with city_id as index, shape (None, 5, emb_size)
    user_city_emb = GlobalAveragePooling1DSef()(user_city_emb_seq)  # shape (None, emb_size)


    user_comm_id_token = VocabLayer(comm_dict, 'comm_token')(user_comm_seq)
    user_comm_emb_seq = comm_Embedding(user_comm_id_token)
    user_comm_emb = GlobalAveragePooling1DSef()(user_comm_emb_seq)



    # concat user features
    user_feature = concatenate([user_city_emb
                                # , user_shangquan_emb, user_comm_emb, user_price_emb, user_area_emb
                                ], axis=1,
                               name='user_feature')

    # house pos features
    pos_city_id_token = VocabLayer(city_dict, 'pos_city_token')(city_id)
    pos_city_emb = item_city_Embedding(pos_city_id_token)
    pos_city_emb = Reshape((16,))(pos_city_emb)


    pos_comm_id_token = VocabLayer(comm_dict, 'pos_comm_token')(comm_id)
    pos_comm_emb = item_comm_Embedding(pos_comm_id_token)
    pos_comm_emb = Reshape((32,))(pos_comm_emb)



    pos_item_feature = concatenate([pos_city_emb
                                    # , pos_shangquan_emb, pos_comm_emb, pos_price_emb, pos_area_emb,
                                    # item_floor_emb, item_room_emb, item_hall_emb, item_bathroom_emb, pqs
                                    ], axis=1,
                                   name='pos_item_feature')

    # house neg1 features
    neg_city_id_token1 = VocabLayer(city_dict, 'neg_city_id_token1')(city_id_n1)
    neg_city_emb1 = item_city_Embedding(neg_city_id_token1)
    neg_city_emb1 = Reshape((16,))(neg_city_emb1)


    neg_comm_id_token1 = VocabLayer(comm_dict, 'neg_comm_id_token1')(comm_id_n1)
    neg_comm_emb1 = item_comm_Embedding(neg_comm_id_token1)
    neg_comm_emb1 = Reshape((32,))(neg_comm_emb1)


    neg_item_feature_1 = concatenate([neg_city_emb1
                                      # , neg_shangquan_emb1, neg_comm_emb1, neg_price_emb1, neg_area_emb1,
                                      # item_floor_emb1, item_room_emb1, item_hall_emb1, item_bathroom_emb1, pqs_n1
                                      ], axis=1,
                                     name='neg_item_feature_1')

    # house neg2 features
    neg_city_id_token2 = VocabLayer(city_dict, 'neg_city_id_token2')(city_id_n2)
    neg_city_emb2 = item_city_Embedding(neg_city_id_token2)
    neg_city_emb2 = Reshape((16,))(neg_city_emb2)


    neg_comm_id_token2 = VocabLayer(comm_dict, 'neg_comm_id_token2')(comm_id_n2)
    neg_comm_emb2 = item_comm_Embedding(neg_comm_id_token2)
    neg_comm_emb2 = Reshape((32,))(neg_comm_emb2)



    neg_item_feature_2 = concatenate([neg_city_emb2, neg_shangquan_emb2, neg_comm_emb2, neg_price_emb2, neg_area_emb2,
                                      item_floor_emb2, item_room_emb2, item_hall_emb2, item_bathroom_emb2, pqs_n2],
                                     axis=1,
                                     name='neg_item_feature_2')

    # house neg3 features
    neg_city_id_token3 = VocabLayer(city_dict, 'neg_city_id_token3')(city_id_n3)
    neg_city_emb3 = item_city_Embedding(neg_city_id_token3)
    neg_city_emb3 = Reshape((16,))(neg_city_emb3)



    neg_comm_id_token3 = VocabLayer(comm_dict, 'neg_comm_id_token3')(comm_id_n3)
    neg_comm_emb3 = item_comm_Embedding(neg_comm_id_token3)
    neg_comm_emb3 = Reshape((32,))(neg_comm_emb3)


    neg_item_feature_3 = concatenate([neg_city_emb3, neg_shangquan_emb3, neg_comm_emb3, neg_price_emb3, neg_area_emb3,
                                      item_floor_emb3, item_room_emb3, item_hall_emb3, item_bathroom_emb3, pqs_n3],
                                     axis=1,
                                     name='neg_item_feature_3')

    # house neg4 features
    neg_city_id_token4 = VocabLayer(city_dict, 'neg_city_id_token4')(city_id_n4)
    neg_city_emb4 = item_city_Embedding(neg_city_id_token4)
    neg_city_emb4 = Reshape((16,))(neg_city_emb4)



    neg_comm_id_token4 = VocabLayer(comm_dict, 'neg_comm_id_token4')(comm_id_n4)
    neg_comm_emb4 = item_comm_Embedding(neg_comm_id_token4)
    neg_comm_emb4 = Reshape((32,))(neg_comm_emb4)



    neg_item_feature_4 = concatenate([neg_city_emb4
                                      # , neg_shangquan_emb4, neg_comm_emb4, neg_price_emb4, neg_area_emb4,
                                      # item_floor_emb4, item_room_emb4, item_hall_emb4, item_bathroom_emb4, pqs_n4
                                      ],
                                     axis=1,
                                     name='neg_item_feature_4')

    # house neg5 features
    neg_city_id_token5 = VocabLayer(city_dict, 'neg_city_id_token5')(city_id_n5)
    neg_city_emb5 = item_city_Embedding(neg_city_id_token5)
    neg_city_emb5 = Reshape((16,))(neg_city_emb5)



    neg_comm_id_token5 = VocabLayer(comm_dict, 'neg_comm_id_token5')(comm_id_n5)
    neg_comm_emb5 = item_comm_Embedding(neg_comm_id_token5)
    neg_comm_emb5 = Reshape((32,))(neg_comm_emb5)


    neg_item_feature_5 = concatenate([neg_city_emb5, neg_shangquan_emb5, neg_comm_emb5, neg_price_emb5, neg_area_emb5,
                                      item_floor_emb5, item_room_emb5, item_hall_emb5, item_bathroom_emb5, pqs_n5],
                                     axis=1,
                                     name='neg_item_feature_5')

    user_inputs = [user_city_seq, user_shangquan_seq, user_comm_seq, user_price_seq, user_area_seq]
    pos_house_inputs = [city_id, comm_id, shangquan_id, price_id, area_id, floor_loc, room_num, hall, bathroom, pqs]
    neg_house_1_inputs = [city_id_n1, comm_id_n1, shangquan_id_n1, price_id_n1, area_id_n1, floor_loc_n1, room_num_n1,
                          hall_n1, bathroom_n1, pqs_n1]
    neg_house_2_inputs = [city_id_n2, comm_id_n2, shangquan_id_n2, price_id_n2, area_id_n2, floor_loc_n2, room_num_n2,
                          hall_n2, bathroom_n2, pqs_n2]
    neg_house_3_inputs = [city_id_n3, comm_id_n3, shangquan_id_n3, price_id_n3, area_id_n3, floor_loc_n3, room_num_n3,
                          hall_n3, bathroom_n3, pqs_n3]
    neg_house_4_inputs = [city_id_n4, comm_id_n4, shangquan_id_n4, price_id_n4, area_id_n4, floor_loc_n4, room_num_n4,
                          hall_n4, bathroom_n4, pqs_n4]
    neg_house_5_inputs = [city_id_n5, comm_id_n5, shangquan_id_n5, price_id_n5, area_id_n5, floor_loc_n5, room_num_n5,
                          hall_n5, bathroom_n5, pqs_n5]

    result = {
        'user_inputs': user_inputs,
        'pos_house_inputs': pos_house_inputs,
        'neg_house_1_inputs': neg_house_1_inputs,
        'neg_house_2_inputs': neg_house_2_inputs,
        'neg_house_3_inputs': neg_house_3_inputs,
        'neg_house_4_inputs': neg_house_4_inputs,
        'neg_house_5_inputs': neg_house_5_inputs,
        'total_inputs': user_inputs + pos_house_inputs + neg_house_1_inputs + neg_house_2_inputs + neg_house_3_inputs +
                        neg_house_4_inputs + neg_house_5_inputs,
        'user_ft': user_feature,
        'pos_house_ft': pos_item_feature,
        'neg_house_1_ft': neg_item_feature_1,
        'neg_house_2_ft': neg_item_feature_2,
        'neg_house_3_ft': neg_item_feature_3,
        'neg_house_4_ft': neg_item_feature_4,
        'neg_house_5_ft': neg_item_feature_5
    }

    return result

--------------------------------------------------------------------------------
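The Input definitions above imply the TFRecord schema the LTR pipeline needs, while data_config.py in this dump is left mostly empty. A sketch of what a 'recall-ltr' feature description could look like (field list abbreviated, names taken from the Input definitions; this is an assumption, not the repo's actual config):

import tensorflow as tf

recall_ltr_config = {
    # user behavior sequences, 5 recent events each
    "user_city_seq": tf.io.FixedLenFeature([5], tf.int64),
    "user_comm_seq": tf.io.FixedLenFeature([5], tf.int64),
    # the positive house and each negative house carry the same scalar id fields
    "city_id": tf.io.FixedLenFeature([], tf.int64),
    "comm_id": tf.io.FixedLenFeature([], tf.int64),
    "city_id_n1": tf.io.FixedLenFeature([], tf.int64),
    "comm_id_n1": tf.io.FixedLenFeature([], tf.int64),
    # ... n2 through n5 follow the same pattern
}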
/model_layers.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# coding: utf-8
# author: stefan 2022-02-28
# update: added comments and polished the custom layers, by stefan 2022-03-02
import numpy as np
import tensorflow as tf
from keras import initializers, regularizers, constraints
from keras.backend import expand_dims, repeat_elements, sum
from keras.layers import *
from keras.regularizers import l2

from layers.tool_layers import L2_norm_layer

"""Custom model layers:
Tower, DNN, SENet, DIN-Attention, ResNet, FM, DCN, CIN, etc.
"""


class FMLayer(Layer):
    """Factorization Machine models pairwise (order-2) feature interactions
    without linear term and bias.
      Input shape
        - 3D tensor with shape: ``(batch_size, field_size, embedding_size)``.
      Output shape
        - 2D tensor with shape: ``(batch_size, 1)``.
      usage: FMLayer()(tf.stack(cross_emb_list, axis=1, name='fm_inputs'))
    """

    def __init__(self, **kwargs):
        super(FMLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        if len(input_shape) != 3:
            raise ValueError("Unexpected inputs dimensions %d, expect to be 3 dimensions" % (len(input_shape)))

        super(FMLayer, self).build(input_shape)

    def call(self, inputs, *args, **kwargs):
        concated_embeds_value = inputs
        # square of the sum
        square_of_sum = tf.square(tf.reduce_sum(concated_embeds_value, axis=1, keepdims=True))
        # sum of the squares
        sum_of_square = tf.reduce_sum(concated_embeds_value * concated_embeds_value, axis=1, keepdims=True)
        cross_term = square_of_sum - sum_of_square
        cross_term = 0.5 * tf.reduce_sum(cross_term, axis=2, keepdims=False)

        return cross_term

    def compute_output_shape(self, input_shape):
        return (None, 1)
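# The cross term above is the O(n*k) FM identity:
#   sum_{i<j} <v_i, v_j> = 0.5 * sum_k [ (sum_i v_ik)^2 - sum_i (v_ik)^2 ]
# Quick shape check (illustrative):
#   embs = tf.random.normal([2, 5, 4])   # batch 2, 5 fields, embedding dim 4
#   FMLayer()(embs).shape                # -> (2, 1)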
class ResNetLayer(Layer):
    """Residual network, with the convolutions rewritten as fully-connected layers.
      Input shape
        - 2D tensor with shape: ``(batch_size, input_dim)``.
      Output shape
        - 2D tensor with shape: ``(batch_size, units)``.
    """

    def __init__(self, hidden_units=None, **kwargs):
        super(ResNetLayer, self).__init__(**kwargs)
        if hidden_units is None:
            hidden_units = [256, 128, 64]
        self.hidden_units = hidden_units
        self.dense_layers = []
        self.layer_num = len(self.hidden_units)
        self.relu = ReLU()
        self.batch_norm = BatchNormalization()
        self.add = Add()

    def build(self, input_shape):
        super(ResNetLayer, self).build(input_shape)
        for i in range(self.layer_num):
            dense_layer = Dense(self.hidden_units[i], activation=None)
            self.dense_layers.append(dense_layer)
        self.down_sample = Dense(self.hidden_units[self.layer_num - 1], activation=None)  # the last layer must match dims for the final Add

    def call(self, inputs, **kwargs):
        identity = self.down_sample(inputs)

        net = inputs
        for i in range(self.layer_num):
            net = self.dense_layers[i](net)
            if i == 0:
                net = self.batch_norm(net)
            if i != self.layer_num - 1:
                net = self.relu(net)

        output = self.relu(self.add([net, identity]))
        return output


class Tower(Layer):
    def __init__(self,
                 layer_num,
                 layer_units,
                 activation,
                 **kwargs):
        super(Tower, self).__init__(**kwargs)
        self.tower_layers = []
        self.layer_num = layer_num
        self.layer_units = layer_units
        self.activation = activation

    def build(self, input_shape):
        super(Tower, self).build(input_shape)
        for i in range(self.layer_num):
            dense_layer = Dense(self.layer_units[i], activation=self.activation)
            self.tower_layers.append(dense_layer)

    def call(self, inputs, **kwargs):
        net = inputs
        for layer in self.tower_layers:
            net = layer(net)
            net = Dropout(0.3)(net)
        return net


class SENetLayer(Layer):
    def __init__(self, last_shape, reduction=4, **kwargs):
        super(SENetLayer, self).__init__(**kwargs)
        self.reduction = reduction
        self.last_shape = last_shape
        self.excitation_layer = Dense(self.last_shape, activation=tf.keras.activations.hard_sigmoid)
        self.squeeze_layer = Dense(self.last_shape // self.reduction, activation='relu')

    def call(self, inputs, **kwargs):
        net = self.squeeze_layer(inputs)
        net = self.excitation_layer(net)
        return net  # per-feature gating values produced by the SENet layer


class Linear(Layer):
    def __init__(self, feature_length, w_reg=1e-6):
        """
        Linear Layer
        Input:
          - feature_length: A scalar. The length of features.
          - w_reg: A scalar. The regularization coefficient of parameter w.
        """
        super(Linear, self).__init__()
        self.feature_length = feature_length
        self.w_reg = w_reg

    def build(self, input_shape):
        self.w = self.add_weight(name="w",
                                 shape=(self.feature_length, 1),
                                 regularizer=l2(self.w_reg),
                                 trainable=True)

    def call(self, inputs, **kwargs):
        result = tf.reduce_sum(tf.nn.embedding_lookup(self.w, inputs), axis=1)  # (batch_size, 1)
        return result


class MyDense(Layer):
    def __init__(self, units, **kwargs):
        super(MyDense, self).__init__(**kwargs)
        self.units = units

    def build(self, input_shape):
        super(MyDense, self).build(input_shape)  # equivalent to setting self.built = True
        self.w = self.add_weight(shape=(input_shape[-1], self.units),
                                 initializer='random_normal',
                                 trainable=True,
                                 name='w')
        self.b = self.add_weight(shape=(self.units,),
                                 initializer='random_normal',
                                 trainable=True,
                                 name='b')

    def call(self, inputs, **kwargs):
        return tf.keras.activations.sigmoid(tf.matmul(inputs, self.w) + self.b)


class DNNLayer(Layer):
    def __init__(self, layer_units, dropout_rate=0.3, **kwargs):
        super(DNNLayer, self).__init__(**kwargs)
        self.layer_units = layer_units
        self.batch_norm = BatchNormalization()
        self.dropout_rate = dropout_rate
        self.dense_layers = []

    def build(self, input_shape):
        super(DNNLayer, self).build(input_shape)
        for i in range(len(self.layer_units)):
            dense_layer = Dense(self.layer_units[i], activation='relu')
            self.dense_layers.append(dense_layer)

    def call(self, inputs, **kwargs):
        net = inputs
        for i in range(len(self.dense_layers)):
            net = self.dense_layers[i](net)
            if i == 0:
                net = self.batch_norm(net)  # batch_norm applied after the first layer's linear transform and ReLU activation
            net = Dropout(self.dropout_rate)(net)
        return net


class UserRepresentationLayer(Layer):
    def __init__(self, **kwargs):
        super(UserRepresentationLayer, self).__init__(**kwargs)
        self.ActivationSumPoolingFromDIN = ActivationSumPoolingFromDIN()

    def call(self, inputs, **kwargs):
        em, eu, Xu = inputs
        ru_ = self.ActivationSumPoolingFromDIN([Xu, em])

        # ru: user representation
        ru = concatenate([ru_, eu], axis=-1)
        return ru


class UserMatchLayer(Layer):
    def __init__(self, **kwargs):
        super(UserMatchLayer, self).__init__(**kwargs)
        self.l2_norm_layer = L2_norm_layer(axis=-1)

    def relevant_unit(self, ru, r_ul):
        ru_norm = self.l2_norm_layer(ru)
        r_ul_norm = self.l2_norm_layer(r_ul)
        a_l = tf.reduce_sum(tf.multiply(ru_norm, r_ul_norm), axis=1, keepdims=True)

        relevant = {'relevant': tf.multiply(a_l, r_ul),
                    'a_l': a_l
                    }
        return relevant

    def call(self, inputs, **kwargs):
        ru, ru1, ru2, ru3 = inputs
        ru_u1 = self.relevant_unit(ru, ru1)
        ru_u2 = self.relevant_unit(ru, ru2)
        ru_u3 = self.relevant_unit(ru, ru3)

        result = {'Su': ru_u1['relevant'] + ru_u2['relevant'] + ru_u3['relevant'],
                  'Ru': ru_u1['a_l'] + ru_u2['a_l'] + ru_u3['a_l']
                  }
        return result


class TextCNNLayer(Layer):
    def __init__(self, filters, kernel_size, hidden_units, **kwargs):
        super(TextCNNLayer, self).__init__(**kwargs)
        self.filters = filters
        self.kernel_size = kernel_size
        self.hidden_units = hidden_units
        self.convs = []
        self.max_pools = []
        for i in range(len(self.kernel_size)):
            self.kernel_size[i] = int(self.kernel_size[i]) if not isinstance(self.kernel_size[i], int) else self.kernel_size[i]
            conv_layer = Conv1D(filters=self.filters, kernel_size=self.kernel_size[i], padding='same', strides=1, activation='relu')
            max_pool = MaxPooling1D(pool_size=self.kernel_size[i], padding='same')
            self.convs.append(conv_layer)
            self.max_pools.append(max_pool)
        self.batch_norm = BatchNormalization()
        self.dense_layer = Dense(self.hidden_units, activation='relu')

    def call(self, inputs, **kwargs):
        cnn_i = []
        for i in range(len(self.convs)):
            x = self.convs[i](inputs)  # convolve the inputs at a different kernel scale each round
            x = self.max_pools[i](x)
            cnn_i.append(Flatten()(x))

        cnn = concatenate(cnn_i, axis=-1)

        drop = Dropout(0.3)(cnn)
        out = self.dense_layer(drop)
        return out


class Attention_Layer(Layer):
    def __init__(self, att_hidden_units, activation='relu'):
        """
          Input shape
            - query: 2D tensor with shape: ``(batch_size, input_dim)``.
            - key: 3D tensor with shape: ``(batch_size, seq_len, input_dim)``.
            - value: 3D tensor with shape: ``(batch_size, seq_len, input_dim)``.
          Output shape
            - 2D tensor with shape: ``(batch_size, input_dim)``.
        """
        super(Attention_Layer, self).__init__()
        self.att_dense = []
        self.att_hidden_units = att_hidden_units
        self.activation = activation
        self.att_final_dense = Dense(1)
        self.supports_masking = True

    def build(self, input_shape):
        super(Attention_Layer, self).build(input_shape)
        for i in range(len(self.att_hidden_units)):
            self.att_dense.append(Dense(self.att_hidden_units[i], activation=self.activation))

    def call(self, inputs, mask=None, **kwargs):
        # query: candidate item (None, d * 2), d is the dimension of embedding
        # key: hist items (None, seq_len, d * 2)
        # value: hist items (None, seq_len, d * 2)
        q, k, v = inputs
        q = tf.tile(q, multiples=[1, k.shape[1]])  # (None, seq_len * d * 2)
        q = tf.reshape(q, shape=[-1, k.shape[1], k.shape[2]])  # (None, seq_len, d * 2)

        # q, k, out product should concat
        info = tf.concat([q, k, q - k, q * k], axis=-1)

        # dense
        for dense in self.att_dense:
            info = dense(info)

        outputs = self.att_final_dense(info)  # (None, seq_len, 1)
        outputs = tf.squeeze(outputs, axis=-1)  # (None, seq_len)

        if mask is not None:
            paddings = tf.ones_like(outputs) * (-2 ** 32 + 1)  # (None, seq_len), pad with -inf
            outputs = tf.where(tf.equal(mask, 0), paddings, outputs)  # (None, seq_len)
        # softmax
        outputs = tf.nn.softmax(logits=outputs)  # (None, seq_len)
        outputs = tf.expand_dims(outputs, axis=1)  # (None, 1, seq_len)

        outputs = tf.matmul(outputs, v)  # (None, 1, d * 2)
        outputs = tf.squeeze(outputs, axis=1)

        return outputs
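# Usage sketch for the DIN-style attention above (shapes illustrative): score each
# history item against the candidate, then softmax-pool the history into one vector.
#   query = tf.random.normal([8, 32])                       # candidate item embedding
#   keys = tf.random.normal([8, 10, 32])                    # 10 historical item embeddings
#   out = Attention_Layer([64, 32])([query, keys, keys])    # -> (8, 32)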

class SelfAttention_Layer(Layer):
    def __init__(self):
        super(SelfAttention_Layer, self).__init__()

    def build(self, input_shape):
        self.dim = input_shape[0][-1]
        self.W = self.add_weight(shape=[self.dim, self.dim], name='weight',
                                 initializer='random_uniform')
        super(SelfAttention_Layer, self).build(input_shape)

    def call(self, inputs, mask=None, **kwargs):
        q, k, v = inputs
        # pos encoding
        k += self.positional_encoding(k)
        q += self.positional_encoding(q)
        # Nonlinear transformation
        q = tf.nn.relu(tf.matmul(q, self.W))  # (None, seq_len, dim)
        k = tf.nn.relu(tf.matmul(k, self.W))  # (None, seq_len, dim)
        mat_qk = tf.matmul(q, k, transpose_b=True)  # (None, seq_len, seq_len)
        dk = tf.cast(self.dim, dtype=tf.float32)
        # Scaled
        scaled_att_logits = mat_qk / tf.sqrt(dk)
        # Mask; `if mask:` is ambiguous for tensors, and the unmasked branch
        # previously left the logits variable undefined
        if mask is not None:
            mask = tf.tile(tf.expand_dims(mask, 1), [1, q.shape[1], 1])  # (None, seq_len, seq_len)
            paddings = tf.ones_like(scaled_att_logits) * (-2 ** 32 + 1)
            scaled_att_logits = tf.where(tf.equal(mask, 0), paddings, scaled_att_logits)  # (None, seq_len, seq_len)
        # softmax
        outputs = tf.nn.softmax(logits=scaled_att_logits, axis=-1)  # (None, seq_len, seq_len)
        # output
        outputs = tf.matmul(outputs, v)  # (None, seq_len, dim)
        outputs = tf.reduce_mean(outputs, axis=1)  # (None, dim)
        return outputs

    @staticmethod
    def get_angles(pos, i, d_model):
        angle_rates = 1 / np.power(10000, (2 * (i // 2)) / np.float32(d_model))
        return pos * angle_rates

    def positional_encoding(self, QK_input):
        angle_rads = self.get_angles(np.arange(QK_input.shape[1])[:, np.newaxis],
                                     np.arange(self.dim)[np.newaxis, :], self.dim)
        angle_rads[:, 0::2] = np.sin(angle_rads[:, 0::2])
        angle_rads[:, 1::2] = np.cos(angle_rads[:, 1::2])
        pos_encoding = angle_rads[np.newaxis, ...]

        return tf.cast(pos_encoding, dtype=tf.float32)


class BiLSTM_Attention_Layer(Layer):
    def __init__(self, lstm_units=None, **kwargs):
        super(BiLSTM_Attention_Layer, self).__init__(**kwargs)
        self.lstm_units = lstm_units
        self.bi_lstm1 = Bidirectional(LSTM(lstm_units, return_sequences=True), merge_mode='concat')
        self.bi_lstm2 = Bidirectional(LSTM(lstm_units))

    def call(self, inputs, **kwargs):
        inputs = Reshape((-1, inputs.shape[1]))(inputs)
        bilstm_out1 = self.bi_lstm1(inputs)
        bilstm_out2 = self.bi_lstm2(bilstm_out1)
        return bilstm_out2


class ActivationSumPoolingFromDIN(Layer):
    def __init__(self, att_hidden_units=[64, 32], att_activation='relu'):
        """
        Attend the user behavior sequence over the candidate item, then sum-pool.
        """
        super(ActivationSumPoolingFromDIN, self).__init__()

        # attention layer
        self.attention_layer = Attention_Layer(att_hidden_units, att_activation)

        self.bn = BatchNormalization(trainable=True)

    def call(self, inputs, **kwargs):
        seq_embed, item_embed = inputs
        user_interest_sum_pool = self.attention_layer([item_embed, seq_embed, seq_embed])

        # concat user_info (attended history) and candidate item embedding
        info_all = tf.concat([user_interest_sum_pool, item_embed], axis=-1)
        info_all = self.bn(info_all)
        return info_all
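
# Editor's note: a minimal sketch of the DIN-style pooling wrapper above,
# assuming the imports at the top of this file; all shapes are illustrative.
def _demo_activation_sum_pooling():
    seq_embed = tf.random.normal([4, 10, 16])  # user behavior sequence
    item_embed = tf.random.normal([4, 16])     # candidate item
    din_pool = ActivationSumPoolingFromDIN(att_hidden_units=[64, 32])
    # attention-pools the sequence against the candidate, concats the candidate
    # embedding back on, then batch-normalizes
    return din_pool([seq_embed, item_embed])  # -> (4, 32)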

class MultiHeadSelfAttention(Layer):
    def __init__(self, num_units, num_heads=8, dropout_rate=0, **kwargs):
        """
        Applies multi-head attention.
        Args:
            queries: A 3d tensor with shape of [N, T_q, C_q].
            keys: A 3d tensor with shape of [N, T_k, C_k].
            values: A 3d tensor with shape of [N, T_v, C_v].
            num_units: A scalar. Attention size.
            dropout_rate: A floating point number.
            num_heads: An int. Number of heads.
        Returns:
            A 3d tensor with shape of (N, T_q, C)
        """
        super(MultiHeadSelfAttention, self).__init__(**kwargs)
        self.num_units = num_units
        self.num_heads = num_heads
        self.dropout_rate = dropout_rate
        self.dense_q = Dense(units=self.num_units, use_bias=False, activation='relu')
        self.dense_k = Dense(units=self.num_units, use_bias=False, activation='relu')
        self.dense_v = Dense(units=self.num_units, use_bias=False, activation='relu')

    def call(self, inputs, **kwargs):
        queries, keys, values = inputs
        Q = self.dense_q(queries)
        K = self.dense_k(keys)
        V = self.dense_v(values)

        # Split and concat
        Q_ = tf.concat(tf.split(Q, self.num_heads, axis=2), axis=0)  # (h*N, T_q, C/h)
        K_ = tf.concat(tf.split(K, self.num_heads, axis=2), axis=0)  # (h*N, T_k, C/h)
        V_ = tf.concat(tf.split(V, self.num_heads, axis=2), axis=0)  # (h*N, T_k, C/h)

        # Multiplication
        outputs = tf.matmul(Q_, tf.transpose(K_, [0, 2, 1]))  # (h*N, T_q, T_k)

        # Scale
        outputs = outputs / (K_.get_shape().as_list()[-1] ** 0.5)

        # Activation
        outputs = tf.nn.softmax(outputs)  # (h*N, T_q, T_k)

        # Dropouts
        outputs = Dropout(self.dropout_rate)(outputs)

        # Weighted sum
        outputs = tf.matmul(outputs, V_)  # (h*N, T_q, C/h)

        # Restore shape
        outputs = tf.concat(tf.split(outputs, self.num_heads, axis=0), axis=2)  # (N, T_q, C)

        # Residual connection (requires C_q == num_units)
        outputs += queries

        return outputs


class DeepCrossLayer(Layer):
    def __init__(self, layer_num, embed_dim, output_dim=0, **kwargs):
        """
        DCN cross-network implementation.
        usage: DeepCrossLayer(2, item_feature.shape[-1], name="deep_cross_features")(item_feature)
        """
        super(DeepCrossLayer, self).__init__(**kwargs)
        self.layer_num = layer_num
        self.embed_dim = embed_dim

        self.w = []
        self.b = []
        for i in range(self.layer_num):
            self.w.append(tf.Variable(lambda: tf.random.truncated_normal(shape=(self.embed_dim,), stddev=0.01)))
            self.b.append(tf.Variable(lambda: tf.zeros(shape=(embed_dim,))))

    def cross_layer(self, inputs, i):
        x0, xl = inputs
        # feature crossing: x_{l+1} = x0 * (x_l . w_i) + b_i + x_l
        x1_T = tf.reshape(xl, [-1, 1, self.embed_dim])
        x_lw = tf.tensordot(x1_T, self.w[i], axes=1)
        cross = x0 * x_lw
        return cross + self.b[i] + xl

    def call(self, inputs, **kwargs):
        xl = inputs
        for i in range(self.layer_num):
            xl = self.cross_layer([inputs, xl], i)
        if self.output_dim > 0:
            xl = self.dense(xl)
        return xl
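
# Editor's note: a minimal sketch of the cross network above, assuming the
# imports at the top of this file; dimensions are illustrative. Each cross
# layer computes x_{l+1} = x0 * (x_l . w_l) + b_l + x_l, so the feature
# interaction order grows by one per layer.
def _demo_deep_cross_layer():
    item_feature = tf.random.normal([4, 24])  # flattened item embeddings
    dcn = DeepCrossLayer(layer_num=2, embed_dim=24, output_dim=8)
    return dcn(item_feature)  # -> (4, 8) after the optional projection Dense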

class CINLayer(Layer):
    def __init__(self, cin_size=[64, 64], l2_reg=1e-4, **kwargs):
        """CIN (Compressed Interaction Network) implementation.
        ** only for sparse features **

        Input
          - cin_size: A list [H_1, H_2, ..., H_k] with the feature-map size of each CIN layer
          - l2_reg: A scalar. L2 regularization.
          - inputs: 3-D tensor (batch_size, field_nums, emb_size)
        usage: CINLayer()(tf.stack([item_shangquan_emb, item_comm_emb], axis=1), name='cin_features')
        """
        super(CINLayer, self).__init__(**kwargs)
        self.cin_size = cin_size
        self.l2_reg = l2_reg

    def build(self, input_shape):
        # get the number of embedding fields
        self.embedding_nums = input_shape[1]
        # field counts of every CIN layer, starting from the raw input
        self.field_nums = [self.embedding_nums] + self.cin_size
        # filters
        self.cin_W = {
            'CIN_W_' + str(i): self.add_weight(
                name='CIN_W_' + str(i),
                shape=(1, self.field_nums[0] * self.field_nums[i], self.field_nums[i + 1]),
                initializer='random_normal',
                regularizer=l2(self.l2_reg),
                trainable=True)
            for i in range(len(self.field_nums) - 1)
        }

    def call(self, inputs, **kwargs):
        dim = inputs.shape[-1]
        hidden_layers_results = [inputs]
        # split dimension 2 for convenient calculation
        split_X_0 = tf.split(hidden_layers_results[0], dim, 2)  # dim * (None, field_nums[0], 1)
        for idx, size in enumerate(self.cin_size):
            split_X_K = tf.split(hidden_layers_results[-1], dim, 2)  # dim * (None, field_nums[i], 1)

            result_1 = tf.matmul(split_X_0, split_X_K, transpose_b=True)  # (dim, None, field_nums[0], field_nums[i])

            result_2 = tf.reshape(result_1, shape=[dim, -1, self.embedding_nums * self.field_nums[idx]])

            result_3 = tf.transpose(result_2, perm=[1, 0, 2])  # (None, dim, field_nums[0] * field_nums[i])

            result_4 = tf.nn.conv1d(input=result_3, filters=self.cin_W['CIN_W_' + str(idx)], stride=1,
                                    padding='VALID')

            result_5 = tf.transpose(result_4, perm=[0, 2, 1])  # (None, field_nums[i+1], dim)

            hidden_layers_results.append(result_5)

        final_results = hidden_layers_results[1:]
        result = tf.concat(final_results, axis=1)  # (None, H_1 + ... + H_K, dim)
        result = tf.reduce_sum(result, axis=-1)  # (None, H_1 + ... + H_K)

        return result
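
# Editor's note: a minimal sketch of CINLayer, assuming the imports at the top
# of this file; the field embeddings are random stand-ins. The layer expects
# several same-dimension field embeddings stacked on axis 1.
def _demo_cin_layer():
    emb_a = tf.random.normal([4, 16])
    emb_b = tf.random.normal([4, 16])
    emb_c = tf.random.normal([4, 16])
    fields = tf.stack([emb_a, emb_b, emb_c], axis=1)  # (4, 3, 16)
    cin = CINLayer(cin_size=[32, 32])
    return cin(fields)  # -> (4, 64), i.e. (batch, H_1 + H_2)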

def parallel_layer(num_layer, layer_units, mlp_inputs, fm_inputs, dcn_inputs, cin_inputs):
    """
    Parallel-towers CTR structure from Tencent's feed-recommendation ranking.
    Reimplementation reference, added by stefan.
    """
    mlp_features = Tower(layer_num=num_layer, layer_units=layer_units,
                         activation=tf.nn.leaky_relu)(mlp_inputs)
    fm_features = FMLayer()(fm_inputs)
    dcn_features = DeepCrossLayer(2, dcn_inputs.shape[-1], int(layer_units[-1]))(dcn_inputs)
    cin_features = CINLayer(cin_size=[32, 32])(cin_inputs)

    # concat dnn_out and dcn_out
    mlp_dcn_features = concatenate([mlp_features, dcn_features], axis=-1)

    return mlp_dcn_features, fm_features, cin_features


class GlobalAveragePooling1DSef(Layer):
    def __init__(self, data_format='channels_last', keepdims=False, **kwargs):
        super(GlobalAveragePooling1DSef, self).__init__(**kwargs)
        self.data_format = data_format
        self.supports_masking = True
        self.keepdims = keepdims

    def call(self, inputs, mask=None, **kwargs):
        steps_axis = 1 if self.data_format == 'channels_last' else 2
        if mask is not None:
            mask = tf.cast(mask, inputs[0].dtype)
            mask = tf.expand_dims(
                mask, 2 if self.data_format == 'channels_last' else 1)
            inputs *= mask
            return tf.reduce_sum(
                inputs, axis=steps_axis,
                keepdims=self.keepdims) / tf.maximum(1.0, tf.reduce_sum(
                    mask, axis=steps_axis, keepdims=self.keepdims))
        else:
            return tf.reduce_mean(inputs, axis=steps_axis, keepdims=self.keepdims)

    def compute_mask(self, inputs, mask=None):
        return None

    def get_config(self):
        config = super().get_config()
        config.update({
            "data_format": self.data_format,
            "keepdims": self.keepdims,
        })
        return config
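
# Editor's note: a minimal sketch of the mask-aware average pooling above,
# assuming the imports at the top of this file; sequence lengths are illustrative.
def _demo_global_average_pooling():
    x = tf.random.normal([4, 10, 8])                   # (batch, steps, features)
    mask = tf.sequence_mask([10, 7, 5, 2], maxlen=10)  # valid-step mask
    gap = GlobalAveragePooling1DSef()
    # padded steps are zeroed out and excluded from the denominator
    return gap(x, mask=mask)  # -> (4, 8)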

class MMoE(Layer):
    """
    Multi-gate Mixture-of-Experts model.
    """

    def __init__(self,
                 units,
                 num_experts,
                 num_tasks,
                 use_expert_bias=True,
                 use_gate_bias=True,
                 expert_activation='relu',
                 gate_activation='softmax',
                 expert_bias_initializer='zeros',
                 gate_bias_initializer='zeros',
                 expert_bias_regularizer=None,
                 gate_bias_regularizer=None,
                 expert_bias_constraint=None,
                 gate_bias_constraint=None,
                 expert_kernel_initializer='VarianceScaling',
                 gate_kernel_initializer='VarianceScaling',
                 expert_kernel_regularizer=None,
                 gate_kernel_regularizer=None,
                 expert_kernel_constraint=None,
                 gate_kernel_constraint=None,
                 activity_regularizer=None,
                 **kwargs):
        """
        Method for instantiating MMoE layer.
        :param units: Number of hidden units
        :param num_experts: Number of experts
        :param num_tasks: Number of tasks
        :param use_expert_bias: Boolean to indicate the usage of bias in the expert weights
        :param use_gate_bias: Boolean to indicate the usage of bias in the gate weights
        :param expert_activation: Activation function of the expert weights
        :param gate_activation: Activation function of the gate weights
        :param expert_bias_initializer: Initializer for the expert bias
        :param gate_bias_initializer: Initializer for the gate bias
        :param expert_bias_regularizer: Regularizer for the expert bias
        :param gate_bias_regularizer: Regularizer for the gate bias
        :param expert_bias_constraint: Constraint for the expert bias
        :param gate_bias_constraint: Constraint for the gate bias
        :param expert_kernel_initializer: Initializer for the expert weights
        :param gate_kernel_initializer: Initializer for the gate weights
        :param expert_kernel_regularizer: Regularizer for the expert weights
        :param gate_kernel_regularizer: Regularizer for the gate weights
        :param expert_kernel_constraint: Constraint for the expert weights
        :param gate_kernel_constraint: Constraint for the gate weights
        :param activity_regularizer: Regularizer for the activity
        :param kwargs: Additional keyword arguments for the Layer class
        """
        super(MMoE, self).__init__(**kwargs)

        # Hidden nodes parameter
        self.units = units
        self.num_experts = num_experts
        self.num_tasks = num_tasks

        # Weight parameter
        self.expert_kernels = None
        self.gate_kernels = None
        self.expert_kernel_initializer = initializers.get(expert_kernel_initializer)
        self.gate_kernel_initializer = initializers.get(gate_kernel_initializer)
        self.expert_kernel_regularizer = regularizers.get(expert_kernel_regularizer)
        self.gate_kernel_regularizer = regularizers.get(gate_kernel_regularizer)
        self.expert_kernel_constraint = constraints.get(expert_kernel_constraint)
        self.gate_kernel_constraint = constraints.get(gate_kernel_constraint)

        # Activation parameter
        # self.expert_activation = activations.get(expert_activation)
        self.expert_activation = expert_activation
        self.gate_activation = gate_activation

        # Bias parameter
        self.expert_bias = None
        self.gate_bias = None
        self.use_expert_bias = use_expert_bias
        self.use_gate_bias = use_gate_bias
        self.expert_bias_initializer = initializers.get(expert_bias_initializer)
        self.gate_bias_initializer = initializers.get(gate_bias_initializer)
        self.expert_bias_regularizer = regularizers.get(expert_bias_regularizer)
        self.gate_bias_regularizer = regularizers.get(gate_bias_regularizer)
        self.expert_bias_constraint = constraints.get(expert_bias_constraint)
        self.gate_bias_constraint = constraints.get(gate_bias_constraint)

        # Activity parameter
        self.activity_regularizer = regularizers.get(activity_regularizer)

        self.expert_layers = []
        self.gate_layers = []
        for i in range(self.num_experts):
            self.expert_layers.append(Dense(self.units, activation=self.expert_activation,
                                            use_bias=self.use_expert_bias,
                                            kernel_initializer=self.expert_kernel_initializer,
                                            kernel_regularizer=self.expert_kernel_regularizer,
                                            bias_regularizer=self.expert_bias_regularizer,
                                            activity_regularizer=None,
                                            kernel_constraint=self.expert_kernel_constraint,
                                            bias_constraint=self.expert_bias_constraint))
        for i in range(self.num_tasks):
            self.gate_layers.append(Dense(self.num_experts, activation=self.gate_activation,
                                          use_bias=self.use_gate_bias,
                                          kernel_initializer=self.gate_kernel_initializer,
                                          kernel_regularizer=self.gate_kernel_regularizer,
                                          bias_regularizer=self.gate_bias_regularizer,
                                          activity_regularizer=None,
                                          kernel_constraint=self.gate_kernel_constraint,
                                          bias_constraint=self.gate_bias_constraint))
    def call(self, inputs, **kwargs):
        """
        Method for the forward function of the layer.
        :param inputs: Input tensor
        :param kwargs: Additional keyword arguments for the base method
        :return: A tensor
        """
        # expand_dims, repeat_elements and sum below are keras.backend helpers
        # (imported at the top of this file)
        expert_outputs, gate_outputs, final_outputs = [], [], []
        for expert_layer in self.expert_layers:
            expert_output = expand_dims(expert_layer(inputs), axis=2)
            expert_outputs.append(expert_output)
        expert_outputs = tf.concat(expert_outputs, 2)

        for gate_layer in self.gate_layers:
            gate_outputs.append(gate_layer(inputs))

        for gate_output in gate_outputs:
            expanded_gate_output = expand_dims(gate_output, axis=1)
            aa = repeat_elements(expanded_gate_output, self.units, axis=1)
            weighted_expert_output = expert_outputs * aa
            bb = sum(weighted_expert_output, axis=2)
            final_outputs.append(bb)
        # returns a list of num_tasks tensors, each of shape (batch, units)

        return final_outputs
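
# Editor's note: a minimal sketch of MMoE, assuming the imports at the top of
# this file; sizes are illustrative. Each task gets its own softmax gate over
# the shared experts.
def _demo_mmoe():
    x = tf.random.normal([4, 32])                      # shared bottom features
    mmoe = MMoE(units=16, num_experts=4, num_tasks=2)
    task_inputs = mmoe(x)                              # list of 2 tensors, each (4, 16)
    return task_inputs
--------------------------------------------------------------------------------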