├── LTR
    ├── __init__.py
    ├── pics
    │   ├── pic1.jpg
    │   ├── pic2.jpg
    │   ├── pic3.jpg
    │   └── pic4.jpg
    ├── README.md
    ├── train_ltr_tower.py
    ├── config.py
    ├── PairWiseTower.py
    └── build_feature.py
├── pics
    ├── 1.jpg
    ├── 2.jpg
    ├── 3.jpg
    ├── 4.jpg
    ├── 5.jpg
    ├── 6.jpg
    ├── 7.jpeg
    ├── 8.png
    ├── 9.jpeg
    ├── 10.jpeg
    ├── 11.jpeg
    └── 12.jpeg
├── data_config.py
├── README.md
├── common_utils.py
├── config.py
├── ParallelTowers.py
├── train_main.py
├── MultiInterestSenetTT.py
├── tool_layers.py
└── model_layers.py


/LTR/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/pics/1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hrwleo/recall_two_towers/HEAD/pics/1.jpg


--------------------------------------------------------------------------------
/pics/2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hrwleo/recall_two_towers/HEAD/pics/2.jpg


--------------------------------------------------------------------------------
/pics/3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hrwleo/recall_two_towers/HEAD/pics/3.jpg


--------------------------------------------------------------------------------
/pics/4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hrwleo/recall_two_towers/HEAD/pics/4.jpg


--------------------------------------------------------------------------------
/pics/5.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hrwleo/recall_two_towers/HEAD/pics/5.jpg


--------------------------------------------------------------------------------
/pics/6.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hrwleo/recall_two_towers/HEAD/pics/6.jpg


--------------------------------------------------------------------------------
/pics/7.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hrwleo/recall_two_towers/HEAD/pics/7.jpeg


--------------------------------------------------------------------------------
/pics/8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hrwleo/recall_two_towers/HEAD/pics/8.png


--------------------------------------------------------------------------------
/pics/9.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hrwleo/recall_two_towers/HEAD/pics/9.jpeg


--------------------------------------------------------------------------------
/pics/10.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hrwleo/recall_two_towers/HEAD/pics/10.jpeg


--------------------------------------------------------------------------------
/pics/11.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hrwleo/recall_two_towers/HEAD/pics/11.jpeg


--------------------------------------------------------------------------------
/pics/12.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hrwleo/recall_two_towers/HEAD/pics/12.jpeg


--------------------------------------------------------------------------------
/LTR/pics/pic1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hrwleo/recall_two_towers/HEAD/LTR/pics/pic1.jpg


--------------------------------------------------------------------------------
/LTR/pics/pic2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hrwleo/recall_two_towers/HEAD/LTR/pics/pic2.jpg


--------------------------------------------------------------------------------
/LTR/pics/pic3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hrwleo/recall_two_towers/HEAD/LTR/pics/pic3.jpg


--------------------------------------------------------------------------------
/LTR/pics/pic4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hrwleo/recall_two_towers/HEAD/LTR/pics/pic4.jpg


--------------------------------------------------------------------------------
/LTR/README.md:
--------------------------------------------------------------------------------
 1 | # 基于Learning to Rank的双塔召回模型方案实现参考
 2 | 
 3 | ### 2023.03.01（复现 by stefan）
 4 | 
 5 | ![图](./pics/pic1.jpg)
 6 | 
 7 | ![图](./pics/pic2.jpg)
 8 | 
 9 | ![图](./pics/pic3.jpg)
10 | 
11 | ![图](./pics/pic4.jpg)


--------------------------------------------------------------------------------
/data_config.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # coding: utf-8
 3 | # author: stefan 2022-02-28
 4 | 
 5 | import tensorflow as tf
 6 | 
 7 | recall_config = {
 8 |     # user
 9 | 
10 | 
11 |     # prop
12 | 
13 | 
14 |     # label
15 |     "is_click": tf.io.FixedLenFeature([], tf.int64),
16 | 
17 | }
18 | 
19 | 
20 | data_config = {
21 |     "recall": recall_config,
22 | }
23 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # 双塔模型实践记录
 2 | 
 3 | ## 负采样
 4 | 
 5 | 
 6 | 
 7 | ## 模型结构优化
 8 | - base 双塔
 9 | 
10 | ![图](./pics/1.jpg)
11 | 
12 | - 双塔 （ResNet）
13 | 
14 | ![图](./pics/4.jpg)
15 | 
16 | - 双塔 + SeNet
17 | 
18 | ![图](./pics/2.jpg)
19 | 
20 | - 多兴趣SENet双塔模型 Multi-Interest-Senet-Two-Towers (MISTT)
21 | 
22 | ![图](./pics/3.jpg)
23 | 
24 | - 腾讯并联双塔 （Parallel Towers）
25 | 
26 | ![图](./pics/5.jpg)
27 | 
28 | 
29 | ![图](./pics/6.jpg)
30 | 
31 | 


--------------------------------------------------------------------------------
/common_utils.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # coding: utf-8
 3 | # author: stefan 2022-02-28
 4 | 
 5 | 
 6 | import tensorflow as tf
 7 | import os
 8 | 
 9 | 
10 | def parse_exmp(example_proto, feature_description):
11 |     feature_dict = tf.io.parse_single_example(example_proto, feature_description)
12 |     label = feature_dict.pop("is_click")
13 |     return feature_dict, label
14 | 
15 | 
16 | def get_file_list(data):
17 |     if isinstance(data, str) and os.path.isdir(data):
18 |         files = [data + '/' + x for x in os.listdir(data)] if os.path.isdir(data) else data
19 |     else:
20 |         files = data
21 | 
22 |     return files
23 | 
24 | 
25 | def read_data(path, shuffle_buffer_size=20000, batch_size=2048, if_shuffle=False, feat_desc=None):
26 |     file_names = get_file_list(path)
27 |     dataset = tf.data.Dataset.list_files(file_names)
28 |     dataset = dataset.interleave(
29 |         lambda filename: tf.data.TFRecordDataset(file_names),
30 |         cycle_length=8
31 |     )
32 |     if if_shuffle:
33 |         dataset = dataset.shuffle(shuffle_buffer_size)
34 |     dataset = dataset.map(lambda x: parse_exmp(x, feat_desc), num_parallel_calls=8)
35 |     dataset = dataset.batch(batch_size)
36 |     return dataset


--------------------------------------------------------------------------------
/LTR/train_ltr_tower.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # coding: utf-8
 3 | 
 4 | # import sys
 5 | from data_process.common_utils import read_data_ltr
 6 | from data_process.data_config import data_config
 7 | from recall.LTR import config
 8 | from recall.LTR.PairWiseTower import build_ltr_tower
 9 | import tensorflow as tf
10 | 
11 | FLAGS = config.FLAGS
12 | 
13 | # read data
14 | train_set = read_data_ltr(path=FLAGS.train_data, class_num=FLAGS.class_num, batch_size=FLAGS.batch_size, if_shuffle=True,
15 |                           feat_desc=data_config['recall-ltr'])
16 | 
17 | test_set = read_data_ltr(path=FLAGS.test_data, class_num=FLAGS.class_num, batch_size=FLAGS.batch_size, if_shuffle=False,
18 |                           feat_desc=data_config['recall-ltr'])
19 | 
20 | # define model
21 | all_model, user_model, item_model = build_ltr_tower(FLAGS.temperature, FLAGS.city_dict, FLAGS.shangquan_dict, FLAGS.comm_dict, FLAGS.price_dict,
22 |                         FLAGS.area_dict, FLAGS.tower_num_layer, FLAGS.tower_num_layer_units)
23 | 
24 | tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=FLAGS.online_logs, embeddings_freq=1,
25 |                                                       embeddings_data=train_set)
26 | 
27 | all_model.fit(
28 |     x=train_set,
29 |     epochs=FLAGS.epoch,
30 |     callbacks=[tensorboard_callback]
31 | )
32 | 
33 | # save models
34 | item_model.save(FLAGS.item_model_pb, save_format='tf')  # 保存item model的weights用于离线获取emb
35 | 
36 | user_model.save(FLAGS.user_model_pb, save_format='tf')  # 保存user model的pb模型用于在线预测
37 | 


--------------------------------------------------------------------------------
/LTR/config.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # coding: utf-8
 3 | # author: stefan 2023-03-01
 4 | 
 5 | import tensorflow as tf
 6 | import datetime
 7 | 
 8 | """
 9 | recall-ltr
10 | 模型相关参数配置
11 | """
12 | flags = tf.compat.v1.flags
13 | 
14 | flags.DEFINE_string("item_model_pb", "./item_model_pb", "Base directory for the item model.")
15 | flags.DEFINE_string("user_model_pb", "./user_model_pb", "Base directory for the user model.")
16 | 
17 | flags.DEFINE_string("city_dict", "../../demo_data/city_dict", "Path to the city_dict.")
18 | flags.DEFINE_string("shangquan_dict", "../../demo_data/shangquan_dict", "Path to the shangquan_dict.")
19 | flags.DEFINE_string("comm_dict", "../../demo_data/comm_dict", "Path to the comm_dict.")
20 | flags.DEFINE_string("price_dict", "../../demo_data/price_dict", "Path to the price_dict.")
21 | flags.DEFINE_string("area_dict", "../../demo_data/area_dict", "Path to the area_dict.")
22 | 
23 | flags.DEFINE_string("train_data", "../../demo_data/part-r-00001", "Path to the train data")
24 | flags.DEFINE_string("test_data", "../../demo_data/part-r-00001", "Path to the evaluation data.")
25 | 
26 | flags.DEFINE_string("online_logs", "./online_logs/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S"),
27 |                     "Path to the log.")
28 | 
29 | flags.DEFINE_float("temperature", 0.05, "temperature")
30 | flags.DEFINE_integer("tower_num_layer", 3, "num of layers")
31 | flags.DEFINE_string("tower_num_layer_units", "256,128,64", "hidden units of layers")
32 | flags.DEFINE_integer("class_num", 6, "class_num")
33 | 
34 | flags.DEFINE_integer("epoch", 20 , "Training epochs")  # 40
35 | flags.DEFINE_integer("batch_size", 1024, "Training batch size")  # 40960
36 | 
37 | 
38 | 
39 | FLAGS = flags.FLAGS
40 | 
41 | 


--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # coding: utf-8
 3 | # author: stefan 2022-02-28
 4 | 
 5 | import tensorflow as tf
 6 | import datetime
 7 | 
 8 | """
 9 | recall
10 | 模型相关参数配置
11 | """
12 | 
13 | flags = tf.compat.v1.flags
14 | 
15 | flags.DEFINE_boolean("if_use_senet", False, "plus senet module.")
16 | flags.DEFINE_boolean("if_use_MISTT", False, "plus MISTT module.")
17 | flags.DEFINE_boolean("if_use_twoResNet", False, "plus ResNet module.")
18 | flags.DEFINE_boolean("if_use_parallel", True, "plus parallel module.")
19 | 
20 | flags.DEFINE_string("item_model_pb", "./item_model_pb", "Base directory for the item model.")
21 | flags.DEFINE_string("user_model_pb", "./user_model_pb", "Base directory for the user model.")
22 | flags.DEFINE_string("item_model_weights", "./item_model_weights", "Base directory for the item model weights.")
23 | 
24 | flags.DEFINE_string("city_dict", "../demo_data/city_dict", "Path to the city_dict.")
25 | flags.DEFINE_string("shangquan_dict", "../demo_data/shangquan_dict", "Path to the shangquan_dict.")
26 | flags.DEFINE_string("comm_dict", "../demo_data/comm_dict", "Path to the comm_dict.")
27 | flags.DEFINE_string("price_dict", "../demo_data/price_dict", "Path to the price_dict.")
28 | flags.DEFINE_string("area_dict", "../demo_data/area_dict", "Path to the area_dict.")
29 | 
30 | flags.DEFINE_string("train_data", "../demo_data/part-r-00003-Copy1", "Path to the train data")
31 | flags.DEFINE_string("eval_data", "../demo_data/part-r-00003-Copy1", "Path to the evaluation data.")
32 | 
33 | flags.DEFINE_string("online_logs", "../online_logs", "Path to the log.")
34 | 
35 | flags.DEFINE_integer("batch_size", 1024, "Training batch size")  # 40960
36 | flags.DEFINE_integer("epoch", 1, "Training epochs")    # 40
37 | flags.DEFINE_float("temperature", 0.001, "temperature")
38 | flags.DEFINE_integer("tower_num_layer", 3, "num of layers")
39 | flags.DEFINE_string("tower_num_layer_units", "256,128,64", "hidden units of layers")
40 | flags.DEFINE_string("cin_size", "64,64", "a list of the number of layers")
41 | 
42 | FLAGS = flags.FLAGS
43 | 


--------------------------------------------------------------------------------
/ParallelTowers.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # coding: utf-8
 3 | # author: stefan 2022-03-03
 4 | 
 5 | """
 6 | 参考腾讯并联双塔模型架构 尝试复现模型
 7 | 
 8 | 主要创新思路在于：
 9 | 1、尝试通过＂并联＂多个双塔结构（MLP、DCN、FM、FFM、CIN）增加双塔模型的＂宽度＂来缓解双塔内积的瓶颈从而提升效果；
10 | 2、对＂并联＂的多个双塔引入 LR 进行带权融合，LR 权重最终融入到 userembedding 中，使得最终的模型仍然保持的内积形式。
11 | """
12 | 
13 | from tensorflow.keras.models import Model
14 | from tensorflow.keras.optimizers import Adam
15 | from layers.tool_layers import *
16 | from layers.model_layers import MyDense, parallel_layer
17 | 
18 | 
19 | # build model
20 | def parallel_towers(temperature, city_dict, shangquan_dict, comm_dict, price_dict, area_dict, tower_num_layer,
21 |                     tower_num_layer_units):
22 |     # **********************************  输入层 **********************************#
23 |     # define input
24 |     
25 | 
26 |     # common emb  （目前只有一个城市的数据，适当调整dim)
27 | 
28 | 
29 |     # user feature
30 | 
31 | 
32 |     # item features
33 |     
34 | 
35 |     # **********************************  表示层 **********************************#
36 |     user_mlp_inputs = concatenate([...],
37 |                                   axis=-1, name='user_mlp_inputs')
38 |     user_fm_inputs = tf.stack([...],
39 |                               axis=1, name='user_fm_inputs')
40 |     user_dcn_inputs = concatenate([...], axis=-1, name="user_dcn_inputs")
41 |     user_cin_inputs = tf.stack([...], axis=1, name="user_cin_inputs")
42 | 
43 |     # 计算user_tower的并联输出
44 |     user_mlp_dcn_out, user_fm_out, user_cin_out = parallel_layer(tower_num_layer, tower_num_layer_units,
45 |                                                                  user_mlp_inputs, user_fm_inputs, user_dcn_inputs,
46 |                                                                  user_cin_inputs)
47 | 
48 |     item_mlp_inputs = concatenate([...],
49 |                                   axis=-1, name="item_mlp_inputs")
50 |     item_fm_inputs = tf.stack([...], axis=1,
51 |                               name="item_fm_inputs")
52 | 
53 |     item_dcn_inputs = concatenate([...], axis=-1, name="item_dcn_inputs")
54 |     item_cin_inputs = tf.stack([...], axis=1, name="item_cin_inputs")
55 | 
56 |     # 计算item_tower的并联输出
57 |     item_mlp_dcn_out, item_fm_out, item_cin_out = parallel_layer(tower_num_layer, tower_num_layer_units,
58 |                                                                  item_mlp_inputs, item_fm_inputs, item_dcn_inputs,
59 |                                                                  item_cin_inputs)
60 | 
61 |     # **********************************  匹配层 **********************************#
62 |     # 按照不同并联模型分别进行 hadamard 积， 在顶层做两侧特征的交互
63 |     user_item_mlp_dcn_hdm = tf.multiply(user_mlp_dcn_out, item_mlp_dcn_out)
64 |     user_item_fm_hdm = tf.multiply(user_fm_out, item_fm_out)
65 |     user_item_cin_hdm = tf.multiply(user_cin_out, item_cin_out)
66 | 
67 |     # 使用LR学习＂并联＂的多个双塔的权重
68 |     my_dense = MyDense(1)
69 |     concat_inputs = concatenate([user_item_mlp_dcn_hdm, user_item_fm_hdm, user_item_cin_hdm], axis=-1)
70 |     out = my_dense(concat_inputs)
71 |     lr_weights = my_dense.weights[0]
72 |     lr_weights = tf.reshape(lr_weights, [1, lr_weights.shape[0]])
73 | 
74 |     user_input = [...]
75 |     item_input = [...]
76 | 
77 |     # 获取双塔各自的emb输出
78 |     user_parallel_out = concatenate([user_mlp_dcn_out, user_fm_out, user_cin_out], axis=-1, name="user_tower_out")
79 |     item_parallel_out = concatenate([item_mlp_dcn_out, item_fm_out, item_cin_out], axis=-1, name="item_tower")
80 | 
81 |     user_output = tf.multiply(user_parallel_out, lr_weights, name="user_tower")  # 预先融合LR的权重进user embedding
82 |     item_output = item_parallel_out
83 | 
84 |     user_model = Model(inputs=user_input, outputs=user_output)
85 |     item_model = Model(inputs=item_input, outputs=item_output)
86 |     all_model = Model(inputs=user_input + item_input, outputs=out)  # 模型评估以out为评估对象
87 | 
88 |     all_model.compile(
89 |         loss='binary_crossentropy',
90 |         optimizer=Adam(1e-3),
91 |         metrics=[tf.keras.metrics.AUC()]
92 |     )
93 | 
94 |     all_model.summary()
95 | 
96 |     return all_model, user_model, item_model
97 | 


--------------------------------------------------------------------------------
/train_main.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # coding: utf-8
 3 | 
 4 | # import sys
 5 | # sys.path.insert(0, r'/code/Stefan/909_recall/dm-recommend-tf2/') # 线上要加入搜索目录的路径
 6 | 
 7 | from data_process.common_utils import *
 8 | from data_process.data_config import *
 9 | from recall import config
10 | from recall import TwoTowers, TwoTowersSENet, MultiInterestSenetTT, TwoResNet, ParallelTowers
11 | 
12 | FLAGS = config.FLAGS
13 | 
14 | # read data
15 | train_set = read_data(path=FLAGS.train_data, batch_size=FLAGS.batch_size, if_shuffle=True,
16 |                       feat_desc=data_config["909-recall"])
17 | test_set = read_data(path=FLAGS.eval_data, batch_size=FLAGS.batch_size, feat_desc=data_config["909-recall"])
18 | 
19 | # define models
20 | if FLAGS.if_use_senet:
21 |     all_model, user_model, item_model = TwoTowersSENet.two_towers(FLAGS.temperature, FLAGS.city_dict,
22 |                                                                   FLAGS.shangquan_dict,
23 |                                                                   FLAGS.comm_dict, FLAGS.tower_num_layer,
24 |                                                                   FLAGS.tower_num_layer_units.split(','))
25 | elif FLAGS.if_use_MISTT:
26 |     all_model, user_model, item_model = MultiInterestSenetTT.buildMISTT(FLAGS.temperature, FLAGS.city_dict,
27 |                                                                         FLAGS.shangquan_dict,
28 |                                                                         FLAGS.comm_dict, FLAGS.tower_num_layer,
29 |                                                                         FLAGS.tower_num_layer_units.split(','))
30 | elif FLAGS.if_use_twoResNet:
31 |     all_model, user_model, item_model = TwoResNet.two_towers(FLAGS.temperature, FLAGS.city_dict,
32 |                                                              FLAGS.shangquan_dict,
33 |                                                              FLAGS.comm_dict, FLAGS.tower_num_layer,
34 |                                                              FLAGS.tower_num_layer_units.split(','))
35 | elif FLAGS.if_use_parallel:
36 |     all_model, user_model, item_model = ParallelTowers.parallel_towers(FLAGS.temperature, FLAGS.city_dict,
37 |                                                                        FLAGS.shangquan_dict, FLAGS.comm_dict,
38 |                                                                        FLAGS.price_dict, FLAGS.area_dict,
39 |                                                                        FLAGS.tower_num_layer,
40 |                                                                        FLAGS.tower_num_layer_units.split(','))
41 | else:
42 |     all_model, user_model, item_model = TwoTowers.two_towers(FLAGS.temperature, FLAGS.city_dict,
43 |                                                              FLAGS.shangquan_dict,
44 |                                                              FLAGS.comm_dict, FLAGS.tower_num_layer,
45 |                                                              FLAGS.tower_num_layer_units.split(','))
46 | 
47 | # define callbacks
48 | tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=FLAGS.online_logs, embeddings_freq=1,
49 |                                                       embeddings_data=train_set)
50 | early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_auc', patience=8)  # 早停法，防止过拟合
51 | plateau = tf.keras.callbacks.ReduceLROnPlateau(monitor="val_auc", verbose=1, mode='max', factor=0.5,
52 |                                                patience=2)  # 当评价指标不在提升时，减少学习率
53 | 
54 | batch_print_callback = tf.keras.callbacks.LambdaCallback(
55 |     on_batch_begin=lambda batch, logs: print(batch))
56 | 
57 | # run train
58 | all_model.fit(
59 |     x=train_set,
60 |     epochs=FLAGS.epoch,
61 |     validation_data=test_set,
62 |     callbacks=[tensorboard_callback, early_stopping, plateau, batch_print_callback]
63 | )
64 | 
65 | # save models
66 | item_model.save_weights(FLAGS.item_model_weights)  # 保存item model的weights用于离线获取emb
67 | 
68 | item_model.save(FLAGS.item_model_pb, save_format='tf')  # 保存item model的weights用于离线获取emb
69 | 
70 | user_model.save(FLAGS.user_model_pb, save_format='tf')  # 保存user model的pb模型用于在线预测
71 | 


--------------------------------------------------------------------------------
/MultiInterestSenetTT.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # coding: utf-8
  3 | # author: stefan 2022-03-02
  4 | 
  5 | """
  6 | 多兴趣SENet双塔模型Multi-Interest-Senet-Two-Towers (MISTT)
  7 | 
  8 | 基于SENet，把User侧和Item侧的Embedding，打成多兴趣的。就是说，比如在用户侧塔，可以配置不同的SENet模块及对应的DNN结构，来强化不同方面兴趣的
  9 | Embedding表达。Item侧也可以如此办理，或者Item侧如果信息比较单一，可以仍然只打出一个Item Embedding，只需要维度上能和User侧多兴趣Embedding对齐即可
 10 | """
 11 | 
 12 | from tensorflow.keras.models import Model
 13 | from tensorflow.keras.optimizers import Adam
 14 | from layers.tool_layers import *
 15 | from layers.model_layers import Tower, SENetLayer
 16 | 
 17 | 
 18 | # build model
 19 | def buildMISTT(temperature, city_dict, shangquan_dict, comm_dict, tower_num_layer, tower_num_layer_units):
 20 |     # define input
 21 |    ...
 22 | 
 23 | 
 24 |     user_feature_judge_1 = SENetLayer(last_shape=int(user_emb_feature_pooling.shape[-1]), reduction=16,
 25 |                                       name='user_embedding_senet_1')(user_emb_feature_pooling)
 26 | 
 27 |     user_feature_judge_2 = SENetLayer(last_shape=int(user_emb_feature_pooling.shape[-1]), reduction=16,
 28 |                                       name='user_embedding_senet_2')(user_emb_feature_pooling)
 29 | 
 30 |     user_embedding_senet_1 = []
 31 |     user_embedding_senet_2 = []
 32 |     for i in range(user_feature_judge_1.shape[1]):  # 2个senet特征权重维度一样，取其中一个即可
 33 |         x_1 = tf.slice(user_feature_judge_1, [0, i], [-1, 1])  # 取出senet1激活tensor中的第i个激活值
 34 |         x_2 = tf.slice(user_feature_judge_2, [0, i], [-1, 1])
 35 |         emb_with_jugde_1 = tf.multiply(user_emb_feature_list[i], x_1)
 36 |         emb_with_jugde_2 = tf.multiply(user_emb_feature_list[i], x_2)
 37 |         user_embedding_senet_1.append(emb_with_jugde_1)
 38 |         user_embedding_senet_2.append(emb_with_jugde_2)
 39 | 
 40 |     user_feature_1 = concatenate(user_embedding_senet_1, axis=1, name='user_feature_1')
 41 |     user_feature_2 = concatenate(user_embedding_senet_2, axis=1, name='user_feature_2')
 42 | 
 43 |     # 计算user tower的2个输出
 44 |     user_tower_out_1 = Tower(layer_num=tower_num_layer, layer_units=tower_num_layer_units,
 45 |                              activation=tf.nn.leaky_relu, name='user_tower_1')(user_feature_1)
 46 | 
 47 |     user_tower_out_2 = Tower(layer_num=tower_num_layer, layer_units=tower_num_layer_units,
 48 |                              activation=tf.nn.leaky_relu, name='user_tower_2')(user_feature_2)
 49 | 
 50 |     user_tower_out = Add(name="user_tower")([user_tower_out_1, user_tower_out_2])
 51 | 
 52 |     # item feature
 53 |     ...
 54 | 
 55 |     item_embed_features_judge_1 = SENetLayer(last_shape=int(item_embed_features_pooling.shape[-1]), reduction=16,
 56 |                                              name='item_embedding_senet_1')(item_embed_features_pooling)
 57 | 
 58 |     item_embed_features_judge_2 = SENetLayer(last_shape=int(item_embed_features_pooling.shape[-1]), reduction=16,
 59 |                                              name='item_embedding_senet_2')(item_embed_features_pooling)
 60 | 
 61 |     item_embedding_senet_1 = []
 62 |     item_embedding_senet_2 = []
 63 |     for i in range(item_embed_features_judge_1.shape[1]):
 64 |         x_1 = tf.slice(item_embed_features_judge_1, [0, i], [-1, 1])  # 取出senet激活tensor中的第i个激活值
 65 |         x_2 = tf.slice(item_embed_features_judge_2, [0, i], [-1, 1])
 66 |         emb_with_jugde_1 = tf.multiply(item_embedding_features_list[i], x_1)
 67 |         emb_with_jugde_2 = tf.multiply(item_embedding_features_list[i], x_2)
 68 |         item_embedding_senet_1.append(emb_with_jugde_1)
 69 |         item_embedding_senet_2.append(emb_with_jugde_2)
 70 | 
 71 |     item_feature_1 = concatenate(item_embedding_senet_1 + [item_prop_age, item_age_power2], axis=1,
 72 |                                  name='item_feature_1')
 73 |     item_feature_2 = concatenate(item_embedding_senet_2 + [item_prop_age, item_age_power2], axis=1,
 74 |                                  name='item_feature_2')
 75 | 
 76 |     # 计算2个item tower的输出
 77 |     item_tower_out_1 = Tower(layer_num=tower_num_layer, layer_units=tower_num_layer_units,
 78 |                              activation=tf.nn.leaky_relu, name='item_tower_1')(item_feature_1)
 79 |     item_tower_out_2 = Tower(layer_num=tower_num_layer, layer_units=tower_num_layer_units,
 80 |                              activation=tf.nn.leaky_relu, name='item_tower_2')(item_feature_2)
 81 | 
 82 |     item_tower_out = Add(name="item_tower")([item_tower_out_1, item_tower_out_2])
 83 | 
 84 |     # 计算内积，添加温度系数
 85 |     inner_product = tf.reduce_sum(tf.multiply(user_tower_out, item_tower_out), axis=1, keepdims=True)
 86 |     out_with_tempreture = tf.keras.activations.sigmoid(inner_product / temperature)
 87 |     out = Reshape((1,))(out_with_tempreture)
 88 | 
 89 |     user_input = [...]
 90 |     item_input = [...]
 91 | 
 92 |     user_output = user_tower_out
 93 |     item_output = item_tower_out
 94 | 
 95 |     user_model = Model(inputs=user_input, outputs=user_output)
 96 |     item_model = Model(inputs=item_input, outputs=item_output)
 97 |     all_model = Model(inputs=user_input + item_input, outputs=out)  # 模型评估以out为评估对象
 98 | 
 99 |     all_model.compile(
100 |         loss='binary_crossentropy',
101 |         optimizer=Adam(1e-3),
102 |         metrics=[tf.keras.metrics.AUC()]
103 |     )
104 | 
105 |     all_model.summary()
106 | 
107 |     return all_model, user_model, item_model
108 | 


--------------------------------------------------------------------------------
/LTR/PairWiseTower.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # coding: utf-8
  3 | # author: stefan 2023-03-01
  4 | 
  5 | """
  6 | 基于Learning to Rank的双塔召回
  7 | Pairwise-双塔模型
  8 | """
  9 | from keras import Model
 10 | from keras.layers import Reshape, concatenate, Dense
 11 | 
 12 | from layers.metrics import softmaxloss, myLtrAcc
 13 | from layers.model_layers import Tower
 14 | from layers.tool_layers import L2_norm_layer, MySoftmax
 15 | from recall.LTR.build_feature import build_feature_column
 16 | import tensorflow as tf
 17 | 
 18 | def build_ltr_tower(temperature, city_dict, shangquan_dict, comm_dict, price_dict, area_dict, tower_num_layer,
 19 |                    tower_num_layer_units):
 20 |     # 输入、特征
 21 |     feature_columns = build_feature_column(city_dict, shangquan_dict, comm_dict, price_dict, area_dict)
 22 | 
 23 |     # user Tower
 24 |     # 计算user tower的输出
 25 |     user_tower_out = Tower(layer_num=tower_num_layer, layer_units=tower_num_layer_units,
 26 |                            activation=tf.nn.leaky_relu, name='user_tower')(feature_columns['user_ft'])
 27 |     user_tower_out_norm = L2_norm_layer(axis=-1, name='user_tower_norm')(user_tower_out)
 28 | 
 29 |     # item tower (参数共享) 初始化
 30 |     item_model = Tower(layer_num=tower_num_layer, layer_units=tower_num_layer_units,
 31 |                        activation=tf.nn.leaky_relu, name='item_tower')
 32 | 
 33 |     # pos house tower的输出
 34 |     house_pos_out = item_model(feature_columns['pos_house_ft'])
 35 |     house_pos_out_norm = L2_norm_layer(axis=-1, name='pos_tower')(house_pos_out)
 36 | 
 37 |     # neg1 house tower的输出
 38 |     house_neg_1_out = item_model(feature_columns['neg_house_1_ft'])
 39 |     house_neg_1_out_norm = L2_norm_layer(axis=-1, name='neg_tower_1')(house_neg_1_out)
 40 | 
 41 |     # neg2 house tower的输出
 42 |     house_neg_2_out = item_model(feature_columns['neg_house_2_ft'])
 43 |     house_neg_2_out_norm = L2_norm_layer(axis=-1, name='neg_tower_2')(house_neg_2_out)
 44 | 
 45 |     # neg3 house tower的输出
 46 |     house_neg_3_out = item_model(feature_columns['neg_house_3_ft'])
 47 |     house_neg_3_out_norm = L2_norm_layer(axis=-1, name='neg_tower_3')(house_neg_3_out)
 48 | 
 49 |     # neg4 house tower的输出
 50 |     house_neg_4_out = item_model(feature_columns['neg_house_4_ft'])
 51 |     house_neg_4_out_norm = L2_norm_layer(axis=-1, name='neg_tower_4')(house_neg_4_out)
 52 | 
 53 |     # neg5 house tower的输出
 54 |     house_neg_5_out = item_model(feature_columns['neg_house_5_ft'])
 55 |     house_neg_5_out_norm = L2_norm_layer(axis=-1, name='neg_tower_5')(house_neg_5_out)
 56 | 
 57 |     # user x pos_house inner product
 58 |     user_pos_inner_product = tf.reduce_sum(tf.multiply(user_tower_out_norm, house_pos_out_norm), axis=1, keepdims=True)
 59 |     out_with_tempreture = tf.keras.activations.sigmoid(user_pos_inner_product / temperature)
 60 |     pos_out = Reshape((1,))(out_with_tempreture)
 61 | 
 62 |     # user x neg_house inner product
 63 |     user_neg1_inner_product = tf.reduce_sum(tf.multiply(user_tower_out_norm, house_neg_1_out_norm), axis=1, keepdims=True)
 64 |     neg1_out = Reshape((1,))(tf.keras.activations.sigmoid(user_neg1_inner_product / temperature))
 65 | 
 66 |     user_neg2_inner_product = tf.reduce_sum(tf.multiply(user_tower_out_norm, house_neg_2_out_norm), axis=1, keepdims=True)
 67 |     neg2_out = Reshape((1,))(tf.keras.activations.sigmoid(user_neg2_inner_product / temperature))
 68 | 
 69 |     user_neg3_inner_product = tf.reduce_sum(tf.multiply(user_tower_out_norm, house_neg_3_out_norm), axis=1, keepdims=True)
 70 |     neg3_out = Reshape((1,))(tf.keras.activations.sigmoid(user_neg3_inner_product / temperature))
 71 | 
 72 |     user_neg4_inner_product = tf.reduce_sum(tf.multiply(user_tower_out_norm, house_neg_4_out_norm), axis=1, keepdims=True)
 73 |     neg4_out = Reshape((1,))(tf.keras.activations.sigmoid(user_neg4_inner_product / temperature))
 74 | 
 75 |     user_neg5_inner_product = tf.reduce_sum(tf.multiply(user_tower_out_norm, house_neg_5_out_norm), axis=1, keepdims=True)
 76 |     neg5_out = Reshape((1,))(tf.keras.activations.sigmoid(user_neg5_inner_product / temperature))
 77 | 
 78 |     # softmax 取user embedding与pos embedding计算的相似度做为期望预测的label概率
 79 |     softmax_inputs = {
 80 |         'pos': pos_out, 'neg1': neg1_out, 'neg2': neg2_out, 'neg3': neg3_out, 'neg4': neg4_out, 'neg5': neg5_out
 81 |     }
 82 |     out_logit = MySoftmax()(softmax_inputs)
 83 | 
 84 |     user_input = feature_columns['user_inputs']
 85 |     item_input = feature_columns['pos_house_inputs']
 86 | 
 87 |     user_output = user_tower_out_norm
 88 |     item_output = house_pos_out_norm
 89 | 
 90 |     user_model = Model(inputs=user_input, outputs=user_output)
 91 |     item_model = Model(inputs=item_input, outputs=item_output)
 92 |     all_model = Model(inputs=feature_columns['total_inputs'], outputs=out_logit)
 93 | 
 94 |     all_model.compile(
 95 |         loss=softmaxloss,
 96 |         optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
 97 |         metrics=[myLtrAcc],
 98 |         # run_eagerly=True
 99 |     )
100 | 
101 |     all_model.summary()
102 | 
103 |     return all_model, user_model, item_model
104 |   
105 |   
106 | def myLtrAcc(y_true, y_pred):
107 |     # pos的概率为最大则满足预期
108 |     pred_max_index = tf.equal(tf.argmax(y_pred, axis=-1), 0) 
109 |     correct_count = tf.reduce_sum(tf.cast(pred_max_index, tf.float32))
110 |     return correct_count / tf.cast(len(pred_max_index), 'float32')
111 |   
112 |   
113 | def softmaxloss(y_true, y_pred):
114 |     pos_pred = tf.cast(tf.slice(y_pred, [0, 0], [-1, 1]), 'float32')
115 |     return K.mean(-tf.math.log(pos_pred))  
116 |   
117 |   
118 | 


--------------------------------------------------------------------------------
/tool_layers.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # coding: utf-8
  3 | # author: stefan 2022-02-28
  4 | import os.path
  5 | 
  6 | import numpy as np
  7 | import pandas as pd
  8 | import tensorflow as tf
  9 | from keras.layers import *
 10 | 
 11 | """自定义工具层
 12 | embedding, crossDot, crossMulti, one-hot, dice ,etc
 13 | """
 14 | 
 15 | 
 16 | class HashBucketsEmbedding(Layer):
 17 |     def __init__(self,
 18 |                  num_buckets,
 19 |                  emb_size,
 20 |                  **kwargs):
 21 |         super(HashBucketsEmbedding, self).__init__(**kwargs)
 22 |         self.num_buckets = num_buckets
 23 |         self.emb_size = emb_size
 24 | 
 25 |     def build(self, input_shape):
 26 |         super(HashBucketsEmbedding, self).build(input_shape)
 27 |         self.embedding_layer = Embedding(input_dim=self.num_buckets + 1,
 28 |                                          output_dim=self.emb_size,
 29 |                                          name='embedding')
 30 | 
 31 |     def call(self, input):
 32 |         emb_input = []
 33 |         for i in range(input.shape[1]):
 34 |             x = tf.as_string(tf.slice(input, [0, i], [-1, 1]))
 35 |             emb_input.append(x)
 36 |         emb_input = tf.concat(emb_input, 1)
 37 |         emb_input = tf.strings.to_hash_bucket_strong(emb_input, self.num_buckets, [1, 2])  # hash
 38 |         out = self.embedding_layer(emb_input)
 39 |         return out
 40 | 
 41 |     def get_config(self):
 42 |         config = super().get_config()
 43 |         config.update({
 44 |             "num_buckets": self.num_buckets,
 45 |             "emb_size": self.emb_size,
 46 |         })
 47 |         return config
 48 | 
 49 | 
 50 | # tensorflow 内置字典查询
 51 | class VocabLayer(Layer):
 52 |     def __init__(self, vocab_path, vocab_name, in_type=tf.int64, out_type=tf.int64, sep='\t', **kwargs):
 53 |         super(VocabLayer, self).__init__(**kwargs)
 54 |         self.vocab_path = vocab_path
 55 |         self.vocab_name = vocab_name
 56 |         self.in_type = in_type
 57 |         self.out_type = out_type
 58 |         self.sep = sep
 59 | 
 60 |     def build(self, input_shape):
 61 |         super(VocabLayer, self).build(input_shape)
 62 |         if os.path.isdir(self.vocab_path):
 63 |             tmp = []
 64 |             for fp in os.listdir(self.vocab_path):
 65 |                 f = pd.read_csv(os.path.join(self.vocab_path, fp), sep=self.sep, names=['key', 'value'])
 66 |                 tmp.append(f)
 67 |             self.vocab = pd.concat(tmp, axis=0, ignore_index=True)
 68 |         else:
 69 |             self.vocab = pd.read_csv(self.vocab_path, names=['key', 'value'], sep=self.sep, header=None)
 70 | 
 71 |         # self.vocab['key'] = self.vocab['key'].apply(lambda x: int(x))
 72 |         self.table = tf.lookup.StaticHashTable(initializer=tf.lookup.KeyValueTensorInitializer(
 73 |             keys=tf.constant(self.vocab['key'].values, dtype=self.in_type),
 74 |             values=tf.constant(self.vocab['value'].values, dtype=self.out_type), ),
 75 |             default_value=tf.constant(0, dtype=self.out_type), name=self.vocab_name)
 76 | 
 77 |     def call(self, input):
 78 |         token_ids = self.table.lookup(input)
 79 |         return token_ids
 80 | 
 81 |     def get_config(self):
 82 |         config = super().get_config()
 83 |         config.update({
 84 |             "vocab_path": self.vocab_path,
 85 |             "vocab_name": self.vocab_name,
 86 |             "in_type": self.in_type,
 87 |             "out_type": self.out_type,
 88 |             "sep": self.sep,
 89 |         })
 90 |         return config
 91 | 
 92 | 
 93 | class L2_norm_layer(Layer):
 94 |     def __init__(self, axis, **kwargs):
 95 |         super(L2_norm_layer, self).__init__(**kwargs)
 96 |         self.axis = axis
 97 | 
 98 |     def call(self, inputs):
 99 |         return tf.nn.l2_normalize(inputs, axis=self.axis)
100 | 
101 | 
102 | class Power_layer(Layer):
103 |     def __init__(self, y, **kwargs):
104 |         super(Power_layer, self).__init__(**kwargs)
105 |         self.y = tf.constant([y], dtype=tf.float32)
106 | 
107 |     def call(self, inputs):
108 |         return tf.math.pow(inputs, self.y)
109 | 
110 | 
111 | class CrossDotLayer(Layer):
112 |     def __init__(self, axes, **kwargs):
113 |         super(CrossDotLayer, self).__init__(**kwargs)
114 |         self.axes = axes
115 |         self.supports_masking = True
116 | 
117 |     def call(self, emb1, emb2, mask=None):
118 |         return Dot(self.axes)([emb1, emb2])
119 | 
120 |     def compute_mask(self, inputs, mask=None):
121 |         return None   # mask 到该层结束，不向下传递
122 | 
123 | 
124 | class CrossMultiplyLayer(Layer):
125 |     def __init__(self, **kwargs):
126 |         super(CrossMultiplyLayer, self).__init__(**kwargs)
127 |         self.supports_masking = True
128 | 
129 |     def call(self, emb1, emb2, mask=None):
130 |         return Multiply()([emb1, emb2])
131 | 
132 |     def compute_mask(self, inputs, mask=None):
133 |         return None
134 | 
135 | 
136 | class OneHotEncodingLayer(Layer):
137 |     def __init__(self, num_classes, **kwargs):
138 |         super(OneHotEncodingLayer, self).__init__(**kwargs)
139 |         self.num_classes = num_classes
140 | 
141 |     def call(self, inputs):
142 |         return tf.one_hot(inputs, self.num_classes)
143 | 
144 | 
145 | class Dice(Layer):
146 |     def __init__(self):
147 |         super(Dice, self).__init__()
148 |         self.bn = BatchNormalization(center=False, scale=False)
149 |         self.alpha = self.add_weight(shape=(), dtype=tf.float32, name='alpha')
150 | 
151 |     def call(self, x):
152 |         x_normed = self.bn(x)
153 |         x_p = tf.sigmoid(x_normed)
154 | 
155 |         return self.alpha * (1.0 - x_p) * x + x_p * x
156 | 
157 | 
158 | class MySoftmax(Layer):
159 |     def __init__(self):
160 |         super(MySoftmax, self).__init__()
161 | 
162 |     def call(self, inputs):
163 |         pos_out = inputs['pos']
164 |         neg1_out = inputs['neg1']
165 |         neg2_out = inputs['neg2']
166 |         neg3_out = inputs['neg3']
167 |         neg4_out = inputs['neg4']
168 |         neg5_out = inputs['neg5']
169 |         sum_e_xj = tf.exp(pos_out) + tf.exp(neg1_out) + tf.exp(neg2_out) + tf.exp(neg3_out) + tf.exp(neg4_out) + tf.exp(neg5_out)
170 |         return concatenate([tf.exp(pos_out) / sum_e_xj,
171 |                             tf.exp(neg1_out) / sum_e_xj,
172 |                             tf.exp(neg2_out) / sum_e_xj,
173 |                             tf.exp(neg3_out) / sum_e_xj,
174 |                             tf.exp(neg4_out) / sum_e_xj,
175 |                             tf.exp(neg5_out) / sum_e_xj,
176 |                             ], axis=-1, name='softmax_pred')
177 | 


--------------------------------------------------------------------------------
/LTR/build_feature.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # coding: utf-8
  3 | # author: stefan 2023-02-21
  4 | 
  5 | 
  6 | from layers.tool_layers import *
  7 | from layers.model_layers import GlobalAveragePooling1DSef
  8 | 
  9 | 
 10 | def build_feature_column(city_dict, shangquan_dict, comm_dict, price_dict, area_dict):
 11 |     # define input
 12 |     # user
 13 |     user_city_seq = tf.keras.Input(shape=(5,), name='user_city_seq', dtype=tf.int64)
 14 |     user_shangquan_seq = tf.keras.Input(shape=(5,), name='user_shangquan_seq', dtype=tf.int64)
 15 |     user_comm_seq = tf.keras.Input(shape=(5,), name='user_comm_seq', dtype=tf.int64)
 16 |     user_price_seq = tf.keras.Input(shape=(5,), name='user_price_seq', dtype=tf.int64)
 17 |     user_area_seq = tf.keras.Input(shape=(5,), name='user_area_seq', dtype=tf.int64)
 18 | 
 19 |     # house pos ft
 20 |     city_id = tf.keras.Input(shape=(1,), name='city_id', dtype=tf.int64)
 21 |     comm_id = tf.keras.Input(shape=(1,), name='comm_id', dtype=tf.int64)
 22 | 
 23 | 
 24 |     # house neg n1
 25 |     city_id_n1 = tf.keras.Input(shape=(1,), name='city_id_n1', dtype=tf.int64)
 26 |     comm_id_n1 = tf.keras.Input(shape=(1,), name='comm_id_n1', dtype=tf.int64)
 27 | 
 28 | 
 29 |     # house neg n2
 30 |     city_id_n2 = tf.keras.Input(shape=(1,), name='city_id_n2', dtype=tf.int64)
 31 |     comm_id_n2 = tf.keras.Input(shape=(1,), name='comm_id_n2', dtype=tf.int64)
 32 | 
 33 | 
 34 |     # house neg n3
 35 |     city_id_n3 = tf.keras.Input(shape=(1,), name='city_id_n3', dtype=tf.int64)
 36 |     comm_id_n3 = tf.keras.Input(shape=(1,), name='comm_id_n3', dtype=tf.int64)
 37 | 
 38 | 
 39 |     # house neg n4
 40 |     city_id_n4 = tf.keras.Input(shape=(1,), name='city_id_n4', dtype=tf.int64)
 41 |     comm_id_n4 = tf.keras.Input(shape=(1,), name='comm_id_n4', dtype=tf.int64)
 42 | 
 43 | 
 44 |     # house neg n5
 45 |     city_id_n5 = tf.keras.Input(shape=(1,), name='city_id_n5', dtype=tf.int64)
 46 |     comm_id_n5 = tf.keras.Input(shape=(1,), name='comm_id_n5', dtype=tf.int64)
 47 | 
 48 | 
 49 |     # common emb 区域类特征在底层做交互
 50 |     city_Embedding = Embedding(input_dim=400, output_dim=16, mask_zero=True, name="city_emb")
 51 |     comm_Embedding = Embedding(input_dim=400000, output_dim=32, mask_zero=True, name="comm_emb")
 52 | 
 53 | 
 54 |     # user feature
 55 |     user_city_id_token = VocabLayer(city_dict, 'city_token')(user_city_seq)
 56 |     user_city_emb_seq = city_Embedding(user_city_id_token)  # 以city_id为index取emb  shape(None, 5, emb_size)
 57 |     user_city_emb = GlobalAveragePooling1DSef()(user_city_emb_seq)  # shape(None, emb_size)
 58 | 
 59 | 
 60 |     user_comm_id_token = VocabLayer(comm_dict, 'comm_token')(user_comm_seq)
 61 |     user_comm_emb_seq = comm_Embedding(user_comm_id_token)
 62 |     user_comm_emb = GlobalAveragePooling1DSef()(user_comm_emb_seq)
 63 | 
 64 | 
 65 | 
 66 |     # concat user features
 67 |     user_feature = concatenate([user_city_emb
 68 |                                    # , user_shangquan_emb, user_comm_emb, user_price_emb, user_area_emb
 69 |                                 ], axis=1,
 70 |                                name='user_feature')
 71 | 
 72 |     # house pos features
 73 |     pos_city_id_token = VocabLayer(city_dict, 'pos_city_token')(city_id)
 74 |     pos_city_emb = item_city_Embedding(pos_city_id_token)
 75 |     pos_city_emb = Reshape((16,))(pos_city_emb)
 76 | 
 77 | 
 78 |     pos_comm_id_token = VocabLayer(comm_dict, 'pos_comm_token')(comm_id)
 79 |     pos_comm_emb = item_comm_Embedding(pos_comm_id_token)
 80 |     pos_comm_emb = Reshape((32,))(pos_comm_emb)
 81 | 
 82 | 
 83 | 
 84 |     pos_item_feaure = concatenate([pos_city_emb
 85 |                                    #    , pos_shangquan_emb, pos_comm_emb, pos_price_emb, pos_area_emb,
 86 |                                    # item_floor_emb, item_room_emb, item_hall_emb, item_bathroom_emb, pqs
 87 |                                    ], axis=1,
 88 |                                   name='pos_item_feaure')
 89 | 
 90 |     # house neg1 features
 91 |     neg_city_id_token1 = VocabLayer(city_dict, 'neg_city_id_token1')(city_id_n1)
 92 |     neg_city_emb1 = item_city_Embedding(neg_city_id_token1)
 93 |     neg_city_emb1 = Reshape((16,))(neg_city_emb1)
 94 | 
 95 | 
 96 |     neg_comm_id_token1 = VocabLayer(comm_dict, 'neg_comm_id_token1')(comm_id_n1)
 97 |     neg_comm_emb1 = item_comm_Embedding(neg_comm_id_token1)
 98 |     neg_comm_emb1 = Reshape((32,))(neg_comm_emb1)
 99 | 
100 | 
101 |     neg_item_feature_1 = concatenate([neg_city_emb1
102 |                                       #    , neg_shangquan_emb1, neg_comm_emb1, neg_price_emb1, neg_area_emb1,
103 |                                       # item_floor_emb1, item_room_emb1, item_hall_emb1, item_bathroom_emb1, pqs_n1
104 |                                       ], axis=1,
105 |                                   name='neg_item_feature_1')
106 | 
107 |     # house neg2 features
108 |     neg_city_id_token2 = VocabLayer(city_dict, 'neg_city_id_token2')(city_id_n2)
109 |     neg_city_emb2 = item_city_Embedding(neg_city_id_token2)
110 |     neg_city_emb2 = Reshape((16,))(neg_city_emb2)
111 | 
112 | 
113 |     neg_comm_id_token2 = VocabLayer(comm_dict, 'neg_comm_id_token2')(comm_id_n2)
114 |     neg_comm_emb2 = item_comm_Embedding(neg_comm_id_token2)
115 |     neg_comm_emb2 = Reshape((32,))(neg_comm_emb2)
116 | 
117 | 
118 | 
119 |     neg_item_feature_2 = concatenate([neg_city_emb2, neg_shangquan_emb2, neg_comm_emb2, neg_price_emb2, neg_area_emb2,
120 |                                       item_floor_emb2, item_room_emb2, item_hall_emb2, item_bathroom_emb2, pqs_n2],
121 |                                      axis=1,
122 |                                      name='neg_item_feature_2')
123 | 
124 |     # house neg3 features
125 |     neg_city_id_token3 = VocabLayer(city_dict, 'neg_city_id_token3')(city_id_n3)
126 |     neg_city_emb3 = item_city_Embedding(neg_city_id_token3)
127 |     neg_city_emb3 = Reshape((16,))(neg_city_emb3)
128 | 
129 | 
130 | 
131 |     neg_comm_id_token3 = VocabLayer(comm_dict, 'neg_comm_id_token3')(comm_id_n3)
132 |     neg_comm_emb3 = item_comm_Embedding(neg_comm_id_token3)
133 |     neg_comm_emb3 = Reshape((32,))(neg_comm_emb3)
134 | 
135 | 
136 |     neg_item_feature_3 = concatenate([neg_city_emb3, neg_shangquan_emb3, neg_comm_emb3, neg_price_emb3, neg_area_emb3,
137 |                                       item_floor_emb3, item_room_emb3, item_hall_emb3, item_bathroom_emb3, pqs_n3],
138 |                                      axis=1,
139 |                                      name='neg_item_feature_3')
140 | 
141 |     # house neg4 features
142 |     neg_city_id_token4 = VocabLayer(city_dict, 'neg_city_id_token4')(city_id_n4)
143 |     neg_city_emb4 = item_city_Embedding(neg_city_id_token4)
144 |     neg_city_emb4 = Reshape((16,))(neg_city_emb4)
145 | 
146 | 
147 | 
148 |     neg_comm_id_token4 = VocabLayer(comm_dict, 'neg_comm_id_token4')(comm_id_n4)
149 |     neg_comm_emb4 = item_comm_Embedding(neg_comm_id_token4)
150 |     neg_comm_emb4 = Reshape((32,))(neg_comm_emb4)
151 | 
152 | 
153 | 
154 |     neg_item_feature_4 = concatenate([neg_city_emb4
155 |                                       #    , neg_shangquan_emb4, neg_comm_emb4, neg_price_emb4, neg_area_emb4,
156 |                                       # item_floor_emb4, item_room_emb4, item_hall_emb4, item_bathroom_emb4, pqs_n4
157 |                                       ],
158 |                                      axis=1,
159 |                                      name='neg_item_feature_4')
160 | 
161 |     # house neg5 features
162 |     neg_city_id_token5 = VocabLayer(city_dict, 'neg_city_id_token5')(city_id_n5)
163 |     neg_city_emb5 = item_city_Embedding(neg_city_id_token5)
164 |     neg_city_emb5 = Reshape((16,))(neg_city_emb5)
165 | 
166 | 
167 | 
168 |     neg_comm_id_token5 = VocabLayer(comm_dict, 'neg_comm_id_token5')(comm_id_n5)
169 |     neg_comm_emb5 = item_comm_Embedding(neg_comm_id_token5)
170 |     neg_comm_emb5 = Reshape((32,))(neg_comm_emb5)
171 | 
172 | 
173 |     neg_item_feature_5 = concatenate([neg_city_emb5, neg_shangquan_emb5, neg_comm_emb5, neg_price_emb5, neg_area_emb5,
174 |                                       item_floor_emb5, item_room_emb5, item_hall_emb5, item_bathroom_emb5, pqs_n5],
175 |                                      axis=1,
176 |                                      name='neg_item_feature_5')
177 | 
178 |     user_inputs = [user_city_seq, user_shangquan_seq, user_comm_seq, user_price_seq, user_area_seq]
179 |     pos_house_inputs = [city_id, comm_id, shangquan_id, price_id, area_id, floor_loc, room_num, hall, bathroom, pqs]
180 |     neg_house_1_inputs = [city_id_n1, comm_id_n1, shangquan_id_n1, price_id_n1, area_id_n1, floor_loc_n1, room_num_n1,
181 |                           hall_n1, bathroom_n1, pqs_n1]
182 |     neg_house_2_inputs = [city_id_n2, comm_id_n2, shangquan_id_n2, price_id_n2, area_id_n2, floor_loc_n2, room_num_n2,
183 |                           hall_n2, bathroom_n2, pqs_n2]
184 |     neg_house_3_inputs = [city_id_n3, comm_id_n3, shangquan_id_n3, price_id_n3, area_id_n3, floor_loc_n3, room_num_n3,
185 |                           hall_n3, bathroom_n3, pqs_n3]
186 |     neg_house_4_inputs = [city_id_n4, comm_id_n4, shangquan_id_n4, price_id_n4, area_id_n4, floor_loc_n4, room_num_n4,
187 |                           hall_n4, bathroom_n4, pqs_n4]
188 |     neg_house_5_inputs = [city_id_n5, comm_id_n5, shangquan_id_n5, price_id_n5, area_id_n5, floor_loc_n5, room_num_n5,
189 |                           hall_n5, bathroom_n5, pqs_n5]
190 | 
191 |     result = {
192 |         'user_inputs':user_inputs,
193 |         'pos_house_inputs':pos_house_inputs,
194 |         'neg_house_1_inputs':neg_house_1_inputs,
195 |         'neg_house_2_inputs':neg_house_2_inputs,
196 |         'neg_house_3_inputs':neg_house_3_inputs,
197 |         'neg_house_4_inputs':neg_house_4_inputs,
198 |         'neg_house_5_inputs':neg_house_5_inputs,
199 |         'total_inputs':user_inputs + pos_house_inputs + neg_house_1_inputs + neg_house_2_inputs + neg_house_3_inputs +
200 |                        neg_house_4_inputs + neg_house_5_inputs,
201 |         'user_ft':user_feature,
202 |         'pos_house_ft':pos_item_feaure,
203 |         'neg_house_1_ft':neg_item_feature_1,
204 |         'neg_house_2_ft': neg_item_feature_2,
205 |         'neg_house_3_ft': neg_item_feature_3,
206 |         'neg_house_4_ft': neg_item_feature_4,
207 |         'neg_house_5_ft': neg_item_feature_5
208 |     }
209 | 
210 |     return result
211 | 
212 | 


--------------------------------------------------------------------------------
/model_layers.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # coding: utf-8
  3 | # author: stefan 2022-02-28
  4 | # update: 添加注释，完善自定义层 by stefan 2022-03-02
  5 | import numpy as np
  6 | import tensorflow as tf
  7 | from keras import initializers, regularizers, constraints
  8 | from keras.backend import expand_dims, repeat_elements, sum
  9 | from keras.layers import *
 10 | from keras.regularizers import l2
 11 | 
 12 | from layers.tool_layers import L2_norm_layer
 13 | 
 14 | """自定义模型层
 15 | Tower, DNN, SENet, DIN-Attention, ResNet, FM, DCN, CIN ..etc
 16 | """
 17 | 
 18 | 
 19 | class FMLayer(Layer):
 20 |     """Factorization Machine models pairwise (order-2) feature interactions
 21 |          without linear term and bias.
 22 |           Input shape
 23 |             - 3D tensor with shape: ``(batch_size,field_size,embedding_size)``.
 24 |           Output shape
 25 |             - 2D tensor with shape: ``(batch_size, 1)``.
 26 |         usage: FMLayer()(tf.stack(cross_emb_list, axis=1, name='fm_inputs'))
 27 |     """
 28 | 
 29 |     def __init__(self, **kwargs):
 30 |         super(FMLayer, self).__init__(**kwargs)
 31 | 
 32 |     def build(self, input_shape):
 33 |         if len(input_shape) != 3:
 34 |             raise ValueError("Unexpected inputs dimensions % d, expect to be 3 dimensions" % (len(input_shape)))
 35 | 
 36 |         super(FMLayer, self).build(input_shape)
 37 | 
 38 |     def call(self, inputs, *args, **kwargs):
 39 |         concated_embeds_value = inputs
 40 |         # 先求和再平方
 41 |         square_of_sum = tf.square(tf.reduce_sum(concated_embeds_value, axis=1, keepdims=True))
 42 |         # 先平方再求和
 43 |         sum_of_square = tf.reduce_sum(concated_embeds_value * concated_embeds_value, axis=1, keepdims=True)
 44 |         cross_term = square_of_sum - sum_of_square
 45 |         cross_term = 0.5 * tf.reduce_sum(cross_term, axis=2, keepdims=False)
 46 | 
 47 |         return cross_term
 48 | 
 49 |     def compute_output_shape(self, input_shape):
 50 |         return (None, 1)
 51 | 
 52 | 
 53 | class ResNetLayer(Layer):
 54 |     """残差网络，改写卷积为全连接层
 55 |         Input shape
 56 |             - 2D tensor with shape: ``(batch_size, input_dim)``.
 57 |         Output shape
 58 |             - 2D tensor with shape: ``(batch_size, units)``.
 59 |     """
 60 | 
 61 |     def __init__(self, hidden_units=None, **kwargs):
 62 |         super(ResNetLayer, self).__init__(**kwargs)
 63 |         if hidden_units is None:
 64 |             hidden_units = [256, 128, 64]
 65 |         self.hidden_units = hidden_units
 66 |         self.dense_layers = []
 67 |         self.layer_num = len(self.hidden_units)
 68 |         self.relu = ReLU()
 69 |         self.batch_norm = BatchNormalization()
 70 |         self.add = Add()
 71 | 
 72 |     def build(self, input_shape):
 73 |         super(ResNetLayer, self).build(input_shape)
 74 |         for i in range(self.layer_num):
 75 |             dense_layer = Dense(self.hidden_units[i], activation=None)
 76 |             self.dense_layers.append(dense_layer)
 77 |         self.down_sample = Dense(self.hidden_units[self.layer_num - 1], activation=None)  # 最后一层要维度一样，便于最后Add
 78 | 
 79 |     def call(self, inputs, **kwargs):
 80 |         identity = self.down_sample(inputs)
 81 | 
 82 |         net = inputs
 83 |         for i in range(self.layer_num):
 84 |             net = self.dense_layers[i](net)
 85 |             if i == 0:
 86 |                 net = self.batch_norm(net)
 87 |             if i != self.layer_num - 1:
 88 |                 net = self.relu(net)
 89 | 
 90 |         output = self.relu(self.add([net, identity]))
 91 |         return output
 92 | 
 93 | 
 94 | class Tower(Layer):
 95 |     def __init__(self,
 96 |                  layer_num,
 97 |                  layer_units,
 98 |                  activation,
 99 |                  **kwargs):
100 |         super(Tower, self).__init__(**kwargs)
101 |         self.tower_layers = []
102 |         self.layer_num = layer_num
103 |         self.layer_units = layer_units
104 |         self.activation = activation
105 | 
106 |     def build(self, input_shape):
107 |         super(Tower, self).build(input_shape)
108 |         for i in range(self.layer_num):
109 |             dense_layer = Dense(self.layer_units[i], activation=self.activation)
110 |             self.tower_layers.append(dense_layer)
111 | 
112 |     def call(self, inputs, **kwargs):
113 |         net = inputs
114 |         for layer in self.tower_layers:
115 |             net = layer(net)
116 |         net = Dropout(0.3)(net)
117 |         return net
118 | 
119 | 
120 | class SENetLayer(Layer):
121 |     def __init__(self, last_shape, reduction=4, **kwargs):
122 |         super(SENetLayer, self).__init__(**kwargs)
123 |         self.reduction = reduction
124 |         self.last_shape = last_shape
125 |         self.excitation_layer = Dense(self.last_shape, activation=tf.keras.activations.hard_sigmoid)
126 |         self.squeeze_layer = Dense(self.last_shape // self.reduction, activation='relu')
127 | 
128 |     def call(self, inputs, **kwargs):
129 |         net = self.squeeze_layer(inputs)
130 |         net = self.excitation_layer(net)
131 |         return net  # senet层输出的特征裁判值
132 | 
133 | 
134 | class Linear(Layer):
135 |     def __init__(self, feature_length, w_reg=1e-6):
136 |         """
137 |         Linear Layer
138 |         Input:
139 |             - feature_length: A scalar. The length of features.
140 |             - w_reg: A scalar. The regularization coefficient of parameter w.
141 |         """
142 |         super(Linear, self).__init__()
143 |         self.feature_length = feature_length
144 |         self.w_reg = w_reg
145 | 
146 |     def build(self, input_shape):
147 |         self.w = self.add_weight(name="w",
148 |                                  shape=(self.feature_length, 1),
149 |                                  regularizer=l2(self.w_reg),
150 |                                  trainable=True)
151 | 
152 |     def call(self, inputs, **kwargs):
153 |         result = tf.reduce_sum(tf.nn.embedding_lookup(self.w, inputs), axis=1)  # (batch_size, 1)
154 |         return result
155 | 
156 | 
157 | class MyDense(Layer):
158 |     def __init__(self, units, **kwargs):
159 |         super(MyDense, self).__init__(**kwargs)
160 |         self.units = units
161 | 
162 |     def build(self, input_shape):
163 |         super(MyDense, self).build(input_shape)  # 相当于设置self.build = True
164 |         self.w = self.add_weight(shape=(input_shape[-1], self.units),
165 |                                  initializer='random_normal',
166 |                                  trainable=True,
167 |                                  name='w')
168 |         self.b = self.add_weight(shape=(self.units,),
169 |                                  initializer='random_normal',
170 |                                  trainable=True,
171 |                                  name='b')
172 | 
173 |     def call(self, inputs, **kwargs):
174 |         return tf.keras.activations.sigmoid(tf.matmul(inputs, self.w) + self.b)
175 | 
176 | 
177 | class DNNLayer(Layer):
178 |     def __init__(self, layer_units, dropout_rate=0.3, **kwargs):
179 |         super(DNNLayer, self).__init__(**kwargs)
180 |         self.layer_units = layer_units
181 |         self.batch_norm = BatchNormalization()
182 |         self.dropout_rate = dropout_rate
183 |         self.dense_layers = []
184 | 
185 |     def build(self, input_shape):
186 |         super(DNNLayer, self).build(input_shape)
187 |         for i in range(len(self.layer_units)):
188 |             dense_layer = Dense(self.layer_units[i], activation='relu')
189 |             self.dense_layers.append(dense_layer)
190 | 
191 |     def call(self, inputs, **kwargs):
192 |         net = inputs
193 |         for i in range(len(self.dense_layers)):
194 |             net = self.dense_layers[i](net)
195 |             if i == 0:
196 |                 net = self.batch_norm(net)  # batch_norm加在第一层的输入的线性变换后，激活函数(Relu)之后
197 |         net = Dropout(self.dropout_rate)(net)
198 |         return net
199 | 
200 | 
201 | class UserRepresentationLayer(Layer):
202 |     def __init__(self, **kwargs):
203 |         super(UserRepresentationLayer, self).__init__(**kwargs)
204 |         self.ActivationSumPoolingFromDIN = ActivationSumPoolingFromDIN()
205 | 
206 |     def call(self, inputs, **kwargs):
207 |         em, eu, Xu = inputs
208 |         ru_ = self.ActivationSumPoolingFromDIN([Xu, em])
209 | 
210 |         # ru: user representation
211 |         ru = concatenate([ru_, eu], axis=-1)
212 |         return ru
213 | 
214 | 
215 | class UserMatchLayer(Layer):
216 |     def __init__(self, **kwargs):
217 |         super(UserMatchLayer, self).__init__(**kwargs)
218 |         self.l2_norm_layer = L2_norm_layer(axis=-1)
219 | 
220 |     def relavant_unit(self, ru, r_ul):
221 |         ru_norm = self.l2_norm_layer(ru)
222 |         r_ul_norm = self.l2_norm_layer(r_ul)
223 |         a_l = tf.reduce_sum(tf.multiply(ru_norm, r_ul_norm), axis=1, keepdims=True)
224 | 
225 |         relavant = {'relavant': tf.multiply(a_l, r_ul),
226 |                   'a_l': a_l
227 |                   }
228 |         return relavant
229 | 
230 |     def call(self, inputs, **kwargs):
231 |         ru, ru1, ru2, ru3 = inputs
232 |         ru_u1 = self.relavant_unit(ru, ru1)
233 |         ru_u2 = self.relavant_unit(ru, ru2)
234 |         ru_u3 = self.relavant_unit(ru, ru3)
235 | 
236 |         result = {'Su': ru_u1['relavant'] + ru_u2['relavant'] + ru_u3['relavant'],
237 |                   'Ru': ru_u1['a_l'] + ru_u2['a_l'] + ru_u3['a_l']
238 |                   }
239 |         return result
240 | 
241 | 
242 | class TextCNNLayer(Layer):
243 |     def __init__(self, filters, kernel_size, hidden_units, **kwargs):
244 |         super(TextCNNLayer, self).__init__(**kwargs)
245 |         self.filters = filters
246 |         self.kernel_size = kernel_size
247 |         self.hidden_units = hidden_units
248 |         self.convs = []
249 |         self.max_pools = []
250 |         for i in range(len(self.kernel_size)):
251 |             self.kernel_size[i] = int(self.kernel_size[i]) if not isinstance(self.kernel_size[i], int) else self.kernel_size[i]
252 |             conv_layer = Conv1D(filters=self.filters, kernel_size=self.kernel_size[i], padding='same', strides=1, activation='relu')
253 |             max_pool = MaxPooling1D(pool_size=self.kernel_size[i], padding='same')
254 |             self.convs.append(conv_layer)
255 |             self.max_pools.append(max_pool)
256 |         self.batch_norm = BatchNormalization()
257 |         self.dense_layer = Dense(self.hidden_units, activation='relu')
258 | 
259 |     def call(self, inputs, **kwargs):
260 |         cnn_i = []
261 |         for i in range(len(self.convs)):
262 |             x = self.convs[i](inputs)  # 每次对inputs做不同尺度的卷积
263 |             x = self.max_pools[i](x)
264 |             cnn_i.append(Flatten()(x))
265 | 
266 |         cnn = concatenate(cnn_i, axis=-1)
267 | 
268 |         drop = Dropout(0.3)(cnn)
269 |         out = self.dense_layer(drop)
270 |         return out
271 | 
272 | 
273 | class Attention_Layer(Layer):
274 |     def __init__(self, att_hidden_units, activation='relu'):
275 |         """
276 |             Input shape
277 |                 - query: 2D tensor with shape: ``(batch_size, input_dim)``.
278 |                 - key: 3D tensor with shape: ``(batch_size, seq_len, input_dim)``.
279 |                 - value: 3D tensor with shape: ``(batch_size, seq_len, input_dim)``.
280 |             Output shape
281 |                 - 2D tensor with shape: ``(batch_size, input_dim)``.
282 |         """
283 |         super(Attention_Layer, self).__init__()
284 |         self.att_dense = []
285 |         self.att_hidden_units = att_hidden_units
286 |         self.activation = activation
287 |         self.att_final_dense = Dense(1)
288 |         self.supports_masking = True
289 | 
290 |     def build(self, input_shape):
291 |         super(Attention_Layer, self).build(input_shape)
292 |         for i in range(len(self.att_hidden_units)):
293 |             self.att_dense.append(Dense(self.att_hidden_units[i], activation=self.activation))
294 | 
295 |     def call(self, inputs, mask=None, **kwargs):
296 |         # query: candidate item  (None, d * 2), d is the dimension of embedding
297 |         # key: hist items  (None, seq_len, d * 2)
298 |         # value: hist items  (None, seq_len, d * 2)
299 |         q, k, v = inputs
300 |         q = tf.tile(q, multiples=[1, k.shape[1]])  # (None, seq_len * d * 2)
301 |         q = tf.reshape(q, shape=[-1, k.shape[1], k.shape[2]])  # (None, seq_len, d * 2)
302 | 
303 |         # q, k, out product should concat
304 |         info = tf.concat([q, k, q - k, q * k], axis=-1)
305 | 
306 |         # dense
307 |         for dense in self.att_dense:
308 |             info = dense(info)
309 | 
310 |         outputs = self.att_final_dense(info)  # (None, seq_len, 1)
311 |         outputs = tf.squeeze(outputs, axis=-1)  # (None, seq_len)
312 | 
313 |         if mask:
314 |             paddings = tf.ones_like(outputs) * (-2 ** 32 + 1)  # (None, seq_len)  填充 -inf
315 |             outputs = tf.where(tf.equal(mask, 0), paddings, outputs)  # (None, seq_len)
316 |         # softmax
317 |         outputs = tf.nn.softmax(logits=outputs)  # (None, seq_len)
318 |         outputs = tf.expand_dims(outputs, axis=1)  # None, 1, seq_len)
319 | 
320 |         outputs = tf.matmul(outputs, v)  # (None, 1, d * 2)
321 |         outputs = tf.squeeze(outputs, axis=1)
322 | 
323 |         return outputs
324 | 
325 | 
326 | class SelfAttention_Layer(Layer):
327 |     def __init__(self):
328 |         super(SelfAttention_Layer, self).__init__()
329 | 
330 |     def build(self, input_shape):
331 |         self.dim = input_shape[0][-1]
332 |         self.W = self.add_weight(shape=[self.dim, self.dim], name='weight',
333 |             initializer='random_uniform')
334 | 
335 |     def call(self, inputs, mask=None, **kwargs):
336 |         q, k, v = inputs
337 |         # pos encoding
338 |         k += self.positional_encoding(k)
339 |         q += self.positional_encoding(q)
340 |         # Nonlinear transformation
341 |         q = tf.nn.relu(tf.matmul(q, self.W))  # (None, seq_len, dim)
342 |         k = tf.nn.relu(tf.matmul(k, self.W))  # (None, seq_len, dim)
343 |         mat_qk = tf.matmul(q, k, transpose_b=True)  # (None, seq_len, seq_len)
344 |         dk = tf.cast(self.dim, dtype=tf.float32)
345 |         # Scaled
346 |         scaled_att_logits = mat_qk / tf.sqrt(dk)
347 |         # Mask
348 |         if mask:
349 |             mask = tf.tile(tf.expand_dims(mask, 1), [1, q.shape[1], 1])  # (None, seq_len, seq_len)
350 |             paddings = tf.ones_like(scaled_att_logits) * (-2 ** 32 + 1)
351 |             outputs = tf.where(tf.equal(mask, 0), paddings, scaled_att_logits)  # (None, seq_len, seq_len)
352 |         # softmax
353 |         outputs = tf.nn.softmax(logits=outputs, axis=-1)  # (None, seq_len, seq_len)
354 |         # output
355 |         outputs = tf.matmul(outputs, v)  # (None, seq_len, dim)
356 |         outputs = tf.reduce_mean(outputs, axis=1)  # (None, dim)
357 |         return outputs
358 | 
359 |     @staticmethod
360 |     def get_angles(pos, i, d_model):
361 |         angle_rates = 1 / np.power(10000, (2 * (i // 2)) / np.float32(d_model))
362 |         return pos * angle_rates
363 | 
364 |     def positional_encoding(self, QK_input):
365 |         angle_rads = self.get_angles(np.arange(QK_input.shape[1])[:, np.newaxis],
366 |                                 np.arange(self.dim)[np.newaxis, :], self.dim)
367 |         angle_rads[:, 0::2] = np.sin(angle_rads[:, 0::2])
368 |         angle_rads[:, 1::2] = np.cos(angle_rads[:, 1::2])
369 |         pos_encoding = angle_rads[np.newaxis, ...]
370 | 
371 |         return tf.cast(pos_encoding, dtype=tf.float32)
372 | 
373 | 
374 | class BiLSTM_Attention_Layer(Layer):
375 |     def __init__(self, lstm_units=None, **kwargs):
376 |         super(BiLSTM_Attention_Layer, self).__init__(**kwargs)
377 |         self.lstm_units = lstm_units
378 |         self.bi_lstm1 = Bidirectional(LSTM(lstm_units, return_sequences=True), merge_mode='concat')
379 |         self.bi_lstm2 = Bidirectional(LSTM(lstm_units))
380 | 
381 |     def call(self, inputs, **kwargs):
382 |         inputs = Reshape((-1, inputs.shape[1]))(inputs)
383 |         bilstm_out1 = self.bi_lstm1(inputs)
384 |         bilstm_out2 = self.bi_lstm2(bilstm_out1)
385 |         #att_out = self.self_att(bilstm_out2)
386 |         return bilstm_out2
387 | 
388 | 
389 | class ActivationSumPoolingFromDIN(Layer):
390 |     def __init__(self, att_hidden_units=[64, 32], att_activation='relu'):
391 |         """
392 |         用户行为序列对候选集做atten，然后sum pooling
393 |         """
394 |         super(ActivationSumPoolingFromDIN, self).__init__()
395 | 
396 |         # attention layer
397 |         self.attention_layer = Attention_Layer(att_hidden_units, att_activation)
398 | 
399 |         self.bn = BatchNormalization(trainable=True)
400 | 
401 |     def call(self, inputs, **kwargs):
402 |         seq_embed, item_embed = inputs
403 |         user_interest_sum_pool = self.attention_layer([item_embed, seq_embed, seq_embed])
404 | 
405 |         # concat user_info(att hist), cadidate item embedding
406 |         info_all = tf.concat([user_interest_sum_pool, item_embed], axis=-1)
407 |         info_all = self.bn(info_all)
408 |         return info_all
409 | 
410 | 
411 | class MultiHeadSelfAttention(Layer):
412 |     def __init__(self, num_units, num_heads=8, dropout_rate=0, **kwargs):
413 |         """
414 |             Applies multi-head attention.
415 |                 Args:
416 |                   queries: A 3d tensor with shape of [N, T_q, C_q].
417 |                   keys: A 3d tensor with shape of [N, T_k, C_k].
418 |                   values: A 3d tensor with shape of [N, T_v, C_v]
419 |                   num_units: A scalar. Attention size.
420 |                   dropout_rate: A floating point number.
421 |                   num_heads: An int. Number of heads.
422 |                 Returns
423 |                   A 3d tensor with shape of (N, T_q, C)
424 |         """
425 |         super(MultiHeadSelfAttention, self).__init__(**kwargs)
426 |         self.num_units = num_units
427 |         self.num_heads = num_heads
428 |         self.dropout_rate = dropout_rate
429 |         self.dense_q = Dense(units=self.num_units, use_bias=False, activation='relu')
430 |         self.dense_k = Dense(units=self.num_units, use_bias=False, activation='relu')
431 |         self.dense_v = Dense(units=self.num_units, use_bias=False, activation='relu')
432 | 
433 |     def call(self, inputs, **kwargs):
434 |         queries, keys, values = inputs
435 |         Q = self.dense_q(queries)
436 |         K = self.dense_k(keys)
437 |         V = self.dense_v(values)
438 | 
439 |         # Split and concat
440 |         Q_ = tf.concat(tf.split(Q, self.num_heads, axis=2), axis=0)  # (h*N, T_q, C/h)
441 |         K_ = tf.concat(tf.split(K, self.num_heads, axis=2), axis=0)  # (h*N, T_k, C/h)
442 |         V_ = tf.concat(tf.split(V, self.num_heads, axis=2), axis=0)  # (h*N, T_k, C/h)
443 | 
444 |         # Multiplication
445 |         outputs = tf.matmul(Q_, tf.transpose(K_, [0, 2, 1]))  # (h*N, T_q, T_k)
446 | 
447 |         # Scale
448 |         outputs = outputs / (K_.get_shape().as_list()[-1] ** 0.5)
449 | 
450 |         # Activation
451 |         outputs = tf.nn.softmax(outputs)  # (h*N, T_q, T_k)
452 | 
453 |         # Dropouts
454 |         outputs = Dropout(self.dropout_rate)(outputs)
455 | 
456 |         # Weighted sum
457 |         outputs = tf.matmul(outputs, V_)  # ( h*N, T_q, C/h)
458 | 
459 |         # Restore shape
460 |         outputs = tf.concat(tf.split(outputs, self.num_heads, axis=0), axis=2)  # (N, T_q, C)
461 | 
462 |         # Residual connection
463 |         outputs += queries
464 | 
465 |         return outputs
466 | 
467 | 
468 | class DeepCrossLayer(Layer):
469 |     def __init__(self, layer_num, embed_dim, output_dim=0, **kwargs):
470 |         """
471 |             DCN Model implements
472 |             usage: DeepCrossLayer(2, item_feature.shape[-1], name="deep_cross_features")(item_feature)
473 |         """
474 |         super(DeepCrossLayer, self).__init__(**kwargs)
475 |         self.layer_num = layer_num
476 |         self.embed_dim = embed_dim
477 | 
478 |         self.w = []
479 |         self.b = []
480 |         for i in range(self.layer_num):
481 |             self.w.append(tf.Variable(lambda: tf.random.truncated_normal(shape=(self.embed_dim,), stddev=0.01)))
482 |             self.b.append(tf.Variable(lambda: tf.zeros(shape=(embed_dim,))))
483 | 
484 |         self.output_dim = output_dim
485 |         self.dense = Dense(units=self.output_dim, use_bias=False)
486 | 
487 |     def cross_layer(self, inputs, i):
488 |         x0, xl = inputs
489 |         # feature crossing
490 |         x1_T = tf.reshape(xl, [-1, 1, self.embed_dim])
491 |         x_lw = tf.tensordot(x1_T, self.w[i], axes=1)
492 |         cross = x0 * x_lw
493 |         return cross + self.b[i] + xl
494 | 
495 |     def call(self, inputs, **kwargs):
496 |         xl = inputs
497 |         for i in range(self.layer_num):
498 |             xl = self.cross_layer([inputs, xl], i)
499 |         if self.output_dim > 0:
500 |             xl = self.dense(xl)
501 |         return xl
502 | 
503 | 
504 | class CINLayer(Layer):
505 |     def __init__(self, cin_size=[64, 64], l2_reg=1e-4, **kwargs):
506 |         """CIN Model implements
507 |         ** only for sparse feature **
508 | 
509 |             Input
510 |                 - cin_size: A list. [H_1, H_2 ,..., H_k], a list of the number of layers
511 |                 - l2_reg: A scalar. L2 regularization.
512 |                 - inputs tensor 3-D (batch_size, field_nums, emb_sizes)
513 |             usage: CINLayer()(tf.stack([item_shangquan_emb, item_comm_emb], axis=1), name='cin_features')
514 |         """
515 |         super(CINLayer, self).__init__(**kwargs)
516 |         self.cin_size = cin_size
517 |         self.l2_reg = l2_reg
518 | 
519 |     def build(self, input_shape):
520 |         # get the number of embedding fields
521 |         self.embedding_nums = input_shape[1]
522 |         # a list of the number of CIN
523 |         self.field_nums = [self.embedding_nums] + self.cin_size
524 |         # filters
525 |         self.cin_W = {
526 |             'CIN_W_' + str(i): self.add_weight(
527 |                 name='CIN_W_' + str(i),
528 |                 shape=(1, self.field_nums[0] * self.field_nums[i], self.field_nums[i + 1]),
529 |                 initializer='random_normal',
530 |                 regularizer=l2(self.l2_reg),
531 |                 trainable=True)
532 |             for i in range(len(self.field_nums) - 1)
533 |         }
534 | 
535 |     def call(self, inputs, **kwargs):
536 |         dim = inputs.shape[-1]
537 |         hidden_layers_results = [inputs]
538 |         # split dimension 2 for convenient calculation
539 |         split_X_0 = tf.split(hidden_layers_results[0], dim, 2)  # dim * (None, field_nums[0], 1)
540 |         for idx, size in enumerate(self.cin_size):
541 |             split_X_K = tf.split(hidden_layers_results[-1], dim, 2)  # dim * (None, filed_nums[i], 1)
542 | 
543 |             result_1 = tf.matmul(split_X_0, split_X_K, transpose_b=True)  # (dim, None, field_nums[0], field_nums[i])
544 | 
545 |             result_2 = tf.reshape(result_1, shape=[dim, -1, self.embedding_nums * self.field_nums[idx]])
546 | 
547 |             result_3 = tf.transpose(result_2, perm=[1, 0, 2])  # (None, dim, field_nums[0] * field_nums[i])
548 | 
549 |             result_4 = tf.nn.conv1d(input=result_3, filters=self.cin_W['CIN_W_' + str(idx)], stride=1,
550 |                                     padding='VALID')
551 | 
552 |             result_5 = tf.transpose(result_4, perm=[0, 2, 1])  # (None, field_num[i+1], dim)
553 | 
554 |             hidden_layers_results.append(result_5)
555 | 
556 |         final_results = hidden_layers_results[1:]
557 |         result = tf.concat(final_results, axis=1)  # (None, H_1 + ... + H_K, dim)
558 |         result = tf.reduce_sum(result, axis=-1)  # (None, dim)
559 | 
560 |         return result
561 | 
562 | 
563 | def parallel_layer(num_layer, layer_units, mlp_inputs, fm_inputs, dcn_inputs, cin_inputs):
564 |     """
565 |         腾讯信息流推荐排序中的并联双塔CTR结构
566 |         复现参考 add by stefan
567 |     """
568 |     mlp_features = Tower(layer_num=num_layer, layer_units=layer_units,
569 |                          activation=tf.nn.leaky_relu)(mlp_inputs)
570 |     fm_features = FMLayer()(fm_inputs)
571 |     dcn_features = DeepCrossLayer(2, dcn_inputs.shape[-1], int(layer_units[-1]))(dcn_inputs)
572 |     cin_features = CINLayer(cin_size=[32, 32])(cin_inputs)
573 | 
574 |     # concat dnn_out and dcn_out
575 |     mlp_dcn_features = concatenate([mlp_features, dcn_features], axis=-1)
576 | 
577 |     return mlp_dcn_features, fm_features, cin_features
578 | 
579 | 
580 | class GlobalAveragePooling1DSef(Layer):
581 |     def __init__(self, data_format='channels_last', keepdims=False, **kwargs):
582 |         super(GlobalAveragePooling1DSef, self).__init__(**kwargs)
583 |         self.data_format = data_format
584 |         self.supports_masking = True
585 |         self.keepdims = keepdims
586 | 
587 |     def call(self, inputs, mask=None, **kwargs):
588 |         steps_axis = 1 if self.data_format == 'channels_last' else 2
589 |         if mask is not None:
590 |             mask = tf.cast(mask, inputs[0].dtype)
591 |             mask = tf.expand_dims(
592 |                 mask, 2 if self.data_format == 'channels_last' else 1)
593 |             inputs *= mask
594 |             return tf.reduce_sum(
595 |                 inputs, axis=steps_axis,
596 |                 keepdims=self.keepdims) / tf.maximum(1.0, tf.reduce_sum(
597 |                 mask, axis=steps_axis, keepdims=self.keepdims))
598 |         else:
599 |             return tf.reduce_mean(inputs, axis=steps_axis, keepdims=self.keepdims)
600 | 
601 |     def compute_mask(self, inputs, mask=None):
602 |         return None
603 | 
604 |     def get_config(self):
605 |         config = super().get_config()
606 |         config.update({
607 |             "data_format": self.data_format,
608 |             "keepdims": self.keepdims,
609 |         })
610 |         return config
611 | 
612 | 
613 | class MMoE(Layer):
614 |     """
615 |     Multi-gate Mixture-of-Experts model.
616 |     """
617 | 
618 |     def __init__(self,
619 |                  units,
620 |                  num_experts,
621 |                  num_tasks,
622 |                  use_expert_bias=True,
623 |                  use_gate_bias=True,
624 |                  expert_activation='relu',
625 |                  gate_activation='softmax',
626 |                  expert_bias_initializer='zeros',
627 |                  gate_bias_initializer='zeros',
628 |                  expert_bias_regularizer=None,
629 |                  gate_bias_regularizer=None,
630 |                  expert_bias_constraint=None,
631 |                  gate_bias_constraint=None,
632 |                  expert_kernel_initializer='VarianceScaling',
633 |                  gate_kernel_initializer='VarianceScaling',
634 |                  expert_kernel_regularizer=None,
635 |                  gate_kernel_regularizer=None,
636 |                  expert_kernel_constraint=None,
637 |                  gate_kernel_constraint=None,
638 |                  activity_regularizer=None,
639 |                  **kwargs):
640 |         """
641 |          Method for instantiating MMoE layer.
642 |         :param units: Number of hidden units
643 |         :param num_experts: Number of experts
644 |         :param num_tasks: Number of tasks
645 |         :param use_expert_bias: Boolean to indicate the usage of bias in the expert weights
646 |         :param use_gate_bias: Boolean to indicate the usage of bias in the gate weights
647 |         :param expert_activation: Activation function of the expert weights
648 |         :param gate_activation: Activation function of the gate weights
649 |         :param expert_bias_initializer: Initializer for the expert bias
650 |         :param gate_bias_initializer: Initializer for the gate bias
651 |         :param expert_bias_regularizer: Regularizer for the expert bias
652 |         :param gate_bias_regularizer: Regularizer for the gate bias
653 |         :param expert_bias_constraint: Constraint for the expert bias
654 |         :param gate_bias_constraint: Constraint for the gate bias
655 |         :param expert_kernel_initializer: Initializer for the expert weights
656 |         :param gate_kernel_initializer: Initializer for the gate weights
657 |         :param expert_kernel_regularizer: Regularizer for the expert weights
658 |         :param gate_kernel_regularizer: Regularizer for the gate weights
659 |         :param expert_kernel_constraint: Constraint for the expert weights
660 |         :param gate_kernel_constraint: Constraint for the gate weights
661 |         :param activity_regularizer: Regularizer for the activity
662 |         :param kwargs: Additional keyword arguments for the Layer class
663 |         """
664 |         super(MMoE, self).__init__(**kwargs)
665 | 
666 |         # Hidden nodes parameter
667 |         self.units = units
668 |         self.num_experts = num_experts
669 |         self.num_tasks = num_tasks
670 | 
671 |         # Weight parameter
672 |         self.expert_kernels = None
673 |         self.gate_kernels = None
674 |         self.expert_kernel_initializer = initializers.get(expert_kernel_initializer)
675 |         self.gate_kernel_initializer = initializers.get(gate_kernel_initializer)
676 |         self.expert_kernel_regularizer = regularizers.get(expert_kernel_regularizer)
677 |         self.gate_kernel_regularizer = regularizers.get(gate_kernel_regularizer)
678 |         self.expert_kernel_constraint = constraints.get(expert_kernel_constraint)
679 |         self.gate_kernel_constraint = constraints.get(gate_kernel_constraint)
680 | 
681 |         # Activation parameter
682 |         # self.expert_activation = activations.get(expert_activation)
683 |         self.expert_activation = expert_activation
684 |         self.gate_activation = gate_activation
685 | 
686 |         # Bias parameter
687 |         self.expert_bias = None
688 |         self.gate_bias = None
689 |         self.use_expert_bias = use_expert_bias
690 |         self.use_gate_bias = use_gate_bias
691 |         self.expert_bias_initializer = initializers.get(expert_bias_initializer)
692 |         self.gate_bias_initializer = initializers.get(gate_bias_initializer)
693 |         self.expert_bias_regularizer = regularizers.get(expert_bias_regularizer)
694 |         self.gate_bias_regularizer = regularizers.get(gate_bias_regularizer)
695 |         self.expert_bias_constraint = constraints.get(expert_bias_constraint)
696 |         self.gate_bias_constraint = constraints.get(gate_bias_constraint)
697 | 
698 |         # Activity parameter
699 |         self.activity_regularizer = regularizers.get(activity_regularizer)
700 | 
701 |         self.expert_layers = []
702 |         self.gate_layers = []
703 |         for i in range(self.num_experts):
704 |             self.expert_layers.append(Dense(self.units, activation=self.expert_activation,
705 |                                             use_bias=self.use_expert_bias,
706 |                                             kernel_initializer=self.expert_kernel_initializer,
707 |                                             kernel_regularizer=self.expert_kernel_regularizer,
708 |                                             bias_regularizer=self.expert_bias_regularizer,
709 |                                             activity_regularizer=None,
710 |                                             kernel_constraint=self.expert_kernel_constraint,
711 |                                             bias_constraint=self.expert_bias_constraint))
712 |         for i in range(self.num_tasks):
713 |             self.gate_layers.append(Dense(self.num_experts, activation=self.gate_activation,
714 |                                           use_bias=self.use_gate_bias,
715 |                                           kernel_initializer=self.gate_kernel_initializer,
716 |                                           kernel_regularizer=self.gate_kernel_regularizer,
717 |                                           bias_regularizer=self.gate_bias_regularizer,
718 |                                           activity_regularizer=None,
719 |                                           kernel_constraint=self.gate_kernel_constraint,
720 |                                           bias_constraint=self.gate_bias_constraint))
721 | 
722 |     def call(self, inputs, **kwargs):
723 |         """
724 |         Method for the forward function of the layer.
725 |         :param inputs: Input tensor
726 |         :param kwargs: Additional keyword arguments for the base method
727 |         :return: A tensor
728 |         """
729 |         # assert input_shape is not None and len(input_shape) >= 2
730 | 
731 |         expert_outputs, gate_outputs, final_outputs = [], [], []
732 |         for expert_layer in self.expert_layers:
733 |             expert_output = expand_dims(expert_layer(inputs), axis=2)
734 |             expert_outputs.append(expert_output)
735 |         expert_outputs = tf.concat(expert_outputs, 2)
736 | 
737 |         for gate_layer in self.gate_layers:
738 |             gate_outputs.append(gate_layer(inputs))
739 | 
740 |         for gate_output in gate_outputs:
741 |             expanded_gate_output = expand_dims(gate_output, axis=1)
742 |             aa = repeat_elements(expanded_gate_output, self.units, axis=1)
743 |             weighted_expert_output = expert_outputs * aa
744 |             bb = sum(weighted_expert_output, axis=2)
745 |             final_outputs.append(bb)
746 |         # 返回的矩阵维度 num_tasks * batch * units
747 | 
748 |         return final_outputs
749 | 


--------------------------------------------------------------------------------