├── README.md
├── deepfm.py
├── features.py
├── textdeepfm.py
└── utils.py

/README.md:
--------------------------------------------------------------------------------
# [BAAI - Kanshan Cup Expert Finding Algorithm Competition 2019](https://www.biendata.com/competition/zhihu2019/)

## Files

features.py: builds the features from the raw dataset
deepfm.py: trains the DeepFM model and writes its predictions
textdeepfm.py: trains the TextDeepFM model and writes its predictions
utils.py: the DeepCTR runtime code shared by both models

## Models

Model 1: DeepFM

Environment: DeepCTR-Torch (https://github.com/shenweichen/DeepCTR-Torch)

Score (AUC): 0.6903 offline, 0.691804111317667 on the leaderboard (no 5-fold cross-validation; the model may not have fully converged, so consider a larger epoch count)

Features

**1. User features**
Raw user attributes: gender, access frequency, A1, ...
Number of topics the user follows and is interested in

**2. Question features**
Character and word counts of the question title
Character and word counts of the question description
Number of topics bound to the question

**3. User-question cross features**
Size of the intersection between the user's followed/interested topics and the question's bound topics
Number of days between the invitation and the question's creation


---

Model 2: TextDeepFM

Description: adds text features on top of DeepFM. The topics a user is interested in serve as the user embedding, and the topics bound to a question serve as the question embedding.
A TextCNN acts as the feature extractor for the text; the extracted features are concatenated with the original feature vector and fed into the DNN.

Score (AUC): 0.7251481779973195 offline, 0.701741036192302 on the leaderboard (no 5-fold cross-validation)
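As a rough, illustrative sketch of the extractor described above (the real implementation is the `TextDeepFM` class in utils.py; the class and argument names below are made up for illustration, only the hyper-parameters mirror textdeepfm.py):

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

class TextCNNExtractor(nn.Module):
    """Embed a padded topic-ID sequence, then apply parallel convolutions with
    max-over-time pooling (filter_sizes=[1, 2, 3], num_filters=64, as in textdeepfm.py)."""
    def __init__(self, vocab_size, embed_dim=64, filter_sizes=(1, 2, 3), num_filters=64):
        super().__init__()
        # index 0 is the padding value produced by pad_sequences in textdeepfm.py
        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=0)
        self.convs = nn.ModuleList(
            nn.Conv2d(1, num_filters, (fs, embed_dim)) for fs in filter_sizes)

    def forward(self, token_ids):                   # (batch, maxlen)
        x = self.embedding(token_ids).unsqueeze(1)  # (batch, 1, maxlen, embed_dim)
        pooled = []
        for conv in self.convs:
            c = F.relu(conv(x)).squeeze(3)          # (batch, num_filters, maxlen - fs + 1)
            pooled.append(F.max_pool1d(c, c.size(2)).squeeze(2))  # max over time
        return torch.cat(pooled, dim=1)             # (batch, num_filters * len(filter_sizes))
```

The concatenated output is what gets appended to the sparse/dense feature vector before the DNN layers.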
---


**Finally, if you are interested in teaming up for competitions or studying together, add me on WeChat:**
![image](https://img-blog.csdnimg.cn/20191016100122904.jpg?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzMwMzc0NTQ5,size_16,color_FFFFFF,t_70)

--------------------------------------------------------------------------------
/deepfm.py:
--------------------------------------------------------------------------------
"""
Model: DeepFM

Environment: DeepCTR-Torch (https://github.com/shenweichen/DeepCTR-Torch)


Features

1. User features
   raw user attributes: gender, access frequency, A1, ...
   number of topics the user follows and is interested in

2. Question features
   character and word counts of the title
   character and word counts of the description
   number of topics bound to the question

3. User-question cross features
   size of the intersection between the user's followed/interested topics and the question's bound topics
   number of days between the invitation and the question's creation

"""

import pandas as pd
import numpy as np
from collections import OrderedDict, namedtuple
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split
from itertools import chain
import torch
import time
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as Data
from sklearn.metrics import *
from torch.utils.data import DataLoader
from tqdm import tqdm
import math
import itertools
from utils import *

""" Run DeepFM """

path = 'E:\\competition\\看山杯\\data\\data_set_0926\\'
train = pd.read_csv(path + 'train.txt', sep='\t')
test = pd.read_csv(path + 'test.txt', sep='\t')

# quick smoke test
# train = train[0:10000]
# test = test[0:10000]

# print(train.head())
data = pd.concat([train, test], ignore_index=True, sort=False)
# print(data.head())


# single-valued categorical features
fixlen_category_columns = ['m_sex', 'm_access_frequencies', 'm_twoA', 'm_twoB', 'm_twoC', 'm_twoD', 'm_twoE', 'm_categoryA',
                           'm_categoryB', 'm_categoryC', 'm_categoryD', 'm_categoryE', 'm_num_interest_topic', 'num_topic_attention_intersection',
                           'q_num_topic_words', 'num_topic_interest_intersection'
                           ]
# numerical features
fixlen_number_columns = ['m_salt_score', 'm_num_atten_topic', 'q_num_title_chars_words', 'q_num_desc_chars_words', 'q_num_desc_words', 'q_num_title_words',
                         'days_to_invite'
                         ]

target = ['label']

data[fixlen_category_columns] = data[fixlen_category_columns].fillna('-1')
data[fixlen_number_columns] = data[fixlen_number_columns].fillna(0)

for feat in fixlen_category_columns:
    lbe = LabelEncoder()
    data[feat] = lbe.fit_transform(data[feat])

mms = MinMaxScaler(feature_range=(0, 1))
data[fixlen_number_columns] = mms.fit_transform(data[fixlen_number_columns])


fixlen_feature_columns = [SparseFeat(feat, data[feat].nunique())
                          for feat in fixlen_category_columns] + [DenseFeat(feat, 1) for feat in fixlen_number_columns]

dnn_feature_columns = fixlen_feature_columns
linear_feature_columns = fixlen_feature_columns
feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)

train = data[~data['label'].isnull()]
test = data[data['label'].isnull()]

train, valid = train_test_split(train, test_size=0.2)
train_model_input = {name: train[name] for name in feature_names}
valid_model_input = {name: valid[name] for name in feature_names}


device = 'cuda:0'
"""Step 1: instantiate the model"""
model = DeepFM(linear_feature_columns=linear_feature_columns, dnn_feature_columns=dnn_feature_columns, task='binary', l2_reg_embedding=1e-5, device=device)

"""Step 2: compile() configures the optimizer, loss and metrics"""
model.compile("adam", "binary_crossentropy", metrics=["binary_crossentropy", "auc"])

"""Step 3: fit() trains the model"""
model.fit(train_model_input, train[target].values, batch_size=8192, epochs=10, validation_data=[valid_model_input, valid[target].values], verbose=1, model_cache_path='E:\\competition\\看山杯\\models\\deepfm.model')

"""Predict"""
test_model_input = {name: test[name] for name in feature_names}
pred_ans = model.predict(test_model_input, 8192)
pred_ans = pred_ans.reshape(pred_ans.shape[0])
result = test[['questionID', 'memberID', 'time']].copy()  # .copy() avoids pandas' SettingWithCopyWarning
result['result'] = pred_ans
result.to_csv(path + 'submit.txt', sep='\t', index=False)  # note: strip the header row before submitting
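# Note (illustrative): pandas can also write the submission without a header row in
# one step, which makes the manual post-processing mentioned above unnecessary:
#     result.to_csv(path + 'submit.txt', sep='\t', index=False, header=False)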
--------------------------------------------------------------------------------
/features.py:
--------------------------------------------------------------------------------
"""
User features
1. raw user attributes: m_sex, m_access_frequencies, ...
2. number of topics the user follows and is interested in

Question features
1. character and word counts of the title
2. character and word counts of the description
3. number of bound topics

User-question cross features
1. size of the intersection between the user's followed/interested topics and the question's bound topics
2. number of days between the invitation and the question's creation
"""
import pandas as pd
import gc

path = 'E:\\competition\\看山杯\\data\\data_set_0926\\'
member_path = path + 'member_info_0926.txt'      # user info
question_path = path + 'question_info_0926.txt'  # question info
invite_path = path + 'invite_info_0926.txt'      # invitation records

def count(x):
    """Number of comma-separated items; '-1' marks a missing value."""
    x = str(x)
    if x == '-1':
        return 0
    else:
        return len(x.split(","))

user_feature = pd.read_csv(open(member_path, "r", encoding='utf-8'), sep='\t', header=None,
                           names=['memberID', 'm_sex', 'm_keywords', 'm_amount_grade', 'm_hot_grade', 'm_registry_type',
                                  'm_registry_platform', 'm_access_frequencies', 'm_twoA', 'm_twoB', 'm_twoC', 'm_twoD',
                                  'm_twoE', 'm_categoryA', 'm_categoryB', 'm_categoryC', 'm_categoryD', 'm_categoryE',
                                  'm_salt_score', 'm_attention_topics', 'm_interested_topics'])

user_feature['m_num_atten_topic'] = user_feature['m_attention_topics'].apply(lambda x: count(x))
user_feature['m_num_interest_topic'] = user_feature['m_interested_topics'].apply(lambda x: count(x))


question_feature = pd.read_csv(open(question_path, "r", encoding='utf-8'), sep='\t', header=None,
                               names=['questionID', 'q_createTime', 'q_title_chars', 'q_title_words', 'q_desc_chars',
                                      'q_desc_words', 'q_topic_IDs'])

question_feature['q_num_title_chars_words'] = question_feature['q_title_chars'].apply(lambda x: count(x))
question_feature['q_num_desc_chars_words'] = question_feature['q_desc_chars'].apply(lambda x: count(x))
question_feature['q_num_desc_words'] = question_feature['q_desc_words'].apply(lambda x: count(x))
question_feature['q_num_title_words'] = question_feature['q_title_words'].apply(lambda x: count(x))
question_feature['q_num_topic_words'] = question_feature['q_topic_IDs'].apply(lambda x: count(x))

"""Training set"""
invite_info_data = pd.read_csv(open(invite_path, "r", encoding='utf-8'), sep='\t', header=None,
                               names=['questionID', 'memberID', 'time', 'label'])

invite_info_data = invite_info_data.merge(user_feature, how='left', on='memberID')
invite_info_data = invite_info_data.merge(question_feature, how='left', on='questionID')

# the timestamps are strings such as 'D8-H20'; keep the day number
invite_info_data['day'] = invite_info_data['time'].apply(lambda x: int(x.split('-')[0][1:]))

"""Size of the intersection between the user's followed/interested topics and the question's bound topics"""
def intersection(x, y):
    x = str(x)
    y = str(y)
    if x == '-1' or y == '-1':
        return 0
    return len(set(x.split(",")) & set(y.split(",")))
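# Quick sanity check of the helper above (hypothetical topic IDs):
#     intersection('T1,T2,T3', 'T2,T9')  ->  1   (only 'T2' is shared)
#     intersection('-1', 'T2,T9')        ->  0   (missing values are encoded as '-1')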
invite_info_data['num_topic_attention_intersection'] = invite_info_data.apply(lambda row: intersection(row['q_topic_IDs'], row['m_attention_topics']), axis=1)
invite_info_data['num_topic_interest_intersection'] = invite_info_data.apply(lambda row: intersection(row['q_topic_IDs'], row['m_interested_topics']), axis=1)

invite_info_data['q_day'] = invite_info_data['q_createTime'].apply(lambda x: int(x.split('-')[0][1:]))
invite_info_data['days_to_invite'] = invite_info_data['day'] - invite_info_data['q_day']
invite_info_data.to_csv(path + "train.txt", sep='\t', index=False)

del invite_info_data
gc.collect()

"""Test set"""
evaluate_path = path + 'invite_info_evaluate_1_0926.txt'  # test-set path
evaluate_info_data = pd.read_csv(open(evaluate_path, "r", encoding='utf-8'), sep='\t', header=None,
                                 names=['questionID', 'memberID', 'time'])
evaluate_info_data['day'] = evaluate_info_data['time'].apply(lambda x: int(x.split('-')[0][1:]))
evaluate_info_data = evaluate_info_data.merge(user_feature, how='left', on='memberID')
evaluate_info_data = evaluate_info_data.merge(question_feature, how='left', on='questionID')
evaluate_info_data['num_topic_attention_intersection'] = evaluate_info_data.apply(lambda row: intersection(row['q_topic_IDs'], row['m_attention_topics']), axis=1)
evaluate_info_data['num_topic_interest_intersection'] = evaluate_info_data.apply(lambda row: intersection(row['q_topic_IDs'], row['m_interested_topics']), axis=1)

evaluate_info_data['q_day'] = evaluate_info_data['q_createTime'].apply(lambda x: int(x.split('-')[0][1:]))
evaluate_info_data['days_to_invite'] = evaluate_info_data['day'] - evaluate_info_data['q_day']
evaluate_info_data.to_csv(path + "test.txt", sep='\t', index=False)



--------------------------------------------------------------------------------
/textdeepfm.py:
--------------------------------------------------------------------------------
"""
TextDeepFM

Adds text features on top of the DeepFM model:
the topics a user is interested in act as the user embedding,
and the topics bound to a question act as the question embedding.
"""
import pandas as pd
import numpy as np
from collections import OrderedDict, namedtuple
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split
from itertools import chain
import torch
import time
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as Data
from sklearn.metrics import *
from torch.utils.data import DataLoader
from tqdm import tqdm
import math
import itertools
from utils import *
from gensim.models import KeyedVectors
import pickle
import gc


""" Run TextDeepFM """

path = 'E:\\competition\\看山杯\\data\\data_set_0926\\'
train = pd.read_csv(path + 'train.txt', sep='\t')
test = pd.read_csv(path + 'test.txt', sep='\t')
data = pd.concat([train, test], ignore_index=True, sort=False)


# print(data[['m_num_interest_topic', 'q_num_topic_words', 'm_num_atten_topic']].describe())  # maxlen: 10, 13, 100

"""Single-valued categorical and numerical feature processing"""
# single-valued categorical features
fixlen_category_columns = ['m_sex', 'm_access_frequencies', 'm_twoA', 'm_twoB', 'm_twoC', 'm_twoD', 'm_twoE', 'm_categoryA',
                           'm_categoryB', 'm_categoryC', 'm_categoryD', 'm_categoryE', 'm_num_interest_topic', 'num_topic_attention_intersection',
                           'q_num_topic_words', 'num_topic_interest_intersection'
                           ]
# numerical features
fixlen_number_columns = ['m_salt_score', 'm_num_atten_topic', 'q_num_title_chars_words', 'q_num_desc_chars_words', 'q_num_desc_words', 'q_num_title_words',
                         'days_to_invite'
                         ]

target = ['label']

data[fixlen_category_columns] = data[fixlen_category_columns].fillna('-1')
data[fixlen_number_columns] = data[fixlen_number_columns].fillna(0)

for feat in fixlen_category_columns:
    lbe = LabelEncoder()
    data[feat] = lbe.fit_transform(data[feat])

mms = MinMaxScaler(feature_range=(0, 1))
data[fixlen_number_columns] = mms.fit_transform(data[fixlen_number_columns])

"""Text feature processing"""
# truncate or pad each ID sequence to a fixed length
def pad_sequences(x, maxlen):
    x = str(x)
    if x == 'nan':
        return [0] * maxlen
    words = []
    for t in x.split(" "):
        words.append(int(t))
    words = words[0:maxlen]
    words += [0] * (maxlen - len(words))
    return words

def deal_text(values, maxlen=10):
    temp = []
    for text in values:
        temp.append(pad_sequences(text, maxlen))
    return np.array(temp)
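# Illustrative behaviour of the two helpers above (hypothetical word IDs), with maxlen=10:
#     pad_sequences('5 17 3', 10)      ->  [5, 17, 3, 0, 0, 0, 0, 0, 0, 0]
#     pad_sequences(float('nan'), 10)  ->  [0] * 10   (missing text maps to all-padding)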

# initialise the embedding matrix from the pre-trained topic vectors
topic2index = pickle.load(open(path + "topic2index.pick", 'rb'))
topic_word_vector = KeyedVectors.load_word2vec_format(path + "topic_vectors_64d.txt", binary=False)
embedding_matrix = np.random.uniform(size=(len(topic2index) + 1, 64))
miss_count = 0
for word, index in topic2index.items():
    try:
        word_vector = topic_word_vector[word]
        embedding_matrix[index] = word_vector
    except KeyError:
        miss_count += 1
print(miss_count, "topics have no pre-trained vector")

text_feature_columns = [TextFeat('q_topic_words', len(topic2index) + 1, 10),
                        # TextFeat('m_attention_topics_words', len(topic2index) + 1, 10),
                        TextFeat('m_interested_topics_words', len(topic2index) + 1, 10)
                        ]

fixlen_feature_columns = [SparseFeat(feat, data[feat].nunique())
                          for feat in fixlen_category_columns] + [DenseFeat(feat, 1) for feat in fixlen_number_columns]

dnn_feature_columns = fixlen_feature_columns + text_feature_columns
linear_feature_columns = fixlen_feature_columns + text_feature_columns
feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)

train = data[~data['label'].isnull()]
test = data[data['label'].isnull()]

del train['time']
del train['memberID']
del train['questionID']
del train['m_attention_topics_words']
del test['m_attention_topics_words']
gc.collect()


train, valid = train_test_split(train, test_size=0.2)
train_model_input = {name: train[name] for name in feature_names}
valid_model_input = {name: valid[name] for name in feature_names}
train_model_input['q_topic_words'] = deal_text(train_model_input['q_topic_words'])
# train_model_input['m_attention_topics_words'] = deal_text(train_model_input['m_attention_topics_words'])
train_model_input['m_interested_topics_words'] = deal_text(train_model_input['m_interested_topics_words'])
valid_model_input['q_topic_words'] = deal_text(valid_model_input['q_topic_words'])
# valid_model_input['m_attention_topics_words'] = deal_text(valid_model_input['m_attention_topics_words'])
valid_model_input['m_interested_topics_words'] = deal_text(valid_model_input['m_interested_topics_words'])

gc.collect()


device = 'cuda:0'
"""Step 1: instantiate the model"""
model = TextDeepFM(linear_feature_columns=linear_feature_columns, dnn_feature_columns=dnn_feature_columns, task='binary',
                   l2_reg_embedding=1e-5, device=device, use_textcnn=True, filter_sizes=[1, 2, 3], num_filters=64, text_vocab_size=len(topic2index) + 1, text_embedding_size=64, text_embedding_matrix=embedding_matrix, cnn_dropout=0.0)

"""Step 2: compile() configures the optimizer, loss and metrics"""
model.compile("adam", "binary_crossentropy", metrics=["binary_crossentropy", "auc"])

"""Step 3: fit() trains the model"""
model.fit(train_model_input, train[target].values, batch_size=2048, epochs=40, validation_data=[valid_model_input, valid[target].values], verbose=1, model_cache_path='E:\\competition\\看山杯\\models\\textdeepfm.model', early_stop=2)

"""Predict"""
test_model_input = {name: test[name] for name in feature_names}
test_model_input['q_topic_words'] = deal_text(test_model_input['q_topic_words'])
# test_model_input['m_attention_topics_words'] = deal_text(test_model_input['m_attention_topics_words'])
test_model_input['m_interested_topics_words'] = deal_text(test_model_input['m_interested_topics_words'])
pred_ans = model.predict(test_model_input, 2048)
pred_ans = pred_ans.reshape(pred_ans.shape[0])
result = test[['questionID', 'memberID', 'time']].copy()  # .copy() avoids pandas' SettingWithCopyWarning
result['result'] = pred_ans
result.to_csv(path + 'submit.txt', sep='\t', index=False)  # note: strip the header row before submitting

--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from collections import OrderedDict, namedtuple
 3 | from itertools import chain
 4 | import torch
 5 | import time
 6 | import matplotlib.pyplot as plt
 7 | import torch.nn as nn
 8 | import torch.nn.functional as F
 9 | import torch.utils.data as Data
10 | from sklearn.metrics import *
11 | from torch.utils.data import DataLoader
12 | from tqdm import tqdm
13 | import math
14 | import itertools
15 | 
16 | """
17 | ++++++++++++++++++++++++ Runtime environment (DeepCTR library code). Feel free to skip straight to the end +++++++++++++++++++++
18 | 
19 | The main change is to BaseModel, which now supports early stopping and saving results.
20 | 
21 | """
22 | 
23 | def concat_fun(inputs, axis=-1):
24 |     if len(inputs) == 1:
25 |         return inputs[0]
26 |     else:
27 |         return torch.cat(inputs, dim=axis)
28 | 
29 | 
30 | def slice_arrays(arrays, start=None, stop=None):
31 |     """Slice an array or list of arrays.
32 | 
33 |     This takes an array-like, or a list of
34 |     array-likes, and outputs:
35 |         - arrays[start:stop] if `arrays` is an array-like
36 |         - [x[start:stop] for x in arrays] if `arrays` is a list
37 | 
38 |     Can also work on list/array of indices: `slice_arrays(x, indices)`
39 | 
40 |     Arguments:
41 |         arrays: Single array or list of arrays.
42 |         start: can be an integer index (start index)
43 |             or a list/array of indices
44 |         stop: integer (stop index); should be None if
45 |             `start` was a list.
46 | 
47 |     Returns:
48 |         A slice of the array(s).
49 | 
50 |     Raises:
51 |         ValueError: If the value of start is a list and stop is not None.
52 |     """
53 | 
54 |     if arrays is None:
55 |         return [None]
56 | 
57 |     if isinstance(arrays, np.ndarray):
58 |         arrays = [arrays]
59 | 
60 |     if isinstance(start, list) and stop is not None:
61 |         raise ValueError('The stop argument has to be None if the value of start '
62 |                          'is a list.')
63 |     elif isinstance(arrays, list):
64 |         if hasattr(start, '__len__'):
65 |             # hdf5 datasets only support list objects as indices
66 |             if hasattr(start, 'shape'):
67 |                 start = start.tolist()
68 |             return [None if x is None else x[start] for x in arrays]
69 |         else:
70 |             if len(arrays) == 1:
71 |                 return arrays[0][start:stop]
72 |             return [None if x is None else x[start:stop] for x in arrays]
73 |     else:
74 |         if hasattr(start, '__len__'):
75 |             if hasattr(start, 'shape'):
76 |                 start = start.tolist()
77 |             return arrays[start]
78 |         elif hasattr(start, '__getitem__'):
79 |             return arrays[start:stop]
80 |         else:
81 |             return [None]
82 | 
83 | 
84 | class KMaxPooling(nn.Module):
85 |     """K Max pooling that selects the k biggest values along the specified axis.
86 | 
87 |     Input shape
88 |         - nD tensor with shape: ``(batch_size, ..., input_dim)``.
89 | 
90 |     Output shape
91 |         - nD tensor with shape: ``(batch_size, ..., output_dim)``.
92 | 
93 |     Arguments
94 |         - **k**: positive integer, number of top elements to look for along the ``axis`` dimension.
95 | 
96 |     - **axis**: positive integer, the dimension to look for elements.
97 | 98 | """ 99 | 100 | def __init__(self, k, axis, device='cpu'): 101 | super(KMaxPooling, self).__init__() 102 | self.k = k 103 | self.axis = axis 104 | self.to(device) 105 | 106 | def forward(self, input): 107 | if self.axis < 0 or self.axis >= len(input.shape): 108 | raise ValueError("axis must be 0~%d,now is %d" % 109 | (len(input.shape) - 1, self.axis)) 110 | 111 | if self.k < 1 or self.k > input.shape[self.axis]: 112 | raise ValueError("k must be in 1 ~ %d,now k is %d" % 113 | (input.shape[self.axis], self.k)) 114 | 115 | out = torch.topk(input, k=self.k, dim=self.axis, sorted=True)[0] 116 | return out 117 | 118 | class FM(nn.Module): 119 | """Factorization Machine models pairwise (order-2) feature interactions 120 | without linear term and bias. 121 | Input shape 122 | - 3D tensor with shape: ``(batch_size,field_size,embedding_size)``. 123 | Output shape 124 | - 2D tensor with shape: ``(batch_size, 1)``. 125 | References 126 | - [Factorization Machines](https://www.csie.ntu.edu.tw/~b97053/paper/Rendle2010FM.pdf) 127 | """ 128 | 129 | def __init__(self): 130 | super(FM, self).__init__() 131 | 132 | def forward(self, inputs): 133 | fm_input = inputs 134 | 135 | square_of_sum = torch.pow(torch.sum(fm_input, dim=1, keepdim=True), 2) 136 | sum_of_square = torch.sum(fm_input * fm_input, dim=1, keepdim=True) 137 | cross_term = square_of_sum - sum_of_square 138 | cross_term = 0.5 * torch.sum(cross_term, dim=2, keepdim=False) 139 | 140 | return cross_term 141 | 142 | class BiInteractionPooling(nn.Module): 143 | """Bi-Interaction Layer used in Neural FM,compress the 144 | pairwise element-wise product of features into one single vector. 145 | 146 | Input shape 147 | - A 3D tensor with shape:``(batch_size,field_size,embedding_size)``. 148 | 149 | Output shape 150 | - 3D tensor with shape: ``(batch_size,1,embedding_size)``. 151 | 152 | References 153 | - [He X, Chua T S. Neural factorization machines for sparse predictive analytics[C]//Proceedings of the 40th International ACM SIGIR conference on Research and Development in Information Retrieval. ACM, 2017: 355-364.](http://arxiv.org/abs/1708.05027) 154 | """ 155 | 156 | def __init__(self): 157 | super(BiInteractionPooling, self).__init__() 158 | 159 | def forward(self, inputs): 160 | concated_embeds_value = inputs 161 | square_of_sum = torch.pow( 162 | torch.sum(concated_embeds_value, dim=1, keepdim=True), 2) 163 | sum_of_square = torch.sum( 164 | concated_embeds_value * concated_embeds_value, dim=1, keepdim=True) 165 | cross_term = 0.5 * (square_of_sum - sum_of_square) 166 | return cross_term 167 | 168 | 169 | class SENETLayer(nn.Module): 170 | """SENETLayer used in FiBiNET. 171 | Input shape 172 | - A list of 3D tensor with shape: ``(batch_size,filed_size,embedding_size)``. 173 | Output shape 174 | - A list of 3D tensor with shape: ``(batch_size,filed_size,embedding_size)``. 175 | Arguments 176 | - **filed_size** : Positive integer, number of feature groups. 177 | - **reduction_ratio** : Positive integer, dimensionality of the 178 | attention network output space. 179 | - **seed** : A Python integer to use as random seed. 
180 | References 181 | - [FiBiNET: Combining Feature Importance and Bilinear feature Interaction for Click-Through Rate Prediction 182 | Tongwen](https://arxiv.org/pdf/1905.09433.pdf) 183 | """ 184 | 185 | def __init__(self, filed_size, reduction_ratio=3, seed=1024, device='cpu'): 186 | super(SENETLayer, self).__init__() 187 | self.seed = seed 188 | self.filed_size = filed_size 189 | self.reduction_size = max(1, filed_size // reduction_ratio) 190 | self.excitation = nn.Sequential( 191 | nn.Linear(self.filed_size, self.reduction_size, bias=False), 192 | nn.ReLU(), 193 | nn.Linear(self.reduction_size, self.filed_size, bias=False), 194 | nn.ReLU() 195 | ) 196 | self.to(device) 197 | 198 | def forward(self, inputs): 199 | if len(inputs.shape) != 3: 200 | raise ValueError( 201 | "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (len(inputs.shape))) 202 | Z = torch.mean(inputs, dim=-1, out=None) 203 | A = self.excitation(Z) 204 | V = torch.mul(inputs, torch.unsqueeze(A, dim=2)) 205 | 206 | return V 207 | 208 | 209 | class BilinearInteraction(nn.Module): 210 | """BilinearInteraction Layer used in FiBiNET. 211 | Input shape 212 | - A list of 3D tensor with shape: ``(batch_size,filed_size, embedding_size)``. 213 | Output shape 214 | - 3D tensor with shape: ``(batch_size,filed_size, embedding_size)``. 215 | Arguments 216 | - **filed_size** : Positive integer, number of feature groups. 217 | - **str** : String, types of bilinear functions used in this layer. 218 | - **seed** : A Python integer to use as random seed. 219 | References 220 | - [FiBiNET: Combining Feature Importance and Bilinear feature Interaction for Click-Through Rate Prediction 221 | Tongwen](https://arxiv.org/pdf/1905.09433.pdf) 222 | """ 223 | 224 | def __init__(self, filed_size, embedding_size, bilinear_type="interaction", seed=1024, device='cpu'): 225 | super(BilinearInteraction, self).__init__() 226 | self.bilinear_type = bilinear_type 227 | self.seed = seed 228 | self.bilinear = nn.ModuleList() 229 | if self.bilinear_type == "all": 230 | self.bilinear = nn.Linear( 231 | embedding_size, embedding_size, bias=False) 232 | elif self.bilinear_type == "each": 233 | for i in range(filed_size): 234 | self.bilinear.append( 235 | nn.Linear(embedding_size, embedding_size, bias=False)) 236 | elif self.bilinear_type == "interaction": 237 | for i, j in itertools.combinations(range(filed_size), 2): 238 | self.bilinear.append( 239 | nn.Linear(embedding_size, embedding_size, bias=False)) 240 | else: 241 | raise NotImplementedError 242 | self.to(device) 243 | 244 | def forward(self, inputs): 245 | if len(inputs.shape) != 3: 246 | raise ValueError( 247 | "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (len(inputs.shape))) 248 | inputs = torch.split(inputs, 1, dim=1) 249 | if self.bilinear_type == "all": 250 | p = [torch.mul(self.bilinear(v_i), v_j) 251 | for v_i, v_j in itertools.combinations(inputs, 2)] 252 | elif self.bilinear_type == "each": 253 | p = [torch.mul(self.bilinear[i](inputs[i]), inputs[j]) 254 | for i, j in itertools.combinations(range(len(inputs)), 2)] 255 | elif self.bilinear_type == "interaction": 256 | p = [torch.mul(bilinear(v[0]), v[1]) 257 | for v, bilinear in zip(itertools.combinations(inputs, 2), self.bilinear)] 258 | else: 259 | raise NotImplementedError 260 | return torch.cat(p, dim=1) 261 | 262 | 263 | class CIN(nn.Module): 264 | """Compressed Interaction Network used in xDeepFM. 265 | Input shape 266 | - 3D tensor with shape: ``(batch_size,field_size,embedding_size)``. 
267 | Output shape 268 | - 2D tensor with shape: ``(batch_size, featuremap_num)`` ``featuremap_num = sum(self.layer_size[:-1]) // 2 + self.layer_size[-1]`` if ``split_half=True``,else ``sum(layer_size)`` . 269 | Arguments 270 | - **filed_size** : Positive integer, number of feature groups. 271 | - **layer_size** : list of int.Feature maps in each layer. 272 | - **activation** : activation function used on feature maps. 273 | - **split_half** : bool.if set to False, half of the feature maps in each hidden will connect to output unit. 274 | - **seed** : A Python integer to use as random seed. 275 | References 276 | - [Lian J, Zhou X, Zhang F, et al. xDeepFM: Combining Explicit and Implicit Feature Interactions for Recommender Systems[J]. arXiv preprint arXiv:1803.05170, 2018.] (https://arxiv.org/pdf/1803.05170.pdf) 277 | """ 278 | 279 | def __init__(self, field_size, layer_size=(128, 128), activation=F.relu, split_half=True, l2_reg=1e-5, seed=1024, 280 | device='cpu'): 281 | super(CIN, self).__init__() 282 | if len(layer_size) == 0: 283 | raise ValueError( 284 | "layer_size must be a list(tuple) of length greater than 1") 285 | 286 | self.layer_size = layer_size 287 | self.field_nums = [field_size] 288 | self.split_half = split_half 289 | self.activation = activation 290 | self.l2_reg = l2_reg 291 | self.seed = seed 292 | 293 | self.conv1ds = nn.ModuleList() 294 | for i, size in enumerate(self.layer_size): 295 | self.conv1ds.append( 296 | nn.Conv1d(self.field_nums[-1] * self.field_nums[0], size, 1)) 297 | 298 | if self.split_half: 299 | if i != len(self.layer_size) - 1 and size % 2 > 0: 300 | raise ValueError( 301 | "layer_size must be even number except for the last layer when split_half=True") 302 | 303 | self.field_nums.append(size // 2) 304 | else: 305 | self.field_nums.append(size) 306 | 307 | # for tensor in self.conv1ds: 308 | # nn.init.normal_(tensor.weight, mean=0, std=init_std) 309 | self.to(device) 310 | 311 | def forward(self, inputs): 312 | if len(inputs.shape) != 3: 313 | raise ValueError( 314 | "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (len(inputs.shape))) 315 | batch_size = inputs.shape[0] 316 | dim = inputs.shape[-1] 317 | hidden_nn_layers = [inputs] 318 | final_result = [] 319 | 320 | for i, size in enumerate(self.layer_size): 321 | # x^(k-1) * x^0 322 | x = torch.einsum( 323 | 'bhd,bmd->bhmd', hidden_nn_layers[-1], hidden_nn_layers[0]) 324 | # x.shape = (batch_size , hi * m, dim) 325 | x = x.reshape( 326 | batch_size, hidden_nn_layers[-1].shape[1] * hidden_nn_layers[0].shape[1], dim) 327 | # x.shape = (batch_size , hi, dim) 328 | x = self.conv1ds[i](x) 329 | 330 | if self.activation is None or self.activation == 'linear': 331 | curr_out = x 332 | else: 333 | curr_out = self.activation(x) 334 | 335 | if self.split_half: 336 | if i != len(self.layer_size) - 1: 337 | next_hidden, direct_connect = torch.split( 338 | curr_out, 2 * [size // 2], 1) 339 | else: 340 | direct_connect = curr_out 341 | next_hidden = 0 342 | else: 343 | direct_connect = curr_out 344 | next_hidden = curr_out 345 | 346 | final_result.append(direct_connect) 347 | hidden_nn_layers.append(next_hidden) 348 | 349 | result = torch.cat(final_result, dim=1) 350 | result = torch.sum(result, -1) 351 | 352 | return result 353 | 354 | 355 | class AFMLayer(nn.Module): 356 | """Attentonal Factorization Machine models pairwise (order-2) feature 357 | interactions without linear term and bias. 358 | Input shape 359 | - A list of 3D tensor with shape: ``(batch_size,1,embedding_size)``. 
360 | Output shape 361 | - 2D tensor with shape: ``(batch_size, 1)``. 362 | Arguments 363 | - **in_features** : Positive integer, dimensionality of input features. 364 | - **attention_factor** : Positive integer, dimensionality of the 365 | attention network output space. 366 | - **l2_reg_w** : float between 0 and 1. L2 regularizer strength 367 | applied to attention network. 368 | - **dropout_rate** : float between in [0,1). Fraction of the attention net output units to dropout. 369 | - **seed** : A Python integer to use as random seed. 370 | References 371 | - [Attentional Factorization Machines : Learning the Weight of Feature 372 | Interactions via Attention Networks](https://arxiv.org/pdf/1708.04617.pdf) 373 | """ 374 | 375 | def __init__(self, in_features, attention_factor=4, l2_reg_w=0, dropout_rate=0, seed=1024, device='cpu'): 376 | super(AFMLayer, self).__init__() 377 | self.attention_factor = attention_factor 378 | self.l2_reg_w = l2_reg_w 379 | self.dropout_rate = dropout_rate 380 | self.seed = seed 381 | embedding_size = in_features 382 | 383 | self.attention_W = nn.Parameter(torch.Tensor( 384 | embedding_size, self.attention_factor)) 385 | 386 | self.attention_b = nn.Parameter(torch.Tensor(self.attention_factor)) 387 | 388 | self.projection_h = nn.Parameter( 389 | torch.Tensor(self.attention_factor, 1)) 390 | 391 | self.projection_p = nn.Parameter(torch.Tensor(embedding_size, 1)) 392 | 393 | for tensor in [self.attention_W, self.projection_h, self.projection_p]: 394 | nn.init.xavier_normal_(tensor, ) 395 | 396 | self.dropout = nn.Dropout(dropout_rate) 397 | 398 | self.to(device) 399 | 400 | def forward(self, inputs): 401 | embeds_vec_list = inputs 402 | row = [] 403 | col = [] 404 | 405 | for r, c in itertools.combinations(embeds_vec_list, 2): 406 | row.append(r) 407 | col.append(c) 408 | 409 | p = torch.cat(row, dim=1) 410 | q = torch.cat(col, dim=1) 411 | inner_product = p * q 412 | 413 | bi_interaction = inner_product 414 | attention_temp = F.relu(torch.tensordot( 415 | bi_interaction, self.attention_W, dims=([-1], [0])) + self.attention_b) 416 | 417 | self.normalized_att_score = F.softmax(torch.tensordot( 418 | attention_temp, self.projection_h, dims=([-1], [0])), dim=1) 419 | attention_output = torch.sum( 420 | self.normalized_att_score * bi_interaction, dim=1) 421 | 422 | attention_output = self.dropout(attention_output) # training 423 | 424 | afm_out = torch.tensordot( 425 | attention_output, self.projection_p, dims=([-1], [0])) 426 | return afm_out 427 | 428 | 429 | class InteractingLayer(nn.Module): 430 | """A Layer used in AutoInt that model the correlations between different feature fields by multi-head self-attention mechanism. 431 | Input shape 432 | - A 3D tensor with shape: ``(batch_size,field_size,embedding_size)``. 433 | Output shape 434 | - 3D tensor with shape:``(batch_size,field_size,att_embedding_size * head_num)``. 435 | Arguments 436 | - **in_features** : Positive integer, dimensionality of input features. 437 | - **att_embedding_size**: int.The embedding size in multi-head self-attention network. 438 | - **head_num**: int.The head number in multi-head self-attention network. 439 | - **use_res**: bool.Whether or not use standard residual connections before output. 440 | - **seed**: A Python integer to use as random seed. 441 | References 442 | - [Song W, Shi C, Xiao Z, et al. AutoInt: Automatic Feature Interaction Learning via Self-Attentive Neural Networks[J]. 
arXiv preprint arXiv:1810.11921, 2018.](https://arxiv.org/abs/1810.11921) 443 | """ 444 | 445 | def __init__(self, in_features, att_embedding_size=8, head_num=2, use_res=True, seed=1024, device='cpu'): 446 | super(InteractingLayer, self).__init__() 447 | if head_num <= 0: 448 | raise ValueError('head_num must be a int > 0') 449 | self.att_embedding_size = att_embedding_size 450 | self.head_num = head_num 451 | self.use_res = use_res 452 | self.seed = seed 453 | 454 | embedding_size = in_features 455 | 456 | self.W_Query = nn.Parameter(torch.Tensor( 457 | embedding_size, self.att_embedding_size * self.head_num)) 458 | 459 | self.W_key = nn.Parameter(torch.Tensor( 460 | embedding_size, self.att_embedding_size * self.head_num)) 461 | 462 | self.W_Value = nn.Parameter(torch.Tensor( 463 | embedding_size, self.att_embedding_size * self.head_num)) 464 | 465 | if self.use_res: 466 | self.W_Res = nn.Parameter(torch.Tensor( 467 | embedding_size, self.att_embedding_size * self.head_num)) 468 | for tensor in self.parameters(): 469 | nn.init.normal_(tensor, mean=0.0, std=0.05) 470 | 471 | self.to(device) 472 | 473 | def forward(self, inputs): 474 | 475 | if len(inputs.shape) != 3: 476 | raise ValueError( 477 | "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (len(inputs.shape))) 478 | 479 | querys = torch.tensordot(inputs, self.W_Query, 480 | dims=([-1], [0])) # None F D*head_num 481 | keys = torch.tensordot(inputs, self.W_key, dims=([-1], [0])) 482 | values = torch.tensordot(inputs, self.W_Value, dims=([-1], [0])) 483 | 484 | # head_num None F D 485 | 486 | querys = torch.stack(torch.split( 487 | querys, self.att_embedding_size, dim=2)) 488 | keys = torch.stack(torch.split(keys, self.att_embedding_size, dim=2)) 489 | values = torch.stack(torch.split( 490 | values, self.att_embedding_size, dim=2)) 491 | inner_product = torch.einsum( 492 | 'bnik,bnjk->bnij', querys, keys) # head_num None F F 493 | 494 | self.normalized_att_scores = F.softmax( 495 | inner_product, dim=1) # head_num None F F 496 | result = torch.matmul(self.normalized_att_scores, 497 | values) # head_num None F D 498 | 499 | result = torch.cat(torch.split(result, 1, ), dim=-1) 500 | result = torch.squeeze(result, dim=0) # None F D*head_num 501 | if self.use_res: 502 | result += torch.tensordot(inputs, self.W_Res, dims=([-1], [0])) 503 | result = F.relu(result) 504 | 505 | return result 506 | 507 | 508 | class CrossNet(nn.Module): 509 | """The Cross Network part of Deep&Cross Network model, 510 | which leans both low and high degree cross feature. 511 | Input shape 512 | - 2D tensor with shape: ``(batch_size, units)``. 513 | Output shape 514 | - 2D tensor with shape: ``(batch_size, units)``. 515 | Arguments 516 | - **in_features** : Positive integer, dimensionality of input features. 517 | - **input_feature_num**: Positive integer, shape(Input tensor)[-1] 518 | - **layer_num**: Positive integer, the cross layer number 519 | - **l2_reg**: float between 0 and 1. L2 regularizer strength applied to the kernel weights matrix 520 | - **seed**: A Python integer to use as random seed. 521 | References 522 | - [Wang R, Fu B, Fu G, et al. Deep & cross network for ad click predictions[C]//Proceedings of the ADKDD'17. 
ACM, 2017: 12.](https://arxiv.org/abs/1708.05123) 523 | """ 524 | 525 | def __init__(self, in_features, layer_num=2, seed=1024, device='cpu'): 526 | super(CrossNet, self).__init__() 527 | self.layer_num = layer_num 528 | self.kernels = torch.nn.ParameterList( 529 | [nn.Parameter(nn.init.xavier_normal_(torch.empty(in_features, 1))) for i in range(self.layer_num)]) 530 | self.bias = torch.nn.ParameterList( 531 | [nn.Parameter(nn.init.zeros_(torch.empty(in_features, 1))) for i in range(self.layer_num)]) 532 | self.to(device) 533 | 534 | def forward(self, inputs): 535 | x_0 = inputs.unsqueeze(2) 536 | x_l = x_0 537 | for i in range(self.layer_num): 538 | xl_w = torch.tensordot(x_l, self.kernels[i], dims=([1], [0])) 539 | dot_ = torch.matmul(x_0, xl_w) 540 | x_l = dot_ + self.bias[i] + x_l 541 | x_l = torch.squeeze(x_l, dim=2) 542 | return x_l 543 | 544 | 545 | class InnerProductLayer(nn.Module): 546 | """InnerProduct Layer used in PNN that compute the element-wise 547 | product or inner product between feature vectors. 548 | Input shape 549 | - a list of 3D tensor with shape: ``(batch_size,1,embedding_size)``. 550 | Output shape 551 | - 3D tensor with shape: ``(batch_size, N*(N-1)/2 ,1)`` if use reduce_sum. or 3D tensor with shape: 552 | ``(batch_size, N*(N-1)/2, embedding_size )`` if not use reduce_sum. 553 | Arguments 554 | - **reduce_sum**: bool. Whether return inner product or element-wise product 555 | References 556 | - [Qu Y, Cai H, Ren K, et al. Product-based neural networks for user response prediction[C]// 557 | Data Mining (ICDM), 2016 IEEE 16th International Conference on. IEEE, 2016: 1149-1154.] 558 | (https://arxiv.org/pdf/1611.00144.pdf)""" 559 | 560 | def __init__(self, reduce_sum=True, device='cpu'): 561 | super(InnerProductLayer, self).__init__() 562 | self.reduce_sum = reduce_sum 563 | self.to(device) 564 | 565 | def forward(self, inputs): 566 | 567 | embed_list = inputs 568 | row = [] 569 | col = [] 570 | num_inputs = len(embed_list) 571 | 572 | for i in range(num_inputs - 1): 573 | for j in range(i + 1, num_inputs): 574 | row.append(i) 575 | col.append(j) 576 | p = torch.cat([embed_list[idx] 577 | for idx in row], dim=1) # batch num_pairs k 578 | q = torch.cat([embed_list[idx] 579 | for idx in col], dim=1) 580 | 581 | inner_product = p * q 582 | if self.reduce_sum: 583 | inner_product = torch.sum( 584 | inner_product, dim=2, keepdim=True) 585 | return inner_product 586 | 587 | 588 | class OutterProductLayer(nn.Module): 589 | """OutterProduct Layer used in PNN.This implemention is 590 | adapted from code that the author of the paper published on https://github.com/Atomu2014/product-nets. 591 | Input shape 592 | - A list of N 3D tensor with shape: ``(batch_size,1,embedding_size)``. 593 | Output shape 594 | - 2D tensor with shape:``(batch_size,N*(N-1)/2 )``. 595 | Arguments 596 | - **filed_size** : Positive integer, number of feature groups. 597 | - **kernel_type**: str. The kernel weight matrix type to use,can be mat,vec or num 598 | - **seed**: A Python integer to use as random seed. 599 | References 600 | - [Qu Y, Cai H, Ren K, et al. Product-based neural networks for user response prediction[C]//Data Mining (ICDM), 2016 IEEE 16th International Conference on. 
IEEE, 2016: 1149-1154.](https://arxiv.org/pdf/1611.00144.pdf) 601 | """ 602 | 603 | def __init__(self, field_size, embedding_size, kernel_type='mat', seed=1024, device='cpu'): 604 | super(OutterProductLayer, self).__init__() 605 | self.kernel_type = kernel_type 606 | 607 | num_inputs = field_size 608 | num_pairs = int(num_inputs * (num_inputs - 1) / 2) 609 | embed_size = embedding_size 610 | if self.kernel_type == 'mat': 611 | 612 | self.kernel = nn.Parameter(torch.Tensor( 613 | embed_size, num_pairs, embed_size)) 614 | 615 | elif self.kernel_type == 'vec': 616 | self.kernel = nn.Parameter(torch.Tensor(num_pairs, embed_size)) 617 | 618 | elif self.kernel_type == 'num': 619 | self.kernel = nn.Parameter(torch.Tensor(num_pairs, 1)) 620 | nn.init.xavier_uniform_(self.kernel) 621 | 622 | self.to(device) 623 | 624 | def forward(self, inputs): 625 | embed_list = inputs 626 | row = [] 627 | col = [] 628 | num_inputs = len(embed_list) 629 | for i in range(num_inputs - 1): 630 | for j in range(i + 1, num_inputs): 631 | row.append(i) 632 | col.append(j) 633 | p = torch.cat([embed_list[idx] 634 | for idx in row], dim=1) # batch num_pairs k 635 | q = torch.cat([embed_list[idx] for idx in col], dim=1) 636 | 637 | # ------------------------- 638 | if self.kernel_type == 'mat': 639 | p.unsqueeze_(dim=1) 640 | # k k* pair* k 641 | # batch * pair 642 | kp = torch.sum( 643 | 644 | # batch * pair * k 645 | 646 | torch.mul( 647 | 648 | # batch * pair * k 649 | 650 | torch.transpose( 651 | 652 | # batch * k * pair 653 | 654 | torch.sum( 655 | 656 | # batch * k * pair * k 657 | 658 | torch.mul( 659 | 660 | p, self.kernel), 661 | 662 | dim=-1), 663 | 664 | 2, 1), 665 | 666 | q), 667 | 668 | dim=-1) 669 | else: 670 | # 1 * pair * (k or 1) 671 | 672 | k = torch.unsqueeze(self.kernel, 0) 673 | 674 | # batch * pair 675 | 676 | kp = torch.sum(p * q * k, dim=-1) 677 | 678 | # p q # b * p * k 679 | 680 | return kp 681 | 682 | 683 | class ConvLayer(nn.Module): 684 | """Conv Layer used in CCPM. 685 | 686 | Input shape 687 | - A list of N 3D tensor with shape: ``(batch_size,1,filed_size,embedding_size)``. 688 | Output shape 689 | - A list of N 3D tensor with shape: ``(batch_size,last_filters,pooling_size,embedding_size)``. 690 | Arguments 691 | - **filed_size** : Positive integer, number of feature groups. 692 | - **conv_kernel_width**: list. list of positive integer or empty list,the width of filter in each conv layer. 693 | - **conv_filters**: list. list of positive integer or empty list,the number of filters in each conv layer. 694 | Reference: 695 | - Liu Q, Yu F, Wu S, et al. A convolutional click prediction model[C]//Proceedings of the 24th ACM International on Conference on Information and Knowledge Management. 
ACM, 2015: 1743-1746.(http://ir.ia.ac.cn/bitstream/173211/12337/1/A%20Convolutional%20Click%20Prediction%20Model.pdf) 696 | """ 697 | 698 | def __init__(self, field_size, conv_kernel_width, conv_filters, device='cpu'): 699 | super(ConvLayer, self).__init__() 700 | self.device = device 701 | module_list = [] 702 | n = int(field_size) 703 | l = len(conv_filters) 704 | filed_shape = n 705 | for i in range(1, l + 1): 706 | if i == 1: 707 | in_channels = 1 708 | else: 709 | in_channels = conv_filters[i - 2] 710 | out_channels = conv_filters[i - 1] 711 | width = conv_kernel_width[i - 1] 712 | k = max(1, int((1 - pow(i / l, l - i)) * n)) if i < l else 3 713 | module_list.append(Conv2dSame(in_channels=in_channels, out_channels=out_channels, kernel_size=(width, 1), 714 | stride=1).to(self.device)) 715 | module_list.append(torch.nn.Tanh().to(self.device)) 716 | 717 | # KMaxPooling, extract top_k, returns tensors values 718 | module_list.append(KMaxPooling(k=min(k, filed_shape), axis=2, device=self.device).to(self.device)) 719 | filed_shape = min(k, filed_shape) 720 | self.conv_layer = nn.Sequential(*module_list) 721 | self.to(device) 722 | self.filed_shape = filed_shape 723 | 724 | def forward(self, inputs): 725 | return self.conv_layer(inputs) 726 | 727 | 728 | class DNN(nn.Module): 729 | """The Multi Layer Percetron 730 | 731 | Input shape 732 | - nD tensor with shape: ``(batch_size, ..., input_dim)``. The most common situation would be a 2D input with shape ``(batch_size, input_dim)``. 733 | 734 | Output shape 735 | - nD tensor with shape: ``(batch_size, ..., hidden_size[-1])``. For instance, for a 2D input with shape ``(batch_size, input_dim)``, the output would have shape ``(batch_size, hidden_size[-1])``. 736 | 737 | Arguments 738 | - **inputs_dim**: input feature dimension. 739 | 740 | - **hidden_units**:list of positive integer, the layer number and units in each layer. 741 | 742 | - **activation**: Activation function to use. 743 | 744 | - **l2_reg**: float between 0 and 1. L2 regularizer strength applied to the kernel weights matrix. 745 | 746 | - **dropout_rate**: float in [0,1). Fraction of the units to dropout. 747 | 748 | - **use_bn**: bool. Whether use BatchNormalization before activation or not. 749 | 750 | - **seed**: A Python integer to use as random seed. 
751 | """ 752 | 753 | def __init__(self, inputs_dim, hidden_units, activation=F.relu, l2_reg=0, dropout_rate=0, use_bn=False, 754 | init_std=0.0001, seed=1024, device='cpu'): 755 | super(DNN, self).__init__() 756 | self.activation = activation 757 | self.dropout_rate = dropout_rate 758 | self.dropout = nn.Dropout(dropout_rate) 759 | self.seed = seed 760 | self.l2_reg = l2_reg 761 | self.use_bn = use_bn 762 | if len(hidden_units) == 0: 763 | raise ValueError("hidden_units is empty!!") 764 | hidden_units = [inputs_dim] + list(hidden_units) 765 | 766 | self.linears = nn.ModuleList( 767 | [nn.Linear(hidden_units[i], hidden_units[i + 1]) for i in range(len(hidden_units) - 1)]) 768 | 769 | if self.use_bn: 770 | self.bn = nn.ModuleList( 771 | [nn.BatchNorm1d(hidden_units[i + 1]) for i in range(len(hidden_units) - 1)]) 772 | for name, tensor in self.linears.named_parameters(): 773 | if 'weight' in name: 774 | nn.init.normal_(tensor, mean=0, std=init_std) 775 | 776 | self.to(device) 777 | 778 | def forward(self, inputs): 779 | deep_input = inputs 780 | 781 | for i in range(len(self.linears)): 782 | 783 | fc = self.linears[i](deep_input) 784 | 785 | if self.use_bn: 786 | fc = self.bn[i](fc) 787 | 788 | fc = self.activation(fc) 789 | 790 | fc = self.dropout(fc) 791 | deep_input = fc 792 | return deep_input 793 | 794 | 795 | class PredictionLayer(nn.Module): 796 | """ 797 | Arguments 798 | - **task**: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss 799 | - **use_bias**: bool.Whether add bias term or not. 800 | """ 801 | 802 | def __init__(self, task='binary', use_bias=True, **kwargs): 803 | if task not in ["binary", "multiclass", "regression"]: 804 | raise ValueError("task must be binary,multiclass or regression") 805 | 806 | super(PredictionLayer, self).__init__() 807 | self.use_bias = use_bias 808 | self.task = task 809 | if self.use_bias: 810 | self.bias = nn.Parameter(torch.zeros((1,))) 811 | 812 | def forward(self, X): 813 | output = X 814 | if self.use_bias: 815 | output += self.bias 816 | if self.task == "binary": 817 | output = torch.sigmoid(output) 818 | return output 819 | 820 | 821 | class Conv2dSame(nn.Conv2d): 822 | """ Tensorflow like 'SAME' convolution wrapper for 2D convolutions 823 | """ 824 | 825 | def __init__(self, in_channels, out_channels, kernel_size, stride=1, 826 | padding=0, dilation=1, groups=1, bias=True): 827 | super(Conv2dSame, self).__init__( 828 | in_channels, out_channels, kernel_size, stride, 0, dilation, 829 | groups, bias) 830 | nn.init.xavier_uniform_(self.weight) 831 | 832 | def forward(self, x): 833 | ih, iw = x.size()[-2:] 834 | kh, kw = self.weight.size()[-2:] 835 | oh = math.ceil(ih / self.stride[0]) 836 | ow = math.ceil(iw / self.stride[1]) 837 | pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0) 838 | pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0) 839 | if pad_h > 0 or pad_w > 0: 840 | x = F.pad(x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2]) 841 | out = F.conv2d(x, self.weight, self.bias, self.stride, 842 | self.padding, self.dilation, self.groups) 843 | return out 844 | 845 | 846 | class SparseFeat(namedtuple('SparseFeat', ['name', 'dimension', 'use_hash', 'dtype', 'embedding_name', 'embedding'])): 847 | __slots__ = () 848 | 849 | def __new__(cls, name, dimension, use_hash=False, dtype="int32", embedding_name=None, embedding=True): 850 | if embedding and embedding_name is None: 851 | embedding_name = name 852 | return super(SparseFeat, 
cls).__new__(cls, name, dimension, use_hash, dtype, embedding_name, embedding)
853 | 
854 | 
855 | class DenseFeat(namedtuple('DenseFeat', ['name', 'dimension', 'dtype'])):
856 |     __slots__ = ()
857 | 
858 |     def __new__(cls, name, dimension=1, dtype="float32"):
859 |         return super(DenseFeat, cls).__new__(cls, name, dimension, dtype)
860 | 
861 | 
862 | class VarLenSparseFeat(namedtuple('VarLenFeat',
863 |                                   ['name', 'dimension', 'maxlen', 'combiner', 'use_hash', 'dtype', 'embedding_name',
864 |                                    'embedding'])):
865 |     __slots__ = ()
866 | 
867 |     def __new__(cls, name, dimension, maxlen, combiner="mean", use_hash=False, dtype="float32", embedding_name=None,
868 |                 embedding=True):
869 |         if embedding_name is None:
870 |             embedding_name = name
871 |         return super(VarLenSparseFeat, cls).__new__(cls, name, dimension, maxlen, combiner, use_hash, dtype,
872 |                                                     embedding_name, embedding)
873 | 
874 | 
875 | class TextFeat(namedtuple('TextFeat',
876 |                           ['name', 'dimension', 'maxlen', 'use_hash', 'dtype', 'embedding_name',
877 |                            'embedding'])):
878 |     """
879 |     Text feature: a fixed-length sequence of token IDs.
880 |     """
881 |     __slots__ = ()
882 | 
883 |     def __new__(cls, name, dimension, maxlen, use_hash=False, dtype="int32", embedding_name=None,
884 |                 embedding=True):
885 |         if embedding_name is None:
886 |             embedding_name = name
887 |         return super(TextFeat, cls).__new__(cls, name, dimension, maxlen, use_hash, dtype,
888 |                                             embedding_name, embedding)
889 | 
890 | 
891 | def get_feature_names(feature_columns):
892 |     features = build_input_features(feature_columns)
893 |     return list(features.keys())
894 | 
895 | 
896 | def get_inputs_list(inputs):
897 |     return list(chain(*list(map(lambda x: x.values(), filter(lambda x: x is not None, inputs)))))
898 | 
899 | 
900 | def build_input_features(feature_columns):
901 |     features = OrderedDict()
902 | 
903 |     start = 0
904 |     for feat in feature_columns:
905 |         feat_name = feat.name
906 |         if feat_name in features:
907 |             continue
908 |         if isinstance(feat, SparseFeat):
909 |             features[feat_name] = (start, start + 1)
910 |             start += 1
911 |         elif isinstance(feat, DenseFeat):
912 |             features[feat_name] = (start, start + feat.dimension)
913 |             start += feat.dimension
914 |         elif isinstance(feat, VarLenSparseFeat):
915 |             features[feat_name] = (start, start + feat.maxlen)
916 |             start += feat.maxlen
917 |         elif isinstance(feat, TextFeat):
918 |             features[feat_name] = (start, start + feat.maxlen)
919 |             start += feat.maxlen
920 |         else:
921 |             raise TypeError("Invalid feature column type, got", type(feat))
922 |     return features
923 | 
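# Illustrative example of the layout build_input_features produces (hypothetical
# feature columns, not from this repo's data):
#     [SparseFeat('m_sex', 3), DenseFeat('m_salt_score', 1), TextFeat('q_topic_words', 1000, 10)]
#     -> OrderedDict([('m_sex', (0, 1)), ('m_salt_score', (1, 2)), ('q_topic_words', (2, 12))])
# Each tuple is the (start, end) slice of that feature's columns in the flat input X.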
924 | 
925 | def get_dense_input(features, feature_columns):
926 |     dense_feature_columns = list(filter(lambda x: isinstance(
927 |         x, DenseFeat), feature_columns)) if feature_columns else []
928 |     dense_input_list = []
929 |     for fc in dense_feature_columns:
930 |         dense_input_list.append(features[fc.name])
931 |     return dense_input_list
932 | 
933 | 
934 | def combined_dnn_input(sparse_embedding_list, dense_value_list):
935 |     if len(sparse_embedding_list) > 0 and len(dense_value_list) > 0:
936 |         sparse_dnn_input = torch.flatten(
937 |             torch.cat(sparse_embedding_list, dim=-1), start_dim=1)
938 |         dense_dnn_input = torch.flatten(
939 |             torch.cat(dense_value_list, dim=-1), start_dim=1)
940 |         return concat_fun([sparse_dnn_input, dense_dnn_input])
941 |     elif len(sparse_embedding_list) > 0:
942 |         return torch.flatten(torch.cat(sparse_embedding_list, dim=-1), start_dim=1)
943 |     elif len(dense_value_list) > 0:
944 |         return torch.flatten(torch.cat(dense_value_list, dim=-1), start_dim=1)
945 |     else:
946 |         raise NotImplementedError
947 | 
948 | 
949 | class Linear(nn.Module):
950 |     def __init__(self, feature_columns, feature_index, init_std=0.0001, device='cpu'):
951 |         super(Linear, self).__init__()
952 |         self.feature_index = feature_index
953 | 
954 |         self.sparse_feature_columns = list(
955 |             filter(lambda x: isinstance(x, SparseFeat), feature_columns)) if len(feature_columns) else []
956 |         self.dense_feature_columns = list(
957 |             filter(lambda x: isinstance(x, DenseFeat), feature_columns)) if len(feature_columns) else []
958 | 
959 |         self.embedding_dict = self.create_embedding_matrix(self.sparse_feature_columns, 1, init_std, sparse=False).to(
960 |             device)
961 | 
962 |         # nn.ModuleDict(
963 |         #     {feat.embedding_name: nn.Embedding(feat.dimension, 1, sparse=True) for feat in
964 |         #      self.sparse_feature_columns}
965 |         # )
966 |         # .to("cuda:1")
967 |         for tensor in self.embedding_dict.values():
968 |             nn.init.normal_(tensor.weight, mean=0, std=init_std)
969 | 
970 |         if len(self.dense_feature_columns) > 0:
971 |             self.weight = nn.Parameter(torch.Tensor(sum(fc.dimension for fc in self.dense_feature_columns), 1)).to(
972 |                 device)
973 |             torch.nn.init.normal_(self.weight, mean=0, std=init_std)
974 | 
975 |     def forward(self, X):
976 | 
977 |         sparse_embedding_list = [self.embedding_dict[feat.embedding_name](
978 |             X[:, self.feature_index[feat.name][0]:self.feature_index[feat.name][1]].long()) for
979 |             feat in self.sparse_feature_columns]
980 | 
981 |         dense_value_list = [X[:, self.feature_index[feat.name][0]:self.feature_index[feat.name][1]] for feat in
982 |                             self.dense_feature_columns]
983 | 
984 |         if len(sparse_embedding_list) > 0 and len(dense_value_list) > 0:
985 |             linear_sparse_logit = torch.sum(
986 |                 torch.cat(sparse_embedding_list, dim=-1), dim=-1, keepdim=False)
987 |             linear_dense_logit = torch.cat(
988 |                 dense_value_list, dim=-1).matmul(self.weight)
989 |             linear_logit = linear_sparse_logit + linear_dense_logit
990 |         elif len(sparse_embedding_list) > 0:
991 |             linear_logit = torch.sum(
992 |                 torch.cat(sparse_embedding_list, dim=-1), dim=-1, keepdim=False)
993 |         elif len(dense_value_list) > 0:
994 |             linear_logit = torch.cat(
995 |                 dense_value_list, dim=-1).matmul(self.weight)
996 |         else:
997 |             linear_logit = torch.zeros([X.shape[0], 1])
998 |         return linear_logit
999 | 
1000 |     def create_embedding_matrix(self, feature_columns, embedding_size, init_std=0.0001, sparse=False):
1001 | 
1002 |         sparse_feature_columns = list(
1003 |             filter(lambda x: isinstance(x, SparseFeat), feature_columns)) if len(feature_columns) else []
1004 | 
1005 |         embedding_dict = nn.ModuleDict(
1006 |             {feat.embedding_name: nn.Embedding(feat.dimension, embedding_size, sparse=sparse) for feat in
1007 |              sparse_feature_columns}
1008 |         )
1009 |         for tensor in embedding_dict.values():
1010 |             nn.init.normal_(tensor.weight, mean=0, std=init_std)
1011 | 
1012 |         return embedding_dict
1013 | 
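# The Linear module above computes the first-order term of an FM-style model,
#     logit = sum_i w_i * x_i,
# where each sparse field contributes through a 1-dimensional embedding lookup
# (the lookup table plays the role of w_i for that field's categories) and all
# dense fields share a single learned weight vector applied via a matrix product.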
1014 | class BaseModel(nn.Module):
1015 | 
1016 |     def __init__(self,
1017 |                  linear_feature_columns, dnn_feature_columns, embedding_size=8, dnn_hidden_units=(128, 128),
1018 |                  l2_reg_linear=1e-5,
1019 |                  l2_reg_embedding=1e-5, l2_reg_dnn=0, init_std=0.0001, seed=1024, dnn_dropout=0, dnn_activation='relu',
1020 |                  task='binary', device='cpu'):
1021 |         """
1022 | 
1023 |         :param linear_feature_columns: features used by the first-order linear part (the sum of w_i * x_i)
1024 |         :param dnn_feature_columns: features used by the deep (DNN) part
1025 |         :param embedding_size:
1026 |         :param dnn_hidden_units:
1027 |         :param l2_reg_linear:
1028 |         :param l2_reg_embedding:
1029 |         :param l2_reg_dnn:
1030 |         :param init_std:
1031 |         :param seed:
1032 |         :param dnn_dropout:
1033 |         :param dnn_activation:
1034 |         :param task:
1035 |         :param device:
1036 |         """
1037 | 
1038 |         super(BaseModel, self).__init__()
1039 | 
1040 |         # record the model's hyper-parameters so they can be written out with the results
1041 |         self.params = {"embedding_size": embedding_size, "dnn_hidden_units": dnn_hidden_units, "dnn_dropout": dnn_dropout, "dnn_activation": dnn_activation}
1042 | 
1043 |         self.reg_loss = torch.zeros((1,), device=device)
1044 |         self.device = device
1045 | 
1046 |         self.feature_index = build_input_features(
1047 |             linear_feature_columns + dnn_feature_columns)
1048 |         self.dnn_feature_columns = dnn_feature_columns
1049 | 
1050 |         self.embedding_dict = self.create_embedding_matrix(dnn_feature_columns, embedding_size, init_std,
1051 |                                                            sparse=False).to(device)
1052 |         # nn.ModuleDict(
1053 |         #     {feat.embedding_name: nn.Embedding(feat.dimension, embedding_size, sparse=True) for feat in
1054 |         #      self.dnn_feature_columns}
1055 |         # )
1056 | 
1057 |         self.linear_model = Linear(
1058 |             linear_feature_columns, self.feature_index, device=device)
1059 | 
1060 |         self.add_regularization_loss(
1061 |             self.embedding_dict.parameters(), l2_reg_embedding)
1062 |         self.add_regularization_loss(
1063 |             self.linear_model.parameters(), l2_reg_linear)
1064 | 
1065 |         self.out = PredictionLayer(task)
1066 |         self.to(device)
1067 | 
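    # Sketch of how subclasses of this BaseModel are used downstream (mirrors deepfm.py):
    #     model = DeepFM(linear_feature_columns, dnn_feature_columns, task='binary', device='cuda:0')
    #     model.compile("adam", "binary_crossentropy", metrics=["binary_crossentropy", "auc"])
    #     model.fit(train_model_input, train[target].values, batch_size=8192, epochs=10,
    #               validation_data=[valid_model_input, valid[target].values],
    #               model_cache_path='.../deepfm.model')
    #     pred = model.predict(test_model_input, 8192)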

    def fit(self, x=None,
            y=None,
            batch_size=None,
            epochs=1,
            verbose=1,
            initial_epoch=0,
            validation_split=0.,
            validation_data=None,
            shuffle=True, draw_pictures=False, save_results=True, model_cache_path=None, early_stop=5):
        """

        :param x: Numpy array of training data (if the model has a single input), or list of Numpy arrays (if the model has multiple inputs). If input layers in the model are named, you can also pass a dictionary mapping input names to Numpy arrays.
        :param y: Numpy array of target (label) data (if the model has a single output), or list of Numpy arrays (if the model has multiple outputs).
        :param batch_size: Integer or `None`. Number of samples per gradient update. If unspecified, `batch_size` will default to 256.
        :param epochs: Integer. Number of epochs to train the model. An epoch is an iteration over the entire `x` and `y` data provided. Note that in conjunction with `initial_epoch`, `epochs` is to be understood as "final epoch". The model is not trained for a number of iterations given by `epochs`, but merely until the epoch of index `epochs` is reached.
        :param verbose: Integer. 0, 1, or 2. Verbosity mode. 0 = silent, 1 = progress bar, 2 = one line per epoch.
        :param initial_epoch: Integer. Epoch at which to start training (useful for resuming a previous training run).
        :param validation_split: Float between 0 and 1. Fraction of the training data to be used as validation data. The model will set apart this fraction of the training data, will not train on it, and will evaluate the loss and any model metrics on this data at the end of each epoch. The validation data is selected from the last samples in the `x` and `y` data provided, before shuffling.
        :param validation_data: tuple `(x_val, y_val)` or tuple `(x_val, y_val, val_sample_weights)` on which to evaluate the loss and any model metrics at the end of each epoch. The model will not be trained on this data. `validation_data` will override `validation_split`.
        :param shuffle: Boolean. Whether to shuffle the order of the batches at the beginning of each epoch.
        :param draw_pictures: whether to plot the metric curves after training
        :param save_results: whether to append the results to a log file
        :param model_cache_path: path for caching the best model weights
        :param early_stop: early-stopping patience; training stops after the validation AUC has failed to improve `early_stop` times
        """
        if isinstance(x, dict):
            # feature_index is an OrderedDict, so this keeps the ordering of x
            # consistent with feature_index
            x = [x[feature] for feature in self.feature_index]  # list of series, len(x) = feature_size

        """Building the validation set..."""
        if validation_data:
            if len(validation_data) == 2:
                val_x, val_y = validation_data
                val_sample_weight = None
            elif len(validation_data) == 3:
                val_x, val_y, val_sample_weight = validation_data  # pylint: disable=unpacking-non-sequence
            else:
                raise ValueError(
                    'When passing a `validation_data` argument, '
                    'it must contain either 2 items (x_val, y_val), '
                    'or 3 items (x_val, y_val, val_sample_weights), '
                    'or alternatively it could be a dataset or a dataset iterator. '
                    'However we received `validation_data=%s`' % validation_data)
            if isinstance(val_x, dict):
                val_x = [val_x[feature] for feature in self.feature_index]

        elif validation_split and 0. < validation_split < 1.:
            if hasattr(x[0], 'shape'):
                split_at = int(x[0].shape[0] * (1. - validation_split))
            else:
                split_at = int(len(x[0]) * (1. - validation_split))
            x, val_x = (slice_arrays(x, 0, split_at),
                        slice_arrays(x, split_at))
            y, val_y = (slice_arrays(y, 0, split_at),
                        slice_arrays(y, split_at))

        else:
            val_x = []
            val_y = []

        """Validation set done..."""

        """Building the training iterator..."""
        for i in range(len(x)):
            if len(x[i].shape) == 1:
                x[i] = np.expand_dims(x[i], axis=1)
        train_tensor_data = Data.TensorDataset(
            torch.from_numpy(
                np.concatenate(x, axis=-1)),
            torch.from_numpy(y))
        if batch_size is None:
            batch_size = 256
        train_loader = DataLoader(
            dataset=train_tensor_data, shuffle=shuffle, batch_size=batch_size)
        """Training iterator done..."""
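        # Layout sketch for the tensors built above (toy numbers): with three
        # features, `x` is a list of three (n_samples, 1) arrays, so
        # np.concatenate(x, axis=-1) yields one (n_samples, 3) matrix whose
        # columns line up with self.feature_index; each DataLoader step then
        # yields a (batch_size, 3) slice of it plus the matching labels.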

        print(self.device)
        model = self.train()
        loss_func = self.loss_func
        optim = self.optim

        sample_num = len(train_tensor_data)
        steps_per_epoch = (sample_num - 1) // batch_size + 1

        print("Train on {0} samples, validate on {1} samples, {2} steps per epoch".format(
            len(train_tensor_data), len(val_y), steps_per_epoch))

        score_history = {}  # metric values recorded over the epochs
        for name, metric_fun in self.metrics.items():
            score_history[name] = []

        """Training..."""
        best_auc = -1  # best validation AUC seen so far
        descend_count = 0  # number of epochs without improvement
        for epoch in range(initial_epoch, epochs):
            start_time = time.time()
            loss_epoch = 0
            total_loss_epoch = 0
            train_result = {}
            try:
                with tqdm(enumerate(train_loader), disable=verbose != 1) as t:
                    for index, (x_train, y_train) in t:
                        x = x_train.to(self.device).float()  # (batch_size, feature_size)
                        y = y_train.to(self.device).float()
                        y_pred = model(x).squeeze()

                        optim.zero_grad()
                        loss = loss_func(y_pred, y.squeeze(), reduction='sum')

                        total_loss = loss + self.reg_loss

                        loss_epoch += loss.item()
                        total_loss_epoch += total_loss.item()
                        # retain_graph=True because self.reg_loss is built once
                        # in __init__ and its graph is reused on every step
                        total_loss.backward(retain_graph=True)
                        optim.step()

                        if verbose > 0:
                            for name, metric_fun in self.metrics.items():
                                if name not in train_result:
                                    train_result[name] = []
                                train_result[name].append(metric_fun(
                                    y.cpu().data.numpy(), y_pred.cpu().data.numpy()))
            except KeyboardInterrupt:
                t.close()
                raise
            t.close()

            epoch_time = int(time.time() - start_time)
            if verbose > 0:
                print('Epoch {0}/{1}'.format(epoch + 1, epochs))

                eval_str = "{0}s - loss: {1: .4f}".format(
                    epoch_time, total_loss_epoch / sample_num)

                for name, result in train_result.items():
                    eval_str += " - " + name + \
                                ": {0: .4f}".format(np.sum(result) / steps_per_epoch)

                if len(val_x) and len(val_y):
                    eval_result = self.evaluate(val_x, val_y, batch_size)

                    if best_auc < eval_result['auc']:
                        best_auc = eval_result['auc']
                        if model_cache_path is not None:
                            torch.save(model.state_dict(), model_cache_path)
                    else:
                        descend_count += 1  # no improvement this epoch

                    for name, result in eval_result.items():
                        score_history[name].append(result)
                        eval_str += " - val_" + name + \
                                    ": {0: .4f}".format(result)
                print(eval_str)
                if descend_count >= early_stop:
                    break
        """Training done..."""

        if model_cache_path is not None:
            model.load_state_dict(torch.load(model_cache_path))

        if draw_pictures:
            for name, scores in score_history.items():
                plt.plot(range(1, len(scores) + 1), scores)
                plt.xlabel('epoch')
                plt.title(name)
                plt.show()

        if save_results:
            """The last element appended to each list is the best score."""
            for name, scores in score_history.items():
                if name in ['binary_crossentropy', 'mse', 'logloss']:
                    score_history[name].append(min(score_history[name]))
                else:
                    score_history[name].append(max(score_history[name]))

            with open("E:\\competition\\看山杯\\result.txt", 'a+', encoding='utf-8') as file:
                file.write("\n")
                file.write("################################################################\n")
                file.write(str(self.params) + "\n")
                for name, scores in score_history.items():
                    file.write(str(name) + ": " + str(scores) + "\n")
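
    # A hedged usage sketch for fit (toy values; the cache path and patience
    # are illustrative): with validation data supplied, the best-AUC weights
    # are saved to `model_cache_path` and reloaded after training.
    #
    #   model.compile("adam", "binary_crossentropy", metrics=["binary_crossentropy", "auc"])
    #   model.fit(train_input, train_labels, batch_size=8192, epochs=20,
    #             validation_data=[val_input, val_labels],
    #             model_cache_path='./deepfm.model', early_stop=5)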

    def evaluate(self, x, y, batch_size=256):
        """

        :param x: Numpy array of test data (if the model has a single input), or list of Numpy arrays (if the model has multiple inputs).
        :param y: Numpy array of target (label) data (if the model has a single output), or list of Numpy arrays (if the model has multiple outputs).
        :param batch_size: Integer or `None`. Number of samples per evaluation step. If unspecified, `batch_size` will default to 256.
        :return: Dict mapping metric names to metric values.
        """
        pred_ans = self.predict(x, batch_size)
        eval_result = {}
        for name, metric_fun in self.metrics.items():
            eval_result[name] = metric_fun(y, pred_ans)
        return eval_result

    def predict(self, x, batch_size=256):
        """
        Run inference on the given data.
        :param x: The input data, as a Numpy array (or list of Numpy arrays if the model has multiple inputs).
        :param batch_size: Integer. If unspecified, it will default to 256.
        :return: Numpy array(s) of predictions.
        """
        model = self.eval()
        if isinstance(x, dict):
            x = [x[feature] for feature in self.feature_index]
        for i in range(len(x)):
            if len(x[i].shape) == 1:
                x[i] = np.expand_dims(x[i], axis=1)

        tensor_data = Data.TensorDataset(
            torch.from_numpy(np.concatenate(x, axis=-1)))
        test_loader = DataLoader(
            dataset=tensor_data, shuffle=False, batch_size=batch_size)

        pred_ans = []
        with torch.no_grad():
            for index, x_test in enumerate(test_loader):
                x = x_test[0].to(self.device).float()

                y_pred = model(x).cpu().data.numpy()
                pred_ans.append(y_pred)
        return np.concatenate(pred_ans)

    def text_input_from_feature_columns(self, X, feature_columns):
        """
        Extract the raw values of the text features.
        :param X: input tensor of shape (batch_size, feature_size)
        :param feature_columns: iterable of feature column definitions
        :return: list of id tensors, one per TextFeat column
        """
        text_feature_columns = list(
            filter(lambda x: isinstance(x, TextFeat), feature_columns)) if feature_columns else []
        text_feature_list = [X[:, self.feature_index[feat.name][0]:self.feature_index[feat.name][1]].long() for feat in text_feature_columns]
        return text_feature_list

    def input_from_feature_columns(self, X, feature_columns, embedding_dict, support_dense=True):
        """
        Look up the embedding vectors of all features except the text features.
        :param X: input tensor of shape (batch_size, feature_size)
        :param feature_columns: iterable of feature column definitions
        :param embedding_dict: ModuleDict of embedding layers
        :param support_dense: whether DenseFeat columns are allowed
        :return: (list of sparse embeddings, list of dense values)
        """

        sparse_feature_columns = list(
            filter(lambda x: isinstance(x, SparseFeat), feature_columns)) if len(feature_columns) else []
        dense_feature_columns = list(
            filter(lambda x: isinstance(x, DenseFeat), feature_columns)) if len(feature_columns) else []

        varlen_sparse_feature_columns = list(
            filter(lambda x: isinstance(x, VarLenSparseFeat), feature_columns)) if feature_columns else []

        if not support_dense and len(dense_feature_columns) > 0:
            raise ValueError(
                "DenseFeat is not supported in dnn_feature_columns")

        sparse_embedding_list = [embedding_dict[feat.embedding_name](
            X[:, self.feature_index[feat.name][0]:self.feature_index[feat.name][1]].long()) for
            feat in sparse_feature_columns]

        varlen_sparse_embedding_list = [embedding_dict[feat.embedding_name](
            X[:, self.feature_index[feat.name][0]:self.feature_index[feat.name][1]].long()) for
            feat in varlen_sparse_feature_columns]
        varlen_sparse_embedding_list = list(
            map(lambda x: x.unsqueeze(dim=1), varlen_sparse_embedding_list))

        dense_value_list = [X[:, self.feature_index[feat.name][0]:self.feature_index[feat.name][1]] for feat in
                            dense_feature_columns]

        return sparse_embedding_list + varlen_sparse_embedding_list, dense_value_list
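
    # Shape sketch for the two embedding types used above (standard PyTorch
    # behaviour): for a batch of B rows,
    #   nn.Embedding(n, E)    on a (B, 1) id column  -> (B, 1, E)
    #   nn.EmbeddingBag(n, E) on a (B, L) id matrix  -> (B, E), pooled by `combiner`
    # hence the .unsqueeze(dim=1) on the var-len outputs, so that every entry
    # of the returned list is (B, 1, E) and can be concatenated along dim=1.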

    def create_embedding_matrix(self, feature_columns, embedding_size, init_std=0.0001, sparse=False):
        """Create the embedding matrices for the single-value categorical, multi-value categorical and text features."""
        sparse_feature_columns = list(
            filter(lambda x: isinstance(x, SparseFeat), feature_columns)) if len(feature_columns) else []

        varlen_sparse_feature_columns = list(
            filter(lambda x: isinstance(x, VarLenSparseFeat), feature_columns)) if len(feature_columns) else []

        embedding_dict = nn.ModuleDict(
            {feat.embedding_name: nn.Embedding(feat.dimension, embedding_size, sparse=sparse) for feat in
             sparse_feature_columns}
        )

        for feat in varlen_sparse_feature_columns:
            embedding_dict[feat.embedding_name] = nn.EmbeddingBag(
                feat.dimension, embedding_size, sparse=sparse, mode=feat.combiner)

        for tensor in embedding_dict.values():
            nn.init.normal_(tensor.weight, mean=0, std=init_std)

        return embedding_dict

    def compute_input_dim(self, feature_columns, embedding_size=1, include_sparse=True, include_dense=True, feature_group=False):
        sparse_feature_columns = list(
            filter(lambda x: isinstance(x, (SparseFeat, VarLenSparseFeat)), feature_columns)) if len(feature_columns) else []
        dense_feature_columns = list(
            filter(lambda x: isinstance(x, DenseFeat), feature_columns)) if len(feature_columns) else []

        dense_input_dim = sum(
            map(lambda x: x.dimension, dense_feature_columns))
        if feature_group:
            sparse_input_dim = len(sparse_feature_columns)
        else:
            sparse_input_dim = len(sparse_feature_columns) * embedding_size
        input_dim = 0
        if include_sparse:
            input_dim += sparse_input_dim
        if include_dense:
            input_dim += dense_input_dim
        return input_dim

    def add_regularization_loss(self, weight_list, weight_decay, p=2):
        reg_loss = torch.zeros((1,), device=self.device)
        for w in weight_list:
            if isinstance(w, tuple):
                l2_reg = torch.norm(w[1], p=p, )
            else:
                l2_reg = torch.norm(w, p=p, )
            reg_loss = reg_loss + l2_reg
        reg_loss = weight_decay * reg_loss
        self.reg_loss += reg_loss
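
    # Note: add_regularization_loss is only called from __init__, so
    # self.reg_loss is computed once from the freshly initialised weights and
    # then added to every batch loss as a constant (this is also what forces
    # retain_graph=True in fit). A per-step variant (a sketch, not the code
    # used here) would zero and rebuild it inside the training loop:
    #
    #   self.reg_loss = torch.zeros((1,), device=self.device)
    #   self.add_regularization_loss(self.embedding_dict.parameters(), l2_reg_embedding)
    #   total_loss = loss + self.reg_loss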

    def compile(self, optimizer,
                loss=None,
                metrics=None,
                ):
        """
        Configure the optimizer, loss function and evaluation metrics of the model.
        :param optimizer: String (name of optimizer) or optimizer instance. See [optimizers](https://pytorch.org/docs/stable/optim.html).
        :param loss: String (name of objective function) or objective function. See [losses](https://pytorch.org/docs/stable/nn.functional.html#loss-functions).
        :param metrics: List of metrics to be evaluated by the model during training and testing. Typically you will use `metrics=['accuracy']`.
        """

        self.optim = self._get_optim(optimizer)
        self.loss_func = self._get_loss_func(loss)
        self.metrics = self._get_metrics(metrics)

    def _get_optim(self, optimizer):
        if isinstance(optimizer, str):
            self.params['optimizer'] = optimizer
            if optimizer == "sgd":
                optim = torch.optim.SGD(self.parameters(), lr=0.01)
            elif optimizer == "adam":
                optim = torch.optim.Adam(self.parameters())  # default lr 0.001
            elif optimizer == "adagrad":
                optim = torch.optim.Adagrad(self.parameters())  # default lr 0.01
            elif optimizer == "rmsprop":
                optim = torch.optim.RMSprop(self.parameters())
            else:
                raise NotImplementedError
        else:
            optim = optimizer
        return optim

    def _get_loss_func(self, loss):
        if isinstance(loss, str):
            self.params['loss'] = loss
            if loss == "binary_crossentropy":
                loss_func = F.binary_cross_entropy
            elif loss == "mse":
                loss_func = F.mse_loss
            elif loss == "mae":
                loss_func = F.l1_loss
            else:
                raise NotImplementedError
        else:
            loss_func = loss
        return loss_func

    def _get_metrics(self, metrics):
        metrics_ = {}
        if metrics:
            for metric in metrics:
                if metric == "binary_crossentropy" or metric == "logloss":
                    metrics_[metric] = log_loss
                if metric == "auc":
                    metrics_[metric] = roc_auc_score
                if metric == "mse":
                    metrics_[metric] = mean_squared_error
                if metric == "accuracy" or metric == "acc":
                    metrics_[metric] = lambda y_true, y_pred: accuracy_score(
                        y_true, np.where(y_pred > 0.5, 1, 0))
        return metrics_

"""++++++++++++++++++++++++ end of the deepctr runtime environment +++++++++++++++++++++"""

class TextCNN(nn.Module):
    """
    TextCNN: extracts features from text.
    """

    def __init__(self, filter_sizes=[1, 2, 3], num_filters=128, embed_dim=8, vocab_size=1000, dropout=0.3,
                 embedding_matrix=None):
        """

        :param filter_sizes: convolution kernel sizes
        :param num_filters: number of kernels per size
        :param embed_dim: word-vector dimension
        :param vocab_size: vocabulary size + 1
        :param embedding_matrix: pretrained word-vector matrix used for initialisation, if given
        :param dropout: dropout rate
        """
        super(TextCNN, self).__init__()
        self.filter_sizes = filter_sizes
        self.num_filters = num_filters
        self.embed_dim = embed_dim
        if embedding_matrix is not None:
            self.embedding = nn.Embedding.from_pretrained(torch.FloatTensor(embedding_matrix))
        else:
            self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.convs = nn.ModuleList([nn.Conv2d(1, self.num_filters, (k, self.embed_dim)) for k in self.filter_sizes])
        self.dropout = nn.Dropout(dropout)

    def conv_and_pool(self, x, conv):
        x = F.relu(conv(x)).squeeze(3)
        x = F.max_pool1d(x, x.size(2)).squeeze(2)
        return x

    def forward(self, x):
        """
        :param x: (batch_size, maxlen) tensor of token ids
        :return: (batch_size, len(filter_sizes) * num_filters) feature tensor
        """
        x = self.embedding(x)  # (batch_size, maxlen, embed_dim)
        x = x.unsqueeze(1)
        x = torch.cat([self.conv_and_pool(x, conv) for conv in self.convs], 1)
        x = self.dropout(x)
        return x
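
# Shape walkthrough for TextCNN.forward (toy numbers: a batch of 32 sequences
# of 50 token ids, with the defaults embed_dim=8, num_filters=128,
# filter_sizes=[1, 2, 3]):
#   x: (32, 50) ids       -> embedding                 -> (32, 50, 8)
#   unsqueeze(1)                                       -> (32, 1, 50, 8)
#   Conv2d(1, 128, (k, 8)) + relu + squeeze(3)         -> (32, 128, 50 - k + 1)
#   max_pool1d over time + squeeze(2)                  -> (32, 128) per filter size
#   concat over the three filter sizes (then dropout)  -> (32, 384)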

"""++++++++++++++++++++++++ model classes +++++++++++++++++++++"""

class DeepFM(BaseModel):
    """Instantiates the DeepFM Network architecture.

    :param linear_feature_columns: An iterable containing all the features used by the linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by the deep part of the model.
    :param embedding_size: positive integer, sparse feature embedding_size
    :param use_fm: bool, use FM part or not
    :param dnn_hidden_units: list of positive integers or empty list, the layer number and units in each layer of the DNN
    :param l2_reg_linear: float. L2 regularizer strength applied to the linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to the embedding vectors
    :param l2_reg_dnn: float. L2 regularizer strength applied to the DNN
    :param init_std: float, used as the initialisation std of the embedding vectors
    :param seed: integer, used as the random seed
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in the DNN
    :param dnn_use_bn: bool. Whether to use BatchNormalization before activation in the DNN
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :param device: str, ``"cpu"`` or ``"cuda:0"``
    :return: A PyTorch model instance.

    """

    def __init__(self,
                 linear_feature_columns, dnn_feature_columns, embedding_size=8, use_fm=True,
                 dnn_hidden_units=(256, 128),
                 l2_reg_linear=0.00001, l2_reg_embedding=0.00001, l2_reg_dnn=0, init_std=0.0001, seed=1024,
                 dnn_dropout=0,
                 dnn_activation=F.relu, dnn_use_bn=False, task='binary', device='cpu'):

        super(DeepFM, self).__init__(linear_feature_columns, dnn_feature_columns, embedding_size=embedding_size,
                                     dnn_hidden_units=dnn_hidden_units,
                                     l2_reg_linear=l2_reg_linear,
                                     l2_reg_embedding=l2_reg_embedding, l2_reg_dnn=l2_reg_dnn, init_std=init_std,
                                     seed=seed,
                                     dnn_dropout=dnn_dropout, dnn_activation=dnn_activation,
                                     task=task, device=device)

        self.params['model'] = 'DeepFM'
        self.use_fm = use_fm
        self.use_dnn = len(dnn_feature_columns) > 0 and len(
            dnn_hidden_units) > 0
        if use_fm:
            self.fm = FM()

        if self.use_dnn:
            self.dnn = DNN(self.compute_input_dim(dnn_feature_columns, embedding_size), dnn_hidden_units,
                           activation=dnn_activation, l2_reg=l2_reg_dnn, dropout_rate=dnn_dropout, use_bn=dnn_use_bn,
                           init_std=init_std, device=device)
            self.dnn_linear = nn.Linear(
                dnn_hidden_units[-1], 1, bias=False).to(device)
            self.add_regularization_loss(
                filter(lambda x: 'weight' in x[0] and 'bn' not in x[0], self.dnn.named_parameters()), l2_reg_dnn)
            self.add_regularization_loss(self.dnn_linear.weight, l2_reg_dnn)
        self.to(device)

    def forward(self, X):
        sparse_embedding_list, dense_value_list = self.input_from_feature_columns(X, self.dnn_feature_columns,
                                                                                  self.embedding_dict)
        logit = self.linear_model(X)  # first-order linear part of FM, (batch_size, 1)

        if self.use_fm and len(sparse_embedding_list) > 0:
            fm_input = torch.cat(sparse_embedding_list, dim=1)
            fm_out = self.fm(fm_input)  # (batch_size, 1)
            logit += fm_out  # second-order feature interactions

        if self.use_dnn:
            dnn_input = combined_dnn_input(
                sparse_embedding_list, dense_value_list)

            dnn_output = self.dnn(dnn_input)
            dnn_logit = self.dnn_linear(dnn_output)  # (batch_size, 1)

            logit += dnn_logit

        y_pred = self.out(logit)

        return y_pred
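
# The FM layer instantiated above is defined earlier in this file; the
# standard second-order interaction such a layer computes (a sketch of the
# usual FM formula, not necessarily this file's exact implementation) is
# 0.5 * sum((sum_i v_i)^2 - sum_i v_i^2) over the embedding dimension:
def _fm_second_order_sketch(inputs):
    # inputs: (batch_size, num_fields, embed_dim) stacked field embeddings
    square_of_sum = torch.sum(inputs, dim=1) ** 2  # (batch_size, embed_dim)
    sum_of_square = torch.sum(inputs ** 2, dim=1)  # (batch_size, embed_dim)
    return 0.5 * torch.sum(square_of_sum - sum_of_square, dim=1, keepdim=True)  # (batch_size, 1)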

"""
Author:
    Liu Chen, liuchen@hdu.edu.cn

TextDeepFM: adds text features on top of DeepFM, using a TextCNN model as the text encoder.
"""


class TextDeepFM(BaseModel):
    """Instantiates the TextDeepFM Network architecture.

    :param linear_feature_columns: An iterable containing all the features used by the linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by the deep part of the model.
    :param embedding_size: positive integer, sparse feature embedding_size
    :param use_fm: bool, use FM part or not
    :param dnn_hidden_units: list of positive integers or empty list, the layer number and units in each layer of the DNN
    :param l2_reg_linear: float. L2 regularizer strength applied to the linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to the embedding vectors
    :param l2_reg_dnn: float. L2 regularizer strength applied to the DNN
    :param init_std: float, used as the initialisation std of the embedding vectors
    :param seed: integer, used as the random seed
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in the DNN
    :param dnn_use_bn: bool. Whether to use BatchNormalization before activation in the DNN
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :param device: str, ``"cpu"`` or ``"cuda:0"``
    :param use_textcnn: whether to use TextCNN as the text encoder
    :param filter_sizes: convolution kernel sizes
    :param num_filters: number of kernels per size
    :param cnn_dropout: dropout rate applied to the convolution output
    :param text_embedding_size: word-vector dimension of the text
    :param text_embedding_matrix: word-vector matrix of the text; if not None, used for initialisation
    :param text_vocab_size: vocabulary size
    :return: A PyTorch model instance.

    """

    def __init__(self,
                 linear_feature_columns, dnn_feature_columns, embedding_size=8, use_fm=True,
                 dnn_hidden_units=(256, 128),
                 l2_reg_linear=0.00001, l2_reg_embedding=0.00001, l2_reg_dnn=0, init_std=0.0001, seed=1024,
                 dnn_dropout=0,
                 dnn_activation=F.relu, dnn_use_bn=False, task='binary', device='cpu', use_textcnn=True,
                 filter_sizes=[1, 2, 3], num_filters=128, text_vocab_size=1000, text_embedding_size=8,
                 text_embedding_matrix=None, cnn_dropout=0.3):

        super(TextDeepFM, self).__init__(linear_feature_columns, dnn_feature_columns, embedding_size=embedding_size,
                                         dnn_hidden_units=dnn_hidden_units,
                                         l2_reg_linear=l2_reg_linear,
                                         l2_reg_embedding=l2_reg_embedding, l2_reg_dnn=l2_reg_dnn, init_std=init_std,
                                         seed=seed,
                                         dnn_dropout=dnn_dropout, dnn_activation=dnn_activation,
                                         task=task, device=device)

        self.params['model'] = 'TextDeepFM'
        self.params['filter_sizes'] = filter_sizes
        self.params['num_filters'] = num_filters
        self.params['cnn_dropout'] = cnn_dropout
        if text_embedding_matrix is None:
            self.params['text_embedding_matrix'] = 'No'  # no pretrained matrix used
        else:
            self.params['text_embedding_matrix'] = 'Yes'

        self.use_fm = use_fm
        self.use_textcnn = use_textcnn
        self.use_dnn = len(dnn_feature_columns) > 0 and len(
            dnn_hidden_units) > 0
        if use_fm:
            self.fm = FM()

        if use_textcnn:
            self.text_cnn = TextCNN(filter_sizes, num_filters, text_embedding_size, text_vocab_size, cnn_dropout,
                                    text_embedding_matrix)

        if self.use_dnn:
            if self.use_textcnn:
                # the extra "* 2" assumes exactly two text fields feed the DNN
                # (the user's interest topics and the question's bound topics)
                self.dnn = DNN(
                    self.compute_input_dim(dnn_feature_columns, embedding_size) + len(filter_sizes) * num_filters * 2,
                    dnn_hidden_units,
                    activation=dnn_activation, l2_reg=l2_reg_dnn, dropout_rate=dnn_dropout, use_bn=dnn_use_bn,
                    init_std=init_std, device=device)
            else:
                self.dnn = DNN(
                    self.compute_input_dim(dnn_feature_columns, embedding_size),
                    dnn_hidden_units,
                    activation=dnn_activation, l2_reg=l2_reg_dnn, dropout_rate=dnn_dropout, use_bn=dnn_use_bn,
                    init_std=init_std, device=device)

            self.dnn_linear = nn.Linear(
                dnn_hidden_units[-1], 1, bias=False).to(device)

            self.add_regularization_loss(
                filter(lambda x: 'weight' in x[0] and 'bn' not in x[0], self.dnn.named_parameters()), l2_reg_dnn)
            self.add_regularization_loss(self.dnn_linear.weight, l2_reg_dnn)
        self.to(device)

    def forward(self, X):
        """

        :param X: (batch_size, feature_size)
        :return: (batch_size, 1) predictions
        """
        sparse_embedding_list, dense_value_list = self.input_from_feature_columns(X, self.dnn_feature_columns,
                                                                                  self.embedding_dict)
        # each entry of sparse_embedding_list: (batch_size, 1, embedding_dim);
        # list length = sparse_feature_size + varlen_feature_size

        logit = self.linear_model(X)  # first-order linear part of FM, (batch_size, 1)

        if self.use_fm and len(sparse_embedding_list) > 0:
            fm_input = torch.cat(sparse_embedding_list, dim=1)
            fm_out = self.fm(fm_input)  # (batch_size, 1)
            logit += fm_out  # second-order feature interactions

        if self.use_dnn:

            if self.use_textcnn:
                text_feature_list = self.text_input_from_feature_columns(X, self.dnn_feature_columns)  # raw text id sequences
                text_cnn_out = []
                for t in text_feature_list:
                    # each TextCNN output: (batch_size, len(filter_sizes) * num_filters),
                    # unsqueezed to (batch_size, 1, ...) to match the embedding list
                    text_cnn_out.append(self.text_cnn(t).unsqueeze(1))
                sparse_embedding_list = sparse_embedding_list + text_cnn_out

            dnn_input = combined_dnn_input(
                sparse_embedding_list, dense_value_list)

            dnn_output = self.dnn(dnn_input)
            dnn_logit = self.dnn_linear(dnn_output)  # (batch_size, 1)

            logit += dnn_logit

        y_pred = self.out(logit)

        return y_pred
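
# A hedged usage sketch for TextDeepFM (toy feature columns and sizes; the
# TextFeat constructor arguments are elided because its definition lives
# earlier in this file):
#
#   feature_columns = ([SparseFeat('m_sex', 3), DenseFeat('m_salt_score', 1)]
#                      + [TextFeat(...), TextFeat(...)])  # two text fields
#   model = TextDeepFM(feature_columns, feature_columns, device='cuda:0',
#                      text_vocab_size=100000, text_embedding_size=64)
#   model.compile("adam", "binary_crossentropy", metrics=["binary_crossentropy", "auc"])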
--------------------------------------------------------------------------------