├── 0.RecBasic.ipynb ├── 0.机器学习集成模型.ipynb ├── 1.Evaluation.ipynb ├── 2.1 SimpleRecModels.ipynb ├── 2.2 DNNRecModels.ipynb ├── 2.3 SeqRecModels.ipynb ├── 2.4 MultiInterest.ipynb ├── 2.5 Multitask.ipynb ├── README.md ├── 推荐系统之召回:基本原理和前沿研究PPT-2023-Dr. Bin Fu.pdf ├── 推荐系统之排序:基本原理和前沿研究PPT-2023-Dr. Bin Fu.pdf └── 搜广推之特征工程:基本原理和前沿研究PPT-2023-Dr. Bin Fu.pdf /2.1 SimpleRecModels.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "initial_id", 7 | "metadata": { 8 | "collapsed": true 9 | }, 10 | "outputs": [], 11 | "source": [ 12 | "# 推荐模型\n", 13 | "# 协同过滤:矩阵分解、自编码器\n", 14 | "# 内容过滤:因子分解机\n" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "outputs": [ 21 | { 22 | "name": "stdout", 23 | "output_type": "stream", 24 | "text": [ 25 | "2526 9555 (2533,) (9565,)\n" 26 | ] 27 | } 28 | ], 29 | "source": [ 30 | "# 加载数据集1\n", 31 | "import pandas as pd\n", 32 | "from sklearn.model_selection import train_test_split\n", 33 | "# from scipy.sparse import spmatrix\n", 34 | "\n", 35 | "# 加载 数据集 movie_ratings.csv\n", 36 | "movie_ratings = pd.read_csv('./data/movie_ratings.csv', header=None)\n", 37 | "movie_ratings.columns = ['user_id', 'item_id', 'rating']\n", 38 | "user_ids = movie_ratings.value_counts(subset=['user_id'])\n", 39 | "item_ids = movie_ratings.value_counts(subset=['item_id'])\n", 40 | "movie_ratings['rating'] = (movie_ratings['rating'] - 0) / (movie_ratings['rating'].max() - 0)\n", 41 | "X_train, X_test, y_train, y_test = train_test_split(movie_ratings.values[:,:2], movie_ratings.values[:,2], test_size=0.4, random_state=0)\n", 42 | "print(len(user_ids), len(item_ids), user_ids.keys().max(), item_ids.keys().max())\n", 43 | "movie_ratings.head()\n", 44 | "num_users = user_ids.keys().max()[0] + 1 \n", 45 | "num_items = item_ids.keys().max()[0] + 1\n" 46 | ], 47 | "metadata": { 48 | "collapsed": false, 49 | "ExecuteTime": { 50 | "end_time": "2023-09-01T03:28:26.725770200Z", 51 | "start_time": "2023-09-01T03:28:21.622711600Z" 52 | } 53 | }, 54 | "id": "b7bfe1118340f831" 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 1, 59 | "outputs": [], 60 | "source": [ 61 | "# 加载数据集2\n", 62 | "# 数据集:ml-100k\n", 63 | "\n", 64 | "import os\n", 65 | "import numpy as np\n", 66 | "from sklearn.preprocessing import MinMaxScaler\n", 67 | "\n", 68 | "# 加载数据\n", 69 | "ratings = np.array([[int(x) for x in line.strip().split('\\t')[:3]] for line in open('./data/ml-100k/ua.base','r').read().strip().split('\\n')], dtype=np.int32)\n", 70 | "ratings[:,-1] = (ratings[:,-1] - 0)/(max(ratings[:,-1]) - 0)\n", 71 | "occupation_dict = {'administrator':0, 'artist':1, 'doctor':2, 'educator':3, 'engineer':4, 'entertainment':5, 'executive':6, 'healthcare':7, 'homemaker':8, 'lawyer':9, 'librarian':10, 'marketing':11, 'none':12, 'other':13, 'programmer':14, 'retired':15, 'salesman':16, 'scientist':17, 'student':18, 'technician':19, 'writer':20}\n", 72 | "gender_dict={'M':1,'F':0}\n", 73 | "user_info = {}\n", 74 | "for line in open('./data/ml-100k/u.user','r', encoding='utf-8').read().strip().split('\\n'):\n", 75 | " phs = line.strip().split('|')\n", 76 | " user_info[int(phs[0])] = [int(phs[1]), gender_dict[phs[2]], occupation_dict[phs[3]]]\n", 77 | "item_info = {}\n", 78 | "for line in open('./data/ml-100k/u.item','r', encoding='ISO-8859-1').read().strip().split('\\n'):\n", 79 | " phs = line.strip().split('|')\n", 80 | " item_info[int(phs[0])] = phs[5:]\n", 81 | "data = 
np.array([user_info[u] + item_info[i] + [r] for u, i, r in ratings], dtype=np.float32)\n", 82 | "data[:,:-1] = MinMaxScaler().fit_transform(data[:,:-1])\n", 83 | "# print(len(user_info[list(user_info.keys())[0]]), len(item_info[list(item_info.keys())[0]]))\n", 84 | "# print(data.shape)\n", 85 | "num_users = len(user_info)\n", 86 | "num_items = len(item_info)\n", 87 | "num_features = 22" 88 | ], 89 | "metadata": { 90 | "collapsed": false, 91 | "ExecuteTime": { 92 | "end_time": "2023-09-01T01:28:05.460110100Z", 93 | "start_time": "2023-09-01T01:28:01.749225400Z" 94 | } 95 | }, 96 | "id": "dfb3f41e47bc2cf2" 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 3, 101 | "outputs": [ 102 | { 103 | "name": "stdout", 104 | "output_type": "stream", 105 | "text": [ 106 | "[2023-08-31 21:40:09] epoch=[1/10], train_mse_loss: 0.0918, validate_mse_loss: 0.0343\n", 107 | "[2023-08-31 21:40:56] epoch=[2/10], train_mse_loss: 0.0347, validate_mse_loss: 0.0336\n", 108 | "[2023-08-31 21:41:43] epoch=[3/10], train_mse_loss: 0.0322, validate_mse_loss: 0.0322\n", 109 | "[2023-08-31 21:42:31] epoch=[4/10], train_mse_loss: 0.0314, validate_mse_loss: 0.0314\n", 110 | "[2023-08-31 21:43:20] epoch=[5/10], train_mse_loss: 0.0309, validate_mse_loss: 0.0310\n", 111 | "[2023-08-31 21:44:08] epoch=[6/10], train_mse_loss: 0.0306, validate_mse_loss: 0.0310\n", 112 | "[2023-08-31 21:44:55] epoch=[7/10], train_mse_loss: 0.0305, validate_mse_loss: 0.0309\n", 113 | "[2023-08-31 21:45:43] epoch=[8/10], train_mse_loss: 0.0305, validate_mse_loss: 0.0308\n", 114 | "[2023-08-31 21:46:31] epoch=[9/10], train_mse_loss: 0.0304, validate_mse_loss: 0.0309\n", 115 | "[2023-08-31 21:47:19] epoch=[10/10], train_mse_loss: 0.0305, validate_mse_loss: 0.0309\n" 116 | ] 117 | } 118 | ], 119 | "source": [ 120 | "# CF评分预测\n", 121 | "# 1.1 mf矩阵分解模型: R = P Q.T\n", 122 | "# 数据集:movie_ratings.csv\n", 123 | "\n", 124 | "import torch\n", 125 | "from torch.nn import Module, Parameter, MSELoss, Embedding\n", 126 | "from torch.utils.data import TensorDataset, DataLoader\n", 127 | "from datetime import datetime\n", 128 | "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else ('mps:0' if torch.backends.mps.is_available() else \"cpu\"))\n", 129 | "batch_size = 100\n", 130 | "num_epochs = 10\n", 131 | "dim=99\n", 132 | "train_loader = DataLoader(dataset=TensorDataset(torch.from_numpy(X_train).long(), torch.from_numpy(y_train).float()), batch_size=batch_size, shuffle=True, pin_memory=True)\n", 133 | "test_loader = DataLoader(dataset=TensorDataset(torch.from_numpy(X_test).long(), torch.from_numpy(y_test).float()), batch_size=batch_size, shuffle=False, pin_memory=True)\n", 134 | "\n", 135 | "# 矩阵分解\n", 136 | "class MatrixFactorization(Module):\n", 137 | " def __init__(self, num_users, num_items, dim):\n", 138 | " super(MatrixFactorization, self).__init__()\n", 139 | " self.num_users, self.num_items, self.dim = num_users, num_items, dim\n", 140 | " self.user_embeddings = Embedding(num_users, dim)\n", 141 | " self.item_embeddings = Embedding(num_items, dim)\n", 142 | " def forward(self, user_item_ids):\n", 143 | " user_embeddings = self.user_embeddings(user_item_ids[:,0])\n", 144 | " item_embeddings = self.item_embeddings(user_item_ids[:,1])\n", 145 | " result = torch.sigmoid(torch.sum(user_embeddings * item_embeddings, dim=-1))\n", 146 | " return result.squeeze()\n", 147 | "model = MatrixFactorization(num_users=num_users, num_items=num_items, dim=dim).to(device)\n", 148 | "\n", 149 | "optimizer = torch.optim.Adam(model.parameters(), 
lr=0.01, weight_decay=2e-4)\n", 150 | "criterion = MSELoss(reduction='sum').to(device)\n", 151 | "\n", 152 | "for epoch in range(num_epochs):\n", 153 | " # train:\n", 154 | " epoch_train_losses = []\n", 155 | " model.train()\n", 156 | " for i, inputs in enumerate(train_loader):\n", 157 | " optimizer.zero_grad()\n", 158 | " input = inputs[0].to(device)\n", 159 | " label = inputs[1].to(device)\n", 160 | " output = model(input)\n", 161 | " loss = criterion(output, label)\n", 162 | " loss.backward()\n", 163 | " # torch.nn.utils.clip_grad_norm_(parameters=model.parameters(), max_norm=1, norm_type=2)\n", 164 | " optimizer.step()\n", 165 | " epoch_train_losses.append([input.shape[0], loss.item()])\n", 166 | " # validate:\n", 167 | " model.eval()\n", 168 | " epoch_test_losses = []\n", 169 | " for i, inputs in enumerate(test_loader):\n", 170 | " input = inputs[0].to(device)\n", 171 | " label = inputs[1].to(device)\n", 172 | " output = model(input)\n", 173 | " loss = criterion(output, label)\n", 174 | " epoch_test_losses.append([input.shape[0], loss.item()])\n", 175 | " train_loss = sum([x[1] for x in epoch_train_losses])/sum([x[0] for x in epoch_train_losses])\n", 176 | " test_loss = sum([x[1] for x in epoch_test_losses])/sum([x[0] for x in epoch_test_losses])\n", 177 | " # print\n", 178 | " print('['+datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\")+']', 'epoch=[{}/{}], train_mse_loss: {:.4f}, validate_mse_loss: {:.4f}'.format(epoch+1, num_epochs, train_loss, test_loss))" 179 | ], 180 | "metadata": { 181 | "collapsed": false, 182 | "ExecuteTime": { 183 | "end_time": "2023-08-31T13:47:19.482846Z", 184 | "start_time": "2023-08-31T13:39:20.110027Z" 185 | } 186 | }, 187 | "id": "17e31ee1f00442ae" 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": 4, 192 | "outputs": [ 193 | { 194 | "name": "stdout", 195 | "output_type": "stream", 196 | "text": [ 197 | "[2023-09-01 11:30:42] epoch=[1/10], train_mse_loss: 0.0602, validate_mse_loss: 0.0276\n", 198 | "[2023-09-01 11:31:45] epoch=[2/10], train_mse_loss: 0.0284, validate_mse_loss: 0.0290\n", 199 | "[2023-09-01 11:32:47] epoch=[3/10], train_mse_loss: 0.0287, validate_mse_loss: 0.0289\n", 200 | "[2023-09-01 11:33:50] epoch=[4/10], train_mse_loss: 0.0286, validate_mse_loss: 0.0289\n", 201 | "[2023-09-01 11:34:53] epoch=[5/10], train_mse_loss: 0.0287, validate_mse_loss: 0.0290\n", 202 | "[2023-09-01 11:35:54] epoch=[6/10], train_mse_loss: 0.0287, validate_mse_loss: 0.0288\n", 203 | "[2023-09-01 11:36:57] epoch=[7/10], train_mse_loss: 0.0287, validate_mse_loss: 0.0288\n", 204 | "[2023-09-01 11:37:59] epoch=[8/10], train_mse_loss: 0.0287, validate_mse_loss: 0.0291\n", 205 | "[2023-09-01 11:39:00] epoch=[9/10], train_mse_loss: 0.0287, validate_mse_loss: 0.0289\n", 206 | "[2023-09-01 11:40:01] epoch=[10/10], train_mse_loss: 0.0286, validate_mse_loss: 0.0290\n" 207 | ] 208 | } 209 | ], 210 | "source": [ 211 | "# CF评分预测\n", 212 | "# 1.2 SVD矩阵分解模型: R = P Q.T + user_bias + item_bias + bias\n", 213 | "# 数据集:movie_ratings.csv\n", 214 | "\n", 215 | "import torch\n", 216 | "from torch.nn import Module, Parameter, MSELoss, Embedding\n", 217 | "from torch.utils.data import TensorDataset, DataLoader\n", 218 | "from datetime import datetime\n", 219 | "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else ('mps:0' if torch.backends.mps.is_available() else \"cpu\"))\n", 220 | "batch_size = 100\n", 221 | "num_epochs = 10\n", 222 | "dim=99\n", 223 | "train_loader = DataLoader(dataset=TensorDataset(torch.from_numpy(X_train).long(), 
torch.from_numpy(y_train).float()), batch_size=batch_size, shuffle=True, pin_memory=True)\n", 224 | "test_loader = DataLoader(dataset=TensorDataset(torch.from_numpy(X_test).long(), torch.from_numpy(y_test).float()), batch_size=batch_size, shuffle=False, pin_memory=True)\n", 225 | "\n", 226 | "# 矩阵分解\n", 227 | "class SVD(Module):\n", 228 | " def __init__(self, num_users, num_items, dim):\n", 229 | " super(SVD, self).__init__()\n", 230 | " self.num_users, self.num_items, self.dim = num_users, num_items, dim\n", 231 | " self.user_embeddings = Embedding(num_users, dim)\n", 232 | " self.item_embeddings = Embedding(num_items, dim)\n", 233 | " self.bias = Parameter(torch.randn((1,1)), requires_grad=True)\n", 234 | " self.user_bias = Parameter(torch.randn((num_users,1)), requires_grad=True)\n", 235 | " self.item_bias = Parameter(torch.randn((num_items,1)), requires_grad=True)\n", 236 | " def forward(self, user_item_ids):\n", 237 | " user_embeddings = self.user_embeddings(user_item_ids[:,0])\n", 238 | " item_embeddings = self.item_embeddings(user_item_ids[:,1])\n", 239 | " result = torch.sigmoid(torch.sum(user_embeddings * item_embeddings, dim=-1, keepdim=True) + self.user_bias[user_item_ids[:,0]] + self.item_bias[user_item_ids[:,1]] + self.bias)\n", 240 | " return result.squeeze()\n", 241 | "model = SVD(num_users=num_users, num_items=num_items, dim=dim).to(device)\n", 242 | "\n", 243 | "optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=2e-4)\n", 244 | "criterion = MSELoss(reduction='sum').to(device)\n", 245 | "\n", 246 | "for epoch in range(num_epochs):\n", 247 | " # train:\n", 248 | " epoch_train_losses = []\n", 249 | " model.train()\n", 250 | " for i, inputs in enumerate(train_loader):\n", 251 | " optimizer.zero_grad()\n", 252 | " input = inputs[0].to(device)\n", 253 | " label = inputs[1].to(device)\n", 254 | " output = model(input)\n", 255 | " loss = criterion(output, label)\n", 256 | " loss.backward()\n", 257 | " # torch.nn.utils.clip_grad_norm_(parameters=model.parameters(), max_norm=1, norm_type=2)\n", 258 | " optimizer.step()\n", 259 | " epoch_train_losses.append([input.shape[0], loss.item()])\n", 260 | " # validate:\n", 261 | " model.eval()\n", 262 | " epoch_test_losses = []\n", 263 | " for i, inputs in enumerate(test_loader):\n", 264 | " input = inputs[0].to(device)\n", 265 | " label = inputs[1].to(device)\n", 266 | " output = model(input)\n", 267 | " loss = criterion(output, label)\n", 268 | " epoch_test_losses.append([input.shape[0], loss.item()])\n", 269 | " train_loss = sum([x[1] for x in epoch_train_losses])/sum([x[0] for x in epoch_train_losses])\n", 270 | " test_loss = sum([x[1] for x in epoch_test_losses])/sum([x[0] for x in epoch_test_losses])\n", 271 | " # print\n", 272 | " print('['+datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\")+']', 'epoch=[{}/{}], train_mse_loss: {:.4f}, validate_mse_loss: {:.4f}'.format(epoch+1, num_epochs, train_loss, test_loss))" 273 | ], 274 | "metadata": { 275 | "collapsed": false, 276 | "ExecuteTime": { 277 | "end_time": "2023-09-01T03:40:01.319976900Z", 278 | "start_time": "2023-09-01T03:29:40.222561500Z" 279 | } 280 | }, 281 | "id": "c011420406aa913b" 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": 4, 286 | "outputs": [ 287 | { 288 | "name": "stdout", 289 | "output_type": "stream", 290 | "text": [ 291 | "[2023-08-31 22:41:17] epoch=[1/10], train_mse_loss: 0.0380, validate_mse_loss: 0.0300\n", 292 | "[2023-08-31 22:41:18] epoch=[2/10], train_mse_loss: 0.0287, validate_mse_loss: 0.0286\n", 293 | "[2023-08-31 
22:41:18] epoch=[3/10], train_mse_loss: 0.0275, validate_mse_loss: 0.0283\n", 294 | "[2023-08-31 22:41:19] epoch=[4/10], train_mse_loss: 0.0261, validate_mse_loss: 0.0282\n", 295 | "[2023-08-31 22:41:19] epoch=[5/10], train_mse_loss: 0.0245, validate_mse_loss: 0.0280\n", 296 | "[2023-08-31 22:41:20] epoch=[6/10], train_mse_loss: 0.0228, validate_mse_loss: 0.0282\n", 297 | "[2023-08-31 22:41:21] epoch=[7/10], train_mse_loss: 0.0213, validate_mse_loss: 0.0284\n", 298 | "[2023-08-31 22:41:21] epoch=[8/10], train_mse_loss: 0.0198, validate_mse_loss: 0.0289\n", 299 | "[2023-08-31 22:41:22] epoch=[9/10], train_mse_loss: 0.0185, validate_mse_loss: 0.0290\n", 300 | "[2023-08-31 22:41:22] epoch=[10/10], train_mse_loss: 0.0172, validate_mse_loss: 0.0290\n" 301 | ] 302 | } 303 | ], 304 | "source": [ 305 | "# CF评分预测\n", 306 | "# 2.自编码器: R -> z -> R_\n", 307 | "# 数据集:movie_ratings.csv\n", 308 | "\n", 309 | "import torch, numpy as np\n", 310 | "from torch.nn import Module, Parameter, MSELoss\n", 311 | "from torch.utils.data import TensorDataset, DataLoader\n", 312 | "from torch import nn\n", 313 | "from datetime import datetime\n", 314 | "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else ('mps:0' if torch.backends.mps.is_available() else \"cpu\"))\n", 315 | "batch_size = 100\n", 316 | "num_epochs = 10\n", 317 | "dim=99\n", 318 | "# train:\n", 319 | "train = np.zeros((num_users, num_items), dtype=np.float32)\n", 320 | "for f, l in zip(X_train, y_train):\n", 321 | " train[int(f[0]), int(f[1])] = l\n", 322 | "# with mask\n", 323 | "train_loader = DataLoader(dataset=TensorDataset(torch.from_numpy(train).float(), torch.from_numpy(train>0.01).float()), batch_size=batch_size, shuffle=True, pin_memory=True)\n", 324 | "# test:\n", 325 | "test = np.zeros((num_users, num_items), dtype=np.float32)\n", 326 | "for f, l in zip(X_test, y_test):\n", 327 | " test[int(f[0]), int(f[1])] = l\n", 328 | "# with mask\n", 329 | "test_loader = DataLoader(dataset=TensorDataset(torch.from_numpy(test).float(), torch.from_numpy(test>0.01).float()), batch_size=batch_size, shuffle=False, pin_memory=True)\n", 330 | "\n", 331 | "\n", 332 | "# 自编码器\n", 333 | "class AutoEncoder(Module):\n", 334 | " def __init__(self, num_users, num_items, dim):\n", 335 | " super(AutoEncoder,self).__init__()\n", 336 | " self.num_users = num_users\n", 337 | " self.num_items = num_items\n", 338 | " self.dim = dim\n", 339 | " self.encoder = nn.Sequential(nn.Linear(num_items, dim), nn.Sigmoid())\n", 340 | " self.decoder = nn.Sequential(nn.Linear(dim, num_items))\n", 341 | " def forward(self, torch_input):\n", 342 | " encoder = self.encoder(torch_input)\n", 343 | " decoder = self.decoder(encoder)\n", 344 | " return torch.sigmoid(decoder).squeeze()\n", 345 | "model = AutoEncoder(num_users=num_users, num_items=num_items, dim=dim).to(device)\n", 346 | "\n", 347 | "optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=2e-4)\n", 348 | "criterion = MSELoss(reduction='none').to(device)\n", 349 | "\n", 350 | "for epoch in range(num_epochs):\n", 351 | " # train:\n", 352 | " epoch_train_losses = []\n", 353 | " model.train()\n", 354 | " for i, inputs in enumerate(train_loader):\n", 355 | " optimizer.zero_grad()\n", 356 | " input = inputs[0].to(device)\n", 357 | " mask = inputs[1].to(device)\n", 358 | " if mask.sum().item()==0:\n", 359 | " continue\n", 360 | " output = model(input)\n", 361 | " loss = torch.sum(criterion(output, input) * mask)\n", 362 | " loss.backward()\n", 363 | " # 
torch.nn.utils.clip_grad_norm_(parameters=model.parameters(), max_norm=1, norm_type=2)\n", 364 | " optimizer.step()\n", 365 | " epoch_train_losses.append([mask.sum().item(), loss.item()])\n", 366 | " # validate:\n", 367 | " model.eval()\n", 368 | " epoch_test_losses = []\n", 369 | " for i, inputs in enumerate(test_loader):\n", 370 | " optimizer.zero_grad()\n", 371 | " input = inputs[0].to(device)\n", 372 | " mask = inputs[1].to(device)\n", 373 | " if mask.sum().item()==0:\n", 374 | " continue\n", 375 | " output = model(input)\n", 376 | " loss = torch.sum(criterion(output, input) * mask)\n", 377 | " epoch_test_losses.append([mask.sum().item(), loss.item()])\n", 378 | " train_loss = sum([x[1] for x in epoch_train_losses])/sum([x[0] for x in epoch_train_losses])\n", 379 | " test_loss = sum([x[1] for x in epoch_test_losses])/sum([x[0] for x in epoch_test_losses])\n", 380 | " # print\n", 381 | " print('['+datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\")+']', 'epoch=[{}/{}], train_mse_loss: {:.4f}, validate_mse_loss: {:.4f}'.format(epoch+1, num_epochs, train_loss, test_loss))" 382 | ], 383 | "metadata": { 384 | "collapsed": false, 385 | "ExecuteTime": { 386 | "end_time": "2023-08-31T14:41:22.825921Z", 387 | "start_time": "2023-08-31T14:41:15.448138Z" 388 | } 389 | }, 390 | "id": "e9fb83f10079f209" 391 | }, 392 | { 393 | "cell_type": "code", 394 | "execution_count": 9, 395 | "outputs": [ 396 | { 397 | "name": "stdout", 398 | "output_type": "stream", 399 | "text": [ 400 | "[2023-09-01 09:33:29] epoch=[1/10], train_mse_loss: 0.1662, validate_mse_loss: 0.1674\n", 401 | "[2023-09-01 09:33:32] epoch=[2/10], train_mse_loss: 0.1659, validate_mse_loss: 0.1670\n", 402 | "[2023-09-01 09:33:36] epoch=[3/10], train_mse_loss: 0.1660, validate_mse_loss: 0.1666\n", 403 | "[2023-09-01 09:33:43] epoch=[4/10], train_mse_loss: 0.1659, validate_mse_loss: 0.1668\n", 404 | "[2023-09-01 09:33:50] epoch=[5/10], train_mse_loss: 0.1658, validate_mse_loss: 0.1667\n", 405 | "[2023-09-01 09:33:58] epoch=[6/10], train_mse_loss: 0.1658, validate_mse_loss: 0.1671\n", 406 | "[2023-09-01 09:34:05] epoch=[7/10], train_mse_loss: 0.1658, validate_mse_loss: 0.1666\n", 407 | "[2023-09-01 09:34:12] epoch=[8/10], train_mse_loss: 0.1657, validate_mse_loss: 0.1667\n", 408 | "[2023-09-01 09:34:20] epoch=[9/10], train_mse_loss: 0.1656, validate_mse_loss: 0.1666\n", 409 | "[2023-09-01 09:34:27] epoch=[10/10], train_mse_loss: 0.1656, validate_mse_loss: 0.1666\n" 410 | ] 411 | } 412 | ], 413 | "source": [ 414 | "# CF评分预测\n", 415 | "# 3.神经协同过滤 NeuralCF: MF + MLP 独立\n", 416 | "# NeuralCF的实现,用于召回\n", 417 | "# WWW 2017] Neural Collaborative Filtering\n", 418 | "# 数据集:movie_ratings.csv\n", 419 | "\n", 420 | "import torch\n", 421 | "from torch import nn\n", 422 | "from torch.utils.data import TensorDataset, DataLoader\n", 423 | "from datetime import datetime\n", 424 | "from tqdm import tqdm\n", 425 | "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else ('mps:0' if torch.backends.mps.is_available() else \"cpu\"))\n", 426 | "batch_size = 100\n", 427 | "num_epochs = 10\n", 428 | "dim = 20\n", 429 | "train_loader = DataLoader(dataset=TensorDataset(torch.from_numpy(X_train).long(), torch.from_numpy(y_train).float()), batch_size=batch_size, shuffle=True, pin_memory=True)\n", 430 | "test_loader = DataLoader(dataset=TensorDataset(torch.from_numpy(X_test).long(), torch.from_numpy(y_test).float()), batch_size=batch_size, shuffle=False, pin_memory=True)\n", 431 | "\n", 432 | "class NeuralMF(nn.Module):\n", 433 | " def __init__(self, num_users, 
num_items, mf_dim, mlp_dim, dnn_layer_dims):\n", 434 | " super(NeuralMF, self).__init__()\n", 435 | " self.MF_Embedding_User = nn.Embedding(num_embeddings=num_users, embedding_dim=mf_dim)\n", 436 | " self.MF_Embedding_Item = nn.Embedding(num_embeddings=num_items, embedding_dim=mf_dim)\n", 437 | " self.MLP_Embedding_User = nn.Embedding(num_embeddings=num_users, embedding_dim=mlp_dim)\n", 438 | " self.MLP_Embedding_Item = nn.Embedding(num_embeddings=num_items, embedding_dim=mlp_dim)\n", 439 | " # 全连接网络\n", 440 | " self.dnn_network = nn.Sequential(nn.Linear(2 * mlp_dim, dnn_layer_dims[0]))\n", 441 | " if len(dnn_layer_dims)>1:\n", 442 | " for i, layer_dim in enumerate(dnn_layer_dims[1:]):\n", 443 | " self.dnn_network.append(nn.Linear(dnn_layer_dims[i], layer_dim))\n", 444 | " self.dnn_network.append(nn.ReLU())\n", 445 | " self.dnn_network.append(nn.Linear(dnn_layer_dims[-1], mf_dim))\n", 446 | " # 合并\n", 447 | " self.final_nn = nn.Sequential(nn.Linear(2 * mf_dim, 1), nn.Sigmoid())\n", 448 | " def forward(self, inputs):\n", 449 | " # mf\n", 450 | " mf_vec = self.MF_Embedding_User(inputs[:, 0]) * self.MF_Embedding_Item(inputs[:, 1])\n", 451 | " # mlp\n", 452 | " mlp_vec = torch.cat([self.MLP_Embedding_User(inputs[:, 0]), self.MLP_Embedding_Item(inputs[:, 1])], dim=-1)\n", 453 | " mlp_vec = self.dnn_network(mlp_vec)\n", 454 | " # 合并两个\n", 455 | " result = self.final_nn(torch.cat([mf_vec, mlp_vec], dim=-1))\n", 456 | " return result.squeeze()\n", 457 | "model = NeuralMF(num_users=num_users, num_items=num_items, mf_dim=dim, mlp_dim=dim, dnn_layer_dims=[30]).to(device)\n", 458 | "\n", 459 | "optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=2e-4)\n", 460 | "criterion = nn.MSELoss(reduction='sum').to(device)\n", 461 | "\n", 462 | "for epoch in range(num_epochs):\n", 463 | " # train:\n", 464 | " epoch_train_losses = []\n", 465 | " model.train()\n", 466 | " for i, inputs in enumerate(train_loader):\n", 467 | " optimizer.zero_grad()\n", 468 | " input = inputs[0].to(device)\n", 469 | " label = inputs[1].to(device)\n", 470 | " output = model(input)\n", 471 | " loss = criterion(output, label)\n", 472 | " loss.backward()\n", 473 | " # torch.nn.utils.clip_grad_norm_(parameters=model.parameters(), max_norm=1, norm_type=2)\n", 474 | " optimizer.step()\n", 475 | " epoch_train_losses.append([input.shape[0], loss.item()])\n", 476 | " # validate:\n", 477 | " model.eval()\n", 478 | " epoch_test_losses = []\n", 479 | " for i, inputs in enumerate(test_loader):\n", 480 | " input = inputs[0].to(device)\n", 481 | " label = inputs[1].to(device)\n", 482 | " output = model(input)\n", 483 | " loss = criterion(output, label)\n", 484 | " epoch_test_losses.append([input.shape[0], loss.item()])\n", 485 | " train_loss = sum([x[1] for x in epoch_train_losses])/sum([x[0] for x in epoch_train_losses])\n", 486 | " test_loss = sum([x[1] for x in epoch_test_losses])/sum([x[0] for x in epoch_test_losses])\n", 487 | " # print\n", 488 | " print('['+datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\")+']', 'epoch=[{}/{}], train_mse_loss: {:.4f}, validate_mse_loss: {:.4f}'.format(epoch+1, num_epochs, train_loss, test_loss))\n" 489 | ], 490 | "metadata": { 491 | "collapsed": false, 492 | "ExecuteTime": { 493 | "end_time": "2023-09-01T01:34:27.412299600Z", 494 | "start_time": "2023-09-01T01:33:26.680881800Z" 495 | } 496 | }, 497 | "id": "f8994228ae5117d7" 498 | }, 499 | { 500 | "cell_type": "code", 501 | "execution_count": 5, 502 | "outputs": [ 503 | { 504 | "name": "stdout", 505 | "output_type": "stream", 506 | "text": [ 507 
| "[2023-09-01 09:29:47] epoch=[1/10], train_mse_loss: 0.2159, validate_mse_loss: 0.2086\n", 508 | "[2023-09-01 09:29:49] epoch=[2/10], train_mse_loss: 0.2061, validate_mse_loss: 0.2053\n", 509 | "[2023-09-01 09:29:50] epoch=[3/10], train_mse_loss: 0.1970, validate_mse_loss: 0.1976\n", 510 | "[2023-09-01 09:29:52] epoch=[4/10], train_mse_loss: 0.1865, validate_mse_loss: 0.1806\n", 511 | "[2023-09-01 09:29:54] epoch=[5/10], train_mse_loss: 0.1746, validate_mse_loss: 0.1720\n", 512 | "[2023-09-01 09:29:56] epoch=[6/10], train_mse_loss: 0.1675, validate_mse_loss: 0.1660\n", 513 | "[2023-09-01 09:29:57] epoch=[7/10], train_mse_loss: 0.1644, validate_mse_loss: 0.1670\n", 514 | "[2023-09-01 09:29:59] epoch=[8/10], train_mse_loss: 0.1627, validate_mse_loss: 0.1650\n", 515 | "[2023-09-01 09:30:01] epoch=[9/10], train_mse_loss: 0.1619, validate_mse_loss: 0.1622\n", 516 | "[2023-09-01 09:30:03] epoch=[10/10], train_mse_loss: 0.1611, validate_mse_loss: 0.1631\n" 517 | ] 518 | } 519 | ], 520 | "source": [ 521 | "# 内容过滤:\n", 522 | "# FM 分解机:y = w0 + sum(w1 * f) + sum( * f_i f_j)\n", 523 | "# 数据集:ml-100k\n", 524 | "\n", 525 | "import os\n", 526 | "import numpy as np\n", 527 | "import torch\n", 528 | "import torch.nn as nn\n", 529 | "from torch.nn import Module, Parameter, MSELoss\n", 530 | "from torch.utils.data import Dataset, DataLoader, TensorDataset \n", 531 | "from sklearn.model_selection import train_test_split\n", 532 | "from datetime import datetime\n", 533 | "import warnings\n", 534 | "warnings.filterwarnings('ignore')\n", 535 | "\n", 536 | "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else ('mps:0' if torch.backends.mps.is_available() else \"cpu\"))\n", 537 | "batch_size = 100\n", 538 | "num_epochs = 10\n", 539 | "dim=10\n", 540 | "X_train, X_test, y_train, y_test = train_test_split(data[:,:-1], data[:,-1], test_size=0.4, random_state=0)\n", 541 | "train_loader = DataLoader(dataset=TensorDataset(torch.from_numpy(X_train).float(), torch.from_numpy(y_train).float()), batch_size=batch_size, shuffle=True, pin_memory=True)\n", 542 | "test_loader = DataLoader(dataset=TensorDataset(torch.from_numpy(X_test).float(), torch.from_numpy(y_test).float()), batch_size=batch_size, shuffle=False, pin_memory=True)\n", 543 | "\n", 544 | "# bit-wise cross\n", 545 | "class FactorizationMachine(Module):\n", 546 | " def __init__(self, num_features, dim):\n", 547 | " super(FactorizationMachine, self).__init__()\n", 548 | " self.num_features = num_features\n", 549 | " self.dim = dim\n", 550 | " self.w0 = Parameter(torch.randn((1,1)), requires_grad=True)\n", 551 | " self.w = Parameter(torch.randn((num_features, 1)), requires_grad=True)\n", 552 | " self.bw = Parameter(torch.randn((num_features, dim)), requires_grad=True)\n", 553 | " def forward(self, X: torch.Tensor):\n", 554 | " # 实现方式一:\n", 555 | " # tmp = torch.matmul(self.bw, self.bw.T)\n", 556 | " # tmp[np.tril_indices(self.num_features)] = 0\n", 557 | " # y = torch.sigmoid(self.w0 + torch.sum(torch.matmul(X, self.w), dim=-1, keepdim=True) + torch.sum(torch.sum(torch.bmm(X.unsqueeze(-1), X.unsqueeze(1)) * tmp.unsqueeze(0), dim=-1, keepdim=False), dim=-1, keepdim=True))\n", 558 | " # 实现方式二:和的平方 - 平方的和\n", 559 | " sum_square = torch.sum(self.bw.unsqueeze(0) * X.unsqueeze(-1).repeat(1,1,self.dim), dim=1).square()\n", 560 | " square_sum = torch.sum(self.bw.square().unsqueeze(0) * X.square().unsqueeze(-1).repeat(1,1,self.dim), dim=1)\n", 561 | " y = torch.sigmoid(self.w0 + torch.sum(torch.matmul(X, self.w), dim=-1, keepdim=True) + torch.sum(sum_square - 
square_sum, dim=-1, keepdim=True))\n", 562 | " return y.squeeze()\n", 563 | " def parameters(self, recurse: bool = True):\n", 564 | " return [self.w0, self.w, self.bw]\n", 565 | "model = FactorizationMachine(num_features=num_features, dim=dim).to(device)\n", 566 | "\n", 567 | "optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=2e-4)\n", 568 | "criterion = MSELoss(reduction='sum').to(device)\n", 569 | "\n", 570 | "for epoch in range(num_epochs):\n", 571 | " # train:\n", 572 | " epoch_train_losses = []\n", 573 | " model.train()\n", 574 | " for i, inputs in enumerate(train_loader):\n", 575 | " optimizer.zero_grad()\n", 576 | " input = inputs[0].to(device)\n", 577 | " label = inputs[1].to(device)\n", 578 | " output = model(input)\n", 579 | " loss = criterion(output, label)\n", 580 | " loss.backward()\n", 581 | " # torch.nn.utils.clip_grad_norm_(parameters=model.parameters(), max_norm=1, norm_type=2)\n", 582 | " optimizer.step()\n", 583 | " epoch_train_losses.append([input.shape[0], loss.item()])\n", 584 | " # validate:\n", 585 | " model.eval()\n", 586 | " epoch_test_losses = []\n", 587 | " for i, inputs in enumerate(test_loader):\n", 588 | " input = inputs[0].to(device)\n", 589 | " label = inputs[1].to(device)\n", 590 | " output = model(input)\n", 591 | " loss = criterion(output, label)\n", 592 | " epoch_test_losses.append([input.shape[0], loss.item()])\n", 593 | " train_loss = sum([x[1] for x in epoch_train_losses])/sum([x[0] for x in epoch_train_losses])\n", 594 | " test_loss = sum([x[1] for x in epoch_test_losses])/sum([x[0] for x in epoch_test_losses])\n", 595 | " # print\n", 596 | " print('['+datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\")+']', 'epoch=[{}/{}], train_mse_loss: {:.4f}, validate_mse_loss: {:.4f}'.format(epoch+1, num_epochs, train_loss, test_loss))\n" 597 | ], 598 | "metadata": { 599 | "collapsed": false, 600 | "ExecuteTime": { 601 | "end_time": "2023-09-01T01:30:03.330966900Z", 602 | "start_time": "2023-09-01T01:29:45.910451Z" 603 | } 604 | }, 605 | "id": "f70af7981380aad6" 606 | }, 607 | { 608 | "cell_type": "code", 609 | "execution_count": null, 610 | "outputs": [], 611 | "source": [], 612 | "metadata": { 613 | "collapsed": false 614 | }, 615 | "id": "4ee312aa1a8fa7be" 616 | } 617 | ], 618 | "metadata": { 619 | "kernelspec": { 620 | "display_name": "Python 3", 621 | "language": "python", 622 | "name": "python3" 623 | }, 624 | "language_info": { 625 | "codemirror_mode": { 626 | "name": "ipython", 627 | "version": 2 628 | }, 629 | "file_extension": ".py", 630 | "mimetype": "text/x-python", 631 | "name": "python", 632 | "nbconvert_exporter": "python", 633 | "pygments_lexer": "ipython2", 634 | "version": "2.7.6" 635 | } 636 | }, 637 | "nbformat": 4, 638 | "nbformat_minor": 5 639 | } 640 | -------------------------------------------------------------------------------- /2.4 MultiInterest.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "initial_id", 7 | "metadata": { 8 | "collapsed": true 9 | }, 10 | "outputs": [], 11 | "source": [ 12 | "# 多兴趣建模:大部分模型将⽤户的兴趣表达为⼀个 user embedding,然而单个embedding来表达⽤户的多种兴趣是很困难的。尤其是⻓期⾏为序列推荐的场景。\n", 13 | "# 可用于多兴趣召回。\n", 14 | "# MIND、ComirecSA、SINE、" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "outputs": [ 21 | { 22 | "name": "stdout", 23 | "output_type": "stream", 24 | "text": [ 25 | "446 1548\n", 26 | "(446, 81)\n", 27 | "[[ 397 303 260 306 312 744 257 285 338 270 682 
862 328 1543\n", 28 | " 344 881 326 298 867 265 673 1491 301 337 353 261 300 1260\n", 29 | " 1238 302 325 334 331 351 347 1090 683 901 897 272 345 49\n", 30 | " 585 309 521 126 386 1282 1038 539 288 417 418 931 1444 804\n", 31 | " 164 933 941 1326 686 1001 316 324 1128 900 1091 1349 710 716\n", 32 | " 470 1499 225 98 1508 357 365 757 887 248 633]\n", 33 | " [ 344 773 365 397 445 48 374 62 431 981 231 780 384 109\n", 34 | " 39 775 1021 929 1022 393 93 399 89 386 569 715 66 1059\n", 35 | " 748 414 459 418 139 832 495 831 1413 413 398 396 77 784\n", 36 | " 717 786 50 142 768 259 1159 394 11 917 793 1306 1307 810\n", 37 | " 302 944 1328 564 53 293 141 212 778 645 825 839 328 332\n", 38 | " 1234 1363 342 1247 485 107 1134 1137 369 1277 254]]\n" 39 | ] 40 | } 41 | ], 42 | "source": [ 43 | "import copy\n", 44 | "# 加载数据集2成序列数据集,评分[0,1,2]为负反馈,评分[3,4,5]为正反馈,只保留正样本,构造简单序列推荐数据集\n", 45 | "# 数据集:ml-100k\n", 46 | "\n", 47 | "import os, random\n", 48 | "import numpy as np\n", 49 | "import pandas as pd\n", 50 | "random.seed(100)\n", 51 | "\n", 52 | "# 加载数据: >=3分为正,用户评分次数不低于50,只保留最后50个,拆分为40: 5 + 15负例 (随机采样): 5 + 15负例 (随机采样)\n", 53 | "ratings = np.array([[int(x) for x in line.strip().split('\\t')[:4]] for line in open('./data/ml-100k/ua.base','r').read().strip().split('\\n')], dtype=np.int32)\n", 54 | "ratings_pd = pd.DataFrame({feature_name: list(feature_data) for feature_name, feature_data in zip(['user_id','item_id','rating','timestamp'], ratings.T)})\n", 55 | "pos_ratings_pd = ratings_pd[ratings_pd['rating']>2.9][['user_id','item_id','timestamp']].dropna().sort_values('timestamp') # 已经排序了\n", 56 | "pos_ratings_pd = pos_ratings_pd.groupby('user_id').filter(lambda x: x['user_id'].count()>=50)\n", 57 | "userid2id = {user_id: i for i, user_id in enumerate(sorted(list(set(pos_ratings_pd['user_id'].tolist()))))}\n", 58 | "itemid2id = {item_id: i for i, item_id in enumerate(sorted(list(set(pos_ratings_pd['item_id'].tolist()))))}\n", 59 | "print(len(userid2id), len(itemid2id))\n", 60 | "del ratings, ratings_pd\n", 61 | "\n", 62 | "# new id\n", 63 | "user_train_validate_test = {}\n", 64 | "for user,item,t in pos_ratings_pd.values:\n", 65 | " u, i = userid2id[user], itemid2id[item]\n", 66 | " if u not in user_train_validate_test:\n", 67 | " user_train_validate_test[u] = [i]\n", 68 | " else:\n", 69 | " user_train_validate_test[u].append(i)\n", 70 | " user_train_validate_test[u] = user_train_validate_test[u][-50:]\n", 71 | "train_seq_len = 40\n", 72 | "pos_num = 5\n", 73 | "neg_sample_num = 15\n", 74 | "def sample(low, high, notinset, num):\n", 75 | " nums = set([])\n", 76 | " n = num\n", 77 | " while n>0:\n", 78 | " id = random.randint(low, high)\n", 79 | " if id not in notinset and id not in nums:\n", 80 | " nums.add(id)\n", 81 | " n -= 1\n", 82 | " return list(nums)\n", 83 | "data = np.zeros((len(user_train_validate_test), 81), dtype=np.int32)\n", 84 | "i = 0\n", 85 | "for user, train_validate_test in user_train_validate_test.items():\n", 86 | " train, validate, test = train_validate_test[:train_seq_len], train_validate_test[-pos_num*2:-pos_num], train_validate_test[-pos_num:]\n", 87 | " data[i, 0] = user\n", 88 | " data[i,1:train_seq_len+1] = np.array(train)\n", 89 | " samples = sample(0, len(itemid2id)-1, set(train_validate_test), neg_sample_num * 2)\n", 90 | " data[i,1+train_seq_len : 1+train_seq_len+pos_num+neg_sample_num] = np.array(validate + samples[:neg_sample_num])\n", 91 | " data[i,1+train_seq_len+pos_num+neg_sample_num : ] = np.array(test + samples[neg_sample_num:])\n", 92 | " i += 1\n", 93 | "del 
user_train_validate_test\n", 94 | "print(data.shape)\n", 95 | "print(data[:2,:])\n", 96 | "\n", 97 | "# 继续加载info特征信息,内容特征\n", 98 | "occupation_dict = {'administrator':0, 'artist':1, 'doctor':2, 'educator':3, 'engineer':4, 'entertainment':5, 'executive':6, 'healthcare':7, 'homemaker':8, 'lawyer':9, 'librarian':10, 'marketing':11, 'none':12, 'other':13, 'programmer':14, 'retired':15, 'salesman':16, 'scientist':17, 'student':18, 'technician':19, 'writer':20}\n", 99 | "gender_dict={'M':1,'F':0}\n", 100 | "user_info = {}\n", 101 | "for line in open('./data/ml-100k/u.user','r', encoding='utf-8').read().strip().split('\\n'):\n", 102 | " phs = line.strip().split('|')\n", 103 | " if int(phs[0]) not in userid2id:\n", 104 | " continue\n", 105 | " uid = userid2id[int(phs[0])]\n", 106 | " user_info[uid] = [gender_dict[phs[2]], occupation_dict[phs[3]]] # int(phs[1]) 为了方便,不要连续型特征\n", 107 | "user_num_features = 2\n", 108 | "item_info = {}\n", 109 | "for line in open('./data/ml-100k/u.item','r', encoding='ISO-8859-1').read().strip().split('\\n'):\n", 110 | " phs = line.strip().split('|')\n", 111 | " if int(phs[0]) not in itemid2id:\n", 112 | " continue\n", 113 | " iid = itemid2id[int(phs[0])]\n", 114 | " item_info[iid] = phs[5:]\n", 115 | "item_num_features = 19\n", 116 | "num_users = len(user_info)\n", 117 | "num_items = len(item_info)\n", 118 | "num_features = 21" 119 | ], 120 | "metadata": { 121 | "collapsed": false, 122 | "ExecuteTime": { 123 | "end_time": "2023-09-07T07:33:25.004287Z", 124 | "start_time": "2023-09-07T07:33:22.016405700Z" 125 | } 126 | }, 127 | "id": "c5a7915fd42957cc" 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": 19, 132 | "outputs": [ 133 | { 134 | "name": "stdout", 135 | "output_type": "stream", 136 | "text": [ 137 | "[2023-09-05 11:39:33] epoch=[1/10], train_ce_loss: 0.7489, train_ndcg: 0.6662, validate_ce_loss: 0.7489, validate_ndcg: 0.6681\n", 138 | "[2023-09-05 11:39:38] epoch=[2/10], train_ce_loss: 0.7489, train_ndcg: 0.6681, validate_ce_loss: 0.7489, validate_ndcg: 0.6681\n", 139 | "[2023-09-05 11:39:43] epoch=[3/10], train_ce_loss: 0.7489, train_ndcg: 0.6681, validate_ce_loss: 0.7489, validate_ndcg: 0.6681\n", 140 | "[2023-09-05 11:39:48] epoch=[4/10], train_ce_loss: 0.7489, train_ndcg: 0.6640, validate_ce_loss: 0.7489, validate_ndcg: 0.6677\n", 141 | "[2023-09-05 11:39:53] epoch=[5/10], train_ce_loss: 0.7489, train_ndcg: 0.6688, validate_ce_loss: 0.7489, validate_ndcg: 0.6681\n", 142 | "[2023-09-05 11:39:58] epoch=[6/10], train_ce_loss: 0.7489, train_ndcg: 0.6682, validate_ce_loss: 0.7489, validate_ndcg: 0.6681\n", 143 | "[2023-09-05 11:40:04] epoch=[7/10], train_ce_loss: 0.7489, train_ndcg: 0.6681, validate_ce_loss: 0.7489, validate_ndcg: 0.6681\n", 144 | "[2023-09-05 11:40:09] epoch=[8/10], train_ce_loss: 0.7489, train_ndcg: 0.6681, validate_ce_loss: 0.7489, validate_ndcg: 0.6681\n", 145 | "[2023-09-05 11:40:15] epoch=[9/10], train_ce_loss: 0.7489, train_ndcg: 0.6681, validate_ce_loss: 0.7489, validate_ndcg: 0.6681\n", 146 | "[2023-09-05 11:40:20] epoch=[10/10], train_ce_loss: 0.7489, train_ndcg: 0.6681, validate_ce_loss: 0.7489, validate_ndcg: 0.6681\n" 147 | ] 148 | } 149 | ], 150 | "source": [ 151 | "# MIND: Multi-interest network with dynamic routing for recommendation at Tmall\n", 152 | "# 采⽤了hinton提出的胶囊⽹络(动态路由)作为多兴趣提取层(指定胶囊个数,可以类似于K-Means聚类来理解)\n", 153 | "# 我这里用了user_profile和item_profile,基于item_profile的行为序列,采用胶囊网络来建模行为序列的多兴趣偏好。\n", 154 | "# 胶囊网络迭代过程中增大top-1兴趣节点的激活,降低其他兴趣节点的激活,本质类似聚类switch expert。\n", 155 | "# 这种是hard选择路由,多个兴趣中选择得分最大的一个。\n", 156 
| "# 多头注意力则是soft路由。\n", 157 | "# 数据集:ml-100k\n", 158 | "\n", 159 | "import torch\n", 160 | "from torch import nn\n", 161 | "from torch.nn import Module, CrossEntropyLoss, Sequential, Linear, Sigmoid\n", 162 | "from torch.utils.data import Dataset, DataLoader, TensorDataset \n", 163 | "from sklearn.model_selection import train_test_split\n", 164 | "from datetime import datetime\n", 165 | "import warnings\n", 166 | "warnings.filterwarnings('ignore')\n", 167 | "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else ('mps:0' if torch.backends.mps.is_available() else \"cpu\"))\n", 168 | "batch_size = 100\n", 169 | "num_epochs = 10\n", 170 | "dim=50\n", 171 | "\n", 172 | "user_feature_vals = {}\n", 173 | "for i in range(user_num_features):\n", 174 | " user_feature_vals[i] = sorted(list(set([val[i] for val in user_info.values()])))\n", 175 | " for user, info in user_info.items():\n", 176 | " user_info[user][i] = user_feature_vals[i].index(info[i])\n", 177 | "item_feature_vals = {}\n", 178 | "for i in range(item_num_features):\n", 179 | " item_feature_vals[i] = sorted(list(set([val[i] for val in item_info.values()])))\n", 180 | " for item, info in item_info.items():\n", 181 | " item_info[item][i] = item_feature_vals[i].index(info[i])\n", 182 | "\n", 183 | "user_profile_data = np.array([user_info[u] for u in data[:,0]]) # [data_len, ufeature]\n", 184 | "item_seq_profile_data = np.array([[item_info[item] for item in item_seq] for item_seq in data[:,1:]]) # [data_len, seq_len, ufeature]\n", 185 | "\n", 186 | "train_loader = DataLoader(dataset=TensorDataset(torch.from_numpy(user_profile_data).long(), \n", 187 | " torch.from_numpy(item_seq_profile_data[:,:train_seq_len,:]).long(),\n", 188 | " torch.from_numpy(item_seq_profile_data[:,train_seq_len:(train_seq_len + pos_num + neg_sample_num),:]).long()\n", 189 | " ), batch_size=batch_size, shuffle=True, pin_memory=True)\n", 190 | "test_loader = DataLoader(dataset=TensorDataset(torch.from_numpy(user_profile_data).long(), \n", 191 | " torch.from_numpy(item_seq_profile_data[:,:train_seq_len,:]).long(), # 这里图简便,懒得改seq_len了\n", 192 | " torch.from_numpy(item_seq_profile_data[:,-(pos_num + neg_sample_num):,:]).long()\n", 193 | " ), batch_size=batch_size, shuffle=False, pin_memory=True)\n", 194 | "\n", 195 | "# 胶囊网络结构\n", 196 | "class CapsuleNet(nn.Module):\n", 197 | " def __init__(self, hidden_dim: int, seq_len: int, num_interests: int = 4, routing_times: int = 3):\n", 198 | " super(CapsuleNet, self).__init__()\n", 199 | " self.hidden_dim, self.seq_len = hidden_dim, seq_len\n", 200 | " self.num_interests, self.routing_times = num_interests, routing_times\n", 201 | " self.to_interest_linear = nn.Linear(hidden_dim, hidden_dim * num_interests, bias=False)\n", 202 | " self.relu_linear = nn.Sequential(nn.Linear(hidden_dim, hidden_dim, bias=False), nn.ReLU())\n", 203 | " # [batch_len, seq_len, profile_embedding], [batch_len, seq_len]\n", 204 | " def forward(self, history_item_embeddings, mask):\n", 205 | " batch_len = history_item_embeddings.shape[0]\n", 206 | " # 计算u, [batch_len, interest_num, seq_len, hidden_dim]\n", 207 | " interest_item_embeddings = self.to_interest_linear(history_item_embeddings)\n", 208 | " interest_item_embeddings = interest_item_embeddings.reshape((batch_len, self.seq_len, self.num_interests, self.hidden_dim))\n", 209 | " interest_item_embeddings = interest_item_embeddings.permute((0,2,1,3))\n", 210 | " # 随机初始化胶囊权重b\n", 211 | " capsule_weight = torch.randn((batch_len, self.num_interests, self.seq_len), device=device, 
requires_grad=False)\n", 212 | " # 动态路由传播3次\n", 213 | " for i in range(self.routing_times):\n", 214 | " # mask,最后shape=[b, in, 1, s]\n", 215 | " atten_mask = torch.unsqueeze(mask, 1).repeat(1, self.num_interests, 1)\n", 216 | " paddings = torch.zeros_like(atten_mask, dtype=torch.float)\n", 217 | " # 计算c\n", 218 | " capsule_softmax_weight = torch.softmax(capsule_weight, dim=-1)\n", 219 | " capsule_softmax_weight = torch.where(torch.eq(atten_mask, 0), paddings, capsule_softmax_weight) # mask位置填充0\n", 220 | " capsule_softmax_weight = torch.unsqueeze(capsule_softmax_weight, 2)\n", 221 | " if i <= 1: # 前两次\n", 222 | " # 计算s\n", 223 | " # [batch_len, num_interests, 1, seq_len] [batch_len, num_interests, seq_len, hidden_dim] -> [batch_len, num_interests, 1, hidden_dim]\n", 224 | " interest_capsule = torch.matmul(capsule_softmax_weight, interest_item_embeddings)\n", 225 | " # 计算v = squash(s)\n", 226 | " cap_norm = torch.sum(interest_capsule.square(), -1, keepdim=True) # [batch_len, num_interests, 1, 1]\n", 227 | " scalar_factor = cap_norm / (1 + cap_norm) / torch.sqrt(cap_norm + 1e-9)\n", 228 | " interest_capsule = scalar_factor * interest_capsule # [batch_len, num_interests, 1, hidden_dim]\n", 229 | " # 计算b = b + u * v\n", 230 | " # [batch_len, interest_num, seq_len, hidden_dim], [batch_len, num_interests, hidden_dim, 1] -> [batch_len, interest_num, seq_len, 1]\n", 231 | " delta_weight = torch.matmul(interest_item_embeddings, interest_capsule.transpose(2, 3).contiguous())\n", 232 | " delta_weight = delta_weight.squeeze()\n", 233 | " # 更新时候,正益正,负益负(向量点积作用),迭代多次。\n", 234 | " capsule_weight = capsule_weight + delta_weight\n", 235 | " else:\n", 236 | " interest_capsule = torch.matmul(capsule_softmax_weight, interest_item_embeddings)\n", 237 | " cap_norm = torch.sum(torch.square(interest_capsule), -1, True)\n", 238 | " scalar_factor = cap_norm / (1 + cap_norm) / torch.sqrt(cap_norm + 1e-9)\n", 239 | " interest_capsule = scalar_factor * interest_capsule\n", 240 | " interest_capsule = self.relu_linear(interest_capsule.squeeze()) # [batch_len, num_interests, hidden_dim]\n", 241 | " return interest_capsule\n", 242 | " def parameters(self, recurse: bool = True):\n", 243 | " return [para for para in self.to_interest_linear.parameters()] + [para for para in self.relu_linear.parameters()]\n", 244 | "class MIND(nn.Module):\n", 245 | " def __init__(self, hidden_dim: int, seq_len: int, num_interests: int, routing_times: int, user_profile_feature: [tuple], item_profile_feature: [tuple], profile_feature_embedding_dim: int, dnn_layer_dims: list[int]):\n", 246 | " super(MIND, self).__init__()\n", 247 | " self.dnn_layer_dims, self.hidden_dim = dnn_layer_dims, hidden_dim\n", 248 | " self.num_interests = num_interests\n", 249 | " # 内容特征\n", 250 | " self.user_profile_feature, self.item_profile_feature, self.profile_feature_embedding_dim = user_profile_feature, item_profile_feature, profile_feature_embedding_dim\n", 251 | " self.user_profile_embed = nn.ModuleDict({'user_embed_' + str(i): nn.Embedding(num_embeddings=valcount, embedding_dim=profile_feature_embedding_dim) for i, valcount in user_profile_feature})\n", 252 | " self.item_profile_embed = nn.ModuleDict({'item_embed_' + str(i): nn.Embedding(num_embeddings=valcount, embedding_dim=profile_feature_embedding_dim) for i, valcount in item_profile_feature})\n", 253 | " self.user_profile_all_embed_dim = profile_feature_embedding_dim * len(user_profile_feature)\n", 254 | " self.item_profile_all_embed_dim = profile_feature_embedding_dim * len(item_profile_feature)\n", 255 
| " # item pooling layer\n", 256 | " self.pooling_layer = nn.Sequential(nn.Linear(self.item_profile_all_embed_dim, hidden_dim), nn.ReLU())\n", 257 | " # capsule_net\n", 258 | " self.capsule_net = CapsuleNet(hidden_dim, seq_len, num_interests, routing_times)\n", 259 | " # final dnn\n", 260 | " self.all_embedding_dim = self.user_profile_all_embed_dim + hidden_dim\n", 261 | " self.final_dnn_network = nn.Sequential(nn.Linear(self.all_embedding_dim, dnn_layer_dims[0]), nn.ReLU())\n", 262 | " if len(dnn_layer_dims) > 1:\n", 263 | " for i, layer_dim in enumerate(dnn_layer_dims[1:]):\n", 264 | " self.final_dnn_network.append(nn.Linear(dnn_layer_dims[i], layer_dim))\n", 265 | " self.final_dnn_network.append(nn.ReLU())\n", 266 | " self.final_dnn_network.append(nn.Linear(dnn_layer_dims[-1], hidden_dim))\n", 267 | " self.final_dnn_network.append(nn.ReLU())\n", 268 | " def forward(self, user_profiles, item_history_list_profile, item_future_list_profile):\n", 269 | " batch_len = user_profiles.shape[0]\n", 270 | " # user profile: [batch, feature * embed_dim]\n", 271 | " user_profile_embeddings = torch.cat([self.user_profile_embed['user_embed_' + str(i)](user_profiles[:,i].long()) for i in range(user_profiles.shape[-1])], axis=-1)\n", 272 | " user_profile_embeddings = user_profile_embeddings.reshape((batch_len, len(self.user_profile_feature) * self.profile_feature_embedding_dim)) # [batch, feature, embed_dim]\n", 273 | " # item_history_list_profile: torch.Tensor([batch, seq_len, feature * embed_dim])\n", 274 | " seq_len = item_history_list_profile.shape[1]\n", 275 | " item_history_list_profile_embeddings = torch.cat([self.item_profile_embed['item_embed_' + str(i)](item_history_list_profile[:,:,i].long()) for i in range(item_history_list_profile.shape[-1])], axis=-1)\n", 276 | " item_history_list_profile_embeddings = item_history_list_profile_embeddings.reshape((batch_len, seq_len, len(self.item_profile_feature) * self.profile_feature_embedding_dim)) # [batch, seq_len, feature * embed_dim]\n", 277 | " # 以上处理user profile和行为历史,下面进行与candidate组合预测, item_future_list 和 item_future_list_profile\n", 278 | " seq_len_ = item_future_list_profile.shape[1]\n", 279 | " item_future_list_profile_embeddings = torch.cat([self.item_profile_embed['item_embed_' + str(i)](item_future_list_profile[:,:,i].long()) for i in range(item_future_list_profile.shape[-1])], axis=-1)\n", 280 | " item_future_list_profile_embeddings = item_future_list_profile_embeddings.reshape((batch_len, seq_len_, len(self.item_profile_feature) * self.profile_feature_embedding_dim)) # [batch, seq_len_, feature * embed_dim]\n", 281 | " # [batch_len, test_len, hidden_dim]\n", 282 | " # [batch, seq_len, embed_dim]\n", 283 | " item_history_pool = self.pooling_layer(item_history_list_profile_embeddings)\n", 284 | " mask = torch.ones((batch_len, seq_len)) # 目前我的数据集整齐没有mask\n", 285 | " multi_interest_capsule = self.capsule_net(item_history_pool, mask) # [batch_len, num_interests, hidden_dim]\n", 286 | " user_multi_interest_cat = torch.cat([user_profile_embeddings.unsqueeze(1).repeat((1,self.num_interests,1)), multi_interest_capsule], dim=-1)\n", 287 | " user_history_multi_interest_embed = self.final_dnn_network(user_multi_interest_cat) # [batch_len, num_interests, hidden_dim]\n", 288 | " # future prediction\n", 289 | " item_future_pool = self.pooling_layer(item_future_list_profile_embeddings)# [batch, seq_len_, hidden_dim]\n", 290 | " item_future_multi_interest_scores = torch.sigmoid(torch.bmm(item_future_pool, user_history_multi_interest_embed.permute((0,2,1)))) 
# [batch_len, seq_len_, num_interests]\n", 291 | " # find the best capsule\n", 292 | " best_interest_index = torch.argmax(item_future_multi_interest_scores, dim=-1) # [batch_len, seq_len_]\n", 293 | " # print(best_interest_index)\n", 294 | " best_item_future_multi_interest_score = item_future_multi_interest_scores.take(best_interest_index).squeeze()\n", 295 | " return best_item_future_multi_interest_score\n", 296 | " def parameters(self, recurse: bool = True):\n", 297 | " return [para for para in self.user_profile_embed.parameters()] + [para for para in self.item_profile_embed.parameters()] + [para for para in self.pooling_layer.parameters()] + [para for para in self.capsule_net.parameters()] + [para for para in self.final_dnn_network.parameters()] \n", 298 | "model = MIND(hidden_dim=dim, seq_len=train_seq_len, num_interests=4, routing_times=3, user_profile_feature = [(i,len(list_)) for i, list_ in user_feature_vals.items()], item_profile_feature= [(i,len(list_)) for i, list_ in item_feature_vals.items()], profile_feature_embedding_dim=dim, dnn_layer_dims=[16])\n", 299 | "\n", 300 | "optimizer = torch.optim.Adam(model.parameters(), lr=0.1, weight_decay=0.0003)\n", 301 | "criterion = CrossEntropyLoss(reduction='sum').to(device)\n", 302 | "label = torch.FloatTensor([1 for i in range(pos_num)] + [0 for i in range(neg_sample_num)]).to(device)\n", 303 | "\n", 304 | "def DCG(batch_labels):\n", 305 | " dcgsum = np.zeros((batch_labels.shape[0]))\n", 306 | " for i in range(batch_labels.shape[-1]):\n", 307 | " dcg = (2 ** batch_labels[:,i] - 1) / np.math.log(i + 2, 2)\n", 308 | " dcgsum += dcg\n", 309 | " return dcgsum\n", 310 | "def NDCG(output, labels):\n", 311 | " # ideal_dcg\n", 312 | " ideal_dcg = DCG(labels)\n", 313 | " # this\n", 314 | " dcg = DCG((np.argsort( - output, axis=-1) torch.Tensor:\n", 440 | " H = torch.einsum('bse, ed -> bsd', item_seq_embeds, self.W1).tanh() # [batch_len, seq_len, hidden_dim * 4]\n", 441 | " attention = torch.softmax(torch.einsum('bsd, dk -> bsk', H, self.W2) + -1.e9 * (1 - mask.unsqueeze(-1).float()), dim=1) # [batch_len, seq_len, num_interests]\n", 442 | " multi_interest_emb = torch.matmul(attention.permute(0, 2, 1), item_seq_embeds) # [batch_len, num_interests, hidden_dim]\n", 443 | " return multi_interest_emb\n", 444 | "class ComirecSA(nn.Module):\n", 445 | " def __init__(self, hidden_dim, seq_len, num_interests, user_profile_feature: [tuple], item_profile_feature: [tuple], profile_feature_embedding_dim: int, dnn_layer_dims: list[int]):\n", 446 | " super(ComirecSA, self).__init__()\n", 447 | " self.hidden_dim, self.seq_len, self.num_interests = hidden_dim, seq_len, num_interests\n", 448 | " # 内容特征\n", 449 | " self.user_profile_feature, self.item_profile_feature, self.profile_feature_embedding_dim = user_profile_feature, item_profile_feature, profile_feature_embedding_dim\n", 450 | " self.user_profile_embed = nn.ModuleDict({'user_embed_' + str(i): nn.Embedding(num_embeddings=valcount, embedding_dim=profile_feature_embedding_dim) for i, valcount in user_profile_feature})\n", 451 | " self.item_profile_embed = nn.ModuleDict({'item_embed_' + str(i): nn.Embedding(num_embeddings=valcount, embedding_dim=profile_feature_embedding_dim) for i, valcount in item_profile_feature})\n", 452 | " self.user_profile_all_embed_dim = profile_feature_embedding_dim * len(user_profile_feature)\n", 453 | " self.item_profile_all_embed_dim = profile_feature_embedding_dim * len(item_profile_feature)\n", 454 | " # user/item pooling layer\n", 455 | " self.user_pooling_layer = 
nn.Sequential(nn.Linear(self.user_profile_all_embed_dim, hidden_dim), nn.ReLU())\n", 456 | " self.item_pooling_layer = nn.Sequential(nn.Linear(self.item_profile_all_embed_dim, hidden_dim), nn.ReLU())\n", 457 | " # MultiInterestSelfAttention\n", 458 | " self.multi_interest_sa = MultiInterestSelfAttention(hidden_dim=hidden_dim, num_interests=num_interests)\n", 459 | " def forward(self, user_profiles, item_history_list_profile, item_future_list_profile):\n", 460 | " batch_len = user_profiles.shape[0]\n", 461 | " # user profile: [batch, feature * embed_dim]\n", 462 | " user_profile_embeddings = torch.cat([self.user_profile_embed['user_embed_' + str(i)](user_profiles[:,i].long()) for i in range(user_profiles.shape[-1])], axis=-1)\n", 463 | " user_profile_embeddings = self.user_pooling_layer(user_profile_embeddings.reshape((batch_len, len(self.user_profile_feature) * self.profile_feature_embedding_dim))) # [batch, embed_dim]\n", 464 | " seq_len = item_history_list_profile.shape[1]\n", 465 | " item_history_list_profile_embeddings = torch.cat([self.item_profile_embed['item_embed_' + str(i)](item_history_list_profile[:,:,i].long()) for i in range(item_history_list_profile.shape[-1])], axis=-1)\n", 466 | " item_history_list_profile_embeddings = self.item_pooling_layer(item_history_list_profile_embeddings.reshape((batch_len, seq_len, len(self.item_profile_feature) * self.profile_feature_embedding_dim))) # [batch, seq_len, embed_dim]\n", 467 | " # 以上处理user profile和行为历史,下面进行与candidate组合预测, item_future_list 和 item_future_list_profile\n", 468 | " seq_len_ = item_future_list_profile.shape[1]\n", 469 | " item_future_list_profile_embeddings = torch.cat([self.item_profile_embed['item_embed_' + str(i)](item_future_list_profile[:,:,i].long()) for i in range(item_future_list_profile.shape[-1])], axis=-1)\n", 470 | " item_future_list_profile_embeddings = self.item_pooling_layer(item_future_list_profile_embeddings.reshape((batch_len, seq_len_, len(self.item_profile_feature) * self.profile_feature_embedding_dim))) # [batch, seq_len_, embed_dim]\n", 471 | " mask = torch.ones((batch_len, seq_len)) # 目前我的数据集整齐没有mask\n", 472 | " \n", 473 | " # [batch_len, seq_len, embed_dim] + [batch, seq_len, embed_dim] -> [batch_len, num_interests, hidden_dim]\n", 474 | " multi_interest_embeds = self.multi_interest_sa(user_profile_embeddings.unsqueeze(1) * item_history_list_profile_embeddings, mask)\n", 475 | " # [batch, seq_len_, hidden_dim], [batch_len, num_interests, hidden_dim] -> [batch, seq_len_, num_interests]\n", 476 | " future_interest_scores = torch.sigmoid(torch.bmm(item_future_list_profile_embeddings, multi_interest_embeds.permute((0,2,1))))\n", 477 | " best_future_index = torch.argmax(future_interest_scores, dim=-1)\n", 478 | " best_item_future_multi_interest_score = future_interest_scores.take(best_future_index).squeeze()\n", 479 | " return best_item_future_multi_interest_score\n", 480 | "model = ComirecSA(hidden_dim=dim, seq_len=train_seq_len, num_interests=4, user_profile_feature = [(i,len(list_)) for i, list_ in user_feature_vals.items()], item_profile_feature= [(i,len(list_)) for i, list_ in item_feature_vals.items()], profile_feature_embedding_dim=dim, dnn_layer_dims=[16])\n", 481 | "\n", 482 | "optimizer = torch.optim.Adam(model.parameters(), lr=0.1, weight_decay=0.0003)\n", 483 | "criterion = CrossEntropyLoss(reduction='sum').to(device)\n", 484 | "label = torch.FloatTensor([1 for i in range(pos_num)] + [0 for i in range(neg_sample_num)]).to(device)\n", 485 | "\n", 486 | "def DCG(batch_labels):\n", 487 | " dcgsum 
= np.zeros((batch_labels.shape[0]))\n", 488 | " for i in range(batch_labels.shape[-1]):\n", 489 | " dcg = (2 ** batch_labels[:,i] - 1) / np.math.log(i + 2, 2)\n", 490 | " dcgsum += dcg\n", 491 | " return dcgsum\n", 492 | "def NDCG(output, labels):\n", 493 | " # ideal_dcg\n", 494 | " ideal_dcg = DCG(labels)\n", 495 | " # this\n", 496 | " dcg = DCG((np.argsort( - output, axis=-1) [batch, k, embed_dim]\n", 667 | " C_u = torch.sigmoid(topk_s).unsqueeze(-1) * self.interests_embedding(topk_index)\n", 668 | " # 2.意图分配\n", 669 | " # [batch, seq_len, embed_dim] [batch, k, embed_dim] -> [batch, seq_len, k]\n", 670 | " p_k_t = torch.softmax(torch.matmul(self.pos2interest_attention_net(item_history_list_profile_embeddings) * mask.unsqueeze(-1), C_u.permute((0,2,1))), dim=-1)\n", 671 | " # 权重分配\n", 672 | " # [batch, seq_len, 1] * num_interests -> [batch, seq_len, num_interests] \n", 673 | " p_t_k_all = torch.cat([self.interest_pos_importance_attention_nets[i](item_history_list_profile_embeddings) * mask.unsqueeze(-1) for i in range(self.num_interests)], dim=-1)\n", 674 | " p_t_k = p_t_k_all.take(topk_index.unsqueeze(1).repeat((1,seq_len,1))) # [batch, seq_len, k]\n", 675 | " phi = self.interest_nor(torch.matmul((p_k_t * p_t_k).permute((0,2,1)), item_history_list_profile_embeddings)) # [batch, k, embed_dim]\n", 676 | " # interest aggregate: [batch, seq_len, k] [batch, k, embed_dim] -> [batch, seq_len, embed_dim]\n", 677 | " x = torch.bmm(p_k_t, C_u)\n", 678 | " # [batch, seq_len, 1] [batch, seq_len, embed_dim] -> [batch, embed_dim]\n", 679 | " C_u_apt = self.interest_agg_nor(torch.bmm(self.interest_agg_net(x).permute((0,2,1)), x).squeeze()) # \n", 680 | " # [batch, 1, embed_dim] [batch, embed_dim, k]\n", 681 | " # squeeze(1) 不能用squeeze()\n", 682 | " # 总结经验:以后写代码用squeeze()一定指定维度\n", 683 | " e_u_k = torch.softmax(torch.bmm(C_u_apt.unsqueeze(1), phi.permute((0,2,1))).squeeze(1), -1) # [batch, k]\n", 684 | " # torch.Size([100, 40, 50]) torch.Size([100, 50]) torch.Size([100]) torch.Size([100, 1, 50])\n", 685 | " # 终于算出来用户的嵌入了\n", 686 | " v = torch.bmm(e_u_k.unsqueeze(1), phi).squeeze() # [batch, embed_dim]\n", 687 | " # 这里直接算点积\n", 688 | " scores = torch.sigmoid(torch.bmm(item_future_list_profile_embeddings, v.unsqueeze(-1)).squeeze())\n", 689 | " return scores\n", 690 | "model = SINE(hidden_dim=dim, seq_len=train_seq_len, num_interests=4, top_k_interests=1, user_profile_feature = [(i,len(list_)) for i, list_ in user_feature_vals.items()], item_profile_feature= [(i,len(list_)) for i, list_ in item_feature_vals.items()], profile_feature_embedding_dim=dim)\n", 691 | "\n", 692 | "optimizer = torch.optim.Adam(model.parameters(), lr=0.1, weight_decay=0.0003)\n", 693 | "criterion = CrossEntropyLoss(reduction='sum').to(device)\n", 694 | "label = torch.FloatTensor([1 for i in range(pos_num)] + [0 for i in range(neg_sample_num)]).to(device)\n", 695 | "\n", 696 | "def DCG(batch_labels):\n", 697 | " dcgsum = np.zeros((batch_labels.shape[0]))\n", 698 | " for i in range(batch_labels.shape[-1]):\n", 699 | " dcg = (2 ** batch_labels[:,i] - 1) / np.math.log(i + 2, 2)\n", 700 | " dcgsum += dcg\n", 701 | " return dcgsum\n", 702 | "def NDCG(output, labels):\n", 703 | " # ideal_dcg\n", 704 | " ideal_dcg = DCG(labels)\n", 705 | " # this\n", 706 | " dcg = DCG((np.argsort( - output, axis=-1)1.8).float(), torch.from_numpy(y_train>3.8).float()), batch_size=batch_size, shuffle=True, pin_memory=True)\n", 132 | "test_loader = DataLoader(dataset=TensorDataset(torch.from_numpy(X_test).float(), torch.from_numpy(y_test>1.8).float(), 
torch.from_numpy(y_test>3.8).float()), batch_size=batch_size, shuffle=False, pin_memory=True)\n", 133 | "\n", 134 | "class ESSM(Module):\n", 135 | " def __init__(self, dense_feature_cols:[(int,int)], sparse_feature_cols:[(int,int)], sparse_feature_embedding_dim, \n", 136 | " num_task:int, dnn_layer_dims:list[int], dnn_dropout=0.):\n", 137 | " super(ESSM, self).__init__()\n", 138 | " self.dense_feature_cols, self.sparse_feature_cols, self.sparse_feature_embedding_dim = dense_feature_cols, sparse_feature_cols, sparse_feature_embedding_dim\n", 139 | " self.num_task, self.dnn_layer_dims = num_task, dnn_layer_dims\n", 140 | " # sparse feature embedding dict\n", 141 | " self.embed_layers = nn.ModuleDict({'embed_' + str(i): nn.Embedding(num_embeddings=valcount, embedding_dim=sparse_feature_embedding_dim) for i, valcount in sparse_feature_cols})\n", 142 | " self.input_dim = len(dense_feature_cols) + len(sparse_feature_cols) * sparse_feature_embedding_dim\n", 143 | " # dnn for each task\n", 144 | " self.dnn_nets = []\n", 145 | " for i in range(num_task):\n", 146 | " net = nn.Sequential()\n", 147 | " pre_layer_dim = self.input_dim\n", 148 | " for layer_dim in dnn_layer_dims:\n", 149 | " net.append(nn.Linear(pre_layer_dim, layer_dim))\n", 150 | " net.append(nn.BatchNorm1d(layer_dim))\n", 151 | " net.append(nn.Dropout(dnn_dropout))\n", 152 | " pre_layer_dim = layer_dim\n", 153 | " net.append(nn.Linear(dnn_layer_dims[-1], 1))\n", 154 | " net.append(nn.Sigmoid())\n", 155 | " self.dnn_nets.append(net)\n", 156 | " def forward(self, x):\n", 157 | " dense_input = x[:, :len(self.dense_feature_cols)]\n", 158 | " sparse_embeds = torch.cat([self.embed_layers['embed_' + str(i)](x[:, i].long()) for i in range(len(self.dense_feature_cols), x.shape[1])], axis=1)\n", 159 | " x = torch.cat([sparse_embeds, dense_input], axis=-1)\n", 160 | " outputs = [self.dnn_nets[i](x).squeeze() for i in range(self.num_task)]\n", 161 | " # ESSM原文计算概率,然后相乘,我这里任务是评分预测,改成相加\n", 162 | " output_sum = outputs[0]\n", 163 | " for i in range(1,self.num_task):\n", 164 | " outputs[i] = output_sum + outputs[i] # 这里注意不能用 +=,会inplace报错\n", 165 | " output_sum = outputs[i]\n", 166 | " return outputs\n", 167 | " def parameters(self, recurse: bool = True):\n", 168 | " paras = []\n", 169 | " for layer in self.embed_layers.values():\n", 170 | " paras += [p for p in layer.parameters()]\n", 171 | " for net in self.dnn_nets:\n", 172 | " paras += [p for p in net.parameters()]\n", 173 | " return paras\n", 174 | "model = ESSM(dense_feature_cols=[i for i in range(num_number_features)], sparse_feature_cols=[(i,len(category_feature_vals[i])) for i in range(num_number_features, num_features)], sparse_feature_embedding_dim =embedding_dim, num_task=2, dnn_layer_dims=[128, 32], dnn_dropout=0.).to(device)\n", 175 | "\n", 176 | "optimizer = torch.optim.Adam(model.parameters(), lr=0.01)\n", 177 | "criterion = MSELoss(reduction='sum').to(device)\n", 178 | "alpha = 0.5\n", 179 | "\n", 180 | "for epoch in range(num_epochs):\n", 181 | " # train:\n", 182 | " epoch_train_losses = []\n", 183 | " model.train()\n", 184 | " for i, inputs in enumerate(train_loader):\n", 185 | " optimizer.zero_grad()\n", 186 | " input = inputs[0].to(device)\n", 187 | " label_1, label_2 = inputs[1].to(device), inputs[2].to(device)\n", 188 | " output1, output2 = model(input)\n", 189 | " loss_1, loss_2 = criterion(output1, label_1), criterion(output2, label_2)\n", 190 | " loss = alpha * loss_1 + loss_2\n", 191 | " loss.backward()\n", 192 | " # 
torch.nn.utils.clip_grad_norm_(parameters=model.parameters(), max_norm=1, norm_type=2)\n", 193 | " optimizer.step()\n", 194 | " epoch_train_losses.append([input.shape[0], loss_1.item(), loss_2.item(), loss.item()])\n", 195 | " # validate:\n", 196 | " model.eval()\n", 197 | " epoch_test_losses = []\n", 198 | " for i, inputs in enumerate(test_loader):\n", 199 | " input = inputs[0].to(device)\n", 200 | " label_1, label_2 = inputs[1].to(device), inputs[2].to(device)\n", 201 | " output1, output2 = model(input)\n", 202 | " loss_1, loss_2 = criterion(output1, label_1), criterion(output2, label_2)\n", 203 | " loss = alpha * loss_1 + loss_2\n", 204 | " epoch_test_losses.append([input.shape[0], loss_1.item(), loss_2.item(), loss.item()])\n", 205 | " num_train, num_test = sum([x[0] for x in epoch_train_losses]), sum([x[0] for x in epoch_test_losses])\n", 206 | " train_overall_loss = sum([x[-1] for x in epoch_train_losses])/num_train\n", 207 | " train_task1_loss = sum([x[1] for x in epoch_train_losses])/num_train\n", 208 | " train_task2_loss = sum([x[2] for x in epoch_train_losses])/num_train\n", 209 | " test_overall_loss = sum([x[-1] for x in epoch_test_losses])/num_test\n", 210 | " test_task1_loss = sum([x[1] for x in epoch_test_losses])/num_test\n", 211 | " test_task2_loss = sum([x[2] for x in epoch_test_losses])/num_test\n", 212 | " # print\n", 213 | " print('['+datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\")+']', 'epoch=[{}/{}], train_mse_overall_loss: {:.6f}, train_mse_task1_loss: {:.6f}, train_mse_task2_loss: {:.6f}, validate_mse_overall_loss: {:.6f}, validate_mse_task1_loss: {:.6f}, validate_mse_task2_loss: {:.6f}'.format(epoch+1, num_epochs, train_overall_loss, train_task1_loss, train_task2_loss, test_overall_loss, test_task1_loss, test_task2_loss))\n", 214 | " gc.collect()" 215 | ], 216 | "metadata": { 217 | "collapsed": false, 218 | "ExecuteTime": { 219 | "end_time": "2023-09-05T08:39:53.829553100Z", 220 | "start_time": "2023-09-05T08:37:38.003601800Z" 221 | } 222 | }, 223 | "id": "4a49ef56e4d88080" 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": 12, 228 | "outputs": [ 229 | { 230 | "name": "stdout", 231 | "output_type": "stream", 232 | "text": [ 233 | "[2023-09-05 16:40:36] epoch=[1/10], train_mse_overall_loss: 0.012091, train_mse_task1_loss: 0.007442, train_mse_task2_loss: 0.008370, validate_mse_overall_loss: 0.000036, validate_mse_task1_loss: 0.000023, validate_mse_task2_loss: 0.000025\n", 234 | "[2023-09-05 16:40:45] epoch=[2/10], train_mse_overall_loss: 0.000020, train_mse_task1_loss: 0.000013, train_mse_task2_loss: 0.000014, validate_mse_overall_loss: 0.000011, validate_mse_task1_loss: 0.000007, validate_mse_task2_loss: 0.000007\n", 235 | "[2023-09-05 16:40:54] epoch=[3/10], train_mse_overall_loss: 0.000007, train_mse_task1_loss: 0.000005, train_mse_task2_loss: 0.000005, validate_mse_overall_loss: 0.000005, validate_mse_task1_loss: 0.000003, validate_mse_task2_loss: 0.000003\n", 236 | "[2023-09-05 16:41:04] epoch=[4/10], train_mse_overall_loss: 0.000004, train_mse_task1_loss: 0.000002, train_mse_task2_loss: 0.000003, validate_mse_overall_loss: 0.000003, validate_mse_task1_loss: 0.000002, validate_mse_task2_loss: 0.000002\n", 237 | "[2023-09-05 16:41:12] epoch=[5/10], train_mse_overall_loss: 0.000002, train_mse_task1_loss: 0.000001, train_mse_task2_loss: 0.000001, validate_mse_overall_loss: 0.000002, validate_mse_task1_loss: 0.000001, validate_mse_task2_loss: 0.000001\n", 238 | "[2023-09-05 16:41:21] epoch=[6/10], train_mse_overall_loss: 0.000001, 
train_mse_task1_loss: 0.000001, train_mse_task2_loss: 0.000001, validate_mse_overall_loss: 0.000001, validate_mse_task1_loss: 0.000001, validate_mse_task2_loss: 0.000001\n", 239 | "[2023-09-05 16:41:30] epoch=[7/10], train_mse_overall_loss: 0.000001, train_mse_task1_loss: 0.000001, train_mse_task2_loss: 0.000001, validate_mse_overall_loss: 0.000001, validate_mse_task1_loss: 0.000000, validate_mse_task2_loss: 0.000001\n", 240 | "[2023-09-05 16:41:39] epoch=[8/10], train_mse_overall_loss: 0.000001, train_mse_task1_loss: 0.000000, train_mse_task2_loss: 0.000000, validate_mse_overall_loss: 0.000001, validate_mse_task1_loss: 0.000000, validate_mse_task2_loss: 0.000000\n", 241 | "[2023-09-05 16:41:48] epoch=[9/10], train_mse_overall_loss: 0.000000, train_mse_task1_loss: 0.000000, train_mse_task2_loss: 0.000000, validate_mse_overall_loss: 0.000000, validate_mse_task1_loss: 0.000000, validate_mse_task2_loss: 0.000000\n", 242 | "[2023-09-05 16:41:57] epoch=[10/10], train_mse_overall_loss: 0.000000, train_mse_task1_loss: 0.000000, train_mse_task2_loss: 0.000000, validate_mse_overall_loss: 0.000000, validate_mse_task1_loss: 0.000000, validate_mse_task2_loss: 0.000000\n" 243 | ] 244 | } 245 | ], 246 | "source": [ 247 | "# MMOE:\n", 248 | "# Modeling Task\n", 249 | "# 一个输入,多个输出。中间共享多个expert,并行每个任务在其上的gate,进入各自任务的tower网络,预测。模块概率共享。\n", 250 | "# soft sharing\n", 251 | "\n", 252 | "import os, gc\n", 253 | "import numpy as np\n", 254 | "import torch\n", 255 | "import torch.nn as nn\n", 256 | "from torch.nn import Module, Parameter, MSELoss\n", 257 | "from torch.utils.data import Dataset, DataLoader, TensorDataset \n", 258 | "from sklearn.model_selection import train_test_split\n", 259 | "from sklearn.preprocessing import MinMaxScaler\n", 260 | "from datetime import datetime\n", 261 | "from tqdm import tqdm\n", 262 | "import warnings\n", 263 | "warnings.filterwarnings('ignore')\n", 264 | "from torch.nn import Module, Sequential, ReLU, Dropout, Sigmoid\n", 265 | "\n", 266 | "# category: [1,2] + [all]\n", 267 | "# number: [0] + []\n", 268 | "number_feature_data = MinMaxScaler().fit_transform(np.array([[user_info[u][0]] for u, i, r in ratings], dtype=np.float32))\n", 269 | "category_feature_data = np.array([user_info[u][1:] + item_info[i] for u, i, r in ratings], dtype=np.int32)\n", 270 | "data = np.concatenate([number_feature_data, category_feature_data, ratings[:,-1:]], axis=-1)\n", 271 | "num_number_features = number_feature_data.shape[-1]\n", 272 | "num_category_features = category_feature_data.shape[-1]\n", 273 | "num_features = data.shape[-1] - 1\n", 274 | "category_feature_vals = {}\n", 275 | "for i in range(num_number_features, num_features):\n", 276 | " category_feature_vals[i] = sorted(list(set(list(data[:, i]))))\n", 277 | " for rid in range(data.shape[0]):\n", 278 | " data[rid, i] = category_feature_vals[i].index(data[rid, i])\n", 279 | "# print(len(user_info[list(user_info.keys())[0]]), len(item_info[list(item_info.keys())[0]]))\n", 280 | "# print(data.shape)\n", 281 | "\n", 282 | "num_users = len(user_info)\n", 283 | "num_items = len(item_info)\n", 284 | "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else ('mps:0' if torch.backends.mps.is_available() else \"cpu\"))\n", 285 | "batch_size = 100\n", 286 | "num_epochs = 10\n", 287 | "embedding_dim = 8 # sparse feature embedding dim\n", 288 | "X_train, X_test, y_train, y_test = train_test_split(data[:,:-1], data[:,-1], test_size=0.4, random_state=0)\n", 289 | "train_loader = 
DataLoader(dataset=TensorDataset(torch.from_numpy(X_train).float(), torch.from_numpy(y_train>1.8).float(), torch.from_numpy(y_train>3.8).float()), batch_size=batch_size, shuffle=True, pin_memory=True)\n", 290 | "test_loader = DataLoader(dataset=TensorDataset(torch.from_numpy(X_test).float(), torch.from_numpy(y_test>1.8).float(), torch.from_numpy(y_test>3.8).float()), batch_size=batch_size, shuffle=False, pin_memory=True)\n", 291 | "\n", 292 | "class MMOE(Module):\n", 293 | " def __init__(self, dense_feature_cols:[(int,int)], sparse_feature_cols:[(int,int)], sparse_feature_embedding_dim, \n", 294 | " hidden_dim:int, num_task:int, n_expert:int, dnn_layer_dims:list[int], dnn_dropout=0.):\n", 295 | " super(MMOE, self).__init__()\n", 296 | " self.dense_feature_cols, self.sparse_feature_cols, self.sparse_feature_embedding_dim = dense_feature_cols, sparse_feature_cols, sparse_feature_embedding_dim\n", 297 | " self.num_task, self.n_expert, self.dnn_layer_dims, self.hidden_dim = num_task, n_expert, dnn_layer_dims, hidden_dim\n", 298 | " # sparse feature embedding dict\n", 299 | " self.embed_layers = nn.ModuleDict({'embed_' + str(i): nn.Embedding(num_embeddings=valcount, embedding_dim=sparse_feature_embedding_dim) for i, valcount in sparse_feature_cols})\n", 300 | " self.input_dim = len(dense_feature_cols) + len(sparse_feature_cols) * sparse_feature_embedding_dim\n", 301 | " # experts: 线性层\n", 302 | " self.experts = torch.nn.Parameter(torch.randn(self.input_dim, hidden_dim, n_expert), requires_grad=True)\n", 303 | " self.experts_bias = torch.nn.Parameter(torch.randn(hidden_dim, n_expert), requires_grad=True)\n", 304 | " # gates: 线性层\n", 305 | " self.gates = [torch.nn.Parameter(torch.randn(self.input_dim, n_expert), requires_grad=True) for i in range(num_task)]\n", 306 | " self.gates_bias = [torch.nn.Parameter(torch.randn(n_expert), requires_grad=True) for i in range(num_task)]\n", 307 | " # dnn for each task\n", 308 | " self.dnn_nets = []\n", 309 | " for i in range(num_task):\n", 310 | " net = nn.Sequential()\n", 311 | " pre_layer_dim = hidden_dim\n", 312 | " for layer_dim in dnn_layer_dims:\n", 313 | " net.append(nn.Linear(pre_layer_dim, layer_dim))\n", 314 | " net.append(nn.BatchNorm1d(layer_dim))\n", 315 | " net.append(nn.Dropout(dnn_dropout))\n", 316 | " pre_layer_dim = layer_dim\n", 317 | " net.append(nn.Linear(dnn_layer_dims[-1], 1))\n", 318 | " net.append(nn.Sigmoid())\n", 319 | " self.dnn_nets.append(net)\n", 320 | " def forward(self, x):\n", 321 | " dense_input = x[:, :len(self.dense_feature_cols)]\n", 322 | " sparse_embeds = torch.cat([self.embed_layers['embed_' + str(i)](x[:, i].long()) for i in range(len(self.dense_feature_cols), x.shape[1])], axis=1)\n", 323 | " x = torch.cat([sparse_embeds, dense_input], axis=-1)\n", 324 | " experts_out = torch.einsum('ij, jkl -> ikl', x, self.experts) + self.experts_bias.unsqueeze(0) # [batch, hidden_dim, n_expert]\n", 325 | " gates_out = [torch.softmax(torch.einsum('ab, bc -> ac', x, gate) + self.gates_bias[i].unsqueeze(0), dim=-1) for i, gate in enumerate(self.gates)] # [batch, n_expert]\n", 326 | " towers_input = [torch.bmm(experts_out, gate_out.unsqueeze(-1)).squeeze() for gate_out in gates_out]\n", 327 | " outputs = [self.dnn_nets[i](tower_input).squeeze() for i,tower_input in enumerate(towers_input)]\n", 328 | " return outputs\n", 329 | " def parameters(self, recurse: bool = True):\n", 330 | " paras = [self.experts, self.experts_bias] + self.gates + self.gates_bias\n", 331 | " for layer in self.embed_layers.values():\n", 332 | " paras += [p for 
p in layer.parameters()]\n", 333 | " for net in self.dnn_nets:\n", 334 | " paras += [p for p in net.parameters()]\n", 335 | " return paras\n", 336 | "model = MMOE(dense_feature_cols=[i for i in range(num_number_features)], sparse_feature_cols=[(i,len(category_feature_vals[i])) for i in range(num_number_features, num_features)], sparse_feature_embedding_dim =embedding_dim, hidden_dim=embedding_dim, num_task=2, n_expert=3, dnn_layer_dims=[128, 32], dnn_dropout=0.).to(device)\n", 337 | "\n", 338 | "optimizer = torch.optim.Adam(model.parameters(), lr=0.01)\n", 339 | "criterion = MSELoss(reduction='sum').to(device)\n", 340 | "alpha = 0.5\n", 341 | "\n", 342 | "for epoch in range(num_epochs):\n", 343 | " # train:\n", 344 | " epoch_train_losses = []\n", 345 | " model.train()\n", 346 | " for i, inputs in enumerate(train_loader):\n", 347 | " optimizer.zero_grad()\n", 348 | " input = inputs[0].to(device)\n", 349 | " label_1, label_2 = inputs[1].to(device), inputs[2].to(device)\n", 350 | " output1, output2 = model(input)\n", 351 | " loss_1, loss_2 = criterion(output1, label_1), criterion(output2, label_2)\n", 352 | " loss = alpha * loss_1 + loss_2\n", 353 | " loss.backward()\n", 354 | " # torch.nn.utils.clip_grad_norm_(parameters=model.parameters(), max_norm=1, norm_type=2)\n", 355 | " optimizer.step()\n", 356 | " epoch_train_losses.append([input.shape[0], loss_1.item(), loss_2.item(), loss.item()])\n", 357 | " # validate:\n", 358 | " model.eval()\n", 359 | " epoch_test_losses = []\n", 360 | " for i, inputs in enumerate(test_loader):\n", 361 | " input = inputs[0].to(device)\n", 362 | " label_1, label_2 = inputs[1].to(device), inputs[2].to(device)\n", 363 | " output1, output2 = model(input)\n", 364 | " loss_1, loss_2 = criterion(output1, label_1), criterion(output2, label_2)\n", 365 | " loss = alpha * loss_1 + loss_2\n", 366 | " epoch_test_losses.append([input.shape[0], loss_1.item(), loss_2.item(), loss.item()])\n", 367 | " num_train, num_test = sum([x[0] for x in epoch_train_losses]), sum([x[0] for x in epoch_test_losses])\n", 368 | " train_overall_loss = sum([x[-1] for x in epoch_train_losses])/num_train\n", 369 | " train_task1_loss = sum([x[1] for x in epoch_train_losses])/num_train\n", 370 | " train_task2_loss = sum([x[2] for x in epoch_train_losses])/num_train\n", 371 | " test_overall_loss = sum([x[-1] for x in epoch_test_losses])/num_test\n", 372 | " test_task1_loss = sum([x[1] for x in epoch_test_losses])/num_test\n", 373 | " test_task2_loss = sum([x[2] for x in epoch_test_losses])/num_test\n", 374 | " # print\n", 375 | " print('['+datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\")+']', 'epoch=[{}/{}], train_mse_overall_loss: {:.6f}, train_mse_task1_loss: {:.6f}, train_mse_task2_loss: {:.6f}, validate_mse_overall_loss: {:.6f}, validate_mse_task1_loss: {:.6f}, validate_mse_task2_loss: {:.6f}'.format(epoch+1, num_epochs, train_overall_loss, train_task1_loss, train_task2_loss, test_overall_loss, test_task1_loss, test_task2_loss))\n", 376 | " gc.collect()\n", 377 | "\n", 378 | "# 这种方法共享很稀疏,收敛很快" 379 | ], 380 | "metadata": { 381 | "collapsed": false, 382 | "ExecuteTime": { 383 | "end_time": "2023-09-05T08:41:57.474389Z", 384 | "start_time": "2023-09-05T08:40:24.333973900Z" 385 | } 386 | }, 387 | "id": "56b432fec9a7451c" 388 | }, 389 | { 390 | "cell_type": "code", 391 | "execution_count": 28, 392 | "outputs": [ 393 | { 394 | "name": "stdout", 395 | "output_type": "stream", 396 | "text": [ 397 | "[2023-09-05 17:28:46] epoch=[1/10], train_mse_overall_loss: 0.011408, train_mse_task1_loss: 0.007582, 
train_mse_task2_loss: 0.007617, validate_mse_overall_loss: 0.000035, validate_mse_task1_loss: 0.000027, validate_mse_task2_loss: 0.000022\n", 398 | "[2023-09-05 17:28:57] epoch=[2/10], train_mse_overall_loss: 0.000019, train_mse_task1_loss: 0.000014, train_mse_task2_loss: 0.000012, validate_mse_overall_loss: 0.000010, validate_mse_task1_loss: 0.000008, validate_mse_task2_loss: 0.000006\n", 399 | "[2023-09-05 17:29:11] epoch=[3/10], train_mse_overall_loss: 0.000007, train_mse_task1_loss: 0.000005, train_mse_task2_loss: 0.000004, validate_mse_overall_loss: 0.000005, validate_mse_task1_loss: 0.000004, validate_mse_task2_loss: 0.000003\n", 400 | "[2023-09-05 17:29:25] epoch=[4/10], train_mse_overall_loss: 0.000004, train_mse_task1_loss: 0.000003, train_mse_task2_loss: 0.000002, validate_mse_overall_loss: 0.000003, validate_mse_task1_loss: 0.000002, validate_mse_task2_loss: 0.000002\n", 401 | "[2023-09-05 17:29:36] epoch=[5/10], train_mse_overall_loss: 0.000002, train_mse_task1_loss: 0.000002, train_mse_task2_loss: 0.000001, validate_mse_overall_loss: 0.000002, validate_mse_task1_loss: 0.000001, validate_mse_task2_loss: 0.000001\n", 402 | "[2023-09-05 17:29:47] epoch=[6/10], train_mse_overall_loss: 0.000001, train_mse_task1_loss: 0.000001, train_mse_task2_loss: 0.000001, validate_mse_overall_loss: 0.000001, validate_mse_task1_loss: 0.000001, validate_mse_task2_loss: 0.000001\n", 403 | "[2023-09-05 17:29:58] epoch=[7/10], train_mse_overall_loss: 0.000001, train_mse_task1_loss: 0.000001, train_mse_task2_loss: 0.000001, validate_mse_overall_loss: 0.000001, validate_mse_task1_loss: 0.000001, validate_mse_task2_loss: 0.000000\n", 404 | "[2023-09-05 17:30:10] epoch=[8/10], train_mse_overall_loss: 0.000001, train_mse_task1_loss: 0.000000, train_mse_task2_loss: 0.000000, validate_mse_overall_loss: 0.000000, validate_mse_task1_loss: 0.000000, validate_mse_task2_loss: 0.000000\n", 405 | "[2023-09-05 17:30:20] epoch=[9/10], train_mse_overall_loss: 0.000000, train_mse_task1_loss: 0.000000, train_mse_task2_loss: 0.000000, validate_mse_overall_loss: 0.000000, validate_mse_task1_loss: 0.000000, validate_mse_task2_loss: 0.000000\n", 406 | "[2023-09-05 17:30:31] epoch=[10/10], train_mse_overall_loss: 0.000000, train_mse_task1_loss: 0.000000, train_mse_task2_loss: 0.000000, validate_mse_overall_loss: 0.000000, validate_mse_task1_loss: 0.000000, validate_mse_task2_loss: 0.000000\n" 407 | ] 408 | } 409 | ], 410 | "source": [ 411 | "# CGC:\n", 412 | "# 一个输入,多个输出。各自任务有自己的expert外,中间共享多个expert,并行每个任务在其上的gate,进入各自任务的tower网络,预测。模块概率共享。\n", 413 | "# 相较于MMOE,增加了任务私有的expert\n", 414 | "# soft sharing\n", 415 | "\n", 416 | "# PLE:是CGC叠加多个,然后进入各自任务的tower网络。(复现很简单,这里没有去实现)\n", 417 | "# Progressive layered extraction (ple): A novel multi-task learning (mtl) model for personalized recommendations, 2020\n", 418 | "\n", 419 | "import os, gc\n", 420 | "import numpy as np\n", 421 | "import torch\n", 422 | "import torch.nn as nn\n", 423 | "from torch.nn import Module, Parameter, MSELoss\n", 424 | "from torch.utils.data import Dataset, DataLoader, TensorDataset \n", 425 | "from sklearn.model_selection import train_test_split\n", 426 | "from sklearn.preprocessing import MinMaxScaler\n", 427 | "from datetime import datetime\n", 428 | "from tqdm import tqdm\n", 429 | "import warnings\n", 430 | "warnings.filterwarnings('ignore')\n", 431 | "from torch.nn import Module, Sequential, ReLU, Dropout, Sigmoid\n", 432 | "\n", 433 | "# category: [1,2] + [all]\n", 434 | "# number: [0] + []\n", 435 | "number_feature_data = 
MinMaxScaler().fit_transform(np.array([[user_info[u][0]] for u, i, r in ratings], dtype=np.float32))\n", 436 | "category_feature_data = np.array([user_info[u][1:] + item_info[i] for u, i, r in ratings], dtype=np.int32)\n", 437 | "data = np.concatenate([number_feature_data, category_feature_data, ratings[:,-1:]], axis=-1)\n", 438 | "num_number_features = number_feature_data.shape[-1]\n", 439 | "num_category_features = category_feature_data.shape[-1]\n", 440 | "num_features = data.shape[-1] - 1\n", 441 | "category_feature_vals = {}\n", 442 | "for i in range(num_number_features, num_features):\n", 443 | " category_feature_vals[i] = sorted(list(set(list(data[:, i]))))\n", 444 | " for rid in range(data.shape[0]):\n", 445 | " data[rid, i] = category_feature_vals[i].index(data[rid, i])\n", 446 | "# print(len(user_info[list(user_info.keys())[0]]), len(item_info[list(item_info.keys())[0]]))\n", 447 | "# print(data.shape)\n", 448 | "\n", 449 | "num_users = len(user_info)\n", 450 | "num_items = len(item_info)\n", 451 | "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else ('mps:0' if torch.backends.mps.is_available() else \"cpu\"))\n", 452 | "batch_size = 100\n", 453 | "num_epochs = 10\n", 454 | "embedding_dim = 8 # sparse feature embedding dim\n", 455 | "X_train, X_test, y_train, y_test = train_test_split(data[:,:-1], data[:,-1], test_size=0.4, random_state=0)\n", 456 | "train_loader = DataLoader(dataset=TensorDataset(torch.from_numpy(X_train).float(), torch.from_numpy(y_train>1.8).float(), torch.from_numpy(y_train>3.8).float()), batch_size=batch_size, shuffle=True, pin_memory=True)\n", 457 | "test_loader = DataLoader(dataset=TensorDataset(torch.from_numpy(X_test).float(), torch.from_numpy(y_test>1.8).float(), torch.from_numpy(y_test>3.8).float()), batch_size=batch_size, shuffle=False, pin_memory=True)\n", 458 | "\n", 459 | "class Expert(Module):\n", 460 | " def __init__(self, input_dim, hidden_dim:int, n_expert:int):\n", 461 | " super(Expert, self).__init__()\n", 462 | " self.input_dim, self.hidden_dim, self.n_expert = input_dim, hidden_dim, n_expert\n", 463 | " # experts: 线性层\n", 464 | " self.experts = torch.nn.Parameter(torch.randn(self.input_dim, hidden_dim, n_expert), requires_grad=True)\n", 465 | " self.experts_bias = torch.nn.Parameter(torch.randn(hidden_dim, n_expert), requires_grad=True)\n", 466 | " def forward(self, x):\n", 467 | " expert_out = torch.einsum('ij, jkl -> ikl', x, self.experts) + self.experts_bias.unsqueeze(0) # [batch, hidden_dim, n_expert]\n", 468 | " return expert_out.permute((0,2,1)) # [batch, n_expert, hidden_dim]\n", 469 | " def parameters(self, recurse: bool = True):\n", 470 | " return [self.experts, self.experts_bias]\n", 471 | "class CGC(Module):\n", 472 | " def __init__(self, dense_feature_cols:[(int,int)], sparse_feature_cols:[(int,int)], sparse_feature_embedding_dim, \n", 473 | " hidden_dim:int, num_task:int, shared_n_expert:int, self_n_expert:int, dnn_layer_dims:list[int], dnn_dropout=0.):\n", 474 | " super(CGC, self).__init__()\n", 475 | " self.dense_feature_cols, self.sparse_feature_cols, self.sparse_feature_embedding_dim = dense_feature_cols, sparse_feature_cols, sparse_feature_embedding_dim\n", 476 | " self.num_task, self.shared_n_expert, self.self_n_expert, self.dnn_layer_dims, self.hidden_dim = num_task, shared_n_expert, self_n_expert, dnn_layer_dims, hidden_dim\n", 477 | " # sparse feature embedding dict\n", 478 | " self.embed_layers = nn.ModuleDict({'embed_' + str(i): nn.Embedding(num_embeddings=valcount, 
embedding_dim=sparse_feature_embedding_dim) for i, valcount in sparse_feature_cols})\n", 479 | " self.input_dim = len(dense_feature_cols) + len(sparse_feature_cols) * sparse_feature_embedding_dim\n", 480 | " # experts\n", 481 | " self.shared_expert = Expert(self.input_dim, hidden_dim, shared_n_expert)\n", 482 | " self.self_experts = [Expert(self.input_dim, hidden_dim, self_n_expert) for i in range(num_task)]\n", 483 | " # gates:\n", 484 | " self.gates = [nn.Sequential(nn.Linear((shared_n_expert + self_n_expert) * hidden_dim, shared_n_expert + self_n_expert), nn.Softmax()) for i in range(num_task)]\n", 485 | " # dnn for each task\n", 486 | " self.dnn_nets = []\n", 487 | " for i in range(num_task):\n", 488 | " net = nn.Sequential()\n", 489 | " pre_layer_dim = hidden_dim\n", 490 | " for layer_dim in dnn_layer_dims:\n", 491 | " net.append(nn.Linear(pre_layer_dim, layer_dim))\n", 492 | " net.append(nn.BatchNorm1d(layer_dim))\n", 493 | " net.append(nn.Dropout(dnn_dropout))\n", 494 | " pre_layer_dim = layer_dim\n", 495 | " net.append(nn.Linear(dnn_layer_dims[-1], 1))\n", 496 | " net.append(nn.Sigmoid())\n", 497 | " self.dnn_nets.append(net)\n", 498 | " def forward(self, x):\n", 499 | " batch_len = x.shape[0]\n", 500 | " dense_input = x[:, :len(self.dense_feature_cols)]\n", 501 | " sparse_embeds = torch.cat([self.embed_layers['embed_' + str(i)](x[:, i].long()) for i in range(len(self.dense_feature_cols), x.shape[1])], axis=1)\n", 502 | " x = torch.cat([sparse_embeds, dense_input], axis=-1)\n", 503 | " # torch.Size([100, 169]) torch.Size([100, 8, 3]) [torch.Size([100, 8, 2]), torch.Size([100, 8, 2])]\n", 504 | " shared_expert_out = self.shared_expert(x)\n", 505 | " experts_out = [torch.cat([shared_expert_out, self.self_experts[i](x)], dim=1) for i in range(self.num_task)]\n", 506 | " gates_out = [gate(experts_out[i].reshape((batch_len,-1))) for i, gate in enumerate(self.gates)]\n", 507 | " towers_input = [torch.bmm(experts_out[i].permute((0,2,1)), gate_out.unsqueeze(-1)).squeeze() for i, gate_out in enumerate(gates_out)]\n", 508 | " outputs = [self.dnn_nets[i](tower_input).squeeze() for i,tower_input in enumerate(towers_input)]\n", 509 | " return outputs\n", 510 | " def parameters(self, recurse: bool = True):\n", 511 | " paras = [p for p in self.shared_expert.parameters()]\n", 512 | " for expert in self.self_experts:\n", 513 | " paras += [p for p in expert.parameters()]\n", 514 | " for gate in self.gates:\n", 515 | " paras += [p for p in gate.parameters()]\n", 516 | " for layer in self.embed_layers.values():\n", 517 | " paras += [p for p in layer.parameters()]\n", 518 | " for net in self.dnn_nets:\n", 519 | " paras += [p for p in net.parameters()]\n", 520 | " return paras\n", 521 | "model = CGC(dense_feature_cols=[i for i in range(num_number_features)], sparse_feature_cols=[(i,len(category_feature_vals[i])) for i in range(num_number_features, num_features)], sparse_feature_embedding_dim =embedding_dim, hidden_dim=embedding_dim, num_task=2, shared_n_expert=3, self_n_expert=2, dnn_layer_dims=[128, 32], dnn_dropout=0.).to(device)\n", 522 | "\n", 523 | "optimizer = torch.optim.Adam(model.parameters(), lr=0.01)\n", 524 | "criterion = MSELoss(reduction='sum').to(device)\n", 525 | "alpha = 0.5\n", 526 | "\n", 527 | "for epoch in range(num_epochs):\n", 528 | " # train:\n", 529 | " epoch_train_losses = []\n", 530 | " model.train()\n", 531 | " for i, inputs in enumerate(train_loader):\n", 532 | " optimizer.zero_grad()\n", 533 | " input = inputs[0].to(device)\n", 534 | " label_1, label_2 = 
inputs[1].to(device), inputs[2].to(device)\n", 535 | " output1, output2 = model(input)\n", 536 | " loss_1, loss_2 = criterion(output1, label_1), criterion(output2, label_2)\n", 537 | " loss = alpha * loss_1 + loss_2\n", 538 | " loss.backward()\n", 539 | " # torch.nn.utils.clip_grad_norm_(parameters=model.parameters(), max_norm=1, norm_type=2)\n", 540 | " optimizer.step()\n", 541 | " epoch_train_losses.append([input.shape[0], loss_1.item(), loss_2.item(), loss.item()])\n", 542 | " # validate:\n", 543 | " model.eval()\n", 544 | " epoch_test_losses = []\n", 545 | " for i, inputs in enumerate(test_loader):\n", 546 | " input = inputs[0].to(device)\n", 547 | " label_1, label_2 = inputs[1].to(device), inputs[2].to(device)\n", 548 | " output1, output2 = model(input)\n", 549 | " loss_1, loss_2 = criterion(output1, label_1), criterion(output2, label_2)\n", 550 | " loss = alpha * loss_1 + loss_2\n", 551 | " epoch_test_losses.append([input.shape[0], loss_1.item(), loss_2.item(), loss.item()])\n", 552 | " num_train, num_test = sum([x[0] for x in epoch_train_losses]), sum([x[0] for x in epoch_test_losses])\n", 553 | " train_overall_loss = sum([x[-1] for x in epoch_train_losses])/num_train\n", 554 | " train_task1_loss = sum([x[1] for x in epoch_train_losses])/num_train\n", 555 | " train_task2_loss = sum([x[2] for x in epoch_train_losses])/num_train\n", 556 | " test_overall_loss = sum([x[-1] for x in epoch_test_losses])/num_test\n", 557 | " test_task1_loss = sum([x[1] for x in epoch_test_losses])/num_test\n", 558 | " test_task2_loss = sum([x[2] for x in epoch_test_losses])/num_test\n", 559 | " # print\n", 560 | " print('['+datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\")+']', 'epoch=[{}/{}], train_mse_overall_loss: {:.6f}, train_mse_task1_loss: {:.6f}, train_mse_task2_loss: {:.6f}, validate_mse_overall_loss: {:.6f}, validate_mse_task1_loss: {:.6f}, validate_mse_task2_loss: {:.6f}'.format(epoch+1, num_epochs, train_overall_loss, train_task1_loss, train_task2_loss, test_overall_loss, test_task1_loss, test_task2_loss))\n", 561 | " gc.collect()\n" 562 | ], 563 | "metadata": { 564 | "collapsed": false, 565 | "ExecuteTime": { 566 | "end_time": "2023-09-05T09:30:31.199366800Z", 567 | "start_time": "2023-09-05T09:28:33.121316800Z" 568 | } 569 | }, 570 | "id": "9d5eb170c07aefe0" 571 | }, 572 | { 573 | "cell_type": "code", 574 | "execution_count": 14, 575 | "outputs": [ 576 | { 577 | "name": "stdout", 578 | "output_type": "stream", 579 | "text": [ 580 | "[2023-09-07 12:25:18] epoch=[1/10], train_mse_overall_loss: 0.004533, train_mse_task1_loss: 0.002647, train_mse_task2_loss: 0.003210, validate_mse_overall_loss: 0.000000, validate_mse_task1_loss: 0.000000, validate_mse_task2_loss: 0.000000\n", 581 | "[2023-09-07 12:25:36] epoch=[2/10], train_mse_overall_loss: 0.000000, train_mse_task1_loss: 0.000000, train_mse_task2_loss: 0.000000, validate_mse_overall_loss: 0.000000, validate_mse_task1_loss: 0.000000, validate_mse_task2_loss: 0.000000\n", 582 | "[2023-09-07 12:25:53] epoch=[3/10], train_mse_overall_loss: 0.000004, train_mse_task1_loss: 0.000000, train_mse_task2_loss: 0.000004, validate_mse_overall_loss: 0.000000, validate_mse_task1_loss: 0.000000, validate_mse_task2_loss: 0.000000\n", 583 | "[2023-09-07 12:26:11] epoch=[4/10], train_mse_overall_loss: 0.000000, train_mse_task1_loss: 0.000000, train_mse_task2_loss: 0.000000, validate_mse_overall_loss: 0.000000, validate_mse_task1_loss: 0.000000, validate_mse_task2_loss: 0.000000\n", 584 | "[2023-09-07 12:26:29] epoch=[5/10], train_mse_overall_loss: 0.000000, 
train_mse_task1_loss: 0.000000, train_mse_task2_loss: 0.000000, validate_mse_overall_loss: 0.000000, validate_mse_task1_loss: 0.000000, validate_mse_task2_loss: 0.000000\n", 585 | "[2023-09-07 12:26:46] epoch=[6/10], train_mse_overall_loss: 0.000000, train_mse_task1_loss: 0.000000, train_mse_task2_loss: 0.000000, validate_mse_overall_loss: 0.000000, validate_mse_task1_loss: 0.000000, validate_mse_task2_loss: 0.000000\n", 586 | "[2023-09-07 12:27:04] epoch=[7/10], train_mse_overall_loss: 0.000000, train_mse_task1_loss: 0.000000, train_mse_task2_loss: 0.000000, validate_mse_overall_loss: 0.000000, validate_mse_task1_loss: 0.000000, validate_mse_task2_loss: 0.000000\n", 587 | "[2023-09-07 12:27:21] epoch=[8/10], train_mse_overall_loss: 0.000000, train_mse_task1_loss: 0.000000, train_mse_task2_loss: 0.000000, validate_mse_overall_loss: 0.000000, validate_mse_task1_loss: 0.000000, validate_mse_task2_loss: 0.000000\n", 588 | "[2023-09-07 12:27:38] epoch=[9/10], train_mse_overall_loss: 0.000000, train_mse_task1_loss: 0.000000, train_mse_task2_loss: 0.000000, validate_mse_overall_loss: 0.000000, validate_mse_task1_loss: 0.000000, validate_mse_task2_loss: 0.000000\n", 589 | "[2023-09-07 12:27:56] epoch=[10/10], train_mse_overall_loss: 0.000000, train_mse_task1_loss: 0.000000, train_mse_task2_loss: 0.000000, validate_mse_overall_loss: 0.000000, validate_mse_task1_loss: 0.000000, validate_mse_task2_loss: 0.000000\n" 590 | ] 591 | } 592 | ], 593 | "source": [ 594 | "# AITM: 建模漏斗任务中多层级级联关系,如点击 -> 购买\n", 595 | "# Modeling the Sequential Dependence among Audience Multi-step Conversions with Multi-task Learning in Targeted Display Advertising, KDD-2021\n", 596 | "# input embeddings -> 各自任务的tower 网络,低层任务的tower网络最后一层通过注意力机制(相当于自适应迁移信息)将信息融合到高层任务中(如上述点击任务到购买任务)\n", 597 | "# 高层桥接的注意力模式\n", 598 | "\n", 599 | "import os, gc\n", 600 | "import numpy as np\n", 601 | "import torch\n", 602 | "import torch.nn as nn\n", 603 | "from torch.nn import Module, Parameter, MSELoss\n", 604 | "from torch.utils.data import Dataset, DataLoader, TensorDataset \n", 605 | "from sklearn.model_selection import train_test_split\n", 606 | "from sklearn.preprocessing import MinMaxScaler\n", 607 | "from datetime import datetime\n", 608 | "from tqdm import tqdm\n", 609 | "import warnings\n", 610 | "warnings.filterwarnings('ignore')\n", 611 | "from torch.nn import Module, Sequential, ReLU, Dropout, Sigmoid\n", 612 | "\n", 613 | "# category: [1,2] + [all]\n", 614 | "# number: [0] + []\n", 615 | "number_feature_data = MinMaxScaler().fit_transform(np.array([[user_info[u][0]] for u, i, r in ratings], dtype=np.float32))\n", 616 | "category_feature_data = np.array([user_info[u][1:] + item_info[i] for u, i, r in ratings], dtype=np.int32)\n", 617 | "data = np.concatenate([number_feature_data, category_feature_data, ratings[:,-1:]], axis=-1)\n", 618 | "num_number_features = number_feature_data.shape[-1]\n", 619 | "num_category_features = category_feature_data.shape[-1]\n", 620 | "num_features = data.shape[-1] - 1\n", 621 | "category_feature_vals = {}\n", 622 | "for i in range(num_number_features, num_features):\n", 623 | " category_feature_vals[i] = sorted(list(set(list(data[:, i]))))\n", 624 | " for rid in range(data.shape[0]):\n", 625 | " data[rid, i] = category_feature_vals[i].index(data[rid, i])\n", 626 | "# print(len(user_info[list(user_info.keys())[0]]), len(item_info[list(item_info.keys())[0]]))\n", 627 | "# print(data.shape)\n", 628 | "\n", 629 | "num_users = len(user_info)\n", 630 | "num_items = len(item_info)\n", 631 | "device = 
torch.device(\"cuda:0\" if torch.cuda.is_available() else ('mps:0' if torch.backends.mps.is_available() else \"cpu\"))\n", 632 | "batch_size = 100\n", 633 | "num_epochs = 10\n", 634 | "embedding_dim = 8 # sparse feature embedding dim\n", 635 | "X_train, X_test, y_train, y_test = train_test_split(data[:,:-1], data[:,-1], test_size=0.4, random_state=0)\n", 636 | "train_loader = DataLoader(dataset=TensorDataset(torch.from_numpy(X_train).float(), torch.from_numpy(y_train>1.8).float(), torch.from_numpy(y_train>3.8).float()), batch_size=batch_size, shuffle=True, pin_memory=True)\n", 637 | "test_loader = DataLoader(dataset=TensorDataset(torch.from_numpy(X_test).float(), torch.from_numpy(y_test>1.8).float(), torch.from_numpy(y_test>3.8).float()), batch_size=batch_size, shuffle=False, pin_memory=True)\n", 638 | "\n", 639 | "# 双向迁移模块\n", 640 | "class AIT(nn.Module):\n", 641 | " def __init__(self, input_dim, hidden_dim):\n", 642 | " super(AIT, self).__init__()\n", 643 | " self.input_size, self.hidden_dim = input_dim, hidden_dim\n", 644 | " self.h1 = nn.Sequential(nn.Linear(input_dim, hidden_dim), nn.ReLU(), nn.Linear(hidden_dim, hidden_dim))\n", 645 | " self.h2 = nn.Sequential(nn.Linear(input_dim, hidden_dim), nn.ReLU(), nn.Linear(hidden_dim, hidden_dim))\n", 646 | " self.h3 = nn.Sequential(nn.Linear(input_dim, hidden_dim), nn.ReLU(), nn.Linear(hidden_dim, hidden_dim))\n", 647 | " self.transfer_nn = nn.Sequential(nn.Linear(input_dim, hidden_dim), nn.ReLU(), nn.Linear(hidden_dim, hidden_dim))\n", 648 | " def forward(self, p, q): \n", 649 | " p1, p2, p3 = self.h1(p), self.h2(p), self.h3(p)\n", 650 | " q1, q2, q3 = self.h1(q), self.h2(q), self.h3(q)\n", 651 | " w_p = torch.sum(p2 * p3, dim=-1, keepdim=True) / np.sqrt(p2.shape[-1])\n", 652 | " w_q = torch.sum(q2 * q3, dim=-1, keepdim=True) / np.sqrt(q2.shape[-1])\n", 653 | " w_p, w_q = w_p / (w_p + w_q), w_q / (w_p + w_q) # 注意力权重\n", 654 | " z = self.transfer_nn(w_p * p1 + w_q * q1)\n", 655 | " return z\n", 656 | "class AITM(Module):\n", 657 | " def __init__(self, dense_feature_cols:[(int,int)], sparse_feature_cols:[(int,int)], sparse_feature_embedding_dim, \n", 658 | " hidden_dim:int, num_task:int, dnn_layer_dims:list[int], dnn_dropout=0.):\n", 659 | " super(AITM, self).__init__()\n", 660 | " self.dense_feature_cols, self.sparse_feature_cols, self.sparse_feature_embedding_dim = dense_feature_cols, sparse_feature_cols, sparse_feature_embedding_dim\n", 661 | " self.num_task, self.dnn_layer_dims, self.hidden_dim = num_task, dnn_layer_dims, hidden_dim\n", 662 | " # sparse feature embedding dict\n", 663 | " self.embed_layers = nn.ModuleDict({'embed_' + str(i): nn.Embedding(num_embeddings=valcount, embedding_dim=sparse_feature_embedding_dim) for i, valcount in sparse_feature_cols})\n", 664 | " self.input_dim = len(dense_feature_cols) + len(sparse_feature_cols) * sparse_feature_embedding_dim\n", 665 | " # dnn for each task: towers\n", 666 | " self.dnn_nets = []\n", 667 | " for i in range(num_task):\n", 668 | " net = nn.Sequential()\n", 669 | " pre_layer_dim = self.input_dim\n", 670 | " for layer_dim in dnn_layer_dims:\n", 671 | " net.append(nn.Linear(pre_layer_dim, layer_dim))\n", 672 | " net.append(nn.BatchNorm1d(layer_dim))\n", 673 | " net.append(nn.Dropout(dnn_dropout))\n", 674 | " pre_layer_dim = layer_dim\n", 675 | " net.append(nn.Linear(dnn_layer_dims[-1], hidden_dim))\n", 676 | " net.append(nn.Sigmoid())\n", 677 | " self.dnn_nets.append(net)\n", 678 | " # AITM\n", 679 | " self.aits = [AIT(hidden_dim, hidden_dim) for i in range(num_task)]\n", 680 | " # 
final dnn\n", 681 | " self.final_dnns = [nn.Sequential(nn.Linear(hidden_dim, 1), nn.Sigmoid()) for i in range(num_task)]\n", 682 | " def forward(self, x):\n", 683 | " batch_len = x.shape[0]\n", 684 | " dense_input = x[:, :len(self.dense_feature_cols)]\n", 685 | " sparse_embeds = torch.cat([self.embed_layers['embed_' + str(i)](x[:, i].long()) for i in range(len(self.dense_feature_cols), x.shape[1])], axis=1)\n", 686 | " x = torch.cat([sparse_embeds, dense_input], axis=-1)\n", 687 | " towers_out = [dnn_net(x) for dnn_net in self.dnn_nets]\n", 688 | " # 第0个,令p = q\n", 689 | " outputs = []\n", 690 | " for i in range(self.num_task):\n", 691 | " if i == 0:\n", 692 | " z = self.aits[i](towers_out[i], towers_out[i])\n", 693 | " else:\n", 694 | " z = self.aits[i](towers_out[i-1], towers_out[i])\n", 695 | " outputs.append(self.final_dnns[i](z).squeeze())\n", 696 | " return outputs\n", 697 | " def parameters(self, recurse: bool = True):\n", 698 | " paras = []\n", 699 | " for layer in self.embed_layers.values():\n", 700 | " paras += [p for p in layer.parameters()]\n", 701 | " for net in self.dnn_nets:\n", 702 | " paras += [p for p in net.parameters()]\n", 703 | " for ait in self.aits:\n", 704 | " paras += [p for p in ait.parameters()]\n", 705 | " for net in self.final_dnns:\n", 706 | " paras += [p for p in net.parameters()]\n", 707 | " return paras\n", 708 | "model = AITM(dense_feature_cols=[i for i in range(num_number_features)], sparse_feature_cols=[(i,len(category_feature_vals[i])) for i in range(num_number_features, num_features)], sparse_feature_embedding_dim =embedding_dim, hidden_dim=embedding_dim, num_task=2, dnn_layer_dims=[128, 32], dnn_dropout=0.).to(device)\n", 709 | "\n", 710 | "optimizer = torch.optim.Adam(model.parameters(), lr=0.01)\n", 711 | "criterion = MSELoss(reduction='sum').to(device)\n", 712 | "alpha = 0.5\n", 713 | "\n", 714 | "for epoch in range(num_epochs):\n", 715 | " # train:\n", 716 | " epoch_train_losses = []\n", 717 | " model.train()\n", 718 | " for i, inputs in enumerate(train_loader):\n", 719 | " optimizer.zero_grad()\n", 720 | " input = inputs[0].to(device)\n", 721 | " label_1, label_2 = inputs[1].to(device), inputs[2].to(device)\n", 722 | " output1, output2 = model(input)\n", 723 | " loss_1, loss_2 = criterion(output1, label_1), criterion(output2, label_2)\n", 724 | " loss = alpha * loss_1 + loss_2\n", 725 | " loss.backward()\n", 726 | " # torch.nn.utils.clip_grad_norm_(parameters=model.parameters(), max_norm=1, norm_type=2)\n", 727 | " optimizer.step()\n", 728 | " epoch_train_losses.append([input.shape[0], loss_1.item(), loss_2.item(), loss.item()])\n", 729 | " # validate:\n", 730 | " model.eval()\n", 731 | " epoch_test_losses = []\n", 732 | " for i, inputs in enumerate(test_loader):\n", 733 | " input = inputs[0].to(device)\n", 734 | " label_1, label_2 = inputs[1].to(device), inputs[2].to(device)\n", 735 | " output1, output2 = model(input)\n", 736 | " loss_1, loss_2 = criterion(output1, label_1), criterion(output2, label_2)\n", 737 | " loss = alpha * loss_1 + loss_2\n", 738 | " epoch_test_losses.append([input.shape[0], loss_1.item(), loss_2.item(), loss.item()])\n", 739 | " num_train, num_test = sum([x[0] for x in epoch_train_losses]), sum([x[0] for x in epoch_test_losses])\n", 740 | " train_overall_loss = sum([x[-1] for x in epoch_train_losses])/num_train\n", 741 | " train_task1_loss = sum([x[1] for x in epoch_train_losses])/num_train\n", 742 | " train_task2_loss = sum([x[2] for x in epoch_train_losses])/num_train\n", 743 | " test_overall_loss = sum([x[-1] for x in 
epoch_test_losses])/num_test\n", 744 | " test_task1_loss = sum([x[1] for x in epoch_test_losses])/num_test\n", 745 | " test_task2_loss = sum([x[2] for x in epoch_test_losses])/num_test\n", 746 | " # print\n", 747 | " print('['+datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\")+']', 'epoch=[{}/{}], train_mse_overall_loss: {:.6f}, train_mse_task1_loss: {:.6f}, train_mse_task2_loss: {:.6f}, validate_mse_overall_loss: {:.6f}, validate_mse_task1_loss: {:.6f}, validate_mse_task2_loss: {:.6f}'.format(epoch+1, num_epochs, train_overall_loss, train_task1_loss, train_task2_loss, test_overall_loss, test_task1_loss, test_task2_loss))\n", 748 | " gc.collect()" 749 | ], 750 | "metadata": { 751 | "collapsed": false, 752 | "ExecuteTime": { 753 | "end_time": "2023-09-07T04:27:56.523896800Z", 754 | "start_time": "2023-09-07T04:24:57.388345400Z" 755 | } 756 | }, 757 | "id": "6ee215880c96bd80" 758 | }, 759 | { 760 | "cell_type": "code", 761 | "execution_count": null, 762 | "outputs": [], 763 | "source": [ 764 | "# \n", 765 | "\n" 766 | ], 767 | "metadata": { 768 | "collapsed": false 769 | }, 770 | "id": "40d1c5cb82fcf9b2" 771 | } 772 | ], 773 | "metadata": { 774 | "kernelspec": { 775 | "display_name": "Python 3", 776 | "language": "python", 777 | "name": "python3" 778 | }, 779 | "language_info": { 780 | "codemirror_mode": { 781 | "name": "ipython", 782 | "version": 2 783 | }, 784 | "file_extension": ".py", 785 | "mimetype": "text/x-python", 786 | "name": "python", 787 | "nbconvert_exporter": "python", 788 | "pygments_lexer": "ipython2", 789 | "version": "2.7.6" 790 | } 791 | }, 792 | "nbformat": 4, 793 | "nbformat_minor": 5 794 | } 795 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CTRRecommenderModels (ongoing) 2 | 3 | ## 1.最新经验总结和前沿研究调研 4 | 5 | 对学术界和工业界的推荐系统进行了系统性总结,形成了《特征工程》、《召回》和《排序》三个章节,包括技术要点和前沿研究。 6 | 7 | ### 1.1 搜广推之《特征工程》前沿论文: 8 | 9 | Multi-modal Representation Learning for Short Video Understanding and Recommendation. ICME Workshops 2019. 10 | 11 | An Embedding Learning Framework for Numerical Features in CTR Prediction, KDD 2021. 12 | 13 | Dynamic Explicit Embedding Representation for Numerical Features in Deep CTR Prediction, CIKM 2022. 14 | 15 | Numerical Feature Representation with Hybrid 𝑁 -ary Encoding, CIKM 2022. 16 | 17 | AutoFeature: Searching for Feature Interactions and Their Architectures for Click-through Rate Prediction, CIKM 2020. 18 | 19 | Towards Automated Neural Interaction Discovery for Click-Through Rate Prediction, KDD 2020. 20 | 21 | AutoGroup: Automatic Feature Grouping for Modelling Explicit High-Order Feature Interactions in CTR Prediction, SIGIR 2020. 22 | 23 | Cognitive Evolutionary Search to Select Feature Interactions for Click-Through Rate Prediction, KDD 2023. 24 | 25 | AdnFM: An Attentive DenseNet based Factorization Machine for Click-Through-Rate Prediction, ICCDE 2022. 26 | 27 | CAN:Feature Co-Action Network for Click-Through Rate Prediction, WSDM 2022. 28 | 29 | Enhancing Explicit and Implicit Feature Interactions via Information Sharing for Parallel Deep CTR Models , DLP-KDD 2021. 30 | 31 | FINAL: Factorized Interaction Layer for CTR Prediction, SIGIR 2023. 32 | 33 | AdaFS: Adaptive Feature Selection in Deep Recommender System, KDD 2022. 34 | 35 | LPFS:Learnable Polarizing Feature Selection for Click-Through Rate Prediction, 2022. 36 | 37 | Optimizing Feature Set for Click-Through Rate Prediction, WWW 2023. 
38 | 39 | Automatic Feature Selection By One-Shot Neural Architecture Search In Recommendation Systems, WWW 2023. 40 | 41 | Catch: Collaborative Feature Set Search for Automated Feature Engineering, WWW 2023. 42 | 43 | 经验总结:https://blog.csdn.net/nihaomafb/article/details/133242598 44 | 45 | ### 1.2. 推荐系统之《召回》前沿论文 46 | 47 | Large Scale Product Graph Construction for Recommendation in E-commerce, 2020. 48 | 49 | KGAT: Knowledge Graph Attention Network for Recommendation, KDD 2019. 50 | 51 | Multi-Interest Network with Dynamic Routing for Recommendation at Tmall, 2019. 52 | 53 | Controllable Multi-Interest Framework for Recommendation, KDD 2019. 54 | 55 | Sparse-Interest Network for Sequential Recommendation, WSDM 2021. 56 | 57 | Multi-task Learning Model based on Multiple Characteristics and Multiple Interests for CTR prediction, 2022. 58 | 59 | SDM: Sequential Deep Matching Model for Online Large-scale Recommender System, CIKM 2019. 60 | 61 | Search-based User Interest Modeling with Lifelong Sequential Behavior Data for Click-Through Rate Prediction, 2020. 62 | 63 | End-to-End User Behavior Retrieval in Click-Through Rate Prediction Model, 2021. 64 | 65 | Learning Tree-based Deep Model for Recommender Systems, KDD 2019. 66 | 67 | DemiNet: Dependency-Aware Multi-Interest Network with Self-Supervised Graph Learning for Click-Through Rate Prediction, AAAI 2022. 68 | 69 | MISS: Multi-Interest Self-Supervised Learning Framework for Click-Through Rate Prediction, ICDE 2022. 70 | 71 | Path-based Deep Network for candidate item matching in recommenders, SIGIR 2021. 72 | 73 | Sampling-bias-corrected neural modeling for large corpus item recommendations, RS 2019. 74 | 75 | 经验总结:https://blog.csdn.net/nihaomafb/article/details/133249562 76 | 77 | ### 1.3. 推荐系统之《排序》前沿论文: 78 | 79 | A Survey on User Behavior Modeling in Recommender Systems, 2023. 80 | 81 | Deep interest network for click-through rate prediction, KDD,2018. 82 | 83 | DIEN: Deep Interest Evolution Network for Click-Through Rate Prediction, AAAI 2018. 84 | 85 | SASRec: Self-attentive Sequential Recommendation, ICDM 2018. 86 | 87 | BSTransformer: Behavior Sequence Transformer for E-commerce Recommendation in Alibaba, 2019. 88 | 89 | Deep Session Interest Network for Click-Through Rate Prediction, IJCAI 2019. 90 | 91 | Learning to Retrieve User Behaviors for Click-through Rate Estimation, TIOS 2023. 92 | 93 | A Survey on User Behavior Modeling in Recommender Systems, 2023. 94 | 95 | Practice on long sequential user behavior modeling for click-through rate prediction, KDD 2019. 96 | 97 | Lifelong sequential modeling with personalized memorization for user response prediction, SIGIR 2019. 98 | 99 | Sparse Attentive Memory Network for Click-through Rate Prediction with Long Sequences, CIKM 2022. 100 | 101 | Search-based User Interest Modeling with Lifelong Sequential Behavior Data for Click-Through Rate Prediction, 2020. 102 | 103 | End-to-End User Behavior Retrieval in Click-Through Rate Prediction Model, 2021. 104 | 105 | Sampling Is All You Need on Modeling Long-Term User Behaviors for CTR Prediction, CIKM 2022. 106 | 107 | Adversarial Filtering Modeling on Long-term User Behavior Sequences for Click-Through Rate Prediction, SIGIR 2022. 108 | 109 | TWIN: TWo-stage Interest Network for Lifelong User Behavior Modeling in CTR Prediction at Kuaishou, KDD 2023. 110 | 111 | Divide and Conquer: Towards Better Embedding-based Retrieval for Recommender Systems from a Multi-task Perspective, WWW 2023. 
112 | 113 | Denoising Self-Attentive Sequential Recommendation, RS 2022. 114 | 115 | Hierarchically Fusing Long and Short-Term User Interests for Click-Through Rate Prediction in Product Search, CIKM 2022. 116 | 117 | Rethinking Personalized Ranking at Pinterest: An End-to-End Approach, RS 2022. 118 | 119 | Page-Wise Personalized Recommendations in an Industrial e-Commerce Setting, RS 2022. 120 | 121 | MTBRN: MultiplexTarget-BehaviorRelationEnhancedNetwork forClick-ThroughRatePrediction, CIKM 2020. 122 | 123 | Multi-Scale User Behavior Network for Entire Space Multi-Task Learning, CIKM 2022. 124 | 125 | Dynamic Multi-Behavior Sequence Modeling for Next Item Recommendation, AAAI 2023. 126 | 127 | Hierarchical Projection Enhanced Multi-behavior Recommendation, KDD 2023. 128 | 129 | Beyond Matching: Modeling Two-Sided Multi-Behavioral Sequences for Dynamic Person-Job Fit, DASFAA 2021. 130 | 131 | Deep Position-wise Interaction Network for CTR Prediction, SIGIR 2021. 132 | 133 | AutoDebias: Learning to Debias for Recommendation, SIGIR 2021. 134 | 135 | Unbiased Learning to Rank: Online or Offline?, TIOS 2020. 136 | 137 | Fair pairwise learning to rank, 2020. 138 | 139 | CAM2: Conformity-Aware Multi-Task Ranking Model for Large-Scale Recommender Systems, WWW 2023. 140 | 141 | Entire Space Cascade Delayed Feedback Modeling for Effective Conversion Rate Prediction, CIKM 2023. 142 | 143 | ESMC: Entire Space Multi-Task Model for Post-Click Conversion Rate via Parameter Constraint, 2023. 144 | 145 | Click-Conversion Multi-Task Model with Position Bias Mitigation for Sponsored Search in eCommerce, SIGIR 2023. 146 | 147 | DCMT: A Direct Entire-Space Causal Multi-Task Framework for Post-Click Conversion Estimation, ICDE 2023. 148 | 149 | Scenario-Adaptive Feature Interaction for Click-Through Rate Prediction, KDD 2023. 150 | 151 | OptMSM: Optimizing Multi-Scenario Modeling for Click-Through Rate Prediction, 2023. 152 | 153 | Leaving No One Behind: A Multi-Scenario Multi-Task Meta Learning Approach for Advertiser Modeling, WSDM 2022. 154 | 155 | M5: Multi-Modal Multi-Interest Multi-Scenario Matching for Over-the-Top Recommendation, KDD 2023. 156 | 157 | Automatic Expert Selection for Multi-Scenario and Multi-Task Search, SIGIR 2022. 158 | 159 | Continual Transfer Learning for Cross-Domain Click-Through Rate Prediction at Taobao, WWW 2023. 160 | 161 | Cross-domain Augmentation Networks for Click-Through Rate Prediction, 2023. 162 | 163 | One Model to Serve All: Star Topology Adaptive Recommender for Multi-Domain CTR Prediction, CIKM 2021. 164 | 165 | HiNet: Novel Multi-Scenario & Multi-Task Learning with Hierarchical Information Extraction, ICDE 2023. 166 | 167 | Multi-Faceted Hierarchical Multi-Task Learning for Recommender Systems, CIKM 2022. 168 | 169 | Large Scale Product Graph Construction for Recommendation in E-commerce, 2020. 170 | 171 | Progressive Layered Extraction (PLE): A Novel Multi-Task Learning (MTL) Model for Personalized Recommendations, RS 2020. 172 | 173 | AdaTT: Adaptive Task-to-Task Fusion Network for Multitask Learning in Recommendations, KDD 2023. 174 | 175 | Entire Space Multi-Task Model: An Effective Approach for Estimating Post-Click Conversion Rate, SIGIR 2018. 176 | 177 | Modeling the Sequential Dependence among Audience Multi-step Conversions with Multi-task Learning in Targeted Display Advertising, KDD 2021. 178 | 179 | Advances and Challenges of Multi-task Learning Method in Recommender System: A Survey, 2023. 
180 | 181 | Multi-Objective Recommender Systems: Survey and Challenges, RS 2022. 182 | 183 | Optimizing Airbnb Search Journey with Multi-task Learning, KDD 2023. 184 | 185 | A Contrastive Sharing Model for Multi-Task Recommendation, WWW 2022. 186 | 187 | Adaptive Pattern Extraction Multi-Task Learning for Multi-Step Conversion Estimations, 2023. 188 | 189 | MSSM: A Multiple-level Sparse Sharing Model for Efficient Multi-Task Learning, SIGIR 2021. 190 | 191 | STEM: Unleashing the Power of Embeddings for Multi-task Recommendation, 2023. 192 | 193 | Multi-Task Recommendations with Reinforcement Learning, WWW 2023. 194 | 195 | Hierarchically Modeling Micro and Macro Behaviors via Multi-Task Learning for Conversion Rate Prediction, SIGIR 2021. 196 | 197 | MNCM: Multi-level Network Cascades Model for Multi-Task Learning, CIKM 2022. 198 | 199 | Prototype Feature Extraction for Multi-task Learning, WWW 2022. 200 | 201 | Fast greedy map inference for determinantal point process to improve recommendation diversity, NIPS 2018. 202 | 203 | Neural Re-ranking in Multi-stage Recommender Systems: A Review, 2022. 204 | 205 | Generative Flow Network for Listwise Recommendation, KDD 2023. 206 | 207 | Context-aware Reranking with Utility Maximization for Recommendation, 2022. 208 | 209 | Revisit Recommender System in the Permutation Prospective, 2021. 210 | 211 | Entire Cost Enhanced Multi-Task Model for Online-to-Offline Conversion Rate Prediction, 2022. 212 | 213 | GRN: Generative Rerank Network for Context-wise Recommendation, 2021. 214 | 215 | PEAR: Personalized Re-ranking with Contextualized Transformer for Recommendation, WWW 2022. 216 | 217 | Personalized Diversification for Neural Re-ranking in Recommendation, ICDE 2023. 218 | 219 | Multi-Level Interaction Reranking with User Behavior History, SIGIR 2022. 220 | 221 | Slate-Aware Ranking for Recommendation, WSDM 2023. 222 | 223 | RankFormer: Listwise Learning-to-Rank Using Listwide Labels, kdd 2023. 224 | 225 | PIER: Permutation-Level Interest-Based End-to-End Re-ranking Framework in E-commerce, KDD 2023. 226 | 227 | Multi-factor Sequential Re-ranking with Perception-Aware Diversification, KDD 2023. 228 | 229 | APG: Adaptive Parameter Generation Network for Click-Through Rate Prediction, NIPS 2022. 230 | 231 | AutoFAS: Automatic Feature and Architecture Selection for Pre-Ranking System, 2022. 232 | 233 | NAS-CTR: Efficient Neural Architecture Search for Click-Through Rate Prediction, SIGIR 2022. 234 | 235 | Controllable Multi-Objective Re-ranking with Policy Hypernetworks, KDD 2023. 236 | 237 | Improving Training Stability for Multitask Ranking Models in Recommender Systems, KDD 2023. 238 | 239 | Iterative Boosting Deep Neural Networks for Predicting Click-Through Rate, 2020. 240 | 241 | DHEN: A Deep and Hierarchical Ensemble Network for Large-Scale Click-Through Rate Prediction, KDD 2022. 242 | 243 | AdaEnsemble: Learning Adaptively Sparse Structured Ensemble Network for Click-Through Rate Prediction, 2022. 244 | 245 | Multi-Task Deep Recommender Systems: A Survey, 2023. 246 | 247 | Expressive user embedding from churn and recommendation multi-task learning, WWW 2023. 248 | 249 | PEPNet: Parameter and Embedding Personalized Network for Infusing with Personalized Prior Information, KDD 2023. 250 | 251 | 252 | 253 | ### 2.我实现25个推荐CTR经典模型代码(开箱即用-你需要进一步调优,我的运行环境是mac m1 + python 3.9,所有代码都本地完成测试),这个库后续继续更新; 254 | 255 | I have implemented some common-used CTR / recommender models for reusage, including 25 models as follows: 256 | 257 | #### 2.1. 
4 commonly used machine-learning ensemble models: random forest, Xgboost, lightgbm and catboost, plus hyper-parameter tuning with hyperopt and bayesian-optimization. (This part calls sklearn and the corresponding python packages.) 258 | 259 | #### 2.2. 5 basic models: Matrix Factorization (MF), SVD, Factorization Machine (FM), NeuralCF (WWW 2017), AutoencoderRec (WWW 2015). 260 | 261 | #### 2.3. 8 deep network models: DeepFM (IJCAI 2017), DSSM (CIKM 2013), Wide & Deep (RS 2016), DeepCross (DCN, KDD 2016), Attentive Factorization Machine (AFM, IJCAI 2017), Product-based Neural Network (PNN, ICDM 2016), Neural Factorization Machine (NFM, SIGIR 2017), FiBiNET (RS 2019). 262 | 263 | #### 2.4. 5 sequential recommendation models: GRU4Rec (ICLR 2016), Deep Interest Network (DIN, KDD 2018), Deep Interest Evolution Network (DIEN, AAAI 2018), Self-attentive Sequential Recommendation (SASRec, ICDM 2018), Behavior Sequence Transformer (BSTransformer, 2019). 264 | 265 | #### 2.5. 3 multi-interest models: Multi-interest network with dynamic routing (MIND, 2019), Controllable Multi-Interest Framework for Recommendation (Comirec, KDD 2020), Sparse-Interest Network (SINE, WSDM 2021). 266 | These models address the problem that a single user-interest vector can hardly capture a user's diverse preferences (especially when they must be extracted from a long behavior history): they derive several interest vectors from the behavior sequence instead. When the history is short (<50), ordinary sequence models (GRU, attention-based sequence models, etc.) are enough; when it is long, efficiency has to be considered, e.g. use the target item to retrieve the most similar history items and model only that sub-sequence. Modeling multiple interests works like clustering: capsule networks, multiple selection channels (e.g. top-k activated interests) and similar mechanisms activate one or several channels (i.e. interest points) at a time, in a hard way or a soft way (e.g. attention); a minimal toy sketch of this top-k routing is appended at the end of this document. 267 | 268 | #### 2.6. 4 multi-task learning models: Entire-space multi-task model (ESSM, SIGIR 2020), Multi-gate Mixture-of-Experts (MMOE, KDD 2018), Customized Gate Control (CGC, RS 2020), Audience Multi-step Conversions with Multi-task Learning (AITM, KDD 2021). 269 | 270 | In practice, multi-task learning is usually one of the larger sources of lift: find related tasks in the scenario and design the shared structure around how those tasks relate. There is a lot of design space here — the shared module can sit at the bottom (shared embeddings), in the middle layers or near the top, with different degrees of sharing; the relative weights of the task losses; sampling efficiency; and whether the sharing is hard or soft. One caveat: design the architecture according to how strongly the tasks correlate, to avoid negative transfer. A minimal sketch of PLE as stacked CGC layers is appended at the end of this document. 271 | 272 | 273 | Judging from industry papers of recent years, the focus is mainly on mining ultra-long user behavior sequences (balancing efficiency and effectiveness, for ranking), multi-interest modeling (for retrieval) and multi-task learning (design of the sharing structure, mostly for ranking); papers on feature engineering (feature discretization and feature interaction) are relatively few. 274 | 275 | ![推荐模型-演化](https://github.com/BinFuPKU/CTRRecommenderModels/assets/29002864/768b5ea0-eefd-4c16-a97d-3a0bbaca7317) 276 | 277 | 278 | 279 | 280 | AutoRec - Autoencoders Meet Collaborative Filtering, WWW 2015. 281 | 282 | Factorization Machines: Fast Context-aware Recommendations with Factorization Machines, SIGIR 2011. 283 | 284 | DSSM: Learning deep structured semantic models for web search using clickthrough data, CIKM 2013. 285 | 286 | DSSM 287 | 288 | NeuralCF: Neural Collaborative Filtering, WWW 2017. 289 | 290 | NeuralCF 291 | 292 | 293 | Wide&Deep: Wide & deep learning for recommender systems, RS 2016. 294 | 295 | Wide Deep 296 | 297 | 298 | DeepFM: Deepfm: a factorization-machine based neural network for ctr prediction, IJCAI 2017. 299 | 300 | DeepFM 301 | 302 | DeepCross: Deep Crossing: Web-Scale Modeling without Manually Crafted Combinatorial Features, KDD 2016. 303 | 304 | ![DeepCross DCN模型](https://github.com/BinFuPKU/CTRRecommenderModels/assets/29002864/96f05c1d-21a4-46d8-9c7a-951ccf147dd5) 305 | 306 | 307 | AFM: Attentive Factorization Machine, IJCAI 2017. 308 | 309 | ![AFM-2](https://github.com/BinFuPKU/CTRRecommenderModels/assets/29002864/1b2a3962-4049-4cce-a0d1-2a9182da85c7) 310 | 311 | NFM: Neural Factorization Machines for Sparse Predictive Analytics, SIGIR 2017. 312 | 313 | ![NFM](https://github.com/BinFuPKU/CTRRecommenderModels/assets/29002864/9c19319f-ea62-4218-a442-275de422f784) 314 | 315 | FiBiNET: FiBiNET: Combining Feature Importance and Bilinear feature Interaction for Click-Through Rate Prediction, RS 2019. 316 | 317 | FiBiNET 318 | 319 | 320 | PNN: Product-based Neural Networks for User Response Prediction, ICDM 2016. 
321 | 322 | PNN 323 | 324 | 325 | GRU4Rec: Session-based Recommendations with Recurrent Neural Networks, ICLR 2016. 326 | 327 | GRU4REC 328 | 329 | 330 | Caser:Personalized Top-N Sequential Recommendation via Convolutional Sequence Embedding, WSDM 2018. 331 | 332 | Caser 333 | 334 | 335 | DIN: Deep Interest Network for Click-Through Rate Prediction, KDD 2018. 336 | 337 | DIN 338 | 339 | 340 | DIEN: Deep Interest Evolution Network for Click-Through Rate Prediction, AAAI 2018. 341 | 342 | DIEN 343 | 344 | 345 | SASRec: Self-attentive Sequential Recommendation, ICDM 2018. 346 | 347 | SASRec 348 | 349 | BSTransformer: Behavior Sequence Transformer for E-commerce Recommendation in Alibaba, 2019. 350 | 351 | ![BST](https://github.com/BinFuPKU/CTRRecommenderModels/assets/29002864/0e14913b-e770-4ce0-b87b-1a6b62dcaad9) 352 | 353 | 354 | MIND:Multi-interest network with dynamic routing for recommendation at Tmall, 2019. 355 | 356 | ![MIND](https://github.com/BinFuPKU/CTRRecommenderModels/assets/29002864/3089993c-d291-47b8-95af-e61e81e6d86e) 357 | 358 | 359 | Comirec:Controllable Multi-Interest Framework for Recommendation, KDD 2020. 360 | 361 | ![Comirec](https://github.com/BinFuPKU/CTRRecommenderModels/assets/29002864/08884e82-b3c2-4c7a-b0c8-949c6c9150ff) 362 | 363 | 364 | SINE: Sparse-Interest Network for Sequential Recommendation, WSDM 2021. 365 | 366 | SINE 367 | 368 | 369 | 370 | ESSM:Entire Space Multi-task Modeling via Post-Click Behavior Decomposition for Conversion Rate Prediction, SIGIR 2020. 371 | 372 | ![ESSM](https://github.com/BinFuPKU/CTRRecommenderModels/assets/29002864/a7217768-d1f1-43af-9db0-2ed3768c199c) 373 | 374 | MMOE:Modeling Task Relationships in Multi-task Learning with Multi-gate Mixture-of-Experts, KDD 2018. 375 | 376 | ![MMOE](https://github.com/BinFuPKU/CTRRecommenderModels/assets/29002864/b32c61dd-cef2-4fc1-913f-802580d3741c) 377 | 378 | CGC:Progressive Layered Extraction (PLE): A Novel Multi-Task Learning (MTL) Model for Personalized Recommendations, RS 2020. 379 | 380 | ![CGC](https://github.com/BinFuPKU/CTRRecommenderModels/assets/29002864/1aade055-a884-4723-b868-ff87d557f1f5) 381 | 382 | 383 | AITM: Modeling the Sequential Dependence among Audience Multi-step Conversions with Multi-task Learning in Targeted Display Advertising, KDD 2021. 384 | 385 | AITM 386 | 387 | 388 | The project is ongoing ...... 389 | -------------------------------------------------------------------------------- /推荐系统之召回:基本原理和前沿研究PPT-2023-Dr. Bin Fu.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BinFuPKU/CTRRecommenderModels/51e9ceeac695e6c0176704dd286d89e2b20abdae/推荐系统之召回:基本原理和前沿研究PPT-2023-Dr. Bin Fu.pdf -------------------------------------------------------------------------------- /推荐系统之排序:基本原理和前沿研究PPT-2023-Dr. Bin Fu.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BinFuPKU/CTRRecommenderModels/51e9ceeac695e6c0176704dd286d89e2b20abdae/推荐系统之排序:基本原理和前沿研究PPT-2023-Dr. Bin Fu.pdf -------------------------------------------------------------------------------- /搜广推之特征工程:基本原理和前沿研究PPT-2023-Dr. Bin Fu.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BinFuPKU/CTRRecommenderModels/51e9ceeac695e6c0176704dd286d89e2b20abdae/搜广推之特征工程:基本原理和前沿研究PPT-2023-Dr. Bin Fu.pdf --------------------------------------------------------------------------------
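
A minimal sketch for README.md §2.5 above (multi-interest models). The paragraph there describes deriving several interest vectors from a behavior history and activating top-k interest channels in a hard or soft way; the toy module below only illustrates that routing idea — it is not code from the 2.4 MultiInterest notebook, and every name, shape and hyper-parameter in it (prototype interests, `top_k`, max-scoring against the target item) is an assumption.

```python
# Toy top-k interest routing, in the spirit of SINE/ComiRec but not a reproduction of either.
import torch
import torch.nn as nn

class TopKInterestRouter(nn.Module):
    def __init__(self, embed_dim: int, num_prototypes: int = 8, top_k: int = 2):
        super().__init__()
        # learnable "interest prototype" vectors shared by all users
        self.prototypes = nn.Parameter(torch.randn(num_prototypes, embed_dim) * 0.02)
        self.top_k = top_k

    def forward(self, history: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
        # history: [batch, seq_len, embed_dim], target: [batch, embed_dim]
        # 1. soft assignment of every history item to every interest prototype
        assign = torch.softmax(history @ self.prototypes.t(), dim=-1)           # [B, L, K]
        # 2. activate only the top-k prototypes per user (hard channel selection)
        proto_scores = assign.mean(dim=1)                                        # [B, K]
        topk_scores, topk_idx = proto_scores.topk(self.top_k, dim=-1)            # [B, k]
        # 3. one interest vector per activated channel: attention-pool the history
        gathered = torch.gather(assign, 2,
                                topk_idx.unsqueeze(1).expand(-1, history.size(1), -1))  # [B, L, k]
        weights = torch.softmax(gathered, dim=1)                                 # normalize over the sequence
        interests = torch.einsum('blk,bld->bkd', weights, history)               # [B, k, D]
        # 4. hard routing: score the target against each interest and keep the best one
        scores = torch.einsum('bkd,bd->bk', interests, target)                   # [B, k]
        return scores.max(dim=-1).values                                         # [B]

# toy usage: 4 users, 20 history items, 16-dim embeddings
router = TopKInterestRouter(embed_dim=16, num_prototypes=8, top_k=2)
hist, tgt = torch.randn(4, 20, 16), torch.randn(4, 16)
print(router(hist, tgt).shape)  # torch.Size([4])
```

In a real retrieval setting each selected interest vector would typically be matched against an item index rather than a single target item.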
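
A minimal sketch for README.md §2.6 and the CGC cell in 2.5 Multitask.ipynb, which notes that PLE is simply several CGC extraction layers stacked before the task towers but leaves it unimplemented. The sketch below follows that reading on dense inputs only; the expert/gate counts, dimensions and class names are assumptions, not the repository's code. It also registers sub-modules with `nn.ModuleList`, so `.to(device)` and the default `.parameters()` work without the manual `parameters()` overrides used in the multitask cells.

```python
# Toy PLE = stacked CGC extraction layers + per-task towers (dense input only).
import torch
import torch.nn as nn

class ExtractionLayer(nn.Module):
    """One CGC-style layer: per-task experts + shared experts, with per-stream gates."""
    def __init__(self, in_dim, hidden_dim, num_tasks, n_task_expert=2, n_shared_expert=2):
        super().__init__()
        self.num_tasks = num_tasks
        make = lambda n: nn.ModuleList(nn.Sequential(nn.Linear(in_dim, hidden_dim), nn.ReLU())
                                       for _ in range(n))
        # nn.ModuleList (instead of a plain python list) registers the experts,
        # so model.to(device) and model.parameters() see them automatically.
        self.task_experts = nn.ModuleList(make(n_task_expert) for _ in range(num_tasks))
        self.shared_experts = make(n_shared_expert)
        # one gate per task stream plus one gate for the shared stream
        self.gates = nn.ModuleList(nn.Linear(in_dim, n_task_expert + n_shared_expert)
                                   for _ in range(num_tasks))
        self.shared_gate = nn.Linear(in_dim, num_tasks * n_task_expert + n_shared_expert)

    def forward(self, task_inputs, shared_input):
        shared_out = [e(shared_input) for e in self.shared_experts]
        new_task_inputs, all_task_out = [], []
        for t in range(self.num_tasks):
            task_out = [e(task_inputs[t]) for e in self.task_experts[t]]
            all_task_out += task_out
            experts = torch.stack(task_out + shared_out, dim=1)               # [B, E, H]
            gate = torch.softmax(self.gates[t](task_inputs[t]), dim=-1)       # [B, E]
            new_task_inputs.append((gate.unsqueeze(-1) * experts).sum(dim=1)) # [B, H]
        experts = torch.stack(all_task_out + shared_out, dim=1)
        gate = torch.softmax(self.shared_gate(shared_input), dim=-1)
        new_shared = (gate.unsqueeze(-1) * experts).sum(dim=1)
        return new_task_inputs, new_shared

class PLE(nn.Module):
    def __init__(self, in_dim, hidden_dim, num_tasks=2, num_layers=2):
        super().__init__()
        dims = [in_dim] + [hidden_dim] * num_layers
        self.layers = nn.ModuleList(ExtractionLayer(dims[i], dims[i + 1], num_tasks)
                                    for i in range(num_layers))
        self.towers = nn.ModuleList(nn.Sequential(nn.Linear(hidden_dim, 32), nn.ReLU(),
                                                  nn.Linear(32, 1), nn.Sigmoid())
                                    for _ in range(num_tasks))

    def forward(self, x):
        task_inputs, shared = [x] * len(self.towers), x
        for layer in self.layers:
            task_inputs, shared = layer(task_inputs, shared)
        return [tower(h).squeeze(-1) for tower, h in zip(self.towers, task_inputs)]

# toy usage: two tasks, a batch of 100 dense feature vectors of width 169 (as in the cells above)
outs = PLE(in_dim=169, hidden_dim=8)(torch.randn(100, 169))
print([o.shape for o in outs])  # [torch.Size([100]), torch.Size([100])]
```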