├── DIEN_train_example.ipynb ├── DIN_train_example.ipynb ├── README.md ├── __pycache__ ├── activations.cpython-37.pyc ├── alibaba_data_reader.cpython-37.pyc ├── layers.cpython-37.pyc ├── loss.cpython-37.pyc ├── model.cpython-37.pyc └── utils.cpython-37.pyc ├── activations.py ├── alibaba_data_reader.py ├── layers.py ├── loss.py ├── main.ipynb ├── main.py ├── model.py ├── tensorboard.log ├── tensorboard.sh └── utils.py /DIEN_train_example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import tensorflow as tf\n", 10 | "from tensorflow.keras import layers\n", 11 | "from layers import AUGRU\n", 12 | "from activations import Dice,dice\n", 13 | "import pandas as pd\n", 14 | "from model import DIEN\n", 15 | "import alibaba_data_reader as data_reader\n", 16 | "import utils\n", 17 | "import matplotlib\n", 18 | "import matplotlib.pyplot as plt\n", 19 | "from matplotlib.font_manager import FontProperties\n", 20 | "from matplotlib.pyplot import MultipleLocator\n", 21 | "import numpy as np\n", 22 | "import os\n", 23 | "from loss import AuxLayer" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 2, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "def mkdir(path):\n", 33 | " try:\n", 34 | " if not os.path.exists(path):\n", 35 | " os.makedirs(path)\n", 36 | " return 0\n", 37 | " except:\n", 38 | " return 1\n", 39 | "model_name = \"dien\"" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 3, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "def is_in_notebook():\n", 49 | " import sys\n", 50 | " return 'ipykernel' in sys.modules\n", 51 | "def clear_output():\n", 52 | " \"\"\"\n", 53 | " clear output for both jupyter notebook and the console\n", 54 | " \"\"\"\n", 55 | " import os\n", 56 | " os.system('cls' if os.name == 'nt' else 
'clear')\n", 57 | " if is_in_notebook():\n", 58 | " from IPython.display import clear_output as clear\n", 59 | " clear()" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 4, 65 | "metadata": {}, 66 | "outputs": [ 67 | { 68 | "name": "stdout", 69 | "output_type": "stream", 70 | "text": [ 71 | "2\n" 72 | ] 73 | } 74 | ], 75 | "source": [ 76 | "print(1)\n", 77 | "clear_output()\n", 78 | "print(2)" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 5, 84 | "metadata": { 85 | "tags": [] 86 | }, 87 | "outputs": [ 88 | { 89 | "name": "stdout", 90 | "output_type": "stream", 91 | "text": [ 92 | "2.0.0\n", 93 | "GPU Available: True\n" 94 | ] 95 | } 96 | ], 97 | "source": [ 98 | "print(tf.__version__)\n", 99 | "print(\"GPU Available: \", tf.test.is_gpu_available())" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 6, 105 | "metadata": {}, 106 | "outputs": [], 107 | "source": [ 108 | "file_path = \"/nfs/project/boweihan_2/DIEN/dien_final/\"\n", 109 | "file_path = \"\"" 110 | ] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "metadata": {}, 115 | "source": [ 116 | "# 模型训练" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 7, 122 | "metadata": {}, 123 | "outputs": [ 124 | { 125 | "data": { 126 | "text/html": [ 127 | "
\n", 128 | "\n", 141 | "\n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | "
brandcatecms_segidcms_groupgenderagepvalueshoppingoccupationuser_class_level
0460561129689713273324
\n", 173 | "
" 174 | ], 175 | "text/plain": [ 176 | " brand cate cms_segid cms_group gender age pvalue shopping \\\n", 177 | "0 460561 12968 97 13 2 7 3 3 \n", 178 | "\n", 179 | " occupation user_class_level \n", 180 | "0 2 4 " 181 | ] 182 | }, 183 | "execution_count": 7, 184 | "metadata": {}, 185 | "output_type": "execute_result" 186 | } 187 | ], 188 | "source": [ 189 | "train_data, test_data, embedding_count = data_reader.get_data()\n", 190 | "embedding_count" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": 8, 196 | "metadata": {}, 197 | "outputs": [], 198 | "source": [ 199 | "embedding_features_list = data_reader.get_embedding_features_list()\n", 200 | "user_behavior_features = data_reader.get_user_behavior_features()\n", 201 | "embedding_count_dict = data_reader.get_embedding_count_dict(embedding_features_list, embedding_count)\n", 202 | "embedding_dim_dict = data_reader.get_embedding_dim_dict(embedding_features_list)" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": 9, 208 | "metadata": {}, 209 | "outputs": [], 210 | "source": [ 211 | "import time\n", 212 | "stamp = time.strftime(\"%Y%m%d-%H%M%S\", time.localtime())\n", 213 | "mkdir(\"./train_log/\" + model_name)\n", 214 | "log_path = \"./train_log/\"+model_name+\"/%s\" % stamp\n", 215 | "train_summary_writer = tf.summary.create_file_writer(log_path)\n", 216 | "tf.summary.trace_on(graph=True, profiler=True)\n", 217 | "loss_file_name = utils.get_file_name()\n", 218 | "mkdir(\"./loss/\" + model_name + \"/\")\n", 219 | "utils.make_train_loss_dir(loss_file_name, cols=[\"train_aux_loss\",\"train_target_loss\",\"train_final_loss\"], model=model_name)\n", 220 | "utils.make_test_loss_dir(loss_file_name, cols=[\"test_aux_loss\",\"test_target_loss\",\"test_final_loss\"], model=model_name)" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": 10, 226 | "metadata": {}, 227 | "outputs": [ 228 | { 229 | "data": { 230 | "text/plain": [ 231 | "" 232 | ] 233 | 
}, 234 | "execution_count": 10, 235 | "metadata": {}, 236 | "output_type": "execute_result" 237 | } 238 | ], 239 | "source": [ 240 | "model = DIEN(\n", 241 | " embedding_count_dict, \n", 242 | " embedding_dim_dict, \n", 243 | " embedding_features_list, \n", 244 | " user_behavior_features, \n", 245 | " activation=\"dice\")\n", 246 | "model" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": 11, 252 | "metadata": {}, 253 | "outputs": [], 254 | "source": [ 255 | "min_batch = 0\n", 256 | "batch = 100\n", 257 | "optimizer = tf.keras.optimizers.Adam(lr=1e-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)\n", 258 | "loss_metric = tf.keras.metrics.Sum()\n", 259 | "auc_metric = tf.keras.metrics.AUC()\n", 260 | "alpha = 1\n", 261 | "epochs = 3" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": 12, 267 | "metadata": {}, 268 | "outputs": [], 269 | "source": [ 270 | "label, target_cate, target_brand, cms_segid, cms_group, gender, age, pvalue, shopping, occupation, user_class_level, hist_brand_behavior_clk, hist_cate_behavior_clk, hist_brand_behavior_show, hist_cate_behavior_show, min_batch, clk_length, show_length = data_reader.get_batch_data(train_data, min_batch, batch = batch)" 271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "execution_count": 13, 276 | "metadata": {}, 277 | "outputs": [], 278 | "source": [ 279 | "def get_train_data(label, target_cate, target_brand, cms_segid, cms_group, gender, age, pvalue, shopping, occupation, user_class_level, hist_brand_behavior_clk, hist_cate_behavior_clk, hist_brand_behavior_show, hist_cate_behavior_show):\n", 280 | " user_profile_dict = {\n", 281 | " \"cms_segid\": cms_segid,\n", 282 | " \"cms_group\": cms_group,\n", 283 | " \"gender\": gender,\n", 284 | " \"age\": age,\n", 285 | " \"pvalue\": pvalue,\n", 286 | " \"shopping\": shopping,\n", 287 | " \"occupation\": occupation,\n", 288 | " \"user_class_level\": user_class_level\n", 289 | " }\n", 290 | " 
user_profile_list = [\"cms_segid\", \"cms_group\", \"gender\", \"age\", \"pvalue\", \"shopping\", \"occupation\", \"user_class_level\"]\n", 291 | " user_behavior_list = [\"brand\", \"cate\"]\n", 292 | " click_behavior_dict = {\n", 293 | " \"brand\": hist_brand_behavior_clk,\n", 294 | " \"cate\": hist_cate_behavior_clk\n", 295 | " }\n", 296 | " noclick_behavior_dict = {\n", 297 | " \"brand\": hist_brand_behavior_show,\n", 298 | " \"cate\": hist_cate_behavior_show\n", 299 | " }\n", 300 | " target_item_dict = {\n", 301 | " \"brand\": target_cate,\n", 302 | " \"cate\": target_brand\n", 303 | " }\n", 304 | " return user_profile_dict, user_profile_list, user_behavior_list, click_behavior_dict, noclick_behavior_dict, target_item_dict" 305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": 14, 310 | "metadata": {}, 311 | "outputs": [], 312 | "source": [ 313 | "user_profile_dict, user_profile_list, user_behavior_list, click_behavior_dict, noclick_behavior_dict, target_item_dict = get_train_data(label, target_cate, target_brand, cms_segid, cms_group, gender, age, pvalue, shopping, occupation, user_class_level, hist_brand_behavior_clk, hist_cate_behavior_clk, hist_brand_behavior_show, hist_cate_behavior_show) " 314 | ] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "execution_count": 15, 319 | "metadata": {}, 320 | "outputs": [], 321 | "source": [ 322 | "def train_one_step(user_profile_dict, user_profile_list, click_behavior_dict, target_item_dict, noclick_behavior_dict, user_behavior_list, label):\n", 323 | " with tf.GradientTape() as tape:\n", 324 | " output, logit, aux_loss = model(user_profile_dict, user_profile_list, click_behavior_dict, target_item_dict, noclick_behavior_dict, user_behavior_list)\n", 325 | " target_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logit,labels=tf.cast(label, dtype=tf.float32)))\n", 326 | " final_loss = target_loss + alpha * aux_loss\n", 327 | " #print(\"[Train Loss] aux_loss=\" + 
str(aux_loss.numpy()) + \", target_loss=\" + str(target_loss.numpy()) + \", final_loss=\" + str(final_loss.numpy()))\n", 328 | " gradient = tape.gradient(final_loss, model.trainable_variables)\n", 329 | " clip_gradient, _ = tf.clip_by_global_norm(gradient, 5.0)\n", 330 | " optimizer.apply_gradients(zip(clip_gradient, model.trainable_variables))\n", 331 | " loss_metric(final_loss)\n", 332 | " return aux_loss.numpy(), target_loss.numpy(), final_loss.numpy()" 333 | ] 334 | }, 335 | { 336 | "cell_type": "code", 337 | "execution_count": 16, 338 | "metadata": {}, 339 | "outputs": [], 340 | "source": [ 341 | "def get_test_loss(user_profile_dict, user_profile_list, click_behavior_dict, target_item_dict, noclick_behavior_dict, user_behavior_list, label):\n", 342 | " output, logit, aux_loss = model(user_profile_dict, user_profile_list, click_behavior_dict, target_item_dict, noclick_behavior_dict, user_behavior_list)\n", 343 | " target_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logit,labels=tf.cast(label, dtype=tf.float32)))\n", 344 | " final_loss = target_loss + alpha * aux_loss\n", 345 | " #print(\"[Test Loss] aux_loss=\" + str(aux_loss.numpy()) + \", target_loss=\" + str(target_loss.numpy()) + \", final_loss=\" + str(final_loss.numpy()))\n", 346 | " return aux_loss.numpy(), target_loss.numpy(), final_loss.numpy()" 347 | ] 348 | }, 349 | { 350 | "cell_type": "code", 351 | "execution_count": 18, 352 | "metadata": {}, 353 | "outputs": [], 354 | "source": [ 355 | "#aux_loss, target_loss, final_loss = train_one_step(user_profile_dict, user_profile_list, click_behavior_dict, target_item_dict, noclick_behavior_dict, user_behavior_list, label)" 356 | ] 357 | }, 358 | { 359 | "cell_type": "code", 360 | "execution_count": 17, 361 | "metadata": {}, 362 | "outputs": [ 363 | { 364 | "name": "stdout", 365 | "output_type": "stream", 366 | "text": [ 367 | "WARNING:tensorflow:Layer dien is casting an input tensor from dtype float64 to the layer's dtype of float32, 
which is new behavior in TensorFlow 2. The layer has dtype float32 because it's dtype defaults to floatx.\n", 368 | "\n", 369 | "If you intended to run this layer in float32, you can safely ignore this warning. If in doubt, this warning is likely only an issue if you are porting a TensorFlow 1.X model to TensorFlow 2.\n", 370 | "\n", 371 | "To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.\n", 372 | "\n" 373 | ] 374 | }, 375 | { 376 | "data": { 377 | "text/plain": [ 378 | "(0.89547175, 0.69206244, 1.5875342)" 379 | ] 380 | }, 381 | "execution_count": 17, 382 | "metadata": {}, 383 | "output_type": "execute_result" 384 | } 385 | ], 386 | "source": [ 387 | "get_test_loss(user_profile_dict, user_profile_list, click_behavior_dict, target_item_dict, noclick_behavior_dict, user_behavior_list, label)" 388 | ] 389 | }, 390 | { 391 | "cell_type": "code", 392 | "execution_count": 18, 393 | "metadata": {}, 394 | "outputs": [ 395 | { 396 | "name": "stdout", 397 | "output_type": "stream", 398 | "text": [ 399 | "Model: \"dien\"\n", 400 | "_________________________________________________________________\n", 401 | "Layer (type) Output Shape Param # \n", 402 | "=================================================================\n", 403 | "embedding_5 (Embedding) multiple 448 \n", 404 | "_________________________________________________________________\n", 405 | "embedding_1 (Embedding) multiple 32000000 \n", 406 | "_________________________________________________________________\n", 407 | "embedding (Embedding) multiple 32100992 \n", 408 | "_________________________________________________________________\n", 409 | "embedding_3 (Embedding) multiple 832 \n", 410 | "_________________________________________________________________\n", 411 | 
"embedding_2 (Embedding) multiple 6208 \n", 412 | "_________________________________________________________________\n", 413 | "embedding_4 (Embedding) multiple 192 \n", 414 | "_________________________________________________________________\n", 415 | "embedding_8 (Embedding) multiple 320 \n", 416 | "_________________________________________________________________\n", 417 | "embedding_6 (Embedding) multiple 640 \n", 418 | "_________________________________________________________________\n", 419 | "embedding_7 (Embedding) multiple 256 \n", 420 | "_________________________________________________________________\n", 421 | "embedding_9 (Embedding) multiple 320 \n", 422 | "_________________________________________________________________\n", 423 | "gru (GRU) multiple 99072 \n", 424 | "_________________________________________________________________\n", 425 | "softmax (Softmax) multiple 0 \n", 426 | "_________________________________________________________________\n", 427 | "aux_layer (AuxLayer) multiple 31876 \n", 428 | "_________________________________________________________________\n", 429 | "augru (AUGRU) multiple 98688 \n", 430 | "_________________________________________________________________\n", 431 | "sequential_1 (Sequential) multiple 148122 \n", 432 | "=================================================================\n", 433 | "Total params: 64,487,966\n", 434 | "Trainable params: 64,485,614\n", 435 | "Non-trainable params: 2,352\n", 436 | "_________________________________________________________________\n" 437 | ] 438 | } 439 | ], 440 | "source": [ 441 | "model.summary()" 442 | ] 443 | }, 444 | { 445 | "cell_type": "code", 446 | "execution_count": 19, 447 | "metadata": {}, 448 | "outputs": [ 449 | { 450 | "name": "stdout", 451 | "output_type": "stream", 452 | "text": [ 453 | "dien/embedding_5/embeddings:0\n", 454 | "dien/embedding_1/embeddings:0\n", 455 | "dien/embedding/embeddings:0\n", 456 | "dien/embedding_3/embeddings:0\n", 457 | 
"dien/embedding_2/embeddings:0\n", 458 | "dien/embedding_4/embeddings:0\n", 459 | "dien/embedding_8/embeddings:0\n", 460 | "dien/embedding_6/embeddings:0\n", 461 | "dien/embedding_7/embeddings:0\n", 462 | "dien/embedding_9/embeddings:0\n", 463 | "dien/gru/kernel:0\n", 464 | "dien/gru/recurrent_kernel:0\n", 465 | "dien/gru/bias:0\n", 466 | "dien/aux_layer/sequential/batch_normalization/gamma:0\n", 467 | "dien/aux_layer/sequential/batch_normalization/beta:0\n", 468 | "dien/aux_layer/sequential/dense/kernel:0\n", 469 | "dien/aux_layer/sequential/dense/bias:0\n", 470 | "dien/aux_layer/sequential/dense_1/kernel:0\n", 471 | "dien/aux_layer/sequential/dense_1/bias:0\n", 472 | "dien/aux_layer/sequential/dense_2/kernel:0\n", 473 | "dien/aux_layer/sequential/dense_2/bias:0\n", 474 | "dien/augru/gru_gates/dense_3/kernel:0\n", 475 | "dien/augru/gru_gates/dense_3/bias:0\n", 476 | "dien/augru/gru_gates/dense_4/kernel:0\n", 477 | "dien/augru/gru_gates_1/dense_5/kernel:0\n", 478 | "dien/augru/gru_gates_1/dense_5/bias:0\n", 479 | "dien/augru/gru_gates_1/dense_6/kernel:0\n", 480 | "dien/augru/gru_gates_2/dense_7/kernel:0\n", 481 | "dien/augru/gru_gates_2/dense_7/bias:0\n", 482 | "dien/augru/gru_gates_2/dense_8/kernel:0\n", 483 | "dien/sequential_1/batch_normalization_1/gamma:0\n", 484 | "dien/sequential_1/batch_normalization_1/beta:0\n", 485 | "dien/sequential_1/dense_9/kernel:0\n", 486 | "dien/sequential_1/dense_9/bias:0\n", 487 | "Variable:0\n", 488 | "Variable:0\n", 489 | "dien/sequential_1/dense_10/kernel:0\n", 490 | "dien/sequential_1/dense_10/bias:0\n", 491 | "Variable:0\n", 492 | "Variable:0\n", 493 | "dien/sequential_1/dense_11/kernel:0\n", 494 | "dien/sequential_1/dense_11/bias:0\n" 495 | ] 496 | } 497 | ], 498 | "source": [ 499 | "for var in model.trainable_variables:\n", 500 | " print(var.name)" 501 | ] 502 | }, 503 | { 504 | "cell_type": "code", 505 | "execution_count": 20, 506 | "metadata": {}, 507 | "outputs": [], 508 | "source": [ 509 | "def get_loss_fig(train_loss, 
test_loss):\n", 510 | " loss_list = [\"aux_loss\", \"final_loss\"]\n", 511 | " color_list = [\"r\", \"b\"]\n", 512 | " plt.figure()\n", 513 | " cnt = 0\n", 514 | " for k in loss_list:\n", 515 | " loss = train_loss[k]\n", 516 | " step = list(np.arange(len(loss)))\n", 517 | " plt.plot(step,loss,color_list[cnt]+\"-\",label=\"train_\" + k, linestyle=\"--\")\n", 518 | " cnt += 1\n", 519 | " cnt = 0\n", 520 | " for k in loss_list:\n", 521 | " loss = test_loss[k]\n", 522 | " step = list(np.arange(len(loss)))\n", 523 | " plt.plot(step,loss,color_list[cnt],label=\"test_\" + k)\n", 524 | " cnt += 1\n", 525 | " plt.title(\"Loss\")\n", 526 | " plt.xlabel('iteration')\n", 527 | " plt.ylabel('loss')\n", 528 | " plt.legend()\n", 529 | " clear_output()\n", 530 | " plt.savefig(\"./loss/\" + model_name + \"/loss.png\")\n", 531 | " clear_output()\n", 532 | " plt.show()" 533 | ] 534 | }, 535 | { 536 | "cell_type": "code", 537 | "execution_count": 21, 538 | "metadata": {}, 539 | "outputs": [], 540 | "source": [ 541 | "def record_test_loss(test_loss, test_data, step):\n", 542 | " label, target_cate, target_brand, cms_segid, cms_group, gender, age, pvalue, shopping, occupation, user_class_level, hist_brand_behavior_clk, hist_cate_behavior_clk, hist_brand_behavior_show, hist_cate_behavior_show, clk_length, show_length = data_reader.get_test_data(test_data)\n", 543 | " user_profile_dict, user_profile_list, user_behavior_list, click_behavior_dict, noclick_behavior_dict, target_item_dict = get_train_data(label, target_cate, target_brand, cms_segid, cms_group, gender, age, pvalue, shopping, occupation, user_class_level, hist_brand_behavior_clk, hist_cate_behavior_clk, hist_brand_behavior_show, hist_cate_behavior_show)\n", 544 | " aux_loss, target_loss, final_loss = get_test_loss(user_profile_dict, user_profile_list, click_behavior_dict, target_item_dict, noclick_behavior_dict, user_behavior_list, label)\n", 545 | " loss_dict = dict()\n", 546 | " loss_dict[\"aux_loss\"] = str(aux_loss)\n", 547 
| " loss_dict[\"target_loss\"] = str(target_loss)\n", 548 | " loss_dict[\"final_loss\"] = str(final_loss)\n", 549 | " utils.add_loss(loss_dict, loss_file_name, level=\"test\")\n", 550 | " test_loss[\"aux_loss\"].append(float(aux_loss))\n", 551 | " test_loss[\"target_loss\"].append(float(target_loss))\n", 552 | " test_loss[\"final_loss\"].append(float(final_loss))\n", 553 | " with train_summary_writer.as_default():\n", 554 | " tf.summary.scalar(\"test_aux_loss epoch: \"+str(epoch), aux_loss, step = step)\n", 555 | " tf.summary.scalar(\"test_target_loss epoch: \"+str(epoch), target_loss, step = step)\n", 556 | " tf.summary.scalar(\"test_final_loss epoch: \"+str(epoch), final_loss, step = step)" 557 | ] 558 | }, 559 | { 560 | "cell_type": "code", 561 | "execution_count": 22, 562 | "metadata": {}, 563 | "outputs": [], 564 | "source": [ 565 | "mkdir(\"./checkpoint/\" + model_name)\n", 566 | "checkpoint_path = \"./checkpoint/\" + model_name + \"/cp-{epoch:04d}.ckpt\"\n", 567 | "checkpoint_dir = os.path.dirname(checkpoint_path)" 568 | ] 569 | }, 570 | { 571 | "cell_type": "code", 572 | "execution_count": 23, 573 | "metadata": {}, 574 | "outputs": [ 575 | { 576 | "data": { 577 | "image/png": "\n", 578 | "text/plain": [ 579 | "
" 580 | ] 581 | }, 582 | "metadata": { 583 | "needs_background": "light" 584 | }, 585 | "output_type": "display_data" 586 | } 587 | ], 588 | "source": [ 589 | "train_loss = {\"aux_loss\":[], \"target_loss\":[], \"final_loss\":[]}\n", 590 | "test_loss = {\"aux_loss\":[], \"target_loss\":[], \"final_loss\":[]}\n", 591 | "for epoch in range(epochs):\n", 592 | " for i in range(int(len(train_data) / batch)):\n", 593 | " record_test_loss(test_loss, test_data, i)\n", 594 | " label, target_cate, target_brand, cms_segid, cms_group, gender, age, pvalue, shopping, occupation, user_class_level, hist_brand_behavior_clk, hist_cate_behavior_clk, hist_brand_behavior_show, hist_cate_behavior_show, min_batch, clk_length, show_length = data_reader.get_batch_data(train_data, min_batch, batch = batch)\n", 595 | " user_profile_dict, user_profile_list, user_behavior_list, click_behavior_dict, noclick_behavior_dict, target_item_dict = get_train_data(label, target_cate, target_brand, cms_segid, cms_group, gender, age, pvalue, shopping, occupation, user_class_level, hist_brand_behavior_clk, hist_cate_behavior_clk, hist_brand_behavior_show, hist_cate_behavior_show)\n", 596 | " aux_loss, target_loss, final_loss = train_one_step(user_profile_dict, user_profile_list, click_behavior_dict, target_item_dict, noclick_behavior_dict, user_behavior_list, label)\n", 597 | " #Record_loss12\n", 598 | " loss_dict = dict()\n", 599 | " loss_dict[\"aux_loss\"] = str(aux_loss)\n", 600 | " loss_dict[\"target_loss\"] = str(target_loss)\n", 601 | " loss_dict[\"final_loss\"] = str(final_loss)\n", 602 | " utils.add_loss(loss_dict, loss_file_name, level=\"train\")\n", 603 | " train_loss[\"aux_loss\"].append(float(aux_loss))\n", 604 | " train_loss[\"target_loss\"].append(float(target_loss))\n", 605 | " train_loss[\"final_loss\"].append(float(final_loss))\n", 606 | " get_loss_fig(train_loss, test_loss)\n", 607 | " tf.summary.trace_on(graph=True, profiler=True)\n", 608 | " with train_summary_writer.as_default():\n", 
609 | " tf.summary.scalar(\"train_aux_loss epoch: \"+str(epoch), aux_loss, step = i)\n", 610 | " tf.summary.scalar(\"train_target_loss epoch: \"+str(epoch), target_loss, step = i)\n", 611 | " tf.summary.scalar(\"train_final_loss epoch: \"+str(epoch), final_loss, step = i)\n", 612 | " tf.summary.trace_export(\n", 613 | " name=\"DIEN\", \n", 614 | " step=i, \n", 615 | " profiler_outdir=log_path)\n", 616 | " model.save_weights(checkpoint_path.format(epoch=epoch))" 617 | ] 618 | }, 619 | { 620 | "cell_type": "markdown", 621 | "metadata": {}, 622 | "source": [ 623 | "# 模型评估" 624 | ] 625 | }, 626 | { 627 | "cell_type": "code", 628 | "execution_count": 24, 629 | "metadata": {}, 630 | "outputs": [ 631 | { 632 | "name": "stdout", 633 | "output_type": "stream", 634 | "text": [ 635 | "./checkpoint/cp-0002.ckpt\n" 636 | ] 637 | }, 638 | { 639 | "data": { 640 | "text/plain": [ 641 | "" 642 | ] 643 | }, 644 | "execution_count": 24, 645 | "metadata": {}, 646 | "output_type": "execute_result" 647 | } 648 | ], 649 | "source": [ 650 | "last_model = DIEN(embedding_count_dict, embedding_dim_dict, embedding_features_list, user_behavior_features, activation=\"dice\")\n", 651 | "latest = tf.train.latest_checkpoint(checkpoint_dir)\n", 652 | "print(latest)\n", 653 | "last_model.load_weights(latest)" 654 | ] 655 | }, 656 | { 657 | "cell_type": "code", 658 | "execution_count": 26, 659 | "metadata": {}, 660 | "outputs": [ 661 | { 662 | "name": "stdout", 663 | "output_type": "stream", 664 | "text": [ 665 | "WARNING:tensorflow:Layer dien_1 is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2. The layer has dtype float32 because it's dtype defaults to floatx.\n", 666 | "\n", 667 | "If you intended to run this layer in float32, you can safely ignore this warning. 
If in doubt, this warning is likely only an issue if you are porting a TensorFlow 1.X model to TensorFlow 2.\n", 668 | "\n", 669 | "To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.\n", 670 | "\n" 671 | ] 672 | }, 673 | { 674 | "data": { 675 | "text/plain": [ 676 | "(0.029646765, 0.26222047, 0.29186723)" 677 | ] 678 | }, 679 | "execution_count": 26, 680 | "metadata": {}, 681 | "output_type": "execute_result" 682 | } 683 | ], 684 | "source": [ 685 | "model= last_model\n", 686 | "label, target_cate, target_brand, cms_segid, cms_group, gender, age, pvalue, shopping, occupation, user_class_level, hist_brand_behavior_clk, hist_cate_behavior_clk, hist_brand_behavior_show, hist_cate_behavior_show, clk_length, show_length = data_reader.get_test_data(test_data)\n", 687 | "user_profile_dict, user_profile_list, user_behavior_list, click_behavior_dict, noclick_behavior_dict, target_item_dict = get_train_data(label, target_cate, target_brand, cms_segid, cms_group, gender, age, pvalue, shopping, occupation, user_class_level, hist_brand_behavior_clk, hist_cate_behavior_clk, hist_brand_behavior_show, hist_cate_behavior_show)\n", 688 | "aux_loss, target_loss, final_loss = get_test_loss(user_profile_dict, user_profile_list, click_behavior_dict, target_item_dict, noclick_behavior_dict, user_behavior_list, label)\n", 689 | "aux_loss, target_loss, final_loss" 690 | ] 691 | }, 692 | { 693 | "cell_type": "code", 694 | "execution_count": 27, 695 | "metadata": {}, 696 | "outputs": [], 697 | "source": [ 698 | "def convert_tensor(data):\n", 699 | " return tf.convert_to_tensor(data)\n", 700 | "\n", 701 | "def get_normal_data(data, col):\n", 702 | " return data[col].values\n", 703 | "\n", 704 | "def get_sequence_data(data, col):\n", 705 | " rst = 
[]\n", 706 | " max_length = 0\n", 707 | " for i in data[col].values:\n", 708 | " temp = len(list(map(eval,i[1:-1].split(\",\"))))\n", 709 | " if temp > max_length:\n", 710 | " max_length = temp\n", 711 | "\n", 712 | " for i in data[col].values:\n", 713 | " temp = list(map(eval,i[1:-1].split(\",\")))\n", 714 | " padding = np.zeros(max_length - len(temp))\n", 715 | " rst.append(list(np.append(np.array(temp), padding)))\n", 716 | " return rst\n", 717 | "\n", 718 | "def get_evaluate_data(data):\n", 719 | " batch_data = data\n", 720 | " click = get_normal_data(batch_data, \"guide_dien_final_train_data.clk\")\n", 721 | " target_cate = get_normal_data(batch_data, \"guide_dien_final_train_data.cate_id\")\n", 722 | " target_brand = get_normal_data(batch_data, \"guide_dien_final_train_data.brand\")\n", 723 | " cms_segid = get_normal_data(batch_data, \"guide_dien_final_train_data.cms_segid\")\n", 724 | " cms_group = get_normal_data(batch_data, \"guide_dien_final_train_data.cms_group_id\")\n", 725 | " gender = get_normal_data(batch_data, \"guide_dien_final_train_data.final_gender_code\")\n", 726 | " age = get_normal_data(batch_data, \"guide_dien_final_train_data.age_level\")\n", 727 | " pvalue = get_normal_data(batch_data, \"guide_dien_final_train_data.pvalue_level\")\n", 728 | " shopping = get_normal_data(batch_data, \"guide_dien_final_train_data.shopping_level\")\n", 729 | " occupation = get_normal_data(batch_data, \"guide_dien_final_train_data.occupation\")\n", 730 | " user_class_level = get_normal_data(batch_data, \"guide_dien_final_train_data.new_user_class_level\")\n", 731 | " hist_brand_behavior_clk = get_sequence_data(batch_data, \"guide_dien_final_train_data.click_brand\")\n", 732 | " hist_cate_behavior_clk = get_sequence_data(batch_data, \"guide_dien_final_train_data.click_cate\")\n", 733 | " hist_brand_behavior_show = get_sequence_data(batch_data, \"guide_dien_final_train_data.show_brand\")\n", 734 | " hist_cate_behavior_show = get_sequence_data(batch_data, 
\"guide_dien_final_train_data.show_cate\")\n", 735 | " return tf.one_hot(click, 2), convert_tensor(target_cate), convert_tensor(target_brand), convert_tensor(cms_segid), convert_tensor(cms_group), convert_tensor(gender), convert_tensor(age), convert_tensor(pvalue), convert_tensor(shopping), convert_tensor(occupation), convert_tensor(user_class_level), convert_tensor(hist_brand_behavior_clk), convert_tensor(hist_cate_behavior_clk), convert_tensor(hist_brand_behavior_show), convert_tensor(hist_cate_behavior_show)" 736 | ] 737 | }, 738 | { 739 | "cell_type": "code", 740 | "execution_count": 29, 741 | "metadata": {}, 742 | "outputs": [], 743 | "source": [ 744 | "label, target_cate, target_brand, cms_segid, cms_group, gender, age, pvalue, shopping, occupation, user_class_level, hist_brand_behavior_clk, hist_cate_behavior_clk, hist_brand_behavior_show, hist_cate_behavior_show = get_evaluate_data(test_data)\n", 745 | "user_profile_dict, user_profile_list, user_behavior_list, click_behavior_dict, noclick_behavior_dict, target_item_dict = get_train_data(label, target_cate, target_brand, cms_segid, cms_group, gender, age, pvalue, shopping, occupation, user_class_level, hist_brand_behavior_clk, hist_cate_behavior_clk, hist_brand_behavior_show, hist_cate_behavior_show)\n", 746 | "output, logit, aux_loss = model(user_profile_dict, user_profile_list, click_behavior_dict, target_item_dict, noclick_behavior_dict, user_behavior_list)" 747 | ] 748 | }, 749 | { 750 | "cell_type": "code", 751 | "execution_count": 30, 752 | "metadata": {}, 753 | "outputs": [ 754 | { 755 | "name": "stdout", 756 | "output_type": "stream", 757 | "text": [ 758 | "[训练集]正例:负例=501 : 9435\n", 759 | "[测试集]正例:负例=56 : 943\n" 760 | ] 761 | } 762 | ], 763 | "source": [ 764 | "train_label = train_data[\"guide_dien_final_train_data.clk\"].values\n", 765 | "positive_num = len(train_label[train_label == 1])\n", 766 | "negative_num = len(train_label[train_label == 0])\n", 767 | "print(\"[训练集]正例:负例=%d : %d\" % 
(positive_num, negative_num))\n", 768 | "test_label = test_data[\"guide_dien_final_train_data.clk\"].values\n", 769 | "positive_num = len(test_label[test_label == 1])\n", 770 | "negative_num = len(test_label[test_label == 0])\n", 771 | "print(\"[测试集]正例:负例=%d : %d\" % (positive_num, negative_num))" 772 | ] 773 | }, 774 | { 775 | "cell_type": "code", 776 | "execution_count": 31, 777 | "metadata": {}, 778 | "outputs": [], 779 | "source": [ 780 | "y_true = label.numpy()[:,-1]\n", 781 | "y_score = output.numpy()[:,-1]" 782 | ] 783 | }, 784 | { 785 | "cell_type": "code", 786 | "execution_count": 48, 787 | "metadata": {}, 788 | "outputs": [], 789 | "source": [ 790 | "threshold = 0.0031\n", 791 | "y_pre = y_score.copy()\n", 792 | "y_pre[y_pre > threshold] = 1\n", 793 | "y_pre[y_pre <= threshold] = 0" 794 | ] 795 | }, 796 | { 797 | "cell_type": "code", 798 | "execution_count": 34, 799 | "metadata": {}, 800 | "outputs": [], 801 | "source": [ 802 | "import numpy as np\n", 803 | "from sklearn.metrics import accuracy_score\n", 804 | "from sklearn.metrics import f1_score\n", 805 | "from sklearn.metrics import auc\n", 806 | "import sklearn.metrics as sm\n", 807 | "from sklearn.metrics import roc_curve, auc\n", 808 | "import matplotlib as mpl \n", 809 | "import matplotlib.pyplot as plt" 810 | ] 811 | }, 812 | { 813 | "cell_type": "code", 814 | "execution_count": 50, 815 | "metadata": {}, 816 | "outputs": [ 817 | { 818 | "name": "stdout", 819 | "output_type": "stream", 820 | "text": [ 821 | "0.8818818818818819\n" 822 | ] 823 | } 824 | ], 825 | "source": [ 826 | "print(accuracy_score(y_true, y_pre))" 827 | ] 828 | }, 829 | { 830 | "cell_type": "code", 831 | "execution_count": 51, 832 | "metadata": {}, 833 | "outputs": [ 834 | { 835 | "name": "stdout", 836 | "output_type": "stream", 837 | "text": [ 838 | "混淆矩阵为:\n", 839 | "[[876 67]\n", 840 | " [ 51 5]]\n" 841 | ] 842 | } 843 | ], 844 | "source": [ 845 | "m = sm.confusion_matrix(y_true, y_pre)\n", 846 | "print('混淆矩阵为:', m, 
sep='\\n')" 847 | ] 848 | }, 849 | { 850 | "cell_type": "code", 851 | "execution_count": 52, 852 | "metadata": {}, 853 | "outputs": [ 854 | { 855 | "name": "stdout", 856 | "output_type": "stream", 857 | "text": [ 858 | "分类报告为:\n", 859 | " precision recall f1-score support\n", 860 | "\n", 861 | " 0.0 0.94 0.93 0.94 943\n", 862 | " 1.0 0.07 0.09 0.08 56\n", 863 | "\n", 864 | " accuracy 0.88 999\n", 865 | " macro avg 0.51 0.51 0.51 999\n", 866 | "weighted avg 0.90 0.88 0.89 999\n", 867 | "\n" 868 | ] 869 | } 870 | ], 871 | "source": [ 872 | "r = sm.classification_report(y_true, y_pre)\n", 873 | "print('分类报告为:', r, sep='\\n')" 874 | ] 875 | }, 876 | { 877 | "cell_type": "code", 878 | "execution_count": 53, 879 | "metadata": {}, 880 | "outputs": [ 881 | { 882 | "data": { 883 | "text/plain": [ 884 | "0.679821239206181" 885 | ] 886 | }, 887 | "execution_count": 53, 888 | "metadata": {}, 889 | "output_type": "execute_result" 890 | } 891 | ], 892 | "source": [ 893 | "from sklearn.metrics import roc_auc_score\n", 894 | "auc_score = roc_auc_score(y_true,y_score)\n", 895 | "auc_score" 896 | ] 897 | }, 898 | { 899 | "cell_type": "code", 900 | "execution_count": 54, 901 | "metadata": {}, 902 | "outputs": [], 903 | "source": [ 904 | "def plot_roc(labels, predict_prob):\n", 905 | " false_positive_rate,true_positive_rate,thresholds=roc_curve(labels, predict_prob)\n", 906 | " roc_auc=auc(false_positive_rate, true_positive_rate)\n", 907 | " plt.title('ROC')\n", 908 | " plt.plot(false_positive_rate, true_positive_rate,'b',label='AUC = %0.4f'% roc_auc)\n", 909 | " plt.legend(loc='lower right')\n", 910 | " plt.plot([0,1],[0,1],'r--')\n", 911 | " plt.ylabel('TPR')\n", 912 | " plt.xlabel('FPR')\n", 913 | " plt.show()" 914 | ] 915 | }, 916 | { 917 | "cell_type": "code", 918 | "execution_count": 55, 919 | "metadata": {}, 920 | "outputs": [ 921 | { 922 | "data": { 923 | "image/png": "\n", 924 | "text/plain": [ 925 | "
" 926 | ] 927 | }, 928 | "metadata": { 929 | "needs_background": "light" 930 | }, 931 | "output_type": "display_data" 932 | } 933 | ], 934 | "source": [ 935 | "plot_roc(y_true, y_score)" 936 | ] 937 | }, 938 | { 939 | "cell_type": "markdown", 940 | "metadata": {}, 941 | "source": [ 942 | "# 整体训练图像" 943 | ] 944 | }, 945 | { 946 | "cell_type": "code", 947 | "execution_count": 57, 948 | "metadata": {}, 949 | "outputs": [ 950 | { 951 | "data": { 952 | "text/html": [ 953 | "
\n", 954 | "\n", 967 | "\n", 968 | " \n", 969 | " \n", 970 | " \n", 971 | " \n", 972 | " \n", 973 | " \n", 974 | " \n", 975 | " \n", 976 | " \n", 977 | " \n", 978 | " \n", 979 | " \n", 980 | " \n", 981 | " \n", 982 | " \n", 983 | " \n", 984 | " \n", 985 | " \n", 986 | " \n", 987 | " \n", 988 | " \n", 989 | " \n", 990 | " \n", 991 | " \n", 992 | " \n", 993 | " \n", 994 | " \n", 995 | " \n", 996 | " \n", 997 | " \n", 998 | " \n", 999 | " \n", 1000 | " \n", 1001 | " \n", 1002 | " \n", 1003 | " \n", 1004 | " \n", 1005 | " \n", 1006 | " \n", 1007 | " \n", 1008 | " \n", 1009 | " \n", 1010 | " \n", 1011 | " \n", 1012 | " \n", 1013 | " \n", 1014 | " \n", 1015 | " \n", 1016 | " \n", 1017 | " \n", 1018 | " \n", 1019 | " \n", 1020 | " \n", 1021 | " \n", 1022 | " \n", 1023 | " \n", 1024 | " \n", 1025 | " \n", 1026 | " \n", 1027 | " \n", 1028 | " \n", 1029 | " \n", 1030 | " \n", 1031 | " \n", 1032 | " \n", 1033 | " \n", 1034 | " \n", 1035 | " \n", 1036 | " \n", 1037 | " \n", 1038 | " \n", 1039 | " \n", 1040 | " \n", 1041 | " \n", 1042 | " \n", 1043 | " \n", 1044 | "
train_aux_losstrain_target_losstrain_final_loss
00.8954530.6920251.587478
10.8836130.6910351.574647
20.8718200.6901961.562016
30.8603340.6894091.549743
40.8486130.6888401.537453
............
2920.0302060.1975150.227721
2930.0289850.1408210.169806
2940.0289900.0819850.110975
2950.0280550.1663380.194393
2960.0287970.1971610.225958
\n", 1045 | "

297 rows × 3 columns

\n", 1046 | "
" 1047 | ], 1048 | "text/plain": [ 1049 | " train_aux_loss train_target_loss train_final_loss\n", 1050 | "0 0.895453 0.692025 1.587478\n", 1051 | "1 0.883613 0.691035 1.574647\n", 1052 | "2 0.871820 0.690196 1.562016\n", 1053 | "3 0.860334 0.689409 1.549743\n", 1054 | "4 0.848613 0.688840 1.537453\n", 1055 | ".. ... ... ...\n", 1056 | "292 0.030206 0.197515 0.227721\n", 1057 | "293 0.028985 0.140821 0.169806\n", 1058 | "294 0.028990 0.081985 0.110975\n", 1059 | "295 0.028055 0.166338 0.194393\n", 1060 | "296 0.028797 0.197161 0.225958\n", 1061 | "\n", 1062 | "[297 rows x 3 columns]" 1063 | ] 1064 | }, 1065 | "execution_count": 57, 1066 | "metadata": {}, 1067 | "output_type": "execute_result" 1068 | } 1069 | ], 1070 | "source": [ 1071 | "train_loss_data = pd.read_csv(\"./loss/dien/train_loss.csv.2020_09_22_21_35_06\")\n", 1072 | "train_loss_data" 1073 | ] 1074 | }, 1075 | { 1076 | "cell_type": "code", 1077 | "execution_count": 56, 1078 | "metadata": {}, 1079 | "outputs": [ 1080 | { 1081 | "data": { 1082 | "text/html": [ 1083 | "
\n", 1084 | "\n", 1097 | "\n", 1098 | " \n", 1099 | " \n", 1100 | " \n", 1101 | " \n", 1102 | " \n", 1103 | " \n", 1104 | " \n", 1105 | " \n", 1106 | " \n", 1107 | " \n", 1108 | " \n", 1109 | " \n", 1110 | " \n", 1111 | " \n", 1112 | " \n", 1113 | " \n", 1114 | " \n", 1115 | " \n", 1116 | " \n", 1117 | " \n", 1118 | " \n", 1119 | " \n", 1120 | " \n", 1121 | " \n", 1122 | " \n", 1123 | " \n", 1124 | " \n", 1125 | " \n", 1126 | " \n", 1127 | " \n", 1128 | " \n", 1129 | " \n", 1130 | " \n", 1131 | " \n", 1132 | " \n", 1133 | " \n", 1134 | " \n", 1135 | " \n", 1136 | " \n", 1137 | " \n", 1138 | " \n", 1139 | " \n", 1140 | " \n", 1141 | " \n", 1142 | " \n", 1143 | " \n", 1144 | " \n", 1145 | " \n", 1146 | " \n", 1147 | " \n", 1148 | " \n", 1149 | " \n", 1150 | " \n", 1151 | " \n", 1152 | " \n", 1153 | " \n", 1154 | " \n", 1155 | " \n", 1156 | " \n", 1157 | " \n", 1158 | " \n", 1159 | " \n", 1160 | " \n", 1161 | " \n", 1162 | " \n", 1163 | " \n", 1164 | " \n", 1165 | " \n", 1166 | " \n", 1167 | " \n", 1168 | " \n", 1169 | " \n", 1170 | " \n", 1171 | " \n", 1172 | " \n", 1173 | " \n", 1174 | "
test_aux_losstest_target_losstest_final_loss
00.8955500.6921211.587671
10.8837850.6913251.575110
20.8721210.6905321.562653
30.8605580.6897211.550279
40.8491010.6889171.538019
............
2920.0301820.2611070.291289
2930.0300740.2611990.291273
2940.0299660.2613540.291320
2950.0298590.2616390.291498
2960.0297520.2619370.291690
\n", 1175 | "

297 rows × 3 columns

\n", 1176 | "
" 1177 | ], 1178 | "text/plain": [ 1179 | " test_aux_loss test_target_loss test_final_loss\n", 1180 | "0 0.895550 0.692121 1.587671\n", 1181 | "1 0.883785 0.691325 1.575110\n", 1182 | "2 0.872121 0.690532 1.562653\n", 1183 | "3 0.860558 0.689721 1.550279\n", 1184 | "4 0.849101 0.688917 1.538019\n", 1185 | ".. ... ... ...\n", 1186 | "292 0.030182 0.261107 0.291289\n", 1187 | "293 0.030074 0.261199 0.291273\n", 1188 | "294 0.029966 0.261354 0.291320\n", 1189 | "295 0.029859 0.261639 0.291498\n", 1190 | "296 0.029752 0.261937 0.291690\n", 1191 | "\n", 1192 | "[297 rows x 3 columns]" 1193 | ] 1194 | }, 1195 | "execution_count": 56, 1196 | "metadata": {}, 1197 | "output_type": "execute_result" 1198 | } 1199 | ], 1200 | "source": [ 1201 | "test_loss_data = pd.read_csv(\"./loss/dien/test_loss.csv.2020_09_22_21_35_06\")\n", 1202 | "test_loss_data" 1203 | ] 1204 | }, 1205 | { 1206 | "cell_type": "code", 1207 | "execution_count": 58, 1208 | "metadata": {}, 1209 | "outputs": [], 1210 | "source": [ 1211 | "def get_loss_fig_aux(train_loss_data, test_loss_data):\n", 1212 | " train_loss = {\n", 1213 | " \"aux_loss\":list(train_loss_data[\"train_\" + \"aux_loss\"].values), \n", 1214 | " \"target_loss\":list(train_loss_data[\"train_\" + \"target_loss\"].values), \n", 1215 | " \"final_loss\":list(train_loss_data[\"train_\" + \"final_loss\"].values)\n", 1216 | " }\n", 1217 | " test_loss = {\n", 1218 | " \"aux_loss\":list(test_loss_data[\"test_\" + \"aux_loss\"].values), \n", 1219 | " \"target_loss\":list(test_loss_data[\"test_\" + \"target_loss\"].values), \n", 1220 | " \"final_loss\":list(test_loss_data[\"test_\" + \"final_loss\"].values)\n", 1221 | " }\n", 1222 | " get_loss_fig(train_loss, test_loss)" 1223 | ] 1224 | }, 1225 | { 1226 | "cell_type": "code", 1227 | "execution_count": 59, 1228 | "metadata": {}, 1229 | "outputs": [ 1230 | { 1231 | "data": { 1232 | "image/png": "\n", 1233 | "text/plain": [ 1234 | "
" 1235 | ] 1236 | }, 1237 | "metadata": { 1238 | "needs_background": "light" 1239 | }, 1240 | "output_type": "display_data" 1241 | } 1242 | ], 1243 | "source": [ 1244 | "get_loss_fig_aux(train_loss_data, test_loss_data)" 1245 | ] 1246 | }, 1247 | { 1248 | "cell_type": "code", 1249 | "execution_count": null, 1250 | "metadata": {}, 1251 | "outputs": [], 1252 | "source": [] 1253 | } 1254 | ], 1255 | "metadata": { 1256 | "kernelspec": { 1257 | "display_name": "Python 3", 1258 | "language": "python", 1259 | "name": "python3" 1260 | }, 1261 | "language_info": { 1262 | "codemirror_mode": { 1263 | "name": "ipython", 1264 | "version": 3 1265 | }, 1266 | "file_extension": ".py", 1267 | "mimetype": "text/x-python", 1268 | "name": "python", 1269 | "nbconvert_exporter": "python", 1270 | "pygments_lexer": "ipython3", 1271 | "version": "3.7.6" 1272 | } 1273 | }, 1274 | "nbformat": 4, 1275 | "nbformat_minor": 4 1276 | } 1277 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DIEN-DIN 2 | 3 | 本项目使用tensorflow2.0复现阿里兴趣排序模型DIEN与DIN。 4 | 5 | DIN论文链接: https://arxiv.org/pdf/1706.06978.pdf 6 | 7 | DIEN论文链接: https://arxiv.org/pdf/1809.03672.pdf 8 | 9 | 数据集使用阿里数据集测试模型代码, 数据集链接: https://tianchi.aliyun.com/dataset/dataDetail?dataId=56 10 | 11 | # 调用方法: 12 | 13 | ## 0. 简介: 14 | 15 | DIEN的输入特征中主要包含三个部分特征: 用户历史行为序列, 目标商品特征, 用户画像特征。 16 | 用户历史行为序列需包含点击序列与非点击序列。 17 | 请按如下1~2方法处理输入特征。 18 | 19 | ## 1. 
初始化: 20 | 21 | 初始化DIEN时需传入5个参数: 22 | 23 | (注:embedding_features_list与user_behavior_features中的特征名称,需要与embedding_count_dict、embedding_dim_dict中的键名一致) 24 | 25 | - embedding_count_dict:string->int格式,该变量记录需要embedding各个特征的词典个数,即最大整数索引 + 1的大小; 26 | 27 | - embedding_dim_dict:string->int格式,该变量记录需要embedding各个特征的输出维数,即密集嵌入的尺寸; 28 | 29 | - embedding_features_list:list(string)格式,该变量记录DIEN中user_profile部分所有需要embedding的feature名称; 30 | 31 | - user_behavior_features:list(string)格式,该变量记录DIEN中user_behavior与target_item部分所有需要embedding的feature名称; 32 | 33 | - activation:string格式,默认值"PReLU",该变量控制全连接层激活函数,"PReLU"->PReLU,"Dice"->Dice,"dice"->dice。 34 | 35 | ## 2. 模型调用: 36 | 37 | 模型调用需传入6个参数: 38 | 39 | (注:user_profile_list与user_behavior_list中的特征名称,需要与对应dict中的键名一致) 40 | 41 | - user_profile_dict:dict:string->Tensor格式,记录user_profile部分的所有输入特征的训练数据; 42 | 43 | - user_profile_list:list(string)格式,记录user_profile部分的所有特征名称; 44 | 45 | - click_behavior_dict:dict:string->Tensor格式,记录user_behavior部分所有点击输入特征的训练数据; 46 | 47 | - noclick_behavior_dict:dict:string->Tensor格式,记录user_behavior部分所有未点击输入特征的训练数据; 48 | 49 | - target_item_dict:dict:string->Tensor格式,记录target_item部分输入特征的训练数据; 50 | 51 | - user_behavior_list:list(string)格式,记录user_behavior部分的所有特征名称。 52 | 53 | # 调用演示代码: 54 | 55 | ## DIEN: 56 | 57 | DIEN_train_example.ipynb 58 | 59 | ## DIN: 60 | 61 | DIN_train_example.ipynb 62 | 63 | # 代码: 64 | 65 | - model.py: 定义模型代码 66 | 67 | - layers.py: 自定义层 68 | 69 | - loss.py: 定义Auxiliary Loss用到的NN 70 | 71 | - activations.py: 定义Dice激活函数 72 | 73 | - alibaba_data_reader.py: 输入数据处理函数(代码中使用数据已用spark处理后得到了所需序列数据, 及特征embedding词典数) 74 | -------------------------------------------------------------------------------- /__pycache__/activations.cpython-37.pyc:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/StephenBo-China/DIEN-DIN/e1d9bb0591f0e0ce5be35cbf328077f6da2a45d2/__pycache__/alibaba_data_reader.cpython-37.pyc -------------------------------------------------------------------------------- /__pycache__/layers.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/StephenBo-China/DIEN-DIN/e1d9bb0591f0e0ce5be35cbf328077f6da2a45d2/__pycache__/layers.cpython-37.pyc -------------------------------------------------------------------------------- /__pycache__/loss.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/StephenBo-China/DIEN-DIN/e1d9bb0591f0e0ce5be35cbf328077f6da2a45d2/__pycache__/loss.cpython-37.pyc -------------------------------------------------------------------------------- /__pycache__/model.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/StephenBo-China/DIEN-DIN/e1d9bb0591f0e0ce5be35cbf328077f6da2a45d2/__pycache__/model.cpython-37.pyc -------------------------------------------------------------------------------- /__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/StephenBo-China/DIEN-DIN/e1d9bb0591f0e0ce5be35cbf328077f6da2a45d2/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /activations.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | class Dice(tf.keras.layers.Layer): 4 | def __init__(self): 5 | super(Dice, self).__init__() 6 | self.bn = tf.keras.layers.BatchNormalization(center=False, scale=False) 7 | self.alpha = self.add_weight(shape=(), dtype=tf.float32, 
name='alpha') 8 | 9 | def call(self, x): 10 | x_normed = self.bn(x) 11 | x_p = tf.sigmoid(x_normed) 12 | return self.alpha * (1.0 - x_p) * x + x_p * x 13 | 14 | class dice(tf.keras.layers.Layer): 15 | def __init__(self, feat_dim): 16 | super(dice, self).__init__() 17 | self.feat_dim = feat_dim 18 | self.alphas= tf.Variable(tf.zeros([feat_dim]), dtype=tf.float32) 19 | self.beta = tf.Variable(tf.zeros([feat_dim]), dtype=tf.float32) 20 | 21 | self.bn = tf.keras.layers.BatchNormalization(center=False, scale=False) 22 | 23 | def call(self, _x, axis=-1, epsilon=0.000000001): 24 | 25 | reduction_axes = list(range(len(_x.get_shape()))) 26 | del reduction_axes[axis] 27 | broadcast_shape = [1] * len(_x.get_shape()) 28 | broadcast_shape[axis] = self.feat_dim 29 | 30 | mean = tf.reduce_mean(_x, axis=reduction_axes) 31 | brodcast_mean = tf.reshape(mean, broadcast_shape) 32 | std = tf.reduce_mean(tf.square(_x - brodcast_mean) + epsilon, axis=reduction_axes) 33 | std = tf.sqrt(std) 34 | brodcast_std = tf.reshape(std, broadcast_shape) 35 | 36 | x_normed = self.bn(_x) 37 | x_p = tf.keras.activations.sigmoid(self.beta * x_normed) 38 | 39 | return self.alphas * (1.0 - x_p) * _x + x_p * _x -------------------------------------------------------------------------------- /alibaba_data_reader.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import tensorflow as tf 3 | import numpy as np 4 | 5 | def get_embedding_features_list(): 6 | embedding_features_list = ["cate", "brand", "cms_segid", "cms_group", 7 | "gender", "age", "pvalue", "shopping", 8 | "occupation", "user_class_level"] 9 | return embedding_features_list 10 | 11 | def get_user_behavior_features(): 12 | user_behavior_features = ["cate", "brand"] 13 | return user_behavior_features 14 | 15 | def get_embedding_count(feature, embedding_count): 16 | return embedding_count[feature].values[0] 17 | 18 | def get_embedding_count_dict(embedding_features_list, 
embedding_count): 19 | embedding_count_dict = dict() 20 | for feature in embedding_features_list: 21 | embedding_count_dict[feature] = get_embedding_count(feature, embedding_count) 22 | embedding_count_dict["brand"] = 500000 23 | embedding_count_dict["cate"] = 501578 24 | embedding_count_dict["gender"] = 3 25 | embedding_count_dict["pvalue"] = 10 26 | embedding_count_dict["shopping"] = 4 27 | embedding_count_dict["occupation"] = 5 28 | embedding_count_dict["user_class_level"] = 5 29 | return embedding_count_dict 30 | 31 | def get_embedding_dim_dict(embedding_features_list): 32 | embedding_dim_dict = dict() 33 | for feature in embedding_features_list: 34 | embedding_dim_dict[feature] = 64 35 | return embedding_dim_dict 36 | 37 | def get_data(): 38 | train_data = pd.read_csv("./data/train.csv", sep = "\t") 39 | train_data = train_data.fillna(0) 40 | train_data = train_data[train_data["guide_dien_final_train_data.click_cate"] != 0] 41 | train_data = train_data[train_data["guide_dien_final_train_data.click_brand"] != 0] 42 | test_data = pd.read_csv("./data/test.csv", sep = "\t") 43 | test_data = test_data.fillna(0) 44 | test_data = test_data[test_data["guide_dien_final_train_data.click_cate"] != 0] 45 | test_data = test_data[test_data["guide_dien_final_train_data.click_brand"] != 0] 46 | embedding_count = pd.read_csv("./data/embedding_count.csv") 47 | return train_data, test_data, embedding_count 48 | 49 | def get_normal_data(data, col): 50 | return data[col].values 51 | 52 | def get_sequence_data(data, col): 53 | rst = [] 54 | max_length = 0 55 | for i in data[col].values: 56 | temp = len(list(map(eval,i[1:-1].split(",")))) 57 | if temp > max_length: 58 | max_length = temp 59 | 60 | for i in data[col].values: 61 | temp = list(map(eval,i[1:-1].split(","))) 62 | padding = np.zeros(max_length - len(temp)) 63 | rst.append(list(np.append(np.array(temp), padding))) 64 | return rst 65 | 66 | def get_length(data, col): 67 | rst = [] 68 | for i in data[col].values: 69 | temp = 
len(list(map(eval,i[1:-1].split(",")))) 70 | rst.append(temp) 71 | return rst 72 | 73 | def convert_tensor(data): 74 | return tf.convert_to_tensor(data) 75 | 76 | def get_batch_data(data, min_batch, batch=100): 77 | # batch_data = None 78 | # if min_batch + batch <= len(data): 79 | # batch_data = data.loc[min_batch:min_batch + batch - 1] 80 | # else: 81 | # batch_data = data.loc[min_batch:] 82 | batch_data = data.sample(n=batch) 83 | click = get_normal_data(batch_data, "guide_dien_final_train_data.clk") 84 | #no_click = get_normal_data(batch_data, "guide_dien_final_train_data.nonclk") 85 | #label = [click, no_click] 86 | #label = click 87 | target_cate = get_normal_data(batch_data, "guide_dien_final_train_data.cate_id") 88 | target_brand = get_normal_data(batch_data, "guide_dien_final_train_data.brand") 89 | cms_segid = get_normal_data(batch_data, "guide_dien_final_train_data.cms_segid") 90 | cms_group = get_normal_data(batch_data, "guide_dien_final_train_data.cms_group_id") 91 | gender = get_normal_data(batch_data, "guide_dien_final_train_data.final_gender_code") 92 | age = get_normal_data(batch_data, "guide_dien_final_train_data.age_level") 93 | pvalue = get_normal_data(batch_data, "guide_dien_final_train_data.pvalue_level") 94 | shopping = get_normal_data(batch_data, "guide_dien_final_train_data.shopping_level") 95 | occupation = get_normal_data(batch_data, "guide_dien_final_train_data.occupation") 96 | user_class_level = get_normal_data(batch_data, "guide_dien_final_train_data.new_user_class_level") 97 | hist_brand_behavior_clk = get_sequence_data(batch_data, "guide_dien_final_train_data.click_brand") 98 | hist_cate_behavior_clk = get_sequence_data(batch_data, "guide_dien_final_train_data.click_cate") 99 | hist_brand_behavior_show = get_sequence_data(batch_data, "guide_dien_final_train_data.show_brand") 100 | hist_cate_behavior_show = get_sequence_data(batch_data, "guide_dien_final_train_data.show_cate") 101 | #reshape_len = 
convert_tensor(label).numpy().shape[1] 102 | clk_length = get_length(batch_data, "guide_dien_final_train_data.click_brand") 103 | show_length = get_length(batch_data, "guide_dien_final_train_data.show_brand") 104 | return tf.one_hot(click, 2), convert_tensor(target_cate), convert_tensor(target_brand), convert_tensor(cms_segid), convert_tensor(cms_group), convert_tensor(gender), convert_tensor(age), convert_tensor(pvalue), convert_tensor(shopping), convert_tensor(occupation), convert_tensor(user_class_level), convert_tensor(hist_brand_behavior_clk), convert_tensor(hist_cate_behavior_clk), convert_tensor(hist_brand_behavior_show), convert_tensor(hist_cate_behavior_show), min_batch + batch, clk_length, show_length 105 | 106 | def get_test_data(data): 107 | batch_data = data.head(150) 108 | #batch_data = data.sample(n = 50) 109 | click = get_normal_data(batch_data, "guide_dien_final_train_data.clk") 110 | target_cate = get_normal_data(batch_data, "guide_dien_final_train_data.cate_id") 111 | target_brand = get_normal_data(batch_data, "guide_dien_final_train_data.brand") 112 | cms_segid = get_normal_data(batch_data, "guide_dien_final_train_data.cms_segid") 113 | cms_group = get_normal_data(batch_data, "guide_dien_final_train_data.cms_group_id") 114 | gender = get_normal_data(batch_data, "guide_dien_final_train_data.final_gender_code") 115 | age = get_normal_data(batch_data, "guide_dien_final_train_data.age_level") 116 | pvalue = get_normal_data(batch_data, "guide_dien_final_train_data.pvalue_level") 117 | shopping = get_normal_data(batch_data, "guide_dien_final_train_data.shopping_level") 118 | occupation = get_normal_data(batch_data, "guide_dien_final_train_data.occupation") 119 | user_class_level = get_normal_data(batch_data, "guide_dien_final_train_data.new_user_class_level") 120 | hist_brand_behavior_clk = get_sequence_data(batch_data, "guide_dien_final_train_data.click_brand") 121 | hist_cate_behavior_clk = get_sequence_data(batch_data, 
"guide_dien_final_train_data.click_cate") 122 | hist_brand_behavior_show = get_sequence_data(batch_data, "guide_dien_final_train_data.show_brand") 123 | hist_cate_behavior_show = get_sequence_data(batch_data, "guide_dien_final_train_data.show_cate") 124 | clk_length = get_length(batch_data, "guide_dien_final_train_data.click_brand") 125 | show_length = get_length(batch_data, "guide_dien_final_train_data.show_brand") 126 | return tf.one_hot(click, 2), convert_tensor(target_cate), convert_tensor(target_brand), convert_tensor(cms_segid), convert_tensor(cms_group), convert_tensor(gender), convert_tensor(age), convert_tensor(pvalue), convert_tensor(shopping), convert_tensor(occupation), convert_tensor(user_class_level), convert_tensor(hist_brand_behavior_clk), convert_tensor(hist_cate_behavior_clk), convert_tensor(hist_brand_behavior_show), convert_tensor(hist_cate_behavior_show), clk_length, show_length -------------------------------------------------------------------------------- /layers.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.keras import layers 3 | from activations import Dice,dice 4 | 5 | class GRU_GATES(tf.keras.layers.Layer): 6 | def __init__(self, units): 7 | super(GRU_GATES, self).__init__() 8 | self.linear_act = layers.Dense(units, activation=None, use_bias=True) 9 | self.linear_noact = layers.Dense(units, activation=None, use_bias=False) 10 | 11 | def call(self, a, b, gate_b=None): 12 | if gate_b is None: 13 | return tf.keras.activations.sigmoid(self.linear_act(a) + self.linear_noact(b)) 14 | else: 15 | return tf.keras.activations.tanh(self.linear_act(a) + tf.math.multiply(gate_b, self.linear_noact(b))) 16 | 17 | class AUGRU(layers.Layer): 18 | def __init__(self, units): 19 | super(AUGRU, self).__init__() 20 | self.u_gate = GRU_GATES(units) 21 | self.r_gate = GRU_GATES(units) 22 | self.c_memo = GRU_GATES(units) 23 | 24 | def call(self, inputs, state, att_score): 25 
| u = self.u_gate(inputs, state) #u_t 26 | r = self.r_gate(inputs, state) #r_t 27 | c = self.c_memo(inputs, state, r) #\tilde{h_t} 28 | u_= att_score * u #\tilde{u_{t}'} [AUGRU Add] 29 | state_next = (1 - u_) * state + u_ * c #h_t [AUGRU change u_t on output] 30 | return state_next 31 | 32 | class attention(tf.keras.layers.Layer): 33 | def __init__(self, keys_dim): 34 | super(attention, self).__init__() 35 | self.keys_dim = keys_dim 36 | self.fc = tf.keras.Sequential() 37 | self.fc.add(layers.BatchNormalization()) 38 | self.fc.add(layers.Dense(36, activation="sigmoid")) 39 | self.fc.add(dice(36)) 40 | self.fc.add(layers.Dense(1, activation=None)) 41 | 42 | def call(self, queries, keys, keys_length): 43 | #Attention 44 | queries = tf.tile(tf.expand_dims(queries, 1), [1, tf.shape(keys)[1], 1]) 45 | din_all = tf.concat([queries, keys, queries-keys, queries*keys], axis=-1) 46 | outputs = tf.transpose(self.fc(din_all), [0,2,1]) 47 | key_masks = tf.sequence_mask(keys_length, max(keys_length), dtype=tf.bool) 48 | key_masks = tf.expand_dims(key_masks, 1) 49 | paddings = tf.ones_like(outputs) * (-2 ** 32 + 1) 50 | outputs = tf.where(key_masks, outputs, paddings) 51 | outputs = outputs / (self.keys_dim ** 0.5) 52 | #outputs = tf.keras.activations.softmax(outputs, -1) 53 | outputs = tf.keras.activations.sigmoid(outputs) 54 | 55 | #Sum Pooling 56 | outputs = tf.squeeze(tf.matmul(outputs, keys)) 57 | print("outputs:" + str(outputs.numpy().shape)) 58 | return outputs -------------------------------------------------------------------------------- /loss.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.keras import layers 3 | 4 | class AuxLayer(layers.Layer): 5 | def __init__(self): 6 | super().__init__() 7 | self.fc = tf.keras.Sequential() 8 | self.fc.add(layers.BatchNormalization()) 9 | self.fc.add(layers.Dense(100, activation="sigmoid")) 10 | self.fc.add(layers.ReLU()) 11 | 
self.fc.add(layers.Dense(50, activation="sigmoid")) 12 | self.fc.add(layers.ReLU()) 13 | self.fc.add(layers.Dense(2, activation=None)) 14 | 15 | def call(self, input): 16 | logit = tf.squeeze(self.fc(input)) 17 | return tf.keras.activations.softmax(logit) 18 | 19 | -------------------------------------------------------------------------------- /main.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/StephenBo-China/DIEN-DIN/e1d9bb0591f0e0ce5be35cbf328077f6da2a45d2/main.ipynb -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.keras import layers 3 | from layers import AUGRU 4 | from activations import Dice 5 | import pandas as pd 6 | from model import DIEN 7 | import alibaba_data_reader as data_reader 8 | 9 | def train_one_step(user_profile_dict, user_profile_list, click_behavior_dict, target_item_dict, noclick_behavior_dict, user_behavior_list, label, optimizer, model, alpha, loss_metric): 10 | with tf.GradientTape() as tape: 11 | output, logit, aux_loss = model(user_profile_dict, user_profile_list, click_behavior_dict, target_item_dict, noclick_behavior_dict, user_behavior_list) 12 | target_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logit,labels=tf.cast(label, dtype=tf.float32))) 13 | final_loss = target_loss + alpha * aux_loss 14 | print("[Train Step] aux_loss=" + str(aux_loss.numpy()) + ", target_loss=" + str(target_loss.numpy()) + ", final_loss=" + str(final_loss.numpy())) 15 | gradient = tape.gradient(final_loss, model.trainable_variables) 16 | clip_gradient, _ = tf.clip_by_global_norm(gradient, 5.0) 17 | optimizer.apply_gradients(zip(clip_gradient, model.trainable_variables)) 18 | loss_metric(final_loss) 19 | 20 | def get_train_data(label, target_cate, target_brand, cms_segid, cms_group, gender, 
age, pvalue, shopping, occupation, user_class_level, hist_brand_behavior_clk, hist_cate_behavior_clk, hist_brand_behavior_show, hist_cate_behavior_show): 21 | user_profile_dict = { 22 | "cms_segid": cms_segid, 23 | "cms_group": cms_group, 24 | "gender": gender, 25 | "age": age, 26 | "pvalue": pvalue, 27 | "shopping": shopping, 28 | "occupation": occupation, 29 | "user_class_level": user_class_level 30 | } 31 | user_profile_list = ["cms_segid", "cms_group", "gender", "age", "pvalue", "shopping", "occupation", "user_class_level"] 32 | user_behavior_list = ["brand", "cate"] 33 | click_behavior_dict = { 34 | "brand": hist_brand_behavior_clk, 35 | "cate": hist_cate_behavior_clk 36 | } 37 | noclick_behavior_dict = { 38 | "brand": hist_brand_behavior_show, 39 | "cate": hist_cate_behavior_show 40 | } 41 | target_item_dict = { 42 | "brand": target_cate, 43 | "cate": target_brand 44 | } 45 | return user_profile_dict, user_profile_list, user_behavior_list, click_behavior_dict, noclick_behavior_dict, target_item_dict 46 | 47 | def main(): 48 | train_data, test_data, embedding_count = data_reader.get_data() 49 | embedding_features_list = data_reader.get_embedding_features_list() 50 | user_behavior_features = data_reader.get_user_behavior_features() 51 | embedding_count_dict = data_reader.get_embedding_count_dict(embedding_features_list, embedding_count) 52 | embedding_dim_dict = data_reader.get_embedding_dim_dict(embedding_features_list) 53 | model = DIEN(embedding_count_dict, embedding_dim_dict, embedding_features_list, user_behavior_features) 54 | min_batch = 0 55 | batch = 100 56 | label, target_cate, target_brand, cms_segid, cms_group, gender, age, pvalue, shopping, occupation, user_class_level, hist_brand_behavior_clk, hist_cate_behavior_clk, hist_brand_behavior_show, hist_cate_behavior_show, min_batch, clk_length, show_length = data_reader.get_batch_data(train_data, min_batch, batch = batch) 57 | user_profile_dict, user_profile_list, user_behavior_list, 
click_behavior_dict, noclick_behavior_dict, target_item_dict = get_train_data(label, target_cate, target_brand, cms_segid, cms_group, gender, age, pvalue, shopping, occupation, user_class_level, hist_brand_behavior_clk, hist_cate_behavior_clk, hist_brand_behavior_show, hist_cate_behavior_show) 58 | log_path = "./train_log/" 59 | train_summary_writer = tf.summary.create_file_writer(log_path) 60 | optimizer = tf.keras.optimizers.Adam(lr=1e-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0) 61 | loss_metric = tf.keras.metrics.Sum() 62 | auc_metric = tf.keras.metrics.AUC() 63 | alpha = 1 64 | epochs = 1 65 | for epoch in range(epochs): 66 | min_batch = 0 67 | for i in range(int(len(train_data) / batch)): 68 | label, target_cate, target_brand, cms_segid, cms_group, gender, age, pvalue, shopping, occupation, user_class_level, hist_brand_behavior_clk, hist_cate_behavior_clk, hist_brand_behavior_show, hist_cate_behavior_show, min_batch, clk_length, show_length = data_reader.get_batch_data(train_data, min_batch, batch = batch) 69 | user_profile_dict, user_profile_list, user_behavior_list, click_behavior_dict, noclick_behavior_dict, target_item_dict = get_train_data(label, target_cate, target_brand, cms_segid, cms_group, gender, age, pvalue, shopping, occupation, user_class_level, hist_brand_behavior_clk, hist_cate_behavior_clk, hist_brand_behavior_show, hist_cate_behavior_show) 70 | train_one_step(user_profile_dict, user_profile_list, click_behavior_dict, target_item_dict, noclick_behavior_dict, user_behavior_list, label, optimizer, model, alpha, loss_metric) 71 | 72 | 73 | if __name__ == "__main__": 74 | print(tf.__version__) 75 | print("GPU Available: ", tf.test.is_gpu_available()) 76 | main() -------------------------------------------------------------------------------- /model.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.keras import layers 3 | from layers import AUGRU,attention 
# model.py (continued). The first three imports of this module
# (tensorflow as tf, tensorflow.keras.layers, layers.AUGRU/attention)
# sit on the preceding line of this dump.
from activations import Dice,dice
from loss import AuxLayer
import utils


def _build_fc(activation):
    """Build the prediction MLP shared by DIEN and DIN.

    Layout: BatchNormalization -> Dense(200) -> [act] -> Dense(80) -> [act]
    -> Dense(2), where [act] is selected by `activation`:
    "Dice" -> Dice(), "dice" -> dice(units), "PReLU" -> layers.PReLU.
    Any other value adds no extra activation layer.

    Args:
        activation: name of the activation inserted after each hidden Dense.

    Returns:
        A tf.keras.Sequential producing 2 logits per example.
    """
    fc = tf.keras.Sequential()
    fc.add(layers.BatchNormalization())
    for units in (200, 80):
        # NOTE(review): the Dense layer already applies ReLU, so the PReLU/Dice
        # layer that follows only ever sees non-negative inputs (PReLU is then
        # the identity). Presumably the Dense was meant to be linear -- confirm
        # before changing, since it alters training results.
        fc.add(layers.Dense(units, activation="relu"))
        if activation == "Dice":
            fc.add(Dice())
        elif activation == "dice":
            fc.add(dice(units))
        elif activation == "PReLU":
            fc.add(layers.PReLU(alpha_initializer='zeros', weights=None))
    fc.add(layers.Dense(2, activation=None))
    return fc


def _embed_features(embedding_layers, data_dict, feature_names):
    """Embed every named feature and concatenate the results on the last axis.

    Args:
        embedding_layers: dict feature name -> Embedding layer.
        data_dict: dict feature name -> index tensor for that feature.
        feature_names: names to embed, in concatenation order.

    Returns:
        The tensor returned by utils.concat_features for the embedded features.
    """
    embedded = {
        name: embedding_layers[name](data_dict[name]) for name in feature_names
    }
    return utils.concat_features(embedded)


class DIEN(tf.keras.Model):
    """DIEN model: embeddings -> GRU -> attention -> AUGRU -> MLP, plus an auxiliary loss."""

    def __init__(self, embedding_count_dict, embedding_dim_dict, embedding_features_list, user_behavior_features, activation="PReLU"):
        """Create the Embedding, GRU, attention, auxiliary, AUGRU and MLP layers.

        Args:
            embedding_count_dict: dict str -> int, vocabulary size of each
                embedded feature (largest integer index + 1).
            embedding_dim_dict: dict str -> int, embedding output size of each
                feature.
            embedding_features_list: list of str, every feature that gets an
                Embedding layer. It must cover the user-profile features AND
                the behavior/target-item features, because get_emb looks all
                of them up in the same table.
            user_behavior_features: list of str, features that make up one
                behavior / target-item embedding; the sum of their dims is
                used as the GRU and AUGRU size.
            activation: "PReLU" (default), "Dice" or "dice"; selects the
                activation layer added after each hidden Dense layer.
        """
        # The constructor arguments belong to this class, not to Keras: TF 2.0
        # silently ignored them when forwarded, newer Keras versions reject them.
        super().__init__()
        #Init Embedding Layer
        self.embedding_dim_dict = embedding_dim_dict
        self.embedding_count_dict = embedding_count_dict
        self.embedding_layers = dict()
        for feature in embedding_features_list:
            self.embedding_layers[feature] = layers.Embedding(embedding_count_dict[feature], embedding_dim_dict[feature])
        behavior_dim = self.get_GRU_input_dim(embedding_dim_dict, user_behavior_features)
        #Init GRU Layer
        self.user_behavior_gru = layers.GRU(behavior_dim, return_sequences=True)
        #Init Attention Layer
        self.attention_layer = layers.Softmax()
        #Init Auxiliary Layer
        self.AuxNet = AuxLayer()
        #Init AUGRU Layer
        self.user_behavior_augru = AUGRU(behavior_dim)
        #Init Fully Connection Layer
        self.fc = _build_fc(activation)

    def get_GRU_input_dim(self, embedding_dim_dict, user_behavior_features):
        """Return the summed embedding dims of `user_behavior_features`."""
        return sum(embedding_dim_dict[feature] for feature in user_behavior_features)

    def get_emb(self, user_profile_dict, user_profile_list, click_behavior_dict, target_item_dict, noclick_behavior_dict, user_behavior_list):
        """Embed the user profile, the target item and the click history.

        `noclick_behavior_dict` is accepted for interface compatibility but is
        not used: only the click sequence is embedded.

        Returns:
            Tuple (user_profile_embedding, target_item_embedding,
            click_behavior_embedding), each concatenated over its features.
        """
        user_profile_embedding = _embed_features(self.embedding_layers, user_profile_dict, user_profile_list)
        target_item_embedding = _embed_features(self.embedding_layers, target_item_dict, user_behavior_list)
        click_behavior_embedding = _embed_features(self.embedding_layers, click_behavior_dict, user_behavior_list)
        return user_profile_embedding, target_item_embedding, click_behavior_embedding

    def auxiliary_loss(self, hidden_states, embedding_out):
        """Auxiliary loss computed from the click sequence only.

        Each hidden state is concatenated with the embedding passed alongside
        it and fed through AuxNet; the loss is the mean negative log of
        channel 0 of the AuxNet output. The original authors note that the
        paper's code also uses the shown (non-clicked) sequence; this
        implementation does not.

        Args:
            hidden_states: GRU hidden states h(0) .. h(n-1).
            embedding_out: GRU input embeddings e(1) .. e(n).

        Returns:
            Scalar tensor, mean of -log(p) over all positions.
        """
        click_input_ = tf.concat([hidden_states, embedding_out], -1)
        click_prop_ = self.AuxNet(click_input_)[:, :, 0]
        # NOTE(review): no clamp before the log; presumably AuxNet outputs
        # probabilities in (0, 1] -- an exact 0 would give inf here. Confirm.
        click_loss_ = - tf.reshape(tf.math.log(click_prop_), [-1, tf.shape(embedding_out)[1]])
        return tf.reduce_mean(click_loss_)

    def call(self, user_profile_dict, user_profile_list, click_behavior_dict, target_item_dict, noclick_behavior_dict, user_behavior_list):
        """Run one forward pass on a batch.

        The caller is expected to derive the target loss and the final loss
        from the returned values and apply the gradients itself.

        Args:
            user_profile_dict: dict str -> Tensor, user-profile feature data.
            user_profile_list: list of str, user-profile feature names.
            click_behavior_dict: dict str -> Tensor, clicked-history features.
            target_item_dict: dict str -> Tensor, target-item features.
            noclick_behavior_dict: dict str -> Tensor, non-clicked history
                features (currently unused).
            user_behavior_list: list of str, behavior feature names.

        Returns:
            Tuple (output, logit, aux_loss): softmax probabilities, raw
            logits reshaped to (-1, 2), and the auxiliary loss.
        """
        #Embedding Layer
        user_profile_embedding, target_item_embedding, click_behavior_emebedding = self.get_emb(user_profile_dict, user_profile_list, click_behavior_dict, target_item_dict, noclick_behavior_dict, user_behavior_list)
        #GRU Layer
        click_gru_emb = self.user_behavior_gru(click_behavior_emebedding)
        #Auxiliary Loss: pair each hidden state with the embedding of the next step
        aux_loss = self.auxiliary_loss(click_gru_emb[:, :-1, :], click_behavior_emebedding[:, 1:, :])
        #Attention Layer: dot product of the target item with every hidden state,
        #softmax over the last (time) axis
        hist_attn = self.attention_layer(tf.matmul(tf.expand_dims(target_item_embedding, 1), click_gru_emb, transpose_b=True))
        #AUGRU Layer: step through time-major hidden states and attention scores
        augru_hidden_state = tf.zeros_like(click_gru_emb[:, 0, :])
        for in_emb, in_att in zip(tf.transpose(click_gru_emb, [1, 0, 2]), tf.transpose(hist_attn, [2, 0, 1])):
            augru_hidden_state = self.user_behavior_augru(in_emb, augru_hidden_state, in_att)
        join_emb = tf.concat([augru_hidden_state, user_profile_embedding], -1)
        # reshape instead of squeeze: squeeze would also drop the batch axis when
        # the batch holds a single example, and softmax rejects 1-D input.
        logit = tf.reshape(self.fc(join_emb), [-1, 2])
        output = tf.keras.activations.softmax(logit)
        return output, logit, aux_loss


class DIN(tf.keras.Model):
    """DIN model: embeddings -> attention pooling over the history -> MLP."""

    def __init__(self, embedding_count_dict, embedding_dim_dict, embedding_features_list, user_behavior_features, activation="PReLU"):
        """Create the Embedding, attention and MLP layers.

        Args:
            embedding_count_dict: dict str -> int, vocabulary size per feature.
            embedding_dim_dict: dict str -> int, embedding size per feature.
            embedding_features_list: list of str, every feature that gets an
                Embedding layer (profile and behavior/target-item features).
            user_behavior_features: list of str, features of one behavior
                embedding; the sum of their dims sizes the attention layer.
            activation: "PReLU" (default), "Dice" or "dice".
        """
        # See DIEN.__init__: Keras must not receive this class's own arguments.
        super().__init__()
        #Init Embedding Layer
        self.embedding_dim_dict = embedding_dim_dict
        self.embedding_count_dict = embedding_count_dict
        self.embedding_layers = dict()
        for feature in embedding_features_list:
            self.embedding_layers[feature] = layers.Embedding(embedding_count_dict[feature], embedding_dim_dict[feature])
        #DIN Attention+Sum pooling
        self.hist_at = attention(utils.get_input_dim(embedding_dim_dict, user_behavior_features))
        #Init Fully Connection Layer
        self.fc = _build_fc(activation)

    def get_emb_din(self, user_profile_dict, user_profile_list, hist_behavior_dict, target_item_dict, user_behavior_list):
        """Embed the user profile, the target item and the behavior history.

        Returns:
            Tuple (user_profile_embedding, target_item_embedding,
            hist_behavior_embedding), each concatenated over its features.
        """
        user_profile_embedding = _embed_features(self.embedding_layers, user_profile_dict, user_profile_list)
        target_item_embedding = _embed_features(self.embedding_layers, target_item_dict, user_behavior_list)
        hist_behavior_embedding = _embed_features(self.embedding_layers, hist_behavior_dict, user_behavior_list)
        return user_profile_embedding, target_item_embedding, hist_behavior_embedding

    def call(self, user_profile_dict, user_profile_list, hist_behavior_dict, target_item_dict, user_behavior_list, length):
        """Run one forward pass on a batch.

        Args:
            user_profile_dict: dict str -> Tensor, user-profile feature data.
            user_profile_list: list of str, user-profile feature names.
            hist_behavior_dict: dict str -> Tensor, behavior-history features.
            target_item_dict: dict str -> Tensor, target-item features.
            user_behavior_list: list of str, behavior feature names.
            length: forwarded unchanged to the attention layer (presumably the
                valid history length per example -- confirm in layers.py).

        Returns:
            Tuple (output, logit): softmax probabilities and raw logits
            reshaped to (-1, 2).
        """
        #Embedding Layer
        user_profile_embedding, target_item_embedding, hist_behavior_emebedding = self.get_emb_din(user_profile_dict, user_profile_list, hist_behavior_dict, target_item_dict, user_behavior_list)
        hist_attn_emb = self.hist_at(target_item_embedding, hist_behavior_emebedding, length)
        join_emb = tf.concat([user_profile_embedding, target_item_embedding, hist_attn_emb], -1)
        # reshape instead of squeeze: keeps the batch axis for single-example batches.
        logit = tf.reshape(self.fc(join_emb), [-1, 2])
        output = tf.keras.activations.softmax(logit)
        return output, logit


if __name__ == "__main__":
    model = DIN(dict(), dict(), list(), list())
# --------------------------------------------------------------------------------
# /tensorboard.log:
# --------------------------------------------------------------------------------
# nohup: ignoring input
# TensorBoard 2.0.0 at http://10.186.3.226:8028/ (Press CTRL+C to quit)
#
# --------------------------------------------------------------------------------
# /tensorboard.sh:
# --------------------------------------------------------------------------------
# tensorboard --logdir=./train_log/din/ --host=10.186.3.226 --port=8028
#
# --------------------------------------------------------------------------------
# /utils.py:
# --------------------------------------------------------------------------------
import os
import time

import tensorflow as tf


def _prepare_loss_file(model, level, file_name):
    """Return "./loss/<model>/<level>_<file_name>", creating the directory if needed."""
    path = "./loss/" + model + "/" + level + "_" + file_name
    os.makedirs(os.path.dirname(path), exist_ok=True)
    return path


def _format_loss(value):
    """Render one loss value as CSV text; objects exposing .numpy() are unwrapped first."""
    if hasattr(value, "numpy"):
        value = value.numpy()
    return str(value)


def get_file_name():
    """Return a loss-file name of the form "loss.csv.<YYYY_mm_dd_HH_MM_SS>" (local time)."""
    now_time = time.strftime("%Y_%m_%d_%H_%M_%S", time.localtime())
    return "loss.csv." + now_time


def make_train_loss_dir(file_name, cols=("train_aux_loss", "train_target_loss", "train_final_loss"), model="dien"):
    """Create (or truncate) the training loss CSV and write its header row.

    Args:
        file_name: file name suffix, typically from get_file_name().
        cols: column names written as the header.
        model: sub-directory of ./loss/ the file is placed in.
    """
    with open(_prepare_loss_file(model, "train", file_name), "w") as f:
        f.write(",".join(cols) + "\n")


def make_test_loss_dir(file_name, cols=("test_aux_loss", "test_target_loss", "test_final_loss"), model="dien"):
    """Create (or truncate) the test loss CSV and write its header row.

    Args:
        file_name: file name suffix, typically from get_file_name().
        cols: column names written as the header.
        model: sub-directory of ./loss/ the file is placed in.
    """
    with open(_prepare_loss_file(model, "test", file_name), "w") as f:
        f.write(",".join(cols) + "\n")


def add_loss(loss_dict, file_name, cols=("aux_loss", "target_loss", "final_loss"), level="train", model="dien"):
    """Append one row of loss values to the loss CSV.

    Args:
        loss_dict: mapping column name -> loss value. Values may be strings,
            numbers or objects exposing .numpy(); they are converted to text.
        file_name: file name suffix used when the CSV was created.
        cols: keys of `loss_dict` to write, in column order.
        level: file prefix, "train" or "test".
        model: sub-directory of ./loss/ the file lives in.

    Raises:
        KeyError: if a name in `cols` is missing from `loss_dict`.
    """
    # Convert before opening so a bad row never leaves a half-written line.
    row = ",".join(_format_loss(loss_dict[col]) for col in cols)
    with open(_prepare_loss_file(model, level, file_name), "a") as f:
        f.write(row + "\n")


def get_input_dim(embedding_dim_dict, user_behavior_features):
    """Return the summed embedding dims of `user_behavior_features`."""
    return sum(embedding_dim_dict[feature] for feature in user_behavior_features)


def concat_features(feature_data_dict):
    """Concatenate the dict's values on the last axis, in the dict's iteration order."""
    return tf.concat(list(feature_data_dict.values()), -1)


def mkdir(path):
    """Create `path` (including parents) if it does not exist.

    Returns:
        0 if the directory exists afterwards, 1 if it could not be created.
    """
    try:
        os.makedirs(path, exist_ok=True)
    except OSError:
        return 1
    return 0
# --------------------------------------------------------------------------------