├── DIEN_train_example.ipynb
├── DIN_train_example.ipynb
├── README.md
├── __pycache__
├── activations.cpython-37.pyc
├── alibaba_data_reader.cpython-37.pyc
├── layers.cpython-37.pyc
├── loss.cpython-37.pyc
├── model.cpython-37.pyc
└── utils.cpython-37.pyc
├── activations.py
├── alibaba_data_reader.py
├── layers.py
├── loss.py
├── main.ipynb
├── main.py
├── model.py
├── tensorboard.log
├── tensorboard.sh
└── utils.py
/DIEN_train_example.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import tensorflow as tf\n",
10 | "from tensorflow.keras import layers\n",
11 | "from layers import AUGRU\n",
12 | "from activations import Dice,dice\n",
13 | "import pandas as pd\n",
14 | "from model import DIEN\n",
15 | "import alibaba_data_reader as data_reader\n",
16 | "import utils\n",
17 | "import matplotlib\n",
18 | "import matplotlib.pyplot as plt\n",
19 | "from matplotlib.font_manager import FontProperties\n",
20 | "from matplotlib.pyplot import MultipleLocator\n",
21 | "import numpy as np\n",
22 | "import os\n",
23 | "from loss import AuxLayer"
24 | ]
25 | },
26 | {
27 | "cell_type": "code",
28 | "execution_count": 2,
29 | "metadata": {},
30 | "outputs": [],
31 | "source": [
32 | "def mkdir(path):\n",
33 | "    \"\"\"Create `path` (with parents) if it is missing; return 0 on success, 1 when the OS refuses.\"\"\"\n",
34 | "    try:\n",
35 | "        os.makedirs(path, exist_ok=True)  # one call instead of a racy exists()+makedirs() pair\n",
36 | "        return 0\n",
37 | "    except OSError:  # only filesystem failures map to 1; other errors propagate\n",
38 | "        return 1\n",
39 | "model_name = \"dien\""
40 | ]
41 | },
42 | {
43 | "cell_type": "code",
44 | "execution_count": 3,
45 | "metadata": {},
46 | "outputs": [],
47 | "source": [
48 | "def is_in_notebook():\n",
49 | "    from sys import modules as loaded_modules\n",
50 | "    return 'ipykernel' in loaded_modules  # True once the ipykernel module has been imported\n",
51 | "def clear_output():\n",
52 | "    \"\"\"Wipe the console and, when running under Jupyter, the cell output as well.\"\"\"\n",
53 | "    import os\n",
54 | "    shell_command = 'cls' if os.name == 'nt' else 'clear'\n",
55 | "    os.system(shell_command)\n",
56 | "    if not is_in_notebook():\n",
57 | "        return\n",
58 | "    from IPython.display import clear_output as clear\n",
59 | "    clear()"
60 | ]
61 | },
62 | {
63 | "cell_type": "code",
64 | "execution_count": 4,
65 | "metadata": {},
66 | "outputs": [
67 | {
68 | "name": "stdout",
69 | "output_type": "stream",
70 | "text": [
71 | "2\n"
72 | ]
73 | }
74 | ],
75 | "source": [
76 | "print(1)\n",
77 | "clear_output()\n",
78 | "print(2)"
79 | ]
80 | },
81 | {
82 | "cell_type": "code",
83 | "execution_count": 5,
84 | "metadata": {
85 | "tags": []
86 | },
87 | "outputs": [
88 | {
89 | "name": "stdout",
90 | "output_type": "stream",
91 | "text": [
92 | "2.0.0\n",
93 | "GPU Available: True\n"
94 | ]
95 | }
96 | ],
97 | "source": [
98 | "print(tf.__version__)\n",
99 | "print(\"GPU Available: \", tf.test.is_gpu_available())"
100 | ]
101 | },
102 | {
103 | "cell_type": "code",
104 | "execution_count": 6,
105 | "metadata": {},
106 | "outputs": [],
107 | "source": [
108 | "file_path = \"/nfs/project/boweihan_2/DIEN/dien_final/\"\n",
109 | "file_path = \"\""
110 | ]
111 | },
112 | {
113 | "cell_type": "markdown",
114 | "metadata": {},
115 | "source": [
116 | "# 模型训练"
117 | ]
118 | },
119 | {
120 | "cell_type": "code",
121 | "execution_count": 7,
122 | "metadata": {},
123 | "outputs": [
124 | {
125 | "data": {
126 | "text/html": [
127 | "
\n",
128 | "\n",
141 | "
\n",
142 | " \n",
143 | " \n",
144 | " | \n",
145 | " brand | \n",
146 | " cate | \n",
147 | " cms_segid | \n",
148 | " cms_group | \n",
149 | " gender | \n",
150 | " age | \n",
151 | " pvalue | \n",
152 | " shopping | \n",
153 | " occupation | \n",
154 | " user_class_level | \n",
155 | "
\n",
156 | " \n",
157 | " \n",
158 | " \n",
159 | " 0 | \n",
160 | " 460561 | \n",
161 | " 12968 | \n",
162 | " 97 | \n",
163 | " 13 | \n",
164 | " 2 | \n",
165 | " 7 | \n",
166 | " 3 | \n",
167 | " 3 | \n",
168 | " 2 | \n",
169 | " 4 | \n",
170 | "
\n",
171 | " \n",
172 | "
\n",
173 | "
"
174 | ],
175 | "text/plain": [
176 | " brand cate cms_segid cms_group gender age pvalue shopping \\\n",
177 | "0 460561 12968 97 13 2 7 3 3 \n",
178 | "\n",
179 | " occupation user_class_level \n",
180 | "0 2 4 "
181 | ]
182 | },
183 | "execution_count": 7,
184 | "metadata": {},
185 | "output_type": "execute_result"
186 | }
187 | ],
188 | "source": [
189 | "train_data, test_data, embedding_count = data_reader.get_data()\n",
190 | "embedding_count"
191 | ]
192 | },
193 | {
194 | "cell_type": "code",
195 | "execution_count": 8,
196 | "metadata": {},
197 | "outputs": [],
198 | "source": [
199 | "embedding_features_list = data_reader.get_embedding_features_list()\n",
200 | "user_behavior_features = data_reader.get_user_behavior_features()\n",
201 | "embedding_count_dict = data_reader.get_embedding_count_dict(embedding_features_list, embedding_count)\n",
202 | "embedding_dim_dict = data_reader.get_embedding_dim_dict(embedding_features_list)"
203 | ]
204 | },
205 | {
206 | "cell_type": "code",
207 | "execution_count": 9,
208 | "metadata": {},
209 | "outputs": [],
210 | "source": [
211 | "import time\n",
212 | "stamp = time.strftime(\"%Y%m%d-%H%M%S\", time.localtime())\n",
213 | "mkdir(\"./train_log/\" + model_name)\n",
214 | "log_path = \"./train_log/\"+model_name+\"/%s\" % stamp\n",
215 | "train_summary_writer = tf.summary.create_file_writer(log_path)\n",
216 | "tf.summary.trace_on(graph=True, profiler=True)\n",
217 | "loss_file_name = utils.get_file_name()\n",
218 | "mkdir(\"./loss/\" + model_name + \"/\")\n",
219 | "utils.make_train_loss_dir(loss_file_name, cols=[\"train_aux_loss\",\"train_target_loss\",\"train_final_loss\"], model=model_name)\n",
220 | "utils.make_test_loss_dir(loss_file_name, cols=[\"test_aux_loss\",\"test_target_loss\",\"test_final_loss\"], model=model_name)"
221 | ]
222 | },
223 | {
224 | "cell_type": "code",
225 | "execution_count": 10,
226 | "metadata": {},
227 | "outputs": [
228 | {
229 | "data": {
230 | "text/plain": [
231 | ""
232 | ]
233 | },
234 | "execution_count": 10,
235 | "metadata": {},
236 | "output_type": "execute_result"
237 | }
238 | ],
239 | "source": [
240 | "model = DIEN(\n",
241 | " embedding_count_dict, \n",
242 | " embedding_dim_dict, \n",
243 | " embedding_features_list, \n",
244 | " user_behavior_features, \n",
245 | " activation=\"dice\")\n",
246 | "model"
247 | ]
248 | },
249 | {
250 | "cell_type": "code",
251 | "execution_count": 11,
252 | "metadata": {},
253 | "outputs": [],
254 | "source": [
255 | "min_batch = 0\n",
256 | "batch = 100\n",
257 | "optimizer = tf.keras.optimizers.Adam(lr=1e-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)\n",
258 | "loss_metric = tf.keras.metrics.Sum()\n",
259 | "auc_metric = tf.keras.metrics.AUC()\n",
260 | "alpha = 1\n",
261 | "epochs = 3"
262 | ]
263 | },
264 | {
265 | "cell_type": "code",
266 | "execution_count": 12,
267 | "metadata": {},
268 | "outputs": [],
269 | "source": [
270 | "label, target_cate, target_brand, cms_segid, cms_group, gender, age, pvalue, shopping, occupation, user_class_level, hist_brand_behavior_clk, hist_cate_behavior_clk, hist_brand_behavior_show, hist_cate_behavior_show, min_batch, clk_length, show_length = data_reader.get_batch_data(train_data, min_batch, batch = batch)"
271 | ]
272 | },
273 | {
274 | "cell_type": "code",
275 | "execution_count": 13,
276 | "metadata": {},
277 | "outputs": [],
278 | "source": [
279 | "def get_train_data(label, target_cate, target_brand, cms_segid, cms_group, gender, age, pvalue, shopping, occupation, user_class_level, hist_brand_behavior_clk, hist_cate_behavior_clk, hist_brand_behavior_show, hist_cate_behavior_show):\n",
280 | "    \"\"\"Bundle one batch of feature values into the dicts and key lists passed to the model.\n",
281 | "\n",
282 | "    `label` is accepted but not used. The profile and history arguments are stored\n",
283 | "    unchanged under their feature names; the two target arguments are stored under\n",
284 | "    the same \"brand\"/\"cate\" keys as the click and no-click histories.\n",
285 | "\n",
286 | "    Returns the tuple (user_profile_dict, user_profile_list, user_behavior_list,\n",
287 | "    click_behavior_dict, noclick_behavior_dict, target_item_dict).\n",
288 | "    \"\"\"\n",
289 | "    user_profile_dict = {\n",
290 | "        \"cms_segid\": cms_segid, \"cms_group\": cms_group, \"gender\": gender, \"age\": age,\n",
291 | "        \"pvalue\": pvalue, \"shopping\": shopping, \"occupation\": occupation,\n",
292 | "        \"user_class_level\": user_class_level,\n",
293 | "    }\n",
294 | "    user_profile_list = list(user_profile_dict)  # keys in the insertion order of the literal above\n",
295 | "    user_behavior_list = [\"brand\", \"cate\"]\n",
296 | "    click_behavior_dict = {\"brand\": hist_brand_behavior_clk, \"cate\": hist_cate_behavior_clk}\n",
297 | "    noclick_behavior_dict = {\"brand\": hist_brand_behavior_show, \"cate\": hist_cate_behavior_show}\n",
298 | "    # Pair each key with its own target value: with the two crossed, the model\n",
299 | "    # receives category values under \"brand\" and brand values under \"cate\".\n",
300 | "    target_item_dict = {\"brand\": target_brand, \"cate\": target_cate}\n",
301 | "    return user_profile_dict, user_profile_list, user_behavior_list, click_behavior_dict, noclick_behavior_dict, target_item_dict"
305 | ]
306 | },
307 | {
308 | "cell_type": "code",
309 | "execution_count": 14,
310 | "metadata": {},
311 | "outputs": [],
312 | "source": [
313 | "user_profile_dict, user_profile_list, user_behavior_list, click_behavior_dict, noclick_behavior_dict, target_item_dict = get_train_data(label, target_cate, target_brand, cms_segid, cms_group, gender, age, pvalue, shopping, occupation, user_class_level, hist_brand_behavior_clk, hist_cate_behavior_clk, hist_brand_behavior_show, hist_cate_behavior_show) "
314 | ]
315 | },
316 | {
317 | "cell_type": "code",
318 | "execution_count": 15,
319 | "metadata": {},
320 | "outputs": [],
321 | "source": [
322 | "def train_one_step(user_profile_dict, user_profile_list, click_behavior_dict, target_item_dict, noclick_behavior_dict, user_behavior_list, label):\n",
323 | "    \"\"\"Run one optimizer step on the global `model`; return (aux_loss, target_loss, final_loss) as numpy values.\"\"\"\n",
324 | "    with tf.GradientTape() as tape:\n",
325 | "        output, logit, aux_loss = model(user_profile_dict, user_profile_list, click_behavior_dict, target_item_dict, noclick_behavior_dict, user_behavior_list)\n",
326 | "        target_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.cast(label, dtype=tf.float32), logits=logit))\n",
327 | "        final_loss = target_loss + alpha * aux_loss  # global `alpha` weights the auxiliary loss\n",
328 | "    variables = model.trainable_variables\n",
329 | "    clipped, _ = tf.clip_by_global_norm(tape.gradient(final_loss, variables), 5.0)\n",
330 | "    optimizer.apply_gradients(zip(clipped, variables))\n",
331 | "    loss_metric(final_loss)\n",
332 | "    return tuple(loss.numpy() for loss in (aux_loss, target_loss, final_loss))"
333 | ]
334 | },
335 | {
336 | "cell_type": "code",
337 | "execution_count": 16,
338 | "metadata": {},
339 | "outputs": [],
340 | "source": [
341 | "def get_test_loss(user_profile_dict, user_profile_list, click_behavior_dict, target_item_dict, noclick_behavior_dict, user_behavior_list, label):\n",
342 | "    \"\"\"Forward pass on the global `model` with no weight update; return (aux_loss, target_loss, final_loss) as numpy values.\"\"\"\n",
343 | "    output, logit, aux_loss = model(user_profile_dict, user_profile_list, click_behavior_dict, target_item_dict, noclick_behavior_dict, user_behavior_list)\n",
344 | "    float_label = tf.cast(label, dtype=tf.float32)\n",
345 | "    target_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=float_label, logits=logit))\n",
346 | "    return aux_loss.numpy(), target_loss.numpy(), (target_loss + alpha * aux_loss).numpy()"
347 | ]
348 | },
349 | {
350 | "cell_type": "code",
351 | "execution_count": 18,
352 | "metadata": {},
353 | "outputs": [],
354 | "source": [
355 | "#aux_loss, target_loss, final_loss = train_one_step(user_profile_dict, user_profile_list, click_behavior_dict, target_item_dict, noclick_behavior_dict, user_behavior_list, label)"
356 | ]
357 | },
358 | {
359 | "cell_type": "code",
360 | "execution_count": 17,
361 | "metadata": {},
362 | "outputs": [
363 | {
364 | "name": "stdout",
365 | "output_type": "stream",
366 | "text": [
367 | "WARNING:tensorflow:Layer dien is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2. The layer has dtype float32 because it's dtype defaults to floatx.\n",
368 | "\n",
369 | "If you intended to run this layer in float32, you can safely ignore this warning. If in doubt, this warning is likely only an issue if you are porting a TensorFlow 1.X model to TensorFlow 2.\n",
370 | "\n",
371 | "To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.\n",
372 | "\n"
373 | ]
374 | },
375 | {
376 | "data": {
377 | "text/plain": [
378 | "(0.89547175, 0.69206244, 1.5875342)"
379 | ]
380 | },
381 | "execution_count": 17,
382 | "metadata": {},
383 | "output_type": "execute_result"
384 | }
385 | ],
386 | "source": [
387 | "get_test_loss(user_profile_dict, user_profile_list, click_behavior_dict, target_item_dict, noclick_behavior_dict, user_behavior_list, label)"
388 | ]
389 | },
390 | {
391 | "cell_type": "code",
392 | "execution_count": 18,
393 | "metadata": {},
394 | "outputs": [
395 | {
396 | "name": "stdout",
397 | "output_type": "stream",
398 | "text": [
399 | "Model: \"dien\"\n",
400 | "_________________________________________________________________\n",
401 | "Layer (type) Output Shape Param # \n",
402 | "=================================================================\n",
403 | "embedding_5 (Embedding) multiple 448 \n",
404 | "_________________________________________________________________\n",
405 | "embedding_1 (Embedding) multiple 32000000 \n",
406 | "_________________________________________________________________\n",
407 | "embedding (Embedding) multiple 32100992 \n",
408 | "_________________________________________________________________\n",
409 | "embedding_3 (Embedding) multiple 832 \n",
410 | "_________________________________________________________________\n",
411 | "embedding_2 (Embedding) multiple 6208 \n",
412 | "_________________________________________________________________\n",
413 | "embedding_4 (Embedding) multiple 192 \n",
414 | "_________________________________________________________________\n",
415 | "embedding_8 (Embedding) multiple 320 \n",
416 | "_________________________________________________________________\n",
417 | "embedding_6 (Embedding) multiple 640 \n",
418 | "_________________________________________________________________\n",
419 | "embedding_7 (Embedding) multiple 256 \n",
420 | "_________________________________________________________________\n",
421 | "embedding_9 (Embedding) multiple 320 \n",
422 | "_________________________________________________________________\n",
423 | "gru (GRU) multiple 99072 \n",
424 | "_________________________________________________________________\n",
425 | "softmax (Softmax) multiple 0 \n",
426 | "_________________________________________________________________\n",
427 | "aux_layer (AuxLayer) multiple 31876 \n",
428 | "_________________________________________________________________\n",
429 | "augru (AUGRU) multiple 98688 \n",
430 | "_________________________________________________________________\n",
431 | "sequential_1 (Sequential) multiple 148122 \n",
432 | "=================================================================\n",
433 | "Total params: 64,487,966\n",
434 | "Trainable params: 64,485,614\n",
435 | "Non-trainable params: 2,352\n",
436 | "_________________________________________________________________\n"
437 | ]
438 | }
439 | ],
440 | "source": [
441 | "model.summary()"
442 | ]
443 | },
444 | {
445 | "cell_type": "code",
446 | "execution_count": 19,
447 | "metadata": {},
448 | "outputs": [
449 | {
450 | "name": "stdout",
451 | "output_type": "stream",
452 | "text": [
453 | "dien/embedding_5/embeddings:0\n",
454 | "dien/embedding_1/embeddings:0\n",
455 | "dien/embedding/embeddings:0\n",
456 | "dien/embedding_3/embeddings:0\n",
457 | "dien/embedding_2/embeddings:0\n",
458 | "dien/embedding_4/embeddings:0\n",
459 | "dien/embedding_8/embeddings:0\n",
460 | "dien/embedding_6/embeddings:0\n",
461 | "dien/embedding_7/embeddings:0\n",
462 | "dien/embedding_9/embeddings:0\n",
463 | "dien/gru/kernel:0\n",
464 | "dien/gru/recurrent_kernel:0\n",
465 | "dien/gru/bias:0\n",
466 | "dien/aux_layer/sequential/batch_normalization/gamma:0\n",
467 | "dien/aux_layer/sequential/batch_normalization/beta:0\n",
468 | "dien/aux_layer/sequential/dense/kernel:0\n",
469 | "dien/aux_layer/sequential/dense/bias:0\n",
470 | "dien/aux_layer/sequential/dense_1/kernel:0\n",
471 | "dien/aux_layer/sequential/dense_1/bias:0\n",
472 | "dien/aux_layer/sequential/dense_2/kernel:0\n",
473 | "dien/aux_layer/sequential/dense_2/bias:0\n",
474 | "dien/augru/gru_gates/dense_3/kernel:0\n",
475 | "dien/augru/gru_gates/dense_3/bias:0\n",
476 | "dien/augru/gru_gates/dense_4/kernel:0\n",
477 | "dien/augru/gru_gates_1/dense_5/kernel:0\n",
478 | "dien/augru/gru_gates_1/dense_5/bias:0\n",
479 | "dien/augru/gru_gates_1/dense_6/kernel:0\n",
480 | "dien/augru/gru_gates_2/dense_7/kernel:0\n",
481 | "dien/augru/gru_gates_2/dense_7/bias:0\n",
482 | "dien/augru/gru_gates_2/dense_8/kernel:0\n",
483 | "dien/sequential_1/batch_normalization_1/gamma:0\n",
484 | "dien/sequential_1/batch_normalization_1/beta:0\n",
485 | "dien/sequential_1/dense_9/kernel:0\n",
486 | "dien/sequential_1/dense_9/bias:0\n",
487 | "Variable:0\n",
488 | "Variable:0\n",
489 | "dien/sequential_1/dense_10/kernel:0\n",
490 | "dien/sequential_1/dense_10/bias:0\n",
491 | "Variable:0\n",
492 | "Variable:0\n",
493 | "dien/sequential_1/dense_11/kernel:0\n",
494 | "dien/sequential_1/dense_11/bias:0\n"
495 | ]
496 | }
497 | ],
498 | "source": [
499 | "for var in model.trainable_variables:\n",
500 | " print(var.name)"
501 | ]
502 | },
503 | {
504 | "cell_type": "code",
505 | "execution_count": 20,
506 | "metadata": {},
507 | "outputs": [],
508 | "source": [
509 | "def get_loss_fig(train_loss, test_loss):\n",
510 | "    \"\"\"Plot the aux and final loss histories (train dashed, test solid), save the PNG and show it.\n",
511 | "\n",
512 | "    Both arguments are dicts holding a sequence of per-step values under\n",
513 | "    \"aux_loss\" and \"final_loss\". The image is written to\n",
514 | "    ./loss/<model_name>/loss.png, replacing any earlier file of that name.\n",
515 | "    \"\"\"\n",
516 | "    fig = plt.figure()\n",
517 | "    # Line style is passed by keyword only: combining an \"r-\" format string with\n",
518 | "    # linestyle=\"--\" specifies the style twice, with conflicting values.\n",
519 | "    for prefix, history, style in [(\"train_\", train_loss, \"--\"), (\"test_\", test_loss, \"-\")]:\n",
520 | "        for loss_name, color in zip([\"aux_loss\", \"final_loss\"], [\"r\", \"b\"]):\n",
521 | "            plt.plot(history[loss_name], color=color, linestyle=style, label=prefix + loss_name)\n",
522 | "    plt.title(\"Loss\")\n",
523 | "    plt.xlabel('iteration')\n",
524 | "    plt.ylabel('loss')\n",
525 | "    plt.legend()\n",
526 | "    plt.savefig(\"./loss/\" + model_name + \"/loss.png\")\n",
527 | "    clear_output()  # drop the previously shown plot before displaying the new one\n",
528 | "    plt.show()\n",
529 | "    plt.close(fig)  # the training loop calls this repeatedly; do not leave a figure open per call"
533 | ]
534 | },
535 | {
536 | "cell_type": "code",
537 | "execution_count": 21,
538 | "metadata": {},
539 | "outputs": [],
540 | "source": [
541 | "def record_test_loss(test_loss, test_data, step, epoch=None):\n",
542 | "    \"\"\"Compute the losses on data_reader.get_test_data(test_data) and log them via utils.add_loss, the `test_loss` history and TensorBoard.\n",
543 | "\n",
544 | "    `epoch` only labels the TensorBoard tags; when omitted, the notebook-level `epoch` variable is used (0 if it is not defined yet).\n",
545 | "    \"\"\"\n",
546 | "    if epoch is None:\n",
547 | "        epoch = globals().get(\"epoch\", 0)  # keeps existing three-argument calls working\n",
548 | "    label, target_cate, target_brand, cms_segid, cms_group, gender, age, pvalue, shopping, occupation, user_class_level, hist_brand_behavior_clk, hist_cate_behavior_clk, hist_brand_behavior_show, hist_cate_behavior_show, clk_length, show_length = data_reader.get_test_data(test_data)\n",
549 | "    user_profile_dict, user_profile_list, user_behavior_list, click_behavior_dict, noclick_behavior_dict, target_item_dict = get_train_data(label, target_cate, target_brand, cms_segid, cms_group, gender, age, pvalue, shopping, occupation, user_class_level, hist_brand_behavior_clk, hist_cate_behavior_clk, hist_brand_behavior_show, hist_cate_behavior_show)\n",
550 | "    losses = dict(zip((\"aux_loss\", \"target_loss\", \"final_loss\"), get_test_loss(user_profile_dict, user_profile_list, click_behavior_dict, target_item_dict, noclick_behavior_dict, user_behavior_list, label)))\n",
551 | "    utils.add_loss({name: str(value) for name, value in losses.items()}, loss_file_name, level=\"test\")\n",
552 | "    with train_summary_writer.as_default():\n",
553 | "        for name, value in losses.items():\n",
554 | "            test_loss[name].append(float(value))\n",
555 | "            tf.summary.scalar(\"test_\" + name + \" epoch: \" + str(epoch), value, step=step)"
557 | ]
558 | },
559 | {
560 | "cell_type": "code",
561 | "execution_count": 22,
562 | "metadata": {},
563 | "outputs": [],
564 | "source": [
565 | "mkdir(\"./checkpoint/\" + model_name)\n",
566 | "checkpoint_path = \"./checkpoint/\" + model_name + \"/cp-{epoch:04d}.ckpt\"\n",
567 | "checkpoint_dir = os.path.dirname(checkpoint_path)"
568 | ]
569 | },
570 | {
571 | "cell_type": "code",
572 | "execution_count": 23,
573 | "metadata": {},
574 | "outputs": [
575 | {
576 | "data": {
577 | "image/png": "\n",
578 | "text/plain": [
579 | ""
580 | ]
581 | },
582 | "metadata": {
583 | "needs_background": "light"
584 | },
585 | "output_type": "display_data"
586 | }
587 | ],
588 | "source": [
589 | "train_loss = {\"aux_loss\":[], \"target_loss\":[], \"final_loss\":[]}\n",
590 | "test_loss = {\"aux_loss\":[], \"target_loss\":[], \"final_loss\":[]}\n",
591 | "for epoch in range(epochs):\n",
592 | " for i in range(int(len(train_data) / batch)):\n",
593 | " record_test_loss(test_loss, test_data, i)\n",
594 | " label, target_cate, target_brand, cms_segid, cms_group, gender, age, pvalue, shopping, occupation, user_class_level, hist_brand_behavior_clk, hist_cate_behavior_clk, hist_brand_behavior_show, hist_cate_behavior_show, min_batch, clk_length, show_length = data_reader.get_batch_data(train_data, min_batch, batch = batch)\n",
595 | " user_profile_dict, user_profile_list, user_behavior_list, click_behavior_dict, noclick_behavior_dict, target_item_dict = get_train_data(label, target_cate, target_brand, cms_segid, cms_group, gender, age, pvalue, shopping, occupation, user_class_level, hist_brand_behavior_clk, hist_cate_behavior_clk, hist_brand_behavior_show, hist_cate_behavior_show)\n",
596 | " aux_loss, target_loss, final_loss = train_one_step(user_profile_dict, user_profile_list, click_behavior_dict, target_item_dict, noclick_behavior_dict, user_behavior_list, label)\n",
597 | " # Record this step's training losses: utils.add_loss log plus the in-memory train_loss history\n",
598 | " loss_dict = dict()\n",
599 | " loss_dict[\"aux_loss\"] = str(aux_loss)\n",
600 | " loss_dict[\"target_loss\"] = str(target_loss)\n",
601 | " loss_dict[\"final_loss\"] = str(final_loss)\n",
602 | " utils.add_loss(loss_dict, loss_file_name, level=\"train\")\n",
603 | " train_loss[\"aux_loss\"].append(float(aux_loss))\n",
604 | " train_loss[\"target_loss\"].append(float(target_loss))\n",
605 | " train_loss[\"final_loss\"].append(float(final_loss))\n",
606 | " get_loss_fig(train_loss, test_loss)\n",
607 | " tf.summary.trace_on(graph=True, profiler=True)\n",
608 | " with train_summary_writer.as_default():\n",
609 | " tf.summary.scalar(\"train_aux_loss epoch: \"+str(epoch), aux_loss, step = i)\n",
610 | " tf.summary.scalar(\"train_target_loss epoch: \"+str(epoch), target_loss, step = i)\n",
611 | " tf.summary.scalar(\"train_final_loss epoch: \"+str(epoch), final_loss, step = i)\n",
612 | " tf.summary.trace_export(\n",
613 | " name=\"DIEN\", \n",
614 | " step=i, \n",
615 | " profiler_outdir=log_path)\n",
616 | " model.save_weights(checkpoint_path.format(epoch=epoch))"
617 | ]
618 | },
619 | {
620 | "cell_type": "markdown",
621 | "metadata": {},
622 | "source": [
623 | "# 模型评估"
624 | ]
625 | },
626 | {
627 | "cell_type": "code",
628 | "execution_count": 24,
629 | "metadata": {},
630 | "outputs": [
631 | {
632 | "name": "stdout",
633 | "output_type": "stream",
634 | "text": [
635 | "./checkpoint/cp-0002.ckpt\n"
636 | ]
637 | },
638 | {
639 | "data": {
640 | "text/plain": [
641 | ""
642 | ]
643 | },
644 | "execution_count": 24,
645 | "metadata": {},
646 | "output_type": "execute_result"
647 | }
648 | ],
649 | "source": [
650 | "last_model = DIEN(embedding_count_dict, embedding_dim_dict, embedding_features_list, user_behavior_features, activation=\"dice\")\n",
651 | "latest = tf.train.latest_checkpoint(checkpoint_dir)\n",
652 | "print(latest)\n",
653 | "last_model.load_weights(latest)"
654 | ]
655 | },
656 | {
657 | "cell_type": "code",
658 | "execution_count": 26,
659 | "metadata": {},
660 | "outputs": [
661 | {
662 | "name": "stdout",
663 | "output_type": "stream",
664 | "text": [
665 | "WARNING:tensorflow:Layer dien_1 is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2. The layer has dtype float32 because it's dtype defaults to floatx.\n",
666 | "\n",
667 | "If you intended to run this layer in float32, you can safely ignore this warning. If in doubt, this warning is likely only an issue if you are porting a TensorFlow 1.X model to TensorFlow 2.\n",
668 | "\n",
669 | "To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.\n",
670 | "\n"
671 | ]
672 | },
673 | {
674 | "data": {
675 | "text/plain": [
676 | "(0.029646765, 0.26222047, 0.29186723)"
677 | ]
678 | },
679 | "execution_count": 26,
680 | "metadata": {},
681 | "output_type": "execute_result"
682 | }
683 | ],
684 | "source": [
685 | "model= last_model\n",
686 | "label, target_cate, target_brand, cms_segid, cms_group, gender, age, pvalue, shopping, occupation, user_class_level, hist_brand_behavior_clk, hist_cate_behavior_clk, hist_brand_behavior_show, hist_cate_behavior_show, clk_length, show_length = data_reader.get_test_data(test_data)\n",
687 | "user_profile_dict, user_profile_list, user_behavior_list, click_behavior_dict, noclick_behavior_dict, target_item_dict = get_train_data(label, target_cate, target_brand, cms_segid, cms_group, gender, age, pvalue, shopping, occupation, user_class_level, hist_brand_behavior_clk, hist_cate_behavior_clk, hist_brand_behavior_show, hist_cate_behavior_show)\n",
688 | "aux_loss, target_loss, final_loss = get_test_loss(user_profile_dict, user_profile_list, click_behavior_dict, target_item_dict, noclick_behavior_dict, user_behavior_list, label)\n",
689 | "aux_loss, target_loss, final_loss"
690 | ]
691 | },
692 | {
693 | "cell_type": "code",
694 | "execution_count": 27,
695 | "metadata": {},
696 | "outputs": [],
697 | "source": [
698 | "def convert_tensor(data):\n",
699 | " return tf.convert_to_tensor(data)\n",
700 | "\n",
701 | "def get_normal_data(data, col):\n",
702 | " return data[col].values\n",
703 | "\n",
704 | "def get_sequence_data(data, col):\n",
705 | "    \"\"\"Parse the comma-separated number strings in data[col] into equal-length rows.\n",
706 | "\n",
707 | "    The first and last character of every cell are dropped (presumably the enclosing\n",
708 | "    brackets - confirm against the data file) and the rest is split on \",\". Shorter rows\n",
709 | "    are right-padded with zeros to the longest row; np.append yields float values.\n",
710 | "    \"\"\"\n",
711 | "    import ast  # literal_eval accepts only literals, so a cell cannot run code the way eval could\n",
712 | "    rows = [[ast.literal_eval(tok.strip()) for tok in cell[1:-1].split(\",\")] for cell in data[col].values]\n",
713 | "    max_length = max((len(row) for row in rows), default=0)\n",
714 | "    return [list(np.append(np.array(row), np.zeros(max_length - len(row)))) for row in rows]\n",
717 | "\n",
718 | "def get_evaluate_data(data):\n",
719 | "    \"\"\"Convert every row of `data` into the 15 inputs that get_train_data takes, in the same order (one-hot click label first).\"\"\"\n",
720 | "    scalar_columns = [\n",
721 | "        \"guide_dien_final_train_data.cate_id\", \"guide_dien_final_train_data.brand\",\n",
722 | "        \"guide_dien_final_train_data.cms_segid\", \"guide_dien_final_train_data.cms_group_id\",\n",
723 | "        \"guide_dien_final_train_data.final_gender_code\", \"guide_dien_final_train_data.age_level\",\n",
724 | "        \"guide_dien_final_train_data.pvalue_level\", \"guide_dien_final_train_data.shopping_level\",\n",
725 | "        \"guide_dien_final_train_data.occupation\", \"guide_dien_final_train_data.new_user_class_level\",\n",
726 | "    ]\n",
727 | "    sequence_columns = [\n",
728 | "        \"guide_dien_final_train_data.click_brand\", \"guide_dien_final_train_data.click_cate\",\n",
729 | "        \"guide_dien_final_train_data.show_brand\", \"guide_dien_final_train_data.show_cate\",\n",
730 | "    ]\n",
731 | "    one_hot_click = tf.one_hot(get_normal_data(data, \"guide_dien_final_train_data.clk\"), 2)\n",
732 | "    scalar_tensors = [convert_tensor(get_normal_data(data, name)) for name in scalar_columns]\n",
733 | "    sequence_tensors = [convert_tensor(get_sequence_data(data, name)) for name in sequence_columns]\n",
734 | "    return (one_hot_click, *scalar_tensors, *sequence_tensors)"
736 | ]
737 | },
738 | {
739 | "cell_type": "code",
740 | "execution_count": 29,
741 | "metadata": {},
742 | "outputs": [],
743 | "source": [
744 | "label, target_cate, target_brand, cms_segid, cms_group, gender, age, pvalue, shopping, occupation, user_class_level, hist_brand_behavior_clk, hist_cate_behavior_clk, hist_brand_behavior_show, hist_cate_behavior_show = get_evaluate_data(test_data)\n",
745 | "user_profile_dict, user_profile_list, user_behavior_list, click_behavior_dict, noclick_behavior_dict, target_item_dict = get_train_data(label, target_cate, target_brand, cms_segid, cms_group, gender, age, pvalue, shopping, occupation, user_class_level, hist_brand_behavior_clk, hist_cate_behavior_clk, hist_brand_behavior_show, hist_cate_behavior_show)\n",
746 | "output, logit, aux_loss = model(user_profile_dict, user_profile_list, click_behavior_dict, target_item_dict, noclick_behavior_dict, user_behavior_list)"
747 | ]
748 | },
749 | {
750 | "cell_type": "code",
751 | "execution_count": 30,
752 | "metadata": {},
753 | "outputs": [
754 | {
755 | "name": "stdout",
756 | "output_type": "stream",
757 | "text": [
758 | "[训练集]正例:负例=501 : 9435\n",
759 | "[测试集]正例:负例=56 : 943\n"
760 | ]
761 | }
762 | ],
763 | "source": [
764 | "train_label = train_data[\"guide_dien_final_train_data.clk\"].values\n",
765 | "positive_num = len(train_label[train_label == 1])\n",
766 | "negative_num = len(train_label[train_label == 0])\n",
767 | "print(\"[训练集]正例:负例=%d : %d\" % (positive_num, negative_num))\n",
768 | "test_label = test_data[\"guide_dien_final_train_data.clk\"].values\n",
769 | "positive_num = len(test_label[test_label == 1])\n",
770 | "negative_num = len(test_label[test_label == 0])\n",
771 | "print(\"[测试集]正例:负例=%d : %d\" % (positive_num, negative_num))"
772 | ]
773 | },
774 | {
775 | "cell_type": "code",
776 | "execution_count": 31,
777 | "metadata": {},
778 | "outputs": [],
779 | "source": [
780 | "y_true = label.numpy()[:,-1]\n",
781 | "y_score = output.numpy()[:,-1]"
782 | ]
783 | },
784 | {
785 | "cell_type": "code",
786 | "execution_count": 48,
787 | "metadata": {},
788 | "outputs": [],
789 | "source": [
790 | "threshold = 0.0031\n",
791 | "y_pre = y_score.copy()\n",
792 | "y_pre[y_pre > threshold] = 1\n",
793 | "y_pre[y_pre <= threshold] = 0"
794 | ]
795 | },
796 | {
797 | "cell_type": "code",
798 | "execution_count": 34,
799 | "metadata": {},
800 | "outputs": [],
801 | "source": [
802 | "import numpy as np\n",
803 | "from sklearn.metrics import accuracy_score\n",
804 | "from sklearn.metrics import f1_score\n",
805 | "from sklearn.metrics import auc\n",
806 | "import sklearn.metrics as sm\n",
807 | "from sklearn.metrics import roc_curve, auc\n",
808 | "import matplotlib as mpl \n",
809 | "import matplotlib.pyplot as plt"
810 | ]
811 | },
812 | {
813 | "cell_type": "code",
814 | "execution_count": 50,
815 | "metadata": {},
816 | "outputs": [
817 | {
818 | "name": "stdout",
819 | "output_type": "stream",
820 | "text": [
821 | "0.8818818818818819\n"
822 | ]
823 | }
824 | ],
825 | "source": [
826 | "print(accuracy_score(y_true, y_pre))"
827 | ]
828 | },
829 | {
830 | "cell_type": "code",
831 | "execution_count": 51,
832 | "metadata": {},
833 | "outputs": [
834 | {
835 | "name": "stdout",
836 | "output_type": "stream",
837 | "text": [
838 | "混淆矩阵为:\n",
839 | "[[876 67]\n",
840 | " [ 51 5]]\n"
841 | ]
842 | }
843 | ],
844 | "source": [
845 | "m = sm.confusion_matrix(y_true, y_pre)\n",
846 | "print('混淆矩阵为:', m, sep='\\n')"
847 | ]
848 | },
849 | {
850 | "cell_type": "code",
851 | "execution_count": 52,
852 | "metadata": {},
853 | "outputs": [
854 | {
855 | "name": "stdout",
856 | "output_type": "stream",
857 | "text": [
858 | "分类报告为:\n",
859 | " precision recall f1-score support\n",
860 | "\n",
861 | " 0.0 0.94 0.93 0.94 943\n",
862 | " 1.0 0.07 0.09 0.08 56\n",
863 | "\n",
864 | " accuracy 0.88 999\n",
865 | " macro avg 0.51 0.51 0.51 999\n",
866 | "weighted avg 0.90 0.88 0.89 999\n",
867 | "\n"
868 | ]
869 | }
870 | ],
871 | "source": [
872 | "r = sm.classification_report(y_true, y_pre)\n",
873 | "print('分类报告为:', r, sep='\\n')"
874 | ]
875 | },
876 | {
877 | "cell_type": "code",
878 | "execution_count": 53,
879 | "metadata": {},
880 | "outputs": [
881 | {
882 | "data": {
883 | "text/plain": [
884 | "0.679821239206181"
885 | ]
886 | },
887 | "execution_count": 53,
888 | "metadata": {},
889 | "output_type": "execute_result"
890 | }
891 | ],
892 | "source": [
893 | "from sklearn.metrics import roc_auc_score\n",
894 | "auc_score = roc_auc_score(y_true,y_score)\n",
895 | "auc_score"
896 | ]
897 | },
898 | {
899 | "cell_type": "code",
900 | "execution_count": 54,
901 | "metadata": {},
902 | "outputs": [],
903 | "source": [
904 | "def plot_roc(labels, predict_prob):\n",
905 | " false_positive_rate,true_positive_rate,thresholds=roc_curve(labels, predict_prob)\n",
906 | " roc_auc=auc(false_positive_rate, true_positive_rate)\n",
907 | " plt.title('ROC')\n",
908 | " plt.plot(false_positive_rate, true_positive_rate,'b',label='AUC = %0.4f'% roc_auc)\n",
909 | " plt.legend(loc='lower right')\n",
910 | " plt.plot([0,1],[0,1],'r--')\n",
911 | " plt.ylabel('TPR')\n",
912 | " plt.xlabel('FPR')\n",
913 | " plt.show()"
914 | ]
915 | },
916 | {
917 | "cell_type": "code",
918 | "execution_count": 55,
919 | "metadata": {},
920 | "outputs": [
921 | {
922 | "data": {
923 | "image/png": "\n",
924 | "text/plain": [
925 | ""
926 | ]
927 | },
928 | "metadata": {
929 | "needs_background": "light"
930 | },
931 | "output_type": "display_data"
932 | }
933 | ],
934 | "source": [
935 | "plot_roc(y_true, y_score)"
936 | ]
937 | },
938 | {
939 | "cell_type": "markdown",
940 | "metadata": {},
941 | "source": [
942 | "# 整体训练图像"
943 | ]
944 | },
945 | {
946 | "cell_type": "code",
947 | "execution_count": 57,
948 | "metadata": {},
949 | "outputs": [
950 | {
951 | "data": {
952 | "text/html": [
953 | "\n",
954 | "\n",
967 | "
\n",
968 | " \n",
969 | " \n",
970 | " | \n",
971 | " train_aux_loss | \n",
972 | " train_target_loss | \n",
973 | " train_final_loss | \n",
974 | "
\n",
975 | " \n",
976 | " \n",
977 | " \n",
978 | " 0 | \n",
979 | " 0.895453 | \n",
980 | " 0.692025 | \n",
981 | " 1.587478 | \n",
982 | "
\n",
983 | " \n",
984 | " 1 | \n",
985 | " 0.883613 | \n",
986 | " 0.691035 | \n",
987 | " 1.574647 | \n",
988 | "
\n",
989 | " \n",
990 | " 2 | \n",
991 | " 0.871820 | \n",
992 | " 0.690196 | \n",
993 | " 1.562016 | \n",
994 | "
\n",
995 | " \n",
996 | " 3 | \n",
997 | " 0.860334 | \n",
998 | " 0.689409 | \n",
999 | " 1.549743 | \n",
1000 | "
\n",
1001 | " \n",
1002 | " 4 | \n",
1003 | " 0.848613 | \n",
1004 | " 0.688840 | \n",
1005 | " 1.537453 | \n",
1006 | "
\n",
1007 | " \n",
1008 | " ... | \n",
1009 | " ... | \n",
1010 | " ... | \n",
1011 | " ... | \n",
1012 | "
\n",
1013 | " \n",
1014 | " 292 | \n",
1015 | " 0.030206 | \n",
1016 | " 0.197515 | \n",
1017 | " 0.227721 | \n",
1018 | "
\n",
1019 | " \n",
1020 | " 293 | \n",
1021 | " 0.028985 | \n",
1022 | " 0.140821 | \n",
1023 | " 0.169806 | \n",
1024 | "
\n",
1025 | " \n",
1026 | " 294 | \n",
1027 | " 0.028990 | \n",
1028 | " 0.081985 | \n",
1029 | " 0.110975 | \n",
1030 | "
\n",
1031 | " \n",
1032 | " 295 | \n",
1033 | " 0.028055 | \n",
1034 | " 0.166338 | \n",
1035 | " 0.194393 | \n",
1036 | "
\n",
1037 | " \n",
1038 | " 296 | \n",
1039 | " 0.028797 | \n",
1040 | " 0.197161 | \n",
1041 | " 0.225958 | \n",
1042 | "
\n",
1043 | " \n",
1044 | "
\n",
1045 | "
297 rows × 3 columns
\n",
1046 | "
"
1047 | ],
1048 | "text/plain": [
1049 | " train_aux_loss train_target_loss train_final_loss\n",
1050 | "0 0.895453 0.692025 1.587478\n",
1051 | "1 0.883613 0.691035 1.574647\n",
1052 | "2 0.871820 0.690196 1.562016\n",
1053 | "3 0.860334 0.689409 1.549743\n",
1054 | "4 0.848613 0.688840 1.537453\n",
1055 | ".. ... ... ...\n",
1056 | "292 0.030206 0.197515 0.227721\n",
1057 | "293 0.028985 0.140821 0.169806\n",
1058 | "294 0.028990 0.081985 0.110975\n",
1059 | "295 0.028055 0.166338 0.194393\n",
1060 | "296 0.028797 0.197161 0.225958\n",
1061 | "\n",
1062 | "[297 rows x 3 columns]"
1063 | ]
1064 | },
1065 | "execution_count": 57,
1066 | "metadata": {},
1067 | "output_type": "execute_result"
1068 | }
1069 | ],
1070 | "source": [
1071 | "train_loss_data = pd.read_csv(\"./loss/dien/train_loss.csv.2020_09_22_21_35_06\")\n",
1072 | "train_loss_data"
1073 | ]
1074 | },
1075 | {
1076 | "cell_type": "code",
1077 | "execution_count": 56,
1078 | "metadata": {},
1079 | "outputs": [
1080 | {
1081 | "data": {
1082 | "text/html": [
1083 | "\n",
1084 | "\n",
1097 | "
\n",
1098 | " \n",
1099 | " \n",
1100 | " | \n",
1101 | " test_aux_loss | \n",
1102 | " test_target_loss | \n",
1103 | " test_final_loss | \n",
1104 | "
\n",
1105 | " \n",
1106 | " \n",
1107 | " \n",
1108 | " 0 | \n",
1109 | " 0.895550 | \n",
1110 | " 0.692121 | \n",
1111 | " 1.587671 | \n",
1112 | "
\n",
1113 | " \n",
1114 | " 1 | \n",
1115 | " 0.883785 | \n",
1116 | " 0.691325 | \n",
1117 | " 1.575110 | \n",
1118 | "
\n",
1119 | " \n",
1120 | " 2 | \n",
1121 | " 0.872121 | \n",
1122 | " 0.690532 | \n",
1123 | " 1.562653 | \n",
1124 | "
\n",
1125 | " \n",
1126 | " 3 | \n",
1127 | " 0.860558 | \n",
1128 | " 0.689721 | \n",
1129 | " 1.550279 | \n",
1130 | "
\n",
1131 | " \n",
1132 | " 4 | \n",
1133 | " 0.849101 | \n",
1134 | " 0.688917 | \n",
1135 | " 1.538019 | \n",
1136 | "
\n",
1137 | " \n",
1138 | " ... | \n",
1139 | " ... | \n",
1140 | " ... | \n",
1141 | " ... | \n",
1142 | "
\n",
1143 | " \n",
1144 | " 292 | \n",
1145 | " 0.030182 | \n",
1146 | " 0.261107 | \n",
1147 | " 0.291289 | \n",
1148 | "
\n",
1149 | " \n",
1150 | " 293 | \n",
1151 | " 0.030074 | \n",
1152 | " 0.261199 | \n",
1153 | " 0.291273 | \n",
1154 | "
\n",
1155 | " \n",
1156 | " 294 | \n",
1157 | " 0.029966 | \n",
1158 | " 0.261354 | \n",
1159 | " 0.291320 | \n",
1160 | "
\n",
1161 | " \n",
1162 | " 295 | \n",
1163 | " 0.029859 | \n",
1164 | " 0.261639 | \n",
1165 | " 0.291498 | \n",
1166 | "
\n",
1167 | " \n",
1168 | " 296 | \n",
1169 | " 0.029752 | \n",
1170 | " 0.261937 | \n",
1171 | " 0.291690 | \n",
1172 | "
\n",
1173 | " \n",
1174 | "
\n",
1175 | "
297 rows × 3 columns
\n",
1176 | "
"
1177 | ],
1178 | "text/plain": [
1179 | " test_aux_loss test_target_loss test_final_loss\n",
1180 | "0 0.895550 0.692121 1.587671\n",
1181 | "1 0.883785 0.691325 1.575110\n",
1182 | "2 0.872121 0.690532 1.562653\n",
1183 | "3 0.860558 0.689721 1.550279\n",
1184 | "4 0.849101 0.688917 1.538019\n",
1185 | ".. ... ... ...\n",
1186 | "292 0.030182 0.261107 0.291289\n",
1187 | "293 0.030074 0.261199 0.291273\n",
1188 | "294 0.029966 0.261354 0.291320\n",
1189 | "295 0.029859 0.261639 0.291498\n",
1190 | "296 0.029752 0.261937 0.291690\n",
1191 | "\n",
1192 | "[297 rows x 3 columns]"
1193 | ]
1194 | },
1195 | "execution_count": 56,
1196 | "metadata": {},
1197 | "output_type": "execute_result"
1198 | }
1199 | ],
1200 | "source": [
1201 | "test_loss_data = pd.read_csv(\"./loss/dien/test_loss.csv.2020_09_22_21_35_06\")\n",
1202 | "test_loss_data"
1203 | ]
1204 | },
1205 | {
1206 | "cell_type": "code",
1207 | "execution_count": 58,
1208 | "metadata": {},
1209 | "outputs": [],
1210 | "source": [
1211 | "def get_loss_fig_aux(train_loss_data, test_loss_data):\n",
1212 | " train_loss = {\n",
1213 | " \"aux_loss\":list(train_loss_data[\"train_\" + \"aux_loss\"].values), \n",
1214 | " \"target_loss\":list(train_loss_data[\"train_\" + \"target_loss\"].values), \n",
1215 | " \"final_loss\":list(train_loss_data[\"train_\" + \"final_loss\"].values)\n",
1216 | " }\n",
1217 | " test_loss = {\n",
1218 | " \"aux_loss\":list(test_loss_data[\"test_\" + \"aux_loss\"].values), \n",
1219 | " \"target_loss\":list(test_loss_data[\"test_\" + \"target_loss\"].values), \n",
1220 | " \"final_loss\":list(test_loss_data[\"test_\" + \"final_loss\"].values)\n",
1221 | " }\n",
1222 | " get_loss_fig(train_loss, test_loss)"
1223 | ]
1224 | },
1225 | {
1226 | "cell_type": "code",
1227 | "execution_count": 59,
1228 | "metadata": {},
1229 | "outputs": [
1230 | {
1231 | "data": {
1232 | "image/png": "\n",
1233 | "text/plain": [
1234 | ""
1235 | ]
1236 | },
1237 | "metadata": {
1238 | "needs_background": "light"
1239 | },
1240 | "output_type": "display_data"
1241 | }
1242 | ],
1243 | "source": [
1244 | "get_loss_fig_aux(train_loss_data, test_loss_data)"
1245 | ]
1246 | },
1247 | {
1248 | "cell_type": "code",
1249 | "execution_count": null,
1250 | "metadata": {},
1251 | "outputs": [],
1252 | "source": []
1253 | }
1254 | ],
1255 | "metadata": {
1256 | "kernelspec": {
1257 | "display_name": "Python 3",
1258 | "language": "python",
1259 | "name": "python3"
1260 | },
1261 | "language_info": {
1262 | "codemirror_mode": {
1263 | "name": "ipython",
1264 | "version": 3
1265 | },
1266 | "file_extension": ".py",
1267 | "mimetype": "text/x-python",
1268 | "name": "python",
1269 | "nbconvert_exporter": "python",
1270 | "pygments_lexer": "ipython3",
1271 | "version": "3.7.6"
1272 | }
1273 | },
1274 | "nbformat": 4,
1275 | "nbformat_minor": 4
1276 | }
1277 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # DIEN-DIN
2 |
3 | 本项目使用tensorflow2.0复现阿里兴趣排序模型DIEN与DIN。
4 |
5 | DIN论文链接: https://arxiv.org/pdf/1706.06978.pdf
6 |
7 | DIEN论文链接: https://arxiv.org/pdf/1809.03672.pdf
8 |
9 | 数据集使用阿里数据集测试模型代码, 数据集链接: https://tianchi.aliyun.com/dataset/dataDetail?dataId=56
10 |
11 | # 调用方法:
12 |
13 | ## 0. 简介:
14 |
15 | DIEN的输入特征中主要包含三个部分特征: 用户历史行为序列, 目标商品特征, 用户画像特征。
16 | 用户历史行为序列需包含点击序列与非点击序列。
17 | 请按如下1~2方法处理输入特征。
18 |
19 | ## 1. 初始化:
20 |
21 | 初始化DIEN时需传入5个参数:
22 |
23 | (注:feature_list中的特征名称,需要与embedding_dict中的特征名称一样)
24 |
25 | - embedding_count_dict:string->int格式,该变量记录需要embedding各个特征的词典个数,即最大整数索引+ 1的大小;
26 |
27 | - embedding_dim_dict:string->int格式,该变量记录需要embedding各个特征的输出维数,即密集嵌入的尺寸;
28 |
29 | - embedding_features_list:list(string)格式,该变量记录DIEN中user_profile部分所有需要embedding的feature名称;
30 |
31 | - user_behavior_features:list(string)格式,该变量记录DIEN中user_behavior与target_item部分所有需要embedding的feature名称
32 |
33 | - activation:string格式,默认值"PReLU",该变量控制全连接层激活函数,"PReLU"->PReLU,"Dice"->Dice
34 |
35 | ## 2. 模型调用:
36 |
37 | 模型调用需传入6个参数:
38 |
39 | (注:feature_list中的特征名称,需要与dict中的特征名称一样)
40 |
41 | - user_profile_dict:dict:string->Tensor格式,记录user_profile部分的所有输入特征的训练数据;
42 |
43 | - user_profile_list:list(string)格式,记录user_profile部分的所有特征名称;
44 |
45 | - click_behavior_dict:dict:string->Tensor格式,记录user_behavior部分所有点击输入特征的训练数据;
46 |
47 | - noclick_behavior_dict:dict:string->Tensor格式,记录user_behavior部分所有未点击输入特征的训练数据;
48 |
49 | - target_item_dict:dict:string->Tensor格式,记录target_item部分输入特征的训练数据;
50 |
51 | - user_behavior_list:list(string)格式,记录user_behavior部分的所有特征名称。
52 |
53 | # 调用演示代码:
54 |
55 | ## DIEN:
56 |
57 | DIEN_train_example.ipynb
58 |
59 | ## DIN:
60 |
61 | DIN_train_example.ipynb
62 |
63 | # 代码:
64 |
65 | - model.py: 定义模型代码
66 |
67 | - layers.py: 自定义层
68 |
69 | - loss.py: 定义Auxiliary Loss用到的NN
70 |
71 | - activations.py: 定义Dice激活函数
72 |
73 | - alibaba_data_reader.py: 输入数据处理函数(代码中使用数据已用spark处理后得到了所需序列数据, 及特征embedding词典数)
74 |
--------------------------------------------------------------------------------
/__pycache__/activations.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StephenBo-China/DIEN-DIN/e1d9bb0591f0e0ce5be35cbf328077f6da2a45d2/__pycache__/activations.cpython-37.pyc
--------------------------------------------------------------------------------
/__pycache__/alibaba_data_reader.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StephenBo-China/DIEN-DIN/e1d9bb0591f0e0ce5be35cbf328077f6da2a45d2/__pycache__/alibaba_data_reader.cpython-37.pyc
--------------------------------------------------------------------------------
/__pycache__/layers.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StephenBo-China/DIEN-DIN/e1d9bb0591f0e0ce5be35cbf328077f6da2a45d2/__pycache__/layers.cpython-37.pyc
--------------------------------------------------------------------------------
/__pycache__/loss.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StephenBo-China/DIEN-DIN/e1d9bb0591f0e0ce5be35cbf328077f6da2a45d2/__pycache__/loss.cpython-37.pyc
--------------------------------------------------------------------------------
/__pycache__/model.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StephenBo-China/DIEN-DIN/e1d9bb0591f0e0ce5be35cbf328077f6da2a45d2/__pycache__/model.cpython-37.pyc
--------------------------------------------------------------------------------
/__pycache__/utils.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StephenBo-China/DIEN-DIN/e1d9bb0591f0e0ce5be35cbf328077f6da2a45d2/__pycache__/utils.cpython-37.pyc
--------------------------------------------------------------------------------
/activations.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
class Dice(tf.keras.layers.Layer):
    """Dice activation driven by a single scalar ``alpha`` weight.

    The input is passed through a batch-normalisation layer (no learned
    centre or scale) and a sigmoid to obtain a gate ``p``; the output is
    ``alpha * (1 - p) * x + p * x``.
    """

    def __init__(self):
        super().__init__()
        self.bn = tf.keras.layers.BatchNormalization(center=False, scale=False)
        # Scalar weight applied to the part of the input the gate rejects.
        self.alpha = self.add_weight(name='alpha', shape=(), dtype=tf.float32)

    def call(self, x):
        """Apply the gated activation to ``x`` and return the result."""
        gate = tf.sigmoid(self.bn(x))
        rejected_part = self.alpha * (1.0 - gate) * x
        accepted_part = gate * x
        return rejected_part + accepted_part
13 |
class dice(tf.keras.layers.Layer):
    """Dice activation with per-feature ``alphas`` and ``beta`` variables.

    The gate is ``sigmoid(beta * bn(x))`` where ``bn`` is a batch
    normalisation layer without learned centre/scale, and the output is
    ``alphas * (1 - gate) * x + gate * x``.

    Args:
        feat_dim: size of the feature axis; ``alphas`` and ``beta`` are
            zero-initialised vectors of this length.
    """

    def __init__(self, feat_dim):
        super(dice, self).__init__()
        self.feat_dim = feat_dim
        self.alphas = tf.Variable(tf.zeros([feat_dim]), dtype=tf.float32)
        self.beta = tf.Variable(tf.zeros([feat_dim]), dtype=tf.float32)

        self.bn = tf.keras.layers.BatchNormalization(center=False, scale=False)

    def call(self, _x, axis=-1, epsilon=0.000000001):
        """Apply the activation to ``_x``.

        Args:
            _x: input tensor.
            axis: accepted for backward compatibility; not used. The
                normalisation is done by ``self.bn``, which was built with
                its default axis.
            epsilon: accepted for backward compatibility; not used.

        Returns:
            A tensor with the gated activation applied.
        """
        # The previous implementation also computed a manual mean/std along
        # the non-feature axes, but the results were never used: the
        # normalised input always came from ``self.bn``. That dead
        # computation has been removed.
        x_normed = self.bn(_x)
        x_p = tf.keras.activations.sigmoid(self.beta * x_normed)

        return self.alphas * (1.0 - x_p) * _x + x_p * _x
--------------------------------------------------------------------------------
/alibaba_data_reader.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import tensorflow as tf
3 | import numpy as np
4 |
def get_embedding_features_list():
    """Return the names of every feature that gets an embedding table.

    The two item features come first, followed by the user-profile
    features. A new list is built on every call.
    """
    item_features = ["cate", "brand"]
    profile_features = [
        "cms_segid",
        "cms_group",
        "gender",
        "age",
        "pvalue",
        "shopping",
        "occupation",
        "user_class_level",
    ]
    return item_features + profile_features
10 |
def get_user_behavior_features():
    """Return the feature names used for behaviour sequences (a fresh list)."""
    return ["cate", "brand"]
14 |
def get_embedding_count(feature, embedding_count):
    """Return the first value stored in column ``feature`` of ``embedding_count``."""
    column_values = embedding_count[feature].values
    return column_values[0]
17 |
def get_embedding_count_dict(embedding_features_list, embedding_count):
    """Build the feature -> vocabulary-size mapping.

    Each listed feature is first looked up in ``embedding_count``; a fixed
    set of features is then overwritten (or added) with hard-coded sizes.
    """
    counts = {
        feature: get_embedding_count(feature, embedding_count)
        for feature in embedding_features_list
    }
    # Hard-coded sizes take precedence over whatever the lookup produced.
    counts.update({
        "brand": 500000,
        "cate": 501578,
        "gender": 3,
        "pvalue": 10,
        "shopping": 4,
        "occupation": 5,
        "user_class_level": 5,
    })
    return counts
30 |
def get_embedding_dim_dict(embedding_features_list):
    """Map every listed feature to an embedding width of 64."""
    return dict.fromkeys(embedding_features_list, 64)
36 |
def get_data():
    """Load the train/test tables and the embedding-count table from ``./data``.

    For the train and test tables, missing values are replaced with 0 and
    rows whose click-category or click-brand column equals 0 are dropped.

    Returns:
        ``(train_data, test_data, embedding_count)`` as DataFrames.
    """
    def _load_with_clicks(path):
        # Read a tab-separated table, fill gaps with 0, then keep only the
        # rows where both click columns are non-zero.
        frame = pd.read_csv(path, sep = "\t").fillna(0)
        for column in ("guide_dien_final_train_data.click_cate",
                       "guide_dien_final_train_data.click_brand"):
            frame = frame[frame[column] != 0]
        return frame

    train_data = _load_with_clicks("./data/train.csv")
    test_data = _load_with_clicks("./data/test.csv")
    embedding_count = pd.read_csv("./data/embedding_count.csv")
    return train_data, test_data, embedding_count
48 |
def get_normal_data(data, col):
    """Return the raw values of column ``col`` from ``data``."""
    column = data[col]
    return column.values
51 |
def get_sequence_data(data, col):
    """Parse a column of bracketed, comma-separated sequences and zero-pad them.

    Each cell is expected to be a string such as ``"[1,2,3]"``: the first and
    last characters are dropped and the remainder is split on commas.

    Args:
        data: table holding the column.
        col: name of the column to parse.

    Returns:
        A list with one list per row. Every inner list is right-padded with
        zeros to the length of the longest sequence in the column; values are
        floats because padding is done with ``np.zeros``.

    Raises:
        ValueError, SyntaxError: if a token is not a plain Python literal.
    """
    import ast  # local import: only this parser needs it

    # Parse each cell exactly once (the previous version parsed every cell
    # twice). ``ast.literal_eval`` replaces ``eval`` so that text read from
    # the CSV can never execute arbitrary code; tokens are stripped because
    # older ``literal_eval`` versions reject leading whitespace.
    parsed = [
        [ast.literal_eval(token.strip()) for token in cell[1:-1].split(",")]
        for cell in data[col].values
    ]
    max_length = max((len(sequence) for sequence in parsed), default=0)

    rst = []
    for sequence in parsed:
        padding = np.zeros(max_length - len(sequence))
        rst.append(list(np.append(np.array(sequence), padding)))
    return rst
65 |
def get_length(data, col):
    """Return the number of elements in each bracketed sequence of a column.

    Each cell is expected to be a string such as ``"[1,2,3]"``: the first and
    last characters are dropped and the remainder is split on commas.

    Args:
        data: table holding the column.
        col: name of the column to inspect.

    Returns:
        A list of ints, one per row.

    Raises:
        ValueError, SyntaxError: if a token is not a plain Python literal.
    """
    import ast  # local import: only this parser needs it

    # Tokens are still validated as literals, as before, but with
    # ``ast.literal_eval`` instead of ``eval`` so CSV text cannot run code.
    return [
        len([ast.literal_eval(token.strip()) for token in cell[1:-1].split(",")])
        for cell in data[col].values
    ]
72 |
def convert_tensor(data):
    """Convert ``data`` to a TensorFlow tensor with ``tf.convert_to_tensor``."""
    tensor = tf.convert_to_tensor(data)
    return tensor
75 |
def get_batch_data(data, min_batch, batch=100):
    """Draw a random batch of ``batch`` rows and convert it to model inputs.

    Rows are chosen with ``data.sample``; ``min_batch`` does not select rows,
    it is only advanced by ``batch`` and handed back to the caller.

    Returns:
        An 18-tuple: the one-hot click label (depth 2), ten scalar feature
        tensors (target cate, target brand, cms_segid, cms_group, gender,
        age, pvalue, shopping, occupation, user_class_level), four padded
        sequence tensors (clicked brand, clicked cate, shown brand, shown
        cate), ``min_batch + batch``, and the per-row lengths of the clicked
        and shown brand sequences.
    """
    sampled = data.sample(n=batch)

    scalar_columns = (
        "guide_dien_final_train_data.cate_id",
        "guide_dien_final_train_data.brand",
        "guide_dien_final_train_data.cms_segid",
        "guide_dien_final_train_data.cms_group_id",
        "guide_dien_final_train_data.final_gender_code",
        "guide_dien_final_train_data.age_level",
        "guide_dien_final_train_data.pvalue_level",
        "guide_dien_final_train_data.shopping_level",
        "guide_dien_final_train_data.occupation",
        "guide_dien_final_train_data.new_user_class_level",
    )
    sequence_columns = (
        "guide_dien_final_train_data.click_brand",
        "guide_dien_final_train_data.click_cate",
        "guide_dien_final_train_data.show_brand",
        "guide_dien_final_train_data.show_cate",
    )

    click = get_normal_data(sampled, "guide_dien_final_train_data.clk")
    scalar_values = [get_normal_data(sampled, name) for name in scalar_columns]
    sequence_values = [get_sequence_data(sampled, name) for name in sequence_columns]
    clk_length = get_length(sampled, "guide_dien_final_train_data.click_brand")
    show_length = get_length(sampled, "guide_dien_final_train_data.show_brand")

    return (
        tf.one_hot(click, 2),
        *[convert_tensor(values) for values in scalar_values],
        *[convert_tensor(values) for values in sequence_values],
        min_batch + batch,
        clk_length,
        show_length,
    )
105 |
def get_test_data(data):
    """Convert the first 150 rows of ``data`` to model inputs.

    Returns:
        A 17-tuple: the one-hot click label (depth 2), ten scalar feature
        tensors (target cate, target brand, cms_segid, cms_group, gender,
        age, pvalue, shopping, occupation, user_class_level), four padded
        sequence tensors (clicked brand, clicked cate, shown brand, shown
        cate), and the per-row lengths of the clicked and shown brand
        sequences.
    """
    head_rows = data.head(150)

    scalar_columns = (
        "guide_dien_final_train_data.cate_id",
        "guide_dien_final_train_data.brand",
        "guide_dien_final_train_data.cms_segid",
        "guide_dien_final_train_data.cms_group_id",
        "guide_dien_final_train_data.final_gender_code",
        "guide_dien_final_train_data.age_level",
        "guide_dien_final_train_data.pvalue_level",
        "guide_dien_final_train_data.shopping_level",
        "guide_dien_final_train_data.occupation",
        "guide_dien_final_train_data.new_user_class_level",
    )
    sequence_columns = (
        "guide_dien_final_train_data.click_brand",
        "guide_dien_final_train_data.click_cate",
        "guide_dien_final_train_data.show_brand",
        "guide_dien_final_train_data.show_cate",
    )

    click = get_normal_data(head_rows, "guide_dien_final_train_data.clk")
    scalar_values = [get_normal_data(head_rows, name) for name in scalar_columns]
    sequence_values = [get_sequence_data(head_rows, name) for name in sequence_columns]
    clk_length = get_length(head_rows, "guide_dien_final_train_data.click_brand")
    show_length = get_length(head_rows, "guide_dien_final_train_data.show_brand")

    return (
        tf.one_hot(click, 2),
        *[convert_tensor(values) for values in scalar_values],
        *[convert_tensor(values) for values in sequence_values],
        clk_length,
        show_length,
    )
--------------------------------------------------------------------------------
/layers.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tensorflow.keras import layers
3 | from activations import Dice,dice
4 |
class GRU_GATES(tf.keras.layers.Layer):
    """Shared building block for the gates and candidate state of a GRU cell.

    Holds two dense projections of width ``units``: one with a bias, applied
    to the first argument, and one without a bias, applied to the second.
    """

    def __init__(self, units):
        super().__init__()
        self.linear_act = layers.Dense(units, activation=None, use_bias=True)
        self.linear_noact = layers.Dense(units, activation=None, use_bias=False)

    def call(self, a, b, gate_b=None):
        """Combine the projections of ``a`` and ``b``.

        Without ``gate_b`` the result is ``sigmoid(W_a a + W_b b)``; with it,
        the projection of ``b`` is multiplied elementwise by ``gate_b`` first
        and the result is ``tanh(W_a a + gate_b * W_b b)``.
        """
        projected_a = self.linear_act(a)
        projected_b = self.linear_noact(b)
        if gate_b is not None:
            gated_b = tf.math.multiply(gate_b, projected_b)
            return tf.keras.activations.tanh(projected_a + gated_b)
        return tf.keras.activations.sigmoid(projected_a + projected_b)
16 |
class AUGRU(layers.Layer):
    """One step of a GRU whose update gate is scaled by an attention score."""

    def __init__(self, units):
        super().__init__()
        self.u_gate = GRU_GATES(units)
        self.r_gate = GRU_GATES(units)
        self.c_memo = GRU_GATES(units)

    def call(self, inputs, state, att_score):
        """Return the next state given the input, previous state and attention score."""
        update = self.u_gate(inputs, state)            # u_t
        reset = self.r_gate(inputs, state)             # r_t
        candidate = self.c_memo(inputs, state, reset)  # candidate state
        # AUGRU-specific step: the update gate is scaled by the attention score.
        scaled_update = att_score * update
        retained = (1 - scaled_update) * state
        written = scaled_update * candidate
        return retained + written
31 |
class attention(tf.keras.layers.Layer):
    """Attention pooling of a key sequence against a query vector.

    A small MLP scores every key position from the concatenation
    ``[q, k, q - k, q * k]``; padded positions are masked, the scores are
    scaled by ``sqrt(keys_dim)`` and passed through a sigmoid, and the keys
    are summed with those weights.

    Args:
        keys_dim: value whose square root divides the raw scores.
    """

    def __init__(self, keys_dim):
        super(attention, self).__init__()
        self.keys_dim = keys_dim
        self.fc = tf.keras.Sequential()
        self.fc.add(layers.BatchNormalization())
        self.fc.add(layers.Dense(36, activation="sigmoid"))
        self.fc.add(dice(36))
        self.fc.add(layers.Dense(1, activation=None))

    def call(self, queries, keys, keys_length):
        """Pool ``keys`` with weights computed against ``queries``.

        Args:
            queries: rank-2 tensor; it is expanded on axis 1 and tiled along
                the time axis of ``keys``.
            keys: rank-3 tensor whose axis 1 is the (padded) time axis.
            keys_length: valid length of every sequence in the batch.

        Returns:
            The weighted sum of ``keys`` over the time axis, rank 2.
        """
        time_steps = tf.shape(keys)[1]

        # Attention
        queries = tf.tile(tf.expand_dims(queries, 1), [1, time_steps, 1])
        din_all = tf.concat([queries, keys, queries - keys, queries * keys], axis=-1)
        outputs = tf.transpose(self.fc(din_all), [0, 2, 1])
        # Size the mask from the keys' own time axis rather than
        # max(keys_length): the mask must line up with ``outputs`` even when
        # the batch is padded beyond its longest sequence, and this also
        # works when keys_length is a tensor.
        key_masks = tf.sequence_mask(keys_length, time_steps, dtype=tf.bool)
        key_masks = tf.expand_dims(key_masks, 1)
        # Padded positions get a very negative score, so their sigmoid is ~0.
        paddings = tf.ones_like(outputs) * (-2 ** 32 + 1)
        outputs = tf.where(key_masks, outputs, paddings)
        outputs = outputs / (self.keys_dim ** 0.5)
        outputs = tf.keras.activations.sigmoid(outputs)

        # Sum pooling. Squeeze only the singleton axis created by the
        # transpose; an unqualified squeeze would also drop the batch axis
        # for a batch of one.
        outputs = tf.squeeze(tf.matmul(outputs, keys), axis=1)
        # Same text as before, but taken from the static shape so it does
        # not need eager-only ``.numpy()``.
        print("outputs:" + str(tuple(outputs.shape.as_list())))
        return outputs
--------------------------------------------------------------------------------
/loss.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tensorflow.keras import layers
3 |
class AuxLayer(layers.Layer):
    """Small feed-forward network producing a two-way softmax.

    The stack is batch normalisation, then dense layers of width 100 and 50
    (each with a sigmoid activation followed by a ReLU layer), then a linear
    dense layer of width 2.
    """

    def __init__(self):
        super().__init__()
        self.fc = tf.keras.Sequential([
            layers.BatchNormalization(),
            layers.Dense(100, activation="sigmoid"),
            layers.ReLU(),
            layers.Dense(50, activation="sigmoid"),
            layers.ReLU(),
            layers.Dense(2, activation=None),
        ])

    def call(self, input):
        """Run the network on ``input``, drop size-1 axes, and apply softmax."""
        scores = self.fc(input)
        squeezed_scores = tf.squeeze(scores)
        return tf.keras.activations.softmax(squeezed_scores)
18 |
19 |
--------------------------------------------------------------------------------
/main.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StephenBo-China/DIEN-DIN/e1d9bb0591f0e0ce5be35cbf328077f6da2a45d2/main.ipynb
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tensorflow.keras import layers
3 | from layers import AUGRU
4 | from activations import Dice
5 | import pandas as pd
6 | from model import DIEN
7 | import alibaba_data_reader as data_reader
8 |
def train_one_step(user_profile_dict, user_profile_list, click_behavior_dict, target_item_dict, noclick_behavior_dict, user_behavior_list, label, optimizer, model, alpha, loss_metric):
    """Run one optimisation step and record the loss.

    The final loss is the mean sigmoid cross-entropy between the model's
    logits and ``label`` plus ``alpha`` times the auxiliary loss returned by
    the model. Gradients are clipped to a global norm of 5.0 before being
    applied, and the final loss is fed to ``loss_metric``.
    """
    with tf.GradientTape() as tape:
        _output, logit, aux_loss = model(
            user_profile_dict,
            user_profile_list,
            click_behavior_dict,
            target_item_dict,
            noclick_behavior_dict,
            user_behavior_list,
        )
        float_label = tf.cast(label, dtype=tf.float32)
        per_example_loss = tf.nn.sigmoid_cross_entropy_with_logits(logits=logit, labels=float_label)
        target_loss = tf.reduce_mean(per_example_loss)
        final_loss = target_loss + alpha * aux_loss
        print("[Train Step] aux_loss=" + str(aux_loss.numpy()) + ", target_loss=" + str(target_loss.numpy()) + ", final_loss=" + str(final_loss.numpy()))
    trainable = model.trainable_variables
    gradient = tape.gradient(final_loss, trainable)
    clip_gradient, _ = tf.clip_by_global_norm(gradient, 5.0)
    optimizer.apply_gradients(zip(clip_gradient, trainable))
    loss_metric(final_loss)
19 |
def get_train_data(label, target_cate, target_brand, cms_segid, cms_group, gender, age, pvalue, shopping, occupation, user_class_level, hist_brand_behavior_clk, hist_cate_behavior_clk, hist_brand_behavior_show, hist_cate_behavior_show):
    """Group the flat batch fields into the dictionaries the model call takes.

    Args:
        label: accepted for signature compatibility; not used here.
        target_cate, target_brand: features of the target item.
        cms_segid ... user_class_level: user-profile features.
        hist_*_behavior_clk: clicked behaviour sequences.
        hist_*_behavior_show: shown behaviour sequences, stored as the
            "noclick" inputs.

    Returns:
        ``(user_profile_dict, user_profile_list, user_behavior_list,
        click_behavior_dict, noclick_behavior_dict, target_item_dict)``.
    """
    user_profile_dict = {
        "cms_segid": cms_segid,
        "cms_group": cms_group,
        "gender": gender,
        "age": age,
        "pvalue": pvalue,
        "shopping": shopping,
        "occupation": occupation,
        "user_class_level": user_class_level
    }
    user_profile_list = ["cms_segid", "cms_group", "gender", "age", "pvalue", "shopping", "occupation", "user_class_level"]
    user_behavior_list = ["brand", "cate"]
    click_behavior_dict = {
        "brand": hist_brand_behavior_clk,
        "cate": hist_cate_behavior_clk
    }
    noclick_behavior_dict = {
        "brand": hist_brand_behavior_show,
        "cate": hist_cate_behavior_show
    }
    # Each key must carry the matching target feature, like the behaviour
    # dictionaries above. The two values used to be swapped, which stored
    # the target category under "brand" and the target brand under "cate".
    target_item_dict = {
        "brand": target_brand,
        "cate": target_cate
    }
    return user_profile_dict, user_profile_list, user_behavior_list, click_behavior_dict, noclick_behavior_dict, target_item_dict
46 |
def main():
    """Build a DIEN model from the data files and train it for one epoch.

    Every step draws a random batch through the data reader, regroups the
    fields with ``get_train_data`` and calls ``train_one_step``.
    """
    train_data, test_data, embedding_count = data_reader.get_data()
    embedding_features_list = data_reader.get_embedding_features_list()
    user_behavior_features = data_reader.get_user_behavior_features()
    embedding_count_dict = data_reader.get_embedding_count_dict(embedding_features_list, embedding_count)
    embedding_dim_dict = data_reader.get_embedding_dim_dict(embedding_features_list)
    model = DIEN(embedding_count_dict, embedding_dim_dict, embedding_features_list, user_behavior_features)

    batch = 100
    # A first batch is fetched and regrouped before the loop; its results
    # are not used afterwards.
    first_fields = data_reader.get_batch_data(train_data, 0, batch = batch)
    get_train_data(*first_fields[:15])

    log_path = "./train_log/"
    train_summary_writer = tf.summary.create_file_writer(log_path)
    optimizer = tf.keras.optimizers.Adam(lr=1e-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    loss_metric = tf.keras.metrics.Sum()
    auc_metric = tf.keras.metrics.AUC()
    alpha = 1
    epochs = 1
    steps_per_epoch = int(len(train_data) / batch)
    for epoch in range(epochs):
        min_batch = 0
        for _ in range(steps_per_epoch):
            fields = data_reader.get_batch_data(train_data, min_batch, batch = batch)
            # Layout of ``fields``: 0 = label, 1..14 = features,
            # 15 = advanced batch offset, 16..17 = sequence lengths.
            label = fields[0]
            min_batch = fields[15]
            (user_profile_dict, user_profile_list, user_behavior_list,
             click_behavior_dict, noclick_behavior_dict, target_item_dict) = get_train_data(*fields[:15])
            train_one_step(user_profile_dict, user_profile_list, click_behavior_dict, target_item_dict, noclick_behavior_dict, user_behavior_list, label, optimizer, model, alpha, loss_metric)
71 |
72 |
if __name__ == "__main__":
    # Report the TensorFlow build and GPU visibility before starting the run.
    print(tf.__version__)
    gpu_available = tf.test.is_gpu_available()
    print("GPU Available: ", gpu_available)
    main()
--------------------------------------------------------------------------------
/model.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tensorflow.keras import layers
3 | from layers import AUGRU,attention
4 | from activations import Dice,dice
5 | from loss import AuxLayer
6 | import utils
7 |
class DIEN(tf.keras.Model):
    """DIEN model built from embedding, GRU, auxiliary, AUGRU and dense layers."""

    def __init__(self, embedding_count_dict, embedding_dim_dict, embedding_features_list, user_behavior_features, activation="PReLU"):
        """Create the embedding, GRU, auxiliary, AUGRU and fully connected layers.

        Args:
            embedding_count_dict: dict str -> int; vocabulary size of each
                embedded feature (largest integer index + 1).
            embedding_dim_dict: dict str -> int; output dimension of each
                feature's embedding.
            embedding_features_list: list of str; one embedding layer is
                created per name. `get_emb` looks up profile and behavior
                features alike in this table, so both must be listed.
            user_behavior_features: list of str; names of the user_behavior /
                target_item features. The sum of their embedding dimensions
                is used as the unit count of the GRU and AUGRU layers.
            activation: "PReLU" (default), "Dice" or "dice"; selects the layer
                appended after each hidden Dense layer. Any other value
                appends nothing.
        """
        # tf.keras.Model does not define these constructor arguments, so they
        # must not be forwarded to it.
        super(DIEN, self).__init__()
        # Init Embedding Layer
        self.embedding_dim_dict = embedding_dim_dict
        self.embedding_count_dict = embedding_count_dict
        self.embedding_layers = dict()
        for feature in embedding_features_list:
            self.embedding_layers[feature] = layers.Embedding(embedding_count_dict[feature], embedding_dim_dict[feature])
        # Init GRU Layer
        self.user_behavior_gru = layers.GRU(self.get_GRU_input_dim(embedding_dim_dict, user_behavior_features), return_sequences=True)
        # Init Attention Layer
        self.attention_layer = layers.Softmax()
        # Init Auxiliary Layer
        self.AuxNet = AuxLayer()
        # Init AUGRU Layer
        self.user_behavior_augru = AUGRU(self.get_GRU_input_dim(embedding_dim_dict, user_behavior_features))
        # Init Fully Connection Layer
        self.fc = tf.keras.Sequential()
        self.fc.add(layers.BatchNormalization())
        # NOTE(review): each hidden Dense already applies ReLU, so a PReLU
        # placed after it only ever sees non-negative inputs -- confirm this
        # stacking is intended.
        for units in (200, 80):
            self.fc.add(layers.Dense(units, activation="relu"))
            if activation == "Dice":
                self.fc.add(Dice())
            elif activation == "dice":
                self.fc.add(dice(units))
            elif activation == "PReLU":
                self.fc.add(layers.PReLU(alpha_initializer='zeros', weights=None))
        self.fc.add(layers.Dense(2, activation=None))

    def get_GRU_input_dim(self, embedding_dim_dict, user_behavior_features):
        """Return the sum of the embedding dimensions of the given features."""
        return sum(embedding_dim_dict[feature] for feature in user_behavior_features)

    def get_emb(self, user_profile_dict, user_profile_list, click_behavior_dict, target_item_dict, noclick_behavior_dict, user_behavior_list):
        """Embed the profile, target-item and click-history features.

        Each feature is passed through its own embedding layer and the
        per-feature results of each group are concatenated along the last
        axis by `utils.concat_features`.

        `noclick_behavior_dict` is accepted for interface compatibility but
        is not embedded at the moment.

        Returns:
            Tuple (user_profile_embedding, target_item_embedding,
            click_behavior_embedding).
        """
        user_profile_feature_embedding = {
            feature: self.embedding_layers[feature](user_profile_dict[feature])
            for feature in user_profile_list
        }
        target_item_feature_embedding = {
            feature: self.embedding_layers[feature](target_item_dict[feature])
            for feature in user_behavior_list
        }
        click_behavior_embedding = {
            feature: self.embedding_layers[feature](click_behavior_dict[feature])
            for feature in user_behavior_list
        }
        return (
            utils.concat_features(user_profile_feature_embedding),
            utils.concat_features(target_item_feature_embedding),
            utils.concat_features(click_behavior_embedding),
        )

    def auxiliary_loss(self, hidden_states, embedding_out):
        """Auxiliary loss function.

        The reference code published with the paper concatenates the hidden
        states with both the click sequence and the shown-but-not-clicked
        sequence, feeds each through a fully connected network with a softmax
        head and sums the log losses of the two branches. This implementation
        uses the click sequence only.

        Args:
            hidden_states: all hidden states produced by the GRU, h(0) to
                h(n-1).
            embedding_out: the embeddings fed to the GRU, e(1) to e(n).

        Returns:
            The mean of the negative log of the first output component of
            the auxiliary network.
        """
        click_input_ = tf.concat([hidden_states, embedding_out], -1)
        # Component 0 of the auxiliary net output is presumably the click
        # probability -- verify against AuxLayer.
        click_prop_ = self.AuxNet(click_input_)[:, :, 0]
        # NOTE(review): no epsilon is added before the log, so an exact zero
        # from the auxiliary net yields an infinite loss.
        click_loss_ = - tf.reshape(tf.math.log(click_prop_), [-1, tf.shape(embedding_out)[1]])
        return tf.reduce_mean(click_loss_)

    def call(self, user_profile_dict, user_profile_list, click_behavior_dict, target_item_dict, noclick_behavior_dict, user_behavior_list):
        """Run one forward pass of the initialised DIEN model on a batch.

        The caller is expected to derive target_loss and final_loss from the
        returned values in its own training step, then compute gradients and
        apply one optimizer update.

        Args:
            user_profile_dict: dict str -> Tensor; data of every user_profile
                input feature.
            user_profile_list: list of str; names of the user_profile
                features.
            click_behavior_dict: dict str -> Tensor; data of the clicked
                user_behavior features.
            target_item_dict: dict str -> Tensor; data of the target_item
                features.
            noclick_behavior_dict: dict str -> Tensor; data of the non-clicked
                user_behavior features (currently unused).
            user_behavior_list: list of str; names of the user_behavior
                features.

        Returns:
            Tuple (output, logit, aux_loss) where output is the softmax of
            logit.
        """
        # Embedding Layer
        user_profile_embedding, target_item_embedding, click_behavior_embedding = self.get_emb(user_profile_dict, user_profile_list, click_behavior_dict, target_item_dict, noclick_behavior_dict, user_behavior_list)
        # GRU Layer
        click_gru_emb = self.user_behavior_gru(click_behavior_embedding)
        # Auxiliary Loss: pair every hidden state with the next step's embedding.
        aux_loss = self.auxiliary_loss(click_gru_emb[:, :-1, :], click_behavior_embedding[:, 1:, :])
        # Attention Layer: softmax over the target-item / hidden-state dot products.
        attention_scores = tf.matmul(tf.expand_dims(target_item_embedding, 1), click_gru_emb, transpose_b=True)
        hist_attn = self.attention_layer(attention_scores)
        # AUGRU Layer: iterate step by step, carrying the hidden state forward.
        # Iterating over tensors like this presumably relies on eager execution.
        augru_hidden_state = tf.zeros_like(click_gru_emb[:, 0, :])
        step_inputs = tf.transpose(click_gru_emb, [1, 0, 2])
        step_attention = tf.transpose(hist_attn, [2, 0, 1])
        for in_emb, in_att in zip(step_inputs, step_attention):
            augru_hidden_state = self.user_behavior_augru(in_emb, augru_hidden_state, in_att)
        join_emb = tf.concat([augru_hidden_state, user_profile_embedding], -1)
        # NOTE(review): squeeze removes every size-1 axis, so a batch of one
        # sample would presumably lose its batch axis here -- confirm.
        logit = tf.squeeze(self.fc(join_emb))
        output = tf.keras.activations.softmax(logit)
        return output, logit, aux_loss
134 |
class DIN(tf.keras.Model):
    """DIN model: embeddings, attention pooling over history, dense head."""

    def __init__(self, embedding_count_dict, embedding_dim_dict, embedding_features_list, user_behavior_features, activation="PReLU"):
        """Create the embedding, attention and fully connected layers.

        Args:
            embedding_count_dict: dict str -> int; first argument passed to
                `layers.Embedding` for each feature.
            embedding_dim_dict: dict str -> int; second argument passed to
                `layers.Embedding` for each feature.
            embedding_features_list: list of str; one embedding layer is
                created per name.
            user_behavior_features: list of str; the sum of their embedding
                dimensions is passed to the attention layer.
            activation: "PReLU" (default), "Dice" or "dice"; selects the layer
                appended after each hidden Dense layer. Any other value
                appends nothing.
        """
        # tf.keras.Model does not define these constructor arguments, so they
        # must not be forwarded to it.
        super(DIN, self).__init__()
        # Init Embedding Layer
        self.embedding_dim_dict = embedding_dim_dict
        self.embedding_count_dict = embedding_count_dict
        self.embedding_layers = dict()
        for feature in embedding_features_list:
            self.embedding_layers[feature] = layers.Embedding(embedding_count_dict[feature], embedding_dim_dict[feature])
        # DIN Attention+Sum pooling
        self.hist_at = attention(utils.get_input_dim(embedding_dim_dict, user_behavior_features))
        # Init Fully Connection Layer
        self.fc = tf.keras.Sequential()
        self.fc.add(layers.BatchNormalization())
        # NOTE(review): each hidden Dense already applies ReLU, so a PReLU
        # placed after it only ever sees non-negative inputs -- confirm this
        # stacking is intended.
        for units in (200, 80):
            self.fc.add(layers.Dense(units, activation="relu"))
            if activation == "Dice":
                self.fc.add(Dice())
            elif activation == "dice":
                self.fc.add(dice(units))
            elif activation == "PReLU":
                self.fc.add(layers.PReLU(alpha_initializer='zeros', weights=None))
        self.fc.add(layers.Dense(2, activation=None))

    def get_emb_din(self, user_profile_dict, user_profile_list, hist_behavior_dict, target_item_dict, user_behavior_list):
        """Embed the profile, target-item and behavior-history features.

        Each feature is passed through its own embedding layer and the
        per-feature results of each group are concatenated along the last
        axis by `utils.concat_features`.

        Returns:
            Tuple (user_profile_embedding, target_item_embedding,
            hist_behavior_embedding).
        """
        user_profile_feature_embedding = {
            feature: self.embedding_layers[feature](user_profile_dict[feature])
            for feature in user_profile_list
        }
        target_item_feature_embedding = {
            feature: self.embedding_layers[feature](target_item_dict[feature])
            for feature in user_behavior_list
        }
        hist_behavior_embedding = {
            feature: self.embedding_layers[feature](hist_behavior_dict[feature])
            for feature in user_behavior_list
        }
        return (
            utils.concat_features(user_profile_feature_embedding),
            utils.concat_features(target_item_feature_embedding),
            utils.concat_features(hist_behavior_embedding),
        )

    def call(self, user_profile_dict, user_profile_list, hist_behavior_dict, target_item_dict, user_behavior_list, length):
        """Run one forward pass and return (output, logit).

        Args:
            user_profile_dict: dict str -> Tensor; user_profile feature data.
            user_profile_list: list of str; names of the user_profile
                features.
            hist_behavior_dict: dict str -> Tensor; behavior-history feature
                data.
            target_item_dict: dict str -> Tensor; target_item feature data.
            user_behavior_list: list of str; names of the behavior features.
            length: forwarded unchanged to the attention layer; presumably
                the valid history length per sample -- confirm against
                `layers.attention`.

        Returns:
            Tuple (output, logit) where output is the softmax of logit.
        """
        # Embedding Layer
        user_profile_embedding, target_item_embedding, hist_behavior_embedding = self.get_emb_din(user_profile_dict, user_profile_list, hist_behavior_dict, target_item_dict, user_behavior_list)
        hist_attn_emb = self.hist_at(target_item_embedding, hist_behavior_embedding, length)
        join_emb = tf.concat([user_profile_embedding, target_item_embedding, hist_attn_emb], -1)
        # NOTE(review): squeeze removes every size-1 axis, so a batch of one
        # sample would presumably lose its batch axis here -- confirm.
        logit = tf.squeeze(self.fc(join_emb))
        output = tf.keras.activations.softmax(logit)
        return output, logit
194 |
if __name__ == "__main__":
    # Smoke check: build a DIN model with empty feature tables.
    model = DIN({}, {}, [], [])
197 |
--------------------------------------------------------------------------------
/tensorboard.log:
--------------------------------------------------------------------------------
1 | nohup: ignoring input
2 | TensorBoard 2.0.0 at http://10.186.3.226:8028/ (Press CTRL+C to quit)
3 |
--------------------------------------------------------------------------------
/tensorboard.sh:
--------------------------------------------------------------------------------
1 | tensorboard --logdir=./train_log/din/ --host=10.186.3.226 --port=8028
2 |
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
1 | import time
2 | import tensorflow as tf
3 |
def get_file_name():
    """Return a loss-log file name suffixed with the current local time."""
    # strftime formats the current local time when no time tuple is given.
    timestamp = time.strftime("%Y_%m_%d_%H_%M_%S")
    return f"loss.csv.{timestamp}"
7 |
def make_train_loss_dir(file_name, cols=("train_aux_loss", "train_target_loss", "train_final_loss"), model="dien"):
    """Create (or truncate) the training loss CSV and write its header row.

    Args:
        file_name: file name suffix; the file is written to
            ./loss/<model>/train_<file_name>.
        cols: column names joined with commas to form the header line.
        model: sub-directory of ./loss/ to write into.

    The target directory is not created here and must already exist.
    """
    # The context manager closes the handle even if the write fails.
    with open("./loss/" + model + "/train_" + file_name, "w") as f:
        f.write(",".join(cols) + "\n")
12 |
def make_test_loss_dir(file_name, cols=("test_aux_loss", "test_target_loss", "test_final_loss"), model="dien"):
    """Create (or truncate) the test loss CSV and write its header row.

    Args:
        file_name: file name suffix; the file is written to
            ./loss/<model>/test_<file_name>.
        cols: column names joined with commas to form the header line.
        model: sub-directory of ./loss/ to write into.

    The target directory is not created here and must already exist.
    """
    # The context manager closes the handle even if the write fails.
    with open("./loss/" + model + "/test_" + file_name, "w") as f:
        f.write(",".join(cols) + "\n")
17 |
def add_loss(loss_dict, file_name, cols=("aux_loss", "target_loss", "final_loss"), level="train", model="dien"):
    """Append one row of loss values to an existing loss CSV.

    Args:
        loss_dict: mapping from column name to loss value; must contain every
            name in `cols`.
        file_name: file name suffix; the row is appended to
            ./loss/<model>/<level>_<file_name>.
        cols: keys to read from `loss_dict`, in output order.
        level: file name prefix, "train" by default.
        model: sub-directory of ./loss/ to write into.

    Raises:
        KeyError: if a name in `cols` is missing from `loss_dict`.
    """
    # str.join only accepts strings, so numeric losses are converted first;
    # values that are already strings pass through unchanged.
    row = ",".join(str(loss_dict[col]) for col in cols)
    with open("./loss/" + model + "/" + level + "_" + file_name, "a") as f:
        f.write(row + "\n")
25 |
def get_input_dim(embedding_dim_dict, user_behavior_features):
    """Return the sum of the embedding dimensions of the listed features."""
    return sum(embedding_dim_dict[name] for name in user_behavior_features)
31 |
def concat_features(feature_data_dict):
    """Concatenate every value of the dict along the last axis, in dict order."""
    tensors = list(feature_data_dict.values())
    return tf.concat(tensors, -1)
37 |
def mkdir(path):
    """Create `path` (including parents) if it does not exist yet.

    Args:
        path: directory path to create.

    Returns:
        0 if the path exists after the call or already existed, 1 if it
        could not be created.
    """
    # Imported locally because this module does not import os at file level;
    # without it every call hit a NameError and reported failure.
    import os

    try:
        if not os.path.exists(path):
            os.makedirs(path)
    except (OSError, TypeError, ValueError):
        # Keep the best-effort contract: report failure instead of raising.
        return 1
    return 0
--------------------------------------------------------------------------------