├── AutoInt.ipynb ├── DCN.ipynb ├── DIN_DIEN ├── DIEN.ipynb ├── DIN.ipynb ├── data │ └── get_data.txt ├── layers.py └── utils.py ├── DeepFM.ipynb ├── README.md └── xDeepFM.ipynb /AutoInt.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "import numpy as np\n", 11 | "import pandas as pd\n", 12 | "from tensorflow.keras.layers import *\n", 13 | "import tensorflow.keras.backend as K\n", 14 | "import matplotlib.pyplot as plt\n", 15 | "import tensorflow as tf\n", 16 | "from tensorflow.keras.models import Model\n", 17 | "from tensorflow.keras.utils import plot_model\n", 18 | "from tensorflow.keras.callbacks import *\n", 19 | "from sklearn.preprocessing import LabelEncoder\n", 20 | "# from tensorflow.keras.constraints import *\n", 21 | "\n", 22 | "%matplotlib inline" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "# 准备数据" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 2, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "path = '/disk/share/criteo/'\n", 39 | "data = pd.read_csv(path+'criteo_sampled_data.csv')\n", 40 | "cols = data.columns.values\n", 41 | "\n", 42 | "dense_feats = [f for f in cols if f[0] == \"I\"]\n", 43 | "sparse_feats = [f for f in cols if f[0] == \"C\"]\n", 44 | "\n", 45 | "def process_dense_feats(data, feats):\n", 46 | " d = data.copy()\n", 47 | " d = d[feats].fillna(0.0)\n", 48 | " for f in feats:\n", 49 | " d[f] = d[f].apply(lambda x: np.log(x+1) if x > -1 else -1)\n", 50 | " \n", 51 | " return d\n", 52 | "\n", 53 | "data_dense = process_dense_feats(data, dense_feats)\n", 54 | "\n", 55 | "vocab_sizes = {}\n", 56 | "def process_sparse_feats(data, feats):\n", 57 | " d = data.copy()\n", 58 | " d = d[feats].fillna(\"-1\")\n", 59 | " for f in feats:\n", 60 | " label_encoder = LabelEncoder()\n", 61 | " d[f] = label_encoder.fit_transform(d[f])\n", 62 | " vocab_sizes[f] = d[f].nunique() + 1\n", 63 | " return d\n", 64 | "\n", 65 | "data_sparse = process_sparse_feats(data, sparse_feats)\n", 66 | "total_data = pd.concat([data_dense, data_sparse], axis=1)\n", 67 | "total_data['label'] = data['label']" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "# 自定义层" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 3, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "class SparseEmbedding(Layer):\n", 84 | " def __init__(self, sparse_feats, vocab_sizes, embed_dims=8):\n", 85 | " super().__init__()\n", 86 | " self.sparse_feats = sparse_feats\n", 87 | " self.vocab_sizes = vocab_sizes\n", 88 | " self.embed_dims = embed_dims\n", 89 | " \n", 90 | " # 离散特征嵌入矩阵\n", 91 | " self.sparse_embeds_mat = []\n", 92 | " for idx, feat in enumerate(self.sparse_feats):\n", 93 | " # reg = tf.keras.regularizers.l2(0.5)\n", 94 | " emb = Embedding(input_dim=self.vocab_sizes[feat],\n", 95 | " output_dim=self.embed_dims,\n", 96 | " # embeddings_regularizer=reg,\n", 97 | " name=f'{feat}_emb')\n", 98 | " self.sparse_embeds_mat.append(emb)\n", 99 | " \n", 100 | " def call(self, sparse_inputs):\n", 101 | " sparse_embeds = []\n", 102 | " for idx, emb_mat in enumerate(self.sparse_embeds_mat):\n", 103 | " emb = emb_mat(sparse_inputs[idx])\n", 104 | " sparse_embeds.append(emb)\n", 105 | " concat_sparse_embeds = Concatenate(axis=1)(sparse_embeds)\n", 106 | " return concat_sparse_embeds" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 48, 112 | "metadata": {}, 113 | "outputs": [], 114 | "source": [ 115 | "class DenseEmbedding(Layer):\n", 116 | " def __init__(self, dense_feats, embed_dims=8):\n", 117 | " super().__init__()\n", 118 | " self.embed_dims = embed_dims\n", 119 | " \n", 120 | " self.dense_embs = []\n", 121 | " for feat in dense_feats:\n", 122 | " dense_emb = self.add_weight(shape=[1, self.embed_dims], \n", 123 | " name=f'dense_emb_{feat}')\n", 124 | " self.dense_embs.append(dense_emb)\n", 125 | " \n", 126 | " \n", 127 | " def call(self, dense_inputs):\n", 128 | " scaled_embs = []\n", 129 | " for i, dense_input in enumerate(dense_inputs):\n", 130 | " dense_emb = dense_input * self.dense_embs[i]\n", 131 | " dense_emb = tf.expand_dims(dense_emb, axis=1)\n", 132 | " scaled_embs.append(dense_emb)\n", 133 | " \n", 134 | " concat_scaled_embs = Concatenate(axis=1)(scaled_embs)\n", 135 | " return concat_scaled_embs\n" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 49, 141 | "metadata": {}, 142 | "outputs": [], 143 | "source": [ 144 | "class attention_cross_layer(Layer):\n", 145 | " def __init__(self, n_heads=6, att_dim=8):\n", 146 | " super().__init__()\n", 147 | " self.n_heads = n_heads\n", 148 | " self.att_dim = att_dim\n", 149 | " \n", 150 | " def build(self, input_shape):\n", 151 | " emb_dim = input_shape[-1]\n", 152 | " self.Wq = []\n", 153 | " self.Wv = []\n", 154 | " shape = [emb_dim, self.att_dim]\n", 155 | " for i in range(self.n_heads):\n", 156 | " self.Wq.append(self.add_weight(shape=shape, name=f'Wq_{i}'))\n", 157 | " self.Wv.append(self.add_weight(shape=shape, name=f'Wv_{i}'))\n", 158 | " \n", 159 | " def call(self, embeds): # ?,n,d\n", 160 | " heads = []\n", 161 | " for i in range(self.n_heads):\n", 162 | " emb_q = tf.matmul(embeds, self.Wq[i]) # ?,n,att_dim\n", 163 | " emb_v = tf.matmul(embeds, self.Wv[i]) # ?,n,att_dim\n", 164 | " emb = Attention()([emb_q, emb_v])\n", 165 | " heads.append(emb)\n", 166 | " if len(heads) > 1:\n", 167 | " heads = tf.concat(heads, axis=-1)\n", 168 | " else:\n", 169 | " heads = heads[0]\n", 170 | " return heads" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": null, 176 | "metadata": {}, 177 | "outputs": [], 178 | "source": [] 179 | }, 180 | { 181 | "cell_type": "markdown", 182 | "metadata": {}, 183 | "source": [ 184 | "# 构建模型 (keras函数式)" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": 52, 190 | "metadata": {}, 191 | "outputs": [], 192 | "source": [ 193 | "class AutoInt:\n", 194 | " def __init__(self, dense_feats, sparse_feats, vocab_sizes, \n", 195 | " embed_dims=8, cross_layer_num=3, n_atten_layers=2):\n", 196 | " \n", 197 | " # 连续特征\n", 198 | " self.dense_inputs = []\n", 199 | " for feat in dense_feats:\n", 200 | " self.dense_inputs.append(Input(shape=1, name=feat))\n", 201 | " \n", 202 | " # 离散特征\n", 203 | " self.sparse_inputs = []\n", 204 | " for feat in sparse_feats:\n", 205 | " self.sparse_inputs.append(Input(shape=1, name=feat))\n", 206 | " \n", 207 | " self.SparseEmbedding = SparseEmbedding(sparse_feats, vocab_sizes, embed_dims=8)\n", 208 | " self.DenseEmbedding = DenseEmbedding(dense_feats, embed_dims=8)\n", 209 | " \n", 210 | " self.atten_cross_layers = []\n", 211 | " for i in range(n_atten_layers):\n", 212 | " self.atten_cross_layers.append(attention_cross_layer())\n", 213 | " \n", 214 | " self.dense = Dense(1, activation='sigmoid')\n", 215 | " \n", 216 | " def bulid_model(self):\n", 217 | " all_inputs = [self.dense_inputs, self.sparse_inputs]\n", 218 | " \n", 219 | " \n", 220 | " concat_sparse_embeds = self.SparseEmbedding(self.sparse_inputs)\n", 221 | " concat_dense_embeds = self.DenseEmbedding(self.dense_inputs)\n", 222 | "\n", 223 | " concat_embeds = Concatenate(axis=1)([concat_sparse_embeds, concat_dense_embeds])\n", 224 | " \n", 225 | " atten_output = concat_embeds\n", 226 | " for layer in self.atten_cross_layers:\n", 227 | " atten_output = layer(atten_output)\n", 228 | " \n", 229 | " # 输出部分\n", 230 | " output = self.dense(Flatten()(atten_output))\n", 231 | " \n", 232 | " model = Model(inputs=all_inputs, outputs=output)\n", 233 | " return model" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": 53, 239 | "metadata": { 240 | "scrolled": true 241 | }, 242 | "outputs": [ 243 | { 244 | "name": "stdout", 245 | "output_type": "stream", 246 | "text": [ 247 | "Epoch 1/3\n", 248 | "1954/1954 [==============================] - 63s 32ms/step - loss: 0.4939 - binary_crossentropy: 0.4939 - auc: 0.7452 - val_loss: 0.4866 - val_binary_crossentropy: 0.4866 - val_auc: 0.7621 - lr: 0.0010\n", 249 | "Epoch 2/3\n", 250 | " 107/1954 [>.............................] - ETA: 54s - loss: 0.4650 - binary_crossentropy: 0.4650 - auc: 0.7802" 251 | ] 252 | }, 253 | { 254 | "ename": "KeyboardInterrupt", 255 | "evalue": "", 256 | "output_type": "error", 257 | "traceback": [ 258 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 259 | "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", 260 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 31\u001b[0m model.fit([train_dense_x_all, train_sparse_x_all], train_label_all, batch_size=256,\n\u001b[1;32m 32\u001b[0m \u001b[0mvalidation_data\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mval_dense_x_all\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mval_sparse_x_all\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mval_label_all\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 33\u001b[0;31m callbacks=callbacks, epochs=3)\n\u001b[0m", 261 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py\u001b[0m in \u001b[0;36m_method_wrapper\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 64\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_method_wrapper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 65\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_in_multi_worker_mode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# pylint: disable=protected-access\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 66\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 67\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 68\u001b[0m \u001b[0;31m# Running inside `run_distribute_coordinator` already.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 262 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)\u001b[0m\n\u001b[1;32m 846\u001b[0m batch_size=batch_size):\n\u001b[1;32m 847\u001b[0m \u001b[0mcallbacks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mon_train_batch_begin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 848\u001b[0;31m \u001b[0mtmp_logs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtrain_function\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0miterator\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 849\u001b[0m \u001b[0;31m# Catch OutOfRangeError for Datasets of unknown size.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 850\u001b[0m \u001b[0;31m# This blocks until the batch has finished executing.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 263 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwds)\u001b[0m\n\u001b[1;32m 578\u001b[0m \u001b[0mxla_context\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mExit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 579\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 580\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 581\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 582\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtracing_count\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_tracing_count\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 264 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py\u001b[0m in \u001b[0;36m_call\u001b[0;34m(self, *args, **kwds)\u001b[0m\n\u001b[1;32m 609\u001b[0m \u001b[0;31m# In this case we have created variables on the first call, so we run the\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 610\u001b[0m \u001b[0;31m# defunned version which is guaranteed to never create variables.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 611\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_stateless_fn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# pylint: disable=not-callable\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 612\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_stateful_fn\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 613\u001b[0m \u001b[0;31m# Release the lock early so that multiple threads can perform the call\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 265 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/tensorflow/python/eager/function.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 2418\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_lock\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2419\u001b[0m \u001b[0mgraph_function\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_maybe_define_function\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2420\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mgraph_function\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_filtered_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# pylint: disable=protected-access\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2421\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2422\u001b[0m \u001b[0;34m@\u001b[0m\u001b[0mproperty\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 266 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/tensorflow/python/eager/function.py\u001b[0m in \u001b[0;36m_filtered_call\u001b[0;34m(self, args, kwargs)\u001b[0m\n\u001b[1;32m 1663\u001b[0m if isinstance(t, (ops.Tensor,\n\u001b[1;32m 1664\u001b[0m resource_variable_ops.BaseResourceVariable))),\n\u001b[0;32m-> 1665\u001b[0;31m self.captured_inputs)\n\u001b[0m\u001b[1;32m 1666\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1667\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_call_flat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcaptured_inputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcancellation_manager\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 267 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/tensorflow/python/eager/function.py\u001b[0m in \u001b[0;36m_call_flat\u001b[0;34m(self, args, captured_inputs, cancellation_manager)\u001b[0m\n\u001b[1;32m 1744\u001b[0m \u001b[0;31m# No tape is watching; skip to running the function.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1745\u001b[0m return self._build_call_outputs(self._inference_function.call(\n\u001b[0;32m-> 1746\u001b[0;31m ctx, args, cancellation_manager=cancellation_manager))\n\u001b[0m\u001b[1;32m 1747\u001b[0m forward_backward = self._select_forward_and_backward_functions(\n\u001b[1;32m 1748\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 268 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/tensorflow/python/eager/function.py\u001b[0m in \u001b[0;36mcall\u001b[0;34m(self, ctx, args, cancellation_manager)\u001b[0m\n\u001b[1;32m 596\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 597\u001b[0m \u001b[0mattrs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mattrs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 598\u001b[0;31m ctx=ctx)\n\u001b[0m\u001b[1;32m 599\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 600\u001b[0m outputs = execute.execute_with_cancellation(\n", 269 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/tensorflow/python/eager/execute.py\u001b[0m in \u001b[0;36mquick_execute\u001b[0;34m(op_name, num_outputs, inputs, attrs, ctx, name)\u001b[0m\n\u001b[1;32m 58\u001b[0m \u001b[0mctx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mensure_initialized\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 59\u001b[0m tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,\n\u001b[0;32m---> 60\u001b[0;31m inputs, attrs, num_outputs)\n\u001b[0m\u001b[1;32m 61\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mcore\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_NotOkStatusException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 62\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mname\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 270 | "\u001b[0;31mKeyboardInterrupt\u001b[0m: " 271 | ] 272 | } 273 | ], 274 | "source": [ 275 | "train_data = total_data.loc[:500000-1]\n", 276 | "valid_data = total_data.loc[500000:]\n", 277 | "\n", 278 | "train_dense_x_all = [train_data[f].values for f in dense_feats]\n", 279 | "train_sparse_x_all = [train_data[f].values for f in sparse_feats]\n", 280 | "train_label_all = train_data[['label']].values\n", 281 | "\n", 282 | "val_dense_x_all = [valid_data[f].values for f in dense_feats]\n", 283 | "val_sparse_x_all = [valid_data[f].values for f in sparse_feats]\n", 284 | "val_label_all = valid_data[['label']].values\n", 285 | "\n", 286 | "\n", 287 | "model = AutoInt(dense_feats, sparse_feats, vocab_sizes).bulid_model()\n", 288 | "model.compile(optimizer='rmsprop', loss='binary_crossentropy', \n", 289 | " metrics=['binary_crossentropy', 'AUC']) # tf.keras.metrics.AUC()\n", 290 | "\n", 291 | "os.makedirs('checkpoints', exist_ok=True)\n", 292 | "checkpoints = ModelCheckpoint('checkpoints/model.h5', monitor='val_auc', \n", 293 | " mode='max', save_weights_only=True)# save_best_only=True\n", 294 | "early_stopping = EarlyStopping(monitor='val_auc', min_delta=0.0001, patience=2)\n", 295 | "def scheduler(epoch):\n", 296 | " thred = 10\n", 297 | " if epoch < thred:\n", 298 | " return 0.001\n", 299 | " else:\n", 300 | " return 0.001 * tf.math.exp(0.1 * (thred - epoch))\n", 301 | "lr_schedule = LearningRateScheduler(scheduler)\n", 302 | "callbacks = [early_stopping, lr_schedule, checkpoints] # \n", 303 | "\n", 304 | "\n", 305 | "model.fit([train_dense_x_all, train_sparse_x_all], train_label_all, batch_size=256,\n", 306 | " validation_data=([val_dense_x_all, val_sparse_x_all], val_label_all),\n", 307 | " callbacks=callbacks, epochs=3)" 308 | ] 309 | }, 310 | { 311 | "cell_type": "code", 312 | "execution_count": null, 313 | "metadata": {}, 314 | "outputs": [], 315 | "source": [] 316 | } 317 | ], 318 | "metadata": { 319 | "kernelspec": { 320 | "display_name": "Python 3", 321 | "language": "python", 322 | "name": "python3" 323 | }, 324 | "language_info": { 325 | "codemirror_mode": { 326 | "name": "ipython", 327 | "version": 3 328 | }, 329 | "file_extension": ".py", 330 | "mimetype": "text/x-python", 331 | "name": "python", 332 | "nbconvert_exporter": "python", 333 | "pygments_lexer": "ipython3", 334 | "version": "3.7.6" 335 | } 336 | }, 337 | "nbformat": 4, 338 | "nbformat_minor": 4 339 | } 340 | -------------------------------------------------------------------------------- /DCN.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "import numpy as np\n", 11 | "import pandas as pd\n", 12 | "from tensorflow.keras.layers import *\n", 13 | "import tensorflow.keras.backend as K\n", 14 | "import matplotlib.pyplot as plt\n", 15 | "import tensorflow as tf\n", 16 | "from tensorflow.keras.models import Model\n", 17 | "from tensorflow.keras.utils import plot_model\n", 18 | "from tensorflow.keras.callbacks import *\n", 19 | "from sklearn.preprocessing import LabelEncoder\n", 20 | "# from tensorflow.keras.constraints import *\n", 21 | "\n", 22 | "%matplotlib inline" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "# 准备数据" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 2, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "path = '/disk/share/criteo/'\n", 39 | "data = pd.read_csv(path+'criteo_sampled_data.csv')\n", 40 | "cols = data.columns.values\n", 41 | "\n", 42 | "dense_feats = [f for f in cols if f[0] == \"I\"]\n", 43 | "sparse_feats = [f for f in cols if f[0] == \"C\"]\n", 44 | "\n", 45 | "def process_dense_feats(data, feats):\n", 46 | " d = data.copy()\n", 47 | " d = d[feats].fillna(0.0)\n", 48 | " for f in feats:\n", 49 | " d[f] = d[f].apply(lambda x: np.log(x+1) if x > -1 else -1)\n", 50 | " \n", 51 | " return d\n", 52 | "\n", 53 | "data_dense = process_dense_feats(data, dense_feats)\n", 54 | "\n", 55 | "vocab_sizes = {}\n", 56 | "def process_sparse_feats(data, feats):\n", 57 | " d = data.copy()\n", 58 | " d = d[feats].fillna(\"-1\")\n", 59 | " for f in feats:\n", 60 | " label_encoder = LabelEncoder()\n", 61 | " d[f] = label_encoder.fit_transform(d[f])\n", 62 | " vocab_sizes[f] = d[f].nunique() + 1\n", 63 | " return d\n", 64 | "\n", 65 | "data_sparse = process_sparse_feats(data, sparse_feats)\n", 66 | "total_data = pd.concat([data_dense, data_sparse], axis=1)\n", 67 | "total_data['label'] = data['label']" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "# 自定义层" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 3, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "class SparseEmbedding(Layer):\n", 84 | " def __init__(self, sparse_feats, vocab_sizes, embed_dims=8):\n", 85 | " super().__init__()\n", 86 | " self.sparse_feats = sparse_feats\n", 87 | " self.vocab_sizes = vocab_sizes\n", 88 | " self.embed_dims = embed_dims\n", 89 | " \n", 90 | " # 离散特征嵌入矩阵\n", 91 | " self.sparse_embeds_mat = []\n", 92 | " for idx, feat in enumerate(self.sparse_feats):\n", 93 | " # reg = tf.keras.regularizers.l2(0.5)\n", 94 | " emb = Embedding(input_dim=self.vocab_sizes[feat],\n", 95 | " output_dim=self.embed_dims,\n", 96 | " # embeddings_regularizer=reg,\n", 97 | " name=f'{feat}_emb')\n", 98 | " self.sparse_embeds_mat.append(emb)\n", 99 | " \n", 100 | " def call(self, sparse_inputs):\n", 101 | " sparse_embeds = []\n", 102 | " for idx, emb_mat in enumerate(self.sparse_embeds_mat):\n", 103 | " emb = emb_mat(sparse_inputs[idx])\n", 104 | " sparse_embeds.append(emb)\n", 105 | " concat_sparse_embeds = Concatenate(axis=1)(sparse_embeds)\n", 106 | " return concat_sparse_embeds" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 4, 112 | "metadata": {}, 113 | "outputs": [], 114 | "source": [ 115 | "class single_cross_layer(Layer):\n", 116 | " def __init__(self):\n", 117 | " super().__init__()\n", 118 | " \n", 119 | " def build(self, input_shape):\n", 120 | " embed_dims = input_shape[0][-1]\n", 121 | " self.w = self.add_weight(shape=[embed_dims, 1], name='w')\n", 122 | " self.b = self.add_weight(shape=[embed_dims, 1], name='b')\n", 123 | " \n", 124 | " def call(self, inputs):\n", 125 | " x0, xl = inputs\n", 126 | " x0 = tf.expand_dims(x0, -1)\n", 127 | " xl = tf.expand_dims(xl, -1)\n", 128 | " x0_xl = tf.matmul(x0, xl, transpose_b=True)\n", 129 | " x_next = tf.matmul(x0_xl, self.w) + xl + self.b\n", 130 | " x_next = tf.squeeze(x_next, axis=-1)\n", 131 | " return x_next" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": 5, 137 | "metadata": {}, 138 | "outputs": [], 139 | "source": [ 140 | "class cross_layer(Layer):\n", 141 | " def __init__(self, cross_layer_num):\n", 142 | " super().__init__()\n", 143 | " self.cross_layer_num = cross_layer_num\n", 144 | " self.cross_layers = []\n", 145 | " for i in range(cross_layer_num):\n", 146 | " self.cross_layers.append(single_cross_layer())\n", 147 | " \n", 148 | " def call(self, inputs):\n", 149 | " x0 = inputs\n", 150 | " xl = self.cross_layers[0]([x0, x0])\n", 151 | " for layer in self.cross_layers[1:]:\n", 152 | " xl = layer([x0, xl])\n", 153 | " return xl\n", 154 | " " 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": 6, 160 | "metadata": {}, 161 | "outputs": [], 162 | "source": [ 163 | "class DNN(Layer):\n", 164 | " def __init__(self, hid_units=[256,256,256], use_dropout=True, output_unit=16):\n", 165 | " super().__init__()\n", 166 | " self.hid_units = hid_units\n", 167 | " self.use_dropout = use_dropout\n", 168 | " self.output_unit = output_unit\n", 169 | " self.Dropout = Dropout(0.3)\n", 170 | " self.dense_layers = []\n", 171 | " for unit in self.hid_units:\n", 172 | " self.dense_layers.append(Dense(unit, activation='relu'))\n", 173 | " self.dense_layers.append(Dense(self.output_unit))\n", 174 | " \n", 175 | " def call(self, concat_sparse_embeds):\n", 176 | " flat_sparse_embed = Flatten()(concat_sparse_embeds)\n", 177 | " \n", 178 | " x = self.dense_layers[0](flat_sparse_embed)\n", 179 | " for dense in self.dense_layers[1:]:\n", 180 | " x = dense(x)\n", 181 | " if self.use_dropout:\n", 182 | " x = self.Dropout(x)\n", 183 | " return x" 184 | ] 185 | }, 186 | { 187 | "cell_type": "markdown", 188 | "metadata": {}, 189 | "source": [ 190 | "# 构建模型 (keras函数式)" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": 7, 196 | "metadata": {}, 197 | "outputs": [], 198 | "source": [ 199 | "class DCN:\n", 200 | " def __init__(self, dense_feats, sparse_feats, vocab_sizes, \n", 201 | " embed_dims=8, cross_layer_num=3):\n", 202 | " \n", 203 | " # 连续特征\n", 204 | " self.dense_inputs = []\n", 205 | " for feat in dense_feats:\n", 206 | " self.dense_inputs.append(Input(shape=1, name=feat))\n", 207 | " \n", 208 | " # 离散特征\n", 209 | " self.sparse_inputs = []\n", 210 | " for feat in sparse_feats:\n", 211 | " self.sparse_inputs.append(Input(shape=1, name=feat))\n", 212 | " \n", 213 | " self.SparseEmbedding = SparseEmbedding(sparse_feats, vocab_sizes, embed_dims=8)\n", 214 | " \n", 215 | " self.cross_layer = cross_layer(cross_layer_num)\n", 216 | " \n", 217 | " self.DNN = DNN()\n", 218 | " self.dense = Dense(1, activation='sigmoid')\n", 219 | " \n", 220 | " def bulid_model(self):\n", 221 | " all_inputs = [self.dense_inputs, self.sparse_inputs]\n", 222 | " \n", 223 | " concat_dense_inputs = Concatenate(axis=1)(self.dense_inputs)\n", 224 | " \n", 225 | " concat_sparse_embeds = self.SparseEmbedding(self.sparse_inputs)\n", 226 | " flatten_sparse_embeds = Flatten()(concat_sparse_embeds)\n", 227 | " \n", 228 | " concat_inputs = Concatenate(axis=1)([flatten_sparse_embeds, concat_dense_inputs])\n", 229 | " cross_output = self.cross_layer(concat_inputs)\n", 230 | " \n", 231 | " fc_layer_output = self.DNN(concat_sparse_embeds)\n", 232 | " \n", 233 | " # 输出部分\n", 234 | " concat_layer = Concatenate()([cross_output, fc_layer_output])\n", 235 | " output = self.dense(concat_layer)\n", 236 | " \n", 237 | " model = Model(inputs=all_inputs, outputs=output)\n", 238 | " return model" 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": 19, 244 | "metadata": { 245 | "scrolled": true 246 | }, 247 | "outputs": [ 248 | { 249 | "name": "stdout", 250 | "output_type": "stream", 251 | "text": [ 252 | "Epoch 1/3\n", 253 | "WARNING:tensorflow:From /root/anaconda3/lib/python3.7/site-packages/tensorflow/python/ops/resource_variable_ops.py:1817: calling BaseResourceVariable.__init__ (from tensorflow.python.ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version.\n", 254 | "Instructions for updating:\n", 255 | "If using Keras pass *_constraint arguments to layers.\n", 256 | "1954/1954 [==============================] - 111s 57ms/step - loss: 0.4835 - binary_crossentropy: 0.4835 - auc: 0.7580 - val_loss: 0.4794 - val_binary_crossentropy: 0.4794 - val_auc: 0.7693 - lr: 0.0010\n", 257 | "Epoch 2/3\n", 258 | "1954/1954 [==============================] - 111s 57ms/step - loss: 0.4586 - binary_crossentropy: 0.4586 - auc: 0.7898 - val_loss: 0.4993 - val_binary_crossentropy: 0.4993 - val_auc: 0.7686 - lr: 0.0010\n", 259 | "Epoch 3/3\n", 260 | "1954/1954 [==============================] - 110s 56ms/step - loss: 0.4238 - binary_crossentropy: 0.4238 - auc: 0.8248 - val_loss: 0.5280 - val_binary_crossentropy: 0.5280 - val_auc: 0.7396 - lr: 0.0010\n" 261 | ] 262 | }, 263 | { 264 | "data": { 265 | "text/plain": [ 266 | "" 267 | ] 268 | }, 269 | "execution_count": 19, 270 | "metadata": {}, 271 | "output_type": "execute_result" 272 | } 273 | ], 274 | "source": [ 275 | "train_data = total_data.loc[:500000-1]\n", 276 | "valid_data = total_data.loc[500000:]\n", 277 | "\n", 278 | "train_dense_x_all = [train_data[f].values for f in dense_feats]\n", 279 | "train_sparse_x_all = [train_data[f].values for f in sparse_feats]\n", 280 | "train_label_all = train_data[['label']].values\n", 281 | "\n", 282 | "val_dense_x_all = [valid_data[f].values for f in dense_feats]\n", 283 | "val_sparse_x_all = [valid_data[f].values for f in sparse_feats]\n", 284 | "val_label_all = valid_data[['label']].values\n", 285 | "\n", 286 | "\n", 287 | "model = DCN(dense_feats, sparse_feats, vocab_sizes).bulid_model()\n", 288 | "model.compile(optimizer='rmsprop', loss='binary_crossentropy', \n", 289 | " metrics=['binary_crossentropy', 'AUC']) # tf.keras.metrics.AUC()\n", 290 | "\n", 291 | "os.makedirs('checkpoints', exist_ok=True)\n", 292 | "checkpoints = ModelCheckpoint('checkpoints/model.h5', monitor='val_auc', \n", 293 | " mode='max', save_weights_only=True)# save_best_only=True\n", 294 | "early_stopping = EarlyStopping(monitor='val_auc', min_delta=0.0001, patience=2)\n", 295 | "def scheduler(epoch):\n", 296 | " thred = 10\n", 297 | " if epoch < thred:\n", 298 | " return 0.001\n", 299 | " else:\n", 300 | " return 0.001 * tf.math.exp(0.1 * (thred - epoch))\n", 301 | "lr_schedule = LearningRateScheduler(scheduler)\n", 302 | "callbacks = [early_stopping, lr_schedule, checkpoints] # \n", 303 | "\n", 304 | "\n", 305 | "model.fit([train_dense_x_all, train_sparse_x_all], train_label_all, batch_size=256,\n", 306 | " validation_data=([val_dense_x_all, val_sparse_x_all], val_label_all),\n", 307 | " callbacks=callbacks, epochs=3)" 308 | ] 309 | }, 310 | { 311 | "cell_type": "code", 312 | "execution_count": 16, 313 | "metadata": {}, 314 | "outputs": [], 315 | "source": [ 316 | "# # 加载模型\n", 317 | "# model = DCN(dense_feats, sparse_feats, vocab_sizes).bulid_model()\n", 318 | "# model.load_weights('checkpoints/model.h5')" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": null, 324 | "metadata": {}, 325 | "outputs": [], 326 | "source": [] 327 | } 328 | ], 329 | "metadata": { 330 | "kernelspec": { 331 | "display_name": "Python 3", 332 | "language": "python", 333 | "name": "python3" 334 | }, 335 | "language_info": { 336 | "codemirror_mode": { 337 | "name": "ipython", 338 | "version": 3 339 | }, 340 | "file_extension": ".py", 341 | "mimetype": "text/x-python", 342 | "name": "python", 343 | "nbconvert_exporter": "python", 344 | "pygments_lexer": "ipython3", 345 | "version": "3.7.6" 346 | } 347 | }, 348 | "nbformat": 4, 349 | "nbformat_minor": 4 350 | } 351 | -------------------------------------------------------------------------------- /DIN_DIEN/DIEN.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import json\n", 11 | "import pickle as pkl\n", 12 | "import random\n", 13 | "import gzip\n", 14 | "import tensorflow as tf\n", 15 | "from tensorflow.keras.layers import *\n", 16 | "from tensorflow.keras import Model\n", 17 | "from layers import Dice\n", 18 | "from utils import DataIterator, prepare_data" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "class EmbeddingLayer(Layer):\n", 28 | " def __init__(self, user_count, item_count, cate_count, emb_dim, use_negsampling=False):\n", 29 | " super().__init__()\n", 30 | " self.emb_dim = emb_dim\n", 31 | " self.use_negsampling = use_negsampling\n", 32 | " self.user_emb = Embedding(user_count, self.emb_dim,\n", 33 | " mask_zero=True, name=\"user_emb\")\n", 34 | " self.item_emb = Embedding(item_count, self.emb_dim,\n", 35 | " mask_zero=True, name=\"item_emb\")\n", 36 | " self.cate_emb = Embedding(cate_count, self.emb_dim,\n", 37 | " mask_zero=True, name=\"cate_emb\")\n", 38 | " \n", 39 | " def call(self, user, item, cate, item_his, cate_his,\n", 40 | " noclick_item_his=[], noclick_cate_his=[]):\n", 41 | " user_emb = self.user_emb(user) # (B, D)\n", 42 | " \n", 43 | " # 基本属性embedding:\n", 44 | " item_emb = self.item_emb(item) # (B, D)\n", 45 | " cate_emb = self.cate_emb(cate) # (B, D)\n", 46 | " item_join_emb = Concatenate(-1)([item_emb, cate_emb]) # (B, 2D)\n", 47 | " \n", 48 | " \n", 49 | " # 历史行为序列embedding:\n", 50 | " item_his_emb = self.item_emb(item_his) # (B, T, D)\n", 51 | " cate_his_emb = self.item_emb(cate_his) # (B, T, D)\n", 52 | " item_join_his_emb = Concatenate(-1)([item_his_emb, cate_his_emb]) # (B, T, 2D)\n", 53 | " item_his_emb_sum = tf.reduce_sum(item_join_his_emb, axis=1) # (B, D)\n", 54 | " \n", 55 | " if self.use_negsampling:\n", 56 | " # (B, T, neg_num, D)\n", 57 | " noclick_item_his_emb = self.item_emb(noclick_item_his) \n", 58 | " # (B, T, neg_num, D)\n", 59 | " noclick_cate_his_emb = self.item_emb(noclick_cate_his) \n", 60 | " # (B, T, neg_num, 2D)\n", 61 | " noclick_item_join_his_emb = Concatenate(-1)([noclick_item_his_emb, noclick_cate_his_emb])\n", 62 | " # (B, T, 2D)\n", 63 | " noclick_item_emb_neg_sum = tf.reduce_sum(noclick_item_join_his_emb, axis=2) \n", 64 | " # (B, 2D)\n", 65 | " noclick_item_his_emb_sum = tf.reduce_sum(noclick_item_emb_neg_sum, axis=1) \n", 66 | " # 只取出第一个负样本构成序列,(B, T, 2D)\n", 67 | " noclick_item_join_his_emb = noclick_item_join_his_emb[:, :, 0, :] \n", 68 | " # # (B, T, 2D)\n", 69 | " # noclick_item_join_his_emb = tf.squeeze(noclick_item_join_his_emb, 2)\n", 70 | " \n", 71 | " return user_emb, item_join_emb, \\\n", 72 | " item_join_his_emb, item_his_emb_sum, \\\n", 73 | " noclick_item_join_his_emb, noclick_item_his_emb_sum \n", 74 | " \n", 75 | " return user_emb, item_join_emb, \\\n", 76 | " item_join_his_emb, item_his_emb_sum\n", 77 | " " 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 13, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "class FCLayer(Layer):\n", 87 | " def __init__(self, hid_dims=[80, 40, 2], use_dice=False):\n", 88 | " super().__init__()\n", 89 | " self.hid_dims = hid_dims\n", 90 | " self.use_dice = use_dice\n", 91 | " self.bn = BatchNormalization()\n", 92 | " self.fc = []\n", 93 | " self.dice = []\n", 94 | " for dim in self.hid_dims[:-1]:\n", 95 | " if use_dice:\n", 96 | " self.fc.append(Dense(dim, name=f'dense_{dim}'))\n", 97 | " self.dice.append(Dice())\n", 98 | " else:\n", 99 | " self.fc.append(Dense(dim, activation=\"sigmoid\", \n", 100 | " name=f'dense_{dim}'))\n", 101 | " self.fc.append(Dense(self.hid_dims[-1], name=\"dense_output\"))\n", 102 | " \n", 103 | " def call(self, inputs):\n", 104 | " inputs = self.bn(inputs)\n", 105 | " if self.use_dice:\n", 106 | " fc_out = inputs\n", 107 | " for i in range(len(self.dice)):\n", 108 | " fc_out = self.fc[i](fc_out)\n", 109 | " fc_out = self.dice[i](fc_out)\n", 110 | " fc_out = self.fc[-1](fc_out)\n", 111 | " return fc_out\n", 112 | " else: \n", 113 | " fc_out = self.fc[0](inputs)\n", 114 | " for fc in self.fc[1:]:\n", 115 | " fc_out = fc(fc_out)\n", 116 | " return fc_out" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 14, 122 | "metadata": {}, 123 | "outputs": [], 124 | "source": [ 125 | "# 计算注意力得分\n", 126 | "class DINAttenLayer(Layer):\n", 127 | " def __init__(self, hid_dims=[80, 40, 1]):\n", 128 | " super().__init__()\n", 129 | " self.FCLayer = FCLayer(hid_dims)\n", 130 | " \n", 131 | " def call(self, query, facts, mask):\n", 132 | " \"\"\"\n", 133 | " query: (B, 2D)\n", 134 | " facts: (B, T, 2D)\n", 135 | " mask: (B, T)\n", 136 | " \"\"\"\n", 137 | " mask = tf.equal(mask, tf.ones_like(mask)) # (B, T)\n", 138 | " queries = tf.tile(query, [1, facts.shape[1]]) # (B, 2D*T)\n", 139 | " queries = tf.reshape(queries, [-1, facts.shape[1], facts.shape[2]]) # # (B, T, 2D)\n", 140 | " # (B, T, 2D*4)\n", 141 | " din_all = tf.concat([queries, facts, queries - facts, queries * facts], axis=-1)\n", 142 | " \n", 143 | " fc_out = self.FCLayer(din_all) # (B, T, 1)\n", 144 | " score = fc_out # (B, T, 1)\n", 145 | " key_masks = tf.expand_dims(mask, 2) # (B, T) -> (B, T, 1)\n", 146 | " padding = tf.ones_like(score) * (-2**32 + 1)\n", 147 | " # True的地方为score,否则为极大的负数\n", 148 | " score = tf.where(key_masks, score, padding) # (B, T, 1)\n", 149 | " score = tf.nn.softmax(score) # (B, T, 1)\n", 150 | " \n", 151 | " return score\n", 152 | " " 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 15, 158 | "metadata": {}, 159 | "outputs": [], 160 | "source": [ 161 | "class AuxTrainLayer(Layer):\n", 162 | " def __init__(self, hid_dims=[100, 50, 1]):\n", 163 | " super().__init__()\n", 164 | " self.clk_fc = FCLayer(hid_dims)\n", 165 | " self.noclk_fc = FCLayer(hid_dims)\n", 166 | " \n", 167 | " def call(self, h_states, click_seq, noclick_seq, mask):\n", 168 | " mask = tf.cast(mask, tf.float32)\n", 169 | " seq_len = click_seq.shape[1] # T-1\n", 170 | " \n", 171 | " clk_input = tf.concat([h_states, click_seq], -1) # (B, T-1, 2D*2)\n", 172 | " clk_prob = tf.sigmoid(self.clk_fc(clk_input)) # (B, T-1, 1)\n", 173 | " # (B, T-1)\n", 174 | " clk_loss = - tf.reshape(tf.math.log(clk_prob), [-1, seq_len]) * mask \n", 175 | " \n", 176 | " noclk_input = tf.concat([h_states, noclick_seq], -1) # (B, T-1, 2D*2)\n", 177 | " noclk_prob = tf.sigmoid(self.clk_fc(noclk_input)) # (B, T-1, 1)\n", 178 | " # (B, T-1)\n", 179 | " noclk_loss = - tf.reshape(tf.math.log(1.0 - noclk_prob), [-1, seq_len]) * mask\n", 180 | " # 不指定axis,则计算全部数值的平均值\n", 181 | " aux_loss = tf.reduce_mean(clk_loss + noclk_loss)\n", 182 | " return aux_loss\n", 183 | " " 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": 16, 189 | "metadata": {}, 190 | "outputs": [], 191 | "source": [ 192 | "class AUGRUCell(Layer):\n", 193 | " def __init__(self, units):\n", 194 | " super().__init__()\n", 195 | " self.units = units\n", 196 | " # 作为一个 RNN 的单元,必须有state_size属性\n", 197 | " # state_size 表示每个时间步输出的维度\n", 198 | " self.state_size = units\n", 199 | " \n", 200 | " \n", 201 | " def build(self, input_shape):\n", 202 | " # 输入数据是一个tupe: (gru_output, atten_scores)\n", 203 | " # 因此,t时刻输入的x_t的维度为:\n", 204 | " dim_xt = input_shape[0][-1]\n", 205 | " \n", 206 | " # 重置门对t时刻输入数据x的权重参数:\n", 207 | " self.W_R_x = tf.Variable(tf.random.normal(shape=[dim_xt, self.units]), name='W_R_x')\n", 208 | " # 重置门对t时刻输入隐藏状态state的权重参数:\n", 209 | " self.W_R_s = tf.Variable(tf.random.normal(shape=[self.units, self.units]), name='W_R_s')\n", 210 | " # 重置门偏置项参数:\n", 211 | " self.W_R_b = tf.Variable(tf.random.normal(shape=[self.units]), name='W_R_b')\n", 212 | " \n", 213 | " \n", 214 | " # 更新门对t时刻输入数据x的权重参数:\n", 215 | " self.W_U_x = tf.Variable(tf.random.normal(shape=[dim_xt, self.units]), name='W_U_x')\n", 216 | " # 更新门对t时刻输入隐藏状态state的权重参数:\n", 217 | " self.W_U_s = tf.Variable(tf.random.normal(shape=[self.units, self.units]), name='W_U_s')\n", 218 | " # 更新门偏置项参数:\n", 219 | " self.W_U_b = tf.Variable(tf.random.normal(shape=[self.units]), name='W_U_b')\n", 220 | " \n", 221 | " \n", 222 | " # 候选隐藏状态 ~h_t 对t时刻输入数据x的权重参数:\n", 223 | " self.W_H_x = tf.Variable(tf.random.normal(shape=[dim_xt, self.units]), name='W_H_x')\n", 224 | " # 候选隐藏状态 ~h_t 对t时刻输入隐藏状态state的权重参数:\n", 225 | " self.W_H_s = tf.Variable(tf.random.normal(shape=[self.units, self.units]), name='W_H_s')\n", 226 | " # 候选隐藏状态 ~h_t 偏置项参数:\n", 227 | " self.W_H_b = tf.Variable(tf.random.normal(shape=[self.units]), name='W_H_b')\n", 228 | " \n", 229 | " \n", 230 | " def call(self, inputs, states):\n", 231 | " x_t, att_score = inputs\n", 232 | " states = states[0]\n", 233 | " \"\"\"\n", 234 | " x_t: x_(t), shape=(B, 2D)\n", 235 | " states: hidden_state_(t-1), shape=(B, units)\n", 236 | " att_score: attention_score_(t), shape=(B, 1)\n", 237 | " \"\"\"\n", 238 | " # 重置门\n", 239 | " r_t = tf.sigmoid(tf.matmul(x_t, self.W_R_x) + tf.matmul(states, self.W_R_s) + self.W_R_b)\n", 240 | " # 更新门\n", 241 | " u_t = tf.sigmoid(tf.matmul(x_t, self.W_U_x) + tf.matmul(states, self.W_U_s) + self.W_U_b)\n", 242 | " # 带有注意力的更新门\n", 243 | " a_u_t = tf.multiply(att_score, u_t)\n", 244 | " # 候选隐藏状态\n", 245 | " _h_t = tf.tanh(tf.matmul(x_t, self.W_H_x) + tf.matmul(tf.multiply(r_t, states), self.W_H_s) \n", 246 | " + self.W_H_b)\n", 247 | " # 输出值\n", 248 | " h_t = tf.multiply(1-a_u_t, states) + tf.multiply(a_u_t, _h_t)\n", 249 | " # 对gru而言,当前时刻的output与传递给下一时刻的state相同\n", 250 | " next_state = h_t\n", 251 | " \n", 252 | " \n", 253 | " return h_t, next_state # 第一个表示output\n", 254 | " \n", 255 | " " 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": 17, 261 | "metadata": {}, 262 | "outputs": [], 263 | "source": [ 264 | "# 得到历史行为的embedding表示\n", 265 | "class DIEN(Model):\n", 266 | " def __init__(self, user_count, item_count, cate_count, EMBEDDING_DIM, \n", 267 | " HIS_LEN = 100, use_negsampling = True, hid_dims=[200, 80, 2]):\n", 268 | " super().__init__()\n", 269 | " \n", 270 | " self.rnn_dim = EMBEDDING_DIM*2\n", 271 | " \n", 272 | " self.EmbLayer = EmbeddingLayer(user_count, item_count, cate_count, \n", 273 | " EMBEDDING_DIM, use_negsampling)\n", 274 | " \n", 275 | " self.GRU = GRU(self.rnn_dim, return_sequences=True)\n", 276 | " self.AuxTrainLayer = AuxTrainLayer()\n", 277 | " self.AttenLayer = DINAttenLayer()\n", 278 | " # self.AUGRU = AUGRU(EMBEDDING_DIM*2, return_state=True)\n", 279 | " self.AUGRU = RNN(AUGRUCell(self.rnn_dim))\n", 280 | " self.FCLayer = FCLayer(hid_dims, use_dice=True)\n", 281 | " \n", 282 | " \n", 283 | " def call(self, user, item, cate, item_his, cate_his, mask, no_m_his, no_c_his):\n", 284 | " # 转 0, 1 为 True, False \n", 285 | " mask_bool = tf.cast(mask, tf.bool)\n", 286 | " # 得到embedding\n", 287 | " embs = self.EmbLayer(user, item, cate, item_his, cate_his, no_m_his, no_c_his)\n", 288 | " # (B, 2D) \n", 289 | " user_emb, item_emb, his_emb, his_emb_sum, noclk_his_emb, noclk_his_emb_sum = embs\n", 290 | " \n", 291 | " \n", 292 | " # 第一层 GRU\n", 293 | " # tf2.2中的大坑:\n", 294 | " # 官方文档中第二个参数为mask,\n", 295 | " # 但是不指定参数名字mask=mask_bool的话,\n", 296 | " # 则mask_bool会当成参数initial_state的值\n", 297 | " gru_output = self.GRU(his_emb, mask=mask_bool) # (B, T, 2D)\n", 298 | " # 辅助损失函数\n", 299 | " aux_loss = self.AuxTrainLayer(gru_output[:, :-1, :], \n", 300 | " his_emb[:, 1:, :],\n", 301 | " noclk_his_emb[:, 1:, :],\n", 302 | " mask[:, 1:]) # (B,)\n", 303 | " \n", 304 | " # 计算目标item与历史item的attention分数\n", 305 | " atten_scores = self.AttenLayer(item_emb, gru_output, mask) # (B, T, 1)\n", 306 | " \n", 307 | " # AUGRU\n", 308 | " behavior_emb = self.AUGRU((gru_output, atten_scores), mask=mask_bool) # (B, 2D) \n", 309 | " \n", 310 | " # 全连接层\n", 311 | " inp = tf.concat([user_emb, item_emb, his_emb_sum, behavior_emb, \n", 312 | " noclk_his_emb_sum], axis=-1)\n", 313 | " output = self.FCLayer(inp)\n", 314 | " logit = tf.nn.softmax(output)\n", 315 | " return output, logit, aux_loss\n", 316 | " \n", 317 | " def train(self, user, item, cate, item_his, cate_his, mask, no_m_his, no_c_his, target):\n", 318 | " output, _, aux_loss = self.call(user, item, cate, item_his, cate_his, mask, no_m_his, no_c_his)\n", 319 | " loss = tf.keras.losses.categorical_crossentropy(target, output, from_logits=False)\n", 320 | " loss = tf.reduce_mean(loss)\n", 321 | " return loss, aux_loss\n", 322 | " \n", 323 | " def predict(self, user, item, cate, item_his, cate_his, mask):\n", 324 | " _, pred, _ = self.call(user, item, cate, item_his, cate_his, mask)\n", 325 | " return pred" 326 | ] 327 | }, 328 | { 329 | "cell_type": "code", 330 | "execution_count": 18, 331 | "metadata": {}, 332 | "outputs": [], 333 | "source": [ 334 | "base_path = \"data/\"\n", 335 | "train_file = base_path + \"local_train_splitByUser\"\n", 336 | "test_file = base_path + \"local_test_splitByUser\"\n", 337 | "uid_voc = base_path + \"uid_voc.pkl\"\n", 338 | "mid_voc = base_path + \"mid_voc.pkl\"\n", 339 | "cat_voc = base_path + \"cat_voc.pkl\"\n", 340 | "batch_size = 128\n", 341 | "maxlen = 100\n", 342 | "\n", 343 | "train_data = DataIterator(train_file, uid_voc, mid_voc, cat_voc, \n", 344 | " batch_size, maxlen, shuffle_each_epoch=False)\n", 345 | "\n", 346 | "n_uid, n_mid, n_cat = train_data.get_n() # 用户数,电影数,类别数" 347 | ] 348 | }, 349 | { 350 | "cell_type": "code", 351 | "execution_count": 20, 352 | "metadata": { 353 | "scrolled": true 354 | }, 355 | "outputs": [ 356 | { 357 | "name": "stdout", 358 | "output_type": "stream", 359 | "text": [ 360 | "batch 0 loss 2.068039, aux loss 1.479113\n", 361 | "batch 10 loss 0.737447, aux loss 1.313956\n", 362 | "batch 20 loss 0.695979, aux loss 1.392124\n", 363 | "batch 30 loss 0.693791, aux loss 1.234638\n", 364 | "batch 40 loss 0.694052, aux loss 1.386228\n", 365 | "batch 50 loss 0.693868, aux loss 1.237043\n", 366 | "batch 60 loss 0.695745, aux loss 1.386409\n", 367 | "batch 70 loss 0.691482, aux loss 1.265594\n", 368 | "batch 80 loss 0.695219, aux loss 1.385818\n", 369 | "batch 90 loss 0.693376, aux loss 1.352869\n", 370 | "batch 100 loss 0.694469, aux loss 1.383801\n", 371 | "batch 110 loss 0.694890, aux loss 1.273229\n", 372 | "batch 120 loss 0.699449, aux loss 1.385453\n", 373 | "batch 130 loss 0.694843, aux loss 1.351792\n", 374 | "batch 140 loss 0.698159, aux loss 1.382274\n", 375 | "batch 150 loss 0.689563, aux loss 1.388619\n", 376 | "batch 160 loss 0.691783, aux loss 1.387143\n", 377 | "batch 170 loss 0.695458, aux loss 1.229023\n", 378 | "batch 180 loss 0.691120, aux loss 1.390311\n", 379 | "batch 190 loss 0.693030, aux loss 1.242190\n", 380 | "batch 200 loss 0.694106, aux loss 1.375872\n", 381 | "batch 210 loss 0.690383, aux loss 1.282734\n", 382 | "batch 220 loss 0.691290, aux loss 1.378154\n", 383 | "batch 230 loss 0.690240, aux loss 1.293406\n", 384 | "batch 240 loss 0.694030, aux loss 1.379894\n", 385 | "batch 250 loss 0.697263, aux loss 1.302448\n", 386 | "batch 260 loss 0.694523, aux loss 1.380266\n", 387 | "batch 270 loss 0.689354, aux loss 1.306656\n", 388 | "batch 280 loss 0.688328, aux loss 1.382248\n", 389 | "batch 290 loss 0.693213, aux loss 1.270545\n", 390 | "batch 300 loss 0.694994, aux loss 1.380146\n", 391 | "batch 310 loss 0.692642, aux loss 1.236873\n", 392 | "batch 320 loss 0.689740, aux loss 1.382414\n", 393 | "batch 330 loss 0.691055, aux loss 1.355132\n", 394 | "batch 340 loss 0.690439, aux loss 1.378826\n", 395 | "batch 350 loss 0.696317, aux loss 1.305768\n", 396 | "batch 360 loss 0.692655, aux loss 1.380104\n", 397 | "batch 370 loss 0.683240, aux loss 1.282599\n", 398 | "batch 380 loss 0.694989, aux loss 1.376244\n", 399 | "batch 390 loss 0.693351, aux loss 1.324468\n", 400 | "batch 400 loss 0.686526, aux loss 1.380373\n", 401 | "batch 410 loss 0.690881, aux loss 1.349163\n", 402 | "batch 420 loss 0.686478, aux loss 1.374557\n", 403 | "batch 430 loss 0.684616, aux loss 1.378559\n", 404 | "batch 440 loss 0.680370, aux loss 1.373135\n", 405 | "batch 450 loss 0.672189, aux loss 1.330817\n", 406 | "batch 460 loss 0.704011, aux loss 1.378112\n", 407 | "batch 470 loss 0.666249, aux loss 1.338828\n", 408 | "batch 480 loss 0.686237, aux loss 1.386753\n", 409 | "batch 490 loss 0.672048, aux loss 1.237627\n", 410 | "batch 500 loss 0.689416, aux loss 1.375439\n", 411 | "batch 510 loss 0.679942, aux loss 1.274050\n", 412 | "batch 520 loss 0.684325, aux loss 1.384659\n", 413 | "batch 530 loss 0.684855, aux loss 1.281043\n", 414 | "batch 540 loss 0.660100, aux loss 1.394137\n", 415 | "batch 550 loss 0.687806, aux loss 1.240736\n", 416 | "batch 560 loss 0.690218, aux loss 1.380075\n", 417 | "batch 570 loss 0.673716, aux loss 1.320786\n", 418 | "batch 580 loss 0.693648, aux loss 1.368184\n", 419 | "batch 590 loss 0.696522, aux loss 1.289395\n", 420 | "batch 600 loss 0.679908, aux loss 1.387056\n", 421 | "batch 610 loss 0.709319, aux loss 1.291360\n", 422 | "batch 620 loss 0.686557, aux loss 1.369841\n", 423 | "batch 630 loss 0.674588, aux loss 1.263717\n", 424 | "batch 640 loss 0.692565, aux loss 1.350408\n", 425 | "batch 650 loss 0.683875, aux loss 1.221066\n", 426 | "batch 660 loss 0.688051, aux loss 1.356015\n", 427 | "batch 670 loss 0.674489, aux loss 1.261728\n", 428 | "batch 680 loss 0.682079, aux loss 1.373235\n", 429 | "batch 690 loss 0.678413, aux loss 1.279788\n", 430 | "batch 700 loss 0.696760, aux loss 1.372520\n", 431 | "batch 710 loss 0.681217, aux loss 1.269751\n", 432 | "batch 720 loss 0.677947, aux loss 1.359509\n", 433 | "batch 730 loss 0.668031, aux loss 1.331041\n", 434 | "batch 740 loss 0.666014, aux loss 1.342387\n", 435 | "batch 750 loss 0.682697, aux loss 1.269510\n", 436 | "batch 760 loss 0.656295, aux loss 1.341351\n", 437 | "batch 770 loss 0.707546, aux loss 1.251843\n", 438 | "batch 780 loss 0.674079, aux loss 1.345248\n", 439 | "batch 790 loss 0.657360, aux loss 1.243539\n", 440 | "batch 800 loss 0.652130, aux loss 1.358508\n", 441 | "batch 810 loss 0.663262, aux loss 1.269382\n", 442 | "batch 820 loss 0.660223, aux loss 1.349433\n", 443 | "batch 830 loss 0.662249, aux loss 1.220359\n", 444 | "batch 840 loss 0.682940, aux loss 1.371676\n", 445 | "batch 850 loss 0.671639, aux loss 1.279638\n", 446 | "batch 860 loss 0.690707, aux loss 1.366297\n", 447 | "batch 870 loss 0.664772, aux loss 1.311729\n", 448 | "batch 880 loss 0.653953, aux loss 1.337097\n", 449 | "batch 890 loss 0.645285, aux loss 1.337443\n", 450 | "batch 900 loss 0.689522, aux loss 1.330288\n", 451 | "batch 910 loss 0.658664, aux loss 1.241068\n", 452 | "batch 920 loss 0.677607, aux loss 1.334146\n", 453 | "batch 930 loss 0.654895, aux loss 1.277277\n", 454 | "batch 940 loss 0.667736, aux loss 1.347373\n", 455 | "batch 950 loss 0.640414, aux loss 1.171708\n", 456 | "batch 960 loss 0.687433, aux loss 1.355712\n", 457 | "batch 970 loss 0.661177, aux loss 1.277527\n", 458 | "batch 980 loss 0.672078, aux loss 1.361447\n", 459 | "batch 990 loss 0.664237, aux loss 1.194987\n", 460 | "batch 1000 loss 0.708602, aux loss 1.360277\n" 461 | ] 462 | } 463 | ], 464 | "source": [ 465 | "model = DIEN(n_uid, n_mid, n_cat, 16)\n", 466 | "optimizer = tf.keras.optimizers.Adam(learning_rate=1e-2)\n", 467 | "\n", 468 | "# 训练模型\n", 469 | "for i, (src, tgt) in enumerate(train_data):\n", 470 | " data = prepare_data(src, tgt, maxlen=100, return_neg=True)\n", 471 | " uids, mids, cats, mid_his, cat_his, mid_mask, target, sl, no_m_his, no_c_his = data\n", 472 | " with tf.GradientTape() as tape:\n", 473 | " loss, aux_loss = model.train(uids, mids, cats, mid_his, cat_his, \n", 474 | " mid_mask, no_m_his, no_c_his, target)\n", 475 | " if i%10 == 0:\n", 476 | " print(\"batch %d loss %f, aux loss %f\" % (i, loss.numpy(), aux_loss.numpy()))\n", 477 | " \n", 478 | " loss = loss + aux_loss\n", 479 | " grads = tape.gradient(loss, model.trainable_variables)\n", 480 | " optimizer.apply_gradients(grads_and_vars=zip(grads, model.trainable_variables))\n", 481 | " \n", 482 | " if i == 1000:\n", 483 | " break\n" 484 | ] 485 | }, 486 | { 487 | "cell_type": "code", 488 | "execution_count": null, 489 | "metadata": {}, 490 | "outputs": [], 491 | "source": [] 492 | }, 493 | { 494 | "cell_type": "code", 495 | "execution_count": null, 496 | "metadata": {}, 497 | "outputs": [], 498 | "source": [] 499 | }, 500 | { 501 | "cell_type": "code", 502 | "execution_count": null, 503 | "metadata": {}, 504 | "outputs": [], 505 | "source": [] 506 | } 507 | ], 508 | "metadata": { 509 | "kernelspec": { 510 | "display_name": "Python 3", 511 | "language": "python", 512 | "name": "python3" 513 | }, 514 | "language_info": { 515 | "codemirror_mode": { 516 | "name": "ipython", 517 | "version": 3 518 | }, 519 | "file_extension": ".py", 520 | "mimetype": "text/x-python", 521 | "name": "python", 522 | "nbconvert_exporter": "python", 523 | "pygments_lexer": "ipython3", 524 | "version": "3.7.6" 525 | } 526 | }, 527 | "nbformat": 4, 528 | "nbformat_minor": 4 529 | } 530 | -------------------------------------------------------------------------------- /DIN_DIEN/DIN.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import json\n", 11 | "import pickle as pkl\n", 12 | "import random\n", 13 | "import gzip\n", 14 | "import tensorflow as tf\n", 15 | "from tensorflow.keras.layers import *\n", 16 | "from tensorflow.keras import Model\n", 17 | "from layers import Dice\n", 18 | "from utils import DataIterator, prepare_data" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "class EmbeddingLayer(Layer):\n", 28 | " def __init__(self, user_count, item_count, cate_count, emb_dim, use_negsampling=False):\n", 29 | " super().__init__()\n", 30 | " self.emb_dim = emb_dim\n", 31 | " self.use_negsampling = use_negsampling\n", 32 | " self.user_emb = Embedding(user_count, self.emb_dim, name=\"user_emb\")\n", 33 | " self.item_emb = Embedding(item_count, self.emb_dim, name=\"item_emb\")\n", 34 | " self.cate_emb = Embedding(cate_count, self.emb_dim, name=\"cate_emb\")\n", 35 | " \n", 36 | " def call(self, user, item, cate, item_his, cate_his,\n", 37 | " noclick_item_his=[], noclick_cate_hiss=[]):\n", 38 | " user_emb = self.user_emb(user) # (B, D)\n", 39 | " \n", 40 | " # 基本属性embedding:\n", 41 | " item_emb = self.item_emb(item) # (B, D)\n", 42 | " cate_emb = self.cate_emb(cate) # (B, D)\n", 43 | " item_join_emb = Concatenate(-1)([item_emb, cate_emb]) # (B, 2D)\n", 44 | " \n", 45 | " \n", 46 | " # 历史行为序列embedding:\n", 47 | " item_his_emb = self.item_emb(item_his) # (B, T, D)\n", 48 | " cate_his_emb = self.item_emb(cate_his) # (B, T, D)\n", 49 | " item_join_his_emb = Concatenate(-1)([item_his_emb, cate_his_emb]) # (B, T, 2D)\n", 50 | " item_his_emb_sum = tf.reduce_sum(item_join_his_emb, axis=1) # (B, D)\n", 51 | " \n", 52 | " if self.use_negsampling:\n", 53 | " # (B, T, neg_num, D)\n", 54 | " noclick_item_his_emb = self.item_emb(noclick_item_his) \n", 55 | " # (B, T, neg_num, D)\n", 56 | " noclick_cate_his_emb = self.item_emb(noclick_cate_his) \n", 57 | " # (B, T, neg_num, 2D)\n", 58 | " noclick_item_join_his_emb = Concatenate(-1)([noclick_item_his_emb, noclick_cate_his_emb])\n", 59 | " # (B, T, 2D)\n", 60 | " noclick_item_emb_neg_sum = tf.reduce_sum(noclick_item_join_his_emb, axis=2) \n", 61 | " # (B, 2D)\n", 62 | " noclick_item_his_emb_sum = tf.reduce_sum(noclick_item_emb_neg_sum, axis=1) \n", 63 | " \n", 64 | " return user_emb, item_join_emb, \\\n", 65 | " item_join_his_emb, item_his_emb_sum, \\\n", 66 | " noclick_item_join_his_emb, noclick_item_his_emb_sum \n", 67 | " \n", 68 | " return user_emb, item_join_emb, \\\n", 69 | " item_join_his_emb, item_his_emb_sum\n", 70 | " " 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 3, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [ 79 | "class FCLayer(Layer):\n", 80 | " def __init__(self, hid_dims=[80, 40, 2], use_dice=False):\n", 81 | " super().__init__()\n", 82 | " self.hid_dims = hid_dims\n", 83 | " self.use_dice = use_dice\n", 84 | " self.fc = []\n", 85 | " self.dice = []\n", 86 | " for dim in self.hid_dims[:-1]:\n", 87 | " if use_dice:\n", 88 | " self.fc.append(Dense(dim, name=f'dense_{dim}'))\n", 89 | " self.dice.append(Dice())\n", 90 | " else:\n", 91 | " self.fc.append(Dense(dim, activation=\"sigmoid\", \n", 92 | " name=f'dense_{dim}'))\n", 93 | " self.fc.append(Dense(self.hid_dims[-1], name=\"dense_output\"))\n", 94 | " \n", 95 | " def call(self, inputs):\n", 96 | " if self.use_dice:\n", 97 | " fc_out = inputs\n", 98 | " for i in range(len(self.dice)):\n", 99 | " fc_out = self.fc[i](fc_out)\n", 100 | " fc_out = self.dice[i](fc_out)\n", 101 | " fc_out = self.fc[-1](fc_out)\n", 102 | " return fc_out\n", 103 | " else: \n", 104 | " fc_out = self.fc[0](inputs)\n", 105 | " for fc in self.fc[1:]:\n", 106 | " fc_out = fc(fc_out)\n", 107 | " return fc_out" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 4, 113 | "metadata": {}, 114 | "outputs": [], 115 | "source": [ 116 | "# 计算注意力得分\n", 117 | "class DINAttenLayer(Layer):\n", 118 | " def __init__(self, hid_dims=[80, 40, 1]):\n", 119 | " super().__init__()\n", 120 | " self.FCLayer = FCLayer(hid_dims)\n", 121 | " \n", 122 | " def call(self, query, facts, mask):\n", 123 | " \"\"\"\n", 124 | " query: (B, 2D)\n", 125 | " facts: (B, T, 2D)\n", 126 | " mask: (B, T)\n", 127 | " \"\"\"\n", 128 | " mask = tf.equal(mask, tf.ones_like(mask)) # (B, T)\n", 129 | " queries = tf.tile(query, [1, facts.shape[1]]) # (B, 2D*T)\n", 130 | " queries = tf.reshape(queries, [-1, facts.shape[1], facts.shape[2]]) # # (B, T, 2D)\n", 131 | " # print(\"queries\", queries.shape)\n", 132 | " # (B, T, 2D*4)\n", 133 | " din_all = tf.concat([queries, facts, queries - facts, queries * facts], axis=-1)\n", 134 | " \n", 135 | " fc_out = self.FCLayer(din_all) # (B, T, 1)\n", 136 | " score = fc_out # (B, T, 1)\n", 137 | " score = tf.reshape(score, [-1, 1, facts.shape[1]]) # (B, 1, T)\n", 138 | " \n", 139 | " key_masks = tf.expand_dims(mask, 1) # (B, 1, T)\n", 140 | " padding = tf.ones_like(score) * (-2**32 + 1)\n", 141 | " # True的地方为score,否则为极大的负数\n", 142 | " score = tf.where(key_masks, score, padding) # (B, 1, T)\n", 143 | " score = tf.nn.softmax(score)\n", 144 | " \n", 145 | " output = tf.matmul(score, facts) # (B, 1, 2D)\n", 146 | " output = tf.squeeze(output, 1) # (B, 2D)\n", 147 | " return output\n", 148 | " " 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": 5, 154 | "metadata": {}, 155 | "outputs": [], 156 | "source": [ 157 | "# 得到历史行为的embedding表示\n", 158 | "class DIN(Model):\n", 159 | " def __init__(self, user_count, item_count, cate_count, EMBEDDING_DIM, \n", 160 | " HIS_LEN = 100, use_negsampling = False, hid_dims=[200, 80, 2]):\n", 161 | " super().__init__()\n", 162 | " self.EmbLayer = EmbeddingLayer(user_count, item_count, cate_count, \n", 163 | " EMBEDDING_DIM, use_negsampling)\n", 164 | " self.AttenLayer = DINAttenLayer()\n", 165 | " self.FCLayer = FCLayer(hid_dims, use_dice=True)\n", 166 | " \n", 167 | " \n", 168 | " def call(self, user, item, cate, item_his, cate_his, mask):\n", 169 | " # 得到embedding\n", 170 | " embs = self.EmbLayer(user, item, cate, item_his, cate_his)\n", 171 | " # (B, 2D) \n", 172 | " user_emb, item_join_emb, item_join_his_emb, item_his_emb_sum = embs\n", 173 | " # 计算目标item与历史item的attention分数,然后加权求和,得到最终的embedding\n", 174 | " behavior_emb = self.AttenLayer(item_join_emb, item_join_his_emb, mask) # (B, 2D)\n", 175 | " \n", 176 | " # 全连接层\n", 177 | " inp = tf.concat([user_emb, item_join_emb, item_his_emb_sum, \n", 178 | " item_his_emb_sum, behavior_emb], axis=-1)\n", 179 | " output = self.FCLayer(inp)\n", 180 | " # logit = tf.nn.softmax(output)\n", 181 | " return output # , logit\n", 182 | " " 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": 6, 188 | "metadata": {}, 189 | "outputs": [], 190 | "source": [ 191 | "base_path = \"data/\"\n", 192 | "train_file = base_path + \"local_train_splitByUser\"\n", 193 | "test_file = base_path + \"local_test_splitByUser\"\n", 194 | "uid_voc = base_path + \"uid_voc.pkl\"\n", 195 | "mid_voc = base_path + \"mid_voc.pkl\"\n", 196 | "cat_voc = base_path + \"cat_voc.pkl\"\n", 197 | "batch_size = 128\n", 198 | "maxlen = 100\n", 199 | "\n", 200 | "train_data = DataIterator(train_file, uid_voc, mid_voc, cat_voc, \n", 201 | " batch_size, maxlen, shuffle_each_epoch=False)\n", 202 | "\n", 203 | "n_uid, n_mid, n_cat = train_data.get_n() # 用户数,电影数,类别数" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": 7, 209 | "metadata": { 210 | "scrolled": true 211 | }, 212 | "outputs": [ 213 | { 214 | "name": "stdout", 215 | "output_type": "stream", 216 | "text": [ 217 | "batch 0 loss 3.620382\n", 218 | "batch 100 loss 0.689255\n", 219 | "batch 200 loss 0.671629\n", 220 | "batch 300 loss 0.659545\n", 221 | "batch 400 loss 0.709612\n", 222 | "batch 500 loss 0.655639\n", 223 | "batch 600 loss 0.636441\n" 224 | ] 225 | }, 226 | { 227 | "ename": "KeyboardInterrupt", 228 | "evalue": "", 229 | "output_type": "error", 230 | "traceback": [ 231 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 232 | "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", 233 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0muids\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmids\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcats\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmid_his\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcat_his\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmid_mask\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtarget\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msl\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mtf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mGradientTape\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mtape\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 9\u001b[0;31m \u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0muids\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmids\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcats\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmid_his\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcat_his\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmid_mask\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 10\u001b[0m \u001b[0mloss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkeras\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlosses\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcategorical_crossentropy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtarget\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutput\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0mloss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreduce_mean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mloss\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 234 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 966\u001b[0m with base_layer_utils.autocast_context_manager(\n\u001b[1;32m 967\u001b[0m self._compute_dtype):\n\u001b[0;32m--> 968\u001b[0;31m \u001b[0moutputs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcall\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcast_inputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 969\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_handle_activity_regularization\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 970\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_set_mask_metadata\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput_masks\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 235 | "\u001b[0;32m\u001b[0m in \u001b[0;36mcall\u001b[0;34m(self, user, item, cate, item_his, cate_his, mask)\u001b[0m\n\u001b[1;32m 21\u001b[0m inp = tf.concat([user_emb, item_join_emb, item_his_emb_sum, \n\u001b[1;32m 22\u001b[0m item_his_emb_sum, behavior_emb], axis=-1)\n\u001b[0;32m---> 23\u001b[0;31m \u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mFCLayer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minp\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 24\u001b[0m \u001b[0;31m# logit = tf.nn.softmax(output)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 25\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0moutput\u001b[0m \u001b[0;31m# , logit\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 236 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 966\u001b[0m with base_layer_utils.autocast_context_manager(\n\u001b[1;32m 967\u001b[0m self._compute_dtype):\n\u001b[0;32m--> 968\u001b[0;31m \u001b[0moutputs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcall\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcast_inputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 969\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_handle_activity_regularization\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 970\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_set_mask_metadata\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput_masks\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 237 | "\u001b[0;32m\u001b[0m in \u001b[0;36mcall\u001b[0;34m(self, inputs)\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdice\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 21\u001b[0m \u001b[0mfc_out\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfc_out\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 22\u001b[0;31m \u001b[0mfc_out\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdice\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfc_out\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 23\u001b[0m \u001b[0mfc_out\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfc_out\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mfc_out\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 238 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 966\u001b[0m with base_layer_utils.autocast_context_manager(\n\u001b[1;32m 967\u001b[0m self._compute_dtype):\n\u001b[0;32m--> 968\u001b[0;31m \u001b[0moutputs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcall\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcast_inputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 969\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_handle_activity_regularization\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 970\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_set_mask_metadata\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput_masks\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 239 | "\u001b[0;32m~/Workspace/tensorflow2/DIN_DIEN/layers.py\u001b[0m in \u001b[0;36mcall\u001b[0;34m(self, _x)\u001b[0m\n\u001b[1;32m 50\u001b[0m \u001b[0;31m# 标准化后使用 sigmoid 函数得到 x_p\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 51\u001b[0m \u001b[0mx_p\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msigmoid\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx_normed\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 52\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0malpha\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;36m1.0\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mx_p\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0m_x\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mx_p\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0m_x\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 240 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/tensorflow/python/ops/math_ops.py\u001b[0m in \u001b[0;36mbinary_op_wrapper\u001b[0;34m(x, y)\u001b[0m\n\u001b[1;32m 982\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mops\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname_scope\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mop_name\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 983\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mops\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTensor\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mops\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTensor\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 984\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 985\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msparse_tensor\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mSparseTensor\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 986\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 241 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/tensorflow/python/ops/math_ops.py\u001b[0m in \u001b[0;36m_add_dispatch\u001b[0;34m(x, y, name)\u001b[0m\n\u001b[1;32m 1274\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mgen_math_ops\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0madd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1275\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1276\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mgen_math_ops\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0madd_v2\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1277\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1278\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 242 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/tensorflow/python/ops/gen_math_ops.py\u001b[0m in \u001b[0;36madd_v2\u001b[0;34m(x, y, name)\u001b[0m\n\u001b[1;32m 469\u001b[0m _result = pywrap_tfe.TFE_Py_FastPathExecute(\n\u001b[1;32m 470\u001b[0m \u001b[0m_ctx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_context_handle\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtld\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdevice_name\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"AddV2\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 471\u001b[0;31m tld.op_callbacks, x, y)\n\u001b[0m\u001b[1;32m 472\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0m_result\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 473\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0m_core\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_FallbackException\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 243 | "\u001b[0;31mKeyboardInterrupt\u001b[0m: " 244 | ] 245 | } 246 | ], 247 | "source": [ 248 | "model = DIN(n_uid, n_mid, n_cat, 8)\n", 249 | "optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)\n", 250 | "\n", 251 | "# 训练模型\n", 252 | "for i, (src, tgt) in enumerate(train_data):\n", 253 | " data = prepare_data(src, tgt, maxlen=100, return_neg=False)\n", 254 | " uids, mids, cats, mid_his, cat_his, mid_mask, target, sl = data\n", 255 | " with tf.GradientTape() as tape:\n", 256 | " output = model(uids, mids, cats, mid_his, cat_his, mid_mask)\n", 257 | " loss = tf.keras.losses.categorical_crossentropy(target, output)\n", 258 | " loss = tf.reduce_mean(loss)\n", 259 | " if i%100 == 0:\n", 260 | " print(\"batch %d loss %f\" % (i, loss.numpy()))\n", 261 | " grads = tape.gradient(loss, model.variables)\n", 262 | " optimizer.apply_gradients(grads_and_vars=zip(grads, model.variables))\n", 263 | " \n", 264 | " if i == 1000:\n", 265 | " break\n" 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": null, 271 | "metadata": {}, 272 | "outputs": [], 273 | "source": [] 274 | }, 275 | { 276 | "cell_type": "code", 277 | "execution_count": null, 278 | "metadata": {}, 279 | "outputs": [], 280 | "source": [] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": null, 285 | "metadata": {}, 286 | "outputs": [], 287 | "source": [] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "execution_count": null, 292 | "metadata": {}, 293 | "outputs": [], 294 | "source": [] 295 | } 296 | ], 297 | "metadata": { 298 | "kernelspec": { 299 | "display_name": "Python 3", 300 | "language": "python", 301 | "name": "python3" 302 | }, 303 | "language_info": { 304 | "codemirror_mode": { 305 | "name": "ipython", 306 | "version": 3 307 | }, 308 | "file_extension": ".py", 309 | "mimetype": "text/x-python", 310 | "name": "python", 311 | "nbconvert_exporter": "python", 312 | "pygments_lexer": "ipython3", 313 | "version": "3.7.6" 314 | } 315 | }, 316 | "nbformat": 4, 317 | "nbformat_minor": 4 318 | } 319 | -------------------------------------------------------------------------------- /DIN_DIEN/data/get_data.txt: -------------------------------------------------------------------------------- 1 | https://github.com/mouna99/dien -------------------------------------------------------------------------------- /DIN_DIEN/layers.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.keras.layers import * 3 | 4 | class Dice(Layer): 5 | def __init__(self, axis=-1, epsilon = 1e-10, name=""): 6 | super().__init__() 7 | self.axis = axis 8 | self.epsilon = epsilon 9 | 10 | def build(self, input_shape): 11 | rand = tf.random_normal_initializer()(shape=[input_shape[-1]]) 12 | self.alpha = tf.Variable(rand, dtype=tf.float32, name="alpha") 13 | 14 | 15 | def call(self, _x): 16 | # 输入数据的各个轴的维度 17 | input_shape = list(_x.get_shape()) 18 | 19 | # 需要进行reduce计算的轴 20 | reduction_axes = list(range(len(input_shape))) 21 | del reduction_axes[self.axis] 22 | 23 | # 能进行广播运算所需要的shape 24 | # shape: (1, _x.shape[axis]) 25 | broadcast_shape = [1] * len(input_shape) 26 | broadcast_shape[self.axis] = input_shape[self.axis] 27 | 28 | # 除了axis轴外所有数算均值 29 | # shape: (_x.shape[axis], ) 30 | mean = tf.reduce_mean(_x, axis=reduction_axes) 31 | # 然后还原为_x原来的维度,并且在axis轴进行广播 32 | # shape: (1, _x.shape[axis]) 33 | brodcast_mean = tf.reshape(mean, broadcast_shape) 34 | 35 | # 除了axis轴外所有数算平方差 36 | # shape: (_x.shape[axis], ) 37 | std = tf.reduce_mean(tf.square(_x - brodcast_mean) + self.epsilon, axis=reduction_axes) 38 | # 算标准差 39 | std = tf.sqrt(std) 40 | # 然后还原为_x原来的维度,并且在axis轴进行广播 41 | # shape: (1, _x.shape[axis]) 42 | brodcast_std = tf.reshape(std, broadcast_shape) 43 | 44 | # 标准化,_x的shape不变 45 | x_normed = (_x - brodcast_mean) / (brodcast_std + self.epsilon) 46 | 47 | # # 以上操作可用下面的一句话代替: 48 | # x_normed = BatchNormalization(center=False, scale=False)(_x) 49 | 50 | # 标准化后使用 sigmoid 函数得到 x_p 51 | x_p = tf.sigmoid(x_normed) 52 | return self.alpha * (1.0 - x_p) * _x + x_p * _x -------------------------------------------------------------------------------- /DIN_DIEN/utils.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import json 3 | import pickle as pkl 4 | import random 5 | import gzip 6 | 7 | 8 | class DataIterator: 9 | def __init__(self, source, 10 | uid_voc, 11 | mid_voc, 12 | cat_voc, 13 | batch_size=128, 14 | maxlen=100, 15 | skip_empty=False, 16 | shuffle_each_epoch=False, 17 | sort_by_length=True, 18 | max_batch_size=20, 19 | minlen=None): 20 | 21 | self.source = self.fopen(source, 'r') 22 | self.source_dicts = [] 23 | # 用户、电影、类别对应的id 24 | for source_dict in [uid_voc, mid_voc, cat_voc]: 25 | self.source_dicts.append(self.load_dict(source_dict)) 26 | 27 | # item-info 存放每个电影id对应的类别,例如books 28 | f_meta = open("data/item-info", "r") 29 | meta_map = {} # 电影id -> 类别名称 30 | for line in f_meta: 31 | arr = line.strip().split("\t") 32 | if arr[0] not in meta_map: 33 | meta_map[arr[0]] = arr[1] 34 | self.meta_id_map ={} # 电影id -> 类别id 35 | for key in meta_map: 36 | val = meta_map[key] 37 | if key in self.source_dicts[1]: 38 | mid_idx = self.source_dicts[1][key] 39 | else: 40 | mid_idx = 0 41 | if val in self.source_dicts[2]: 42 | cat_idx = self.source_dicts[2][val] 43 | else: 44 | cat_idx = 0 45 | self.meta_id_map[mid_idx] = cat_idx 46 | 47 | f_review = open("data/reviews-info", "r") 48 | self.mid_list_for_random = [] # 用于随机访问的电影列表 49 | for line in f_review: 50 | arr = line.strip().split("\t") 51 | tmp_idx = 0 52 | if arr[1] in self.source_dicts[1]: 53 | tmp_idx = self.source_dicts[1][arr[1]] 54 | self.mid_list_for_random.append(tmp_idx) 55 | 56 | self.batch_size = batch_size 57 | self.maxlen = maxlen 58 | self.minlen = minlen 59 | self.skip_empty = skip_empty 60 | 61 | self.n_uid = len(self.source_dicts[0]) # 用户数量 62 | self.n_mid = len(self.source_dicts[1]) # 电影数量 63 | self.n_cat = len(self.source_dicts[2]) # 类别数量 64 | 65 | self.shuffle = shuffle_each_epoch 66 | self.sort_by_length = sort_by_length 67 | 68 | self.source_buffer = [] 69 | self.k = batch_size * max_batch_size 70 | 71 | self.end_of_data = False 72 | 73 | def get_n(self): 74 | return self.n_uid, self.n_mid, self.n_cat 75 | 76 | def __iter__(self): 77 | return self 78 | 79 | def reset(self): 80 | if self.shuffle: 81 | self.source= shuffle.main(self.source_orig, temporary=True) 82 | else: 83 | self.source.seek(0) 84 | 85 | def __next__(self): 86 | if self.end_of_data: 87 | self.end_of_data = False 88 | self.reset() 89 | raise StopIteration 90 | 91 | source = [] 92 | target = [] 93 | 94 | if len(self.source_buffer) == 0: 95 | for k_ in range(self.k): 96 | ss = self.source.readline() 97 | if ss == "": 98 | break 99 | # label、用户名、目标item、 目标item类别、历史item、历史item对应类别; 100 | self.source_buffer.append(ss.strip("\n").split("\t")) 101 | 102 | # sort by history behavior length 103 | if self.sort_by_length: 104 | his_length = numpy.array([len(s[4].split("")) for s in self.source_buffer]) 105 | tidx = his_length.argsort() 106 | 107 | _sbuf = [self.source_buffer[i] for i in tidx] 108 | self.source_buffer = _sbuf 109 | else: 110 | self.source_buffer.reverse() 111 | 112 | if len(self.source_buffer) == 0: 113 | self.end_of_data = False 114 | self.reset() 115 | raise StopIteration 116 | 117 | try: 118 | 119 | # actual work here 120 | while True: 121 | 122 | # read from source file and map to word index 123 | try: 124 | ss = self.source_buffer.pop() 125 | except IndexError: 126 | break 127 | 128 | # source_dicts包含了用户、电影、类别字典 129 | # ss包含:label、用户名、目标item、 目标item类别、历史item、历史item对应类别 130 | uid = self.source_dicts[0][ss[1]] if ss[1] in self.source_dicts[0] else 0 131 | mid = self.source_dicts[1][ss[2]] if ss[2] in self.source_dicts[1] else 0 132 | cat = self.source_dicts[2][ss[3]] if ss[3] in self.source_dicts[2] else 0 133 | tmp = [] 134 | for fea in ss[4].split(""): 135 | m = self.source_dicts[1][fea] if fea in self.source_dicts[1] else 0 136 | tmp.append(m) 137 | mid_list = tmp # 历史电影id 138 | 139 | tmp1 = [] 140 | for fea in ss[5].split(""): 141 | c = self.source_dicts[2][fea] if fea in self.source_dicts[2] else 0 142 | tmp1.append(c) 143 | cat_list = tmp1 # 历史类别id 144 | 145 | # read from source file and map to word index 146 | 147 | #if len(mid_list) > self.maxlen: 148 | # continue 149 | if self.minlen != None: 150 | if len(mid_list) <= self.minlen: 151 | continue 152 | if self.skip_empty and (not mid_list): 153 | continue 154 | 155 | """ 156 | 一个一个从mid_list(历史行为序列)中拿movie id(pos_mid) , 157 | 然后在mid_list_for_randoms随机抽取一个位置(对应一个movie id即noclk_mid), 158 | 然后看看pos_mid和noclk_mid是不是一样,一样就跳过,不一样就可以作为一个负样本啦, 159 | 同时看到抽取个数大于5就结束了,所以都是抽出5个负样本。 160 | 注意抽取的负样本noclk_mid_list是一个二维列表[length,5], 161 | length就是该用户历史行为中点击的商品个数。 162 | """ 163 | 164 | noclk_mid_list = [] 165 | noclk_cat_list = [] 166 | for pos_mid in mid_list: 167 | noclk_tmp_mid = [] 168 | noclk_tmp_cat = [] 169 | noclk_index = 0 170 | while True: 171 | noclk_mid_indx = random.randint(0, len(self.mid_list_for_random)-1) 172 | noclk_mid = self.mid_list_for_random[noclk_mid_indx] 173 | if noclk_mid == pos_mid: 174 | continue 175 | noclk_tmp_mid.append(noclk_mid) 176 | noclk_tmp_cat.append(self.meta_id_map[noclk_mid]) 177 | noclk_index += 1 178 | if noclk_index >= 5: 179 | break 180 | noclk_mid_list.append(noclk_tmp_mid) 181 | noclk_cat_list.append(noclk_tmp_cat) 182 | source.append([uid, mid, cat, mid_list, cat_list, noclk_mid_list, noclk_cat_list]) 183 | target.append([float(ss[0]), 1-float(ss[0])]) 184 | 185 | if len(source) >= self.batch_size or len(target) >= self.batch_size: 186 | break 187 | except IOError: 188 | self.end_of_data = True 189 | 190 | # all sentence pairs in maxibatch filtered out because of length 191 | if len(source) == 0 or len(target) == 0: 192 | source, target = self.next() 193 | 194 | return source, target 195 | # return list(map(numpy.array, list(zip(*source)))), numpy.array(target) 196 | 197 | 198 | def load_dict(self, filename): 199 | try: 200 | with open(filename, 'rb') as f: 201 | return json.load(f) 202 | # return unicode_to_utf8(json.load(f)) 203 | except: 204 | with open(filename, 'rb') as f: 205 | return pkl.load(f) 206 | # return unicode_to_utf8(pkl.load(f)) 207 | 208 | 209 | def fopen(self, filename, mode='r'): 210 | if filename.endswith('.gz'): 211 | return gzip.open(filename, mode) 212 | return open(filename, mode) 213 | 214 | 215 | # 将行为序列长度调整到100, 不足的用0填充 216 | def prepare_data(input, target, maxlen = None, return_neg = False): 217 | # input: [(uid, mid, cat, mid_list, cat_list, noclk_mid_list, noclk_cat_list)] 218 | lengths_x = [len(s[4]) for s in input] # 真实序列长度 219 | seqs_mid = [inp[3] for inp in input] 220 | seqs_cat = [inp[4] for inp in input] 221 | noclk_seqs_mid = [inp[5] for inp in input] 222 | noclk_seqs_cat = [inp[6] for inp in input] 223 | 224 | if maxlen is not None: 225 | new_seqs_mid = [] 226 | new_seqs_cat = [] 227 | new_noclk_seqs_mid = [] 228 | new_noclk_seqs_cat = [] 229 | new_lengths_x = [] 230 | for l_x, inp in zip(lengths_x, input): 231 | if l_x > maxlen: # 取后100个 232 | new_seqs_mid.append(inp[3][l_x - maxlen:]) 233 | new_seqs_cat.append(inp[4][l_x - maxlen:]) 234 | new_noclk_seqs_mid.append(inp[5][l_x - maxlen:]) 235 | new_noclk_seqs_cat.append(inp[6][l_x - maxlen:]) 236 | new_lengths_x.append(maxlen) 237 | else: 238 | new_seqs_mid.append(inp[3]) 239 | new_seqs_cat.append(inp[4]) 240 | new_noclk_seqs_mid.append(inp[5]) 241 | new_noclk_seqs_cat.append(inp[6]) 242 | new_lengths_x.append(l_x) 243 | lengths_x = new_lengths_x 244 | seqs_mid = new_seqs_mid 245 | seqs_cat = new_seqs_cat 246 | noclk_seqs_mid = new_noclk_seqs_mid 247 | noclk_seqs_cat = new_noclk_seqs_cat 248 | 249 | if len(lengths_x) < 1: 250 | return None, None, None, None 251 | 252 | n_samples = len(seqs_mid) 253 | maxlen_x = numpy.max(lengths_x) 254 | neg_samples = len(noclk_seqs_mid[0][0]) 255 | 256 | # 不足的部分用 0 填充 257 | mid_his = numpy.zeros((n_samples, maxlen_x)).astype('int64') 258 | cat_his = numpy.zeros((n_samples, maxlen_x)).astype('int64') 259 | noclk_mid_his = numpy.zeros((n_samples, maxlen_x, neg_samples)).astype('int64') 260 | noclk_cat_his = numpy.zeros((n_samples, maxlen_x, neg_samples)).astype('int64') 261 | mid_mask = numpy.zeros((n_samples, maxlen_x)).astype('float32') 262 | for idx, [s_x, s_y, no_sx, no_sy] in enumerate(zip(seqs_mid, seqs_cat, noclk_seqs_mid, noclk_seqs_cat)): 263 | mid_mask[idx, :lengths_x[idx]] = 1. # 真实序列为1,反之为0 264 | mid_his[idx, :lengths_x[idx]] = s_x 265 | cat_his[idx, :lengths_x[idx]] = s_y 266 | noclk_mid_his[idx, :lengths_x[idx], :] = no_sx 267 | noclk_cat_his[idx, :lengths_x[idx], :] = no_sy 268 | 269 | uids = numpy.array([inp[0] for inp in input]) 270 | mids = numpy.array([inp[1] for inp in input]) 271 | cats = numpy.array([inp[2] for inp in input]) 272 | 273 | if return_neg: 274 | return uids, mids, cats, mid_his, cat_his, mid_mask, \ 275 | numpy.array(target), numpy.array(lengths_x), noclk_mid_his, noclk_cat_his 276 | 277 | else: 278 | return uids, mids, cats, mid_his, cat_his, mid_mask, numpy.array(target), numpy.array(lengths_x) 279 | -------------------------------------------------------------------------------- /DeepFM.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "import numpy as np\n", 11 | "import pandas as pd\n", 12 | "from tensorflow.keras.layers import *\n", 13 | "import tensorflow.keras.backend as K\n", 14 | "import matplotlib.pyplot as plt\n", 15 | "import tensorflow as tf\n", 16 | "from tensorflow.keras.models import Model\n", 17 | "from tensorflow.keras.utils import plot_model\n", 18 | "from tensorflow.keras.callbacks import *\n", 19 | "from sklearn.preprocessing import LabelEncoder\n", 20 | "# from tensorflow.keras.constraints import *\n", 21 | "\n", 22 | "%matplotlib inline" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "# 准备数据" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 2, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "path = '/disk/share/criteo/'\n", 39 | "data = pd.read_csv(path+'criteo_sampled_data.csv')\n", 40 | "cols = data.columns.values\n", 41 | "\n", 42 | "dense_feats = [f for f in cols if f[0] == \"I\"]\n", 43 | "sparse_feats = [f for f in cols if f[0] == \"C\"]\n", 44 | "\n", 45 | "def process_dense_feats(data, feats):\n", 46 | " d = data.copy()\n", 47 | " d = d[feats].fillna(0.0)\n", 48 | " for f in feats:\n", 49 | " d[f] = d[f].apply(lambda x: np.log(x+1) if x > -1 else -1)\n", 50 | " \n", 51 | " return d\n", 52 | "\n", 53 | "data_dense = process_dense_feats(data, dense_feats)\n", 54 | "\n", 55 | "vocab_sizes = {}\n", 56 | "def process_sparse_feats(data, feats):\n", 57 | " d = data.copy()\n", 58 | " d = d[feats].fillna(\"-1\")\n", 59 | " for f in feats:\n", 60 | " label_encoder = LabelEncoder()\n", 61 | " d[f] = label_encoder.fit_transform(d[f])\n", 62 | " vocab_sizes[f] = d[f].nunique() + 1\n", 63 | " return d\n", 64 | "\n", 65 | "data_sparse = process_sparse_feats(data, sparse_feats)\n", 66 | "total_data = pd.concat([data_dense, data_sparse], axis=1)\n", 67 | "total_data['label'] = data['label']" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "# 自定义层" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 3, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "class SparseEmbedding(Layer):\n", 84 | " def __init__(self, sparse_feats, vocab_sizes, embed_dims=8):\n", 85 | " super().__init__()\n", 86 | " # 离散特征嵌入矩阵\n", 87 | " self.sparse_embeds_mat = []\n", 88 | " for idx, feat in enumerate(sparse_feats):\n", 89 | " # reg = tf.keras.regularizers.l2(0.5)\n", 90 | " emb = Embedding(input_dim=vocab_sizes[feat],\n", 91 | " output_dim=embed_dims,\n", 92 | " # embeddings_regularizer=reg,\n", 93 | " name=f'{feat}_emb')\n", 94 | " self.sparse_embeds_mat.append(emb)\n", 95 | " \n", 96 | " def call(self, sparse_inputs):\n", 97 | " # FM 部分\n", 98 | " sparse_embeds = []\n", 99 | " for idx, emb_mat in enumerate(self.sparse_embeds_mat):\n", 100 | " emb = emb_mat(sparse_inputs[idx])\n", 101 | " sparse_embeds.append(emb)\n", 102 | " concat_sparse_embeds = Concatenate(axis=1)(sparse_embeds)\n", 103 | " return concat_sparse_embeds" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 4, 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [ 112 | "class Linear(Layer):\n", 113 | " def __init__(self, sparse_feats, vocab_sizes):\n", 114 | " super().__init__()\n", 115 | " \n", 116 | " # 离散特1d征嵌入矩阵\n", 117 | " self.sparse_1d_embeds = SparseEmbedding(sparse_feats, vocab_sizes, embed_dims=1)\n", 118 | " \n", 119 | " self.fc_dense = Dense(1)\n", 120 | " self.fc_sparse = Dense(1)\n", 121 | " \n", 122 | " def call(self, inputs):\n", 123 | " dense_inputs, sparse_inputs = inputs[0], inputs[1]\n", 124 | " # 线性部分\n", 125 | " concat_dense_inputs = Concatenate(axis=1)(dense_inputs)\n", 126 | " first_order_dense_layer = self.fc_dense(concat_dense_inputs)\n", 127 | " \n", 128 | " concat_sparse_embeds_1d = self.sparse_1d_embeds(sparse_inputs) \n", 129 | " flat_sparse_embeds_1d = Flatten()(concat_sparse_embeds_1d)\n", 130 | " first_order_sparse_layer = self.fc_sparse(flat_sparse_embeds_1d)\n", 131 | " \n", 132 | " linear_output = Add()([first_order_dense_layer, first_order_sparse_layer])\n", 133 | " return linear_output" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": 5, 139 | "metadata": {}, 140 | "outputs": [], 141 | "source": [ 142 | "class FM(Layer):\n", 143 | " def __init__(self):\n", 144 | " super().__init__()\n", 145 | " \n", 146 | " def call(self, concat_sparse_embeds):\n", 147 | " # 先求和再求平方\n", 148 | " sum_embeds = tf.reduce_sum(concat_sparse_embeds, axis=1)\n", 149 | " square_sum_embeds = Multiply()([sum_embeds, sum_embeds])\n", 150 | " # 先平方再求和\n", 151 | " square_embeds = Multiply()([concat_sparse_embeds, concat_sparse_embeds])\n", 152 | " sum_square_embeds = tf.reduce_sum(square_embeds, axis=1)\n", 153 | " # 相减除以2\n", 154 | " sub = 0.5 * Subtract()([square_sum_embeds, sum_square_embeds])\n", 155 | " # 相加\n", 156 | " snd_order_sparse_output = tf.reduce_sum(sub, axis=1, keepdims=True)\n", 157 | " return snd_order_sparse_output" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": 6, 163 | "metadata": {}, 164 | "outputs": [], 165 | "source": [ 166 | "class DNN(Layer):\n", 167 | " def __init__(self, hid_units=[256,256,256], use_dropout=True):\n", 168 | " super().__init__()\n", 169 | " self.use_dropout = use_dropout\n", 170 | " self.Dropout = Dropout(0.3)\n", 171 | " self.dense_layers = []\n", 172 | " for unit in hid_units:\n", 173 | " self.dense_layers.append(Dense(unit, activation='relu'))\n", 174 | " self.dense_layers.append(Dense(1))\n", 175 | " \n", 176 | " def call(self, concat_sparse_embeds):\n", 177 | " flat_sparse_embed = Flatten()(concat_sparse_embeds)\n", 178 | " \n", 179 | " x = self.dense_layers[0](flat_sparse_embed)\n", 180 | " for dense in self.dense_layers[1:]:\n", 181 | " x = dense(x)\n", 182 | " if self.use_dropout:\n", 183 | " x = self.Dropout(x)\n", 184 | " return x" 185 | ] 186 | }, 187 | { 188 | "cell_type": "markdown", 189 | "metadata": {}, 190 | "source": [ 191 | "# 构建模型 (keras函数式)" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": 84, 197 | "metadata": {}, 198 | "outputs": [], 199 | "source": [ 200 | "class DeepFM:\n", 201 | " def __init__(self, dense_feats, sparse_feats, vocab_sizes, embed_dims=8):\n", 202 | " \n", 203 | " # 连续特征\n", 204 | " self.dense_inputs = []\n", 205 | " for feat in dense_feats:\n", 206 | " self.dense_inputs.append(Input(shape=1, name=feat))\n", 207 | " \n", 208 | " # 离散特征\n", 209 | " self.sparse_inputs = []\n", 210 | " for feat in sparse_feats:\n", 211 | " self.sparse_inputs.append(Input(shape=1, name=feat))\n", 212 | " \n", 213 | " self.Linear = Linear(sparse_feats, vocab_sizes)\n", 214 | " self.SparseEmbedding = SparseEmbedding(sparse_feats, vocab_sizes, embed_dims=8)\n", 215 | " self.FM = FM()\n", 216 | " self.DNN = DNN()\n", 217 | " \n", 218 | " def bulid_model(self):\n", 219 | " all_inputs = [self.dense_inputs, self.sparse_inputs]\n", 220 | " \n", 221 | " linear_output = self.Linear(all_inputs)\n", 222 | " concat_sparse_embeds = self.SparseEmbedding(self.sparse_inputs)\n", 223 | " snd_order_sparse_output = self.FM(concat_sparse_embeds)\n", 224 | " fc_layer_output = self.DNN(concat_sparse_embeds)\n", 225 | " \n", 226 | " # 输出部分\n", 227 | " output = Add()([linear_output, snd_order_sparse_output, fc_layer_output])\n", 228 | " output = Activation('sigmoid')(output)\n", 229 | " \n", 230 | " model = Model(inputs=all_inputs, outputs=output)\n", 231 | " return model" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": 3, 237 | "metadata": { 238 | "scrolled": true 239 | }, 240 | "outputs": [], 241 | "source": [ 242 | "train_data = total_data.loc[:500000-1]\n", 243 | "valid_data = total_data.loc[500000:]\n", 244 | "\n", 245 | "train_dense_x_all = [train_data[f].values for f in dense_feats]\n", 246 | "train_sparse_x_all = [train_data[f].values for f in sparse_feats]\n", 247 | "train_label_all = train_data[['label']].values\n", 248 | "\n", 249 | "val_dense_x_all = [valid_data[f].values for f in dense_feats]\n", 250 | "val_sparse_x_all = [valid_data[f].values for f in sparse_feats]\n", 251 | "val_label_all = valid_data[['label']].values" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": 14, 257 | "metadata": {}, 258 | "outputs": [ 259 | { 260 | "name": "stdout", 261 | "output_type": "stream", 262 | "text": [ 263 | "1954/1954 [==============================] - 27s 14ms/step - loss: 0.5171 - binary_crossentropy: 0.5171 - auc: 0.7346 - val_loss: 0.4816 - val_binary_crossentropy: 0.4816 - val_auc: 0.7663 - lr: 0.0010\n" 264 | ] 265 | }, 266 | { 267 | "data": { 268 | "text/plain": [ 269 | "" 270 | ] 271 | }, 272 | "execution_count": 14, 273 | "metadata": {}, 274 | "output_type": "execute_result" 275 | } 276 | ], 277 | "source": [ 278 | "model = DeepFM(dense_feats, sparse_feats, vocab_sizes).bulid_model()\n", 279 | "model.compile(optimizer='rmsprop', loss='binary_crossentropy', \n", 280 | " metrics=['binary_crossentropy', tf.keras.metrics.AUC()])\n", 281 | "\n", 282 | "os.makedirs('checkpoints/model.h5', exist_ok=True)\n", 283 | "checkpoints = ModelCheckpoint('checkpoints', monitor='val_auc', \n", 284 | " mode='max', save_weights_only=True)# , save_best_only=True\n", 285 | "early_stopping = EarlyStopping(monitor='val_auc', min_delta=0.0001, patience=5)\n", 286 | "def scheduler(epoch):\n", 287 | " thred = 10\n", 288 | " if epoch < thred:\n", 289 | " return 0.001\n", 290 | " else:\n", 291 | " return 0.001 * tf.math.exp(0.1 * (thred - epoch))\n", 292 | "lr_schedule = LearningRateScheduler(scheduler)\n", 293 | "callbacks = [checkpoints, early_stopping, lr_schedule]\n", 294 | "\n", 295 | "\n", 296 | "model.fit([train_dense_x_all, train_sparse_x_all], train_label_all, batch_size=256,\n", 297 | " validation_data=([val_dense_x_all, val_sparse_x_all], val_label_all),\n", 298 | " callbacks=callbacks, epochs=1)" 299 | ] 300 | }, 301 | { 302 | "cell_type": "markdown", 303 | "metadata": {}, 304 | "source": [ 305 | "# 附:继承 Model 的模型构建方法" 306 | ] 307 | }, 308 | { 309 | "cell_type": "code", 310 | "execution_count": 7, 311 | "metadata": {}, 312 | "outputs": [], 313 | "source": [ 314 | "class DeepFM(tf.keras.Model):\n", 315 | " def __init__(self, dense_feats, sparse_feats, vocab_sizes, embed_dims=8):\n", 316 | " super().__init__()\n", 317 | " self.dense_feats = dense_feats\n", 318 | " self.sparse_feats = sparse_feats\n", 319 | " self.vocab_sizes = vocab_sizes\n", 320 | " self.embed_dims = embed_dims\n", 321 | " \n", 322 | " self.Linear = Linear(sparse_feats, vocab_sizes)\n", 323 | " self.SparseEmbedding = SparseEmbedding(sparse_feats, vocab_sizes, embed_dims=8)\n", 324 | " self.FM = FM()\n", 325 | " self.DNN = DNN()\n", 326 | " \n", 327 | " \n", 328 | " def call(self, inputs, training=True):\n", 329 | " dense_inputs, sparse_inputs = inputs[0], inputs[1]\n", 330 | " \n", 331 | " linear_output = self.Linear(inputs)\n", 332 | " concat_sparse_embeds = self.SparseEmbedding(sparse_inputs)\n", 333 | " snd_order_sparse_output = self.FM(concat_sparse_embeds)\n", 334 | " fc_layer_output = self.DNN(concat_sparse_embeds)\n", 335 | " \n", 336 | " # 输出部分\n", 337 | " output = Add()([linear_output, snd_order_sparse_output, fc_layer_output])\n", 338 | " output = Activation('sigmoid')(output)\n", 339 | " return output" 340 | ] 341 | }, 342 | { 343 | "cell_type": "code", 344 | "execution_count": 53, 345 | "metadata": { 346 | "scrolled": true 347 | }, 348 | "outputs": [ 349 | { 350 | "name": "stdout", 351 | "output_type": "stream", 352 | "text": [ 353 | "WARNING:tensorflow:Layer deep_fm_12 is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2. The layer has dtype float32 because it's dtype defaults to floatx.\n", 354 | "\n", 355 | "If you intended to run this layer in float32, you can safely ignore this warning. If in doubt, this warning is likely only an issue if you are porting a TensorFlow 1.X model to TensorFlow 2.\n", 356 | "\n", 357 | "To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.\n", 358 | "\n", 359 | "train_loss 2.4098854 val_loss 2.483604\n", 360 | "train_loss 1.3211236 val_loss 1.3194331\n", 361 | "train_loss 1.169803 val_loss 1.0407462\n", 362 | "train_loss 0.99219334 val_loss 1.059052\n", 363 | "train_loss 0.8957213 val_loss 0.92486346\n", 364 | "train_loss 0.9512948 val_loss 0.9190863\n", 365 | "train_loss 0.95848316 val_loss 0.8585176\n", 366 | "train_loss 0.8645132 val_loss 0.83732563\n", 367 | "train_loss 0.7037978 val_loss 0.805529\n", 368 | "train_loss 0.67349327 val_loss 0.8104939\n", 369 | "train_loss 0.7041616 val_loss 0.7876595\n", 370 | "train_loss 0.6040318 val_loss 0.7762838\n", 371 | "train_loss 0.6661148 val_loss 0.7730615\n", 372 | "train_loss 0.62362516 val_loss 0.7610869\n", 373 | "train_loss 0.7019628 val_loss 0.7555212\n", 374 | "train_loss 0.66617835 val_loss 0.7523162\n", 375 | "train_loss 0.7438489 val_loss 0.7429388\n", 376 | "train_loss 0.65911984 val_loss 0.73776776\n", 377 | "train_loss 0.7577257 val_loss 0.72724354\n", 378 | "train_loss 0.5841069 val_loss 0.7345725\n", 379 | "train_loss 0.69067764 val_loss 0.72640127\n", 380 | "train_loss 0.70591426 val_loss 0.7106958\n", 381 | "train_loss 0.5872619 val_loss 0.7009569\n", 382 | "train_loss 0.68808687 val_loss 0.7069153\n", 383 | "train_loss 0.71475106 val_loss 0.69875795\n", 384 | "train_loss 0.66280043 val_loss 0.6927632\n", 385 | "train_loss 0.5748261 val_loss 0.70920014\n", 386 | "train_loss 0.51971066 val_loss 0.6790053\n", 387 | "train_loss 0.6533222 val_loss 0.6743224\n", 388 | "train_loss 0.5787353 val_loss 0.6695126\n", 389 | "train_loss 0.6206254 val_loss 0.6677863\n", 390 | "train_loss 0.6333037 val_loss 0.65689474\n", 391 | "train_loss 0.6053113 val_loss 0.65412337\n", 392 | "train_loss 0.5766033 val_loss 0.6488665\n", 393 | "train_loss 0.58827424 val_loss 0.6418533\n", 394 | "train_loss 0.5509889 val_loss 0.6426741\n", 395 | "train_loss 0.586426 val_loss 0.63931245\n", 396 | "train_loss 0.5573069 val_loss 0.6267758\n", 397 | "train_loss 0.5612141 val_loss 0.6449642\n", 398 | "train_loss 0.5308564 val_loss 0.62057\n", 399 | "train_loss 0.47453913 val_loss 0.6234291\n", 400 | "train_loss 0.53106457 val_loss 0.6179022\n", 401 | "train_loss 0.5241479 val_loss 0.6151297\n", 402 | "train_loss 0.6813842 val_loss 0.6094231\n", 403 | "train_loss 0.6254232 val_loss 0.608384\n", 404 | "train_loss 0.5442903 val_loss 0.6020965\n", 405 | "train_loss 0.48889655 val_loss 0.59901583\n", 406 | "train_loss 0.5180689 val_loss 0.59522665\n", 407 | "train_loss 0.57622343 val_loss 0.59929794\n", 408 | "train_loss 0.5545541 val_loss 0.5887621\n", 409 | "train_loss 0.55703753 val_loss 0.5847919\n", 410 | "train_loss 0.5962651 val_loss 0.5867704\n", 411 | "train_loss 0.6141354 val_loss 0.5895961\n", 412 | "train_loss 0.44274747 val_loss 0.57974124\n", 413 | "train_loss 0.53316665 val_loss 0.5761268\n", 414 | "train_loss 0.498142 val_loss 0.57354474\n", 415 | "train_loss 0.51469684 val_loss 0.5783324\n", 416 | "train_loss 0.6005205 val_loss 0.5681326\n", 417 | "train_loss 0.5296999 val_loss 0.5666884\n", 418 | "train_loss 0.47100228 val_loss 0.5630498\n", 419 | "train_loss 0.54450154 val_loss 0.5787666\n", 420 | "train_loss 0.5576676 val_loss 0.55907017\n", 421 | "train_loss 0.53484154 val_loss 0.5579099\n", 422 | "train_loss 0.5364264 val_loss 0.5627258\n", 423 | "train_loss 0.51488286 val_loss 0.55314064\n", 424 | "train_loss 0.55697054 val_loss 0.5507671\n", 425 | "train_loss 0.57481194 val_loss 0.550431\n", 426 | "train_loss 0.55537355 val_loss 0.5489825\n", 427 | "train_loss 0.56959105 val_loss 0.5468589\n", 428 | "train_loss 0.5849153 val_loss 0.5559658\n", 429 | "train_loss 0.6252235 val_loss 0.5475525\n", 430 | "train_loss 0.56599003 val_loss 0.5416614\n", 431 | "train_loss 0.45060825 val_loss 0.5402306\n", 432 | "train_loss 0.4618894 val_loss 0.5392666\n", 433 | "train_loss 0.5865662 val_loss 0.53793675\n", 434 | "train_loss 0.52943933 val_loss 0.5399228\n", 435 | "train_loss 0.46397635 val_loss 0.53526783\n", 436 | "train_loss 0.5720602 val_loss 0.53363514\n", 437 | "train_loss 0.53340673 val_loss 0.53278434\n", 438 | "train_loss 0.5761976 val_loss 0.5318284\n", 439 | "train_loss 0.5111011 val_loss 0.5346499\n", 440 | "train_loss 0.5211141 val_loss 0.53015906\n", 441 | "train_loss 0.4850332 val_loss 0.5295973\n", 442 | "train_loss 0.5528135 val_loss 0.5284586\n", 443 | "train_loss 0.5071026 val_loss 0.52804345\n", 444 | "train_loss 0.5900887 val_loss 0.5259946\n", 445 | "train_loss 0.46475852 val_loss 0.5291728\n", 446 | "train_loss 0.51071024 val_loss 0.53127724\n", 447 | "train_loss 0.5197077 val_loss 0.52651155\n", 448 | "train_loss 0.52425003 val_loss 0.5265078\n", 449 | "train_loss 0.5947874 val_loss 0.52251405\n", 450 | "train_loss 0.6377196 val_loss 0.52052724\n", 451 | "train_loss 0.481161 val_loss 0.51937187\n", 452 | "train_loss 0.52613926 val_loss 0.5188099\n", 453 | "train_loss 0.54683673 val_loss 0.52657354\n", 454 | "train_loss 0.4866224 val_loss 0.5167897\n", 455 | "train_loss 0.49941382 val_loss 0.5169294\n", 456 | "train_loss 0.5138988 val_loss 0.51508343\n", 457 | "train_loss 0.5252162 val_loss 0.5134446\n", 458 | "train_loss 0.54643357 val_loss 0.5159939\n", 459 | "train_loss 0.50454354 val_loss 0.51795447\n", 460 | "train_loss 0.5477977 val_loss 0.51163554\n", 461 | "train_loss 0.48235464 val_loss 0.51226854\n", 462 | "train_loss 0.46278268 val_loss 0.5106911\n", 463 | "train_loss 0.55143964 val_loss 0.5130235\n", 464 | "train_loss 0.49793103 val_loss 0.5086262\n", 465 | "train_loss 0.55516046 val_loss 0.5116377\n", 466 | "train_loss 0.526183 val_loss 0.50830406\n", 467 | "train_loss 0.4525234 val_loss 0.5073179\n", 468 | "train_loss 0.48127568 val_loss 0.50913733\n", 469 | "train_loss 0.5003133 val_loss 0.5108921\n", 470 | "train_loss 0.5246837 val_loss 0.5055629\n", 471 | "train_loss 0.5484116 val_loss 0.50500053\n", 472 | "train_loss 0.5513848 val_loss 0.50540334\n", 473 | "train_loss 0.5670711 val_loss 0.5034824\n", 474 | "train_loss 0.53560483 val_loss 0.50356776\n", 475 | "train_loss 0.4458433 val_loss 0.50354636\n", 476 | "train_loss 0.5142056 val_loss 0.5023199\n", 477 | "train_loss 0.5943471 val_loss 0.5016766\n", 478 | "train_loss 0.51107144 val_loss 0.5018072\n", 479 | "train_loss 0.4835248 val_loss 0.506819\n", 480 | "train_loss 0.45860666 val_loss 0.50842124\n", 481 | "train_loss 0.47358721 val_loss 0.49983543\n", 482 | "train_loss 0.49508673 val_loss 0.5003009\n", 483 | "train_loss 0.46116623 val_loss 0.49922407\n", 484 | "train_loss 0.46439755 val_loss 0.49882585\n", 485 | "train_loss 0.55467176 val_loss 0.49828482\n" 486 | ] 487 | }, 488 | { 489 | "ename": "KeyboardInterrupt", 490 | "evalue": "", 491 | "output_type": "error", 492 | "traceback": [ 493 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 494 | "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", 495 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 21\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mtf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mGradientTape\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mtape\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 22\u001b[0;31m \u001b[0mpred\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mtrain_dense_x\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtrain_sparse_x\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 23\u001b[0m \u001b[0mloss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkeras\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlosses\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbinary_crossentropy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtrain_label\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpred\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[0mloss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreduce_mean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mloss\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 496 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 966\u001b[0m with base_layer_utils.autocast_context_manager(\n\u001b[1;32m 967\u001b[0m self._compute_dtype):\n\u001b[0;32m--> 968\u001b[0;31m \u001b[0moutputs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcall\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcast_inputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 969\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_handle_activity_regularization\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 970\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_set_mask_metadata\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput_masks\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 497 | "\u001b[0;32m\u001b[0m in \u001b[0;36mcall\u001b[0;34m(self, inputs, training)\u001b[0m\n\u001b[1;32m 18\u001b[0m \u001b[0mlinear_output\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mLinear\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 19\u001b[0m \u001b[0mconcat_sparse_embeds\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mSparseEmbedding\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msparse_inputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 20\u001b[0;31m \u001b[0msnd_order_sparse_output\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mFM\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mconcat_sparse_embeds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 21\u001b[0m \u001b[0mfc_layer_output\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDNN\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mconcat_sparse_embeds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 22\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 498 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 966\u001b[0m with base_layer_utils.autocast_context_manager(\n\u001b[1;32m 967\u001b[0m self._compute_dtype):\n\u001b[0;32m--> 968\u001b[0;31m \u001b[0moutputs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcall\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcast_inputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 969\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_handle_activity_regularization\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 970\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_set_mask_metadata\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput_masks\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 499 | "\u001b[0;32m\u001b[0m in \u001b[0;36mcall\u001b[0;34m(self, concat_sparse_embeds)\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;31m# 先求和再求平方\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0msum_embeds\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreduce_sum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mconcat_sparse_embeds\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 8\u001b[0;31m \u001b[0msquare_sum_embeds\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mMultiply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0msum_embeds\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msum_embeds\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 9\u001b[0m \u001b[0;31m# 先平方再求和\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0msquare_embeds\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mMultiply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mconcat_sparse_embeds\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mconcat_sparse_embeds\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 500 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 807\u001b[0m \u001b[0;31m# mode when all inputs can be traced back to `keras.Input()` (when building\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 808\u001b[0m \u001b[0;31m# models using the functional API).\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 809\u001b[0;31m \u001b[0mbuild_graph\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtf_utils\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mare_all_symbolic_tensors\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput_list\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 810\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 811\u001b[0m \u001b[0;31m# Accept NumPy and scalar inputs by converting to Tensors.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 501 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/utils/tf_utils.py\u001b[0m in \u001b[0;36mare_all_symbolic_tensors\u001b[0;34m(tensors)\u001b[0m\n\u001b[1;32m 324\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 325\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mare_all_symbolic_tensors\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtensors\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 326\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mall\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mis_symbolic_tensor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtensor\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mtensor\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mtensors\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 327\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 328\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 502 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/utils/tf_utils.py\u001b[0m in \u001b[0;36m\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 324\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 325\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mare_all_symbolic_tensors\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtensors\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 326\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mall\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mis_symbolic_tensor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtensor\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mtensor\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mtensors\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 327\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 328\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 503 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/utils/tf_utils.py\u001b[0m in \u001b[0;36mis_symbolic_tensor\u001b[0;34m(tensor)\u001b[0m\n\u001b[1;32m 352\u001b[0m return (getattr(tensor, '_keras_history', False) or\n\u001b[1;32m 353\u001b[0m not context.executing_eagerly())\n\u001b[0;32m--> 354\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtensor\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcomposite_tensor\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mCompositeTensor\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 355\u001b[0m \u001b[0mcomponent_tensors\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnest\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mflatten\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtensor\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mexpand_composites\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 356\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0many\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mhasattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'graph'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mt\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mcomponent_tensors\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 504 | "\u001b[0;32m~/anaconda3/lib/python3.7/abc.py\u001b[0m in \u001b[0;36m__instancecheck__\u001b[0;34m(cls, instance)\u001b[0m\n\u001b[1;32m 137\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__instancecheck__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcls\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minstance\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 138\u001b[0m \u001b[0;34m\"\"\"Override for isinstance(instance, cls).\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 139\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_abc_instancecheck\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcls\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minstance\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 140\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 141\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__subclasscheck__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcls\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msubclass\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 505 | "\u001b[0;31mKeyboardInterrupt\u001b[0m: " 506 | ] 507 | } 508 | ], 509 | "source": [ 510 | "train_data = total_data.loc[:500000-1]\n", 511 | "valid_data = total_data.loc[500000:]\n", 512 | "\n", 513 | "train_dense_x_all = np.array([train_data[[f]].values for f in dense_feats])\n", 514 | "train_sparse_x_all = np.array([train_data[[f]].values for f in sparse_feats])\n", 515 | "train_label_all = train_data[['label']].values\n", 516 | "\n", 517 | "val_dense_x_all = np.array([valid_data[[f]].values for f in dense_feats])\n", 518 | "val_sparse_x_all = np.array([valid_data[[f]].values for f in sparse_feats])\n", 519 | "val_label_all = valid_data[['label']].values\n", 520 | "\n", 521 | "\n", 522 | "model = DeepFM(dense_feats, sparse_feats, vocab_sizes)\n", 523 | "opt = tf.keras.optimizers.Adam(learning_rate=1e-3)\n", 524 | "batch_size = 256\n", 525 | "for i in range(total_data.shape[0]//batch_size):\n", 526 | " train_dense_x = list(train_dense_x_all[:,i*batch_size:(i+1)*batch_size,:])\n", 527 | " train_sparse_x = list(train_sparse_x_all[:,i*batch_size:(i+1)*batch_size,:])\n", 528 | " train_label = train_label_all[i*batch_size:(i+1)*batch_size]\n", 529 | " \n", 530 | " with tf.GradientTape() as tape:\n", 531 | " pred = model([train_dense_x, train_sparse_x])\n", 532 | " loss = tf.keras.losses.binary_crossentropy(train_label, pred)\n", 533 | " loss = tf.reduce_mean(loss)\n", 534 | " grads = tape.gradient(loss, model.variables)\n", 535 | " opt.apply_gradients(grads_and_vars=zip(grads, model.variables))\n", 536 | " \n", 537 | " \n", 538 | " if i%10 ==0:\n", 539 | " val_dense_x = list(val_dense_x_all)\n", 540 | " val_sparse_x = list(val_sparse_x_all)\n", 541 | " val_label = val_label_all\n", 542 | "\n", 543 | " pred = model([val_dense_x, val_sparse_x])\n", 544 | " val_loss = tf.keras.losses.binary_crossentropy(val_label, pred)\n", 545 | " val_loss = tf.reduce_mean(val_loss)\n", 546 | " print('train_loss', loss.numpy(), 'val_loss', val_loss.numpy())" 547 | ] 548 | }, 549 | { 550 | "cell_type": "code", 551 | "execution_count": null, 552 | "metadata": {}, 553 | "outputs": [], 554 | "source": [] 555 | } 556 | ], 557 | "metadata": { 558 | "kernelspec": { 559 | "display_name": "Python 3", 560 | "language": "python", 561 | "name": "python3" 562 | }, 563 | "language_info": { 564 | "codemirror_mode": { 565 | "name": "ipython", 566 | "version": 3 567 | }, 568 | "file_extension": ".py", 569 | "mimetype": "text/x-python", 570 | "name": "python", 571 | "nbconvert_exporter": "python", 572 | "pygments_lexer": "ipython3", 573 | "version": "3.7.6" 574 | } 575 | }, 576 | "nbformat": 4, 577 | "nbformat_minor": 4 578 | } 579 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DeepCTR_tensorflow2 2 | Deep-learning based CTR models implemented with tensorflow2.0. / 使用tensorflow2.0实现的一些基于深度学习的CTR模型 3 | -------------------------------------------------------------------------------- /xDeepFM.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 20, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "import numpy as np\n", 11 | "import pandas as pd\n", 12 | "from tensorflow.keras.layers import *\n", 13 | "import tensorflow.keras.backend as K\n", 14 | "import matplotlib.pyplot as plt\n", 15 | "import tensorflow as tf\n", 16 | "from tensorflow.keras.models import Model\n", 17 | "from tensorflow.keras.utils import plot_model\n", 18 | "from tensorflow.keras.callbacks import *\n", 19 | "from sklearn.preprocessing import LabelEncoder\n", 20 | "# from tensorflow.keras.constraints import *\n", 21 | "\n", 22 | "%matplotlib inline" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "# 准备数据" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 21, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "path = '/disk/share/criteo/'\n", 39 | "data = pd.read_csv(path+'criteo_sampled_data.csv')\n", 40 | "cols = data.columns.values\n", 41 | "\n", 42 | "dense_feats = [f for f in cols if f[0] == \"I\"]\n", 43 | "sparse_feats = [f for f in cols if f[0] == \"C\"]\n", 44 | "\n", 45 | "def process_dense_feats(data, feats):\n", 46 | " d = data.copy()\n", 47 | " d = d[feats].fillna(0.0)\n", 48 | " for f in feats:\n", 49 | " d[f] = d[f].apply(lambda x: np.log(x+1) if x > -1 else -1)\n", 50 | " \n", 51 | " return d\n", 52 | "\n", 53 | "data_dense = process_dense_feats(data, dense_feats)\n", 54 | "\n", 55 | "vocab_sizes = {}\n", 56 | "def process_sparse_feats(data, feats):\n", 57 | " d = data.copy()\n", 58 | " d = d[feats].fillna(\"-1\")\n", 59 | " for f in feats:\n", 60 | " label_encoder = LabelEncoder()\n", 61 | " d[f] = label_encoder.fit_transform(d[f])\n", 62 | " vocab_sizes[f] = d[f].nunique() + 1\n", 63 | " return d\n", 64 | "\n", 65 | "data_sparse = process_sparse_feats(data, sparse_feats)\n", 66 | "total_data = pd.concat([data_dense, data_sparse], axis=1)\n", 67 | "total_data['label'] = data['label']" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "# 自定义层" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 22, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "class SparseEmbedding(Layer):\n", 84 | " def __init__(self, sparse_feats, vocab_sizes, embed_dims=8):\n", 85 | " super().__init__()\n", 86 | " self.sparse_feats = sparse_feats\n", 87 | " self.vocab_sizes = vocab_sizes\n", 88 | " self.embed_dims = embed_dims\n", 89 | " \n", 90 | " # 离散特征嵌入矩阵\n", 91 | " self.sparse_embeds_mat = []\n", 92 | " for idx, feat in enumerate(self.sparse_feats):\n", 93 | " # reg = tf.keras.regularizers.l2(0.5)\n", 94 | " emb = Embedding(input_dim=self.vocab_sizes[feat],\n", 95 | " output_dim=self.embed_dims,\n", 96 | " # embeddings_regularizer=reg,\n", 97 | " name=f'{feat}_emb')\n", 98 | " self.sparse_embeds_mat.append(emb)\n", 99 | " \n", 100 | " def call(self, sparse_inputs):\n", 101 | " sparse_embeds = []\n", 102 | " for idx, emb_mat in enumerate(self.sparse_embeds_mat):\n", 103 | " emb = emb_mat(sparse_inputs[idx])\n", 104 | " sparse_embeds.append(emb)\n", 105 | " concat_sparse_embeds = Concatenate(axis=1)(sparse_embeds)\n", 106 | " return concat_sparse_embeds" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 28, 112 | "metadata": {}, 113 | "outputs": [], 114 | "source": [ 115 | "class Linear(Layer):\n", 116 | " def __init__(self, sparse_feats, vocab_sizes):\n", 117 | " super().__init__()\n", 118 | " \n", 119 | " # 离散特1d征嵌入矩阵\n", 120 | " self.sparse_1d_embeds = SparseEmbedding(sparse_feats, vocab_sizes, embed_dims=1)\n", 121 | " \n", 122 | " self.fc_dense = Dense(1)\n", 123 | " self.fc_sparse = Dense(1)\n", 124 | " \n", 125 | " def call(self, inputs):\n", 126 | " dense_inputs, sparse_inputs = inputs[0], inputs[1]\n", 127 | " # 线性部分\n", 128 | " concat_dense_inputs = Concatenate(axis=1)(dense_inputs)\n", 129 | " first_order_dense_layer = self.fc_dense(concat_dense_inputs)\n", 130 | " \n", 131 | " concat_sparse_embeds_1d = self.sparse_1d_embeds(sparse_inputs) \n", 132 | " flat_sparse_embeds_1d = Flatten()(concat_sparse_embeds_1d)\n", 133 | " first_order_sparse_layer = self.fc_sparse(flat_sparse_embeds_1d)\n", 134 | " \n", 135 | " linear_output = Add()([first_order_dense_layer, first_order_sparse_layer])\n", 136 | " return linear_output" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": 29, 142 | "metadata": { 143 | "scrolled": true 144 | }, 145 | "outputs": [], 146 | "source": [ 147 | "class cross_layer(Layer):\n", 148 | " def __init__(self, n_filters):\n", 149 | " super().__init__()\n", 150 | " # self.n_filters = n_filters \n", 151 | " self.con1d = Conv1D(filters=n_filters, kernel_size=1, strides=1)\n", 152 | " \n", 153 | " def call(self, inputs):\n", 154 | " x0, xl = inputs\n", 155 | " h = xl.shape[1]\n", 156 | " m = x0.shape[1]\n", 157 | " D = x0.shape[-1] # emb_dim\n", 158 | "\n", 159 | " xl = tf.expand_dims(xl, -2) \n", 160 | " xl = tf.tile(xl, [1, 1, m, 1]) # ?, h, m, D\n", 161 | " x0 = tf.expand_dims(x0, -3) \n", 162 | " x0 = tf.tile(x0, [1, h, 1, 1]) # ?, h, m, D\n", 163 | " feature_maps = tf.multiply(xl, x0) # ?, h, m, D\n", 164 | "\n", 165 | " # ?, h*m, D\n", 166 | " feature_maps = tf.reshape(feature_maps, [-1, h*m, D])\n", 167 | " # ?, D, h*m\n", 168 | " feature_maps = tf.transpose(feature_maps, [0,2,1])\n", 169 | " # ?, D, n_filters\n", 170 | " feature_maps = self.con1d(feature_maps)\n", 171 | " # ?, n_filters, D\n", 172 | " feature_maps = tf.transpose(feature_maps, [0,2,1])\n", 173 | " return feature_maps" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": 40, 179 | "metadata": {}, 180 | "outputs": [], 181 | "source": [ 182 | "class CIN(Layer):\n", 183 | " def __init__(self, n_layers, n_filters):\n", 184 | " super().__init__()\n", 185 | " self.cross_layers = []\n", 186 | " for i in range(n_layers):\n", 187 | " self.cross_layers.append(cross_layer(n_filters))\n", 188 | " def call(self, inputs):\n", 189 | " x0 = xl = inputs\n", 190 | " sum_poolings = []\n", 191 | " for layer in self.cross_layers:\n", 192 | " # ?, n_filters, D\n", 193 | " xl = layer([x0, xl])\n", 194 | " # ?, n_filters\n", 195 | " sum_poolings.append(tf.reduce_sum(xl, axis=-1))\n", 196 | " \n", 197 | " return tf.concat(sum_poolings, axis=-1) # ?, n_filters*n_layers\n", 198 | " " 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "metadata": {}, 205 | "outputs": [], 206 | "source": [] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": 41, 211 | "metadata": {}, 212 | "outputs": [], 213 | "source": [ 214 | "class DNN(Layer):\n", 215 | " def __init__(self, hid_units=[256,256,256], use_dropout=True, output_unit=16):\n", 216 | " super().__init__()\n", 217 | " self.hid_units = hid_units\n", 218 | " self.use_dropout = use_dropout\n", 219 | " self.output_unit = output_unit\n", 220 | " self.Dropout = Dropout(0.3)\n", 221 | " self.dense_layers = []\n", 222 | " for unit in self.hid_units:\n", 223 | " self.dense_layers.append(Dense(unit, activation='relu'))\n", 224 | " self.dense_layers.append(Dense(self.output_unit))\n", 225 | " \n", 226 | " def call(self, concat_sparse_embeds):\n", 227 | " flat_sparse_embed = Flatten()(concat_sparse_embeds)\n", 228 | " \n", 229 | " x = self.dense_layers[0](flat_sparse_embed)\n", 230 | " for dense in self.dense_layers[1:]:\n", 231 | " x = dense(x)\n", 232 | " if self.use_dropout:\n", 233 | " x = self.Dropout(x)\n", 234 | " return x" 235 | ] 236 | }, 237 | { 238 | "cell_type": "markdown", 239 | "metadata": {}, 240 | "source": [ 241 | "# 构建模型 (keras函数式)" 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": 42, 247 | "metadata": {}, 248 | "outputs": [], 249 | "source": [ 250 | "class xDeepFM:\n", 251 | " def __init__(self, dense_feats, sparse_feats, vocab_sizes, \n", 252 | " embed_dims=8, cross_layer_num=3):\n", 253 | " \n", 254 | " # 连续特征\n", 255 | " self.dense_inputs = []\n", 256 | " for feat in dense_feats:\n", 257 | " self.dense_inputs.append(Input(shape=1, name=feat))\n", 258 | " \n", 259 | " # 离散特征\n", 260 | " self.sparse_inputs = []\n", 261 | " for feat in sparse_feats:\n", 262 | " self.sparse_inputs.append(Input(shape=1, name=feat))\n", 263 | " \n", 264 | " self.SparseEmbedding = SparseEmbedding(sparse_feats, vocab_sizes, embed_dims=8)\n", 265 | " \n", 266 | " self.linear = Linear(sparse_feats, vocab_sizes)\n", 267 | " \n", 268 | " self.CIN = CIN(n_layers=3, n_filters=6)\n", 269 | " \n", 270 | " self.DNN = DNN()\n", 271 | " self.dense = Dense(1, activation='sigmoid')\n", 272 | " \n", 273 | " def bulid_model(self):\n", 274 | " all_inputs = [self.dense_inputs, self.sparse_inputs]\n", 275 | " linear_output = self.linear(all_inputs)\n", 276 | " \n", 277 | " # concat_dense_inputs = Concatenate(axis=1)(self.dense_inputs)\n", 278 | " \n", 279 | " concat_sparse_embeds = self.SparseEmbedding(self.sparse_inputs)\n", 280 | " # flatten_sparse_embeds = Flatten()(concat_sparse_embeds)\n", 281 | " \n", 282 | " # concat_inputs = Concatenate(axis=1)([flatten_sparse_embeds, concat_dense_inputs])\n", 283 | " cross_output = self.CIN(concat_sparse_embeds)\n", 284 | " \n", 285 | " fc_layer_output = self.DNN(concat_sparse_embeds)\n", 286 | " \n", 287 | " # 输出部分\n", 288 | " concat_layer = Concatenate()([cross_output, fc_layer_output])\n", 289 | " output = self.dense(concat_layer)\n", 290 | " \n", 291 | " model = Model(inputs=all_inputs, outputs=output)\n", 292 | " return model" 293 | ] 294 | }, 295 | { 296 | "cell_type": "code", 297 | "execution_count": 43, 298 | "metadata": { 299 | "scrolled": true 300 | }, 301 | "outputs": [ 302 | { 303 | "name": "stdout", 304 | "output_type": "stream", 305 | "text": [ 306 | "Epoch 1/3\n", 307 | "WARNING:tensorflow:From /root/anaconda3/lib/python3.7/site-packages/tensorflow/python/ops/resource_variable_ops.py:1817: calling BaseResourceVariable.__init__ (from tensorflow.python.ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version.\n", 308 | "Instructions for updating:\n", 309 | "If using Keras pass *_constraint arguments to layers.\n", 310 | "1954/1954 [==============================] - 35s 18ms/step - loss: 0.5071 - binary_crossentropy: 0.5071 - auc: 0.7214 - val_loss: 0.5005 - val_binary_crossentropy: 0.5005 - val_auc: 0.7355 - lr: 0.0010\n", 311 | "Epoch 2/3\n", 312 | " 914/1954 [=============>................] - ETA: 17s - loss: 0.4825 - binary_crossentropy: 0.4825 - auc: 0.7582" 313 | ] 314 | }, 315 | { 316 | "ename": "_NotOkStatusException", 317 | "evalue": "InvalidArgumentError: Error while reading CompositeTensor._type_spec.", 318 | "output_type": "error", 319 | "traceback": [ 320 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 321 | "\u001b[0;31m_NotOkStatusException\u001b[0m Traceback (most recent call last)", 322 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 31\u001b[0m model.fit([train_dense_x_all, train_sparse_x_all], train_label_all, batch_size=256,\n\u001b[1;32m 32\u001b[0m \u001b[0mvalidation_data\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mval_dense_x_all\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mval_sparse_x_all\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mval_label_all\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 33\u001b[0;31m callbacks=callbacks, epochs=3)\n\u001b[0m", 323 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py\u001b[0m in \u001b[0;36m_method_wrapper\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 64\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_method_wrapper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 65\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_in_multi_worker_mode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# pylint: disable=protected-access\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 66\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 67\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 68\u001b[0m \u001b[0;31m# Running inside `run_distribute_coordinator` already.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 324 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)\u001b[0m\n\u001b[1;32m 846\u001b[0m batch_size=batch_size):\n\u001b[1;32m 847\u001b[0m \u001b[0mcallbacks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mon_train_batch_begin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 848\u001b[0;31m \u001b[0mtmp_logs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtrain_function\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0miterator\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 849\u001b[0m \u001b[0;31m# Catch OutOfRangeError for Datasets of unknown size.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 850\u001b[0m \u001b[0;31m# This blocks until the batch has finished executing.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 325 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwds)\u001b[0m\n\u001b[1;32m 578\u001b[0m \u001b[0mxla_context\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mExit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 579\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 580\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 581\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 582\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtracing_count\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_tracing_count\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 326 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py\u001b[0m in \u001b[0;36m_call\u001b[0;34m(self, *args, **kwds)\u001b[0m\n\u001b[1;32m 609\u001b[0m \u001b[0;31m# In this case we have created variables on the first call, so we run the\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 610\u001b[0m \u001b[0;31m# defunned version which is guaranteed to never create variables.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 611\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_stateless_fn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# pylint: disable=not-callable\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 612\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_stateful_fn\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 613\u001b[0m \u001b[0;31m# Release the lock early so that multiple threads can perform the call\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 327 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/tensorflow/python/eager/function.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 2417\u001b[0m \u001b[0;34m\"\"\"Calls a graph function specialized to the inputs.\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2418\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_lock\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2419\u001b[0;31m \u001b[0mgraph_function\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_maybe_define_function\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2420\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mgraph_function\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_filtered_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# pylint: disable=protected-access\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2421\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 328 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/tensorflow/python/eager/function.py\u001b[0m in \u001b[0;36m_maybe_define_function\u001b[0;34m(self, args, kwargs)\u001b[0m\n\u001b[1;32m 2735\u001b[0m *args, **kwargs)\n\u001b[1;32m 2736\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2737\u001b[0;31m \u001b[0mcache_key\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_cache_key\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2738\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2739\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 329 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/tensorflow/python/eager/function.py\u001b[0m in \u001b[0;36m_cache_key\u001b[0;34m(self, args, kwargs, include_tensor_ranks_only)\u001b[0m\n\u001b[1;32m 2573\u001b[0m \u001b[0minputs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mkwargs\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2574\u001b[0m input_signature = pywrap_tfe.TFE_Py_EncodeArg(inputs,\n\u001b[0;32m-> 2575\u001b[0;31m include_tensor_ranks_only)\n\u001b[0m\u001b[1;32m 2576\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2577\u001b[0m \u001b[0;32mdel\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 330 | "\u001b[0;31m_NotOkStatusException\u001b[0m: InvalidArgumentError: Error while reading CompositeTensor._type_spec." 331 | ] 332 | } 333 | ], 334 | "source": [ 335 | "train_data = total_data.loc[:500000-1]\n", 336 | "valid_data = total_data.loc[500000:]\n", 337 | "\n", 338 | "train_dense_x_all = [train_data[f].values for f in dense_feats]\n", 339 | "train_sparse_x_all = [train_data[f].values for f in sparse_feats]\n", 340 | "train_label_all = train_data[['label']].values\n", 341 | "\n", 342 | "val_dense_x_all = [valid_data[f].values for f in dense_feats]\n", 343 | "val_sparse_x_all = [valid_data[f].values for f in sparse_feats]\n", 344 | "val_label_all = valid_data[['label']].values\n", 345 | "\n", 346 | "\n", 347 | "model = xDeepFM(dense_feats, sparse_feats, vocab_sizes).bulid_model()\n", 348 | "model.compile(optimizer='rmsprop', loss='binary_crossentropy', \n", 349 | " metrics=['binary_crossentropy', 'AUC']) # tf.keras.metrics.AUC()\n", 350 | "\n", 351 | "os.makedirs('checkpoints', exist_ok=True)\n", 352 | "checkpoints = ModelCheckpoint('checkpoints/model.h5', monitor='val_auc', \n", 353 | " mode='max', save_weights_only=True)# save_best_only=True\n", 354 | "early_stopping = EarlyStopping(monitor='val_auc', min_delta=0.0001, patience=2)\n", 355 | "def scheduler(epoch):\n", 356 | " thred = 10\n", 357 | " if epoch < thred:\n", 358 | " return 0.001\n", 359 | " else:\n", 360 | " return 0.001 * tf.math.exp(0.1 * (thred - epoch))\n", 361 | "lr_schedule = LearningRateScheduler(scheduler)\n", 362 | "callbacks = [early_stopping, lr_schedule, checkpoints] # \n", 363 | "\n", 364 | "\n", 365 | "model.fit([train_dense_x_all, train_sparse_x_all], train_label_all, batch_size=256,\n", 366 | " validation_data=([val_dense_x_all, val_sparse_x_all], val_label_all),\n", 367 | " callbacks=callbacks, epochs=3)" 368 | ] 369 | }, 370 | { 371 | "cell_type": "code", 372 | "execution_count": 16, 373 | "metadata": {}, 374 | "outputs": [], 375 | "source": [ 376 | "# # 加载模型\n", 377 | "# model = DCN(dense_feats, sparse_feats, vocab_sizes).bulid_model()\n", 378 | "# model.load_weights('checkpoints/model.h5')" 379 | ] 380 | }, 381 | { 382 | "cell_type": "code", 383 | "execution_count": null, 384 | "metadata": {}, 385 | "outputs": [], 386 | "source": [] 387 | } 388 | ], 389 | "metadata": { 390 | "kernelspec": { 391 | "display_name": "Python 3", 392 | "language": "python", 393 | "name": "python3" 394 | }, 395 | "language_info": { 396 | "codemirror_mode": { 397 | "name": "ipython", 398 | "version": 3 399 | }, 400 | "file_extension": ".py", 401 | "mimetype": "text/x-python", 402 | "name": "python", 403 | "nbconvert_exporter": "python", 404 | "pygments_lexer": "ipython3", 405 | "version": "3.7.6" 406 | } 407 | }, 408 | "nbformat": 4, 409 | "nbformat_minor": 4 410 | } 411 | --------------------------------------------------------------------------------