├── AutoInt.ipynb
├── DCN.ipynb
├── DIN_DIEN
    ├── DIEN.ipynb
    ├── DIN.ipynb
    ├── data
    │   └── get_data.txt
    ├── layers.py
    └── utils.py
├── DeepFM.ipynb
├── README.md
└── xDeepFM.ipynb


/AutoInt.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import os\n",
 10 |     "import numpy as np\n",
 11 |     "import pandas as pd\n",
 12 |     "from tensorflow.keras.layers import *\n",
 13 |     "import tensorflow.keras.backend as K\n",
 14 |     "import matplotlib.pyplot as plt\n",
 15 |     "import tensorflow as tf\n",
 16 |     "from tensorflow.keras.models import Model\n",
 17 |     "from tensorflow.keras.utils import plot_model\n",
 18 |     "from tensorflow.keras.callbacks import *\n",
 19 |     "from sklearn.preprocessing import LabelEncoder\n",
 20 |     "# from tensorflow.keras.constraints import *\n",
 21 |     "\n",
 22 |     "%matplotlib inline"
 23 |    ]
 24 |   },
 25 |   {
 26 |    "cell_type": "markdown",
 27 |    "metadata": {},
 28 |    "source": [
 29 |     "# 准备数据"
 30 |    ]
 31 |   },
 32 |   {
 33 |    "cell_type": "code",
 34 |    "execution_count": 2,
 35 |    "metadata": {},
 36 |    "outputs": [],
 37 |    "source": [
 38 |     "path = '/disk/share/criteo/'\n",
 39 |     "data = pd.read_csv(path+'criteo_sampled_data.csv')\n",
 40 |     "cols = data.columns.values\n",
 41 |     "\n",
 42 |     "dense_feats = [f for f in cols if f[0] == \"I\"]\n",
 43 |     "sparse_feats = [f for f in cols if f[0] == \"C\"]\n",
 44 |     "\n",
 45 |     "def process_dense_feats(data, feats):\n",
 46 |     "    d = data.copy()\n",
 47 |     "    d = d[feats].fillna(0.0)\n",
 48 |     "    for f in feats:\n",
 49 |     "        d[f] = d[f].apply(lambda x: np.log(x+1) if x > -1 else -1)\n",
 50 |     "    \n",
 51 |     "    return d\n",
 52 |     "\n",
 53 |     "data_dense = process_dense_feats(data, dense_feats)\n",
 54 |     "\n",
 55 |     "vocab_sizes = {}\n",
 56 |     "def process_sparse_feats(data, feats):\n",
 57 |     "    d = data.copy()\n",
 58 |     "    d = d[feats].fillna(\"-1\")\n",
 59 |     "    for f in feats:\n",
 60 |     "        label_encoder = LabelEncoder()\n",
 61 |     "        d[f] = label_encoder.fit_transform(d[f])\n",
 62 |     "        vocab_sizes[f] = d[f].nunique() + 1\n",
 63 |     "    return d\n",
 64 |     "\n",
 65 |     "data_sparse = process_sparse_feats(data, sparse_feats)\n",
 66 |     "total_data = pd.concat([data_dense, data_sparse], axis=1)\n",
 67 |     "total_data['label'] = data['label']"
 68 |    ]
 69 |   },
 70 |   {
 71 |    "cell_type": "markdown",
 72 |    "metadata": {},
 73 |    "source": [
 74 |     "# 自定义层"
 75 |    ]
 76 |   },
 77 |   {
 78 |    "cell_type": "code",
 79 |    "execution_count": 3,
 80 |    "metadata": {},
 81 |    "outputs": [],
 82 |    "source": [
 83 |     "class SparseEmbedding(Layer):\n",
 84 |     "    def __init__(self, sparse_feats, vocab_sizes, embed_dims=8):\n",
 85 |     "        super().__init__()\n",
 86 |     "        self.sparse_feats = sparse_feats\n",
 87 |     "        self.vocab_sizes = vocab_sizes\n",
 88 |     "        self.embed_dims = embed_dims\n",
 89 |     "        \n",
 90 |     "        # 离散特征嵌入矩阵\n",
 91 |     "        self.sparse_embeds_mat = []\n",
 92 |     "        for idx, feat in enumerate(self.sparse_feats):\n",
 93 |     "            # reg = tf.keras.regularizers.l2(0.5)\n",
 94 |     "            emb = Embedding(input_dim=self.vocab_sizes[feat],\n",
 95 |     "                            output_dim=self.embed_dims,\n",
 96 |     "                            # embeddings_regularizer=reg,\n",
 97 |     "                            name=f'{feat}_emb')\n",
 98 |     "            self.sparse_embeds_mat.append(emb)\n",
 99 |     "        \n",
100 |     "    def call(self, sparse_inputs):\n",
101 |     "        sparse_embeds = []\n",
102 |     "        for idx, emb_mat in enumerate(self.sparse_embeds_mat):\n",
103 |     "            emb = emb_mat(sparse_inputs[idx])\n",
104 |     "            sparse_embeds.append(emb)\n",
105 |     "        concat_sparse_embeds = Concatenate(axis=1)(sparse_embeds)\n",
106 |     "        return concat_sparse_embeds"
107 |    ]
108 |   },
109 |   {
110 |    "cell_type": "code",
111 |    "execution_count": 48,
112 |    "metadata": {},
113 |    "outputs": [],
114 |    "source": [
115 |     "class DenseEmbedding(Layer):\n",
116 |     "    def __init__(self, dense_feats, embed_dims=8):\n",
117 |     "        super().__init__()\n",
118 |     "        self.embed_dims = embed_dims\n",
119 |     "        \n",
120 |     "        self.dense_embs = []\n",
121 |     "        for feat in dense_feats:\n",
122 |     "            dense_emb = self.add_weight(shape=[1, self.embed_dims], \n",
123 |     "                                        name=f'dense_emb_{feat}')\n",
124 |     "            self.dense_embs.append(dense_emb)\n",
125 |     "        \n",
126 |     "        \n",
127 |     "    def call(self, dense_inputs):\n",
128 |     "        scaled_embs = []\n",
129 |     "        for i, dense_input in enumerate(dense_inputs):\n",
130 |     "            dense_emb = dense_input * self.dense_embs[i]\n",
131 |     "            dense_emb = tf.expand_dims(dense_emb, axis=1)\n",
132 |     "            scaled_embs.append(dense_emb)\n",
133 |     "            \n",
134 |     "        concat_scaled_embs = Concatenate(axis=1)(scaled_embs)\n",
135 |     "        return concat_scaled_embs\n"
136 |    ]
137 |   },
138 |   {
139 |    "cell_type": "code",
140 |    "execution_count": 49,
141 |    "metadata": {},
142 |    "outputs": [],
143 |    "source": [
144 |     "class attention_cross_layer(Layer):\n",
145 |     "    def __init__(self, n_heads=6, att_dim=8):\n",
146 |     "        super().__init__()\n",
147 |     "        self.n_heads = n_heads\n",
148 |     "        self.att_dim = att_dim\n",
149 |     "        \n",
150 |     "    def build(self, input_shape):\n",
151 |     "        emb_dim = input_shape[-1]\n",
152 |     "        self.Wq = []\n",
153 |     "        self.Wv = []\n",
154 |     "        shape = [emb_dim, self.att_dim]\n",
155 |     "        for i in range(self.n_heads):\n",
156 |     "            self.Wq.append(self.add_weight(shape=shape, name=f'Wq_{i}'))\n",
157 |     "            self.Wv.append(self.add_weight(shape=shape, name=f'Wv_{i}'))\n",
158 |     "        \n",
159 |     "    def call(self, embeds): # ?,n,d\n",
160 |     "        heads = []\n",
161 |     "        for i in range(self.n_heads):\n",
162 |     "            emb_q = tf.matmul(embeds, self.Wq[i]) # ?,n,att_dim\n",
163 |     "            emb_v = tf.matmul(embeds, self.Wv[i]) # ?,n,att_dim\n",
164 |     "            emb = Attention()([emb_q, emb_v])\n",
165 |     "            heads.append(emb)\n",
166 |     "        if len(heads) > 1:\n",
167 |     "            heads = tf.concat(heads, axis=-1)\n",
168 |     "        else:\n",
169 |     "            heads = heads[0]\n",
170 |     "        return heads"
171 |    ]
172 |   },
173 |   {
174 |    "cell_type": "code",
175 |    "execution_count": null,
176 |    "metadata": {},
177 |    "outputs": [],
178 |    "source": []
179 |   },
180 |   {
181 |    "cell_type": "markdown",
182 |    "metadata": {},
183 |    "source": [
184 |     "# 构建模型 (keras函数式)"
185 |    ]
186 |   },
187 |   {
188 |    "cell_type": "code",
189 |    "execution_count": 52,
190 |    "metadata": {},
191 |    "outputs": [],
192 |    "source": [
193 |     "class AutoInt:\n",
194 |     "    def __init__(self, dense_feats, sparse_feats, vocab_sizes, \n",
195 |     "                 embed_dims=8, cross_layer_num=3, n_atten_layers=2):\n",
196 |     "        \n",
197 |     "        # 连续特征\n",
198 |     "        self.dense_inputs = []\n",
199 |     "        for feat in dense_feats:\n",
200 |     "            self.dense_inputs.append(Input(shape=1, name=feat))\n",
201 |     "            \n",
202 |     "        # 离散特征\n",
203 |     "        self.sparse_inputs = []\n",
204 |     "        for feat in sparse_feats:\n",
205 |     "            self.sparse_inputs.append(Input(shape=1, name=feat))\n",
206 |     "        \n",
207 |     "        self.SparseEmbedding = SparseEmbedding(sparse_feats, vocab_sizes, embed_dims=8)\n",
208 |     "        self.DenseEmbedding = DenseEmbedding(dense_feats, embed_dims=8)\n",
209 |     "        \n",
210 |     "        self.atten_cross_layers = []\n",
211 |     "        for i in range(n_atten_layers):\n",
212 |     "            self.atten_cross_layers.append(attention_cross_layer())\n",
213 |     "        \n",
214 |     "        self.dense = Dense(1, activation='sigmoid')\n",
215 |     "        \n",
216 |     "    def bulid_model(self):\n",
217 |     "        all_inputs = [self.dense_inputs, self.sparse_inputs]\n",
218 |     "        \n",
219 |     "        \n",
220 |     "        concat_sparse_embeds = self.SparseEmbedding(self.sparse_inputs)\n",
221 |     "        concat_dense_embeds = self.DenseEmbedding(self.dense_inputs)\n",
222 |     "\n",
223 |     "        concat_embeds = Concatenate(axis=1)([concat_sparse_embeds, concat_dense_embeds])\n",
224 |     "        \n",
225 |     "        atten_output = concat_embeds\n",
226 |     "        for layer in self.atten_cross_layers:\n",
227 |     "            atten_output = layer(atten_output)\n",
228 |     "        \n",
229 |     "        # 输出部分\n",
230 |     "        output = self.dense(Flatten()(atten_output))\n",
231 |     "        \n",
232 |     "        model = Model(inputs=all_inputs, outputs=output)\n",
233 |     "        return model"
234 |    ]
235 |   },
236 |   {
237 |    "cell_type": "code",
238 |    "execution_count": 53,
239 |    "metadata": {
240 |     "scrolled": true
241 |    },
242 |    "outputs": [
243 |     {
244 |      "name": "stdout",
245 |      "output_type": "stream",
246 |      "text": [
247 |       "Epoch 1/3\n",
248 |       "1954/1954 [==============================] - 63s 32ms/step - loss: 0.4939 - binary_crossentropy: 0.4939 - auc: 0.7452 - val_loss: 0.4866 - val_binary_crossentropy: 0.4866 - val_auc: 0.7621 - lr: 0.0010\n",
249 |       "Epoch 2/3\n",
250 |       " 107/1954 [>.............................] - ETA: 54s - loss: 0.4650 - binary_crossentropy: 0.4650 - auc: 0.7802"
251 |      ]
252 |     },
253 |     {
254 |      "ename": "KeyboardInterrupt",
255 |      "evalue": "",
256 |      "output_type": "error",
257 |      "traceback": [
258 |       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
259 |       "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
260 |       "\u001b[0;32m<ipython-input-53-62df256e7fb6>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m     31\u001b[0m model.fit([train_dense_x_all, train_sparse_x_all], train_label_all, batch_size=256,\n\u001b[1;32m     32\u001b[0m          \u001b[0mvalidation_data\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mval_dense_x_all\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mval_sparse_x_all\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mval_label_all\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 33\u001b[0;31m          callbacks=callbacks, epochs=3)\n\u001b[0m",
261 |       "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py\u001b[0m in \u001b[0;36m_method_wrapper\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m     64\u001b[0m   \u001b[0;32mdef\u001b[0m \u001b[0m_method_wrapper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     65\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_in_multi_worker_mode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m  \u001b[0;31m# pylint: disable=protected-access\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 66\u001b[0;31m       \u001b[0;32mreturn\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     67\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     68\u001b[0m     \u001b[0;31m# Running inside `run_distribute_coordinator` already.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
262 |       "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)\u001b[0m\n\u001b[1;32m    846\u001b[0m                 batch_size=batch_size):\n\u001b[1;32m    847\u001b[0m               \u001b[0mcallbacks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mon_train_batch_begin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 848\u001b[0;31m               \u001b[0mtmp_logs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtrain_function\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0miterator\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    849\u001b[0m               \u001b[0;31m# Catch OutOfRangeError for Datasets of unknown size.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    850\u001b[0m               \u001b[0;31m# This blocks until the batch has finished executing.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
263 |       "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwds)\u001b[0m\n\u001b[1;32m    578\u001b[0m         \u001b[0mxla_context\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mExit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    579\u001b[0m     \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 580\u001b[0;31m       \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    581\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    582\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0mtracing_count\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_tracing_count\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
264 |       "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py\u001b[0m in \u001b[0;36m_call\u001b[0;34m(self, *args, **kwds)\u001b[0m\n\u001b[1;32m    609\u001b[0m       \u001b[0;31m# In this case we have created variables on the first call, so we run the\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    610\u001b[0m       \u001b[0;31m# defunned version which is guaranteed to never create variables.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 611\u001b[0;31m       \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_stateless_fn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m  \u001b[0;31m# pylint: disable=not-callable\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    612\u001b[0m     \u001b[0;32melif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_stateful_fn\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    613\u001b[0m       \u001b[0;31m# Release the lock early so that multiple threads can perform the call\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
265 |       "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/tensorflow/python/eager/function.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   2418\u001b[0m     \u001b[0;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_lock\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2419\u001b[0m       \u001b[0mgraph_function\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_maybe_define_function\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2420\u001b[0;31m     \u001b[0;32mreturn\u001b[0m \u001b[0mgraph_function\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_filtered_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m  \u001b[0;31m# pylint: disable=protected-access\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   2421\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2422\u001b[0m   \u001b[0;34m@\u001b[0m\u001b[0mproperty\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
266 |       "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/tensorflow/python/eager/function.py\u001b[0m in \u001b[0;36m_filtered_call\u001b[0;34m(self, args, kwargs)\u001b[0m\n\u001b[1;32m   1663\u001b[0m          if isinstance(t, (ops.Tensor,\n\u001b[1;32m   1664\u001b[0m                            resource_variable_ops.BaseResourceVariable))),\n\u001b[0;32m-> 1665\u001b[0;31m         self.captured_inputs)\n\u001b[0m\u001b[1;32m   1666\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1667\u001b[0m   \u001b[0;32mdef\u001b[0m \u001b[0m_call_flat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcaptured_inputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcancellation_manager\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
267 |       "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/tensorflow/python/eager/function.py\u001b[0m in \u001b[0;36m_call_flat\u001b[0;34m(self, args, captured_inputs, cancellation_manager)\u001b[0m\n\u001b[1;32m   1744\u001b[0m       \u001b[0;31m# No tape is watching; skip to running the function.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1745\u001b[0m       return self._build_call_outputs(self._inference_function.call(\n\u001b[0;32m-> 1746\u001b[0;31m           ctx, args, cancellation_manager=cancellation_manager))\n\u001b[0m\u001b[1;32m   1747\u001b[0m     forward_backward = self._select_forward_and_backward_functions(\n\u001b[1;32m   1748\u001b[0m         \u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
268 |       "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/tensorflow/python/eager/function.py\u001b[0m in \u001b[0;36mcall\u001b[0;34m(self, ctx, args, cancellation_manager)\u001b[0m\n\u001b[1;32m    596\u001b[0m               \u001b[0minputs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    597\u001b[0m               \u001b[0mattrs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mattrs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 598\u001b[0;31m               ctx=ctx)\n\u001b[0m\u001b[1;32m    599\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    600\u001b[0m           outputs = execute.execute_with_cancellation(\n",
269 |       "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/tensorflow/python/eager/execute.py\u001b[0m in \u001b[0;36mquick_execute\u001b[0;34m(op_name, num_outputs, inputs, attrs, ctx, name)\u001b[0m\n\u001b[1;32m     58\u001b[0m     \u001b[0mctx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mensure_initialized\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     59\u001b[0m     tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,\n\u001b[0;32m---> 60\u001b[0;31m                                         inputs, attrs, num_outputs)\n\u001b[0m\u001b[1;32m     61\u001b[0m   \u001b[0;32mexcept\u001b[0m \u001b[0mcore\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_NotOkStatusException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     62\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0mname\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
270 |       "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
271 |      ]
272 |     }
273 |    ],
274 |    "source": [
275 |     "train_data = total_data.loc[:500000-1]\n",
276 |     "valid_data = total_data.loc[500000:]\n",
277 |     "\n",
278 |     "train_dense_x_all = [train_data[f].values for f in dense_feats]\n",
279 |     "train_sparse_x_all = [train_data[f].values for f in sparse_feats]\n",
280 |     "train_label_all = train_data[['label']].values\n",
281 |     "\n",
282 |     "val_dense_x_all = [valid_data[f].values for f in dense_feats]\n",
283 |     "val_sparse_x_all = [valid_data[f].values for f in sparse_feats]\n",
284 |     "val_label_all = valid_data[['label']].values\n",
285 |     "\n",
286 |     "\n",
287 |     "model = AutoInt(dense_feats, sparse_feats, vocab_sizes).bulid_model()\n",
288 |     "model.compile(optimizer='rmsprop', loss='binary_crossentropy', \n",
289 |     "              metrics=['binary_crossentropy', 'AUC']) # tf.keras.metrics.AUC()\n",
290 |     "\n",
291 |     "os.makedirs('checkpoints', exist_ok=True)\n",
292 |     "checkpoints = ModelCheckpoint('checkpoints/model.h5', monitor='val_auc', \n",
293 |     "                              mode='max', save_weights_only=True)# save_best_only=True\n",
294 |     "early_stopping = EarlyStopping(monitor='val_auc', min_delta=0.0001, patience=2)\n",
295 |     "def scheduler(epoch):\n",
296 |     "    thred = 10\n",
297 |     "    if epoch < thred:\n",
298 |     "        return 0.001\n",
299 |     "    else:\n",
300 |     "        return 0.001 * tf.math.exp(0.1 * (thred - epoch))\n",
301 |     "lr_schedule = LearningRateScheduler(scheduler)\n",
302 |     "callbacks = [early_stopping, lr_schedule, checkpoints] # \n",
303 |     "\n",
304 |     "\n",
305 |     "model.fit([train_dense_x_all, train_sparse_x_all], train_label_all, batch_size=256,\n",
306 |     "         validation_data=([val_dense_x_all, val_sparse_x_all], val_label_all),\n",
307 |     "         callbacks=callbacks, epochs=3)"
308 |    ]
309 |   },
310 |   {
311 |    "cell_type": "code",
312 |    "execution_count": null,
313 |    "metadata": {},
314 |    "outputs": [],
315 |    "source": []
316 |   }
317 |  ],
318 |  "metadata": {
319 |   "kernelspec": {
320 |    "display_name": "Python 3",
321 |    "language": "python",
322 |    "name": "python3"
323 |   },
324 |   "language_info": {
325 |    "codemirror_mode": {
326 |     "name": "ipython",
327 |     "version": 3
328 |    },
329 |    "file_extension": ".py",
330 |    "mimetype": "text/x-python",
331 |    "name": "python",
332 |    "nbconvert_exporter": "python",
333 |    "pygments_lexer": "ipython3",
334 |    "version": "3.7.6"
335 |   }
336 |  },
337 |  "nbformat": 4,
338 |  "nbformat_minor": 4
339 | }
340 | 


--------------------------------------------------------------------------------
/DCN.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import os\n",
 10 |     "import numpy as np\n",
 11 |     "import pandas as pd\n",
 12 |     "from tensorflow.keras.layers import *\n",
 13 |     "import tensorflow.keras.backend as K\n",
 14 |     "import matplotlib.pyplot as plt\n",
 15 |     "import tensorflow as tf\n",
 16 |     "from tensorflow.keras.models import Model\n",
 17 |     "from tensorflow.keras.utils import plot_model\n",
 18 |     "from tensorflow.keras.callbacks import *\n",
 19 |     "from sklearn.preprocessing import LabelEncoder\n",
 20 |     "# from tensorflow.keras.constraints import *\n",
 21 |     "\n",
 22 |     "%matplotlib inline"
 23 |    ]
 24 |   },
 25 |   {
 26 |    "cell_type": "markdown",
 27 |    "metadata": {},
 28 |    "source": [
 29 |     "# 准备数据"
 30 |    ]
 31 |   },
 32 |   {
 33 |    "cell_type": "code",
 34 |    "execution_count": 2,
 35 |    "metadata": {},
 36 |    "outputs": [],
 37 |    "source": [
 38 |     "path = '/disk/share/criteo/'\n",
 39 |     "data = pd.read_csv(path+'criteo_sampled_data.csv')\n",
 40 |     "cols = data.columns.values\n",
 41 |     "\n",
 42 |     "dense_feats = [f for f in cols if f[0] == \"I\"]\n",
 43 |     "sparse_feats = [f for f in cols if f[0] == \"C\"]\n",
 44 |     "\n",
 45 |     "def process_dense_feats(data, feats):\n",
 46 |     "    d = data.copy()\n",
 47 |     "    d = d[feats].fillna(0.0)\n",
 48 |     "    for f in feats:\n",
 49 |     "        d[f] = d[f].apply(lambda x: np.log(x+1) if x > -1 else -1)\n",
 50 |     "    \n",
 51 |     "    return d\n",
 52 |     "\n",
 53 |     "data_dense = process_dense_feats(data, dense_feats)\n",
 54 |     "\n",
 55 |     "vocab_sizes = {}\n",
 56 |     "def process_sparse_feats(data, feats):\n",
 57 |     "    d = data.copy()\n",
 58 |     "    d = d[feats].fillna(\"-1\")\n",
 59 |     "    for f in feats:\n",
 60 |     "        label_encoder = LabelEncoder()\n",
 61 |     "        d[f] = label_encoder.fit_transform(d[f])\n",
 62 |     "        vocab_sizes[f] = d[f].nunique() + 1\n",
 63 |     "    return d\n",
 64 |     "\n",
 65 |     "data_sparse = process_sparse_feats(data, sparse_feats)\n",
 66 |     "total_data = pd.concat([data_dense, data_sparse], axis=1)\n",
 67 |     "total_data['label'] = data['label']"
 68 |    ]
 69 |   },
 70 |   {
 71 |    "cell_type": "markdown",
 72 |    "metadata": {},
 73 |    "source": [
 74 |     "# 自定义层"
 75 |    ]
 76 |   },
 77 |   {
 78 |    "cell_type": "code",
 79 |    "execution_count": 3,
 80 |    "metadata": {},
 81 |    "outputs": [],
 82 |    "source": [
 83 |     "class SparseEmbedding(Layer):\n",
 84 |     "    def __init__(self, sparse_feats, vocab_sizes, embed_dims=8):\n",
 85 |     "        super().__init__()\n",
 86 |     "        self.sparse_feats = sparse_feats\n",
 87 |     "        self.vocab_sizes = vocab_sizes\n",
 88 |     "        self.embed_dims = embed_dims\n",
 89 |     "        \n",
 90 |     "        # 离散特征嵌入矩阵\n",
 91 |     "        self.sparse_embeds_mat = []\n",
 92 |     "        for idx, feat in enumerate(self.sparse_feats):\n",
 93 |     "            # reg = tf.keras.regularizers.l2(0.5)\n",
 94 |     "            emb = Embedding(input_dim=self.vocab_sizes[feat],\n",
 95 |     "                            output_dim=self.embed_dims,\n",
 96 |     "                            # embeddings_regularizer=reg,\n",
 97 |     "                            name=f'{feat}_emb')\n",
 98 |     "            self.sparse_embeds_mat.append(emb)\n",
 99 |     "        \n",
100 |     "    def call(self, sparse_inputs):\n",
101 |     "        sparse_embeds = []\n",
102 |     "        for idx, emb_mat in enumerate(self.sparse_embeds_mat):\n",
103 |     "            emb = emb_mat(sparse_inputs[idx])\n",
104 |     "            sparse_embeds.append(emb)\n",
105 |     "        concat_sparse_embeds = Concatenate(axis=1)(sparse_embeds)\n",
106 |     "        return concat_sparse_embeds"
107 |    ]
108 |   },
109 |   {
110 |    "cell_type": "code",
111 |    "execution_count": 4,
112 |    "metadata": {},
113 |    "outputs": [],
114 |    "source": [
115 |     "class single_cross_layer(Layer):\n",
116 |     "    def __init__(self):\n",
117 |     "        super().__init__()\n",
118 |     "        \n",
119 |     "    def build(self, input_shape):\n",
120 |     "        embed_dims = input_shape[0][-1]\n",
121 |     "        self.w = self.add_weight(shape=[embed_dims, 1], name='w')\n",
122 |     "        self.b = self.add_weight(shape=[embed_dims, 1], name='b')\n",
123 |     "    \n",
124 |     "    def call(self, inputs):\n",
125 |     "        x0, xl = inputs\n",
126 |     "        x0 = tf.expand_dims(x0, -1)\n",
127 |     "        xl = tf.expand_dims(xl, -1)\n",
128 |     "        x0_xl = tf.matmul(x0, xl, transpose_b=True)\n",
129 |     "        x_next = tf.matmul(x0_xl, self.w) + xl + self.b\n",
130 |     "        x_next = tf.squeeze(x_next, axis=-1)\n",
131 |     "        return x_next"
132 |    ]
133 |   },
134 |   {
135 |    "cell_type": "code",
136 |    "execution_count": 5,
137 |    "metadata": {},
138 |    "outputs": [],
139 |    "source": [
140 |     "class cross_layer(Layer):\n",
141 |     "    def __init__(self, cross_layer_num):\n",
142 |     "        super().__init__()\n",
143 |     "        self.cross_layer_num = cross_layer_num\n",
144 |     "        self.cross_layers = []\n",
145 |     "        for i in range(cross_layer_num):\n",
146 |     "            self.cross_layers.append(single_cross_layer())\n",
147 |     "    \n",
148 |     "    def call(self, inputs):\n",
149 |     "        x0 = inputs\n",
150 |     "        xl = self.cross_layers[0]([x0, x0])\n",
151 |     "        for layer in self.cross_layers[1:]:\n",
152 |     "            xl = layer([x0, xl])\n",
153 |     "        return xl\n",
154 |     "    "
155 |    ]
156 |   },
157 |   {
158 |    "cell_type": "code",
159 |    "execution_count": 6,
160 |    "metadata": {},
161 |    "outputs": [],
162 |    "source": [
163 |     "class DNN(Layer):\n",
164 |     "    def __init__(self, hid_units=[256,256,256], use_dropout=True, output_unit=16):\n",
165 |     "        super().__init__()\n",
166 |     "        self.hid_units = hid_units\n",
167 |     "        self.use_dropout = use_dropout\n",
168 |     "        self.output_unit = output_unit\n",
169 |     "        self.Dropout = Dropout(0.3)\n",
170 |     "        self.dense_layers = []\n",
171 |     "        for unit in self.hid_units:\n",
172 |     "            self.dense_layers.append(Dense(unit, activation='relu'))\n",
173 |     "        self.dense_layers.append(Dense(self.output_unit))\n",
174 |     "        \n",
175 |     "    def call(self, concat_sparse_embeds):\n",
176 |     "        flat_sparse_embed = Flatten()(concat_sparse_embeds)\n",
177 |     "        \n",
178 |     "        x = self.dense_layers[0](flat_sparse_embed)\n",
179 |     "        for dense in self.dense_layers[1:]:\n",
180 |     "            x = dense(x)\n",
181 |     "            if self.use_dropout:\n",
182 |     "                x = self.Dropout(x)\n",
183 |     "        return x"
184 |    ]
185 |   },
186 |   {
187 |    "cell_type": "markdown",
188 |    "metadata": {},
189 |    "source": [
190 |     "# 构建模型 (keras函数式)"
191 |    ]
192 |   },
193 |   {
194 |    "cell_type": "code",
195 |    "execution_count": 7,
196 |    "metadata": {},
197 |    "outputs": [],
198 |    "source": [
199 |     "class DCN:\n",
200 |     "    def __init__(self, dense_feats, sparse_feats, vocab_sizes, \n",
201 |     "                 embed_dims=8, cross_layer_num=3):\n",
202 |     "        \n",
203 |     "        # 连续特征\n",
204 |     "        self.dense_inputs = []\n",
205 |     "        for feat in dense_feats:\n",
206 |     "            self.dense_inputs.append(Input(shape=1, name=feat))\n",
207 |     "            \n",
208 |     "        # 离散特征\n",
209 |     "        self.sparse_inputs = []\n",
210 |     "        for feat in sparse_feats:\n",
211 |     "            self.sparse_inputs.append(Input(shape=1, name=feat))\n",
212 |     "        \n",
213 |     "        self.SparseEmbedding = SparseEmbedding(sparse_feats, vocab_sizes, embed_dims=8)\n",
214 |     "        \n",
215 |     "        self.cross_layer = cross_layer(cross_layer_num)\n",
216 |     "        \n",
217 |     "        self.DNN = DNN()\n",
218 |     "        self.dense = Dense(1, activation='sigmoid')\n",
219 |     "        \n",
220 |     "    def bulid_model(self):\n",
221 |     "        all_inputs = [self.dense_inputs, self.sparse_inputs]\n",
222 |     "        \n",
223 |     "        concat_dense_inputs = Concatenate(axis=1)(self.dense_inputs)\n",
224 |     "        \n",
225 |     "        concat_sparse_embeds = self.SparseEmbedding(self.sparse_inputs)\n",
226 |     "        flatten_sparse_embeds = Flatten()(concat_sparse_embeds)\n",
227 |     "        \n",
228 |     "        concat_inputs = Concatenate(axis=1)([flatten_sparse_embeds, concat_dense_inputs])\n",
229 |     "        cross_output = self.cross_layer(concat_inputs)\n",
230 |     "        \n",
231 |     "        fc_layer_output = self.DNN(concat_sparse_embeds)\n",
232 |     "        \n",
233 |     "        # 输出部分\n",
234 |     "        concat_layer = Concatenate()([cross_output, fc_layer_output])\n",
235 |     "        output = self.dense(concat_layer)\n",
236 |     "        \n",
237 |     "        model = Model(inputs=all_inputs, outputs=output)\n",
238 |     "        return model"
239 |    ]
240 |   },
241 |   {
242 |    "cell_type": "code",
243 |    "execution_count": 19,
244 |    "metadata": {
245 |     "scrolled": true
246 |    },
247 |    "outputs": [
248 |     {
249 |      "name": "stdout",
250 |      "output_type": "stream",
251 |      "text": [
252 |       "Epoch 1/3\n",
253 |       "WARNING:tensorflow:From /root/anaconda3/lib/python3.7/site-packages/tensorflow/python/ops/resource_variable_ops.py:1817: calling BaseResourceVariable.__init__ (from tensorflow.python.ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version.\n",
254 |       "Instructions for updating:\n",
255 |       "If using Keras pass *_constraint arguments to layers.\n",
256 |       "1954/1954 [==============================] - 111s 57ms/step - loss: 0.4835 - binary_crossentropy: 0.4835 - auc: 0.7580 - val_loss: 0.4794 - val_binary_crossentropy: 0.4794 - val_auc: 0.7693 - lr: 0.0010\n",
257 |       "Epoch 2/3\n",
258 |       "1954/1954 [==============================] - 111s 57ms/step - loss: 0.4586 - binary_crossentropy: 0.4586 - auc: 0.7898 - val_loss: 0.4993 - val_binary_crossentropy: 0.4993 - val_auc: 0.7686 - lr: 0.0010\n",
259 |       "Epoch 3/3\n",
260 |       "1954/1954 [==============================] - 110s 56ms/step - loss: 0.4238 - binary_crossentropy: 0.4238 - auc: 0.8248 - val_loss: 0.5280 - val_binary_crossentropy: 0.5280 - val_auc: 0.7396 - lr: 0.0010\n"
261 |      ]
262 |     },
263 |     {
264 |      "data": {
265 |       "text/plain": [
266 |        "<tensorflow.python.keras.callbacks.History at 0x7f5d7f7d48d0>"
267 |       ]
268 |      },
269 |      "execution_count": 19,
270 |      "metadata": {},
271 |      "output_type": "execute_result"
272 |     }
273 |    ],
274 |    "source": [
275 |     "train_data = total_data.loc[:500000-1]\n",
276 |     "valid_data = total_data.loc[500000:]\n",
277 |     "\n",
278 |     "train_dense_x_all = [train_data[f].values for f in dense_feats]\n",
279 |     "train_sparse_x_all = [train_data[f].values for f in sparse_feats]\n",
280 |     "train_label_all = train_data[['label']].values\n",
281 |     "\n",
282 |     "val_dense_x_all = [valid_data[f].values for f in dense_feats]\n",
283 |     "val_sparse_x_all = [valid_data[f].values for f in sparse_feats]\n",
284 |     "val_label_all = valid_data[['label']].values\n",
285 |     "\n",
286 |     "\n",
287 |     "model = DCN(dense_feats, sparse_feats, vocab_sizes).bulid_model()\n",
288 |     "model.compile(optimizer='rmsprop', loss='binary_crossentropy', \n",
289 |     "              metrics=['binary_crossentropy', 'AUC']) # tf.keras.metrics.AUC()\n",
290 |     "\n",
291 |     "os.makedirs('checkpoints', exist_ok=True)\n",
292 |     "checkpoints = ModelCheckpoint('checkpoints/model.h5', monitor='val_auc', \n",
293 |     "                              mode='max', save_weights_only=True)# save_best_only=True\n",
294 |     "early_stopping = EarlyStopping(monitor='val_auc', min_delta=0.0001, patience=2)\n",
295 |     "def scheduler(epoch):\n",
296 |     "    thred = 10\n",
297 |     "    if epoch < thred:\n",
298 |     "        return 0.001\n",
299 |     "    else:\n",
300 |     "        return 0.001 * tf.math.exp(0.1 * (thred - epoch))\n",
301 |     "lr_schedule = LearningRateScheduler(scheduler)\n",
302 |     "callbacks = [early_stopping, lr_schedule, checkpoints] # \n",
303 |     "\n",
304 |     "\n",
305 |     "model.fit([train_dense_x_all, train_sparse_x_all], train_label_all, batch_size=256,\n",
306 |     "         validation_data=([val_dense_x_all, val_sparse_x_all], val_label_all),\n",
307 |     "         callbacks=callbacks, epochs=3)"
308 |    ]
309 |   },
310 |   {
311 |    "cell_type": "code",
312 |    "execution_count": 16,
313 |    "metadata": {},
314 |    "outputs": [],
315 |    "source": [
316 |     "# # 加载模型\n",
317 |     "# model = DCN(dense_feats, sparse_feats, vocab_sizes).bulid_model()\n",
318 |     "# model.load_weights('checkpoints/model.h5')"
319 |    ]
320 |   },
321 |   {
322 |    "cell_type": "code",
323 |    "execution_count": null,
324 |    "metadata": {},
325 |    "outputs": [],
326 |    "source": []
327 |   }
328 |  ],
329 |  "metadata": {
330 |   "kernelspec": {
331 |    "display_name": "Python 3",
332 |    "language": "python",
333 |    "name": "python3"
334 |   },
335 |   "language_info": {
336 |    "codemirror_mode": {
337 |     "name": "ipython",
338 |     "version": 3
339 |    },
340 |    "file_extension": ".py",
341 |    "mimetype": "text/x-python",
342 |    "name": "python",
343 |    "nbconvert_exporter": "python",
344 |    "pygments_lexer": "ipython3",
345 |    "version": "3.7.6"
346 |   }
347 |  },
348 |  "nbformat": 4,
349 |  "nbformat_minor": 4
350 | }
351 | 


--------------------------------------------------------------------------------
/DIN_DIEN/DIEN.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import numpy as np\n",
 10 |     "import json\n",
 11 |     "import pickle as pkl\n",
 12 |     "import random\n",
 13 |     "import gzip\n",
 14 |     "import tensorflow as tf\n",
 15 |     "from tensorflow.keras.layers import *\n",
 16 |     "from tensorflow.keras import Model\n",
 17 |     "from layers import Dice\n",
 18 |     "from utils import DataIterator, prepare_data"
 19 |    ]
 20 |   },
 21 |   {
 22 |    "cell_type": "code",
 23 |    "execution_count": 2,
 24 |    "metadata": {},
 25 |    "outputs": [],
 26 |    "source": [
 27 |     "class EmbeddingLayer(Layer):\n",
 28 |     "    def __init__(self, user_count, item_count, cate_count, emb_dim, use_negsampling=False):\n",
 29 |     "        super().__init__()\n",
 30 |     "        self.emb_dim = emb_dim\n",
 31 |     "        self.use_negsampling = use_negsampling\n",
 32 |     "        self.user_emb = Embedding(user_count, self.emb_dim,\n",
 33 |     "                                  mask_zero=True, name=\"user_emb\")\n",
 34 |     "        self.item_emb = Embedding(item_count, self.emb_dim,\n",
 35 |     "                                  mask_zero=True, name=\"item_emb\")\n",
 36 |     "        self.cate_emb = Embedding(cate_count, self.emb_dim,\n",
 37 |     "                                  mask_zero=True, name=\"cate_emb\")\n",
 38 |     "        \n",
 39 |     "    def call(self, user, item, cate, item_his, cate_his,\n",
 40 |     "             noclick_item_his=[],  noclick_cate_his=[]):\n",
 41 |     "        user_emb = self.user_emb(user) # (B, D)\n",
 42 |     "        \n",
 43 |     "        # 基本属性embedding:\n",
 44 |     "        item_emb = self.item_emb(item) # (B, D)\n",
 45 |     "        cate_emb = self.cate_emb(cate) # (B, D)\n",
 46 |     "        item_join_emb = Concatenate(-1)([item_emb, cate_emb]) # (B, 2D)\n",
 47 |     "        \n",
 48 |     "        \n",
 49 |     "        # 历史行为序列embedding:\n",
 50 |     "        item_his_emb = self.item_emb(item_his) # (B, T, D)\n",
 51 |     "        cate_his_emb = self.item_emb(cate_his) # (B, T, D)\n",
 52 |     "        item_join_his_emb = Concatenate(-1)([item_his_emb, cate_his_emb]) # (B, T, 2D)\n",
 53 |     "        item_his_emb_sum = tf.reduce_sum(item_join_his_emb, axis=1) # (B, D)\n",
 54 |     "        \n",
 55 |     "        if self.use_negsampling:\n",
 56 |     "            # (B, T, neg_num, D)\n",
 57 |     "            noclick_item_his_emb = self.item_emb(noclick_item_his) \n",
 58 |     "            # (B, T, neg_num, D)\n",
 59 |     "            noclick_cate_his_emb = self.item_emb(noclick_cate_his) \n",
 60 |     "            # (B, T, neg_num, 2D)\n",
 61 |     "            noclick_item_join_his_emb = Concatenate(-1)([noclick_item_his_emb, noclick_cate_his_emb])\n",
 62 |     "            # (B, T, 2D)\n",
 63 |     "            noclick_item_emb_neg_sum = tf.reduce_sum(noclick_item_join_his_emb, axis=2) \n",
 64 |     "            # (B, 2D)\n",
 65 |     "            noclick_item_his_emb_sum = tf.reduce_sum(noclick_item_emb_neg_sum, axis=1) \n",
 66 |     "            # 只取出第一个负样本构成序列，(B, T, 2D)\n",
 67 |     "            noclick_item_join_his_emb = noclick_item_join_his_emb[:, :, 0, :] \n",
 68 |     "            # # (B, T, 2D)\n",
 69 |     "            # noclick_item_join_his_emb = tf.squeeze(noclick_item_join_his_emb, 2)\n",
 70 |     "            \n",
 71 |     "            return user_emb, item_join_emb, \\\n",
 72 |     "                    item_join_his_emb, item_his_emb_sum, \\\n",
 73 |     "                    noclick_item_join_his_emb, noclick_item_his_emb_sum \n",
 74 |     "            \n",
 75 |     "        return user_emb, item_join_emb, \\\n",
 76 |     "                item_join_his_emb, item_his_emb_sum\n",
 77 |     "        "
 78 |    ]
 79 |   },
 80 |   {
 81 |    "cell_type": "code",
 82 |    "execution_count": 13,
 83 |    "metadata": {},
 84 |    "outputs": [],
 85 |    "source": [
 86 |     "class FCLayer(Layer):\n",
 87 |     "    def __init__(self, hid_dims=[80, 40, 2], use_dice=False):\n",
 88 |     "        super().__init__()\n",
 89 |     "        self.hid_dims = hid_dims\n",
 90 |     "        self.use_dice = use_dice\n",
 91 |     "        self.bn = BatchNormalization()\n",
 92 |     "        self.fc = []\n",
 93 |     "        self.dice = []\n",
 94 |     "        for dim in self.hid_dims[:-1]:\n",
 95 |     "            if use_dice:\n",
 96 |     "                self.fc.append(Dense(dim, name=f'dense_{dim}'))\n",
 97 |     "                self.dice.append(Dice())\n",
 98 |     "            else:\n",
 99 |     "                self.fc.append(Dense(dim, activation=\"sigmoid\", \n",
100 |     "                                     name=f'dense_{dim}'))\n",
101 |     "        self.fc.append(Dense(self.hid_dims[-1], name=\"dense_output\"))\n",
102 |     "        \n",
103 |     "    def call(self, inputs):\n",
104 |     "        inputs = self.bn(inputs)\n",
105 |     "        if self.use_dice:\n",
106 |     "            fc_out = inputs\n",
107 |     "            for i in range(len(self.dice)):\n",
108 |     "                fc_out = self.fc[i](fc_out)\n",
109 |     "                fc_out = self.dice[i](fc_out)\n",
110 |     "            fc_out = self.fc[-1](fc_out)\n",
111 |     "            return fc_out\n",
112 |     "        else: \n",
113 |     "            fc_out = self.fc[0](inputs)\n",
114 |     "            for fc in self.fc[1:]:\n",
115 |     "                fc_out = fc(fc_out)\n",
116 |     "            return fc_out"
117 |    ]
118 |   },
119 |   {
120 |    "cell_type": "code",
121 |    "execution_count": 14,
122 |    "metadata": {},
123 |    "outputs": [],
124 |    "source": [
125 |     "# 计算注意力得分\n",
126 |     "class DINAttenLayer(Layer):\n",
127 |     "    def __init__(self, hid_dims=[80, 40, 1]):\n",
128 |     "        super().__init__()\n",
129 |     "        self.FCLayer = FCLayer(hid_dims)\n",
130 |     "        \n",
131 |     "    def call(self, query, facts, mask):\n",
132 |     "        \"\"\"\n",
133 |     "        query: (B, 2D)\n",
134 |     "        facts: (B, T, 2D)\n",
135 |     "        mask: (B, T)\n",
136 |     "        \"\"\"\n",
137 |     "        mask = tf.equal(mask, tf.ones_like(mask)) # (B, T)\n",
138 |     "        queries = tf.tile(query, [1, facts.shape[1]]) # (B, 2D*T)\n",
139 |     "        queries = tf.reshape(queries, [-1, facts.shape[1], facts.shape[2]]) # # (B, T, 2D)\n",
140 |     "        # (B, T, 2D*4)\n",
141 |     "        din_all = tf.concat([queries, facts, queries - facts, queries * facts], axis=-1)\n",
142 |     "        \n",
143 |     "        fc_out = self.FCLayer(din_all) # (B, T, 1)\n",
144 |     "        score = fc_out # (B, T, 1)\n",
145 |     "        key_masks = tf.expand_dims(mask, 2) # (B, T) -> (B, T, 1)\n",
146 |     "        padding = tf.ones_like(score) * (-2**32 + 1)\n",
147 |     "        # True的地方为score，否则为极大的负数\n",
148 |     "        score = tf.where(key_masks, score, padding) # (B, T, 1)\n",
149 |     "        score = tf.nn.softmax(score) # (B, T, 1)\n",
150 |     "        \n",
151 |     "        return score\n",
152 |     "        "
153 |    ]
154 |   },
155 |   {
156 |    "cell_type": "code",
157 |    "execution_count": 15,
158 |    "metadata": {},
159 |    "outputs": [],
160 |    "source": [
161 |     "class AuxTrainLayer(Layer):\n",
162 |     "    def __init__(self, hid_dims=[100, 50, 1]):\n",
163 |     "        super().__init__()\n",
164 |     "        self.clk_fc = FCLayer(hid_dims)\n",
165 |     "        self.noclk_fc = FCLayer(hid_dims)\n",
166 |     "        \n",
167 |     "    def call(self, h_states, click_seq, noclick_seq, mask):\n",
168 |     "        mask = tf.cast(mask, tf.float32)\n",
169 |     "        seq_len = click_seq.shape[1] # T-1\n",
170 |     "        \n",
171 |     "        clk_input = tf.concat([h_states, click_seq], -1) # (B, T-1, 2D*2)\n",
172 |     "        clk_prob = tf.sigmoid(self.clk_fc(clk_input)) # (B, T-1, 1)\n",
173 |     "        # (B, T-1)\n",
174 |     "        clk_loss = - tf.reshape(tf.math.log(clk_prob), [-1, seq_len]) * mask \n",
175 |     "        \n",
176 |     "        noclk_input = tf.concat([h_states, noclick_seq], -1) # (B, T-1, 2D*2)\n",
177 |     "        noclk_prob = tf.sigmoid(self.clk_fc(noclk_input)) # (B, T-1, 1)\n",
178 |     "        # (B, T-1)\n",
179 |     "        noclk_loss = - tf.reshape(tf.math.log(1.0 - noclk_prob), [-1, seq_len]) * mask\n",
180 |     "        # 不指定axis，则计算全部数值的平均值\n",
181 |     "        aux_loss = tf.reduce_mean(clk_loss + noclk_loss)\n",
182 |     "        return aux_loss\n",
183 |     "        "
184 |    ]
185 |   },
186 |   {
187 |    "cell_type": "code",
188 |    "execution_count": 16,
189 |    "metadata": {},
190 |    "outputs": [],
191 |    "source": [
192 |     "class AUGRUCell(Layer):\n",
193 |     "    def __init__(self, units):\n",
194 |     "        super().__init__()\n",
195 |     "        self.units = units\n",
196 |     "        # 作为一个 RNN 的单元，必须有state_size属性\n",
197 |     "        # state_size 表示每个时间步输出的维度\n",
198 |     "        self.state_size = units\n",
199 |     "    \n",
200 |     "    \n",
201 |     "    def build(self, input_shape):\n",
202 |     "        # 输入数据是一个tupe: (gru_output, atten_scores)\n",
203 |     "        # 因此，t时刻输入的x_t的维度为：\n",
204 |     "        dim_xt = input_shape[0][-1]\n",
205 |     "        \n",
206 |     "        # 重置门对t时刻输入数据x的权重参数：\n",
207 |     "        self.W_R_x = tf.Variable(tf.random.normal(shape=[dim_xt, self.units]), name='W_R_x')\n",
208 |     "        # 重置门对t时刻输入隐藏状态state的权重参数：\n",
209 |     "        self.W_R_s = tf.Variable(tf.random.normal(shape=[self.units, self.units]), name='W_R_s')\n",
210 |     "        # 重置门偏置项参数：\n",
211 |     "        self.W_R_b = tf.Variable(tf.random.normal(shape=[self.units]), name='W_R_b')\n",
212 |     "        \n",
213 |     "        \n",
214 |     "        # 更新门对t时刻输入数据x的权重参数：\n",
215 |     "        self.W_U_x = tf.Variable(tf.random.normal(shape=[dim_xt, self.units]), name='W_U_x')\n",
216 |     "        # 更新门对t时刻输入隐藏状态state的权重参数：\n",
217 |     "        self.W_U_s = tf.Variable(tf.random.normal(shape=[self.units, self.units]), name='W_U_s')\n",
218 |     "        # 更新门偏置项参数：\n",
219 |     "        self.W_U_b = tf.Variable(tf.random.normal(shape=[self.units]), name='W_U_b')\n",
220 |     "        \n",
221 |     "        \n",
222 |     "        # 候选隐藏状态 ~h_t 对t时刻输入数据x的权重参数：\n",
223 |     "        self.W_H_x = tf.Variable(tf.random.normal(shape=[dim_xt, self.units]), name='W_H_x')\n",
224 |     "        # 候选隐藏状态 ~h_t 对t时刻输入隐藏状态state的权重参数：\n",
225 |     "        self.W_H_s = tf.Variable(tf.random.normal(shape=[self.units, self.units]), name='W_H_s')\n",
226 |     "        # 候选隐藏状态 ~h_t 偏置项参数：\n",
227 |     "        self.W_H_b = tf.Variable(tf.random.normal(shape=[self.units]), name='W_H_b')\n",
228 |     "        \n",
229 |     "    \n",
230 |     "    def call(self, inputs, states):\n",
231 |     "        x_t, att_score = inputs\n",
232 |     "        states = states[0]\n",
233 |     "        \"\"\"\n",
234 |     "        x_t: x_(t), shape=(B, 2D)\n",
235 |     "        states: hidden_state_(t-1), shape=(B, units)\n",
236 |     "        att_score: attention_score_(t),  shape=(B, 1)\n",
237 |     "        \"\"\"\n",
238 |     "        # 重置门\n",
239 |     "        r_t = tf.sigmoid(tf.matmul(x_t, self.W_R_x) + tf.matmul(states, self.W_R_s) + self.W_R_b)\n",
240 |     "        # 更新门\n",
241 |     "        u_t = tf.sigmoid(tf.matmul(x_t, self.W_U_x) + tf.matmul(states, self.W_U_s) + self.W_U_b)\n",
242 |     "        # 带有注意力的更新门\n",
243 |     "        a_u_t = tf.multiply(att_score, u_t)\n",
244 |     "        # 候选隐藏状态\n",
245 |     "        _h_t = tf.tanh(tf.matmul(x_t, self.W_H_x) + tf.matmul(tf.multiply(r_t, states), self.W_H_s) \n",
246 |     "                       + self.W_H_b)\n",
247 |     "        # 输出值\n",
248 |     "        h_t = tf.multiply(1-a_u_t, states) + tf.multiply(a_u_t, _h_t)\n",
249 |     "        # 对gru而言，当前时刻的output与传递给下一时刻的state相同\n",
250 |     "        next_state = h_t\n",
251 |     "        \n",
252 |     "        \n",
253 |     "        return h_t, next_state # 第一个表示output\n",
254 |     "        \n",
255 |     "        "
256 |    ]
257 |   },
258 |   {
259 |    "cell_type": "code",
260 |    "execution_count": 17,
261 |    "metadata": {},
262 |    "outputs": [],
263 |    "source": [
264 |     "# 得到历史行为的embedding表示\n",
265 |     "class DIEN(Model):\n",
266 |     "    def __init__(self, user_count, item_count, cate_count, EMBEDDING_DIM, \n",
267 |     "                 HIS_LEN = 100, use_negsampling = True, hid_dims=[200, 80, 2]):\n",
268 |     "        super().__init__()\n",
269 |     "        \n",
270 |     "        self.rnn_dim = EMBEDDING_DIM*2\n",
271 |     "        \n",
272 |     "        self.EmbLayer = EmbeddingLayer(user_count, item_count, cate_count, \n",
273 |     "                                       EMBEDDING_DIM, use_negsampling)\n",
274 |     "        \n",
275 |     "        self.GRU = GRU(self.rnn_dim, return_sequences=True)\n",
276 |     "        self.AuxTrainLayer = AuxTrainLayer()\n",
277 |     "        self.AttenLayer = DINAttenLayer()\n",
278 |     "        # self.AUGRU = AUGRU(EMBEDDING_DIM*2, return_state=True)\n",
279 |     "        self.AUGRU = RNN(AUGRUCell(self.rnn_dim))\n",
280 |     "        self.FCLayer = FCLayer(hid_dims, use_dice=True)\n",
281 |     "        \n",
282 |     "        \n",
283 |     "    def call(self, user, item, cate, item_his, cate_his, mask, no_m_his, no_c_his):\n",
284 |     "        # 转 0, 1 为 True, False \n",
285 |     "        mask_bool = tf.cast(mask, tf.bool)\n",
286 |     "        # 得到embedding\n",
287 |     "        embs = self.EmbLayer(user, item, cate, item_his, cate_his, no_m_his, no_c_his)\n",
288 |     "        # (B, 2D) \n",
289 |     "        user_emb, item_emb, his_emb, his_emb_sum, noclk_his_emb, noclk_his_emb_sum = embs\n",
290 |     "        \n",
291 |     "        \n",
292 |     "        # 第一层 GRU\n",
293 |     "        # tf2.2中的大坑：\n",
294 |     "        # 官方文档中第二个参数为mask，\n",
295 |     "        # 但是不指定参数名字mask=mask_bool的话，\n",
296 |     "        # 则mask_bool会当成参数initial_state的值\n",
297 |     "        gru_output = self.GRU(his_emb, mask=mask_bool) # (B, T, 2D)\n",
298 |     "        # 辅助损失函数\n",
299 |     "        aux_loss = self.AuxTrainLayer(gru_output[:, :-1, :], \n",
300 |     "                                      his_emb[:, 1:, :],\n",
301 |     "                                      noclk_his_emb[:, 1:, :],\n",
302 |     "                                      mask[:, 1:]) # (B,)\n",
303 |     "        \n",
304 |     "        # 计算目标item与历史item的attention分数\n",
305 |     "        atten_scores = self.AttenLayer(item_emb, gru_output, mask) # (B, T, 1)\n",
306 |     "        \n",
307 |     "        # AUGRU\n",
308 |     "        behavior_emb = self.AUGRU((gru_output, atten_scores), mask=mask_bool) # (B, 2D) \n",
309 |     "        \n",
310 |     "        # 全连接层\n",
311 |     "        inp = tf.concat([user_emb, item_emb, his_emb_sum, behavior_emb, \n",
312 |     "                         noclk_his_emb_sum], axis=-1)\n",
313 |     "        output = self.FCLayer(inp)\n",
314 |     "        logit = tf.nn.softmax(output)\n",
315 |     "        return output, logit, aux_loss\n",
316 |     "    \n",
317 |     "    def train(self, user, item, cate, item_his, cate_his, mask, no_m_his, no_c_his, target):\n",
318 |     "        output, _, aux_loss = self.call(user, item, cate, item_his, cate_his, mask, no_m_his, no_c_his)\n",
319 |     "        loss = tf.keras.losses.categorical_crossentropy(target, output, from_logits=False)\n",
320 |     "        loss = tf.reduce_mean(loss)\n",
321 |     "        return loss, aux_loss\n",
322 |     "        \n",
323 |     "    def predict(self, user, item, cate, item_his, cate_his, mask):\n",
324 |     "        _, pred, _ = self.call(user, item, cate, item_his, cate_his, mask)\n",
325 |     "        return pred"
326 |    ]
327 |   },
328 |   {
329 |    "cell_type": "code",
330 |    "execution_count": 18,
331 |    "metadata": {},
332 |    "outputs": [],
333 |    "source": [
334 |     "base_path = \"data/\"\n",
335 |     "train_file = base_path + \"local_train_splitByUser\"\n",
336 |     "test_file = base_path + \"local_test_splitByUser\"\n",
337 |     "uid_voc = base_path + \"uid_voc.pkl\"\n",
338 |     "mid_voc = base_path + \"mid_voc.pkl\"\n",
339 |     "cat_voc = base_path + \"cat_voc.pkl\"\n",
340 |     "batch_size = 128\n",
341 |     "maxlen = 100\n",
342 |     "\n",
343 |     "train_data = DataIterator(train_file, uid_voc, mid_voc, cat_voc, \n",
344 |     "                          batch_size, maxlen, shuffle_each_epoch=False)\n",
345 |     "\n",
346 |     "n_uid, n_mid, n_cat = train_data.get_n() # 用户数，电影数，类别数"
347 |    ]
348 |   },
349 |   {
350 |    "cell_type": "code",
351 |    "execution_count": 20,
352 |    "metadata": {
353 |     "scrolled": true
354 |    },
355 |    "outputs": [
356 |     {
357 |      "name": "stdout",
358 |      "output_type": "stream",
359 |      "text": [
360 |       "batch 0 loss 2.068039, aux loss 1.479113\n",
361 |       "batch 10 loss 0.737447, aux loss 1.313956\n",
362 |       "batch 20 loss 0.695979, aux loss 1.392124\n",
363 |       "batch 30 loss 0.693791, aux loss 1.234638\n",
364 |       "batch 40 loss 0.694052, aux loss 1.386228\n",
365 |       "batch 50 loss 0.693868, aux loss 1.237043\n",
366 |       "batch 60 loss 0.695745, aux loss 1.386409\n",
367 |       "batch 70 loss 0.691482, aux loss 1.265594\n",
368 |       "batch 80 loss 0.695219, aux loss 1.385818\n",
369 |       "batch 90 loss 0.693376, aux loss 1.352869\n",
370 |       "batch 100 loss 0.694469, aux loss 1.383801\n",
371 |       "batch 110 loss 0.694890, aux loss 1.273229\n",
372 |       "batch 120 loss 0.699449, aux loss 1.385453\n",
373 |       "batch 130 loss 0.694843, aux loss 1.351792\n",
374 |       "batch 140 loss 0.698159, aux loss 1.382274\n",
375 |       "batch 150 loss 0.689563, aux loss 1.388619\n",
376 |       "batch 160 loss 0.691783, aux loss 1.387143\n",
377 |       "batch 170 loss 0.695458, aux loss 1.229023\n",
378 |       "batch 180 loss 0.691120, aux loss 1.390311\n",
379 |       "batch 190 loss 0.693030, aux loss 1.242190\n",
380 |       "batch 200 loss 0.694106, aux loss 1.375872\n",
381 |       "batch 210 loss 0.690383, aux loss 1.282734\n",
382 |       "batch 220 loss 0.691290, aux loss 1.378154\n",
383 |       "batch 230 loss 0.690240, aux loss 1.293406\n",
384 |       "batch 240 loss 0.694030, aux loss 1.379894\n",
385 |       "batch 250 loss 0.697263, aux loss 1.302448\n",
386 |       "batch 260 loss 0.694523, aux loss 1.380266\n",
387 |       "batch 270 loss 0.689354, aux loss 1.306656\n",
388 |       "batch 280 loss 0.688328, aux loss 1.382248\n",
389 |       "batch 290 loss 0.693213, aux loss 1.270545\n",
390 |       "batch 300 loss 0.694994, aux loss 1.380146\n",
391 |       "batch 310 loss 0.692642, aux loss 1.236873\n",
392 |       "batch 320 loss 0.689740, aux loss 1.382414\n",
393 |       "batch 330 loss 0.691055, aux loss 1.355132\n",
394 |       "batch 340 loss 0.690439, aux loss 1.378826\n",
395 |       "batch 350 loss 0.696317, aux loss 1.305768\n",
396 |       "batch 360 loss 0.692655, aux loss 1.380104\n",
397 |       "batch 370 loss 0.683240, aux loss 1.282599\n",
398 |       "batch 380 loss 0.694989, aux loss 1.376244\n",
399 |       "batch 390 loss 0.693351, aux loss 1.324468\n",
400 |       "batch 400 loss 0.686526, aux loss 1.380373\n",
401 |       "batch 410 loss 0.690881, aux loss 1.349163\n",
402 |       "batch 420 loss 0.686478, aux loss 1.374557\n",
403 |       "batch 430 loss 0.684616, aux loss 1.378559\n",
404 |       "batch 440 loss 0.680370, aux loss 1.373135\n",
405 |       "batch 450 loss 0.672189, aux loss 1.330817\n",
406 |       "batch 460 loss 0.704011, aux loss 1.378112\n",
407 |       "batch 470 loss 0.666249, aux loss 1.338828\n",
408 |       "batch 480 loss 0.686237, aux loss 1.386753\n",
409 |       "batch 490 loss 0.672048, aux loss 1.237627\n",
410 |       "batch 500 loss 0.689416, aux loss 1.375439\n",
411 |       "batch 510 loss 0.679942, aux loss 1.274050\n",
412 |       "batch 520 loss 0.684325, aux loss 1.384659\n",
413 |       "batch 530 loss 0.684855, aux loss 1.281043\n",
414 |       "batch 540 loss 0.660100, aux loss 1.394137\n",
415 |       "batch 550 loss 0.687806, aux loss 1.240736\n",
416 |       "batch 560 loss 0.690218, aux loss 1.380075\n",
417 |       "batch 570 loss 0.673716, aux loss 1.320786\n",
418 |       "batch 580 loss 0.693648, aux loss 1.368184\n",
419 |       "batch 590 loss 0.696522, aux loss 1.289395\n",
420 |       "batch 600 loss 0.679908, aux loss 1.387056\n",
421 |       "batch 610 loss 0.709319, aux loss 1.291360\n",
422 |       "batch 620 loss 0.686557, aux loss 1.369841\n",
423 |       "batch 630 loss 0.674588, aux loss 1.263717\n",
424 |       "batch 640 loss 0.692565, aux loss 1.350408\n",
425 |       "batch 650 loss 0.683875, aux loss 1.221066\n",
426 |       "batch 660 loss 0.688051, aux loss 1.356015\n",
427 |       "batch 670 loss 0.674489, aux loss 1.261728\n",
428 |       "batch 680 loss 0.682079, aux loss 1.373235\n",
429 |       "batch 690 loss 0.678413, aux loss 1.279788\n",
430 |       "batch 700 loss 0.696760, aux loss 1.372520\n",
431 |       "batch 710 loss 0.681217, aux loss 1.269751\n",
432 |       "batch 720 loss 0.677947, aux loss 1.359509\n",
433 |       "batch 730 loss 0.668031, aux loss 1.331041\n",
434 |       "batch 740 loss 0.666014, aux loss 1.342387\n",
435 |       "batch 750 loss 0.682697, aux loss 1.269510\n",
436 |       "batch 760 loss 0.656295, aux loss 1.341351\n",
437 |       "batch 770 loss 0.707546, aux loss 1.251843\n",
438 |       "batch 780 loss 0.674079, aux loss 1.345248\n",
439 |       "batch 790 loss 0.657360, aux loss 1.243539\n",
440 |       "batch 800 loss 0.652130, aux loss 1.358508\n",
441 |       "batch 810 loss 0.663262, aux loss 1.269382\n",
442 |       "batch 820 loss 0.660223, aux loss 1.349433\n",
443 |       "batch 830 loss 0.662249, aux loss 1.220359\n",
444 |       "batch 840 loss 0.682940, aux loss 1.371676\n",
445 |       "batch 850 loss 0.671639, aux loss 1.279638\n",
446 |       "batch 860 loss 0.690707, aux loss 1.366297\n",
447 |       "batch 870 loss 0.664772, aux loss 1.311729\n",
448 |       "batch 880 loss 0.653953, aux loss 1.337097\n",
449 |       "batch 890 loss 0.645285, aux loss 1.337443\n",
450 |       "batch 900 loss 0.689522, aux loss 1.330288\n",
451 |       "batch 910 loss 0.658664, aux loss 1.241068\n",
452 |       "batch 920 loss 0.677607, aux loss 1.334146\n",
453 |       "batch 930 loss 0.654895, aux loss 1.277277\n",
454 |       "batch 940 loss 0.667736, aux loss 1.347373\n",
455 |       "batch 950 loss 0.640414, aux loss 1.171708\n",
456 |       "batch 960 loss 0.687433, aux loss 1.355712\n",
457 |       "batch 970 loss 0.661177, aux loss 1.277527\n",
458 |       "batch 980 loss 0.672078, aux loss 1.361447\n",
459 |       "batch 990 loss 0.664237, aux loss 1.194987\n",
460 |       "batch 1000 loss 0.708602, aux loss 1.360277\n"
461 |      ]
462 |     }
463 |    ],
464 |    "source": [
465 |     "model = DIEN(n_uid, n_mid, n_cat, 16)\n",
466 |     "optimizer = tf.keras.optimizers.Adam(learning_rate=1e-2)\n",
467 |     "\n",
468 |     "# 训练模型\n",
469 |     "for i, (src, tgt) in enumerate(train_data):\n",
470 |     "    data = prepare_data(src, tgt, maxlen=100, return_neg=True)\n",
471 |     "    uids, mids, cats, mid_his, cat_his, mid_mask, target, sl, no_m_his, no_c_his = data\n",
472 |     "    with tf.GradientTape() as tape:\n",
473 |     "        loss, aux_loss = model.train(uids, mids, cats, mid_his, cat_his, \n",
474 |     "                                     mid_mask, no_m_his, no_c_his, target)\n",
475 |     "        if i%10 == 0:\n",
476 |     "            print(\"batch %d loss %f, aux loss %f\" % (i, loss.numpy(), aux_loss.numpy()))\n",
477 |     "            \n",
478 |     "        loss = loss + aux_loss\n",
479 |     "    grads = tape.gradient(loss, model.trainable_variables)\n",
480 |     "    optimizer.apply_gradients(grads_and_vars=zip(grads, model.trainable_variables))\n",
481 |     "    \n",
482 |     "    if i == 1000:\n",
483 |     "        break\n"
484 |    ]
485 |   },
486 |   {
487 |    "cell_type": "code",
488 |    "execution_count": null,
489 |    "metadata": {},
490 |    "outputs": [],
491 |    "source": []
492 |   },
493 |   {
494 |    "cell_type": "code",
495 |    "execution_count": null,
496 |    "metadata": {},
497 |    "outputs": [],
498 |    "source": []
499 |   },
500 |   {
501 |    "cell_type": "code",
502 |    "execution_count": null,
503 |    "metadata": {},
504 |    "outputs": [],
505 |    "source": []
506 |   }
507 |  ],
508 |  "metadata": {
509 |   "kernelspec": {
510 |    "display_name": "Python 3",
511 |    "language": "python",
512 |    "name": "python3"
513 |   },
514 |   "language_info": {
515 |    "codemirror_mode": {
516 |     "name": "ipython",
517 |     "version": 3
518 |    },
519 |    "file_extension": ".py",
520 |    "mimetype": "text/x-python",
521 |    "name": "python",
522 |    "nbconvert_exporter": "python",
523 |    "pygments_lexer": "ipython3",
524 |    "version": "3.7.6"
525 |   }
526 |  },
527 |  "nbformat": 4,
528 |  "nbformat_minor": 4
529 | }
530 | 


--------------------------------------------------------------------------------
/DIN_DIEN/DIN.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import numpy as np\n",
 10 |     "import json\n",
 11 |     "import pickle as pkl\n",
 12 |     "import random\n",
 13 |     "import gzip\n",
 14 |     "import tensorflow as tf\n",
 15 |     "from tensorflow.keras.layers import *\n",
 16 |     "from tensorflow.keras import Model\n",
 17 |     "from layers import Dice\n",
 18 |     "from utils import DataIterator, prepare_data"
 19 |    ]
 20 |   },
 21 |   {
 22 |    "cell_type": "code",
 23 |    "execution_count": 2,
 24 |    "metadata": {},
 25 |    "outputs": [],
 26 |    "source": [
 27 |     "class EmbeddingLayer(Layer):\n",
 28 |     "    def __init__(self, user_count, item_count, cate_count, emb_dim, use_negsampling=False):\n",
 29 |     "        super().__init__()\n",
 30 |     "        self.emb_dim = emb_dim\n",
 31 |     "        self.use_negsampling = use_negsampling\n",
 32 |     "        self.user_emb = Embedding(user_count, self.emb_dim, name=\"user_emb\")\n",
 33 |     "        self.item_emb = Embedding(item_count, self.emb_dim, name=\"item_emb\")\n",
 34 |     "        self.cate_emb = Embedding(cate_count, self.emb_dim, name=\"cate_emb\")\n",
 35 |     "        \n",
 36 |     "    def call(self, user, item, cate, item_his, cate_his,\n",
 37 |     "             noclick_item_his=[],  noclick_cate_hiss=[]):\n",
 38 |     "        user_emb = self.user_emb(user) # (B, D)\n",
 39 |     "        \n",
 40 |     "        # 基本属性embedding:\n",
 41 |     "        item_emb = self.item_emb(item) # (B, D)\n",
 42 |     "        cate_emb = self.cate_emb(cate) # (B, D)\n",
 43 |     "        item_join_emb = Concatenate(-1)([item_emb, cate_emb]) # (B, 2D)\n",
 44 |     "        \n",
 45 |     "        \n",
 46 |     "        # 历史行为序列embedding:\n",
 47 |     "        item_his_emb = self.item_emb(item_his) # (B, T, D)\n",
 48 |     "        cate_his_emb = self.item_emb(cate_his) # (B, T, D)\n",
 49 |     "        item_join_his_emb = Concatenate(-1)([item_his_emb, cate_his_emb]) # (B, T, 2D)\n",
 50 |     "        item_his_emb_sum = tf.reduce_sum(item_join_his_emb, axis=1) # (B, D)\n",
 51 |     "        \n",
 52 |     "        if self.use_negsampling:\n",
 53 |     "            # (B, T, neg_num, D)\n",
 54 |     "            noclick_item_his_emb = self.item_emb(noclick_item_his) \n",
 55 |     "            # (B, T, neg_num, D)\n",
 56 |     "            noclick_cate_his_emb = self.item_emb(noclick_cate_his) \n",
 57 |     "            # (B, T, neg_num, 2D)\n",
 58 |     "            noclick_item_join_his_emb = Concatenate(-1)([noclick_item_his_emb, noclick_cate_his_emb])\n",
 59 |     "            # (B, T, 2D)\n",
 60 |     "            noclick_item_emb_neg_sum = tf.reduce_sum(noclick_item_join_his_emb, axis=2) \n",
 61 |     "            # (B, 2D)\n",
 62 |     "            noclick_item_his_emb_sum = tf.reduce_sum(noclick_item_emb_neg_sum, axis=1) \n",
 63 |     "            \n",
 64 |     "            return user_emb, item_join_emb, \\\n",
 65 |     "                    item_join_his_emb, item_his_emb_sum, \\\n",
 66 |     "                    noclick_item_join_his_emb, noclick_item_his_emb_sum \n",
 67 |     "            \n",
 68 |     "        return user_emb, item_join_emb, \\\n",
 69 |     "                item_join_his_emb, item_his_emb_sum\n",
 70 |     "        "
 71 |    ]
 72 |   },
 73 |   {
 74 |    "cell_type": "code",
 75 |    "execution_count": 3,
 76 |    "metadata": {},
 77 |    "outputs": [],
 78 |    "source": [
 79 |     "class FCLayer(Layer):\n",
 80 |     "    def __init__(self, hid_dims=[80, 40, 2], use_dice=False):\n",
 81 |     "        super().__init__()\n",
 82 |     "        self.hid_dims = hid_dims\n",
 83 |     "        self.use_dice = use_dice\n",
 84 |     "        self.fc = []\n",
 85 |     "        self.dice = []\n",
 86 |     "        for dim in self.hid_dims[:-1]:\n",
 87 |     "            if use_dice:\n",
 88 |     "                self.fc.append(Dense(dim, name=f'dense_{dim}'))\n",
 89 |     "                self.dice.append(Dice())\n",
 90 |     "            else:\n",
 91 |     "                self.fc.append(Dense(dim, activation=\"sigmoid\", \n",
 92 |     "                                     name=f'dense_{dim}'))\n",
 93 |     "        self.fc.append(Dense(self.hid_dims[-1], name=\"dense_output\"))\n",
 94 |     "        \n",
 95 |     "    def call(self, inputs):\n",
 96 |     "        if self.use_dice:\n",
 97 |     "            fc_out = inputs\n",
 98 |     "            for i in range(len(self.dice)):\n",
 99 |     "                fc_out = self.fc[i](fc_out)\n",
100 |     "                fc_out = self.dice[i](fc_out)\n",
101 |     "            fc_out = self.fc[-1](fc_out)\n",
102 |     "            return fc_out\n",
103 |     "        else: \n",
104 |     "            fc_out = self.fc[0](inputs)\n",
105 |     "            for fc in self.fc[1:]:\n",
106 |     "                fc_out = fc(fc_out)\n",
107 |     "            return fc_out"
108 |    ]
109 |   },
110 |   {
111 |    "cell_type": "code",
112 |    "execution_count": 4,
113 |    "metadata": {},
114 |    "outputs": [],
115 |    "source": [
116 |     "# 计算注意力得分\n",
117 |     "class DINAttenLayer(Layer):\n",
118 |     "    def __init__(self, hid_dims=[80, 40, 1]):\n",
119 |     "        super().__init__()\n",
120 |     "        self.FCLayer = FCLayer(hid_dims)\n",
121 |     "        \n",
122 |     "    def call(self, query, facts, mask):\n",
123 |     "        \"\"\"\n",
124 |     "        query: (B, 2D)\n",
125 |     "        facts: (B, T, 2D)\n",
126 |     "        mask: (B, T)\n",
127 |     "        \"\"\"\n",
128 |     "        mask = tf.equal(mask, tf.ones_like(mask)) # (B, T)\n",
129 |     "        queries = tf.tile(query, [1, facts.shape[1]]) # (B, 2D*T)\n",
130 |     "        queries = tf.reshape(queries, [-1, facts.shape[1], facts.shape[2]]) # # (B, T, 2D)\n",
131 |     "        # print(\"queries\", queries.shape)\n",
132 |     "        # (B, T, 2D*4)\n",
133 |     "        din_all = tf.concat([queries, facts, queries - facts, queries * facts], axis=-1)\n",
134 |     "        \n",
135 |     "        fc_out = self.FCLayer(din_all) # (B, T, 1)\n",
136 |     "        score = fc_out # (B, T, 1)\n",
137 |     "        score = tf.reshape(score, [-1, 1, facts.shape[1]]) # (B, 1, T)\n",
138 |     "        \n",
139 |     "        key_masks = tf.expand_dims(mask, 1) # (B, 1, T)\n",
140 |     "        padding = tf.ones_like(score) * (-2**32 + 1)\n",
141 |     "        # True的地方为score，否则为极大的负数\n",
142 |     "        score = tf.where(key_masks, score, padding) # (B, 1, T)\n",
143 |     "        score = tf.nn.softmax(score)\n",
144 |     "        \n",
145 |     "        output = tf.matmul(score, facts) # (B, 1, 2D)\n",
146 |     "        output = tf.squeeze(output, 1) # (B, 2D)\n",
147 |     "        return output\n",
148 |     "        "
149 |    ]
150 |   },
151 |   {
152 |    "cell_type": "code",
153 |    "execution_count": 5,
154 |    "metadata": {},
155 |    "outputs": [],
156 |    "source": [
157 |     "# 得到历史行为的embedding表示\n",
158 |     "class DIN(Model):\n",
159 |     "    def __init__(self, user_count, item_count, cate_count, EMBEDDING_DIM, \n",
160 |     "                 HIS_LEN = 100, use_negsampling = False, hid_dims=[200, 80, 2]):\n",
161 |     "        super().__init__()\n",
162 |     "        self.EmbLayer = EmbeddingLayer(user_count, item_count, cate_count, \n",
163 |     "                                       EMBEDDING_DIM, use_negsampling)\n",
164 |     "        self.AttenLayer = DINAttenLayer()\n",
165 |     "        self.FCLayer = FCLayer(hid_dims, use_dice=True)\n",
166 |     "        \n",
167 |     "        \n",
168 |     "    def call(self, user, item, cate, item_his, cate_his, mask):\n",
169 |     "        # 得到embedding\n",
170 |     "        embs = self.EmbLayer(user, item, cate, item_his, cate_his)\n",
171 |     "        # (B, 2D) \n",
172 |     "        user_emb, item_join_emb, item_join_his_emb, item_his_emb_sum = embs\n",
173 |     "        # 计算目标item与历史item的attention分数，然后加权求和，得到最终的embedding\n",
174 |     "        behavior_emb = self.AttenLayer(item_join_emb, item_join_his_emb, mask) # (B, 2D)\n",
175 |     "        \n",
176 |     "        # 全连接层\n",
177 |     "        inp = tf.concat([user_emb, item_join_emb, item_his_emb_sum, \n",
178 |     "                         item_his_emb_sum, behavior_emb], axis=-1)\n",
179 |     "        output = self.FCLayer(inp)\n",
180 |     "        # logit = tf.nn.softmax(output)\n",
181 |     "        return output # , logit\n",
182 |     "        "
183 |    ]
184 |   },
185 |   {
186 |    "cell_type": "code",
187 |    "execution_count": 6,
188 |    "metadata": {},
189 |    "outputs": [],
190 |    "source": [
191 |     "base_path = \"data/\"\n",
192 |     "train_file = base_path + \"local_train_splitByUser\"\n",
193 |     "test_file = base_path + \"local_test_splitByUser\"\n",
194 |     "uid_voc = base_path + \"uid_voc.pkl\"\n",
195 |     "mid_voc = base_path + \"mid_voc.pkl\"\n",
196 |     "cat_voc = base_path + \"cat_voc.pkl\"\n",
197 |     "batch_size = 128\n",
198 |     "maxlen = 100\n",
199 |     "\n",
200 |     "train_data = DataIterator(train_file, uid_voc, mid_voc, cat_voc, \n",
201 |     "                          batch_size, maxlen, shuffle_each_epoch=False)\n",
202 |     "\n",
203 |     "n_uid, n_mid, n_cat = train_data.get_n() # 用户数，电影数，类别数"
204 |    ]
205 |   },
206 |   {
207 |    "cell_type": "code",
208 |    "execution_count": 7,
209 |    "metadata": {
210 |     "scrolled": true
211 |    },
212 |    "outputs": [
213 |     {
214 |      "name": "stdout",
215 |      "output_type": "stream",
216 |      "text": [
217 |       "batch 0 loss 3.620382\n",
218 |       "batch 100 loss 0.689255\n",
219 |       "batch 200 loss 0.671629\n",
220 |       "batch 300 loss 0.659545\n",
221 |       "batch 400 loss 0.709612\n",
222 |       "batch 500 loss 0.655639\n",
223 |       "batch 600 loss 0.636441\n"
224 |      ]
225 |     },
226 |     {
227 |      "ename": "KeyboardInterrupt",
228 |      "evalue": "",
229 |      "output_type": "error",
230 |      "traceback": [
231 |       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
232 |       "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
233 |       "\u001b[0;32m<ipython-input-7-be26d1bcc4c5>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      7\u001b[0m     \u001b[0muids\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmids\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcats\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmid_his\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcat_his\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmid_mask\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtarget\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msl\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      8\u001b[0m     \u001b[0;32mwith\u001b[0m \u001b[0mtf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mGradientTape\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mtape\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 9\u001b[0;31m         \u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0muids\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmids\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcats\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmid_his\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcat_his\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmid_mask\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     10\u001b[0m         \u001b[0mloss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkeras\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlosses\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcategorical_crossentropy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtarget\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutput\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     11\u001b[0m         \u001b[0mloss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreduce_mean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mloss\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
234 |       "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m    966\u001b[0m           with base_layer_utils.autocast_context_manager(\n\u001b[1;32m    967\u001b[0m               self._compute_dtype):\n\u001b[0;32m--> 968\u001b[0;31m             \u001b[0moutputs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcall\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcast_inputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    969\u001b[0m           \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_handle_activity_regularization\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    970\u001b[0m           \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_set_mask_metadata\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput_masks\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
235 |       "\u001b[0;32m<ipython-input-5-496c6fcffcce>\u001b[0m in \u001b[0;36mcall\u001b[0;34m(self, user, item, cate, item_his, cate_his, mask)\u001b[0m\n\u001b[1;32m     21\u001b[0m         inp = tf.concat([user_emb, item_join_emb, item_his_emb_sum, \n\u001b[1;32m     22\u001b[0m                          item_his_emb_sum, behavior_emb], axis=-1)\n\u001b[0;32m---> 23\u001b[0;31m         \u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mFCLayer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minp\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     24\u001b[0m         \u001b[0;31m# logit = tf.nn.softmax(output)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     25\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0moutput\u001b[0m \u001b[0;31m# , logit\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
236 |       "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m    966\u001b[0m           with base_layer_utils.autocast_context_manager(\n\u001b[1;32m    967\u001b[0m               self._compute_dtype):\n\u001b[0;32m--> 968\u001b[0;31m             \u001b[0moutputs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcall\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcast_inputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    969\u001b[0m           \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_handle_activity_regularization\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    970\u001b[0m           \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_set_mask_metadata\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput_masks\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
237 |       "\u001b[0;32m<ipython-input-3-5a1b15c90a83>\u001b[0m in \u001b[0;36mcall\u001b[0;34m(self, inputs)\u001b[0m\n\u001b[1;32m     20\u001b[0m             \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdice\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     21\u001b[0m                 \u001b[0mfc_out\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfc_out\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 22\u001b[0;31m                 \u001b[0mfc_out\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdice\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfc_out\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     23\u001b[0m             \u001b[0mfc_out\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfc_out\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     24\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0mfc_out\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
238 |       "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m    966\u001b[0m           with base_layer_utils.autocast_context_manager(\n\u001b[1;32m    967\u001b[0m               self._compute_dtype):\n\u001b[0;32m--> 968\u001b[0;31m             \u001b[0moutputs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcall\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcast_inputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    969\u001b[0m           \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_handle_activity_regularization\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    970\u001b[0m           \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_set_mask_metadata\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput_masks\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
239 |       "\u001b[0;32m~/Workspace/tensorflow2/DIN_DIEN/layers.py\u001b[0m in \u001b[0;36mcall\u001b[0;34m(self, _x)\u001b[0m\n\u001b[1;32m     50\u001b[0m         \u001b[0;31m# 标准化后使用 sigmoid 函数得到 x_p\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     51\u001b[0m         \u001b[0mx_p\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msigmoid\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx_normed\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 52\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0malpha\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;36m1.0\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mx_p\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0m_x\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mx_p\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0m_x\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
240 |       "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/tensorflow/python/ops/math_ops.py\u001b[0m in \u001b[0;36mbinary_op_wrapper\u001b[0;34m(x, y)\u001b[0m\n\u001b[1;32m    982\u001b[0m     \u001b[0;32mwith\u001b[0m \u001b[0mops\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname_scope\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mop_name\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    983\u001b[0m       \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mops\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTensor\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mops\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTensor\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 984\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    985\u001b[0m       \u001b[0;32melif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msparse_tensor\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mSparseTensor\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    986\u001b[0m         \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
241 |       "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/tensorflow/python/ops/math_ops.py\u001b[0m in \u001b[0;36m_add_dispatch\u001b[0;34m(x, y, name)\u001b[0m\n\u001b[1;32m   1274\u001b[0m     \u001b[0;32mreturn\u001b[0m \u001b[0mgen_math_ops\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0madd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1275\u001b[0m   \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1276\u001b[0;31m     \u001b[0;32mreturn\u001b[0m \u001b[0mgen_math_ops\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0madd_v2\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1277\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1278\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
242 |       "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/tensorflow/python/ops/gen_math_ops.py\u001b[0m in \u001b[0;36madd_v2\u001b[0;34m(x, y, name)\u001b[0m\n\u001b[1;32m    469\u001b[0m       _result = pywrap_tfe.TFE_Py_FastPathExecute(\n\u001b[1;32m    470\u001b[0m         \u001b[0m_ctx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_context_handle\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtld\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdevice_name\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"AddV2\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 471\u001b[0;31m         tld.op_callbacks, x, y)\n\u001b[0m\u001b[1;32m    472\u001b[0m       \u001b[0;32mreturn\u001b[0m \u001b[0m_result\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    473\u001b[0m     \u001b[0;32mexcept\u001b[0m \u001b[0m_core\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_FallbackException\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
243 |       "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
244 |      ]
245 |     }
246 |    ],
247 |    "source": [
248 |     "model = DIN(n_uid, n_mid, n_cat, 8)\n",
249 |     "optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)\n",
250 |     "\n",
251 |     "# 训练模型\n",
252 |     "for i, (src, tgt) in enumerate(train_data):\n",
253 |     "    data = prepare_data(src, tgt, maxlen=100, return_neg=False)\n",
254 |     "    uids, mids, cats, mid_his, cat_his, mid_mask, target, sl = data\n",
255 |     "    with tf.GradientTape() as tape:\n",
256 |     "        output = model(uids, mids, cats, mid_his, cat_his, mid_mask)\n",
257 |     "        loss = tf.keras.losses.categorical_crossentropy(target, output)\n",
258 |     "        loss = tf.reduce_mean(loss)\n",
259 |     "        if i%100 == 0:\n",
260 |     "            print(\"batch %d loss %f\" % (i, loss.numpy()))\n",
261 |     "    grads = tape.gradient(loss, model.variables)\n",
262 |     "    optimizer.apply_gradients(grads_and_vars=zip(grads, model.variables))\n",
263 |     "    \n",
264 |     "    if i == 1000:\n",
265 |     "        break\n"
266 |    ]
267 |   },
268 |   {
269 |    "cell_type": "code",
270 |    "execution_count": null,
271 |    "metadata": {},
272 |    "outputs": [],
273 |    "source": []
274 |   },
275 |   {
276 |    "cell_type": "code",
277 |    "execution_count": null,
278 |    "metadata": {},
279 |    "outputs": [],
280 |    "source": []
281 |   },
282 |   {
283 |    "cell_type": "code",
284 |    "execution_count": null,
285 |    "metadata": {},
286 |    "outputs": [],
287 |    "source": []
288 |   },
289 |   {
290 |    "cell_type": "code",
291 |    "execution_count": null,
292 |    "metadata": {},
293 |    "outputs": [],
294 |    "source": []
295 |   }
296 |  ],
297 |  "metadata": {
298 |   "kernelspec": {
299 |    "display_name": "Python 3",
300 |    "language": "python",
301 |    "name": "python3"
302 |   },
303 |   "language_info": {
304 |    "codemirror_mode": {
305 |     "name": "ipython",
306 |     "version": 3
307 |    },
308 |    "file_extension": ".py",
309 |    "mimetype": "text/x-python",
310 |    "name": "python",
311 |    "nbconvert_exporter": "python",
312 |    "pygments_lexer": "ipython3",
313 |    "version": "3.7.6"
314 |   }
315 |  },
316 |  "nbformat": 4,
317 |  "nbformat_minor": 4
318 | }
319 | 


--------------------------------------------------------------------------------
/DIN_DIEN/data/get_data.txt:
--------------------------------------------------------------------------------
1 | https://github.com/mouna99/dien


--------------------------------------------------------------------------------
/DIN_DIEN/layers.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from tensorflow.keras.layers import *
 3 | 
 4 | class Dice(Layer):
 5 |     def __init__(self, axis=-1, epsilon = 1e-10, name=""):
 6 |         super().__init__()
 7 |         self.axis = axis
 8 |         self.epsilon = epsilon
 9 |         
10 |     def build(self, input_shape):
11 |         rand = tf.random_normal_initializer()(shape=[input_shape[-1]])
12 |         self.alpha = tf.Variable(rand, dtype=tf.float32, name="alpha")
13 |         
14 |     
15 |     def call(self, _x):
16 |         # 输入数据的各个轴的维度
17 |         input_shape = list(_x.get_shape())
18 | 
19 |         # 需要进行reduce计算的轴
20 |         reduction_axes = list(range(len(input_shape)))
21 |         del reduction_axes[self.axis]
22 |         
23 |         # 能进行广播运算所需要的shape
24 |         # shape: (1, _x.shape[axis])
25 |         broadcast_shape = [1] * len(input_shape)
26 |         broadcast_shape[self.axis] = input_shape[self.axis]
27 |         
28 |         # 除了axis轴外所有数算均值
29 |         # shape: (_x.shape[axis], )
30 |         mean = tf.reduce_mean(_x, axis=reduction_axes)
31 |         # 然后还原为_x原来的维度，并且在axis轴进行广播
32 |         # shape: (1, _x.shape[axis])
33 |         brodcast_mean = tf.reshape(mean, broadcast_shape)
34 | 
35 |         # 除了axis轴外所有数算平方差
36 |         # shape: (_x.shape[axis], )
37 |         std = tf.reduce_mean(tf.square(_x - brodcast_mean) + self.epsilon, axis=reduction_axes)
38 |         # 算标准差
39 |         std = tf.sqrt(std)
40 |         # 然后还原为_x原来的维度，并且在axis轴进行广播
41 |         # shape: (1, _x.shape[axis])
42 |         brodcast_std = tf.reshape(std, broadcast_shape)
43 |         
44 |         # 标准化，_x的shape不变
45 |         x_normed = (_x - brodcast_mean) / (brodcast_std + self.epsilon)
46 |         
47 |         # #  以上操作可用下面的一句话代替：
48 |         # x_normed = BatchNormalization(center=False, scale=False)(_x)
49 |         
50 |         # 标准化后使用 sigmoid 函数得到 x_p
51 |         x_p = tf.sigmoid(x_normed)
52 |         return self.alpha * (1.0 - x_p) * _x + x_p * _x


--------------------------------------------------------------------------------
/DIN_DIEN/utils.py:
--------------------------------------------------------------------------------
  1 | import numpy
  2 | import json
  3 | import pickle as pkl
  4 | import random
  5 | import gzip
  6 | 
  7 | 
  8 | class DataIterator:
  9 |     def __init__(self, source,
 10 |                  uid_voc,
 11 |                  mid_voc,
 12 |                  cat_voc,
 13 |                  batch_size=128,
 14 |                  maxlen=100,
 15 |                  skip_empty=False,
 16 |                  shuffle_each_epoch=False,
 17 |                  sort_by_length=True,
 18 |                  max_batch_size=20,
 19 |                  minlen=None):
 20 | 
 21 |         self.source = self.fopen(source, 'r')
 22 |         self.source_dicts = []
 23 |         # 用户、电影、类别对应的id
 24 |         for source_dict in [uid_voc, mid_voc, cat_voc]:
 25 |             self.source_dicts.append(self.load_dict(source_dict))
 26 | 
 27 |         # item-info 存放每个电影id对应的类别，例如books
 28 |         f_meta = open("data/item-info", "r")
 29 |         meta_map = {} # 电影id -> 类别名称
 30 |         for line in f_meta:
 31 |             arr = line.strip().split("\t")
 32 |             if arr[0] not in meta_map:
 33 |                 meta_map[arr[0]] = arr[1]
 34 |         self.meta_id_map ={} # 电影id -> 类别id
 35 |         for key in meta_map:
 36 |             val = meta_map[key]
 37 |             if key in self.source_dicts[1]:
 38 |                 mid_idx = self.source_dicts[1][key]
 39 |             else:
 40 |                 mid_idx = 0
 41 |             if val in self.source_dicts[2]:
 42 |                 cat_idx = self.source_dicts[2][val]
 43 |             else:
 44 |                 cat_idx = 0
 45 |             self.meta_id_map[mid_idx] = cat_idx
 46 | 
 47 |         f_review = open("data/reviews-info", "r")
 48 |         self.mid_list_for_random = [] # 用于随机访问的电影列表
 49 |         for line in f_review:
 50 |             arr = line.strip().split("\t")
 51 |             tmp_idx = 0
 52 |             if arr[1] in self.source_dicts[1]:
 53 |                 tmp_idx = self.source_dicts[1][arr[1]]
 54 |             self.mid_list_for_random.append(tmp_idx)
 55 | 
 56 |         self.batch_size = batch_size
 57 |         self.maxlen = maxlen
 58 |         self.minlen = minlen
 59 |         self.skip_empty = skip_empty
 60 | 
 61 |         self.n_uid = len(self.source_dicts[0]) # 用户数量
 62 |         self.n_mid = len(self.source_dicts[1]) # 电影数量
 63 |         self.n_cat = len(self.source_dicts[2]) # 类别数量
 64 | 
 65 |         self.shuffle = shuffle_each_epoch
 66 |         self.sort_by_length = sort_by_length
 67 | 
 68 |         self.source_buffer = []
 69 |         self.k = batch_size * max_batch_size
 70 | 
 71 |         self.end_of_data = False
 72 | 
 73 |     def get_n(self):
 74 |         return self.n_uid, self.n_mid, self.n_cat
 75 | 
 76 |     def __iter__(self):
 77 |         return self
 78 | 
 79 |     def reset(self):
 80 |         if self.shuffle:
 81 |             self.source= shuffle.main(self.source_orig, temporary=True)
 82 |         else:
 83 |             self.source.seek(0)
 84 | 
 85 |     def __next__(self):
 86 |         if self.end_of_data:
 87 |             self.end_of_data = False
 88 |             self.reset()
 89 |             raise StopIteration
 90 | 
 91 |         source = []
 92 |         target = []
 93 | 
 94 |         if len(self.source_buffer) == 0:
 95 |             for k_ in range(self.k):
 96 |                 ss = self.source.readline()
 97 |                 if ss == "":
 98 |                     break
 99 |                 # label、用户名、目标item、 目标item类别、历史item、历史item对应类别；
100 |                 self.source_buffer.append(ss.strip("\n").split("\t"))
101 | 
102 |             # sort by  history behavior length
103 |             if self.sort_by_length:
104 |                 his_length = numpy.array([len(s[4].split("")) for s in self.source_buffer])
105 |                 tidx = his_length.argsort()
106 | 
107 |                 _sbuf = [self.source_buffer[i] for i in tidx]
108 |                 self.source_buffer = _sbuf
109 |             else:
110 |                 self.source_buffer.reverse()
111 | 
112 |         if len(self.source_buffer) == 0:
113 |             self.end_of_data = False
114 |             self.reset()
115 |             raise StopIteration
116 | 
117 |         try:
118 | 
119 |             # actual work here
120 |             while True:
121 | 
122 |                 # read from source file and map to word index
123 |                 try:
124 |                     ss = self.source_buffer.pop()
125 |                 except IndexError:
126 |                     break
127 | 
128 |                 # source_dicts包含了用户、电影、类别字典
129 |                 # ss包含：label、用户名、目标item、 目标item类别、历史item、历史item对应类别
130 |                 uid = self.source_dicts[0][ss[1]] if ss[1] in self.source_dicts[0] else 0
131 |                 mid = self.source_dicts[1][ss[2]] if ss[2] in self.source_dicts[1] else 0
132 |                 cat = self.source_dicts[2][ss[3]] if ss[3] in self.source_dicts[2] else 0
133 |                 tmp = []
134 |                 for fea in ss[4].split(""):
135 |                     m = self.source_dicts[1][fea] if fea in self.source_dicts[1] else 0
136 |                     tmp.append(m)
137 |                 mid_list = tmp # 历史电影id
138 | 
139 |                 tmp1 = []
140 |                 for fea in ss[5].split(""):
141 |                     c = self.source_dicts[2][fea] if fea in self.source_dicts[2] else 0
142 |                     tmp1.append(c)
143 |                 cat_list = tmp1 # 历史类别id
144 | 
145 |                 # read from source file and map to word index
146 | 
147 |                 #if len(mid_list) > self.maxlen:
148 |                 #    continue
149 |                 if self.minlen != None:
150 |                     if len(mid_list) <= self.minlen:
151 |                         continue
152 |                 if self.skip_empty and (not mid_list):
153 |                     continue
154 | 
155 |                 """
156 |                 一个一个从mid_list(历史行为序列)中拿movie id（pos_mid） ，
157 |                 然后在mid_list_for_randoms随机抽取一个位置（对应一个movie id即noclk_mid），
158 |                 然后看看pos_mid和noclk_mid是不是一样，一样就跳过，不一样就可以作为一个负样本啦，
159 |                 同时看到抽取个数大于5就结束了，所以都是抽出5个负样本。
160 |                 注意抽取的负样本noclk_mid_list是一个二维列表[length,5], 
161 |                 length就是该用户历史行为中点击的商品个数。
162 |                 """
163 | 
164 |                 noclk_mid_list = []
165 |                 noclk_cat_list = []
166 |                 for pos_mid in mid_list:
167 |                     noclk_tmp_mid = []
168 |                     noclk_tmp_cat = []
169 |                     noclk_index = 0
170 |                     while True:
171 |                         noclk_mid_indx = random.randint(0, len(self.mid_list_for_random)-1)
172 |                         noclk_mid = self.mid_list_for_random[noclk_mid_indx]
173 |                         if noclk_mid == pos_mid:
174 |                             continue
175 |                         noclk_tmp_mid.append(noclk_mid)
176 |                         noclk_tmp_cat.append(self.meta_id_map[noclk_mid])
177 |                         noclk_index += 1
178 |                         if noclk_index >= 5:
179 |                             break
180 |                     noclk_mid_list.append(noclk_tmp_mid)
181 |                     noclk_cat_list.append(noclk_tmp_cat)
182 |                 source.append([uid, mid, cat, mid_list, cat_list, noclk_mid_list, noclk_cat_list])
183 |                 target.append([float(ss[0]), 1-float(ss[0])])
184 | 
185 |                 if len(source) >= self.batch_size or len(target) >= self.batch_size:
186 |                     break
187 |         except IOError:
188 |             self.end_of_data = True
189 | 
190 |         # all sentence pairs in maxibatch filtered out because of length
191 |         if len(source) == 0 or len(target) == 0:
192 |             source, target = self.next()
193 | 
194 |         return source, target
195 |         # return list(map(numpy.array, list(zip(*source)))), numpy.array(target)
196 |     
197 |     
198 |     def load_dict(self, filename):
199 |         try:
200 |             with open(filename, 'rb') as f:
201 |                 return json.load(f)
202 |                 # return unicode_to_utf8(json.load(f))
203 |         except:
204 |             with open(filename, 'rb') as f:
205 |                 return pkl.load(f)
206 |                 # return unicode_to_utf8(pkl.load(f))
207 | 
208 | 
209 |     def fopen(self, filename, mode='r'):
210 |         if filename.endswith('.gz'):
211 |             return gzip.open(filename, mode)
212 |         return open(filename, mode)
213 |     
214 |     
215 | # 将行为序列长度调整到100, 不足的用0填充
216 | def prepare_data(input, target, maxlen = None, return_neg = False):
217 |     # input: [(uid, mid, cat, mid_list, cat_list, noclk_mid_list, noclk_cat_list)]
218 |     lengths_x = [len(s[4]) for s in input] # 真实序列长度
219 |     seqs_mid = [inp[3] for inp in input]
220 |     seqs_cat = [inp[4] for inp in input]
221 |     noclk_seqs_mid = [inp[5] for inp in input]
222 |     noclk_seqs_cat = [inp[6] for inp in input]
223 | 
224 |     if maxlen is not None:
225 |         new_seqs_mid = []
226 |         new_seqs_cat = []
227 |         new_noclk_seqs_mid = []
228 |         new_noclk_seqs_cat = []
229 |         new_lengths_x = []
230 |         for l_x, inp in zip(lengths_x, input):
231 |             if l_x > maxlen: # 取后100个
232 |                 new_seqs_mid.append(inp[3][l_x - maxlen:])
233 |                 new_seqs_cat.append(inp[4][l_x - maxlen:])
234 |                 new_noclk_seqs_mid.append(inp[5][l_x - maxlen:])
235 |                 new_noclk_seqs_cat.append(inp[6][l_x - maxlen:])
236 |                 new_lengths_x.append(maxlen)
237 |             else:
238 |                 new_seqs_mid.append(inp[3])
239 |                 new_seqs_cat.append(inp[4])
240 |                 new_noclk_seqs_mid.append(inp[5])
241 |                 new_noclk_seqs_cat.append(inp[6])
242 |                 new_lengths_x.append(l_x)
243 |         lengths_x = new_lengths_x
244 |         seqs_mid = new_seqs_mid
245 |         seqs_cat = new_seqs_cat
246 |         noclk_seqs_mid = new_noclk_seqs_mid
247 |         noclk_seqs_cat = new_noclk_seqs_cat
248 | 
249 |         if len(lengths_x) < 1:
250 |             return None, None, None, None
251 | 
252 |     n_samples = len(seqs_mid)
253 |     maxlen_x = numpy.max(lengths_x)
254 |     neg_samples = len(noclk_seqs_mid[0][0])
255 | 
256 |     # 不足的部分用 0 填充
257 |     mid_his = numpy.zeros((n_samples, maxlen_x)).astype('int64')
258 |     cat_his = numpy.zeros((n_samples, maxlen_x)).astype('int64')
259 |     noclk_mid_his = numpy.zeros((n_samples, maxlen_x, neg_samples)).astype('int64')
260 |     noclk_cat_his = numpy.zeros((n_samples, maxlen_x, neg_samples)).astype('int64')
261 |     mid_mask = numpy.zeros((n_samples, maxlen_x)).astype('float32')
262 |     for idx, [s_x, s_y, no_sx, no_sy] in enumerate(zip(seqs_mid, seqs_cat, noclk_seqs_mid, noclk_seqs_cat)):
263 |         mid_mask[idx, :lengths_x[idx]] = 1. # 真实序列为1，反之为0
264 |         mid_his[idx, :lengths_x[idx]] = s_x
265 |         cat_his[idx, :lengths_x[idx]] = s_y
266 |         noclk_mid_his[idx, :lengths_x[idx], :] = no_sx
267 |         noclk_cat_his[idx, :lengths_x[idx], :] = no_sy
268 | 
269 |     uids = numpy.array([inp[0] for inp in input])
270 |     mids = numpy.array([inp[1] for inp in input])
271 |     cats = numpy.array([inp[2] for inp in input])
272 | 
273 |     if return_neg:
274 |         return uids, mids, cats, mid_his, cat_his, mid_mask, \
275 |                 numpy.array(target), numpy.array(lengths_x), noclk_mid_his, noclk_cat_his
276 | 
277 |     else:
278 |         return uids, mids, cats, mid_his, cat_his, mid_mask, numpy.array(target), numpy.array(lengths_x)
279 | 


--------------------------------------------------------------------------------
/DeepFM.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import os\n",
 10 |     "import numpy as np\n",
 11 |     "import pandas as pd\n",
 12 |     "from tensorflow.keras.layers import *\n",
 13 |     "import tensorflow.keras.backend as K\n",
 14 |     "import matplotlib.pyplot as plt\n",
 15 |     "import tensorflow as tf\n",
 16 |     "from tensorflow.keras.models import Model\n",
 17 |     "from tensorflow.keras.utils import plot_model\n",
 18 |     "from tensorflow.keras.callbacks import *\n",
 19 |     "from sklearn.preprocessing import LabelEncoder\n",
 20 |     "# from tensorflow.keras.constraints import *\n",
 21 |     "\n",
 22 |     "%matplotlib inline"
 23 |    ]
 24 |   },
 25 |   {
 26 |    "cell_type": "markdown",
 27 |    "metadata": {},
 28 |    "source": [
 29 |     "# 准备数据"
 30 |    ]
 31 |   },
 32 |   {
 33 |    "cell_type": "code",
 34 |    "execution_count": 2,
 35 |    "metadata": {},
 36 |    "outputs": [],
 37 |    "source": [
 38 |     "path = '/disk/share/criteo/'\n",
 39 |     "data = pd.read_csv(path+'criteo_sampled_data.csv')\n",
 40 |     "cols = data.columns.values\n",
 41 |     "\n",
 42 |     "dense_feats = [f for f in cols if f[0] == \"I\"]\n",
 43 |     "sparse_feats = [f for f in cols if f[0] == \"C\"]\n",
 44 |     "\n",
 45 |     "def process_dense_feats(data, feats):\n",
 46 |     "    d = data.copy()\n",
 47 |     "    d = d[feats].fillna(0.0)\n",
 48 |     "    for f in feats:\n",
 49 |     "        d[f] = d[f].apply(lambda x: np.log(x+1) if x > -1 else -1)\n",
 50 |     "    \n",
 51 |     "    return d\n",
 52 |     "\n",
 53 |     "data_dense = process_dense_feats(data, dense_feats)\n",
 54 |     "\n",
 55 |     "vocab_sizes = {}\n",
 56 |     "def process_sparse_feats(data, feats):\n",
 57 |     "    d = data.copy()\n",
 58 |     "    d = d[feats].fillna(\"-1\")\n",
 59 |     "    for f in feats:\n",
 60 |     "        label_encoder = LabelEncoder()\n",
 61 |     "        d[f] = label_encoder.fit_transform(d[f])\n",
 62 |     "        vocab_sizes[f] = d[f].nunique() + 1\n",
 63 |     "    return d\n",
 64 |     "\n",
 65 |     "data_sparse = process_sparse_feats(data, sparse_feats)\n",
 66 |     "total_data = pd.concat([data_dense, data_sparse], axis=1)\n",
 67 |     "total_data['label'] = data['label']"
 68 |    ]
 69 |   },
 70 |   {
 71 |    "cell_type": "markdown",
 72 |    "metadata": {},
 73 |    "source": [
 74 |     "# 自定义层"
 75 |    ]
 76 |   },
 77 |   {
 78 |    "cell_type": "code",
 79 |    "execution_count": 3,
 80 |    "metadata": {},
 81 |    "outputs": [],
 82 |    "source": [
 83 |     "class SparseEmbedding(Layer):\n",
 84 |     "    def __init__(self, sparse_feats, vocab_sizes, embed_dims=8):\n",
 85 |     "        super().__init__()\n",
 86 |     "        # 离散特征嵌入矩阵\n",
 87 |     "        self.sparse_embeds_mat = []\n",
 88 |     "        for idx, feat in enumerate(sparse_feats):\n",
 89 |     "            # reg = tf.keras.regularizers.l2(0.5)\n",
 90 |     "            emb = Embedding(input_dim=vocab_sizes[feat],\n",
 91 |     "                            output_dim=embed_dims,\n",
 92 |     "                            # embeddings_regularizer=reg,\n",
 93 |     "                            name=f'{feat}_emb')\n",
 94 |     "            self.sparse_embeds_mat.append(emb)\n",
 95 |     "        \n",
 96 |     "    def call(self, sparse_inputs):\n",
 97 |     "        # FM 部分\n",
 98 |     "        sparse_embeds = []\n",
 99 |     "        for idx, emb_mat in enumerate(self.sparse_embeds_mat):\n",
100 |     "            emb = emb_mat(sparse_inputs[idx])\n",
101 |     "            sparse_embeds.append(emb)\n",
102 |     "        concat_sparse_embeds = Concatenate(axis=1)(sparse_embeds)\n",
103 |     "        return concat_sparse_embeds"
104 |    ]
105 |   },
106 |   {
107 |    "cell_type": "code",
108 |    "execution_count": 4,
109 |    "metadata": {},
110 |    "outputs": [],
111 |    "source": [
112 |     "class Linear(Layer):\n",
113 |     "    def __init__(self, sparse_feats, vocab_sizes):\n",
114 |     "        super().__init__()\n",
115 |     "        \n",
116 |     "        # 离散特1d征嵌入矩阵\n",
117 |     "        self.sparse_1d_embeds = SparseEmbedding(sparse_feats, vocab_sizes, embed_dims=1)\n",
118 |     "        \n",
119 |     "        self.fc_dense = Dense(1)\n",
120 |     "        self.fc_sparse = Dense(1)\n",
121 |     "        \n",
122 |     "    def call(self, inputs):\n",
123 |     "        dense_inputs, sparse_inputs = inputs[0], inputs[1]\n",
124 |     "        # 线性部分\n",
125 |     "        concat_dense_inputs = Concatenate(axis=1)(dense_inputs)\n",
126 |     "        first_order_dense_layer = self.fc_dense(concat_dense_inputs)\n",
127 |     "        \n",
128 |     "        concat_sparse_embeds_1d = self.sparse_1d_embeds(sparse_inputs) \n",
129 |     "        flat_sparse_embeds_1d = Flatten()(concat_sparse_embeds_1d)\n",
130 |     "        first_order_sparse_layer = self.fc_sparse(flat_sparse_embeds_1d)\n",
131 |     "        \n",
132 |     "        linear_output = Add()([first_order_dense_layer, first_order_sparse_layer])\n",
133 |     "        return linear_output"
134 |    ]
135 |   },
136 |   {
137 |    "cell_type": "code",
138 |    "execution_count": 5,
139 |    "metadata": {},
140 |    "outputs": [],
141 |    "source": [
142 |     "class FM(Layer):\n",
143 |     "    def __init__(self):\n",
144 |     "        super().__init__()\n",
145 |     "        \n",
146 |     "    def call(self, concat_sparse_embeds):\n",
147 |     "        # 先求和再求平方\n",
148 |     "        sum_embeds = tf.reduce_sum(concat_sparse_embeds, axis=1)\n",
149 |     "        square_sum_embeds = Multiply()([sum_embeds, sum_embeds])\n",
150 |     "        # 先平方再求和\n",
151 |     "        square_embeds = Multiply()([concat_sparse_embeds, concat_sparse_embeds])\n",
152 |     "        sum_square_embeds = tf.reduce_sum(square_embeds, axis=1)\n",
153 |     "        # 相减除以2\n",
154 |     "        sub =  0.5 * Subtract()([square_sum_embeds, sum_square_embeds])\n",
155 |     "        # 相加\n",
156 |     "        snd_order_sparse_output = tf.reduce_sum(sub, axis=1, keepdims=True)\n",
157 |     "        return snd_order_sparse_output"
158 |    ]
159 |   },
160 |   {
161 |    "cell_type": "code",
162 |    "execution_count": 6,
163 |    "metadata": {},
164 |    "outputs": [],
165 |    "source": [
166 |     "class DNN(Layer):\n",
167 |     "    def __init__(self, hid_units=[256,256,256], use_dropout=True):\n",
168 |     "        super().__init__()\n",
169 |     "        self.use_dropout = use_dropout\n",
170 |     "        self.Dropout = Dropout(0.3)\n",
171 |     "        self.dense_layers = []\n",
172 |     "        for unit in hid_units:\n",
173 |     "            self.dense_layers.append(Dense(unit, activation='relu'))\n",
174 |     "        self.dense_layers.append(Dense(1))\n",
175 |     "        \n",
176 |     "    def call(self, concat_sparse_embeds):\n",
177 |     "        flat_sparse_embed = Flatten()(concat_sparse_embeds)\n",
178 |     "        \n",
179 |     "        x = self.dense_layers[0](flat_sparse_embed)\n",
180 |     "        for dense in self.dense_layers[1:]:\n",
181 |     "            x = dense(x)\n",
182 |     "            if self.use_dropout:\n",
183 |     "                x = self.Dropout(x)\n",
184 |     "        return x"
185 |    ]
186 |   },
187 |   {
188 |    "cell_type": "markdown",
189 |    "metadata": {},
190 |    "source": [
191 |     "# 构建模型 (keras函数式)"
192 |    ]
193 |   },
194 |   {
195 |    "cell_type": "code",
196 |    "execution_count": 84,
197 |    "metadata": {},
198 |    "outputs": [],
199 |    "source": [
200 |     "class DeepFM:\n",
201 |     "    def __init__(self, dense_feats, sparse_feats, vocab_sizes, embed_dims=8):\n",
202 |     "        \n",
203 |     "        # 连续特征\n",
204 |     "        self.dense_inputs = []\n",
205 |     "        for feat in dense_feats:\n",
206 |     "            self.dense_inputs.append(Input(shape=1, name=feat))\n",
207 |     "            \n",
208 |     "        # 离散特征\n",
209 |     "        self.sparse_inputs = []\n",
210 |     "        for feat in sparse_feats:\n",
211 |     "            self.sparse_inputs.append(Input(shape=1, name=feat))\n",
212 |     "        \n",
213 |     "        self.Linear = Linear(sparse_feats, vocab_sizes)\n",
214 |     "        self.SparseEmbedding = SparseEmbedding(sparse_feats, vocab_sizes, embed_dims=8)\n",
215 |     "        self.FM = FM()\n",
216 |     "        self.DNN = DNN()\n",
217 |     "        \n",
218 |     "    def bulid_model(self):\n",
219 |     "        all_inputs = [self.dense_inputs, self.sparse_inputs]\n",
220 |     "        \n",
221 |     "        linear_output = self.Linear(all_inputs)\n",
222 |     "        concat_sparse_embeds = self.SparseEmbedding(self.sparse_inputs)\n",
223 |     "        snd_order_sparse_output = self.FM(concat_sparse_embeds)\n",
224 |     "        fc_layer_output = self.DNN(concat_sparse_embeds)\n",
225 |     "        \n",
226 |     "        # 输出部分\n",
227 |     "        output = Add()([linear_output, snd_order_sparse_output, fc_layer_output])\n",
228 |     "        output = Activation('sigmoid')(output)\n",
229 |     "        \n",
230 |     "        model = Model(inputs=all_inputs, outputs=output)\n",
231 |     "        return model"
232 |    ]
233 |   },
234 |   {
235 |    "cell_type": "code",
236 |    "execution_count": 3,
237 |    "metadata": {
238 |     "scrolled": true
239 |    },
240 |    "outputs": [],
241 |    "source": [
242 |     "train_data = total_data.loc[:500000-1]\n",
243 |     "valid_data = total_data.loc[500000:]\n",
244 |     "\n",
245 |     "train_dense_x_all = [train_data[f].values for f in dense_feats]\n",
246 |     "train_sparse_x_all = [train_data[f].values for f in sparse_feats]\n",
247 |     "train_label_all = train_data[['label']].values\n",
248 |     "\n",
249 |     "val_dense_x_all = [valid_data[f].values for f in dense_feats]\n",
250 |     "val_sparse_x_all = [valid_data[f].values for f in sparse_feats]\n",
251 |     "val_label_all = valid_data[['label']].values"
252 |    ]
253 |   },
254 |   {
255 |    "cell_type": "code",
256 |    "execution_count": 14,
257 |    "metadata": {},
258 |    "outputs": [
259 |     {
260 |      "name": "stdout",
261 |      "output_type": "stream",
262 |      "text": [
263 |       "1954/1954 [==============================] - 27s 14ms/step - loss: 0.5171 - binary_crossentropy: 0.5171 - auc: 0.7346 - val_loss: 0.4816 - val_binary_crossentropy: 0.4816 - val_auc: 0.7663 - lr: 0.0010\n"
264 |      ]
265 |     },
266 |     {
267 |      "data": {
268 |       "text/plain": [
269 |        "<tensorflow.python.keras.callbacks.History at 0x7fc618863450>"
270 |       ]
271 |      },
272 |      "execution_count": 14,
273 |      "metadata": {},
274 |      "output_type": "execute_result"
275 |     }
276 |    ],
277 |    "source": [
278 |     "model = DeepFM(dense_feats, sparse_feats, vocab_sizes).bulid_model()\n",
279 |     "model.compile(optimizer='rmsprop', loss='binary_crossentropy', \n",
280 |     "              metrics=['binary_crossentropy', tf.keras.metrics.AUC()])\n",
281 |     "\n",
282 |     "os.makedirs('checkpoints/model.h5', exist_ok=True)\n",
283 |     "checkpoints = ModelCheckpoint('checkpoints', monitor='val_auc', \n",
284 |     "                              mode='max', save_weights_only=True)# , save_best_only=True\n",
285 |     "early_stopping = EarlyStopping(monitor='val_auc', min_delta=0.0001, patience=5)\n",
286 |     "def scheduler(epoch):\n",
287 |     "    thred = 10\n",
288 |     "    if epoch < thred:\n",
289 |     "        return 0.001\n",
290 |     "    else:\n",
291 |     "        return 0.001 * tf.math.exp(0.1 * (thred - epoch))\n",
292 |     "lr_schedule = LearningRateScheduler(scheduler)\n",
293 |     "callbacks = [checkpoints, early_stopping, lr_schedule]\n",
294 |     "\n",
295 |     "\n",
296 |     "model.fit([train_dense_x_all, train_sparse_x_all], train_label_all, batch_size=256,\n",
297 |     "         validation_data=([val_dense_x_all, val_sparse_x_all], val_label_all),\n",
298 |     "         callbacks=callbacks, epochs=1)"
299 |    ]
300 |   },
301 |   {
302 |    "cell_type": "markdown",
303 |    "metadata": {},
304 |    "source": [
305 |     "# 附：继承 Model 的模型构建方法"
306 |    ]
307 |   },
308 |   {
309 |    "cell_type": "code",
310 |    "execution_count": 7,
311 |    "metadata": {},
312 |    "outputs": [],
313 |    "source": [
314 |     "class DeepFM(tf.keras.Model):\n",
315 |     "    def __init__(self, dense_feats, sparse_feats, vocab_sizes, embed_dims=8):\n",
316 |     "        super().__init__()\n",
317 |     "        self.dense_feats = dense_feats\n",
318 |     "        self.sparse_feats = sparse_feats\n",
319 |     "        self.vocab_sizes = vocab_sizes\n",
320 |     "        self.embed_dims = embed_dims\n",
321 |     "        \n",
322 |     "        self.Linear = Linear(sparse_feats, vocab_sizes)\n",
323 |     "        self.SparseEmbedding = SparseEmbedding(sparse_feats, vocab_sizes, embed_dims=8)\n",
324 |     "        self.FM = FM()\n",
325 |     "        self.DNN = DNN()\n",
326 |     "        \n",
327 |     "    \n",
328 |     "    def call(self, inputs, training=True):\n",
329 |     "        dense_inputs, sparse_inputs = inputs[0], inputs[1]\n",
330 |     "        \n",
331 |     "        linear_output = self.Linear(inputs)\n",
332 |     "        concat_sparse_embeds = self.SparseEmbedding(sparse_inputs)\n",
333 |     "        snd_order_sparse_output = self.FM(concat_sparse_embeds)\n",
334 |     "        fc_layer_output = self.DNN(concat_sparse_embeds)\n",
335 |     "        \n",
336 |     "        # 输出部分\n",
337 |     "        output = Add()([linear_output, snd_order_sparse_output, fc_layer_output])\n",
338 |     "        output = Activation('sigmoid')(output)\n",
339 |     "        return output"
340 |    ]
341 |   },
342 |   {
343 |    "cell_type": "code",
344 |    "execution_count": 53,
345 |    "metadata": {
346 |     "scrolled": true
347 |    },
348 |    "outputs": [
349 |     {
350 |      "name": "stdout",
351 |      "output_type": "stream",
352 |      "text": [
353 |       "WARNING:tensorflow:Layer deep_fm_12 is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2.  The layer has dtype float32 because it's dtype defaults to floatx.\n",
354 |       "\n",
355 |       "If you intended to run this layer in float32, you can safely ignore this warning. If in doubt, this warning is likely only an issue if you are porting a TensorFlow 1.X model to TensorFlow 2.\n",
356 |       "\n",
357 |       "To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.\n",
358 |       "\n",
359 |       "train_loss 2.4098854 val_loss 2.483604\n",
360 |       "train_loss 1.3211236 val_loss 1.3194331\n",
361 |       "train_loss 1.169803 val_loss 1.0407462\n",
362 |       "train_loss 0.99219334 val_loss 1.059052\n",
363 |       "train_loss 0.8957213 val_loss 0.92486346\n",
364 |       "train_loss 0.9512948 val_loss 0.9190863\n",
365 |       "train_loss 0.95848316 val_loss 0.8585176\n",
366 |       "train_loss 0.8645132 val_loss 0.83732563\n",
367 |       "train_loss 0.7037978 val_loss 0.805529\n",
368 |       "train_loss 0.67349327 val_loss 0.8104939\n",
369 |       "train_loss 0.7041616 val_loss 0.7876595\n",
370 |       "train_loss 0.6040318 val_loss 0.7762838\n",
371 |       "train_loss 0.6661148 val_loss 0.7730615\n",
372 |       "train_loss 0.62362516 val_loss 0.7610869\n",
373 |       "train_loss 0.7019628 val_loss 0.7555212\n",
374 |       "train_loss 0.66617835 val_loss 0.7523162\n",
375 |       "train_loss 0.7438489 val_loss 0.7429388\n",
376 |       "train_loss 0.65911984 val_loss 0.73776776\n",
377 |       "train_loss 0.7577257 val_loss 0.72724354\n",
378 |       "train_loss 0.5841069 val_loss 0.7345725\n",
379 |       "train_loss 0.69067764 val_loss 0.72640127\n",
380 |       "train_loss 0.70591426 val_loss 0.7106958\n",
381 |       "train_loss 0.5872619 val_loss 0.7009569\n",
382 |       "train_loss 0.68808687 val_loss 0.7069153\n",
383 |       "train_loss 0.71475106 val_loss 0.69875795\n",
384 |       "train_loss 0.66280043 val_loss 0.6927632\n",
385 |       "train_loss 0.5748261 val_loss 0.70920014\n",
386 |       "train_loss 0.51971066 val_loss 0.6790053\n",
387 |       "train_loss 0.6533222 val_loss 0.6743224\n",
388 |       "train_loss 0.5787353 val_loss 0.6695126\n",
389 |       "train_loss 0.6206254 val_loss 0.6677863\n",
390 |       "train_loss 0.6333037 val_loss 0.65689474\n",
391 |       "train_loss 0.6053113 val_loss 0.65412337\n",
392 |       "train_loss 0.5766033 val_loss 0.6488665\n",
393 |       "train_loss 0.58827424 val_loss 0.6418533\n",
394 |       "train_loss 0.5509889 val_loss 0.6426741\n",
395 |       "train_loss 0.586426 val_loss 0.63931245\n",
396 |       "train_loss 0.5573069 val_loss 0.6267758\n",
397 |       "train_loss 0.5612141 val_loss 0.6449642\n",
398 |       "train_loss 0.5308564 val_loss 0.62057\n",
399 |       "train_loss 0.47453913 val_loss 0.6234291\n",
400 |       "train_loss 0.53106457 val_loss 0.6179022\n",
401 |       "train_loss 0.5241479 val_loss 0.6151297\n",
402 |       "train_loss 0.6813842 val_loss 0.6094231\n",
403 |       "train_loss 0.6254232 val_loss 0.608384\n",
404 |       "train_loss 0.5442903 val_loss 0.6020965\n",
405 |       "train_loss 0.48889655 val_loss 0.59901583\n",
406 |       "train_loss 0.5180689 val_loss 0.59522665\n",
407 |       "train_loss 0.57622343 val_loss 0.59929794\n",
408 |       "train_loss 0.5545541 val_loss 0.5887621\n",
409 |       "train_loss 0.55703753 val_loss 0.5847919\n",
410 |       "train_loss 0.5962651 val_loss 0.5867704\n",
411 |       "train_loss 0.6141354 val_loss 0.5895961\n",
412 |       "train_loss 0.44274747 val_loss 0.57974124\n",
413 |       "train_loss 0.53316665 val_loss 0.5761268\n",
414 |       "train_loss 0.498142 val_loss 0.57354474\n",
415 |       "train_loss 0.51469684 val_loss 0.5783324\n",
416 |       "train_loss 0.6005205 val_loss 0.5681326\n",
417 |       "train_loss 0.5296999 val_loss 0.5666884\n",
418 |       "train_loss 0.47100228 val_loss 0.5630498\n",
419 |       "train_loss 0.54450154 val_loss 0.5787666\n",
420 |       "train_loss 0.5576676 val_loss 0.55907017\n",
421 |       "train_loss 0.53484154 val_loss 0.5579099\n",
422 |       "train_loss 0.5364264 val_loss 0.5627258\n",
423 |       "train_loss 0.51488286 val_loss 0.55314064\n",
424 |       "train_loss 0.55697054 val_loss 0.5507671\n",
425 |       "train_loss 0.57481194 val_loss 0.550431\n",
426 |       "train_loss 0.55537355 val_loss 0.5489825\n",
427 |       "train_loss 0.56959105 val_loss 0.5468589\n",
428 |       "train_loss 0.5849153 val_loss 0.5559658\n",
429 |       "train_loss 0.6252235 val_loss 0.5475525\n",
430 |       "train_loss 0.56599003 val_loss 0.5416614\n",
431 |       "train_loss 0.45060825 val_loss 0.5402306\n",
432 |       "train_loss 0.4618894 val_loss 0.5392666\n",
433 |       "train_loss 0.5865662 val_loss 0.53793675\n",
434 |       "train_loss 0.52943933 val_loss 0.5399228\n",
435 |       "train_loss 0.46397635 val_loss 0.53526783\n",
436 |       "train_loss 0.5720602 val_loss 0.53363514\n",
437 |       "train_loss 0.53340673 val_loss 0.53278434\n",
438 |       "train_loss 0.5761976 val_loss 0.5318284\n",
439 |       "train_loss 0.5111011 val_loss 0.5346499\n",
440 |       "train_loss 0.5211141 val_loss 0.53015906\n",
441 |       "train_loss 0.4850332 val_loss 0.5295973\n",
442 |       "train_loss 0.5528135 val_loss 0.5284586\n",
443 |       "train_loss 0.5071026 val_loss 0.52804345\n",
444 |       "train_loss 0.5900887 val_loss 0.5259946\n",
445 |       "train_loss 0.46475852 val_loss 0.5291728\n",
446 |       "train_loss 0.51071024 val_loss 0.53127724\n",
447 |       "train_loss 0.5197077 val_loss 0.52651155\n",
448 |       "train_loss 0.52425003 val_loss 0.5265078\n",
449 |       "train_loss 0.5947874 val_loss 0.52251405\n",
450 |       "train_loss 0.6377196 val_loss 0.52052724\n",
451 |       "train_loss 0.481161 val_loss 0.51937187\n",
452 |       "train_loss 0.52613926 val_loss 0.5188099\n",
453 |       "train_loss 0.54683673 val_loss 0.52657354\n",
454 |       "train_loss 0.4866224 val_loss 0.5167897\n",
455 |       "train_loss 0.49941382 val_loss 0.5169294\n",
456 |       "train_loss 0.5138988 val_loss 0.51508343\n",
457 |       "train_loss 0.5252162 val_loss 0.5134446\n",
458 |       "train_loss 0.54643357 val_loss 0.5159939\n",
459 |       "train_loss 0.50454354 val_loss 0.51795447\n",
460 |       "train_loss 0.5477977 val_loss 0.51163554\n",
461 |       "train_loss 0.48235464 val_loss 0.51226854\n",
462 |       "train_loss 0.46278268 val_loss 0.5106911\n",
463 |       "train_loss 0.55143964 val_loss 0.5130235\n",
464 |       "train_loss 0.49793103 val_loss 0.5086262\n",
465 |       "train_loss 0.55516046 val_loss 0.5116377\n",
466 |       "train_loss 0.526183 val_loss 0.50830406\n",
467 |       "train_loss 0.4525234 val_loss 0.5073179\n",
468 |       "train_loss 0.48127568 val_loss 0.50913733\n",
469 |       "train_loss 0.5003133 val_loss 0.5108921\n",
470 |       "train_loss 0.5246837 val_loss 0.5055629\n",
471 |       "train_loss 0.5484116 val_loss 0.50500053\n",
472 |       "train_loss 0.5513848 val_loss 0.50540334\n",
473 |       "train_loss 0.5670711 val_loss 0.5034824\n",
474 |       "train_loss 0.53560483 val_loss 0.50356776\n",
475 |       "train_loss 0.4458433 val_loss 0.50354636\n",
476 |       "train_loss 0.5142056 val_loss 0.5023199\n",
477 |       "train_loss 0.5943471 val_loss 0.5016766\n",
478 |       "train_loss 0.51107144 val_loss 0.5018072\n",
479 |       "train_loss 0.4835248 val_loss 0.506819\n",
480 |       "train_loss 0.45860666 val_loss 0.50842124\n",
481 |       "train_loss 0.47358721 val_loss 0.49983543\n",
482 |       "train_loss 0.49508673 val_loss 0.5003009\n",
483 |       "train_loss 0.46116623 val_loss 0.49922407\n",
484 |       "train_loss 0.46439755 val_loss 0.49882585\n",
485 |       "train_loss 0.55467176 val_loss 0.49828482\n"
486 |      ]
487 |     },
488 |     {
489 |      "ename": "KeyboardInterrupt",
490 |      "evalue": "",
491 |      "output_type": "error",
492 |      "traceback": [
493 |       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
494 |       "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
495 |       "\u001b[0;32m<ipython-input-53-8f0fdeaec3c8>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m     20\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     21\u001b[0m     \u001b[0;32mwith\u001b[0m \u001b[0mtf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mGradientTape\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mtape\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 22\u001b[0;31m         \u001b[0mpred\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mtrain_dense_x\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtrain_sparse_x\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     23\u001b[0m         \u001b[0mloss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkeras\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlosses\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbinary_crossentropy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtrain_label\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpred\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     24\u001b[0m         \u001b[0mloss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreduce_mean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mloss\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
496 |       "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m    966\u001b[0m           with base_layer_utils.autocast_context_manager(\n\u001b[1;32m    967\u001b[0m               self._compute_dtype):\n\u001b[0;32m--> 968\u001b[0;31m             \u001b[0moutputs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcall\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcast_inputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    969\u001b[0m           \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_handle_activity_regularization\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    970\u001b[0m           \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_set_mask_metadata\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput_masks\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
497 |       "\u001b[0;32m<ipython-input-7-0ecc6b3cc7fa>\u001b[0m in \u001b[0;36mcall\u001b[0;34m(self, inputs, training)\u001b[0m\n\u001b[1;32m     18\u001b[0m         \u001b[0mlinear_output\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mLinear\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     19\u001b[0m         \u001b[0mconcat_sparse_embeds\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mSparseEmbedding\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msparse_inputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 20\u001b[0;31m         \u001b[0msnd_order_sparse_output\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mFM\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mconcat_sparse_embeds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     21\u001b[0m         \u001b[0mfc_layer_output\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDNN\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mconcat_sparse_embeds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     22\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
498 |       "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m    966\u001b[0m           with base_layer_utils.autocast_context_manager(\n\u001b[1;32m    967\u001b[0m               self._compute_dtype):\n\u001b[0;32m--> 968\u001b[0;31m             \u001b[0moutputs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcall\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcast_inputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    969\u001b[0m           \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_handle_activity_regularization\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    970\u001b[0m           \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_set_mask_metadata\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput_masks\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
499 |       "\u001b[0;32m<ipython-input-5-743cce7a7281>\u001b[0m in \u001b[0;36mcall\u001b[0;34m(self, concat_sparse_embeds)\u001b[0m\n\u001b[1;32m      6\u001b[0m         \u001b[0;31m# 先求和再求平方\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      7\u001b[0m         \u001b[0msum_embeds\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreduce_sum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mconcat_sparse_embeds\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 8\u001b[0;31m         \u001b[0msquare_sum_embeds\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mMultiply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0msum_embeds\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msum_embeds\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      9\u001b[0m         \u001b[0;31m# 先平方再求和\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     10\u001b[0m         \u001b[0msquare_embeds\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mMultiply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mconcat_sparse_embeds\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mconcat_sparse_embeds\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
500 |       "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m    807\u001b[0m     \u001b[0;31m# mode when all inputs can be traced back to `keras.Input()` (when building\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    808\u001b[0m     \u001b[0;31m# models using the functional API).\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 809\u001b[0;31m     \u001b[0mbuild_graph\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtf_utils\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mare_all_symbolic_tensors\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput_list\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    810\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    811\u001b[0m     \u001b[0;31m# Accept NumPy and scalar inputs by converting to Tensors.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
501 |       "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/utils/tf_utils.py\u001b[0m in \u001b[0;36mare_all_symbolic_tensors\u001b[0;34m(tensors)\u001b[0m\n\u001b[1;32m    324\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    325\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mare_all_symbolic_tensors\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtensors\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 326\u001b[0;31m   \u001b[0;32mreturn\u001b[0m \u001b[0mall\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mis_symbolic_tensor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtensor\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mtensor\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mtensors\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    327\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    328\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
502 |       "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/utils/tf_utils.py\u001b[0m in \u001b[0;36m<genexpr>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m    324\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    325\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mare_all_symbolic_tensors\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtensors\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 326\u001b[0;31m   \u001b[0;32mreturn\u001b[0m \u001b[0mall\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mis_symbolic_tensor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtensor\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mtensor\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mtensors\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    327\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    328\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
503 |       "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/utils/tf_utils.py\u001b[0m in \u001b[0;36mis_symbolic_tensor\u001b[0;34m(tensor)\u001b[0m\n\u001b[1;32m    352\u001b[0m     return (getattr(tensor, '_keras_history', False) or\n\u001b[1;32m    353\u001b[0m             not context.executing_eagerly())\n\u001b[0;32m--> 354\u001b[0;31m   \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtensor\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcomposite_tensor\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mCompositeTensor\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    355\u001b[0m     \u001b[0mcomponent_tensors\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnest\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mflatten\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtensor\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mexpand_composites\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    356\u001b[0m     \u001b[0;32mreturn\u001b[0m \u001b[0many\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mhasattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'graph'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mt\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mcomponent_tensors\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
504 |       "\u001b[0;32m~/anaconda3/lib/python3.7/abc.py\u001b[0m in \u001b[0;36m__instancecheck__\u001b[0;34m(cls, instance)\u001b[0m\n\u001b[1;32m    137\u001b[0m         \u001b[0;32mdef\u001b[0m \u001b[0m__instancecheck__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcls\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minstance\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    138\u001b[0m             \u001b[0;34m\"\"\"Override for isinstance(instance, cls).\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 139\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0m_abc_instancecheck\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcls\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minstance\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    140\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    141\u001b[0m         \u001b[0;32mdef\u001b[0m \u001b[0m__subclasscheck__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcls\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msubclass\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
505 |       "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
506 |      ]
507 |     }
508 |    ],
509 |    "source": [
510 |     "train_data = total_data.loc[:500000-1]\n",
511 |     "valid_data = total_data.loc[500000:]\n",
512 |     "\n",
513 |     "train_dense_x_all = np.array([train_data[[f]].values for f in dense_feats])\n",
514 |     "train_sparse_x_all = np.array([train_data[[f]].values for f in sparse_feats])\n",
515 |     "train_label_all = train_data[['label']].values\n",
516 |     "\n",
517 |     "val_dense_x_all = np.array([valid_data[[f]].values for f in dense_feats])\n",
518 |     "val_sparse_x_all = np.array([valid_data[[f]].values for f in sparse_feats])\n",
519 |     "val_label_all = valid_data[['label']].values\n",
520 |     "\n",
521 |     "\n",
522 |     "model = DeepFM(dense_feats, sparse_feats, vocab_sizes)\n",
523 |     "opt = tf.keras.optimizers.Adam(learning_rate=1e-3)\n",
524 |     "batch_size = 256\n",
525 |     "for i in range(total_data.shape[0]//batch_size):\n",
526 |     "    train_dense_x = list(train_dense_x_all[:,i*batch_size:(i+1)*batch_size,:])\n",
527 |     "    train_sparse_x = list(train_sparse_x_all[:,i*batch_size:(i+1)*batch_size,:])\n",
528 |     "    train_label = train_label_all[i*batch_size:(i+1)*batch_size]\n",
529 |     "    \n",
530 |     "    with tf.GradientTape() as tape:\n",
531 |     "        pred = model([train_dense_x, train_sparse_x])\n",
532 |     "        loss = tf.keras.losses.binary_crossentropy(train_label, pred)\n",
533 |     "        loss = tf.reduce_mean(loss)\n",
534 |     "    grads = tape.gradient(loss, model.variables)\n",
535 |     "    opt.apply_gradients(grads_and_vars=zip(grads, model.variables))\n",
536 |     "    \n",
537 |     "    \n",
538 |     "    if i%10 ==0:\n",
539 |     "        val_dense_x = list(val_dense_x_all)\n",
540 |     "        val_sparse_x = list(val_sparse_x_all)\n",
541 |     "        val_label = val_label_all\n",
542 |     "\n",
543 |     "        pred = model([val_dense_x, val_sparse_x])\n",
544 |     "        val_loss = tf.keras.losses.binary_crossentropy(val_label, pred)\n",
545 |     "        val_loss = tf.reduce_mean(val_loss)\n",
546 |     "        print('train_loss', loss.numpy(), 'val_loss', val_loss.numpy())"
547 |    ]
548 |   },
549 |   {
550 |    "cell_type": "code",
551 |    "execution_count": null,
552 |    "metadata": {},
553 |    "outputs": [],
554 |    "source": []
555 |   }
556 |  ],
557 |  "metadata": {
558 |   "kernelspec": {
559 |    "display_name": "Python 3",
560 |    "language": "python",
561 |    "name": "python3"
562 |   },
563 |   "language_info": {
564 |    "codemirror_mode": {
565 |     "name": "ipython",
566 |     "version": 3
567 |    },
568 |    "file_extension": ".py",
569 |    "mimetype": "text/x-python",
570 |    "name": "python",
571 |    "nbconvert_exporter": "python",
572 |    "pygments_lexer": "ipython3",
573 |    "version": "3.7.6"
574 |   }
575 |  },
576 |  "nbformat": 4,
577 |  "nbformat_minor": 4
578 | }
579 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # DeepCTR_tensorflow2
2 | Deep-learning based CTR models implemented with tensorflow2.0.  / 使用tensorflow2.0实现的一些基于深度学习的CTR模型
3 | 


--------------------------------------------------------------------------------
/xDeepFM.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 20,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import os\n",
 10 |     "import numpy as np\n",
 11 |     "import pandas as pd\n",
 12 |     "from tensorflow.keras.layers import *\n",
 13 |     "import tensorflow.keras.backend as K\n",
 14 |     "import matplotlib.pyplot as plt\n",
 15 |     "import tensorflow as tf\n",
 16 |     "from tensorflow.keras.models import Model\n",
 17 |     "from tensorflow.keras.utils import plot_model\n",
 18 |     "from tensorflow.keras.callbacks import *\n",
 19 |     "from sklearn.preprocessing import LabelEncoder\n",
 20 |     "# from tensorflow.keras.constraints import *\n",
 21 |     "\n",
 22 |     "%matplotlib inline"
 23 |    ]
 24 |   },
 25 |   {
 26 |    "cell_type": "markdown",
 27 |    "metadata": {},
 28 |    "source": [
 29 |     "# 准备数据"
 30 |    ]
 31 |   },
 32 |   {
 33 |    "cell_type": "code",
 34 |    "execution_count": 21,
 35 |    "metadata": {},
 36 |    "outputs": [],
 37 |    "source": [
 38 |     "path = '/disk/share/criteo/'\n",
 39 |     "data = pd.read_csv(path+'criteo_sampled_data.csv')\n",
 40 |     "cols = data.columns.values\n",
 41 |     "\n",
 42 |     "dense_feats = [f for f in cols if f[0] == \"I\"]\n",
 43 |     "sparse_feats = [f for f in cols if f[0] == \"C\"]\n",
 44 |     "\n",
 45 |     "def process_dense_feats(data, feats):\n",
 46 |     "    d = data.copy()\n",
 47 |     "    d = d[feats].fillna(0.0)\n",
 48 |     "    for f in feats:\n",
 49 |     "        d[f] = d[f].apply(lambda x: np.log(x+1) if x > -1 else -1)\n",
 50 |     "    \n",
 51 |     "    return d\n",
 52 |     "\n",
 53 |     "data_dense = process_dense_feats(data, dense_feats)\n",
 54 |     "\n",
 55 |     "vocab_sizes = {}\n",
 56 |     "def process_sparse_feats(data, feats):\n",
 57 |     "    d = data.copy()\n",
 58 |     "    d = d[feats].fillna(\"-1\")\n",
 59 |     "    for f in feats:\n",
 60 |     "        label_encoder = LabelEncoder()\n",
 61 |     "        d[f] = label_encoder.fit_transform(d[f])\n",
 62 |     "        vocab_sizes[f] = d[f].nunique() + 1\n",
 63 |     "    return d\n",
 64 |     "\n",
 65 |     "data_sparse = process_sparse_feats(data, sparse_feats)\n",
 66 |     "total_data = pd.concat([data_dense, data_sparse], axis=1)\n",
 67 |     "total_data['label'] = data['label']"
 68 |    ]
 69 |   },
 70 |   {
 71 |    "cell_type": "markdown",
 72 |    "metadata": {},
 73 |    "source": [
 74 |     "# 自定义层"
 75 |    ]
 76 |   },
 77 |   {
 78 |    "cell_type": "code",
 79 |    "execution_count": 22,
 80 |    "metadata": {},
 81 |    "outputs": [],
 82 |    "source": [
 83 |     "class SparseEmbedding(Layer):\n",
 84 |     "    def __init__(self, sparse_feats, vocab_sizes, embed_dims=8):\n",
 85 |     "        super().__init__()\n",
 86 |     "        self.sparse_feats = sparse_feats\n",
 87 |     "        self.vocab_sizes = vocab_sizes\n",
 88 |     "        self.embed_dims = embed_dims\n",
 89 |     "        \n",
 90 |     "        # 离散特征嵌入矩阵\n",
 91 |     "        self.sparse_embeds_mat = []\n",
 92 |     "        for idx, feat in enumerate(self.sparse_feats):\n",
 93 |     "            # reg = tf.keras.regularizers.l2(0.5)\n",
 94 |     "            emb = Embedding(input_dim=self.vocab_sizes[feat],\n",
 95 |     "                            output_dim=self.embed_dims,\n",
 96 |     "                            # embeddings_regularizer=reg,\n",
 97 |     "                            name=f'{feat}_emb')\n",
 98 |     "            self.sparse_embeds_mat.append(emb)\n",
 99 |     "        \n",
100 |     "    def call(self, sparse_inputs):\n",
101 |     "        sparse_embeds = []\n",
102 |     "        for idx, emb_mat in enumerate(self.sparse_embeds_mat):\n",
103 |     "            emb = emb_mat(sparse_inputs[idx])\n",
104 |     "            sparse_embeds.append(emb)\n",
105 |     "        concat_sparse_embeds = Concatenate(axis=1)(sparse_embeds)\n",
106 |     "        return concat_sparse_embeds"
107 |    ]
108 |   },
109 |   {
110 |    "cell_type": "code",
111 |    "execution_count": 28,
112 |    "metadata": {},
113 |    "outputs": [],
114 |    "source": [
115 |     "class Linear(Layer):\n",
116 |     "    def __init__(self, sparse_feats, vocab_sizes):\n",
117 |     "        super().__init__()\n",
118 |     "        \n",
119 |     "        # 离散特1d征嵌入矩阵\n",
120 |     "        self.sparse_1d_embeds = SparseEmbedding(sparse_feats, vocab_sizes, embed_dims=1)\n",
121 |     "        \n",
122 |     "        self.fc_dense = Dense(1)\n",
123 |     "        self.fc_sparse = Dense(1)\n",
124 |     "        \n",
125 |     "    def call(self, inputs):\n",
126 |     "        dense_inputs, sparse_inputs = inputs[0], inputs[1]\n",
127 |     "        # 线性部分\n",
128 |     "        concat_dense_inputs = Concatenate(axis=1)(dense_inputs)\n",
129 |     "        first_order_dense_layer = self.fc_dense(concat_dense_inputs)\n",
130 |     "        \n",
131 |     "        concat_sparse_embeds_1d = self.sparse_1d_embeds(sparse_inputs) \n",
132 |     "        flat_sparse_embeds_1d = Flatten()(concat_sparse_embeds_1d)\n",
133 |     "        first_order_sparse_layer = self.fc_sparse(flat_sparse_embeds_1d)\n",
134 |     "        \n",
135 |     "        linear_output = Add()([first_order_dense_layer, first_order_sparse_layer])\n",
136 |     "        return linear_output"
137 |    ]
138 |   },
139 |   {
140 |    "cell_type": "code",
141 |    "execution_count": 29,
142 |    "metadata": {
143 |     "scrolled": true
144 |    },
145 |    "outputs": [],
146 |    "source": [
147 |     "class cross_layer(Layer):\n",
148 |     "    def __init__(self, n_filters):\n",
149 |     "        super().__init__()\n",
150 |     "        # self.n_filters = n_filters \n",
151 |     "        self.con1d = Conv1D(filters=n_filters, kernel_size=1, strides=1)\n",
152 |     "        \n",
153 |     "    def call(self, inputs):\n",
154 |     "        x0, xl = inputs\n",
155 |     "        h = xl.shape[1]\n",
156 |     "        m = x0.shape[1]\n",
157 |     "        D = x0.shape[-1] # emb_dim\n",
158 |     "\n",
159 |     "        xl = tf.expand_dims(xl, -2) \n",
160 |     "        xl = tf.tile(xl, [1, 1, m, 1]) # ?, h, m, D\n",
161 |     "        x0 = tf.expand_dims(x0, -3) \n",
162 |     "        x0 = tf.tile(x0, [1, h, 1, 1]) # ?, h, m, D\n",
163 |     "        feature_maps = tf.multiply(xl, x0) # ?, h, m, D\n",
164 |     "\n",
165 |     "        # ?, h*m, D\n",
166 |     "        feature_maps = tf.reshape(feature_maps, [-1, h*m, D])\n",
167 |     "        # ?, D, h*m\n",
168 |     "        feature_maps = tf.transpose(feature_maps, [0,2,1])\n",
169 |     "        # ?, D, n_filters\n",
170 |     "        feature_maps = self.con1d(feature_maps)\n",
171 |     "        # ?, n_filters, D\n",
172 |     "        feature_maps = tf.transpose(feature_maps, [0,2,1])\n",
173 |     "        return feature_maps"
174 |    ]
175 |   },
176 |   {
177 |    "cell_type": "code",
178 |    "execution_count": 40,
179 |    "metadata": {},
180 |    "outputs": [],
181 |    "source": [
182 |     "class CIN(Layer):\n",
183 |     "    def __init__(self, n_layers, n_filters):\n",
184 |     "        super().__init__()\n",
185 |     "        self.cross_layers = []\n",
186 |     "        for i in range(n_layers):\n",
187 |     "            self.cross_layers.append(cross_layer(n_filters))\n",
188 |     "    def call(self, inputs):\n",
189 |     "        x0 = xl = inputs\n",
190 |     "        sum_poolings = []\n",
191 |     "        for layer in self.cross_layers:\n",
192 |     "            # ?, n_filters, D\n",
193 |     "            xl = layer([x0, xl])\n",
194 |     "            # ?, n_filters\n",
195 |     "            sum_poolings.append(tf.reduce_sum(xl, axis=-1))\n",
196 |     "            \n",
197 |     "        return tf.concat(sum_poolings, axis=-1) # ?, n_filters*n_layers\n",
198 |     "        "
199 |    ]
200 |   },
201 |   {
202 |    "cell_type": "code",
203 |    "execution_count": null,
204 |    "metadata": {},
205 |    "outputs": [],
206 |    "source": []
207 |   },
208 |   {
209 |    "cell_type": "code",
210 |    "execution_count": 41,
211 |    "metadata": {},
212 |    "outputs": [],
213 |    "source": [
214 |     "class DNN(Layer):\n",
215 |     "    def __init__(self, hid_units=[256,256,256], use_dropout=True, output_unit=16):\n",
216 |     "        super().__init__()\n",
217 |     "        self.hid_units = hid_units\n",
218 |     "        self.use_dropout = use_dropout\n",
219 |     "        self.output_unit = output_unit\n",
220 |     "        self.Dropout = Dropout(0.3)\n",
221 |     "        self.dense_layers = []\n",
222 |     "        for unit in self.hid_units:\n",
223 |     "            self.dense_layers.append(Dense(unit, activation='relu'))\n",
224 |     "        self.dense_layers.append(Dense(self.output_unit))\n",
225 |     "        \n",
226 |     "    def call(self, concat_sparse_embeds):\n",
227 |     "        flat_sparse_embed = Flatten()(concat_sparse_embeds)\n",
228 |     "        \n",
229 |     "        x = self.dense_layers[0](flat_sparse_embed)\n",
230 |     "        for dense in self.dense_layers[1:]:\n",
231 |     "            x = dense(x)\n",
232 |     "            if self.use_dropout:\n",
233 |     "                x = self.Dropout(x)\n",
234 |     "        return x"
235 |    ]
236 |   },
237 |   {
238 |    "cell_type": "markdown",
239 |    "metadata": {},
240 |    "source": [
241 |     "# 构建模型 (keras函数式)"
242 |    ]
243 |   },
244 |   {
245 |    "cell_type": "code",
246 |    "execution_count": 42,
247 |    "metadata": {},
248 |    "outputs": [],
249 |    "source": [
250 |     "class xDeepFM:\n",
251 |     "    def __init__(self, dense_feats, sparse_feats, vocab_sizes, \n",
252 |     "                 embed_dims=8, cross_layer_num=3):\n",
253 |     "        \n",
254 |     "        # 连续特征\n",
255 |     "        self.dense_inputs = []\n",
256 |     "        for feat in dense_feats:\n",
257 |     "            self.dense_inputs.append(Input(shape=1, name=feat))\n",
258 |     "            \n",
259 |     "        # 离散特征\n",
260 |     "        self.sparse_inputs = []\n",
261 |     "        for feat in sparse_feats:\n",
262 |     "            self.sparse_inputs.append(Input(shape=1, name=feat))\n",
263 |     "        \n",
264 |     "        self.SparseEmbedding = SparseEmbedding(sparse_feats, vocab_sizes, embed_dims=8)\n",
265 |     "        \n",
266 |     "        self.linear = Linear(sparse_feats, vocab_sizes)\n",
267 |     "        \n",
268 |     "        self.CIN = CIN(n_layers=3, n_filters=6)\n",
269 |     "        \n",
270 |     "        self.DNN = DNN()\n",
271 |     "        self.dense = Dense(1, activation='sigmoid')\n",
272 |     "        \n",
273 |     "    def bulid_model(self):\n",
274 |     "        all_inputs = [self.dense_inputs, self.sparse_inputs]\n",
275 |     "        linear_output = self.linear(all_inputs)\n",
276 |     "        \n",
277 |     "        # concat_dense_inputs = Concatenate(axis=1)(self.dense_inputs)\n",
278 |     "        \n",
279 |     "        concat_sparse_embeds = self.SparseEmbedding(self.sparse_inputs)\n",
280 |     "        # flatten_sparse_embeds = Flatten()(concat_sparse_embeds)\n",
281 |     "        \n",
282 |     "        # concat_inputs = Concatenate(axis=1)([flatten_sparse_embeds, concat_dense_inputs])\n",
283 |     "        cross_output = self.CIN(concat_sparse_embeds)\n",
284 |     "        \n",
285 |     "        fc_layer_output = self.DNN(concat_sparse_embeds)\n",
286 |     "        \n",
287 |     "        # 输出部分\n",
288 |     "        concat_layer = Concatenate()([cross_output, fc_layer_output])\n",
289 |     "        output = self.dense(concat_layer)\n",
290 |     "        \n",
291 |     "        model = Model(inputs=all_inputs, outputs=output)\n",
292 |     "        return model"
293 |    ]
294 |   },
295 |   {
296 |    "cell_type": "code",
297 |    "execution_count": 43,
298 |    "metadata": {
299 |     "scrolled": true
300 |    },
301 |    "outputs": [
302 |     {
303 |      "name": "stdout",
304 |      "output_type": "stream",
305 |      "text": [
306 |       "Epoch 1/3\n",
307 |       "WARNING:tensorflow:From /root/anaconda3/lib/python3.7/site-packages/tensorflow/python/ops/resource_variable_ops.py:1817: calling BaseResourceVariable.__init__ (from tensorflow.python.ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version.\n",
308 |       "Instructions for updating:\n",
309 |       "If using Keras pass *_constraint arguments to layers.\n",
310 |       "1954/1954 [==============================] - 35s 18ms/step - loss: 0.5071 - binary_crossentropy: 0.5071 - auc: 0.7214 - val_loss: 0.5005 - val_binary_crossentropy: 0.5005 - val_auc: 0.7355 - lr: 0.0010\n",
311 |       "Epoch 2/3\n",
312 |       " 914/1954 [=============>................] - ETA: 17s - loss: 0.4825 - binary_crossentropy: 0.4825 - auc: 0.7582"
313 |      ]
314 |     },
315 |     {
316 |      "ename": "_NotOkStatusException",
317 |      "evalue": "InvalidArgumentError: Error while reading CompositeTensor._type_spec.",
318 |      "output_type": "error",
319 |      "traceback": [
320 |       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
321 |       "\u001b[0;31m_NotOkStatusException\u001b[0m                     Traceback (most recent call last)",
322 |       "\u001b[0;32m<ipython-input-43-ff0fbfa3b92d>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m     31\u001b[0m model.fit([train_dense_x_all, train_sparse_x_all], train_label_all, batch_size=256,\n\u001b[1;32m     32\u001b[0m          \u001b[0mvalidation_data\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mval_dense_x_all\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mval_sparse_x_all\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mval_label_all\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 33\u001b[0;31m          callbacks=callbacks, epochs=3)\n\u001b[0m",
323 |       "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py\u001b[0m in \u001b[0;36m_method_wrapper\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m     64\u001b[0m   \u001b[0;32mdef\u001b[0m \u001b[0m_method_wrapper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     65\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_in_multi_worker_mode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m  \u001b[0;31m# pylint: disable=protected-access\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 66\u001b[0;31m       \u001b[0;32mreturn\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     67\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     68\u001b[0m     \u001b[0;31m# Running inside `run_distribute_coordinator` already.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
324 |       "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)\u001b[0m\n\u001b[1;32m    846\u001b[0m                 batch_size=batch_size):\n\u001b[1;32m    847\u001b[0m               \u001b[0mcallbacks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mon_train_batch_begin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 848\u001b[0;31m               \u001b[0mtmp_logs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtrain_function\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0miterator\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    849\u001b[0m               \u001b[0;31m# Catch OutOfRangeError for Datasets of unknown size.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    850\u001b[0m               \u001b[0;31m# This blocks until the batch has finished executing.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
325 |       "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwds)\u001b[0m\n\u001b[1;32m    578\u001b[0m         \u001b[0mxla_context\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mExit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    579\u001b[0m     \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 580\u001b[0;31m       \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    581\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    582\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0mtracing_count\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_tracing_count\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
326 |       "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py\u001b[0m in \u001b[0;36m_call\u001b[0;34m(self, *args, **kwds)\u001b[0m\n\u001b[1;32m    609\u001b[0m       \u001b[0;31m# In this case we have created variables on the first call, so we run the\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    610\u001b[0m       \u001b[0;31m# defunned version which is guaranteed to never create variables.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 611\u001b[0;31m       \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_stateless_fn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m  \u001b[0;31m# pylint: disable=not-callable\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    612\u001b[0m     \u001b[0;32melif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_stateful_fn\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    613\u001b[0m       \u001b[0;31m# Release the lock early so that multiple threads can perform the call\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
327 |       "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/tensorflow/python/eager/function.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   2417\u001b[0m     \u001b[0;34m\"\"\"Calls a graph function specialized to the inputs.\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2418\u001b[0m     \u001b[0;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_lock\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2419\u001b[0;31m       \u001b[0mgraph_function\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_maybe_define_function\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   2420\u001b[0m     \u001b[0;32mreturn\u001b[0m \u001b[0mgraph_function\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_filtered_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m  \u001b[0;31m# pylint: disable=protected-access\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2421\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
328 |       "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/tensorflow/python/eager/function.py\u001b[0m in \u001b[0;36m_maybe_define_function\u001b[0;34m(self, args, kwargs)\u001b[0m\n\u001b[1;32m   2735\u001b[0m           *args, **kwargs)\n\u001b[1;32m   2736\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2737\u001b[0;31m     \u001b[0mcache_key\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_cache_key\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   2738\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2739\u001b[0m     \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
329 |       "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/tensorflow/python/eager/function.py\u001b[0m in \u001b[0;36m_cache_key\u001b[0;34m(self, args, kwargs, include_tensor_ranks_only)\u001b[0m\n\u001b[1;32m   2573\u001b[0m       \u001b[0minputs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mkwargs\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2574\u001b[0m       input_signature = pywrap_tfe.TFE_Py_EncodeArg(inputs,\n\u001b[0;32m-> 2575\u001b[0;31m                                                     include_tensor_ranks_only)\n\u001b[0m\u001b[1;32m   2576\u001b[0m     \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2577\u001b[0m       \u001b[0;32mdel\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
330 |       "\u001b[0;31m_NotOkStatusException\u001b[0m: InvalidArgumentError: Error while reading CompositeTensor._type_spec."
331 |      ]
332 |     }
333 |    ],
334 |    "source": [
335 |     "train_data = total_data.loc[:500000-1]\n",
336 |     "valid_data = total_data.loc[500000:]\n",
337 |     "\n",
338 |     "train_dense_x_all = [train_data[f].values for f in dense_feats]\n",
339 |     "train_sparse_x_all = [train_data[f].values for f in sparse_feats]\n",
340 |     "train_label_all = train_data[['label']].values\n",
341 |     "\n",
342 |     "val_dense_x_all = [valid_data[f].values for f in dense_feats]\n",
343 |     "val_sparse_x_all = [valid_data[f].values for f in sparse_feats]\n",
344 |     "val_label_all = valid_data[['label']].values\n",
345 |     "\n",
346 |     "\n",
347 |     "model = xDeepFM(dense_feats, sparse_feats, vocab_sizes).bulid_model()\n",
348 |     "model.compile(optimizer='rmsprop', loss='binary_crossentropy', \n",
349 |     "              metrics=['binary_crossentropy', 'AUC']) # tf.keras.metrics.AUC()\n",
350 |     "\n",
351 |     "os.makedirs('checkpoints', exist_ok=True)\n",
352 |     "checkpoints = ModelCheckpoint('checkpoints/model.h5', monitor='val_auc', \n",
353 |     "                              mode='max', save_weights_only=True)# save_best_only=True\n",
354 |     "early_stopping = EarlyStopping(monitor='val_auc', min_delta=0.0001, patience=2)\n",
355 |     "def scheduler(epoch):\n",
356 |     "    thred = 10\n",
357 |     "    if epoch < thred:\n",
358 |     "        return 0.001\n",
359 |     "    else:\n",
360 |     "        return 0.001 * tf.math.exp(0.1 * (thred - epoch))\n",
361 |     "lr_schedule = LearningRateScheduler(scheduler)\n",
362 |     "callbacks = [early_stopping, lr_schedule, checkpoints] # \n",
363 |     "\n",
364 |     "\n",
365 |     "model.fit([train_dense_x_all, train_sparse_x_all], train_label_all, batch_size=256,\n",
366 |     "         validation_data=([val_dense_x_all, val_sparse_x_all], val_label_all),\n",
367 |     "         callbacks=callbacks, epochs=3)"
368 |    ]
369 |   },
370 |   {
371 |    "cell_type": "code",
372 |    "execution_count": 16,
373 |    "metadata": {},
374 |    "outputs": [],
375 |    "source": [
376 |     "# # 加载模型\n",
377 |     "# model = DCN(dense_feats, sparse_feats, vocab_sizes).bulid_model()\n",
378 |     "# model.load_weights('checkpoints/model.h5')"
379 |    ]
380 |   },
381 |   {
382 |    "cell_type": "code",
383 |    "execution_count": null,
384 |    "metadata": {},
385 |    "outputs": [],
386 |    "source": []
387 |   }
388 |  ],
389 |  "metadata": {
390 |   "kernelspec": {
391 |    "display_name": "Python 3",
392 |    "language": "python",
393 |    "name": "python3"
394 |   },
395 |   "language_info": {
396 |    "codemirror_mode": {
397 |     "name": "ipython",
398 |     "version": 3
399 |    },
400 |    "file_extension": ".py",
401 |    "mimetype": "text/x-python",
402 |    "name": "python",
403 |    "nbconvert_exporter": "python",
404 |    "pygments_lexer": "ipython3",
405 |    "version": "3.7.6"
406 |   }
407 |  },
408 |  "nbformat": 4,
409 |  "nbformat_minor": 4
410 | }
411 | 


--------------------------------------------------------------------------------