├── CIKM2019_Poster.pdf
├── README.md
├── SDM_CIKM2019.pdf
├── code
│   ├── config
│   │   └── task_config.json
│   ├── model_utils
│   │   ├── hyperparams.py
│   │   ├── model_helper.py
│   │   └── task_config.py
│   ├── models
│   │   ├── basic_modules.py
│   │   ├── deep_match.py
│   │   └── extra_modules.py
│   ├── parsers
│   │   └── model_feature_parser.py
│   └── train
│       ├── run.py
│       └── utils.py
└── data
    └── sample_data
        ├── sample_action.csv
        ├── sample_item.csv
        └── sample_user.csv
/CIKM2019_Poster.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alicogintel/SDM/ad898dd471d448ee2745ecc48c1a46b2af38e516/CIKM2019_Poster.pdf
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # SDM: Sequential Deep Matching Model for Online Large-scale Recommender System
2 | ## New Released Code!!!
3 | Thanks to the [DeepMatch Group](https://github.com/shenweichen/DeepMatch) members for providing the [doc](https://zhuanlan.zhihu.com/p/141411747) and [code](https://github.com/shenweichen/DeepMatch).
4 | 
5 | ## Demo Code
6 | Code (Python 2.7, TensorFlow 1.4) of the Sequential Deep Matching (SDM) model for the recommender system at Taobao.
7 | The current version contains only the core code of our model. Data processing and evaluation are performed on our internal cloud platform [ODPS](https://www.alibabacloud.com/campaign/10-year-anniversary).
8 | 
9 | ## Paper
10 | Here is the arXiv [link](https://arxiv.org/abs/1909.00385) (accepted by CIKM 2019).
11 | 
12 | Citation:
13 | ```
14 | @inproceedings{lv2019sdm,
15 |   title={SDM: Sequential deep matching model for online large-scale recommender system},
16 |   author={Lv, Fuyu and Jin, Taiwei and Yu, Changlong and Sun, Fei and Lin, Quan and Yang, Keping and Ng, Wilfred},
17 |   booktitle={Proceedings of the 28th ACM International Conference on Information and Knowledge Management},
18 |   pages={2635--2643},
19 |   year={2019},
20 |   organization={ACM}
21 | }
22 | ```
23 | 
24 | ## Dataset
25 | 
26 | **JD Dataset:** [raw data](https://drive.google.com/open?id=19PemKrhA8j-RZj0i20_j4ERcnzaxl5JZ), [train and test data](https://drive.google.com/open?id=1pam-_ojsKooRLVeOXEvbh3AwJ6S4IZ7B) used in the paper (TFRecord format).
27 | The schema of the raw data is shown in data/sample_data/.
28 | 
29 | ## Disclaimer
30 | This is an implementation for offline experiments on the JD dataset rather than the official online version.
31 | There may be differences between the results reported in the paper and those of this released code,
32 | because the former were obtained with distributed TensorFlow on our internal deep learning platform [PAI](https://data.aliyun.com/product/learn).
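## How the Pieces Fit Together (sketch)
The snippet below is a minimal sketch, not part of the repository, of how the components under `code/` fit together with the default `config/task_config.json`; the real entry point is `code/train/run.py`, which wires the same pieces up through TF flags. The TFRecord path, the `validation`/`num_epochs` overrides, and the session loop are illustrative assumptions.

```python
# Minimal usage sketch (assumption: the code/ directory is on PYTHONPATH,
# as run.py arranges via sys.path.append).
import tensorflow as tf

from model_utils.task_config import TaskConfig
from model_utils.hyperparams import create_hparams
from parsers.model_feature_parser import ModelFeatureParser
from models.deep_match import DeepMatch

# Overrides here are illustrative: validation=False makes input_fn_dataset
# return a parsed feature dict; num_epochs=1 makes the input pipeline finite.
task_config = TaskConfig({"validation": False, "num_epochs": 1},
                         "code/config/task_config.json")
hparams = create_hparams(task_config)

parser = ModelFeatureParser(hparams)
model = DeepMatch(parser, hparams)

# Placeholder path: point this at the released train TFRecord files.
features = model.input_fn_dataset(["path/to/train.tfrecord"])
train_op, global_step, run_dict = model.model_fn_train(features)

with tf.train.MonitoredTrainingSession() as sess:
    while not sess.should_stop():
        _, loss = sess.run([train_op, run_dict["loss"]])
```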
33 | -------------------------------------------------------------------------------- /SDM_CIKM2019.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alicogintel/SDM/ad898dd471d448ee2745ecc48c1a46b2af38e516/SDM_CIKM2019.pdf -------------------------------------------------------------------------------- /code/config/task_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "comment": "model config for SDM", 3 | "parameters": { 4 | "mode":"train", 5 | "num_units": 128, 6 | "unit_type": "lstm", 7 | "num_layers": 2, 8 | "num_residual_layers": 1, 9 | "forget_bias": 1.0, 10 | "dropout": 0.2, 11 | "max_gradient_norm": 5.0, 12 | "optimizer": "adagrad", 13 | "learning_rate": 0.1, 14 | "num_samples": 20000, 15 | "batch_size": 256, 16 | "last_step": 15000000, 17 | "loss_by_example": false, 18 | "num_buckets": 20, 19 | "model": "rnn", 20 | "vocab_size": 100000000 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /code/model_utils/hyperparams.py: -------------------------------------------------------------------------------- 1 | import collections 2 | from model_utils.task_config import TaskConfig 3 | 4 | TrainingHParams = collections.namedtuple('TrainingHParams', [ 5 | 'ps_num', 6 | 'mode', 7 | 'model', 8 | 'init_op', 9 | 'seed', 10 | 'init_weight', 11 | 'num_partitions', 12 | 'min_slice_size', 13 | 'batch_size', 14 | 'num_units', 15 | 'vocab_size', 16 | 'unit_type', 17 | 'num_layers', 18 | 'num_residual_layers', 19 | 'forget_bias', 20 | 'dropout', 21 | 'num_samples', 22 | 'optimizer', 23 | 'start_decay_step', 24 | 'learning_rate', 25 | 'decay_steps', 26 | 'decay_factor', 27 | 'colocate_gradients_with_ops', 28 | 'max_gradient_norm', 29 | 'last_step', 30 | 'topK', 31 | 'num_epochs', 32 | 'shuffle', 33 | 'loss_by_example', 34 | 'attention_window_size', 35 | 'num_buckets', 36 | 'num_heads', 37 | 'max_length', 38 | 'input_fn', 39 | 'item_fc_trans', 40 | 'user_fc_trans', 41 | 'nn_init_op', 42 | "bucket_size", 43 | "embedding_size", 44 | "self_attn_ffn", 45 | "split_size", 46 | "num_labels", 47 | "softmax", 48 | "user_residual", 49 | "partn_strgy", 50 | "validation", 51 | "train_len", 52 | "test_interval", 53 | "STAMP", 54 | "NARM", 55 | "attn_layer_norm", 56 | "rnn_layer_norm", 57 | "user_attn", 58 | "prefer_avg_pooling", 59 | "rnn_hidden_units", 60 | "attn_fc", 61 | "num_multi_head", 62 | "wait_time", 63 | "user_id_only", 64 | "item_id_only", 65 | "fusion_op", 66 | "prefer_fc", 67 | "g_units_one", 68 | "input_user_feature", 69 | "use_user_id", 70 | ]) 71 | 72 | 73 | def create_hparams(task_config): 74 | 75 | return TrainingHParams( 76 | # basic 77 | ps_num=task_config.get_config_as_int("ps_num", 1), 78 | mode=task_config.get_config("mode", "train"), 79 | model=task_config.get_config("model", "rnn"), 80 | num_buckets=task_config.get_config_as_int("num_buckets", 10), 81 | max_length=task_config.get_config_as_int("max_length", 50), 82 | input_fn=task_config.get_config("input_fn", "data_set"), 83 | topK=task_config.get_config_as_int("topK", 20), 84 | num_epochs=task_config.get_config_as_int("num_epochs", None), 85 | shuffle=task_config.get_config_as_bool("shuffle", True), 86 | validation=task_config.get_config_as_bool("validation", True), 87 | train_len=task_config.get_config_as_int("train_len", None), 88 | test_interval=task_config.get_config_as_int("test_interval", 1), 89 | wait_time=task_config.get_config_as_int("wait_time", 1), 90 | 91 
| # initializer 92 | init_op=task_config.get_config("init_op", "uniform"), 93 | nn_init_op=task_config.get_config("nn_init_op", "orthogonal"), 94 | seed=task_config.get_config_as_int("seed", 2018), 95 | init_weight=task_config.get_config_as_float("init_weight", 0.1), 96 | 97 | # embedding partition 98 | num_partitions=task_config.get_config_as_int("num_partitions", None), 99 | min_slice_size=task_config.get_config_as_int("min_slice_size", 32), 100 | bucket_size={ 101 | "item": task_config.get_config_as_int("item_bucket_size", 10000000), 102 | "cate": task_config.get_config_as_int("cate_bucket_size", 60000), 103 | "brand": task_config.get_config_as_int("brand_bucket_size", 10000000), 104 | "shop": task_config.get_config_as_int("shop_bucket_size", 30000000), 105 | "user_id": task_config.get_config_as_int("user_id_bucket_size", 1000000), 106 | "age": task_config.get_config_as_int("age_bucket_size", 100), 107 | "sex": task_config.get_config_as_int("sex_bucket_size", 10), 108 | "user_lv_cd": task_config.get_config_as_int("user_lv_cd_bucket_size", 100), 109 | "city_level": task_config.get_config_as_int("city_level_bucket_size", 100), 110 | "province": task_config.get_config_as_int("province_bucket_size", 1000), 111 | "city": task_config.get_config_as_int("city_bucket_size", 1000), 112 | "country": task_config.get_config_as_int("country_bucket_size", 10000) 113 | }, 114 | embedding_size={ 115 | "item": task_config.get_config_as_int("item_embedding_size", 64), 116 | "cate": task_config.get_config_as_int("cate_embedding_size", 16), 117 | "brand": task_config.get_config_as_int("brand_embedding_size", 16), 118 | "shop": task_config.get_config_as_int("shop_embedding_size", 32), 119 | "user_id": task_config.get_config_as_int("user_id_embedding_size", 64), 120 | "age": task_config.get_config_as_int("age_embedding_size", 4), 121 | "sex": task_config.get_config_as_int("sex_embedding_size", 4), 122 | "user_lv_cd": task_config.get_config_as_int("user_lv_cd_embedding_size", 4), 123 | "city_level": task_config.get_config_as_int("city_level_embedding_size", 4), 124 | "province": task_config.get_config_as_int("province_embedding_size", 4), 125 | "city": task_config.get_config_as_int("city_embedding_size", 4), 126 | "country": task_config.get_config_as_int("country_embedding_size", 4) 127 | }, 128 | 129 | # network 130 | batch_size=task_config.get_config_as_int("batch_size", 256), 131 | num_units=task_config.get_config_as_int("num_units", 64), 132 | vocab_size=task_config.get_config_as_int("vocab_size"), 133 | unit_type=task_config.get_config("unit_type", "lstm"), 134 | num_layers=task_config.get_config_as_int("num_layers", 2), 135 | num_residual_layers=task_config.get_config_as_int("num_residual_layers", 1), 136 | forget_bias=task_config.get_config_as_float("forget_bias", 1.0), 137 | dropout=task_config.get_config_as_float("dropout", 0.2), 138 | num_samples=task_config.get_config_as_int("num_samples", 2000), 139 | attention_window_size=task_config.get_config_as_int("attention_window_size", None), 140 | num_heads=task_config.get_config_as_int("num_heads", 8), 141 | item_fc_trans=task_config.get_config_as_bool("item_fc_trans", False), 142 | user_fc_trans=task_config.get_config_as_bool("user_fc_trans", False), 143 | self_attn_ffn=task_config.get_config_as_bool("self_attn_ffn", False), 144 | user_residual=task_config.get_config_as_bool("user_residual", False), 145 | STAMP=task_config.get_config_as_bool("STAMP", False), 146 | NARM=task_config.get_config_as_bool("NARM", False), 147 | 
attn_layer_norm=task_config.get_config_as_bool("attn_layer_norm", True), 148 | rnn_layer_norm=task_config.get_config_as_bool("rnn_layer_norm", False), 149 | user_attn=task_config.get_config("user_attn", "general"), 150 | prefer_avg_pooling=task_config.get_config_as_bool("prefer_avg_pooling", False), 151 | rnn_hidden_units=task_config.get_config_as_int("rnn_hidden_units", 64), 152 | attn_fc=task_config.get_config_as_bool("attn_fc", False), 153 | num_multi_head=task_config.get_config_as_int("num_multi_head", 1), 154 | user_id_only=task_config.get_config_as_bool("user_id_only", False), 155 | item_id_only=task_config.get_config_as_bool("item_id_only", False), 156 | fusion_op=task_config.get_config("fusion_op", "gated"), 157 | prefer_fc=task_config.get_config_as_bool("prefer_fc", True), 158 | g_units_one=task_config.get_config_as_bool("g_units_one", False), 159 | input_user_feature=task_config.get_config_as_bool("input_user_feature", False), 160 | use_user_id=task_config.get_config_as_bool("use_user_id", True), 161 | 162 | # optimizer 163 | optimizer=task_config.get_config("optimizer", "adam"), 164 | start_decay_step=task_config.get_config_as_int("start_decay_step", 1600000), 165 | learning_rate=task_config.get_config_as_float("learning_rate", 1), 166 | decay_steps=task_config.get_config_as_int("decay_steps", 100000), 167 | decay_factor=task_config.get_config_as_float("decay_factor", 0.98), 168 | colocate_gradients_with_ops=task_config.get_config_as_bool("colocate_gradients_with_ops", True), 169 | max_gradient_norm=task_config.get_config_as_float("max_gradient_norm", 5.0), 170 | loss_by_example=task_config.get_config_as_bool("loss_by_example", False), 171 | last_step=task_config.get_config_as_int("last_step", 32000000), 172 | split_size=task_config.get_config_as_int("split_size", 1), 173 | num_labels=task_config.get_config_as_int("num_labels", 1), 174 | softmax=task_config.get_config("softmax", "sampled_softmax"), 175 | partn_strgy=task_config.get_config("partn_strgy", "mod") 176 | ) 177 | 178 | 179 | def create_flags(flags): 180 | flags.DEFINE_string("checkpointDir", "./", "checkpoint_dir") 181 | flags.DEFINE_string("model", "rnn,self_attn,personal,user_attn,prefer", "model") 182 | flags.DEFINE_string("mode", "train", "mode") 183 | flags.DEFINE_string("unit_type", "gru", "unit_type") 184 | flags.DEFINE_string("num_epochs", 10, "num_epochs") 185 | flags.DEFINE_string("batch_size", 256, "batch_size") 186 | flags.DEFINE_string("num_samples", 2000, "num_samples") 187 | flags.DEFINE_integer("split_size", 1, "split_size, batch split size, splited_samples share neg_samples") 188 | flags.DEFINE_integer("last_step", 15000000, "last_step") 189 | flags.DEFINE_string("user_id_embedding_size", 64, "user_id_embedding_size") 190 | flags.DEFINE_string("num_buckets", 1, "num_buckets") 191 | flags.DEFINE_string("shuffle", True, "shuffle") 192 | flags.DEFINE_string("loss_by_example", False, "loss_by_example") 193 | flags.DEFINE_string("user_residual", True, "user layer residual") 194 | flags.DEFINE_integer("vocab_size", 157371, "size of item pool") 195 | 196 | flags.DEFINE_string("learning_rate", 0.001, "learning_rate") 197 | flags.DEFINE_string("start_decay_step", 16000000, "start_decay_step") 198 | flags.DEFINE_string("decay_steps", 100000, "decay_steps") 199 | flags.DEFINE_string("decay_factor", 0.95, "decay_factor") 200 | flags.DEFINE_string("optimizer", "adagrad", "optimizer") 201 | flags.DEFINE_string("max_gradient_norm", 5.0, "max_gradient_norm") 202 | flags.DEFINE_integer("num_labels", 5, "multi labels") 
203 | flags.DEFINE_string("softmax", "sampled_softmax", "softmax layer") 204 | flags.DEFINE_string("partn_strgy", "div", "for inference or not") 205 | flags.DEFINE_string("validation", True, "validation or not") 206 | flags.DEFINE_integer("train_len", 1430824, "sample lens") 207 | flags.DEFINE_string("item_fc_trans", False, "itemid+general repre") 208 | flags.DEFINE_string("self_attn_ffn", False, "self_attn_ffn") 209 | flags.DEFINE_integer("test_interval", 1, "test_interval") 210 | 211 | flags.DEFINE_string("STAMP", False, "short term priority") 212 | flags.DEFINE_string("NARM", False, "neural attentive") 213 | flags.DEFINE_integer("num_heads", 4, "heads num for attention") 214 | flags.DEFINE_string("attn_layer_norm", True, "layer_norm attention") 215 | flags.DEFINE_string("rnn_layer_norm", False, "rnn_layer_norm") 216 | flags.DEFINE_string("user_attn", "general", "user attention layer choice") 217 | flags.DEFINE_string("prefer_avg_pooling", False, "prefer features avg pooling, otherwise user attn") 218 | flags.DEFINE_integer("rnn_hidden_units", 64, "rnn hidden size") 219 | flags.DEFINE_integer("num_layers", 1, "rnn layer num") 220 | flags.DEFINE_integer("num_residual_layers", 0, "residual layer num") 221 | flags.DEFINE_integer("item_embedding_size", 64, "residual layer num") 222 | flags.DEFINE_integer("num_units", 64, "softmax embedding size") 223 | flags.DEFINE_string("attn_fc", False, "attention fc") 224 | flags.DEFINE_integer("num_multi_head", 1, "number of transformers") 225 | flags.DEFINE_integer("wait_time", 1, "chief worker waiting time") 226 | flags.DEFINE_string("user_id_only", False, "only user id feature") 227 | flags.DEFINE_string("item_id_only", False, "only item id feature") 228 | flags.DEFINE_string("fusion_op", "gated", "fusion operation") 229 | flags.DEFINE_string("prefer_fc", True, "long rep fc to units") 230 | flags.DEFINE_string("g_units_one", False, "if scalar gate") 231 | flags.DEFINE_string("input_user_feature", False, "user feature added to input layer") 232 | flags.DEFINE_string("use_user_id", True, "user id feature") 233 | 234 | return flags 235 | 236 | 237 | def create_task_config(FLAGS, conf_file_path): 238 | FLAGS._parse_flags() 239 | task_config = TaskConfig(FLAGS.__flags, conf_file_path) 240 | return task_config 241 | -------------------------------------------------------------------------------- /code/model_utils/model_helper.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python.ops import partitioned_variables 3 | from tensorflow.python.platform import tf_logging as logging 4 | from tensorflow.python.framework import dtypes 5 | from tensorflow.python.ops import variable_scope 6 | from tensorflow.contrib import layers 7 | 8 | 9 | def get_initializer(init_op, seed=None, init_weight=None): 10 | """Create an initializer. 
init_weight is only for uniform.""" 11 | if init_op is None: 12 | return None 13 | if init_op == "uniform": 14 | assert init_weight 15 | return tf.random_uniform_initializer(-init_weight, init_weight, seed=seed) 16 | elif init_op == "normal": 17 | return tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=seed) 18 | elif init_op == "glorot_normal": 19 | return tf.contrib.keras.initializers.glorot_normal(seed=seed) 20 | elif init_op == "glorot_uniform": 21 | return tf.contrib.keras.initializers.glorot_uniform(seed=seed) 22 | elif init_op == "xavier": 23 | return tf.contrib.layers.xavier_initializer(seed=seed) 24 | elif init_op == "orthogonal": 25 | return tf.orthogonal_initializer() 26 | else: 27 | raise ValueError("Unknown init_op %s" % init_op) 28 | 29 | 30 | def get_emb_partitioner(num_partitions=None, min_slice_size=None, max_partitions=None): 31 | partitioner = None 32 | if num_partitions > 1: 33 | partitioner = tf.fixed_size_partitioner(num_partitions) 34 | elif min_slice_size is not None and max_partitions is not None: 35 | partitioner = partitioned_variables.min_max_variable_partitioner( 36 | max_partitions=max_partitions, 37 | min_slice_size=min_slice_size << 20) 38 | return partitioner 39 | 40 | 41 | def _single_cell(unit_type, num_units, forget_bias, dropout, 42 | mode, residual_connection=False): 43 | """Create an instance of a single RNN cell.""" 44 | # dropout (= 1 - keep_prob) is set to 0 during eval and infer 45 | logger_list = [] 46 | # Cell Type 47 | if unit_type == "lstm": 48 | logger_list.append(" LSTM, forget_bias=%g" % forget_bias) 49 | single_cell = tf.contrib.rnn.BasicLSTMCell(num_units, forget_bias=forget_bias) 50 | elif unit_type == "lstmblock": 51 | logger_list.append(" LSTM Block, forget_bias=%g" % forget_bias) 52 | single_cell = tf.contrib.rnn.LSTMBlockCell(num_units, forget_bias=forget_bias) 53 | elif unit_type == "lstmfused": 54 | logger_list.append(" LSTM Block Fused, forget_bias=%g" % forget_bias) 55 | single_cell = tf.contrib.rnn.LSTMBlockFusedCell(num_units, forget_bias=forget_bias) 56 | elif unit_type == "gru": 57 | logger_list.append(" GRU") 58 | single_cell = tf.contrib.rnn.GRUCell(num_units) 59 | elif unit_type == "layer_norm_lstm": 60 | logger_list.append(" Layer Normalized LSTM, forget_bias=%g" % forget_bias) 61 | single_cell = tf.contrib.rnn.LayerNormBasicLSTMCell(num_units, forget_bias=forget_bias, layer_norm=True) 62 | else: 63 | raise ValueError("Unknown unit type %s!" 
% unit_type) 64 | 65 | # Dropout (= 1 - keep_prob) 66 | dropout = dropout if mode == "train" else 0 67 | single_cell = tf.contrib.rnn.DropoutWrapper(cell=single_cell, input_keep_prob=(1.0 - dropout)) 68 | logger_list.append(" %s " % type(single_cell).__name__) 69 | 70 | # Residual 71 | if residual_connection: 72 | single_cell = tf.contrib.rnn.ResidualWrapper(single_cell) 73 | logger_list.append(" %s" % type(single_cell).__name__) 74 | logging.info("".join(logger_list)) 75 | 76 | return single_cell 77 | 78 | 79 | def _cell_list(unit_type, num_units, num_layers, num_residual_layers, 80 | forget_bias, dropout, mode, single_cell_fn=None): 81 | """Create a list of RNN cells.""" 82 | if not single_cell_fn: 83 | single_cell_fn = _single_cell 84 | 85 | cell_list = [] 86 | for i in range(num_layers): 87 | logging.info(" cell %d" % i) 88 | single_cell = single_cell_fn( 89 | unit_type=unit_type, 90 | num_units=num_units, 91 | forget_bias=forget_bias, 92 | dropout=dropout, 93 | mode=mode, 94 | residual_connection=(i >= num_layers - num_residual_layers) 95 | ) 96 | cell_list.append(single_cell) 97 | 98 | return cell_list 99 | 100 | 101 | def create_rnn_cell(unit_type, num_units, num_layers, num_residual_layers, 102 | forget_bias, dropout, mode, attention_window_size, single_cell_fn=None): 103 | """Create multi-layer RNN cell. 104 | 105 | Args: 106 | unit_type: string representing the unit type, i.e. "lstm". 107 | num_units: the depth of each unit. 108 | num_layers: number of cells. 109 | num_residual_layers: Number of residual layers from top to bottom. For 110 | example, if `num_layers=4` and `num_residual_layers=2`, the last 2 RNN 111 | cells in the returned list will be wrapped with `ResidualWrapper`. 112 | forget_bias: the initial forget bias of the RNNCell(s). 113 | dropout: floating point value between 0.0 and 1.0: 114 | the probability of dropout. this is ignored if `mode != train`. 115 | mode: either train/predict 116 | single_cell_fn: single_cell_fn: allow for adding customized cell. 117 | When not specified, we default to model_helper._single_cell 118 | Returns: 119 | An `RNNCell` instance. 120 | """ 121 | cell_list = _cell_list(unit_type=unit_type, 122 | num_units=num_units, 123 | num_layers=num_layers, 124 | num_residual_layers=num_residual_layers, 125 | forget_bias=forget_bias, 126 | dropout=dropout, 127 | mode=mode, 128 | single_cell_fn=single_cell_fn) 129 | 130 | if len(cell_list) == 1: # Single layer. 131 | final_cell = cell_list[0] 132 | else: # Multi layers 133 | final_cell = tf.contrib.rnn.MultiRNNCell(cell_list) 134 | 135 | # Attention Wrapper Cell 136 | if attention_window_size is not None: 137 | final_cell = tf.contrib.rnn.AttentionCellWrapper(final_cell, attention_window_size) 138 | return final_cell 139 | 140 | 141 | def gradient_clip(gradients, max_gradient_norm): 142 | """Clipping gradients of a model.""" 143 | clipped_gradients, gradient_norm = tf.clip_by_global_norm(gradients, max_gradient_norm) 144 | tf.summary.scalar("grad_norm", gradient_norm) 145 | tf.summary.scalar("clipped_gradient", tf.global_norm(clipped_gradients)) 146 | 147 | return clipped_gradients 148 | 149 | 150 | def extract_axis_1(data, ind): 151 | """ 152 | Get specified elements along the first axis of tensor. 153 | :param data: Tensorflow tensor that will be subsetted. 154 | :param ind: Indices to take (one for each element along axis 0 of data). 155 | :return: Subsetted tensor. 
156 | """ 157 | batch_range = tf.range(tf.shape(data)[0], dtype=tf.int32) 158 | indices = tf.stack([batch_range, ind], axis=1) 159 | res = tf.gather_nd(data, indices) 160 | return res 161 | 162 | 163 | def get_optimizer(hparams, _global_step): 164 | _learning_rate = tf.constant(hparams.learning_rate) 165 | opt = tf.train.GradientDescentOptimizer(hparams.learning_rate) 166 | if hparams.optimizer == "sgd": 167 | _learning_rate = tf.cond( 168 | _global_step < hparams.start_decay_step, 169 | lambda: tf.constant(hparams.learning_rate), 170 | lambda: tf.train.exponential_decay( 171 | hparams.learning_rate, 172 | (_global_step - hparams.start_decay_step), 173 | hparams.decay_steps, 174 | hparams.decay_factor, 175 | staircase=True), 176 | name="learning_rate") 177 | opt = tf.train.GradientDescentOptimizer(_learning_rate) 178 | elif hparams.optimizer == "adam": 179 | assert float(hparams.learning_rate) <= 0.001, "! High Adam learning rate %g" % hparams.learning_rate 180 | opt = tf.train.AdamOptimizer(hparams.learning_rate) 181 | elif hparams.optimizer == 'adagrad': 182 | opt = tf.train.AdagradOptimizer(hparams.learning_rate) 183 | elif hparams.optimizer == 'adadelta': 184 | opt = tf.train.AdadeltaOptimizer(hparams.learning_rate) 185 | elif hparams.optimizer == 'RMSprop': 186 | opt = tf.train.RMSPropOptimizer(hparams.learning_rate) 187 | tf.summary.scalar("lr", _learning_rate) 188 | return opt, _learning_rate 189 | 190 | 191 | def hash_bucket_embedding(name, bucket_size, dim, use_hashmap=False): 192 | if use_hashmap: 193 | id_feature = tf.contrib.layers.sparse_column_with_hash_bucket( 194 | column_name=name, hash_bucket_size=bucket_size, use_hashmap=True) 195 | else: 196 | id_feature = tf.contrib.layers.sparse_column_with_hash_bucket( 197 | column_name=name, hash_bucket_size=bucket_size) 198 | return tf.contrib.layers.embedding_column(sparse_id_column=id_feature, dimension=dim) 199 | 200 | 201 | def learned_positional_encoding(inputs, max_length, num_units): 202 | outputs = tf.range(tf.shape(inputs)[1]) # (T_q) 203 | outputs = tf.where(tf.greater_equal(outputs, max_length), tf.fill(tf.shape(outputs), max_length - 1), outputs) 204 | outputs = tf.expand_dims(outputs, 0) # (1, T_q) 205 | outputs = tf.tile(outputs, [tf.shape(inputs)[0], 1]) # (N, T_q) 206 | with variable_scope.variable_scope("embeddings") as scope: 207 | pos_embedding = tf.get_variable(name="pos_embedding", shape=[max_length, num_units], 208 | dtype=tf.float32) 209 | encoded = tf.nn.embedding_lookup(pos_embedding, outputs) 210 | return encoded 211 | 212 | 213 | def pointwise_feedforward(inputs, drop_out, is_training, num_units=None, activation=None): 214 | # Inner layer 215 | # outputs = tf.layers.conv1d(inputs, num_units[0], kernel_size=1, activation=activation) 216 | outputs = tf.layers.dense(inputs, num_units[0], activation=activation) 217 | outputs = tf.layers.dropout(outputs, drop_out, training=is_training) 218 | # Readout layer 219 | # outputs = tf.layers.conv1d(outputs, num_units[1], kernel_size=1, activation=None) 220 | outputs = tf.layers.dense(outputs, num_units[1], activation=None) 221 | 222 | # drop_out before add&norm 223 | outputs = tf.layers.dropout(outputs, drop_out, training=is_training) 224 | # Residual connection 225 | outputs += inputs 226 | # Normalize 227 | outputs = layer_norm(outputs) 228 | return outputs 229 | 230 | 231 | def layer_norm(inputs, epsilon=1e-8): 232 | mean, variance = tf.nn.moments(inputs, [-1], keep_dims=True) 233 | normalized = (inputs - mean) / (tf.sqrt(variance + epsilon)) 234 | 235 | 
params_shape = inputs.get_shape()[-1:] 236 | gamma = tf.get_variable('gamma', params_shape, tf.float32, tf.ones_initializer()) 237 | beta = tf.get_variable('beta', params_shape, tf.float32, tf.zeros_initializer()) 238 | 239 | outputs = gamma * normalized + beta 240 | return outputs 241 | 242 | 243 | def self_multi_head_attn(inputs, num_units, num_heads, key_masks, dropout_rate, is_training, is_layer_norm=True): 244 | """ 245 | Args: 246 | inputs(query): A 3d tensor with shape of [N, T_q, C_q] 247 | inputs(keys): A 3d tensor with shape of [N, T_k, C_k] 248 | """ 249 | if num_units is None: 250 | num_units = inputs.get_shape().as_list[-1] 251 | 252 | Q_K_V = tf.layers.dense(inputs, 3 * num_units) # tf.nn.relu 253 | Q, K, V = tf.split(Q_K_V, 3, -1) 254 | 255 | Q_ = tf.concat(tf.split(Q, num_heads, axis=2), axis=0) # (h*N, T_q, C/h) 256 | K_ = tf.concat(tf.split(K, num_heads, axis=2), axis=0) # (h*N, T_k, C/h) 257 | V_ = tf.concat(tf.split(V, num_heads, axis=2), axis=0) # (h*N, T_k, C/h) 258 | 259 | # (h*N, T_q, T_k) 260 | align = general_attention(Q_, K_) 261 | 262 | # (h*N, T_k) 263 | key_masks = tf.tile(key_masks, [num_heads, 1]) 264 | # (h*N, T_q, T_k) 265 | key_masks = tf.tile(tf.expand_dims(key_masks, 1), [1, tf.shape(inputs)[1], 1]) 266 | # (h*N, T_q, C/h) 267 | outputs = soft_max_weighted_sum(align, V_, key_masks, dropout_rate, is_training, future_binding=True) 268 | 269 | # Restore shape 270 | outputs = tf.concat(tf.split(outputs, num_heads, axis=0), axis=2) # (N, T_q, C) 271 | # output linear 272 | outputs = tf.layers.dense(outputs, num_units) 273 | 274 | # drop_out before residual and layernorm 275 | outputs = tf.layers.dropout(outputs, dropout_rate, training=is_training) 276 | # Residual connection 277 | outputs += inputs # (N, T_q, C) 278 | # Normalize 279 | if is_layer_norm: 280 | outputs = layer_norm(outputs) # (N, T_q, C) 281 | 282 | return outputs 283 | 284 | 285 | def concat_attention(query, key): 286 | """ 287 | :param query: [batch_size, 1, query_size] -> [batch_size, time, query_size] 288 | :param key: [batch_size, time, key_size] 289 | :return: [batch_size, 1, time] 290 | query_size should keep the same dim with key_size 291 | """ 292 | # TODO: only support 1D attention at present 293 | # query = tf.tile(query, [1, tf.shape(key)[1], 1]) 294 | # [batch_size, time, q_size+k_size] 295 | q_k = tf.concat([query, key], axis=-1) 296 | # [batch_size, time, 1] 297 | align = tf.layers.dense(q_k, 1, tf.nn.tanh) # tf.nn.relu old 298 | # scale (optional) 299 | align = align / (key.get_shape().as_list()[-1] ** 0.5) 300 | align = tf.transpose(align, [0, 2, 1]) 301 | return align 302 | 303 | 304 | def general_attention(query, key): 305 | """ 306 | :param query: [batch_size, None, query_size] 307 | :param key: [batch_size, time, key_size] 308 | :return: [batch_size, None, time] 309 | query_size should keep the same dim with key_size 310 | """ 311 | # [batch_size, None, time] 312 | align = tf.matmul(query, tf.transpose(key, [0, 2, 1])) 313 | # scale (optional) 314 | align = align / (key.get_shape().as_list()[-1] ** 0.5) 315 | return align 316 | 317 | 318 | def self_attention(inputs, num_units, key_masks, dropout_rate, is_training, is_layer_norm=True): 319 | """ 320 | Args: 321 | inputs(queries): A 3d tensor with shape of [N, T_q, C_q] 322 | inputs(keys): A 3d tensor with shape of [N, T_k, C_k] 323 | """ 324 | # if num_units is None: 325 | # num_units = inputs.get_shape().as_list[-1] 326 | 327 | # (N, T_q, C) 328 | # Q = tf.layers.dense(inputs, num_units, tf.nn.relu, name='unlinear_trans', 
reuse=tf.AUTO_REUSE) 329 | # (N, T_k, C) 330 | # K = tf.layers.dense(inputs, num_units, tf.nn.relu, name="unlinear_trans", reuse=tf.AUTO_REUSE) 331 | 332 | Q = inputs 333 | K = inputs 334 | V = inputs 335 | 336 | align = general_attention(Q, K) 337 | outputs = soft_max_weighted_sum(align, V, key_masks, dropout_rate, is_training, future_binding=True) 338 | 339 | # Residual connection 340 | # outputs += inputs # (N, T_q, C) 341 | if is_layer_norm: 342 | # Normalize 343 | outputs = layer_norm(outputs) # (N, T_q, C) 344 | return outputs 345 | 346 | 347 | def soft_max_weighted_sum(align, value, key_masks, drop_out, is_training, future_binding=False): 348 | """ 349 | :param align: [batch_size, None, time] 350 | :param value: [batch_size, time, units] 351 | :param key_masks: [batch_size, None, time] 352 | 2nd dim size with align 353 | :param drop_out: 354 | :param is_training: 355 | :param future_binding: TODO: only support 2D situation at present 356 | :return: weighted sum vector 357 | [batch_size, None, units] 358 | """ 359 | # exp(-large) -> 0 360 | paddings = tf.fill(tf.shape(align), float('-inf')) 361 | # [batch_size, None, time] 362 | align = tf.where(key_masks, align, paddings) 363 | 364 | if future_binding: 365 | length = tf.reshape(tf.shape(value)[1], [-1]) 366 | # [time, time] 367 | lower_tri = tf.ones(tf.concat([length, length], axis=0)) 368 | # [time, time] 369 | lower_tri = tf.contrib.linalg.LinearOperatorTriL(lower_tri).to_dense() 370 | # [batch_size, time, time] 371 | masks = tf.tile(tf.expand_dims(lower_tri, 0), [tf.shape(align)[0], 1, 1]) 372 | # [batch_size, time, time] 373 | align = tf.where(tf.equal(masks, 0), paddings, align) 374 | 375 | # soft_max and dropout 376 | # [batch_size, None, time] 377 | align = tf.nn.softmax(align) 378 | align = tf.layers.dropout(align, drop_out, training=is_training) 379 | # weighted sum 380 | # [batch_size, None, units] 381 | return tf.matmul(align, value) 382 | 383 | 384 | def sequence_feature_mask(columns_to_tensors, feature_columns, seq_len, avg_pooling=False, 385 | user_embedding=None, drop_out=0, is_training=True): 386 | # [batch_size, time, units] 387 | encoded = layers.sequence_input_from_feature_columns( 388 | columns_to_tensors=columns_to_tensors, 389 | feature_columns=feature_columns, 390 | scope="reuse_embedding" 391 | ) 392 | 393 | # [batch_size, time] 394 | key_masks = tf.sequence_mask(seq_len, tf.shape(encoded)[1], dtypes.float32) 395 | 396 | if avg_pooling: 397 | # [batch_size, time, 1] 398 | key_masks = tf.reshape(key_masks, [-1, tf.shape(encoded)[1], 1]) 399 | encoded = tf.multiply(encoded, key_masks) 400 | encoded = tf.reduce_sum(encoded, 1) / tf.reshape(tf.cast(seq_len, dtypes.float32), [-1, 1]) 401 | else: 402 | # [batch_size, 1, time] 403 | query = tf.tile(user_embedding, [1, tf.shape(encoded)[1], 1]) 404 | align = concat_attention(query, encoded) 405 | key_masks = tf.cast(key_masks, dtypes.bool) 406 | # [batch_size, 1, time] 407 | key_masks = tf.expand_dims(key_masks, 1) 408 | encoded = soft_max_weighted_sum(align, encoded, key_masks, drop_out, is_training) 409 | encoded = tf.squeeze(encoded, 1) 410 | # [batch_size, units] 411 | return encoded 412 | -------------------------------------------------------------------------------- /code/model_utils/task_config.py: -------------------------------------------------------------------------------- 1 | import ast 2 | import json 3 | from tensorflow.python.lib.io import file_io 4 | from tensorflow.python.platform import tf_logging as logging 5 | 6 | 7 | class TaskConfig(object): 8 
| def __init__(self, param_map=None, conf_file_path=None): 9 | self._param_map = {} 10 | try: 11 | config = json.load(file_io.FileIO(conf_file_path, 'r')) 12 | if not config: 13 | logging.error("config file not exists") 14 | if config['parameters']: 15 | self._param_map = config['parameters'] 16 | if param_map: 17 | self._param_map.update(param_map) 18 | except: 19 | logging.info("load conf error!") 20 | 21 | def get_config(self, config_name, default=None): 22 | return self._param_map.get(config_name, default) 23 | 24 | def get_config_as_int(self, config_name, default=None): 25 | value_str = self.get_config(config_name, default) 26 | return int(value_str) if value_str else value_str 27 | 28 | def get_config_as_float(self, config_name, default=None): 29 | value_str = self.get_config(config_name, default) 30 | return float(value_str) if value_str else value_str 31 | 32 | def get_config_as_bool(self, config_name, default=None): 33 | raw_value = self.get_config(config_name, default) 34 | if raw_value and isinstance(raw_value, bool): 35 | return raw_value 36 | elif raw_value and (isinstance(raw_value, str) or isinstance(raw_value, unicode)): 37 | return ast.literal_eval(raw_value) 38 | else: 39 | return False 40 | 41 | def get_config_as_list(self, config_name, default=None): 42 | raw_value = self.get_config(config_name, default) 43 | if raw_value and isinstance(raw_value, list): 44 | return raw_value 45 | else: 46 | return ast.literal_eval(raw_value) 47 | 48 | def contains(self, config_name): 49 | return config_name in self._param_map 50 | 51 | def add_config(self, key, value): 52 | self._param_map[key] = value 53 | 54 | def add_if_not_contain(self, key, value): 55 | if not self.contains(key): 56 | self.add_config(key, value) 57 | -------------------------------------------------------------------------------- /code/models/basic_modules.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.contrib import layers 3 | from tensorflow.python.ops import variable_scope 4 | from tensorflow.python.ops.nn_impl import sampled_softmax_loss 5 | from model_utils import model_helper 6 | from model_utils.model_helper import extract_axis_1, get_optimizer, layer_norm, sequence_feature_mask, \ 7 | self_multi_head_attn, self_attention, pointwise_feedforward, general_attention, concat_attention, \ 8 | soft_max_weighted_sum, learned_positional_encoding 9 | from tensorflow.python.framework import dtypes 10 | 11 | 12 | class BasicModules: 13 | def __init__(self, parser, hparams): 14 | self.hparams = hparams 15 | self.parser = parser 16 | self.num_units = self.hparams.num_units 17 | self.global_step = tf.train.get_or_create_global_step() 18 | self.initializer = model_helper.get_initializer(self.hparams.init_op, 19 | self.hparams.seed, 20 | self.hparams.init_weight) 21 | self.kernel_initializer = model_helper.get_initializer(self.hparams.nn_init_op, 22 | seed=self.hparams.seed) 23 | self.partitioner = model_helper.get_emb_partitioner(self.hparams.num_partitions, 24 | self.hparams.min_slice_size, 25 | self.hparams.ps_num) 26 | self.dropout = tf.placeholder(tf.float32, name="dropout") \ 27 | if self.hparams.validation else self.hparams.dropout 28 | self.is_training = self.hparams.mode == 'train' 29 | self.my_dict = {} 30 | 31 | def dataset_batch(self, params, dataset): 32 | def _parse_function(example_proto): 33 | features = tf.parse_single_example(example_proto, features=self.parser.feature_map) 34 | sparse2dense = {k: 
tf.sparse_tensor_to_dense(f, default_value=0) 35 | for k, f in features.iteritems() 36 | if isinstance(f, tf.SparseTensor) and f.dtype != tf.string} 37 | features.update(sparse2dense) 38 | sparse2dense = {k: tf.sparse_tensor_to_dense(f, default_value="0") 39 | for k, f in features.iteritems() 40 | if isinstance(f, tf.SparseTensor) and f.dtype == tf.string} 41 | features.update(sparse2dense) 42 | # tf.logging.info(features) 43 | return self.parser.output_one_example(features) 44 | 45 | # Bucket by source sequence length (buckets for lengths 0-9, 10-19, ...) 46 | def batching_func(x): 47 | tf_padded = {"fix": [], "int": tf.cast(0, tf.int64), "str": "0", 48 | "var": [None], "str_multi": "43,35,12,54,21"} 49 | padded_shapes = [] 50 | padded_values = [] 51 | for key in self.parser.input_keys: 52 | key_1 = key[1] 53 | key_2 = key[2] if key[0] != "multi_labels" else "str_multi" 54 | padded_shapes.append(tf_padded[key_1]) 55 | padded_values.append(tf_padded[key_2]) 56 | 57 | return x.padded_batch(params['batch_size'], 58 | padded_shapes=tuple(padded_shapes), 59 | padding_values=tuple(padded_values)) 60 | 61 | def key_func(src_len, *unused_list): 62 | # Calculate bucket_width by maximum source sequence length. 63 | # Pairs with length [0, bucket_width) go to bucket 0, length 64 | # [bucket_width, 2 * bucket_width) go to bucket 1, etc. Pairs with length 65 | # over ((num_bucket-1) * bucket_width) words all go into the last bucket. 66 | if self.hparams.max_length > 1: 67 | bucket_width = (self.hparams.max_length + params['num_buckets'] - 1) // params['num_buckets'] 68 | else: 69 | bucket_width = 5 70 | # Bucket sentence pairs by the length of their source sentence and target 71 | # sentence. 72 | bucket_id = src_len // bucket_width 73 | bucket_id = tf.cast(bucket_id, tf.int32) 74 | return tf.to_int64(tf.minimum(params['num_buckets'], bucket_id)) 75 | 76 | def reduce_func(unused_key, windowed_data): 77 | return batching_func(windowed_data) 78 | 79 | dataset = dataset.map(_parse_function, num_parallel_calls=32) 80 | dataset = dataset.repeat(params["epochs"]) 81 | if params['shuffle']: 82 | dataset = dataset.shuffle(buffer_size=10000, seed=self.hparams.seed) 83 | if params['num_buckets'] > 1 and params['mode'] == 'train': 84 | dataset = dataset.apply(tf.contrib.data.group_by_window(key_func=key_func, reduce_func=reduce_func, 85 | window_size=params["batch_size"])) 86 | elif params['mode'] == 'train' or params['mode'] == "test": 87 | dataset = batching_func(dataset) 88 | else: 89 | dataset = dataset.batch(params["batch_size"]) 90 | dataset = dataset.prefetch(buffer_size=1000) 91 | return dataset 92 | 93 | def input_fn_dataset(self, file_list, data_type="train"): 94 | if data_type == "test": 95 | params = {"mode": "test", "epochs": 1, "shuffle": False, "batch_size": 64, "num_buckets": 0} 96 | else: 97 | params = {"mode": self.hparams.mode, "epochs": self.hparams.num_epochs, "shuffle": self.hparams.shuffle, 98 | "batch_size": self.hparams.batch_size, "num_buckets": self.hparams.num_buckets} 99 | 100 | with tf.name_scope(data_type + '_input_fn') as scope: 101 | dataset = tf.data.TFRecordDataset(file_list) 102 | dataset = self.dataset_batch(params, dataset) 103 | if self.hparams.validation: 104 | return dataset 105 | else: 106 | iterator = dataset.make_one_shot_iterator() 107 | return self.parser.output_features(iterator) 108 | 109 | def create_item_embeddings(self, features): 110 | # soft_max 111 | with variable_scope.variable_scope("soft_max", values=None, partitioner=self.partitioner) as scope: 112 | 
nce_biases = tf.zeros([self.hparams.vocab_size], name='bias') 113 | nce_weights = tf.get_variable(name='weight', shape=[self.hparams.vocab_size, self.num_units], 114 | dtype=tf.float32, initializer=self.initializer) 115 | 116 | # input item embeddings 117 | with variable_scope.variable_scope("item_embeddings", partitioner=self.partitioner, 118 | initializer=self.initializer, reuse=tf.AUTO_REUSE) as scope: 119 | embeddings = self.parser.embedding_columns(feature_type="item") 120 | if self.hparams.item_id_only: 121 | encoded = layers.sequence_input_from_feature_columns( 122 | columns_to_tensors={"item_emb": features["item_ids"]}, 123 | feature_columns=[embeddings[0]], scope="reuse_embedding") 124 | else: 125 | encoded = layers.sequence_input_from_feature_columns( 126 | columns_to_tensors={"item_emb": features["item_ids"], 127 | "shop_emb": features["shop_ids"], 128 | "cate_emb": features["cate_ids"], 129 | "brand_emb": features["brand_ids"]}, 130 | feature_columns=embeddings, scope="reuse_embedding") 131 | if self.hparams.item_fc_trans: 132 | encoded = tf.layers.dense(encoded, self.num_units, tf.nn.tanh, 133 | kernel_initializer=self.kernel_initializer, 134 | name="item_fc") 135 | return nce_weights, nce_biases, encoded 136 | 137 | def create_user_embeddings(self, features): 138 | # input user embedding 139 | with variable_scope.variable_scope("user_embeddings", partitioner=self.partitioner, 140 | initializer=self.initializer) as scope: 141 | embeddings_fix = self.parser.embedding_columns(feature_type="user_fix", use_hashmap=True) 142 | if self.hparams.use_user_id and self.hparams.user_id_only: 143 | encoded = layers.input_from_feature_columns( 144 | columns_to_tensors={"user_id_emb": features["user_id"]}, 145 | feature_columns=[embeddings_fix[0]]) 146 | else: 147 | personal_encoded = [] 148 | profile_features = {} 149 | for fs_name in self.parser.embedding_user_features_fix: 150 | profile_features.update({fs_name + "_emb": features[fs_name]}) 151 | 152 | profile_encoded = layers.input_from_feature_columns( 153 | columns_to_tensors=profile_features, 154 | feature_columns=embeddings_fix) 155 | 156 | personal_encoded.append(profile_encoded) 157 | encoded = tf.concat(personal_encoded, -1) 158 | 159 | if self.hparams.user_fc_trans: 160 | encoded = tf.layers.dense(encoded, self.num_units, tf.nn.tanh, 161 | kernel_initializer=self.kernel_initializer) 162 | return encoded 163 | 164 | def create_prefer_embeddings(self, features, user_embedding): 165 | # input prefer item embeddings 166 | with variable_scope.variable_scope("item_embeddings", partitioner=self.partitioner, 167 | initializer=self.initializer, reuse=tf.AUTO_REUSE) as scope: 168 | embeddings = self.parser.embedding_columns(feature_type="item") 169 | feature_names = ["item", "shop", "brand", "cate"] 170 | if self.hparams.item_id_only: 171 | feature_names = [feature_names[0]] 172 | prefer_outputs = [] 173 | for i in range(len(feature_names)): 174 | key_emb = feature_names[i] + "_emb" 175 | value_emb = features["prefer_"+feature_names[i]+"s"] 176 | value_len = features[feature_names[i]+"s"+"_len"] 177 | prefer_encoded = sequence_feature_mask({key_emb: value_emb}, 178 | [embeddings[i]], 179 | value_len, 180 | avg_pooling=self.hparams.prefer_avg_pooling, 181 | user_embedding=user_embedding, 182 | drop_out=self.dropout, 183 | is_training=self.is_training) 184 | prefer_outputs.append(prefer_encoded) 185 | prefer_outputs = tf.concat(prefer_outputs, -1) 186 | if self.hparams.prefer_fc: 187 | prefer_outputs = tf.layers.dense(prefer_outputs, 
self.num_units, tf.nn.tanh, 188 | kernel_initializer=self.kernel_initializer, 189 | name="prefer_fc") 190 | return prefer_outputs 191 | 192 | def create_rnn_encoder(self, seq_len, inputs): 193 | with tf.variable_scope("encoder", values=None, 194 | initializer=model_helper.get_initializer(self.hparams.nn_init_op, seed=self.hparams.seed), 195 | partitioner=self.partitioner) as scope: 196 | cell = model_helper.create_rnn_cell(unit_type=self.hparams.unit_type, 197 | num_units=self.hparams.rnn_hidden_units, 198 | num_layers=self.hparams.num_layers, 199 | num_residual_layers=self.hparams.num_residual_layers, 200 | forget_bias=self.hparams.forget_bias, 201 | dropout=self.dropout, 202 | mode=self.hparams.mode, 203 | attention_window_size=self.hparams.attention_window_size) 204 | 205 | rnn_outputs, last_states = tf.nn.dynamic_rnn(cell=cell, dtype=tf.float32, 206 | sequence_length=seq_len, inputs=inputs) 207 | 208 | if self.hparams.rnn_layer_norm: 209 | rnn_outputs = layer_norm(rnn_outputs) 210 | 211 | return rnn_outputs, last_states 212 | 213 | def create_position_encoding(self, inputs): 214 | with tf.variable_scope('add_pos_encoding', initializer=self.initializer, partitioner=self.partitioner): 215 | pos_input = learned_positional_encoding(inputs, self.hparams.max_length, self.num_units) 216 | outputs = inputs + pos_input 217 | outputs = tf.layers.dropout(outputs, self.dropout, training=self.is_training) 218 | return outputs 219 | 220 | def create_self_attn(self, key_masks_1d, key_masks_2d, inputs): 221 | attn_outputs = inputs 222 | for layer in range(self.hparams.num_multi_head): 223 | with tf.variable_scope('self_attn_'+str(layer), partitioner=self.partitioner): 224 | if self.hparams.NARM: 225 | attn_outputs = self_attention(attn_outputs, num_units=self.num_units, 226 | key_masks=key_masks_2d, dropout_rate=self.dropout, 227 | is_training=self.is_training, 228 | is_layer_norm=self.hparams.attn_layer_norm) 229 | else: 230 | attn_outputs = self_multi_head_attn(attn_outputs, num_units=self.num_units, 231 | num_heads=self.hparams.num_heads, key_masks=key_masks_1d, 232 | dropout_rate=self.dropout, is_training=self.is_training, 233 | is_layer_norm=self.hparams.attn_layer_norm) 234 | with tf.variable_scope('ffn_'+str(layer), partitioner=self.partitioner): 235 | if self.hparams.self_attn_ffn: 236 | attn_outputs = pointwise_feedforward(attn_outputs, self.dropout, self.is_training, 237 | num_units=[self.num_units, self.num_units], # 4 * 238 | activation=tf.nn.relu) 239 | 240 | with tf.variable_scope('attn_concat', partitioner=self.partitioner): 241 | if self.hparams.STAMP: 242 | inputs = tf.layers.dense(inputs, self.num_units, tf.nn.tanh) 243 | attn_outputs = tf.layers.dense(attn_outputs, self.num_units, tf.nn.tanh) 244 | attn_outputs = tf.multiply(attn_outputs, inputs) 245 | 246 | if self.hparams.NARM and not self.hparams.STAMP: 247 | attn_outputs = tf.concat([attn_outputs, inputs], axis=-1) 248 | if self.hparams.attn_fc: 249 | attn_outputs = tf.layers.dense(attn_outputs, self.num_units) 250 | 251 | return attn_outputs 252 | 253 | def create_user_attn(self, key_masks, inputs, user_embedding_1d, user_embedding_2d): 254 | """ 255 | Args: 256 | user_embedding : [batch_size, user_embedding_size] 257 | inputs : [batch_size, time, num_units] 258 | key_masks: sequence mask, 2D tensor 259 | Returns: 260 | outputs : [batch_size, time, num_units] 261 | """ 262 | with tf.variable_scope('user_attn', partitioner=self.partitioner): 263 | # [batch_size, 1, num_units] 264 | # query = tf.expand_dims(user_embedding, 1) 265 
| key = inputs 266 | align = None 267 | if self.hparams.user_attn == 'general': 268 | query = tf.layers.dense(user_embedding_1d, self.num_units, tf.nn.tanh) 269 | align = general_attention(query, key) 270 | elif self.hparams.user_attn == 'concat': 271 | query = user_embedding_2d 272 | align = concat_attention(query, key) 273 | 274 | # [batch_size, time, time] 275 | align = tf.tile(align, [1, tf.shape(inputs)[1], 1]) 276 | outputs = soft_max_weighted_sum(align, key, key_masks, self.dropout, self.is_training, future_binding=True) 277 | 278 | if self.hparams.user_residual: 279 | outputs += inputs 280 | # outputs = layer_norm(outputs) 281 | 282 | return outputs 283 | 284 | def create_item_user_input(self, seq_input, user_embedding): 285 | with tf.variable_scope('item_user_feature', partitioner=self.partitioner): 286 | # user_embedding = tf.tile(tf.expand_dims(user_embedding, 1), [1, tf.shape(seq_input)[1], 1]) 287 | seq_input = tf.concat([seq_input, user_embedding], axis=-1) 288 | return seq_input 289 | 290 | def combine_long_short(self, short_rep, long_rep, user_embedding): 291 | """ 292 | short_rep: [batch_size, time, units] 293 | long_rep: [batch_size, units] 294 | user_embedding: [batch_size, units] 295 | """ 296 | with variable_scope.variable_scope("fusion", partitioner=self.partitioner) as scope: 297 | long_rep = tf.tile(tf.expand_dims(long_rep, 1), [1, tf.shape(short_rep)[1], 1]) 298 | if self.hparams.fusion_op == "add": 299 | outputs = long_rep + short_rep 300 | elif self.hparams.fusion_op == "multiply": 301 | outputs = tf.multiply(long_rep, short_rep) 302 | elif self.hparams.fusion_op == "concat": 303 | outputs = tf.concat([short_rep, long_rep], axis=-1) 304 | outputs = tf.layers.dense(outputs, self.num_units) 305 | elif self.hparams.fusion_op == "feature_gated": 306 | f_input = tf.concat([short_rep, long_rep], -1) 307 | f = tf.layers.dense(f_input, self.num_units, activation=tf.nn.tanh) 308 | g_input = tf.concat([short_rep, long_rep], -1) 309 | g = tf.layers.dense(g_input, self.num_units, activation=tf.sigmoid) 310 | outputs = tf.multiply(g, short_rep) + tf.multiply(1 - g, f) 311 | tf.summary.scalar("gate", tf.reduce_mean(g)) 312 | else: 313 | g_units = self.num_units 314 | if self.hparams.g_units_one: 315 | g_units = 1 316 | # user_embedding = tf.tile(tf.expand_dims(user_embedding, 1), [1, tf.shape(short_rep)[1], 1]) 317 | g_input = tf.concat([short_rep, long_rep, user_embedding], -1) 318 | g = tf.layers.dense(g_input, g_units, activation=tf.sigmoid) 319 | outputs = tf.multiply(g, short_rep) + tf.multiply(1 - g, long_rep) 320 | tf.summary.scalar("gate", tf.reduce_mean(g)) 321 | 322 | return outputs 323 | 324 | def calculate_loss(self, nce_weights, nce_biases, label_split, rnn_outputs_split, target_splits, batch_size, 325 | sampled_values=None): 326 | 327 | sampled_loss = sampled_softmax_loss( 328 | weights=nce_weights, 329 | biases=nce_biases, 330 | labels=label_split, 331 | inputs=rnn_outputs_split, 332 | num_sampled=self.hparams.num_samples, 333 | num_classes=self.hparams.vocab_size, 334 | num_true=self.hparams.num_labels, 335 | sampled_values=sampled_values, 336 | partition_strategy=self.hparams.partn_strgy 337 | ) 338 | 339 | sampled_loss = tf.reshape(sampled_loss, [batch_size, -1]) 340 | sampled_loss = tf.reduce_sum(sampled_loss * target_splits) 341 | 342 | return sampled_loss 343 | 344 | def create_split_optimizer(self, features, outputs, nce_weights, nce_biases): 345 | seq_len = tf.cast(features["seq_len"], dtypes.int32) 346 | batch_size = tf.shape(outputs)[0] 347 | 348 | 
with tf.variable_scope("loss") as scope: 349 | rnn_outputs_flat = tf.reshape(outputs, [-1, self.num_units]) 350 | num_labels = self.hparams.num_labels 351 | if num_labels > 1: 352 | multi_labels = tf.reshape(features["multi_labels"], [-1]) 353 | multi_labels = tf.string_split(multi_labels, delimiter=",").values 354 | multi_labels = tf.reshape(multi_labels, [-1, num_labels]) 355 | label_flat = tf.string_to_number(multi_labels, out_type=tf.int64) 356 | else: 357 | label_flat = tf.reshape(features["labels"], [-1, 1]) 358 | istarget = tf.sequence_mask(seq_len, tf.shape(outputs)[1], dtype=outputs.dtype) 359 | 360 | rnn_outputs_splits = tf.split(rnn_outputs_flat, num_or_size_splits=self.hparams.split_size, 361 | name="rnn_output_split", axis=0) 362 | label_splits = tf.split(label_flat, num_or_size_splits=self.hparams.split_size, 363 | name="label_split", axis=0) 364 | istarget_splits = tf.split(istarget, num_or_size_splits=self.hparams.split_size, 365 | name="istarget_split", axis=0) 366 | 367 | losses = [] 368 | 369 | i = 0 370 | for (rnn_outputs_split, label_split, target_split) in zip(rnn_outputs_splits, 371 | label_splits, 372 | istarget_splits): 373 | with tf.variable_scope("loss_" + str(i)) as scope: 374 | sampled_loss = self.calculate_loss(nce_weights, nce_biases, 375 | label_split, rnn_outputs_split, 376 | target_split, batch_size / self.hparams.split_size) 377 | losses.append(sampled_loss) 378 | i += 1 379 | 380 | all_loss = sum(losses) 381 | 382 | _mean_loss_by_example = all_loss / (tf.to_float(batch_size)) 383 | _mean_loss_by_pos = all_loss / (tf.reduce_sum(istarget)) 384 | if self.hparams.loss_by_example: 385 | _mean_loss = _mean_loss_by_example 386 | else: 387 | _mean_loss = _mean_loss_by_pos 388 | _mean_loss = tf.check_numerics(_mean_loss, "loss is nan of inf") 389 | 390 | with tf.variable_scope("metrics"): 391 | tf.summary.scalar("mean_loss_by_example", _mean_loss_by_example) 392 | tf.summary.scalar("mean_loss_by_pos", _mean_loss_by_pos) 393 | tf.summary.scalar("train_loss", _mean_loss) 394 | for i in range(self.hparams.split_size): 395 | tf.summary.scalar("sample_loss_" + str(i), losses[i]) 396 | 397 | with tf.variable_scope("optimizer") as scope: 398 | params = tf.trainable_variables() 399 | gradients = tf.gradients(_mean_loss, params, 400 | colocate_gradients_with_ops=self.hparams.colocate_gradients_with_ops) 401 | clipped_gradients = model_helper.gradient_clip(gradients=gradients, 402 | max_gradient_norm=self.hparams.max_gradient_norm) 403 | opt, _learning_rate = get_optimizer(self.hparams, self.global_step) 404 | train_op = opt.apply_gradients(zip(clipped_gradients, params), global_step=self.global_step) 405 | 406 | self.my_dict.update({ 407 | 'learning_rate': _learning_rate, 408 | 'loss': _mean_loss, 409 | 'drop_out': self.dropout 410 | }) 411 | 412 | if self.hparams.validation: 413 | with tf.variable_scope("validation"): 414 | last_output = extract_axis_1(outputs, seq_len - 1) 415 | logits = tf.matmul(last_output, tf.transpose(nce_weights)) + nce_biases 416 | top_item_ids = tf.nn.top_k(logits, k=self.hparams.topK).indices 417 | top_item_ids = tf.reshape(top_item_ids, [batch_size, self.hparams.topK]) 418 | 419 | self.my_dict.update({ 420 | 'top_items': top_item_ids, 421 | 'user_id': features['user_id'], 422 | 'ds': features['ds'], 423 | "weight": nce_weights.as_tensor(), 424 | 'user_embedding_output': last_output 425 | }) 426 | 427 | return train_op, self.my_dict 428 | -------------------------------------------------------------------------------- 
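The default "gated" branch of `combine_long_short` above implements the fusion gate o_t = G_t * s_t + (1 - G_t) * p, with G_t = sigmoid(W [s_t ; p ; e_u] + b), where s_t is the short-term sequence output, p the long-term preference representation, and e_u the user embedding. The following toy NumPy sketch (not repository code; the function name and shapes are illustrative) mirrors that computation:

```python
# Toy NumPy sketch (not repository code) of the default "gated" fusion in
# BasicModules.combine_long_short: a sigmoid gate mixes the short-term
# sequence output with the long-term preference vector.
import numpy as np

def gated_fusion(short_rep, long_rep, user_emb, W, b):
    """short_rep: [T, d]; long_rep, user_emb: [d]; W: [3d, d]; b: [d]."""
    T = short_rep.shape[0]
    long_tiled = np.tile(long_rep, (T, 1))   # tile p over time, as in the TF code
    user_tiled = np.tile(user_emb, (T, 1))   # tile e_u over time
    gate_in = np.concatenate([short_rep, long_tiled, user_tiled], axis=-1)
    gate = 1.0 / (1.0 + np.exp(-(np.dot(gate_in, W) + b)))  # sigmoid(dense(.))
    # o_t = G_t * s_t + (1 - G_t) * p
    return gate * short_rep + (1.0 - gate) * long_tiled

T, d = 5, 4
rng = np.random.RandomState(0)
out = gated_fusion(rng.randn(T, d), rng.randn(d), rng.randn(d),
                   rng.randn(3 * d, d), np.zeros(d))
print(out.shape)  # (5, 4)
```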
/code/models/deep_match.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from models.basic_modules import BasicModules 3 | from models.extra_modules import ExtraModules 4 | 5 | 6 | class DeepMatch(BasicModules, ExtraModules): 7 | 8 | def __init__(self, parser, hparams): 9 | BasicModules.__init__(self, parser, hparams) 10 | ExtraModules.__init__(self, parser, hparams) 11 | 12 | def create_sequence_mask(self, features, seq_input): 13 | with tf.variable_scope('seq_masks', partitioner=self.partitioner): 14 | seq_len = features['seq_len'] 15 | max_seq_len = tf.shape(seq_input)[1] 16 | if "shan" in self.hparams.model: 17 | seq_len = seq_len + 1 18 | max_seq_len = max_seq_len + 1 19 | # [batch_size, time] 20 | key_masks_1d = tf.sequence_mask(seq_len, max_seq_len) 21 | # [batch_size, time, time] 22 | key_masks_2d = tf.tile(tf.expand_dims(key_masks_1d, 1), [1, max_seq_len, 1]) 23 | return key_masks_1d, key_masks_2d 24 | 25 | def create_expand_seq_dim(self, input_1d, seq_input): 26 | with tf.variable_scope('expand_seq_dim', partitioner=self.partitioner): 27 | max_seq_len = tf.shape(seq_input)[1] 28 | input_1d = tf.expand_dims(input_1d, 1) 29 | input_2d = tf.tile(input_1d, [1, max_seq_len, 1]) 30 | return input_1d, input_2d 31 | 32 | def model_fn_train(self, features): 33 | nce_weights, nce_biases, seq_input = self.create_item_embeddings(features) 34 | user_embedding_1d, user_embedding_2d, user_embedding = None, None, None 35 | 36 | if "personal" in self.hparams.model: 37 | user_embedding = self.create_user_embeddings(features) 38 | user_embedding_1d, user_embedding_2d = self.create_expand_seq_dim(user_embedding, seq_input) 39 | if self.hparams.input_user_feature: 40 | seq_input = self.create_item_user_input(seq_input, user_embedding_2d) 41 | 42 | key_masks_1d, key_masks_2d = self.create_sequence_mask(features, seq_input) 43 | 44 | outputs = seq_input 45 | if "rnn" in self.hparams.model: 46 | outputs, last_states = self.create_rnn_encoder(features['seq_len'], seq_input) 47 | elif "dnn" in self.hparams.model: 48 | outputs = self.average_item_embedding(seq_input) 49 | elif "ahead_pos" in self.hparams.model: 50 | outputs = self.create_position_encoding(seq_input) 51 | 52 | if "self_attn" in self.hparams.model: 53 | outputs = self.create_self_attn(key_masks_1d, key_masks_2d, outputs) 54 | if "user_attn" in self.hparams.model: 55 | outputs = self.create_user_attn(key_masks_2d, outputs, user_embedding_1d, user_embedding_2d) 56 | 57 | if "prefer" in self.hparams.model: 58 | prefer_outputs = self.create_prefer_embeddings(features, user_embedding_1d) 59 | if "dnn" in self.hparams.model: 60 | outputs = self.create_dnn(outputs, prefer_outputs, user_embedding) 61 | elif "shan" in self.hparams.model: 62 | outputs = self.create_in_shan(outputs, prefer_outputs) 63 | outputs = self.create_user_attn(key_masks_2d, outputs, user_embedding_1d, user_embedding_2d) 64 | outputs = self.create_out_shan(outputs) 65 | else: 66 | outputs = self.combine_long_short(outputs, prefer_outputs, user_embedding_2d) 67 | 68 | train_op, my_dict = self.create_split_optimizer(features, outputs, nce_weights, nce_biases) 69 | return train_op, self.global_step, my_dict 70 | -------------------------------------------------------------------------------- /code/models/extra_modules.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python.framework import dtypes 3 | 4 | 5 | class ExtraModules: 6 | 
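    # ExtraModules holds the auxiliary components that DeepMatch mixes in
    # alongside BasicModules via multiple inheritance:
    #   - average_item_embedding: masked prefix-average pooling over the item
    #     sequence, used by the "dnn" model variant;
    #   - create_dnn: fuses item, long-term preference and user vectors with
    #     stacked dropout + ReLU dense layers;
    #   - create_in_shan / create_out_shan: prepend the long-term preference
    #     vector as an extra timestep before user attention and strip it off
    #     afterwards (SHAN-style).
    # Attributes such as self.partitioner, self.dropout and self.num_units come
    # from BasicModules.__init__, which is why __init__ here is a no-op.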
7 | def __init__(self, parser, hparams): 8 | pass 9 | 10 | def average_item_embedding(self, inputs): 11 | """ 12 | :param inputs: [batch_size, time, units] item embedding sequence 13 | :return: [batch_size, time, units]; position t holds the average of the first t embeddings (causal masked average pooling) 14 | """ 15 | with tf.variable_scope('item_avg_pooling', partitioner=self.partitioner): 16 | length = tf.reshape(tf.shape(inputs)[1], [-1]) 17 | # [time, time] 18 | lower_tri = tf.ones(tf.concat([length, length], axis=0)) 19 | # [time, time] 20 | lower_tri = tf.contrib.linalg.LinearOperatorTriL(lower_tri).to_dense() 21 | # [batch_size, time, time] 22 | masks = tf.tile(tf.expand_dims(lower_tri, 0), [tf.shape(inputs)[0], 1, 1]) 23 | # [batch_size, time, units] 24 | output = tf.matmul(masks, inputs) 25 | # [time] 26 | avg_num = tf.range(1, 1 + tf.shape(inputs)[1]) 27 | avg_num = tf.cast(avg_num, dtypes.float32) 28 | # [1, time, 1] 29 | avg_num = tf.reshape(avg_num, [1, tf.shape(avg_num)[0], 1]) 30 | # [batch_size, time, 1] 31 | avg_num = tf.tile(avg_num, [tf.shape(inputs)[0], 1, 1]) 32 | # [batch_size, time, units] 33 | output = tf.divide(output, avg_num) 34 | return output 35 | 36 | def create_dnn(self, item_embedding, prefer_embedding, user_embedding): 37 | """ 38 | :param item_embedding: [batch_size, time, units] sequence representation 39 | :param prefer_embedding: [batch_size, units] long-term preference representation 40 | :param user_embedding: [batch_size, units] user profile embedding 41 | :return: [batch_size, time, units] fused representation 42 | """ 43 | with tf.variable_scope('dnn', partitioner=self.partitioner): 44 | user_embedding = tf.tile(tf.expand_dims(user_embedding, 1), [1, tf.shape(item_embedding)[1], 1]) 45 | prefer_embedding = tf.tile(tf.expand_dims(prefer_embedding, 1), [1, tf.shape(item_embedding)[1], 1]) 46 | output = tf.concat([item_embedding, prefer_embedding, user_embedding], -1) 47 | output = tf.layers.dropout(output, self.dropout, training=self.is_training) 48 | output = tf.layers.dense(output, 4 * self.num_units, tf.nn.relu) 49 | output = tf.layers.dropout(output, self.dropout, training=self.is_training) 50 | output = tf.layers.dense(output, 2 * self.num_units, tf.nn.relu) 51 | output = tf.layers.dropout(output, self.dropout, training=self.is_training) 52 | output = tf.layers.dense(output, self.num_units, tf.nn.relu) 53 | return output 54 | 55 | def create_in_shan(self, item_embedding, prefer_embedding): 56 | """ 57 | :param item_embedding: [batch_size, time, units] short-term sequence representation 58 | :param prefer_embedding: long-term preference representation, reshaped here to [batch_size, 1, units] 59 | :return: [batch_size, time + 1, units] sequence with the preference vector 60 | prepended as an extra position; create_out_shan removes it again after 61 | the user attention step 62 | """ 63 | with tf.variable_scope('shan_in', partitioner=self.partitioner): 64 | prefer_embedding = tf.reshape(prefer_embedding, [-1, 1, tf.shape(prefer_embedding)[-1]]) 65 | output = tf.concat([prefer_embedding, item_embedding], 1) 66 | return output 67 | 68 | def create_out_shan(self, inputs): 69 | """ 70 | :param inputs: [batch_size, time + 1, units] attention output over the SHAN input 71 | :return: [batch_size, time, units] with the prepended preference position removed 72 | """ 73 | with tf.variable_scope('shan_out', partitioner=self.partitioner): 74 | output = tf.slice(inputs, [0, 1, 0], [tf.shape(inputs)[0], tf.shape(inputs)[1] - 1, tf.shape(inputs)[2]]) 75 | return output 76 | -------------------------------------------------------------------------------- /code/parsers/model_feature_parser.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from model_utils import model_helper 3 | 4 | 5 | class ModelFeatureParser(object): 6 | def __init__(self, hparams): 7 | self.hparams = hparams 8 | # tf record input data schema 9 | self.len_fix_int_keys = [("seq_len", "fix", "int"), ("items_len", "fix", "int"), 10 | ("shops_len", "fix", "int"), ("cates_len", "fix", "int"), ("brands_len", "fix", "int")] 11 | 12 | self.label_var_int_keys = [("labels", "var", "int")] 13 | 14 | self.label_var_str_keys = 
[("multi_labels", "var", "str")] 15 | 16 | self.item_feature_var_str_keys = [("item_ids", "var", "str"), ("shop_ids", "var", "str"), 17 | ("cate_ids", "var", "str"), ("brand_ids", "var", "str")] 18 | 19 | self.user_feature_var_str_keys = [("prefer_items", "var", "str"), ("prefer_shops", "var", "str"), 20 | ("prefer_cates", "var", "str"), ("prefer_brands", "var", "str")] 21 | 22 | self.user_feature_fix_str_keys = [("user_id", "fix", "str"), ("age", "fix", "str"), ("sex", "fix", "str"), 23 | ("user_lv_cd", "fix", "str"), ("city_level", "fix", "str"), 24 | ("province", "fix", "str"), ("city", "fix", "str"), ("country", "fix", "str")] 25 | 26 | # distinct sparse id feature 27 | self.embedding_item_features = ["item", "shop", "brand", "cate"] 28 | self.embedding_user_features_fix = ["user_id", "age", "sex", "user_lv_cd", "city_level", "province", "city", "country"] 29 | 30 | self.input_keys = self.len_fix_int_keys + self.label_var_int_keys + \ 31 | self.label_var_str_keys + self.item_feature_var_str_keys + \ 32 | self.user_feature_var_str_keys + self.user_feature_fix_str_keys + [("ds", "fix", "str")] 33 | 34 | tf_feature = {"fix_int": tf.FixedLenFeature([], dtype=tf.int64), 35 | "var_int": tf.VarLenFeature(dtype=tf.int64), 36 | "var_str": tf.VarLenFeature(dtype=tf.string), 37 | "fix_str": tf.FixedLenFeature([], dtype=tf.string)} 38 | self.feature_map = {} 39 | if self.hparams.mode == "train": 40 | for key in self.input_keys: 41 | self.feature_map.update({key[0]: tf_feature[key[1] + '_' + key[2]]}) 42 | 43 | def embedding_columns(self, feature_type, use_hashmap=False): 44 | sparse_features_emb = [] 45 | embedding_features = {"item": self.embedding_item_features, "user_fix": self.embedding_user_features_fix} 46 | # item or feature 47 | for fs_name in embedding_features[feature_type]: 48 | new_emb = model_helper.hash_bucket_embedding(fs_name+'_emb', self.hparams.bucket_size[fs_name], 49 | self.hparams.embedding_size[fs_name], 50 | use_hashmap=use_hashmap) 51 | sparse_features_emb.append(new_emb) 52 | return sparse_features_emb 53 | 54 | def output_one_example(self, features): 55 | if self.hparams.mode == "train": 56 | example = [] 57 | for key in self.input_keys: 58 | example.append(features[key[0]]) 59 | return example 60 | 61 | def output_features(self, iterator): 62 | if self.hparams.mode == "train": 63 | features = iterator.get_next() 64 | return {self.input_keys[i][0]: features[i] for i in range(len(self.input_keys))} 65 | -------------------------------------------------------------------------------- /code/train/run.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import sys 4 | import tensorflow as tf 5 | import traceback 6 | import numpy as np 7 | 8 | currentPath = os.path.split(os.path.realpath(__file__))[0] 9 | sys.path.append(currentPath + os.sep + '../') 10 | sys.path.append(currentPath + os.sep + '../..') 11 | 12 | from parsers.model_feature_parser import ModelFeatureParser 13 | from model_utils.hyperparams import create_hparams, create_flags, create_task_config 14 | from tensorflow.python.platform import tf_logging as logging 15 | from train.utils import parent_directory 16 | from models.deep_match import DeepMatch 17 | 18 | flags = tf.app.flags 19 | FLAGS = create_flags(flags).FLAGS 20 | 21 | 22 | def main(unused_argv): 23 | tf.logging.set_verbosity(tf.logging.INFO) 24 | 25 | config = tf.ConfigProto() 26 | config.gpu_options.allow_growth = True 27 | 28 | # Parse config parameters 29 | current_dir = 
os.path.dirname(os.path.abspath(__file__)) 30 | conf_file_path = os.path.join(os.path.join(parent_directory(current_dir), 'config/task_config.json')) 31 | logging.info("will use task conf file %s" % conf_file_path) 32 | task_config = create_task_config(FLAGS, conf_file_path) 33 | hparams = create_hparams(task_config=task_config) 34 | print hparams 35 | 36 | parser = ModelFeatureParser(hparams) 37 | model = DeepMatch(parser, hparams) 38 | 39 | # start the training 40 | try: 41 | run_validating(hparams=hparams, model=model) 42 | except Exception, e: 43 | logging.error("catch a exception: %s" % e.message) 44 | logging.error("exception is: %s" % traceback.format_exc()) 45 | raise Exception("terminate process!") 46 | 47 | 48 | def run_validating(hparams, model): 49 | acc_keys = ["user_id", "ds", "user_embedding_output"] 50 | 51 | # user defined function 52 | # you should write your own code here for reading and writing data 53 | train_file = get_your_train_files() 54 | test_file = get_your_test_files() 55 | writer = open_your_test_result_file() 56 | 57 | if not train_file or len(train_file) == 0 or not test_file or len(test_file) == 0: 58 | logging.error("End training directly since no train files or test files!") 59 | return 60 | 61 | logging.info("current_train_file: {}".format(train_file)) 62 | logging.info("current_test_file: {}".format(test_file)) 63 | 64 | checkpointDir = FLAGS.checkpointDir 65 | if not tf.gfile.Exists(checkpointDir): 66 | tf.gfile.MakeDirs(checkpointDir) 67 | with tf.gfile.FastGFile(os.path.join(checkpointDir, "hyperparams"), 'w') as f: 68 | f.write(str(hparams)) 69 | f.flush() 70 | f.close() 71 | 72 | train_data = model.input_fn_dataset(train_file, data_type="train") 73 | test_data = model.input_fn_dataset(test_file, data_type="test") 74 | 75 | iterator = tf.data.Iterator.from_structure(train_data.output_types, train_data.output_shapes) 76 | train_init_op = iterator.make_initializer(train_data) 77 | test_init_op = iterator.make_initializer(test_data) 78 | 79 | features = model.parser.output_features(iterator) 80 | 81 | train_op, global_step, my_dict = model.model_fn_train(features) 82 | 83 | steps_per_epoch = hparams.train_len // hparams.batch_size 84 | test_interval = hparams.test_interval 85 | epochs = test_interval 86 | 87 | config = tf.ConfigProto() 88 | 89 | chief_only_hooks = [tf.train.StepCounterHook()] 90 | drop_zero_dict = { 91 | my_dict['drop_out']: 0.0 92 | } 93 | 94 | drop_dict = { 95 | my_dict['drop_out']: hparams.dropout 96 | } 97 | summary_dir = os.path.join(FLAGS.checkpointDir, 'train') 98 | 99 | with tf.train.MonitoredTrainingSession(chief_only_hooks=chief_only_hooks, config=config) as sess: 100 | train_writer = tf.summary.MetricsWriter(summary_dir, sess.graph) 101 | step_ = 0 102 | sess.run(train_init_op, feed_dict=drop_zero_dict) 103 | while step_ < steps_per_epoch * hparams.num_epochs + 5: 104 | _, loss_, step_, lr_ = sess.run([train_op, my_dict["loss"], global_step, my_dict['learning_rate']], 105 | feed_dict=drop_dict) 106 | train_writer.add_scalar("loss", loss_, step_) 107 | train_writer.add_scalar("learning_rate", lr_, step_) 108 | if random.randint(1, 200) == 1: 109 | logging.info("[Epoch {}] {}_sampled_mean_loss: {}".format(epochs, step_, loss_)) 110 | if step_ >= steps_per_epoch * epochs: 111 | logging.info("[Epoch {}] Testing...".format(epochs)) 112 | sess.run(test_init_op, feed_dict=drop_zero_dict) 113 | weight = sess.run(my_dict['weight'], feed_dict=drop_zero_dict) 114 | logging.info(weight.shape) 115 | test_batch_counter = 0 116 | try: 117 
| while True: 118 | test_batch_counter += 1 119 | user_id, ds, user_vector = sess.run([my_dict[j] for j in acc_keys], feed_dict=drop_zero_dict) 120 | user_id = user_id.tolist() 121 | ds = ds.tolist() 122 | arr = np.matmul(user_vector, np.transpose(weight)) 123 | indices = np.argpartition(arr, -hparams.topK, axis=1)[:, -hparams.topK:] 124 | for num, p in enumerate(zip(user_id, indices, ds)): 125 | writer.write([p[0], ','.join(map(str, p[1])), epochs, p[2]]) 126 | except tf.errors.OutOfRangeError: 127 | logging.info("[Epoch {}] test batch counter {}..".format(epochs, test_batch_counter)) 128 | pass 129 | sess.run(train_init_op, feed_dict=drop_zero_dict) 130 | logging.info("[Epoch {}] Back to train...".format(epochs)) 131 | epochs += test_interval 132 | 133 | logging.info("*" * 20 + "End training.") 134 | 135 | 136 | if __name__ == '__main__': 137 | tf.app.run() 138 | -------------------------------------------------------------------------------- /code/train/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | import os 5 | 6 | 7 | def parent_directory(path, times=1): 8 | backup_path = path 9 | for i in range(times): 10 | backup_path = os.path.dirname(backup_path) 11 | return backup_path 12 | -------------------------------------------------------------------------------- /data/sample_data/sample_action.csv: -------------------------------------------------------------------------------- 1 | user_id,item_id,action_time,session_id 2 | 937922,357022,2018/2/4 8:28,8107857 3 | 937922,73,2018/2/4 8:27,8107857 4 | 937922,29583,2018/2/4 8:26,8107857 5 | 937922,108763,2018/2/4 8:26,8107857 6 | 1369473,331139,2018/2/3 21:55,3712240 7 | 1330642,69016,2018/2/1 12:47,1844129 8 | 1330642,211690,2018/2/1 12:48,1844129 9 | 1330642,322692,2018/2/1 12:48,1844129 10 | 1330642,19643,2018/2/1 12:47,1844129 11 | 411741,320293,2018/3/10 21:20,4680176 12 | 411741,291893,2018/3/10 21:21,4680176 13 | 1552476,319649,2018/3/29 6:27,8194740 14 | 1552476,110244,2018/3/29 6:22,8194740 15 | 1552476,343178,2018/3/29 6:27,8194740 16 | 742645,13795,2018/3/19 23:26,10771503 17 | 742645,50764,2018/3/19 23:27,10771503 18 | 1349460,210131,2018/2/27 14:32,4392084 19 | 1455298,208441,2018/4/11 15:21,6190659 20 | 1455298,334318,2018/4/11 15:14,6190659 21 | 1455298,237755,2018/4/11 15:14,6190659 22 | 1455298,6422,2018/4/11 15:22,6190659 23 | 1455298,268566,2018/4/11 15:14,6190659 24 | 1455298,115915,2018/4/11 15:13,6190659 25 | 1455298,208254,2018/4/11 15:22,6190659 26 | 1455298,177209,2018/4/14 14:09,6628254 27 | 1455298,71793,2018/4/14 14:10,6628254 28 | 1455298,141950,2018/4/12 15:37,10207258 29 | 1455298,97236,2018/4/12 15:46,8129309 30 | 1068082,277346,2018/3/3 23:14,6364387 31 | 67623,314054,2018/3/12 17:29,3455782 32 | 914860,177884,2018/3/4 14:16,4186146 33 | 914860,211134,2018/3/4 14:18,4186146 34 | 914860,298918,2018/3/4 14:20,4186146 35 | 914860,326156,2018/3/3 15:42,5422447 36 | 914860,254818,2018/3/3 15:41,5422447 37 | 1068082,128673,2018/4/13 8:40,1449113 38 | 931145,243514,2018/3/26 10:36,2341021 39 | 931145,321897,2018/3/26 10:41,2341021 40 | 931145,19120,2018/3/26 10:37,2341021 41 | 931145,229189,2018/3/26 10:30,2341021 42 | 931145,168630,2018/3/26 10:38,2341021 43 | 931145,287459,2018/3/26 10:32,2341021 44 | 931145,53531,2018/3/26 10:31,2341021 45 | 931145,199243,2018/3/26 10:35,2341021 46 | 931145,255210,2018/3/26 10:30,2341021 47 | 
1030562,161532,2018/2/18 22:20,290794 48 | 48912,241353,2018/3/13 18:05,2408273 49 | 48912,198785,2018/3/13 18:04,2408273 50 | 48912,283695,2018/3/13 18:04,2408273 51 | 48912,330122,2018/3/13 18:01,2408273 52 | 48912,223701,2018/3/13 18:04,2408273 53 | 843174,201651,2018/4/15 7:25,3077407 54 | 188522,245398,2018/2/28 10:13,4115259 55 | 102717,272462,2018/3/18 18:54,8017199 56 | 102717,92989,2018/3/18 18:55,8017199 57 | 102717,167081,2018/3/18 18:59,8017199 58 | 1177411,252529,2018/3/24 5:42,6564504 59 | 669643,194478,2018/4/8 19:24,7861652 60 | 1105549,318390,2018/3/21 14:52,10266611 61 | 1105549,253645,2018/3/21 14:53,10266611 62 | 1105549,10592,2018/3/21 14:55,10266611 63 | 854108,142082,2018/2/27 22:48,8246488 64 | 861709,262378,2018/2/11 12:24,10053112 65 | 125241,158355,2018/2/26 15:55,4265977 66 | 125241,156646,2018/2/26 15:56,4265977 67 | 102717,68275,2018/3/1 16:01,3224127 68 | 914860,35907,2018/2/25 14:54,10115485 69 | 914860,155219,2018/2/25 16:00,6518843 70 | 914860,34820,2018/2/25 15:54,6518843 71 | 914860,275374,2018/2/25 15:56,6518843 72 | 1359480,7737,2018/3/25 13:31,6187812 73 | 780789,194477,2018/3/6 6:02,2431464 74 | 780789,910,2018/3/6 6:26,2431464 75 | 780789,358167,2018/3/6 5:58,2431464 76 | 780789,19643,2018/3/6 5:59,2431464 77 | 1164554,28660,2018/2/1 20:51,11253914 78 | 742645,33865,2018/3/20 21:06,7902443 79 | 742645,248059,2018/3/21 8:09,8880819 80 | 914860,245991,2018/3/1 14:56,4800040 81 | 914860,48661,2018/3/1 14:57,4800040 82 | 1359480,129456,2018/3/18 8:06,9296468 83 | 1359480,133804,2018/4/7 10:01,3976607 84 | 1359480,86536,2018/4/7 10:02,3976607 85 | 931145,92067,2018/4/5 8:52,5711637 86 | 931145,34516,2018/4/5 8:47,5711637 87 | 931145,2890,2018/4/5 8:43,5711637 88 | 931145,313787,2018/4/5 8:51,5711637 89 | 931145,281189,2018/4/5 8:52,5711637 90 | 843174,236680,2018/3/23 1:53,9598328 91 | 1177411,86758,2018/3/3 16:06,8652446 92 | 1177411,221085,2018/2/18 7:58,4312641 93 | 1177411,191329,2018/2/18 7:55,4312641 94 | 1177411,280994,2018/2/18 8:00,4312641 95 | 1177411,11435,2018/2/18 8:04,4312641 96 | 1177411,233176,2018/2/18 8:06,4312641 97 | 1177411,157263,2018/2/18 7:57,4312641 98 | 1105549,160369,2018/3/17 14:10,4361756 99 | 188522,164058,2018/2/24 20:06,8581424 100 | 188522,245398,2018/2/24 20:08,8581424 -------------------------------------------------------------------------------- /data/sample_data/sample_item.csv: -------------------------------------------------------------------------------- 1 | item_id,brand,shop_id,cate, 226519,6302,2399,79, 63114,9167,4216,79, 372345,2748,7125,79, 366931,2698,10252,79, 174979,8368,871,79, 295436,6302,2399,79, 282251,6302,2399,79, 146764,6302,2399,79, 130851,6302,2399,79, 150184,2748,9541,79, 114345,6574,4071,79, 98950,8368,871,79, 188988,6574,4071,79, 106591,8103,7575,79, 251912,6111,1570,79, 140059,4217,7011,79, 110630,4217,7011,79, 196053,5192,652,79, 227772,9167,4216,79, 228234,5192,652,79, 271298,5192,652,79, 133835,3274,6700,79, 208319,5192,652,79, 313911,9167,4216,79, 151783,9268,1259,79, 5374,10435,8501,79, 134395,2698,10252,79, 228138,6302,2399,79, 73526,10614,9389,79, 86302,6302,2399,79, 217122,6302,2399,79, 14750,10435,8330,79, 80215,6302,7129,79, 253840,6302,7129,79, 354373,10435,6075,79, 180753,6302,1345,79, 284293,6302,1345,79, 188267,2300,2375,79, 133191,6302,1345,79, 195692,6302,1345,79, 207669,6302,1345,79, 94010,10435,8330,79, 262860,6302,1345,79, 252344,9167,4216,79, 173463,6302,6004,79, 212106,6302,6515,79, 220171,6302,7579,79, 169499,6302,7579,79, 215755,6302,7579,79, 276694,6302,8382,79, 
222024,6302,8382,79, 294204,6302,8382,79, 109604,6302,8382,79, 166892,6302,8382,79, 194915,6302,8382,79, 176506,6302,8382,79, 334983,6302,8382,79, 220101,6302,8382,79, 142410,10435,8330,79, 292099,6302,4348,79, 149415,6302,4348,79, 63999,6302,4348,79, 111022,6302,4348,79, 6144,6302,8382,79, 30167,6302,9311,79, 51969,6302,9311,79, 226975,6302,1616,79, 296097,6302,6586,79, 251503,6302,6586,79, 352183,9167,4216,79, 51930,5762,8382,79, 14937,5762,8382,79, 332781,10435,7950,79, 154392,6302,7650,79, 33575,10179,216,79, 100728,9167,4385,79, 344292,9167,4385,79, 367542,10435,7950,79, 325100,9167,4385,79, 352254,9167,4385,79, 58610,9167,4385,79, 179737,10435,6126,79, 315108,10435,6075,79, 359267,10435,6126,79, 9362,10435,6126,79, 101011,10435,1884,79, 135892,10435,6075,79, 343553,5762,6826,79, 39192,10435,7950,79, 362725,10435,7950,79, 117009,10435,7950,79, 254757,10435,7950,79, 229056,10435,7950,79, 287141,6302,5568,79, 166195,6302,5568,79, 345583,6302,5568,79, 305000,6302,9234,79, 19069,10435,5048,79, 331913,6302,9234,79, 2 | -------------------------------------------------------------------------------- /data/sample_data/sample_user.csv: -------------------------------------------------------------------------------- 1 | user_id,age,sex,user_lv_cd,city_level,province,city,county 2 | 1117472,5,1,6,5,30,149,2407 3 | 203661,5,1,6,5,30,149,2407 4 | 882636,2,0,7,5,30,149,2407 5 | 1495035,2,0,1,5,30,149,2407 6 | 929985,2,1,1,5,30,149,2407 7 | 684456,5,1,1,5,30,149,2407 8 | 740847,5,0,7,5,30,149,2407 9 | 538600,4,0,6,5,30,149,2407 10 | 402514,5,1,5,5,30,149,2407 11 | 1429509,4,0,5,5,30,149,2407 12 | 580012,5,1,1,5,30,149,2407 13 | 1054779,5,0,7,5,30,149,2407 14 | 146414,5,0,6,5,30,149,2407 15 | 1570039,4,0,1,5,30,149,2407 16 | 789137,6,0,6,5,30,149,2407 17 | 80381,5,0,1,5,30,149,2407 18 | 1146021,2,0,5,5,30,149,2407 19 | 730149,5,1,7,5,30,149,2407 20 | 1229930,5,1,5,5,30,149,2407 21 | 903792,5,1,5,5,30,149,2407 22 | 656442,5,1,1,5,30,149,2407 23 | 512385,6,0,1,5,30,149,2407 24 | 334786,5,0,6,5,30,149,2407 25 | 44483,5,1,1,5,30,149,2407 26 | 523737,4,1,5,5,30,149,2407 27 | 702347,5,1,1,5,30,149,2407 28 | 415747,4,0,1,5,30,149,2407 29 | 1569153,6,1,6,5,30,149,2407 30 | 1382694,4,1,1,5,30,149,2407 31 | 464712,6,0,5,5,30,149,2407 32 | 913142,4,1,6,5,30,149,2407 33 | 729097,4,0,7,5,30,149,2407 34 | 696529,5,1,1,5,30,149,2407 35 | 461675,1,0,1,5,30,149,2407 36 | 808711,4,1,6,5,30,149,2407 37 | 532217,6,1,6,5,30,149,2407 38 | 85693,2,0,6,5,30,149,2407 39 | 100963,5,0,1,5,30,149,2407 40 | 751194,5,0,6,5,30,149,2407 41 | 829948,2,-1,6,5,30,149,2407 42 | 506921,5,1,6,5,30,149,2407 43 | 537878,4,1,6,5,30,149,2407 44 | 352351,1,0,6,5,30,149,2407 45 | 208907,6,0,5,5,30,149,2407 46 | 185796,2,1,6,5,30,149,2407 47 | 564309,6,0,5,5,30,149,2407 48 | 1298329,6,1,1,5,30,149,2407 49 | 1365052,6,0,1,5,30,149,2407 50 | 951094,4,0,1,5,30,149,2407 51 | 1075853,6,1,1,5,30,149,2407 52 | 478383,1,1,6,5,30,149,2407 53 | 43966,6,1,6,5,30,149,2407 54 | 179008,4,0,1,5,30,149,2407 55 | 31279,4,1,6,5,30,149,2407 56 | 526369,1,0,4,5,30,149,2407 57 | 234619,4,0,6,5,30,149,2407 58 | 1269373,5,1,6,5,30,149,2407 59 | 115392,5,1,5,5,30,149,2407 60 | 1192799,5,0,6,5,30,149,2407 61 | 1452587,5,0,5,5,30,149,2407 62 | 829598,5,1,1,5,30,149,2407 63 | 1469900,5,0,6,5,30,149,2407 64 | 327722,5,1,6,5,30,149,2407 65 | 371501,6,1,5,5,30,149,2407 66 | 541536,5,0,7,5,30,149,2407 67 | 1161472,1,0,6,5,30,149,2407 68 | 1320654,6,0,5,5,30,149,2407 69 | 968528,4,1,6,5,30,149,2407 70 | 965120,4,1,6,5,30,149,2407 71 | 992568,5,0,1,5,30,149,2407 72 | 
14591,1,1,6,5,30,149,2407 73 | 893716,6,0,1,5,30,149,2407 74 | 145613,5,1,1,5,30,149,2407 75 | 671215,6,1,1,5,30,149,2407 76 | 1096950,5,0,1,5,30,149,2407 77 | 842543,6,1,1,5,30,149,2407 78 | 2655,5,0,5,5,30,149,2407 79 | 790750,6,0,5,5,30,149,2407 80 | 266009,5,0,1,5,30,149,2407 81 | 800645,5,0,1,5,30,149,2407 82 | 1543388,4,1,1,5,30,149,2407 83 | 91222,6,0,5,5,30,149,2407 84 | 502292,6,1,7,5,30,149,2407 85 | 222058,6,0,6,5,30,149,2407 86 | 430407,4,0,6,5,30,149,2407 87 | 1490779,5,1,5,5,30,149,2407 88 | 718115,4,1,7,5,30,149,2407 89 | 46428,4,1,6,5,30,149,2407 90 | 1086362,4,1,1,5,30,149,2407 91 | 1013903,6,1,5,5,30,149,2407 92 | 1215558,5,1,5,5,30,149,2407 93 | 594560,5,1,1,5,30,149,2407 94 | 100004,5,1,5,5,30,149,2407 95 | 395926,5,1,5,5,30,149,2407 96 | 311773,4,1,1,5,30,149,2407 97 | 633035,5,1,1,5,30,149,2407 98 | 1479930,5,1,1,5,30,149,2407 99 | 977783,5,1,1,5,30,149,2407 100 | 27711,6,0,1,5,30,149,2407 --------------------------------------------------------------------------------
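For reference, `ModelFeatureParser` (code/parsers/model_feature_parser.py) parses tfrecord examples with fixed-length int64 features (`seq_len`, `items_len`, `shops_len`, `cates_len`, `brands_len`), variable-length label features (`labels`, `multi_labels`), variable-length per-position item-side lists (`item_ids`, `shop_ids`, `cate_ids`, `brand_ids`), variable-length long-term preference lists (`prefer_*`), fixed-length string user-profile fields, and a `ds` partition field. (Note that the parser uses the key `country`, while data/sample_data/sample_user.csv names the corresponding column `county`.) The sketch below only illustrates that schema with made-up field values; the released train/test tfrecords were produced by the authors' ODPS pipeline, so the actual preprocessing may differ.

```python
# Illustrative only: build one tf.train.Example with the keys/types that
# ModelFeatureParser.feature_map expects. All field values here are made up.
import tensorflow as tf


def _int64(values):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=list(values)))


def _bytes(values):
    return tf.train.Feature(bytes_list=tf.train.BytesList(
        value=[v.encode("utf-8") for v in values]))


example = tf.train.Example(features=tf.train.Features(feature={
    # fixed-length int features
    "seq_len": _int64([4]), "items_len": _int64([4]), "shops_len": _int64([4]),
    "cates_len": _int64([4]), "brands_len": _int64([4]),
    # labels (variable length)
    "labels": _int64([357022]),
    "multi_labels": _bytes(["357022"]),
    # short-term session features, one id per position
    "item_ids": _bytes(["108763", "29583", "73", "357022"]),
    "shop_ids": _bytes(["2399", "4216", "7125", "871"]),
    "cate_ids": _bytes(["79", "79", "79", "79"]),
    "brand_ids": _bytes(["6302", "9167", "2748", "8368"]),
    # long-term preference features
    "prefer_items": _bytes(["331139", "69016"]),
    "prefer_shops": _bytes(["10252", "1570"]),
    "prefer_cates": _bytes(["79"]),
    "prefer_brands": _bytes(["2698", "6111"]),
    # fixed-length user profile features (keys follow the parser, not the raw csv header)
    "user_id": _bytes(["937922"]), "age": _bytes(["5"]), "sex": _bytes(["1"]),
    "user_lv_cd": _bytes(["6"]), "city_level": _bytes(["5"]),
    "province": _bytes(["30"]), "city": _bytes(["149"]), "country": _bytes(["2407"]),
    # partition/date field
    "ds": _bytes(["20180204"]),
}))

writer = tf.python_io.TFRecordWriter("sample_train.tfrecord")
writer.write(example.SerializeToString())
writer.close()
```

Parsing such a record with `tf.parse_single_example(serialized, parser.feature_map)` should yield the features consumed by `DeepMatch.model_fn_train` when `mode` is `train`.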
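train/run.py intentionally leaves data I/O to the user: `get_your_train_files`, `get_your_test_files` and `open_your_test_result_file` are placeholders ("you should write your own code here"), because the original experiments read and wrote tables on ODPS. A minimal local stand-in could look like the sketch below; it assumes the released tfrecord files sit in plain directories and that the result writer only needs to accept the list `[user_id, top-K indices, epoch, ds]` that `run_validating` passes to `writer.write`. All paths and names here are assumptions, not part of the released code.

```python
# Hypothetical local replacements for the user-defined hooks in train/run.py.
import glob
import os


def get_your_train_files(data_dir="./jd_tfrecord/train"):
    # All train-split tfrecord files under a local directory (path is an assumption).
    return sorted(glob.glob(os.path.join(data_dir, "*.tfrecord")))


def get_your_test_files(data_dir="./jd_tfrecord/test"):
    # All test-split tfrecord files under a local directory (path is an assumption).
    return sorted(glob.glob(os.path.join(data_dir, "*.tfrecord")))


class _TsvResultWriter(object):
    """Accepts the list run_validating passes in: [user_id, top-K indices, epoch, ds]."""

    def __init__(self, path):
        self._f = open(path, "w")

    def write(self, fields):
        self._f.write("\t".join(str(x) for x in fields) + "\n")

    def close(self):
        self._f.close()


def open_your_test_result_file(path="./test_user_topk.tsv"):
    return _TsvResultWriter(path)
```

These helpers would need to be importable from train/run.py (for example, by placing them in train/utils.py) before the demo can run outside the internal platform.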