├── .gitignore
├── README.md
├── nn
├── bag.py
├── cv.sh
├── main.py
├── models
│ ├── __init__.py
│ ├── base_model.py
│ ├── fc_net
│ │ ├── __init__.py
│ │ └── fc_net.py
│ ├── lambda_net
│ │ ├── __init__.py
│ │ └── lambda_net.py
│ └── pgs_net
│ │ ├── __init__.py
│ │ ├── pgs_bypass.py
│ │ ├── pgs_leak.py
│ │ ├── pgs_mix.py
│ │ ├── pgs_net.py
│ │ └── pgs_net_with_base.py
├── small.sh
├── small
│ ├── clicks_tr.sample
│ ├── clicks_va.sample
│ ├── test.list
│ ├── test.sample
│ ├── train.list
│ └── train.sample
├── sub.sh
└── util
│ ├── __init__.py
│ ├── data_util.py
│ ├── evaluate.py
│ └── nn_util.py
├── split.py
├── stack_nn.png
├── write_base_list.py
├── write_list.py
├── write_sub_base_list.py
└── write_sub_list.py
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # stack-nn-tensorflow
2 | ![Alt text](/stack_nn.png?raw=true "Optional Title")
--------------------------------------------------------------------------------
/nn/bag.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pandas as pd
3 | import numpy as np
4 | for i in range(5):
5 |     cmd = 'python main.py --train data/sub_train.list --display_train ../../better_split/data/clicks_va.csv --fea_limit 5 --test data/sub_test.list --display_test ../../input/clicks_test.csv --learning_rate 0.0005 --weight_decay 0.001 --batch_size 96 --sub bag/sub%d.csv --num_epochs 3 --acc_period 100 --normalize 1 --base_train data/sub_train.list.base --base_test data/sub_test.list.base --model pgs_wb'%i
6 |     os.system(cmd)
7 | s = []
8 | for i in range(5):
9 |     s.append(pd.read_csv('bag/sub%d.csv'%i))
10 | s = pd.concat(s, axis=1).values
11 | s = np.mean(s,axis=1)
12 | np.savetxt("bag/sub_ave.csv",s,header='clicked')
13 | 
14 | 
--------------------------------------------------------------------------------
/nn/cv.sh:
--------------------------------------------------------------------------------
1 | #python main.py --train data/train0.list --display_train ../cvdata/clicks_va.csv.tr.0 --fea_limit 5 --test data/test0.list --display_test ../cvdata/clicks_va.csv.va.0 --learning_rate 0.0005 --weight_decay 0.001 --batch_size 96 --sub results/cv0.csv --num_epochs 3 --acc_period 1 --normalize 0 --base_train data/train0.list.base --base_test data/test0.list.base --model pgs_wb
2 | #python main.py --train data/train1.list --display_train ../cvdata/clicks_va.csv.tr.1 --fea_limit 5 --test data/test1.list --display_test ../cvdata/clicks_va.csv.va.1 --learning_rate 0.0005 --weight_decay 0.001 --batch_size 96 --sub results/cv1.csv --num_epochs 3 --acc_period 1 --normalize 0 --base_train data/train1.list.base --base_test data/test1.list.base --model pgs_wb
3 | python main.py --train data/train2.list --display_train ../cvdata/clicks_va.csv.tr.2 --fea_limit 5 --test data/test2.list --display_test ../cvdata/clicks_va.csv.va.2 --learning_rate 0.0005 --weight_decay 0.001 --batch_size 96 --sub results/cv2.csv --num_epochs 3 --acc_period 1 --normalize 0 --base_train data/train2.list.base --base_test data/test2.list.base --model pgs_wb
4 | 
--------------------------------------------------------------------------------
/nn/main.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from util.data_util import 
DataSet,get_num_fea,write_sub 3 | from models.pgs_net.pgs_net import PGS_NET 4 | 5 | flags = tf.app.flags 6 | flags.DEFINE_string('save_dir', 'save', 'Save path [save]') 7 | flags.DEFINE_integer('num_epochs', 10, 'Number of epochs for training [256]') 8 | flags.DEFINE_float('learning_rate', 0.0001, 'Learning rate [0.002]') 9 | flags.DEFINE_integer('acc_period', 1, 'Accuracy display period [10]') 10 | flags.DEFINE_string('task', 'cv', 'cv or test') 11 | flags.DEFINE_string('train', 'data/train.list', 'meta list of train') 12 | flags.DEFINE_string('display_train', '', 'display of train') 13 | flags.DEFINE_string('test', 'data/test.list', 'meta list of test') 14 | flags.DEFINE_string('display_test', '', 'display of test') 15 | flags.DEFINE_string('cache', 'cache', 'cache path') 16 | flags.DEFINE_integer('fea_limit', 5, 'Max number of features, above this value will call online code in preprocessing') 17 | flags.DEFINE_integer('batch_size', 2, 'batch size') 18 | flags.DEFINE_integer('max_ads', 18, 'maximum ads per display') 19 | flags.DEFINE_integer('meta_features', 11, 'number of features') 20 | flags.DEFINE_float('weight_decay', 0.001, 'Weight decay - 0 to turn off L2 regularization [0.001]') 21 | flags.DEFINE_string('sub', 'sub.csv', 'path of submission') 22 | FLAGS = flags.FLAGS 23 | 24 | def main(_): 25 | 26 | FLAGS.meta_features = get_num_fea(FLAGS.train) 27 | print ("number of meta features", FLAGS.meta_features) 28 | train = DataSet(metalist=FLAGS.train, display=FLAGS.display_train, cache=FLAGS.cache, shuffle=True, limit = FLAGS.fea_limit) 29 | test = DataSet(metalist=FLAGS.test, display=FLAGS.display_test, cache=FLAGS.cache, shuffle=False, limit = FLAGS.fea_limit) 30 | #train.sanity_check() 31 | # test's row order doesn't change!!! 32 | with tf.Session() as sess: 33 | model = PGS_NET(FLAGS) 34 | sess.run(tf.initialize_all_variables()) 35 | model.train(sess, train, test) 36 | preds = model.eval(sess, test, is_va = False) 37 | write_sub(preds,FLAGS.sub) 38 | if __name__ == '__main__': 39 | tf.app.run() 40 | 41 | -------------------------------------------------------------------------------- /nn/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daxiongshu/stack-nn-tensorflow/8218247ce49d54db75a33edec816367de9949f23/nn/models/__init__.py -------------------------------------------------------------------------------- /nn/models/base_model.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tqdm import tqdm 3 | from termcolor import colored 4 | import numpy as np 5 | from util.evaluate import apk12 6 | 7 | class BaseModel(object): 8 | 9 | def __init__(self, params): 10 | self.params = params 11 | self.save_dir = params.save_dir 12 | 13 | with tf.variable_scope("Stack_NN"): 14 | #print("building stack nn...") 15 | self.global_step = tf.Variable(0, name='global_step', trainable=False) 16 | self.build() 17 | 18 | ############################################### 19 | # Start: virtual functions to be implemented 20 | ############################################### 21 | def build(self): 22 | #pass 23 | raise NotImplementedError() 24 | 25 | def get_feed_dict(self, batch, is_train): 26 | raise NotImplementedError() 27 | 28 | def preprocess_batch(self, batch): 29 | raise NotImplementedError() 30 | ############################################### 31 | # End: virtual functions to be implemented 32 | ############################################### 33 
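# Contract assumed by the common methods below (inferred from this file rather
# than a separate spec): a subclass's build() constructs the graph and assigns
# self.predictions, self.loss, self.total_loss and self.opt_op, while
# get_feed_dict(batch, is_train) maps a DataSet batch tuple onto the
# placeholders build() created, using preprocess_batch() for the per-display
# padding.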
| 34 | 35 | ############################################### 36 | # Start: common functions to be inherited 37 | ############################################### 38 | 39 | def train_batch(self, sess, batch): 40 | feed_dict = self.get_feed_dict(batch, is_train=True) 41 | return sess.run([self.opt_op, self.global_step], feed_dict=feed_dict) 42 | 43 | def test_batch(self, sess, batch): 44 | feed_dict = self.get_feed_dict(batch, is_train=False) 45 | return sess.run([self.loss, self.global_step, self.predictions], feed_dict=feed_dict) 46 | 47 | def train(self, sess, train_data, val_data=None): 48 | params = self.params 49 | num_epochs = params.num_epochs 50 | num_batches = (train_data.num_groups + self.params.batch_size -1)/self.params.batch_size 51 | 52 | print("Training %d epochs ..." % num_epochs) 53 | for epoch_no in tqdm(range(num_epochs), desc='Epoch', maxinterval=86400, ncols=100): 54 | losses = [] 55 | #for i in range(num_batches): 56 | while True: 57 | batch = train_data.next_batch(self.params.batch_size) # random shuffled batch 58 | self.train_batch(sess, batch) 59 | if batch[4]: 60 | break 61 | #losses.append(loss) 62 | #train_data.reset() 63 | 64 | if (epoch_no + 1) % params.acc_period == 0: 65 | print() # Newline for TQDM 66 | #print("[Train] step %d: Loss = %.4f" % \ 67 | #( global_step, np.mean(losses))) 68 | if val_data: 69 | self.eval(sess, val_data, is_va = True) 70 | train_data.reset() 71 | 72 | def eval(self, sess, data, is_va = False): 73 | data.reset() 74 | num_batches = (data.num_groups + self.params.batch_size -1)/self.params.batch_size 75 | name = 'Validation' if is_va else 'Test' 76 | apk_results = [] 77 | predictions = [] 78 | losses = [] 79 | #for _ in range(num_batches): 80 | while True: 81 | batch = data.next_batch(self.params.batch_size) # continuous batch 82 | # batch is a tuple (X, y, dispaly_id) 83 | loss, global_step, prediction = self.test_batch(sess, batch) 84 | apk_result, ypred = apk12(batch, prediction) 85 | apk_results.append(apk_result) 86 | predictions.append(ypred) 87 | losses.append(loss) 88 | if batch[4]: 89 | break 90 | print(colored("[%s] step %d: APK-12 = %.4f, Loss = %.4f"%(name, global_step, np.mean(apk_results), np.mean(losses)), 'green')) 91 | #data.reset() 92 | return np.concatenate(predictions) # row order is the same as input test 93 | 94 | def save(self, sess): 95 | print("Saving model to %s" % self.save_dir) 96 | self.saver.save(sess, self.save_dir, self.global_step) 97 | 98 | def load(self, sess): 99 | print("Loading model ...") 100 | checkpoint = tf.train.get_checkpoint_state(self.save_dir) 101 | if checkpoint is None: 102 | print("Error: No saved model found. 
Please train first.") 103 | sys.exit(0) 104 | self.saver.restore(sess, checkpoint.model_checkpoint_path) 105 | 106 | 107 | ############################################### 108 | # End: common functions to be inherited 109 | ############################################### 110 | -------------------------------------------------------------------------------- /nn/models/fc_net/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daxiongshu/stack-nn-tensorflow/8218247ce49d54db75a33edec816367de9949f23/nn/models/fc_net/__init__.py -------------------------------------------------------------------------------- /nn/models/fc_net/fc_net.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from util.nn_util import fully_connected 4 | from models.base_model import BaseModel 5 | 6 | class FC_NET(BaseModel): 7 | # Per-group-softmax net 8 | def build(self): 9 | params = self.params 10 | N = params.batch_size # number of groups/display_ids per batch 11 | A = params.max_ads # maximum number of Ads per display_id 12 | F = params.meta_features # number of meta features per Ad 13 | 14 | X = tf.placeholder('float32', shape=[N, A, F], name='x') # zero padding 15 | Xmask = tf.placeholder('float32', shape=[N, A], name='xmask') # in {-e10, 1}, 1 for real Ads and -e10 for padding Ads 16 | #Xads = tf.placeholder('float32', shape=[N], name='xads') # number of Ads per display_id 17 | y = tf.placeholder('float32', shape=[N, A], name='y') # y in {0, 1} with zero padding 18 | is_training = tf.placeholder(tf.bool) 19 | 20 | if self.params.softmax_transform: 21 | print("softmax_transform") 22 | Xtmp = X + tf.reshape(Xmask, [N, A, 1]) 23 | Xtmp = tf.exp(Xtmp) 24 | stmp = tf.reduce_sum(Xtmp, 1, keep_dims=True)+1e-5 25 | Xtmp = Xtmp/stmp 26 | else: 27 | Xtmp = X 28 | 29 | with tf.name_scope("Fully-connected"): 30 | Xtmp = tf.reshape(Xtmp, [N*A, F]) 31 | with tf.variable_scope("Layer1"): 32 | ytmp = fully_connected(Xtmp, num_neurons=50, name='W1', is_training = is_training, use_batch_norm=True, use_drop_out=False, keep_prob = 0.7, activation = 'sigmoid') 33 | with tf.variable_scope("Layer2"): 34 | ytmp = fully_connected(ytmp, num_neurons=25, name='W2', is_training = is_training, use_batch_norm=True, use_drop_out=False, keep_prob = 1.0, activation = 'sigmoid') 35 | with tf.variable_scope("Layer3"): 36 | ytmp = fully_connected(ytmp, num_neurons=1, name='W3', is_training = is_training, use_batch_norm=True, use_drop_out=False, keep_prob = 1, activation = 'None') 37 | 38 | # ytmp is [N*A, 1] now 39 | 40 | yp = tf.reshape(ytmp,[N, A]) * Xmask # masking the padding Ads 41 | # yp is [N, A] now 42 | 43 | with tf.name_scope('Loss'): 44 | # Cross-Entropy loss 45 | #yp = ytmp * tf.reshape(Xmask,[N*A, 1]) 46 | #y = tf.reshape(y,) 47 | #cross_entropy = tf.nn.softmax_cross_entropy_with_logits(yp, y) 48 | yp = tf.maximum(yp,1e-5) 49 | yp = tf.minimum(yp,1-1e-5) 50 | cross_entropy = -(y*tf.log(yp)+(1-y)*tf.log(1-yp)) 51 | loss = tf.reduce_mean(cross_entropy) 52 | total_loss = loss + params.weight_decay * tf.add_n(tf.get_collection('l2')) 53 | 54 | with tf.name_scope('Predict'): 55 | pred = yp #tf.nn.softmax(yp) 56 | 57 | optimizer = tf.train.AdamOptimizer(params.learning_rate) 58 | opt_op = optimizer.minimize(total_loss, global_step=self.global_step) 59 | 60 | 61 | self.predictions = pred 62 | self.loss = cross_entropy 63 | self.total_loss = total_loss 64 | self.opt_op = opt_op 65 | 
66 | self.x = X 67 | self.y = y 68 | self.xmask = Xmask 69 | self.is_train = is_training 70 | 71 | def preprocess_batch(self, batch): 72 | # batch = (x, y, g, r) 73 | params = self.params 74 | N = params.batch_size # number of groups/display_ids per batch 75 | A = params.max_ads # maximum number of Ads per display_id 76 | F = params.meta_features # number of meta features per Ad 77 | #print N,A,F 78 | x, y, g, r, _ = batch 79 | #print "batch.r", r 80 | X = np.zeros([N,A,F]) 81 | #print "batch.x", x.shape, "X", X.shape 82 | 83 | Y = np.zeros([N,A]) 84 | Xmask = np.zeros([N,A]) 85 | for i in range(N): 86 | if i+1 >= len(r): 87 | break 88 | start, end = r[i], r[i+1] 89 | #rtmp = range(start,end) 90 | #print i, start, end, X[i,start:end,:].shape, x[start:end,:].shape, X.shape, x.shape, y.shape 91 | X[i,0:end-start,:] = x[start:end,:] 92 | Y[i,0:end-start] = y[start:end] 93 | Xmask[i,0:end-start] = 1 94 | #print rtmp, X.shape, Y.shape, y.shape 95 | #X[i,rtmp,:] = x[rtmp,:] 96 | #Y[i,rtmp,:] = y[np.array(rtmp)] 97 | #Xmask[i,rtmp,:] = 0 98 | 99 | return X, Y, Xmask 100 | 101 | def get_feed_dict(self, batch, is_train): 102 | X, Y, Xmask = self.preprocess_batch(batch) 103 | return { 104 | self.x: X, 105 | self.xmask: Xmask, 106 | self.y: Y, 107 | self.is_train: is_train 108 | } 109 | 110 | -------------------------------------------------------------------------------- /nn/models/lambda_net/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daxiongshu/stack-nn-tensorflow/8218247ce49d54db75a33edec816367de9949f23/nn/models/lambda_net/__init__.py -------------------------------------------------------------------------------- /nn/models/lambda_net/lambda_net.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from util.nn_util import fully_connected 4 | from models.base_model import BaseModel 5 | 6 | class LAMBDA_NET(BaseModel): 7 | # Per-group-softmax net 8 | def build(self): 9 | print "build LAMBDA_NET" 10 | params = self.params 11 | N = params.batch_size # number of groups/display_ids per batch 12 | A = params.max_ads # maximum number of Ads per display_id 13 | F = params.meta_features # number of meta features per Ad 14 | 15 | X = tf.placeholder('float32', shape=[N, A, F], name='x') # zero padding 16 | Xmask = tf.placeholder('float32', shape=[N, A], name='xmask') # in {-e10, 1}, 1 for real Ads and -e10 for padding Ads 17 | #Xads = tf.placeholder('float32', shape=[N], name='xads') # number of Ads per display_id 18 | y = tf.placeholder('float32', shape=[N, A], name='y') # y in {0, 1} with zero padding 19 | is_training = tf.placeholder(dtype=tf.bool) 20 | 21 | if self.params.softmax_transform: 22 | print("softmax_transform") 23 | Xtmp = X + tf.reshape(Xmask, [N, A, 1]) 24 | Xtmp = tf.exp(Xtmp) 25 | stmp = tf.reduce_sum(Xtmp, 1, keep_dims=True)+1e-5 26 | Xtmp = Xtmp/stmp 27 | else: 28 | Xtmp = X 29 | 30 | with tf.name_scope("Fully-connected"): 31 | Xtmp = tf.reshape(Xtmp, [N*A, F]) 32 | with tf.variable_scope("Layer1"): 33 | ytmp = fully_connected(Xtmp, num_neurons=50, name='W1', is_training = is_training, use_batch_norm=True, use_drop_out=False, keep_prob = 0.7, activation = 'sigmoid', default_batch = params.default_batch) 34 | with tf.variable_scope("Layer2"): 35 | ytmp = fully_connected(ytmp, num_neurons=25, name='W2', is_training = is_training, use_batch_norm=True, use_drop_out=False, keep_prob = 1.0, activation = 'sigmoid', 
default_batch = params.default_batch) 36 | with tf.variable_scope("Layer3"): 37 | ytmp = fully_connected(ytmp, num_neurons=1, name='W3', is_training = is_training, use_batch_norm=True, use_drop_out=False, keep_prob = 1, activation = 'None', default_batch = params.default_batch) 38 | 39 | # ytmp is [N*A, 1] now 40 | 41 | yp = tf.reshape(ytmp,[N, A])# + Xmask # masking the padding Ads 42 | # yp is [N, A] now 43 | ypos = tf.matmul(tf.reduce_sum(yp*y,1,keep_dims=True),tf.constant(1,type="float32",shape=[1,N])) 44 | with tf.name_scope('Loss'): 45 | # Cross-Entropy loss 46 | #cross_entropy = tf.nn.softmax_cross_entropy_with_logits(yp, y) 47 | loss = tf.reduce_mean(cross_entropy) 48 | total_loss = loss + params.weight_decay * tf.add_n(tf.get_collection('l2')) 49 | 50 | with tf.name_scope('Predict'): 51 | pred = tf.nn.softmax(yp) 52 | 53 | if self.params.opt == 'adam': 54 | optimizer = tf.train.AdamOptimizer(params.learning_rate) 55 | elif self.params.opt == 'sgd': 56 | optimizer = tf.train.GradientDescentOptimizer(params.learning_rate) 57 | elif self.params.opt == 'ada': 58 | optimizer = tf.train.AdagradOptimizer(params.learning_rate) 59 | elif self.params.opt == 'rmsprop': 60 | optimizer = tf.train.RMSPropOptimizer(params.learning_rate) 61 | opt_op = optimizer.minimize(total_loss, global_step=self.global_step) 62 | 63 | 64 | self.predictions = pred 65 | self.loss = cross_entropy 66 | self.total_loss = total_loss 67 | self.opt_op = opt_op 68 | 69 | self.x = X 70 | self.y = y 71 | self.xmask = Xmask 72 | self.is_train = is_training 73 | 74 | def preprocess_batch(self, batch): 75 | # batch = (x, y, g, r) 76 | params = self.params 77 | N = params.batch_size # number of groups/display_ids per batch 78 | A = params.max_ads # maximum number of Ads per display_id 79 | F = params.meta_features # number of meta features per Ad 80 | #print N,A,F 81 | x, y, g, r, _ = batch 82 | #print "batch.r", r 83 | X = np.zeros([N,A,F]) 84 | #print "batch.x", x.shape, "X", X.shape 85 | 86 | Y = np.zeros([N,A]) 87 | Xmask = np.ones([N,A])*(-1e10) 88 | for i in range(N): 89 | if i+1 >= len(r): 90 | break 91 | start, end = r[i], r[i+1] 92 | #rtmp = range(start,end) 93 | #print i, start, end, X[i,start:end,:].shape, x[start:end,:].shape, X.shape, x.shape, y.shape 94 | X[i,0:end-start,:] = x[start:end,:] 95 | Y[i,0:end-start] = y[start:end] 96 | Xmask[i,0:end-start] = 0 97 | #print rtmp, X.shape, Y.shape, y.shape 98 | #X[i,rtmp,:] = x[rtmp,:] 99 | #Y[i,rtmp,:] = y[np.array(rtmp)] 100 | #Xmask[i,rtmp,:] = 0 101 | 102 | return X, Y, Xmask 103 | 104 | def get_feed_dict(self, batch, is_train): 105 | X, Y, Xmask = self.preprocess_batch(batch) 106 | return { 107 | self.x: X, 108 | self.xmask: Xmask, 109 | self.y: Y, 110 | self.is_train: is_train 111 | } 112 | 113 | -------------------------------------------------------------------------------- /nn/models/pgs_net/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daxiongshu/stack-nn-tensorflow/8218247ce49d54db75a33edec816367de9949f23/nn/models/pgs_net/__init__.py -------------------------------------------------------------------------------- /nn/models/pgs_net/pgs_bypass.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from util.nn_util import fully_connected 4 | from models.base_model import BaseModel 5 | 6 | class PGS_BYPASS_NET(BaseModel): 7 | # Per-group-softmax net 8 | def build(self): 9 | print "build 
PGS_BYPASS_NET" 10 | params = self.params 11 | N = params.batch_size # number of groups/display_ids per batch 12 | A = params.max_ads # maximum number of Ads per display_id 13 | F = params.meta_features # number of meta features per Ad 14 | 15 | X = tf.placeholder('float32', shape=[N, A, F], name='x') # zero padding 16 | Xmask = tf.placeholder('float32', shape=[N, A], name='xmask') # in {-e10, 1}, 1 for real Ads and -e10 for padding Ads 17 | #Xads = tf.placeholder('float32', shape=[N], name='xads') # number of Ads per display_id 18 | y = tf.placeholder('float32', shape=[N, A], name='y') # y in {0, 1} with zero padding 19 | is_training = tf.placeholder(dtype=tf.bool) 20 | 21 | if self.params.softmax_transform: 22 | print("softmax_transform") 23 | Xtmp = X + tf.reshape(Xmask, [N, A, 1]) 24 | Xtmp = tf.exp(Xtmp) 25 | stmp = tf.reduce_sum(Xtmp, 1, keep_dims=True)+1e-5 26 | Xtmp = Xtmp/stmp 27 | else: 28 | Xtmp = X 29 | 30 | with tf.name_scope("Fully-connected"): 31 | Xtmp = tf.reshape(Xtmp, [N*A, F]) 32 | with tf.variable_scope("Layer1"): 33 | ytmp = fully_connected(Xtmp, num_neurons=50, name='W1', is_training = is_training, use_batch_norm=True, use_drop_out=False, keep_prob = 0.7, activation = 'sigmoid', default_batch = params.default_batch) 34 | with tf.variable_scope("Layer2"): 35 | ytmp = fully_connected(ytmp, num_neurons=25, name='W2', is_training = is_training, use_batch_norm=True, use_drop_out=False, keep_prob = 1.0, activation = 'sigmoid', default_batch = params.default_batch) 36 | with tf.variable_scope("Layer3"): 37 | ytmp = fully_connected(ytmp, num_neurons=1, name='W3', is_training = is_training, use_batch_norm=True, use_drop_out=False, keep_prob = 1, activation = 'None', default_batch = params.default_batch) 38 | 39 | ytmp = ytmp + fully_connected(Xtmp, num_neurons=1, name='W4', is_training = is_training, use_batch_norm=True, use_drop_out=False, keep_prob = 0.7, activation = 'sigmoid', default_batch = params.default_batch) 40 | 41 | # ytmp is [N*A, 1] now 42 | 43 | yp = tf.reshape(ytmp,[N, A]) + Xmask # masking the padding Ads 44 | # yp is [N, A] now 45 | 46 | with tf.name_scope('Loss'): 47 | # Cross-Entropy loss 48 | cross_entropy = tf.nn.softmax_cross_entropy_with_logits(yp, y) 49 | loss = tf.reduce_mean(cross_entropy) 50 | total_loss = loss + params.weight_decay * tf.add_n(tf.get_collection('l2')) 51 | 52 | with tf.name_scope('Predict'): 53 | pred = tf.nn.softmax(yp) 54 | 55 | if self.params.opt == 'adam': 56 | optimizer = tf.train.AdamOptimizer(params.learning_rate) 57 | elif self.params.opt == 'sgd': 58 | optimizer = tf.train.GradientDescentOptimizer(params.learning_rate) 59 | elif self.params.opt == 'ada': 60 | optimizer = tf.train.AdagradOptimizer(params.learning_rate) 61 | elif self.params.opt == 'rmsprop': 62 | optimizer = tf.train.RMSPropOptimizer(params.learning_rate) 63 | opt_op = optimizer.minimize(total_loss, global_step=self.global_step) 64 | 65 | 66 | self.predictions = pred 67 | self.loss = cross_entropy 68 | self.total_loss = total_loss 69 | self.opt_op = opt_op 70 | 71 | self.x = X 72 | self.y = y 73 | self.xmask = Xmask 74 | self.is_train = is_training 75 | 76 | def preprocess_batch(self, batch): 77 | # batch = (x, y, g, r) 78 | params = self.params 79 | N = params.batch_size # number of groups/display_ids per batch 80 | A = params.max_ads # maximum number of Ads per display_id 81 | F = params.meta_features # number of meta features per Ad 82 | #print N,A,F 83 | x, y, g, r, _ = batch 84 | #print "batch.r", r 85 | X = np.zeros([N,A,F]) 86 | #print "batch.x", 
x.shape, "X", X.shape 87 | 88 | Y = np.zeros([N,A]) 89 | Xmask = np.ones([N,A])*(-1e10) 90 | for i in range(N): 91 | if i+1 >= len(r): 92 | break 93 | start, end = r[i], r[i+1] 94 | #rtmp = range(start,end) 95 | #print i, start, end, X[i,start:end,:].shape, x[start:end,:].shape, X.shape, x.shape, y.shape 96 | X[i,0:end-start,:] = x[start:end,:] 97 | Y[i,0:end-start] = y[start:end] 98 | Xmask[i,0:end-start] = 0 99 | #print rtmp, X.shape, Y.shape, y.shape 100 | #X[i,rtmp,:] = x[rtmp,:] 101 | #Y[i,rtmp,:] = y[np.array(rtmp)] 102 | #Xmask[i,rtmp,:] = 0 103 | 104 | return X, Y, Xmask 105 | 106 | def get_feed_dict(self, batch, is_train): 107 | X, Y, Xmask = self.preprocess_batch(batch) 108 | return { 109 | self.x: X, 110 | self.xmask: Xmask, 111 | self.y: Y, 112 | self.is_train: is_train 113 | } 114 | 115 | -------------------------------------------------------------------------------- /nn/models/pgs_net/pgs_leak.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from util.nn_util import fully_connected 4 | from models.base_model import BaseModel 5 | 6 | class PGS_LEAK(BaseModel): 7 | # Per-group-softmax net 8 | def build(self): 9 | print "build PGS_LEAK" 10 | params = self.params 11 | N = params.batch_size # number of groups/display_ids per batch 12 | A = params.max_ads # maximum number of Ads per display_id 13 | F = params.meta_features # number of meta features per Ad 14 | 15 | yb = tf.placeholder('float32', shape=[N, A], name='yb') 16 | X = tf.placeholder('float32', shape=[N, A, F], name='x') # zero padding 17 | Xmask = tf.placeholder('float32', shape=[N, A], name='xmask') # in {-e10, 1}, 1 for real Ads and -e10 for padding Ads 18 | #Xads = tf.placeholder('float32', shape=[N], name='xads') # number of Ads per display_id 19 | y = tf.placeholder('float32', shape=[N, A], name='y') # y in {0, 1} with zero padding 20 | is_training = tf.placeholder(dtype=tf.bool) 21 | 22 | if self.params.softmax_transform: 23 | print("softmax_transform") 24 | Xtmp = X + tf.reshape(Xmask, [N, A, 1]) 25 | Xtmp = tf.exp(Xtmp) 26 | stmp = tf.reduce_sum(Xtmp, 1, keep_dims=True)+1e-5 27 | Xtmp = Xtmp/stmp 28 | else: 29 | Xtmp = X 30 | 31 | with tf.name_scope("Fully-connected"): 32 | Xtmp = tf.reshape(Xtmp, [N*A, F]) 33 | with tf.variable_scope("Layer1"): 34 | ytmp = fully_connected(Xtmp, num_neurons=50, name='W1', is_training = is_training, use_batch_norm=True, use_drop_out=False, keep_prob = 0.7, activation = 'relu', default_batch = params.default_batch) 35 | with tf.variable_scope("Layer2"): 36 | ytmp = fully_connected(ytmp, num_neurons=25, name='W2', is_training = is_training, use_batch_norm=True, use_drop_out=False, keep_prob = 1.0, activation = 'relu', default_batch = params.default_batch) 37 | #with tf.variable_scope("Layer4"): 38 | # ytmp = fully_connected(ytmp, num_neurons=10, name='W4', is_training = is_training, use_batch_norm=True, use_drop_out=False, keep_prob = 1.0, activation = 'relu', default_batch = params.default_batch) 39 | 40 | with tf.variable_scope("Layer3"): 41 | ytmp = fully_connected(ytmp, num_neurons=1, name='W3', is_training = is_training, use_batch_norm=True, use_drop_out=False, keep_prob = 1, activation = 'None', default_batch = params.default_batch) 42 | 43 | # ytmp is [N*A, 1] now 44 | 45 | yp = tf.reshape(ytmp,[N, A])+yb*10 + Xmask #+ yb# masking the padding Ads 46 | # yp is [N, A] now 47 | 48 | with tf.name_scope('Loss'): 49 | # Cross-Entropy loss 50 | cross_entropy = 
tf.nn.softmax_cross_entropy_with_logits(yp, y) 51 | loss = tf.reduce_mean(cross_entropy) 52 | total_loss = loss + params.weight_decay * tf.add_n(tf.get_collection('l2')) 53 | 54 | with tf.name_scope('Predict'): 55 | pred = tf.nn.softmax(yp) 56 | 57 | if self.params.opt == 'adam': 58 | optimizer = tf.train.AdamOptimizer(params.learning_rate) 59 | elif self.params.opt == 'sgd': 60 | optimizer = tf.train.GradientDescentOptimizer(params.learning_rate) 61 | elif self.params.opt == 'ada': 62 | optimizer = tf.train.AdagradOptimizer(params.learning_rate) 63 | elif self.params.opt == 'rmsprop': 64 | optimizer = tf.train.RMSPropOptimizer(params.learning_rate) 65 | opt_op = optimizer.minimize(total_loss, global_step=self.global_step) 66 | 67 | 68 | self.predictions = pred 69 | self.loss = cross_entropy 70 | self.total_loss = total_loss 71 | self.opt_op = opt_op 72 | 73 | self.yb = yb 74 | self.x = X 75 | self.y = y 76 | self.xmask = Xmask 77 | self.is_train = is_training 78 | 79 | def preprocess_batch(self, batch): 80 | # batch = (x, y, g, r) 81 | params = self.params 82 | N = params.batch_size # number of groups/display_ids per batch 83 | A = params.max_ads # maximum number of Ads per display_id 84 | F = params.meta_features # number of meta features per Ad 85 | #print N,A,F 86 | x, y, g, r, _, b = batch 87 | #print "batch.r", r 88 | X = np.zeros([N,A,F]) 89 | #print "batch.x", x.shape, "X", X.shape 90 | 91 | Y = np.zeros([N,A]) 92 | Xb = np.zeros([N,A]) 93 | Xmask = np.ones([N,A])*(-1e10) 94 | for i in range(N): 95 | if i+1 >= len(r): 96 | break 97 | start, end = r[i], r[i+1] 98 | #rtmp = range(start,end) 99 | #print i, start, end, X[i,start:end,:].shape, x[start:end,:].shape, X.shape, x.shape, y.shape 100 | X[i,0:end-start,:] = x[start:end,:] 101 | Y[i,0:end-start] = y[start:end] 102 | Xb[i,0:end-start] = b[start:end,0] 103 | Xmask[i,0:end-start] = 0 104 | #print rtmp, X.shape, Y.shape, y.shape 105 | #X[i,rtmp,:] = x[rtmp,:] 106 | #Y[i,rtmp,:] = y[np.array(rtmp)] 107 | #Xmask[i,rtmp,:] = 0 108 | 109 | return X, Y, Xmask,Xb 110 | 111 | def get_feed_dict(self, batch, is_train): 112 | X, Y, Xmask, Xb = self.preprocess_batch(batch) 113 | return { 114 | self.x: X, 115 | self.xmask: Xmask, 116 | self.y: Y, 117 | self.is_train: is_train, 118 | self.yb: Xb 119 | } 120 | 121 | -------------------------------------------------------------------------------- /nn/models/pgs_net/pgs_mix.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from util.nn_util import fully_connected 4 | from models.base_model import BaseModel 5 | 6 | class PGS_MIX(BaseModel): 7 | # Per-group-softmax net 8 | def build(self): 9 | print "build PGS_MIX_NET" 10 | params = self.params 11 | N = params.batch_size # number of groups/display_ids per batch 12 | A = params.max_ads # maximum number of Ads per display_id 13 | F = params.meta_features # number of meta features per Ad 14 | 15 | X = tf.placeholder('float32', shape=[N, A, F], name='x') # zero padding 16 | Xmask = tf.placeholder('float32', shape=[N, A], name='xmask') # in {-e10, 1}, 1 for real Ads and -e10 for padding Ads 17 | Xmask2 = tf.placeholder('float32', shape=[N, A], name='xmask2') 18 | #Xads = tf.placeholder('float32', shape=[N], name='xads') # number of Ads per display_id 19 | y = tf.placeholder('float32', shape=[N, A], name='y') # y in {0, 1} with zero padding 20 | is_training = tf.placeholder(dtype=tf.bool) 21 | 22 | if self.params.softmax_transform: 23 | print("softmax_transform") 24 
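# Reading of the lines below: adding the -1e10 mask before exp() drives padded
# ad slots to ~0, and dividing by the per-display sum turns each raw
# meta-feature into a within-display softmax before the fully connected layers.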
| Xtmp = X + tf.reshape(Xmask, [N, A, 1]) 25 | Xtmp = tf.exp(Xtmp) 26 | stmp = tf.reduce_sum(Xtmp, 1, keep_dims=True)+1e-5 27 | Xtmp = Xtmp/stmp 28 | else: 29 | Xtmp = X 30 | 31 | with tf.name_scope("Fully-connected"): 32 | Xtmp = tf.reshape(Xtmp, [N*A, F]) 33 | with tf.variable_scope("Layer1"): 34 | ytmp = fully_connected(Xtmp, num_neurons=100, name='W1', is_training = is_training, use_batch_norm=True, use_drop_out=False, keep_prob = 0.7, activation = 'relu', default_batch = params.default_batch) 35 | with tf.variable_scope("Layer2"): 36 | ytmp = fully_connected(ytmp, num_neurons=50, name='W2', is_training = is_training, use_batch_norm=True, use_drop_out=False, keep_prob = 1.0, activation = 'sigmoid', default_batch = params.default_batch) 37 | with tf.variable_scope("Layer4"): 38 | ytmp = fully_connected(ytmp, num_neurons=25, name='W4', is_training = is_training, use_batch_norm=True, use_drop_out=False, keep_prob = 1.0, activation = 'sigmoid', default_batch = params.default_batch) 39 | with tf.variable_scope("Layer3"): 40 | ytmp = fully_connected(ytmp, num_neurons=1, name='W3', is_training = is_training, use_batch_norm=True, use_drop_out=False, keep_prob = 1, activation = 'None', default_batch = params.default_batch) 41 | 42 | # ytmp is [N*A, 1] now 43 | 44 | yp = tf.reshape(ytmp,[N, A]) + Xmask # masking the padding Ads 45 | # yp is [N, A] now 46 | 47 | with tf.name_scope('LogLoss'): 48 | ypx = tf.reshape(ytmp,[N, A]) * Xmask2 49 | ypx = tf.maximum(ypx,1e-5) 50 | ypx = tf.minimum(ypx,1-1e-5) 51 | lossx = -(y*tf.log(ypx)+(1-y)*tf.log(1-ypx)) 52 | lossx = tf.reduce_mean(lossx) 53 | 54 | with tf.name_scope('Loss'): 55 | # Cross-Entropy loss 56 | cross_entropy = tf.nn.softmax_cross_entropy_with_logits(yp, y) 57 | loss = tf.reduce_mean(cross_entropy) 58 | total_loss = loss + params.logweight*lossx + params.weight_decay * tf.add_n(tf.get_collection('l2')) 59 | 60 | with tf.name_scope('Predict'): 61 | pred = tf.nn.softmax(yp) 62 | 63 | if self.params.opt == 'adam': 64 | optimizer = tf.train.AdamOptimizer(params.learning_rate) 65 | elif self.params.opt == 'sgd': 66 | optimizer = tf.train.GradientDescentOptimizer(params.learning_rate) 67 | elif self.params.opt == 'ada': 68 | optimizer = tf.train.AdagradOptimizer(params.learning_rate) 69 | elif self.params.opt == 'rmsprop': 70 | optimizer = tf.train.RMSPropOptimizer(params.learning_rate) 71 | opt_op = optimizer.minimize(total_loss, global_step=self.global_step) 72 | 73 | 74 | self.predictions = pred 75 | self.loss = cross_entropy 76 | self.total_loss = total_loss 77 | self.opt_op = opt_op 78 | 79 | self.x = X 80 | self.y = y 81 | self.xmask = Xmask 82 | self.xmask2 = Xmask2 83 | self.is_train = is_training 84 | 85 | def preprocess_batch(self, batch): 86 | # batch = (x, y, g, r) 87 | params = self.params 88 | N = params.batch_size # number of groups/display_ids per batch 89 | A = params.max_ads # maximum number of Ads per display_id 90 | F = params.meta_features # number of meta features per Ad 91 | #print N,A,F 92 | x, y, g, r, _ = batch 93 | #print "batch.r", r 94 | X = np.zeros([N,A,F]) 95 | #print "batch.x", x.shape, "X", X.shape 96 | 97 | Y = np.zeros([N,A]) 98 | Xmask = np.ones([N,A])*(-1e10) 99 | Xmask2 = np.zeros([N,A]) 100 | for i in range(N): 101 | if i+1 >= len(r): 102 | break 103 | start, end = r[i], r[i+1] 104 | #rtmp = range(start,end) 105 | #print i, start, end, X[i,start:end,:].shape, x[start:end,:].shape, X.shape, x.shape, y.shape 106 | X[i,0:end-start,:] = x[start:end,:] 107 | Y[i,0:end-start] = y[start:end] 108 | 
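# r holds the row offset of each display_id in the flat batch, so rows
# r[i]:r[i+1] belong to display i; only those positions are un-masked below,
# while padded slots keep -1e10 so the per-group softmax ignores them.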
Xmask[i,0:end-start] = 0 109 | Xmask2[i,0:end-start] = 0 110 | #print rtmp, X.shape, Y.shape, y.shape 111 | #X[i,rtmp,:] = x[rtmp,:] 112 | #Y[i,rtmp,:] = y[np.array(rtmp)] 113 | #Xmask[i,rtmp,:] = 0 114 | 115 | return X, Y, Xmask, Xmask2 116 | 117 | def get_feed_dict(self, batch, is_train): 118 | X, Y, Xmask, Xmask2 = self.preprocess_batch(batch) 119 | return { 120 | self.x: X, 121 | self.xmask: Xmask, 122 | self.xmask2: Xmask2, 123 | self.y: Y, 124 | self.is_train: is_train 125 | } 126 | 127 | -------------------------------------------------------------------------------- /nn/models/pgs_net/pgs_net.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from util.nn_util import fully_connected 4 | from models.base_model import BaseModel 5 | 6 | class PGS_NET(BaseModel): 7 | # Per-group-softmax net 8 | def build(self): 9 | print "build PGS_NET" 10 | params = self.params 11 | N = params.batch_size # number of groups/display_ids per batch 12 | A = params.max_ads # maximum number of Ads per display_id 13 | F = params.meta_features # number of meta features per Ad 14 | 15 | X = tf.placeholder('float32', shape=[N, A, F], name='x') # zero padding 16 | Xmask = tf.placeholder('float32', shape=[N, A], name='xmask') # in {-e10, 1}, 1 for real Ads and -e10 for padding Ads 17 | #Xads = tf.placeholder('float32', shape=[N], name='xads') # number of Ads per display_id 18 | y = tf.placeholder('float32', shape=[N, A], name='y') # y in {0, 1} with zero padding 19 | is_training = tf.placeholder(dtype=tf.bool) 20 | 21 | if self.params.softmax_transform: 22 | print("softmax_transform") 23 | Xtmp = X + tf.reshape(Xmask, [N, A, 1]) 24 | Xtmp = tf.exp(Xtmp) 25 | stmp = tf.reduce_sum(Xtmp, 1, keep_dims=True)+1e-5 26 | Xtmp = Xtmp/stmp 27 | else: 28 | Xtmp = X 29 | 30 | with tf.name_scope("Fully-connected"): 31 | Xtmp = tf.reshape(Xtmp, [N*A, F]) 32 | with tf.variable_scope("Layer1"): 33 | ytmp = fully_connected(Xtmp, num_neurons=50, name='W1', is_training = is_training, use_batch_norm=True, use_drop_out=False, keep_prob = 0.7, activation = 'sigmoid', default_batch = params.default_batch) 34 | with tf.variable_scope("Layer2"): 35 | ytmp = fully_connected(ytmp, num_neurons=25, name='W2', is_training = is_training, use_batch_norm=True, use_drop_out=False, keep_prob = 1.0, activation = 'sigmoid', default_batch = params.default_batch) 36 | with tf.variable_scope("Layer3"): 37 | ytmp = fully_connected(ytmp, num_neurons=1, name='W3', is_training = is_training, use_batch_norm=True, use_drop_out=False, keep_prob = 1, activation = 'None', default_batch = params.default_batch) 38 | 39 | # ytmp is [N*A, 1] now 40 | 41 | yp = tf.reshape(ytmp,[N, A]) + Xmask # masking the padding Ads 42 | # yp is [N, A] now 43 | 44 | with tf.name_scope('Loss'): 45 | # Cross-Entropy loss 46 | cross_entropy = tf.nn.softmax_cross_entropy_with_logits(yp, y) 47 | loss = tf.reduce_mean(cross_entropy) 48 | total_loss = loss + params.weight_decay * tf.add_n(tf.get_collection('l2')) 49 | 50 | with tf.name_scope('Predict'): 51 | pred = tf.nn.softmax(yp) 52 | 53 | if self.params.opt == 'adam': 54 | optimizer = tf.train.AdamOptimizer(params.learning_rate) 55 | elif self.params.opt == 'sgd': 56 | optimizer = tf.train.GradientDescentOptimizer(params.learning_rate) 57 | elif self.params.opt == 'ada': 58 | optimizer = tf.train.AdagradOptimizer(params.learning_rate) 59 | elif self.params.opt == 'rmsprop': 60 | optimizer = tf.train.RMSPropOptimizer(params.learning_rate) 61 | 
opt_op = optimizer.minimize(total_loss, global_step=self.global_step) 62 | 63 | 64 | self.predictions = pred 65 | self.loss = cross_entropy 66 | self.total_loss = total_loss 67 | self.opt_op = opt_op 68 | 69 | self.x = X 70 | self.y = y 71 | self.xmask = Xmask 72 | self.is_train = is_training 73 | 74 | def preprocess_batch(self, batch): 75 | # batch = (x, y, g, r) 76 | params = self.params 77 | N = params.batch_size # number of groups/display_ids per batch 78 | A = params.max_ads # maximum number of Ads per display_id 79 | F = params.meta_features # number of meta features per Ad 80 | #print N,A,F 81 | x, y, g, r, _ = batch 82 | #print "batch.r", r 83 | X = np.zeros([N,A,F]) 84 | #print "batch.x", x.shape, "X", X.shape 85 | 86 | Y = np.zeros([N,A]) 87 | Xmask = np.ones([N,A])*(-1e10) 88 | for i in range(N): 89 | if i+1 >= len(r): 90 | break 91 | start, end = r[i], r[i+1] 92 | #rtmp = range(start,end) 93 | #print i, start, end, X[i,start:end,:].shape, x[start:end,:].shape, X.shape, x.shape, y.shape 94 | X[i,0:end-start,:] = x[start:end,:] 95 | Y[i,0:end-start] = y[start:end] 96 | Xmask[i,0:end-start] = 0 97 | #print rtmp, X.shape, Y.shape, y.shape 98 | #X[i,rtmp,:] = x[rtmp,:] 99 | #Y[i,rtmp,:] = y[np.array(rtmp)] 100 | #Xmask[i,rtmp,:] = 0 101 | 102 | return X, Y, Xmask 103 | 104 | def get_feed_dict(self, batch, is_train): 105 | X, Y, Xmask = self.preprocess_batch(batch) 106 | return { 107 | self.x: X, 108 | self.xmask: Xmask, 109 | self.y: Y, 110 | self.is_train: is_train 111 | } 112 | 113 | -------------------------------------------------------------------------------- /nn/models/pgs_net/pgs_net_with_base.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from util.nn_util import fully_connected 4 | from models.base_model import BaseModel 5 | 6 | class PGS_NET_WB(BaseModel): 7 | # Per-group-softmax net 8 | def build(self): 9 | print "build PGS_NET_WB" 10 | params = self.params 11 | N = params.batch_size # number of groups/display_ids per batch 12 | A = params.max_ads # maximum number of Ads per display_id 13 | F = params.meta_features # number of meta features per Ad 14 | 15 | yb = tf.placeholder('float32', shape=[N, A], name='yb') 16 | X = tf.placeholder('float32', shape=[N, A, F], name='x') # zero padding 17 | Xmask = tf.placeholder('float32', shape=[N, A], name='xmask') # in {-e10, 1}, 1 for real Ads and -e10 for padding Ads 18 | #Xads = tf.placeholder('float32', shape=[N], name='xads') # number of Ads per display_id 19 | y = tf.placeholder('float32', shape=[N, A], name='y') # y in {0, 1} with zero padding 20 | is_training = tf.placeholder(dtype=tf.bool) 21 | 22 | if self.params.softmax_transform: 23 | print("softmax_transform") 24 | Xtmp = X + tf.reshape(Xmask, [N, A, 1]) 25 | Xtmp = tf.exp(Xtmp) 26 | stmp = tf.reduce_sum(Xtmp, 1, keep_dims=True)+1e-5 27 | Xtmp = Xtmp/stmp 28 | else: 29 | Xtmp = X 30 | 31 | with tf.name_scope("Fully-connected"): 32 | Xtmp = tf.reshape(Xtmp, [N*A, F]) 33 | with tf.variable_scope("Layer1"): 34 | ytmp = fully_connected(Xtmp, num_neurons=50, name='W1', is_training = is_training, use_batch_norm=True, use_drop_out=False, keep_prob = 0.7, activation = 'relu', default_batch = params.default_batch) 35 | with tf.variable_scope("Layer2"): 36 | ytmp = fully_connected(ytmp, num_neurons=25, name='W2', is_training = is_training, use_batch_norm=True, use_drop_out=False, keep_prob = 1.0, activation = 'relu', default_batch = params.default_batch) 37 | #with 
tf.variable_scope("Layer4"): 38 | # ytmp = fully_connected(ytmp, num_neurons=10, name='W4', is_training = is_training, use_batch_norm=True, use_drop_out=False, keep_prob = 1.0, activation = 'relu', default_batch = params.default_batch) 39 | 40 | with tf.variable_scope("Layer3"): 41 | ytmp = fully_connected(ytmp, num_neurons=1, name='W3', is_training = is_training, use_batch_norm=True, use_drop_out=False, keep_prob = 1, activation = 'None', default_batch = params.default_batch) 42 | 43 | # ytmp is [N*A, 1] now 44 | 45 | yp = tf.reshape(ytmp,[N, A])*yb + Xmask #+ yb# masking the padding Ads 46 | # yp is [N, A] now 47 | 48 | with tf.name_scope('Loss'): 49 | # Cross-Entropy loss 50 | cross_entropy = tf.nn.softmax_cross_entropy_with_logits(yp, y) 51 | loss = tf.reduce_mean(cross_entropy) 52 | total_loss = loss + params.weight_decay * tf.add_n(tf.get_collection('l2')) 53 | 54 | with tf.name_scope('Predict'): 55 | pred = tf.nn.softmax(yp) 56 | 57 | if self.params.opt == 'adam': 58 | optimizer = tf.train.AdamOptimizer(params.learning_rate) 59 | elif self.params.opt == 'sgd': 60 | optimizer = tf.train.GradientDescentOptimizer(params.learning_rate) 61 | elif self.params.opt == 'ada': 62 | optimizer = tf.train.AdagradOptimizer(params.learning_rate) 63 | elif self.params.opt == 'rmsprop': 64 | optimizer = tf.train.RMSPropOptimizer(params.learning_rate) 65 | opt_op = optimizer.minimize(total_loss, global_step=self.global_step) 66 | 67 | 68 | self.predictions = pred 69 | self.loss = cross_entropy 70 | self.total_loss = total_loss 71 | self.opt_op = opt_op 72 | 73 | self.yb = yb 74 | self.x = X 75 | self.y = y 76 | self.xmask = Xmask 77 | self.is_train = is_training 78 | 79 | def preprocess_batch(self, batch): 80 | # batch = (x, y, g, r) 81 | params = self.params 82 | N = params.batch_size # number of groups/display_ids per batch 83 | A = params.max_ads # maximum number of Ads per display_id 84 | F = params.meta_features # number of meta features per Ad 85 | #print N,A,F 86 | x, y, g, r, _, b = batch 87 | #print "batch.r", r 88 | X = np.zeros([N,A,F]) 89 | #print "batch.x", x.shape, "X", X.shape 90 | 91 | Y = np.zeros([N,A]) 92 | Xb = np.zeros([N,A]) 93 | Xmask = np.ones([N,A])*(-1e10) 94 | for i in range(N): 95 | if i+1 >= len(r): 96 | break 97 | start, end = r[i], r[i+1] 98 | #rtmp = range(start,end) 99 | #print i, start, end, X[i,start:end,:].shape, x[start:end,:].shape, X.shape, x.shape, y.shape 100 | X[i,0:end-start,:] = x[start:end,:] 101 | Y[i,0:end-start] = y[start:end] 102 | Xb[i,0:end-start] = b[start:end,0] 103 | Xmask[i,0:end-start] = 0 104 | #print rtmp, X.shape, Y.shape, y.shape 105 | #X[i,rtmp,:] = x[rtmp,:] 106 | #Y[i,rtmp,:] = y[np.array(rtmp)] 107 | #Xmask[i,rtmp,:] = 0 108 | 109 | return X, Y, Xmask,Xb 110 | 111 | def get_feed_dict(self, batch, is_train): 112 | X, Y, Xmask, Xb = self.preprocess_batch(batch) 113 | return { 114 | self.x: X, 115 | self.xmask: Xmask, 116 | self.y: Y, 117 | self.is_train: is_train, 118 | self.yb: Xb 119 | } 120 | 121 | -------------------------------------------------------------------------------- /nn/small.sh: -------------------------------------------------------------------------------- 1 | python main.py --train small/train.list --display_train small/clicks_tr.sample --fea_limit 5 --test small/test.list --display_test small/clicks_va.sample --learning_rate 0.005 2 | -------------------------------------------------------------------------------- /nn/small/clicks_tr.sample: 
-------------------------------------------------------------------------------- 1 | display_id,ad_id,clicked 2 | 3,71547,0 3 | 3,95814,0 4 | 3,152141,0 5 | 3,183846,0 6 | 3,228657,1 7 | 3,250082,0 8 | 13,121703,0 9 | 13,143294,0 10 | 13,149541,1 11 | 16,104208,0 12 | 16,172888,0 13 | 16,235104,0 14 | 16,273567,1 15 | 26,152193,1 16 | 26,285992,0 17 | 32,111697,0 18 | 32,185710,0 19 | 32,188665,0 20 | 32,333874,1 21 | 34,14096,1 22 | 34,23522,0 23 | 34,101761,0 24 | 34,151209,0 25 | 34,162129,0 26 | 34,220315,0 27 | 34,228566,0 28 | 34,260399,0 29 | 42,29046,1 30 | 42,276172,0 31 | 3,71547,0 32 | 3,95814,0 33 | 3,152141,0 34 | 3,183846,0 35 | 3,228657,1 36 | 3,250082,0 37 | 13,121703,0 38 | 13,143294,0 39 | 13,149541,1 40 | 16,104208,0 41 | 16,172888,0 42 | 16,235104,0 43 | 16,273567,1 44 | 26,152193,1 45 | 26,285992,0 46 | 32,111697,0 47 | 32,185710,0 48 | 32,188665,0 49 | 32,333874,1 50 | 34,14096,1 51 | 34,23522,0 52 | 34,101761,0 53 | 34,151209,0 54 | 34,162129,0 55 | 34,220315,0 56 | 34,228566,0 57 | 34,260399,0 58 | 42,29046,1 59 | 42,276172,0 60 | 3,71547,0 61 | 3,95814,0 62 | 3,152141,0 63 | 3,183846,0 64 | 3,228657,1 65 | 3,250082,0 66 | 13,121703,0 67 | 13,143294,0 68 | 13,149541,1 69 | 16,104208,0 70 | 16,172888,0 71 | 16,235104,0 72 | 16,273567,1 73 | 26,152193,1 74 | 26,285992,0 75 | 32,111697,0 76 | 32,185710,0 77 | 32,188665,0 78 | 32,333874,1 79 | 34,14096,1 80 | 34,23522,0 81 | 34,101761,0 82 | 34,151209,0 83 | 34,162129,0 84 | 34,220315,0 85 | 34,228566,0 86 | 34,260399,0 87 | 42,29046,1 88 | 42,276172,0 89 | 3,71547,0 90 | 3,95814,0 91 | 3,152141,0 92 | 3,183846,0 93 | 3,228657,1 94 | 3,250082,0 95 | 13,121703,0 96 | 13,143294,0 97 | 13,149541,1 98 | 16,104208,0 99 | 16,172888,0 100 | 16,235104,0 101 | 16,273567,1 102 | 26,152193,1 103 | 26,285992,0 104 | 32,111697,0 105 | 32,185710,0 106 | 32,188665,0 107 | 32,333874,1 108 | 34,14096,1 109 | 34,23522,0 110 | 34,101761,0 111 | 34,151209,0 112 | 34,162129,0 113 | 34,220315,0 114 | 34,228566,0 115 | 34,260399,0 116 | 42,29046,1 117 | 42,276172,0 118 | -------------------------------------------------------------------------------- /nn/small/clicks_va.sample: -------------------------------------------------------------------------------- 1 | display_id,ad_id,clicked 2 | 3,71547,0 3 | 3,95814,0 4 | 3,152141,0 5 | 3,183846,0 6 | 3,228657,1 7 | 3,250082,0 8 | 13,121703,0 9 | 13,143294,0 10 | 13,149541,1 11 | 16,104208,0 12 | 16,172888,0 13 | 16,235104,0 14 | 16,273567,1 15 | 26,152193,1 16 | 26,285992,0 17 | 32,111697,0 18 | 32,185710,0 19 | 32,188665,0 20 | 32,333874,1 21 | 34,14096,1 22 | 34,23522,0 23 | 34,101761,0 24 | 34,151209,0 25 | 34,162129,0 26 | 34,220315,0 27 | 34,228566,0 28 | 34,260399,0 29 | 42,29046,1 30 | 42,276172,0 31 | 3,71547,0 32 | 3,95814,0 33 | 3,152141,0 34 | 3,183846,0 35 | 3,228657,1 36 | 3,250082,0 37 | 13,121703,0 38 | 13,143294,0 39 | 13,149541,1 40 | 16,104208,0 41 | 16,172888,0 42 | 16,235104,0 43 | 16,273567,1 44 | 26,152193,1 45 | 26,285992,0 46 | 32,111697,0 47 | 32,185710,0 48 | 32,188665,0 49 | 32,333874,1 50 | 34,14096,1 51 | 34,23522,0 52 | 34,101761,0 53 | 34,151209,0 54 | 34,162129,0 55 | 34,220315,0 56 | 34,228566,0 57 | 34,260399,0 58 | 42,29046,1 59 | 42,276172,0 60 | 3,71547,0 61 | 3,95814,0 62 | 3,152141,0 63 | 3,183846,0 64 | 3,228657,1 65 | 3,250082,0 66 | 13,121703,0 67 | 13,143294,0 68 | 13,149541,1 69 | 16,104208,0 70 | 16,172888,0 71 | 16,235104,0 72 | 16,273567,1 73 | 26,152193,1 74 | 26,285992,0 75 | 32,111697,0 76 | 32,185710,0 77 | 32,188665,0 78 | 32,333874,1 79 | 
34,14096,1 80 | 34,23522,0 81 | 34,101761,0 82 | 34,151209,0 83 | 34,162129,0 84 | 34,220315,0 85 | 34,228566,0 86 | 34,260399,0 87 | 42,29046,1 88 | 42,276172,0 89 | 3,71547,0 90 | 3,95814,0 91 | 3,152141,0 92 | 3,183846,0 93 | 3,228657,1 94 | 3,250082,0 95 | 13,121703,0 96 | 13,143294,0 97 | 13,149541,1 98 | 16,104208,0 99 | 16,172888,0 100 | 16,235104,0 101 | 16,273567,1 102 | 26,152193,1 103 | 26,285992,0 104 | 32,111697,0 105 | 32,185710,0 106 | 32,188665,0 107 | 32,333874,1 108 | 34,14096,1 109 | 34,23522,0 110 | 34,101761,0 111 | 34,151209,0 112 | 34,162129,0 113 | 34,220315,0 114 | 34,228566,0 115 | 34,260399,0 116 | 42,29046,1 117 | 42,276172,0 118 | -------------------------------------------------------------------------------- /nn/small/test.list: -------------------------------------------------------------------------------- 1 | name,columns 2 | "data/test.sample","sffm0,sffm1,sffm2,sffm3,sffm4,sffm5,sffm6,sffm7,sffm8,sffm9,sffm10,ffm0" 3 | -------------------------------------------------------------------------------- /nn/small/test.sample: -------------------------------------------------------------------------------- 1 | ,sffm0,sffm1,sffm2,sffm3,sffm4,sffm5,sffm6,sffm7,sffm8,sffm9,sffm10,ffm0,click 2 | 0,0.0515877,0.0512952,0.0584666,0.0541916,0.0599344,0.0588076,0.0666565,0.0681219,0.0560595,0.058072,0.059793,0.0532316,0.0 3 | 1,0.190674,0.191824,0.239068,0.1922,0.237203,0.230217,0.313678,0.349786,0.21474,0.391031,0.344956,0.338265,0.0 4 | 2,0.153426,0.159144,0.14328,0.155737,0.138683,0.131298,0.17126,0.200034,0.1269,0.193841,0.213431,0.197445,0.0 5 | 3,0.121402,0.134652,0.142062,0.137984,0.14124,0.14173,0.212495,0.233223,0.150194,0.246823,0.20004,0.208704,0.0 6 | 4,0.201166,0.212233,0.212998,0.185245,0.237729,0.207711,0.210761,0.192045,0.183674,0.202456,0.220514,0.190291,1.0 7 | 5,0.0475586,0.0462934,0.0499161,0.0456461,0.0472016,0.0497535,0.0605238,0.0464654,0.0517964,0.0455793,0.0509504,0.0402469,0.0 8 | 6,0.305636,0.298276,0.268011,0.317113,0.298493,0.330001,0.433118,0.440794,0.332874,0.382836,0.373078,0.490156,0.0 9 | 7,0.198167,0.144942,0.157391,0.162188,0.195639,0.148722,0.150848,0.150656,0.161302,0.147178,0.160963,0.138738,0.0 10 | 8,0.407063,0.359248,0.306404,0.384878,0.363618,0.287798,0.524089,0.383478,0.305048,0.399877,0.372271,0.518678,1.0 11 | 9,0.129071,0.126445,0.0511994,0.118964,0.059193,0.0565401,0.00705537,0.0207395,0.0631221,0.0507373,0.0542783,0.00419731,0.0 12 | 10,0.440347,0.462728,0.262948,0.464676,0.22216,0.166368,0.0593336,0.179087,0.201939,0.162554,0.189122,0.0122727,0.0 13 | 11,0.228471,0.130217,0.0168495,0.199576,0.00848224,0.00950886,0.00171805,0.206851,0.014308,0.0779484,0.087863,0.00087822,0.0 14 | 12,0.418847,0.991691,0.96683,0.43619,0.859727,0.925833,0.978079,0.981086,0.975378,0.956581,0.964555,0.971586,1.0 15 | 13,0.671055,0.617605,0.606565,0.631376,0.602544,0.668022,0.876672,0.721964,0.698678,0.754485,0.764871,0.901184,1.0 16 | 14,0.130901,0.0756133,0.0853984,0.0991483,0.10652,0.0944819,0.0596014,0.0475533,0.104515,0.0442111,0.0569756,0.0445175,0.0 17 | 15,0.24861,0.269577,0.233931,0.268359,0.22982,0.275756,0.245144,0.310737,0.26588,0.321166,0.309314,0.231232,0.0 18 | 16,0.221161,0.14135,0.175787,0.184226,0.185819,0.232389,0.236772,0.274849,0.234639,0.23912,0.227355,0.211311,0.0 19 | 17,0.303925,0.274397,0.231182,0.266895,0.286691,0.270527,0.228943,0.301131,0.266436,0.335565,0.307768,0.202424,0.0 20 | 18,0.122112,0.106457,0.0938463,0.114249,0.0944101,0.0881247,0.104559,0.141754,0.0751357,0.152233,0.153261,0.0842195,1.0 21 | 
19,0.29682,0.337259,0.348353,0.303856,0.346684,0.33906,0.351329,0.360309,0.315823,0.338081,0.387706,0.361115,1.0 22 | 20,0.117618,0.119904,0.112298,0.105533,0.0971093,0.0989348,0.0775649,0.0663834,0.102068,0.0495816,0.0624868,0.0806681,0.0 23 | 21,0.301816,0.24819,0.309643,0.261582,0.37596,0.332394,0.326399,0.29292,0.342858,0.31686,0.341505,0.316123,0.0 24 | 22,0.15122,0.136581,0.138819,0.145024,0.165039,0.171084,0.144622,0.160229,0.14734,0.164099,0.173422,0.138611,0.0 25 | 23,0.109819,0.108503,0.192634,0.114078,0.205974,0.169323,0.162738,0.199573,0.174041,0.216872,0.209412,0.122169,0.0 26 | 24,0.0284366,0.0265458,0.0375123,0.0287954,0.050621,0.0304326,0.031097,0.0152319,0.0348891,0.0141938,0.0169807,0.026086,0.0 27 | 25,0.0415617,0.0498941,0.0429027,0.0462508,0.0402712,0.038657,0.0267309,0.0260804,0.0390857,0.0244079,0.0314033,0.0283975,0.0 28 | 26,0.0782183,0.0860654,0.0386417,0.093939,0.0311257,0.0336799,0.0271866,0.0474061,0.0377424,0.0348524,0.0339082,0.0186533,0.0 29 | 27,0.515198,0.477969,0.487953,0.512853,0.528621,0.549713,0.519788,0.577133,0.55569,0.587544,0.5528,0.615966,1.0 30 | 28,0.473032,0.506821,0.492473,0.473992,0.498311,0.533195,0.541702,0.561859,0.515185,0.560986,0.564291,0.483087,0.0 31 | 0,0.0515877,0.0512952,0.0584666,0.0541916,0.0599344,0.0588076,0.0666565,0.0681219,0.0560595,0.058072,0.059793,0.0532316,0.0 32 | 1,0.190674,0.191824,0.239068,0.1922,0.237203,0.230217,0.313678,0.349786,0.21474,0.391031,0.344956,0.338265,0.0 33 | 2,0.153426,0.159144,0.14328,0.155737,0.138683,0.131298,0.17126,0.200034,0.1269,0.193841,0.213431,0.197445,0.0 34 | 3,0.121402,0.134652,0.142062,0.137984,0.14124,0.14173,0.212495,0.233223,0.150194,0.246823,0.20004,0.208704,0.0 35 | 4,0.201166,0.212233,0.212998,0.185245,0.237729,0.207711,0.210761,0.192045,0.183674,0.202456,0.220514,0.190291,1.0 36 | 5,0.0475586,0.0462934,0.0499161,0.0456461,0.0472016,0.0497535,0.0605238,0.0464654,0.0517964,0.0455793,0.0509504,0.0402469,0.0 37 | 6,0.305636,0.298276,0.268011,0.317113,0.298493,0.330001,0.433118,0.440794,0.332874,0.382836,0.373078,0.490156,0.0 38 | 7,0.198167,0.144942,0.157391,0.162188,0.195639,0.148722,0.150848,0.150656,0.161302,0.147178,0.160963,0.138738,0.0 39 | 8,0.407063,0.359248,0.306404,0.384878,0.363618,0.287798,0.524089,0.383478,0.305048,0.399877,0.372271,0.518678,1.0 40 | 9,0.129071,0.126445,0.0511994,0.118964,0.059193,0.0565401,0.00705537,0.0207395,0.0631221,0.0507373,0.0542783,0.00419731,0.0 41 | 10,0.440347,0.462728,0.262948,0.464676,0.22216,0.166368,0.0593336,0.179087,0.201939,0.162554,0.189122,0.0122727,0.0 42 | 11,0.228471,0.130217,0.0168495,0.199576,0.00848224,0.00950886,0.00171805,0.206851,0.014308,0.0779484,0.087863,0.00087822,0.0 43 | 12,0.418847,0.991691,0.96683,0.43619,0.859727,0.925833,0.978079,0.981086,0.975378,0.956581,0.964555,0.971586,1.0 44 | 13,0.671055,0.617605,0.606565,0.631376,0.602544,0.668022,0.876672,0.721964,0.698678,0.754485,0.764871,0.901184,1.0 45 | 14,0.130901,0.0756133,0.0853984,0.0991483,0.10652,0.0944819,0.0596014,0.0475533,0.104515,0.0442111,0.0569756,0.0445175,0.0 46 | 15,0.24861,0.269577,0.233931,0.268359,0.22982,0.275756,0.245144,0.310737,0.26588,0.321166,0.309314,0.231232,0.0 47 | 16,0.221161,0.14135,0.175787,0.184226,0.185819,0.232389,0.236772,0.274849,0.234639,0.23912,0.227355,0.211311,0.0 48 | 17,0.303925,0.274397,0.231182,0.266895,0.286691,0.270527,0.228943,0.301131,0.266436,0.335565,0.307768,0.202424,0.0 49 | 18,0.122112,0.106457,0.0938463,0.114249,0.0944101,0.0881247,0.104559,0.141754,0.0751357,0.152233,0.153261,0.0842195,1.0 50 | 
19,0.29682,0.337259,0.348353,0.303856,0.346684,0.33906,0.351329,0.360309,0.315823,0.338081,0.387706,0.361115,1.0 51 | 20,0.117618,0.119904,0.112298,0.105533,0.0971093,0.0989348,0.0775649,0.0663834,0.102068,0.0495816,0.0624868,0.0806681,0.0 52 | 21,0.301816,0.24819,0.309643,0.261582,0.37596,0.332394,0.326399,0.29292,0.342858,0.31686,0.341505,0.316123,0.0 53 | 22,0.15122,0.136581,0.138819,0.145024,0.165039,0.171084,0.144622,0.160229,0.14734,0.164099,0.173422,0.138611,0.0 54 | 23,0.109819,0.108503,0.192634,0.114078,0.205974,0.169323,0.162738,0.199573,0.174041,0.216872,0.209412,0.122169,0.0 55 | 24,0.0284366,0.0265458,0.0375123,0.0287954,0.050621,0.0304326,0.031097,0.0152319,0.0348891,0.0141938,0.0169807,0.026086,0.0 56 | 25,0.0415617,0.0498941,0.0429027,0.0462508,0.0402712,0.038657,0.0267309,0.0260804,0.0390857,0.0244079,0.0314033,0.0283975,0.0 57 | 26,0.0782183,0.0860654,0.0386417,0.093939,0.0311257,0.0336799,0.0271866,0.0474061,0.0377424,0.0348524,0.0339082,0.0186533,0.0 58 | 27,0.515198,0.477969,0.487953,0.512853,0.528621,0.549713,0.519788,0.577133,0.55569,0.587544,0.5528,0.615966,1.0 59 | 28,0.473032,0.506821,0.492473,0.473992,0.498311,0.533195,0.541702,0.561859,0.515185,0.560986,0.564291,0.483087,0.0 60 | 0,0.0515877,0.0512952,0.0584666,0.0541916,0.0599344,0.0588076,0.0666565,0.0681219,0.0560595,0.058072,0.059793,0.0532316,0.0 61 | 1,0.190674,0.191824,0.239068,0.1922,0.237203,0.230217,0.313678,0.349786,0.21474,0.391031,0.344956,0.338265,0.0 62 | 2,0.153426,0.159144,0.14328,0.155737,0.138683,0.131298,0.17126,0.200034,0.1269,0.193841,0.213431,0.197445,0.0 63 | 3,0.121402,0.134652,0.142062,0.137984,0.14124,0.14173,0.212495,0.233223,0.150194,0.246823,0.20004,0.208704,0.0 64 | 4,0.201166,0.212233,0.212998,0.185245,0.237729,0.207711,0.210761,0.192045,0.183674,0.202456,0.220514,0.190291,1.0 65 | 5,0.0475586,0.0462934,0.0499161,0.0456461,0.0472016,0.0497535,0.0605238,0.0464654,0.0517964,0.0455793,0.0509504,0.0402469,0.0 66 | 6,0.305636,0.298276,0.268011,0.317113,0.298493,0.330001,0.433118,0.440794,0.332874,0.382836,0.373078,0.490156,0.0 67 | 7,0.198167,0.144942,0.157391,0.162188,0.195639,0.148722,0.150848,0.150656,0.161302,0.147178,0.160963,0.138738,0.0 68 | 8,0.407063,0.359248,0.306404,0.384878,0.363618,0.287798,0.524089,0.383478,0.305048,0.399877,0.372271,0.518678,1.0 69 | 9,0.129071,0.126445,0.0511994,0.118964,0.059193,0.0565401,0.00705537,0.0207395,0.0631221,0.0507373,0.0542783,0.00419731,0.0 70 | 10,0.440347,0.462728,0.262948,0.464676,0.22216,0.166368,0.0593336,0.179087,0.201939,0.162554,0.189122,0.0122727,0.0 71 | 11,0.228471,0.130217,0.0168495,0.199576,0.00848224,0.00950886,0.00171805,0.206851,0.014308,0.0779484,0.087863,0.00087822,0.0 72 | 12,0.418847,0.991691,0.96683,0.43619,0.859727,0.925833,0.978079,0.981086,0.975378,0.956581,0.964555,0.971586,1.0 73 | 13,0.671055,0.617605,0.606565,0.631376,0.602544,0.668022,0.876672,0.721964,0.698678,0.754485,0.764871,0.901184,1.0 74 | 14,0.130901,0.0756133,0.0853984,0.0991483,0.10652,0.0944819,0.0596014,0.0475533,0.104515,0.0442111,0.0569756,0.0445175,0.0 75 | 15,0.24861,0.269577,0.233931,0.268359,0.22982,0.275756,0.245144,0.310737,0.26588,0.321166,0.309314,0.231232,0.0 76 | 16,0.221161,0.14135,0.175787,0.184226,0.185819,0.232389,0.236772,0.274849,0.234639,0.23912,0.227355,0.211311,0.0 77 | 17,0.303925,0.274397,0.231182,0.266895,0.286691,0.270527,0.228943,0.301131,0.266436,0.335565,0.307768,0.202424,0.0 78 | 18,0.122112,0.106457,0.0938463,0.114249,0.0944101,0.0881247,0.104559,0.141754,0.0751357,0.152233,0.153261,0.0842195,1.0 79 | 
19,0.29682,0.337259,0.348353,0.303856,0.346684,0.33906,0.351329,0.360309,0.315823,0.338081,0.387706,0.361115,1.0 80 | 20,0.117618,0.119904,0.112298,0.105533,0.0971093,0.0989348,0.0775649,0.0663834,0.102068,0.0495816,0.0624868,0.0806681,0.0 81 | 21,0.301816,0.24819,0.309643,0.261582,0.37596,0.332394,0.326399,0.29292,0.342858,0.31686,0.341505,0.316123,0.0 82 | 22,0.15122,0.136581,0.138819,0.145024,0.165039,0.171084,0.144622,0.160229,0.14734,0.164099,0.173422,0.138611,0.0 83 | 23,0.109819,0.108503,0.192634,0.114078,0.205974,0.169323,0.162738,0.199573,0.174041,0.216872,0.209412,0.122169,0.0 84 | 24,0.0284366,0.0265458,0.0375123,0.0287954,0.050621,0.0304326,0.031097,0.0152319,0.0348891,0.0141938,0.0169807,0.026086,0.0 85 | 25,0.0415617,0.0498941,0.0429027,0.0462508,0.0402712,0.038657,0.0267309,0.0260804,0.0390857,0.0244079,0.0314033,0.0283975,0.0 86 | 26,0.0782183,0.0860654,0.0386417,0.093939,0.0311257,0.0336799,0.0271866,0.0474061,0.0377424,0.0348524,0.0339082,0.0186533,0.0 87 | 27,0.515198,0.477969,0.487953,0.512853,0.528621,0.549713,0.519788,0.577133,0.55569,0.587544,0.5528,0.615966,1.0 88 | 28,0.473032,0.506821,0.492473,0.473992,0.498311,0.533195,0.541702,0.561859,0.515185,0.560986,0.564291,0.483087,0.0 89 | 0,0.0515877,0.0512952,0.0584666,0.0541916,0.0599344,0.0588076,0.0666565,0.0681219,0.0560595,0.058072,0.059793,0.0532316,0.0 90 | 1,0.190674,0.191824,0.239068,0.1922,0.237203,0.230217,0.313678,0.349786,0.21474,0.391031,0.344956,0.338265,0.0 91 | 2,0.153426,0.159144,0.14328,0.155737,0.138683,0.131298,0.17126,0.200034,0.1269,0.193841,0.213431,0.197445,0.0 92 | 3,0.121402,0.134652,0.142062,0.137984,0.14124,0.14173,0.212495,0.233223,0.150194,0.246823,0.20004,0.208704,0.0 93 | 4,0.201166,0.212233,0.212998,0.185245,0.237729,0.207711,0.210761,0.192045,0.183674,0.202456,0.220514,0.190291,1.0 94 | 5,0.0475586,0.0462934,0.0499161,0.0456461,0.0472016,0.0497535,0.0605238,0.0464654,0.0517964,0.0455793,0.0509504,0.0402469,0.0 95 | 6,0.305636,0.298276,0.268011,0.317113,0.298493,0.330001,0.433118,0.440794,0.332874,0.382836,0.373078,0.490156,0.0 96 | 7,0.198167,0.144942,0.157391,0.162188,0.195639,0.148722,0.150848,0.150656,0.161302,0.147178,0.160963,0.138738,0.0 97 | 8,0.407063,0.359248,0.306404,0.384878,0.363618,0.287798,0.524089,0.383478,0.305048,0.399877,0.372271,0.518678,1.0 98 | 9,0.129071,0.126445,0.0511994,0.118964,0.059193,0.0565401,0.00705537,0.0207395,0.0631221,0.0507373,0.0542783,0.00419731,0.0 99 | 10,0.440347,0.462728,0.262948,0.464676,0.22216,0.166368,0.0593336,0.179087,0.201939,0.162554,0.189122,0.0122727,0.0 100 | 11,0.228471,0.130217,0.0168495,0.199576,0.00848224,0.00950886,0.00171805,0.206851,0.014308,0.0779484,0.087863,0.00087822,0.0 101 | 12,0.418847,0.991691,0.96683,0.43619,0.859727,0.925833,0.978079,0.981086,0.975378,0.956581,0.964555,0.971586,1.0 102 | 13,0.671055,0.617605,0.606565,0.631376,0.602544,0.668022,0.876672,0.721964,0.698678,0.754485,0.764871,0.901184,1.0 103 | 14,0.130901,0.0756133,0.0853984,0.0991483,0.10652,0.0944819,0.0596014,0.0475533,0.104515,0.0442111,0.0569756,0.0445175,0.0 104 | 15,0.24861,0.269577,0.233931,0.268359,0.22982,0.275756,0.245144,0.310737,0.26588,0.321166,0.309314,0.231232,0.0 105 | 16,0.221161,0.14135,0.175787,0.184226,0.185819,0.232389,0.236772,0.274849,0.234639,0.23912,0.227355,0.211311,0.0 106 | 17,0.303925,0.274397,0.231182,0.266895,0.286691,0.270527,0.228943,0.301131,0.266436,0.335565,0.307768,0.202424,0.0 107 | 18,0.122112,0.106457,0.0938463,0.114249,0.0944101,0.0881247,0.104559,0.141754,0.0751357,0.152233,0.153261,0.0842195,1.0 108 | 
19,0.29682,0.337259,0.348353,0.303856,0.346684,0.33906,0.351329,0.360309,0.315823,0.338081,0.387706,0.361115,1.0 109 | 20,0.117618,0.119904,0.112298,0.105533,0.0971093,0.0989348,0.0775649,0.0663834,0.102068,0.0495816,0.0624868,0.0806681,0.0 110 | 21,0.301816,0.24819,0.309643,0.261582,0.37596,0.332394,0.326399,0.29292,0.342858,0.31686,0.341505,0.316123,0.0 111 | 22,0.15122,0.136581,0.138819,0.145024,0.165039,0.171084,0.144622,0.160229,0.14734,0.164099,0.173422,0.138611,0.0 112 | 23,0.109819,0.108503,0.192634,0.114078,0.205974,0.169323,0.162738,0.199573,0.174041,0.216872,0.209412,0.122169,0.0 113 | 24,0.0284366,0.0265458,0.0375123,0.0287954,0.050621,0.0304326,0.031097,0.0152319,0.0348891,0.0141938,0.0169807,0.026086,0.0 114 | 25,0.0415617,0.0498941,0.0429027,0.0462508,0.0402712,0.038657,0.0267309,0.0260804,0.0390857,0.0244079,0.0314033,0.0283975,0.0 115 | 26,0.0782183,0.0860654,0.0386417,0.093939,0.0311257,0.0336799,0.0271866,0.0474061,0.0377424,0.0348524,0.0339082,0.0186533,0.0 116 | 27,0.515198,0.477969,0.487953,0.512853,0.528621,0.549713,0.519788,0.577133,0.55569,0.587544,0.5528,0.615966,1.0 117 | 28,0.473032,0.506821,0.492473,0.473992,0.498311,0.533195,0.541702,0.561859,0.515185,0.560986,0.564291,0.483087,0.0 118 | -------------------------------------------------------------------------------- /nn/small/train.list: -------------------------------------------------------------------------------- 1 | name,columns 2 | "data/train.sample","sffm0,sffm1,sffm2,sffm3,sffm4,sffm5,sffm6,sffm7,sffm8,sffm9,sffm10,ffm0" 3 | -------------------------------------------------------------------------------- /nn/small/train.sample: -------------------------------------------------------------------------------- 1 | ,sffm0,sffm1,sffm2,sffm3,sffm4,sffm5,sffm6,sffm7,sffm8,sffm9,sffm10,ffm0,click 2 | 0,0.0515877,0.0512952,0.0584666,0.0541916,0.0599344,0.0588076,0.0666565,0.0681219,0.0560595,0.058072,0.059793,0.0532316,0.0 3 | 1,0.190674,0.191824,0.239068,0.1922,0.237203,0.230217,0.313678,0.349786,0.21474,0.391031,0.344956,0.338265,0.0 4 | 2,0.153426,0.159144,0.14328,0.155737,0.138683,0.131298,0.17126,0.200034,0.1269,0.193841,0.213431,0.197445,0.0 5 | 3,0.121402,0.134652,0.142062,0.137984,0.14124,0.14173,0.212495,0.233223,0.150194,0.246823,0.20004,0.208704,0.0 6 | 4,0.201166,0.212233,0.212998,0.185245,0.237729,0.207711,0.210761,0.192045,0.183674,0.202456,0.220514,0.190291,1.0 7 | 5,0.0475586,0.0462934,0.0499161,0.0456461,0.0472016,0.0497535,0.0605238,0.0464654,0.0517964,0.0455793,0.0509504,0.0402469,0.0 8 | 6,0.305636,0.298276,0.268011,0.317113,0.298493,0.330001,0.433118,0.440794,0.332874,0.382836,0.373078,0.490156,0.0 9 | 7,0.198167,0.144942,0.157391,0.162188,0.195639,0.148722,0.150848,0.150656,0.161302,0.147178,0.160963,0.138738,0.0 10 | 8,0.407063,0.359248,0.306404,0.384878,0.363618,0.287798,0.524089,0.383478,0.305048,0.399877,0.372271,0.518678,1.0 11 | 9,0.129071,0.126445,0.0511994,0.118964,0.059193,0.0565401,0.00705537,0.0207395,0.0631221,0.0507373,0.0542783,0.00419731,0.0 12 | 10,0.440347,0.462728,0.262948,0.464676,0.22216,0.166368,0.0593336,0.179087,0.201939,0.162554,0.189122,0.0122727,0.0 13 | 11,0.228471,0.130217,0.0168495,0.199576,0.00848224,0.00950886,0.00171805,0.206851,0.014308,0.0779484,0.087863,0.00087822,0.0 14 | 12,0.418847,0.991691,0.96683,0.43619,0.859727,0.925833,0.978079,0.981086,0.975378,0.956581,0.964555,0.971586,1.0 15 | 13,0.671055,0.617605,0.606565,0.631376,0.602544,0.668022,0.876672,0.721964,0.698678,0.754485,0.764871,0.901184,1.0 16 | 
14,0.130901,0.0756133,0.0853984,0.0991483,0.10652,0.0944819,0.0596014,0.0475533,0.104515,0.0442111,0.0569756,0.0445175,0.0 17 | 15,0.24861,0.269577,0.233931,0.268359,0.22982,0.275756,0.245144,0.310737,0.26588,0.321166,0.309314,0.231232,0.0 18 | 16,0.221161,0.14135,0.175787,0.184226,0.185819,0.232389,0.236772,0.274849,0.234639,0.23912,0.227355,0.211311,0.0 19 | 17,0.303925,0.274397,0.231182,0.266895,0.286691,0.270527,0.228943,0.301131,0.266436,0.335565,0.307768,0.202424,0.0 20 | 18,0.122112,0.106457,0.0938463,0.114249,0.0944101,0.0881247,0.104559,0.141754,0.0751357,0.152233,0.153261,0.0842195,1.0 21 | 19,0.29682,0.337259,0.348353,0.303856,0.346684,0.33906,0.351329,0.360309,0.315823,0.338081,0.387706,0.361115,1.0 22 | 20,0.117618,0.119904,0.112298,0.105533,0.0971093,0.0989348,0.0775649,0.0663834,0.102068,0.0495816,0.0624868,0.0806681,0.0 23 | 21,0.301816,0.24819,0.309643,0.261582,0.37596,0.332394,0.326399,0.29292,0.342858,0.31686,0.341505,0.316123,0.0 24 | 22,0.15122,0.136581,0.138819,0.145024,0.165039,0.171084,0.144622,0.160229,0.14734,0.164099,0.173422,0.138611,0.0 25 | 23,0.109819,0.108503,0.192634,0.114078,0.205974,0.169323,0.162738,0.199573,0.174041,0.216872,0.209412,0.122169,0.0 26 | 24,0.0284366,0.0265458,0.0375123,0.0287954,0.050621,0.0304326,0.031097,0.0152319,0.0348891,0.0141938,0.0169807,0.026086,0.0 27 | 25,0.0415617,0.0498941,0.0429027,0.0462508,0.0402712,0.038657,0.0267309,0.0260804,0.0390857,0.0244079,0.0314033,0.0283975,0.0 28 | 26,0.0782183,0.0860654,0.0386417,0.093939,0.0311257,0.0336799,0.0271866,0.0474061,0.0377424,0.0348524,0.0339082,0.0186533,0.0 29 | 27,0.515198,0.477969,0.487953,0.512853,0.528621,0.549713,0.519788,0.577133,0.55569,0.587544,0.5528,0.615966,1.0 30 | 28,0.473032,0.506821,0.492473,0.473992,0.498311,0.533195,0.541702,0.561859,0.515185,0.560986,0.564291,0.483087,0.0 31 | 0,0.0515877,0.0512952,0.0584666,0.0541916,0.0599344,0.0588076,0.0666565,0.0681219,0.0560595,0.058072,0.059793,0.0532316,0.0 32 | 1,0.190674,0.191824,0.239068,0.1922,0.237203,0.230217,0.313678,0.349786,0.21474,0.391031,0.344956,0.338265,0.0 33 | 2,0.153426,0.159144,0.14328,0.155737,0.138683,0.131298,0.17126,0.200034,0.1269,0.193841,0.213431,0.197445,0.0 34 | 3,0.121402,0.134652,0.142062,0.137984,0.14124,0.14173,0.212495,0.233223,0.150194,0.246823,0.20004,0.208704,0.0 35 | 4,0.201166,0.212233,0.212998,0.185245,0.237729,0.207711,0.210761,0.192045,0.183674,0.202456,0.220514,0.190291,1.0 36 | 5,0.0475586,0.0462934,0.0499161,0.0456461,0.0472016,0.0497535,0.0605238,0.0464654,0.0517964,0.0455793,0.0509504,0.0402469,0.0 37 | 6,0.305636,0.298276,0.268011,0.317113,0.298493,0.330001,0.433118,0.440794,0.332874,0.382836,0.373078,0.490156,0.0 38 | 7,0.198167,0.144942,0.157391,0.162188,0.195639,0.148722,0.150848,0.150656,0.161302,0.147178,0.160963,0.138738,0.0 39 | 8,0.407063,0.359248,0.306404,0.384878,0.363618,0.287798,0.524089,0.383478,0.305048,0.399877,0.372271,0.518678,1.0 40 | 9,0.129071,0.126445,0.0511994,0.118964,0.059193,0.0565401,0.00705537,0.0207395,0.0631221,0.0507373,0.0542783,0.00419731,0.0 41 | 10,0.440347,0.462728,0.262948,0.464676,0.22216,0.166368,0.0593336,0.179087,0.201939,0.162554,0.189122,0.0122727,0.0 42 | 11,0.228471,0.130217,0.0168495,0.199576,0.00848224,0.00950886,0.00171805,0.206851,0.014308,0.0779484,0.087863,0.00087822,0.0 43 | 12,0.418847,0.991691,0.96683,0.43619,0.859727,0.925833,0.978079,0.981086,0.975378,0.956581,0.964555,0.971586,1.0 44 | 13,0.671055,0.617605,0.606565,0.631376,0.602544,0.668022,0.876672,0.721964,0.698678,0.754485,0.764871,0.901184,1.0 45 | 
14,0.130901,0.0756133,0.0853984,0.0991483,0.10652,0.0944819,0.0596014,0.0475533,0.104515,0.0442111,0.0569756,0.0445175,0.0 46 | 15,0.24861,0.269577,0.233931,0.268359,0.22982,0.275756,0.245144,0.310737,0.26588,0.321166,0.309314,0.231232,0.0 47 | 16,0.221161,0.14135,0.175787,0.184226,0.185819,0.232389,0.236772,0.274849,0.234639,0.23912,0.227355,0.211311,0.0 48 | 17,0.303925,0.274397,0.231182,0.266895,0.286691,0.270527,0.228943,0.301131,0.266436,0.335565,0.307768,0.202424,0.0 49 | 18,0.122112,0.106457,0.0938463,0.114249,0.0944101,0.0881247,0.104559,0.141754,0.0751357,0.152233,0.153261,0.0842195,1.0 50 | 19,0.29682,0.337259,0.348353,0.303856,0.346684,0.33906,0.351329,0.360309,0.315823,0.338081,0.387706,0.361115,1.0 51 | 20,0.117618,0.119904,0.112298,0.105533,0.0971093,0.0989348,0.0775649,0.0663834,0.102068,0.0495816,0.0624868,0.0806681,0.0 52 | 21,0.301816,0.24819,0.309643,0.261582,0.37596,0.332394,0.326399,0.29292,0.342858,0.31686,0.341505,0.316123,0.0 53 | 22,0.15122,0.136581,0.138819,0.145024,0.165039,0.171084,0.144622,0.160229,0.14734,0.164099,0.173422,0.138611,0.0 54 | 23,0.109819,0.108503,0.192634,0.114078,0.205974,0.169323,0.162738,0.199573,0.174041,0.216872,0.209412,0.122169,0.0 55 | 24,0.0284366,0.0265458,0.0375123,0.0287954,0.050621,0.0304326,0.031097,0.0152319,0.0348891,0.0141938,0.0169807,0.026086,0.0 56 | 25,0.0415617,0.0498941,0.0429027,0.0462508,0.0402712,0.038657,0.0267309,0.0260804,0.0390857,0.0244079,0.0314033,0.0283975,0.0 57 | 26,0.0782183,0.0860654,0.0386417,0.093939,0.0311257,0.0336799,0.0271866,0.0474061,0.0377424,0.0348524,0.0339082,0.0186533,0.0 58 | 27,0.515198,0.477969,0.487953,0.512853,0.528621,0.549713,0.519788,0.577133,0.55569,0.587544,0.5528,0.615966,1.0 59 | 28,0.473032,0.506821,0.492473,0.473992,0.498311,0.533195,0.541702,0.561859,0.515185,0.560986,0.564291,0.483087,0.0 60 | 0,0.0515877,0.0512952,0.0584666,0.0541916,0.0599344,0.0588076,0.0666565,0.0681219,0.0560595,0.058072,0.059793,0.0532316,0.0 61 | 1,0.190674,0.191824,0.239068,0.1922,0.237203,0.230217,0.313678,0.349786,0.21474,0.391031,0.344956,0.338265,0.0 62 | 2,0.153426,0.159144,0.14328,0.155737,0.138683,0.131298,0.17126,0.200034,0.1269,0.193841,0.213431,0.197445,0.0 63 | 3,0.121402,0.134652,0.142062,0.137984,0.14124,0.14173,0.212495,0.233223,0.150194,0.246823,0.20004,0.208704,0.0 64 | 4,0.201166,0.212233,0.212998,0.185245,0.237729,0.207711,0.210761,0.192045,0.183674,0.202456,0.220514,0.190291,1.0 65 | 5,0.0475586,0.0462934,0.0499161,0.0456461,0.0472016,0.0497535,0.0605238,0.0464654,0.0517964,0.0455793,0.0509504,0.0402469,0.0 66 | 6,0.305636,0.298276,0.268011,0.317113,0.298493,0.330001,0.433118,0.440794,0.332874,0.382836,0.373078,0.490156,0.0 67 | 7,0.198167,0.144942,0.157391,0.162188,0.195639,0.148722,0.150848,0.150656,0.161302,0.147178,0.160963,0.138738,0.0 68 | 8,0.407063,0.359248,0.306404,0.384878,0.363618,0.287798,0.524089,0.383478,0.305048,0.399877,0.372271,0.518678,1.0 69 | 9,0.129071,0.126445,0.0511994,0.118964,0.059193,0.0565401,0.00705537,0.0207395,0.0631221,0.0507373,0.0542783,0.00419731,0.0 70 | 10,0.440347,0.462728,0.262948,0.464676,0.22216,0.166368,0.0593336,0.179087,0.201939,0.162554,0.189122,0.0122727,0.0 71 | 11,0.228471,0.130217,0.0168495,0.199576,0.00848224,0.00950886,0.00171805,0.206851,0.014308,0.0779484,0.087863,0.00087822,0.0 72 | 12,0.418847,0.991691,0.96683,0.43619,0.859727,0.925833,0.978079,0.981086,0.975378,0.956581,0.964555,0.971586,1.0 73 | 13,0.671055,0.617605,0.606565,0.631376,0.602544,0.668022,0.876672,0.721964,0.698678,0.754485,0.764871,0.901184,1.0 74 | 
14,0.130901,0.0756133,0.0853984,0.0991483,0.10652,0.0944819,0.0596014,0.0475533,0.104515,0.0442111,0.0569756,0.0445175,0.0 75 | 15,0.24861,0.269577,0.233931,0.268359,0.22982,0.275756,0.245144,0.310737,0.26588,0.321166,0.309314,0.231232,0.0 76 | 16,0.221161,0.14135,0.175787,0.184226,0.185819,0.232389,0.236772,0.274849,0.234639,0.23912,0.227355,0.211311,0.0 77 | 17,0.303925,0.274397,0.231182,0.266895,0.286691,0.270527,0.228943,0.301131,0.266436,0.335565,0.307768,0.202424,0.0 78 | 18,0.122112,0.106457,0.0938463,0.114249,0.0944101,0.0881247,0.104559,0.141754,0.0751357,0.152233,0.153261,0.0842195,1.0 79 | 19,0.29682,0.337259,0.348353,0.303856,0.346684,0.33906,0.351329,0.360309,0.315823,0.338081,0.387706,0.361115,1.0 80 | 20,0.117618,0.119904,0.112298,0.105533,0.0971093,0.0989348,0.0775649,0.0663834,0.102068,0.0495816,0.0624868,0.0806681,0.0 81 | 21,0.301816,0.24819,0.309643,0.261582,0.37596,0.332394,0.326399,0.29292,0.342858,0.31686,0.341505,0.316123,0.0 82 | 22,0.15122,0.136581,0.138819,0.145024,0.165039,0.171084,0.144622,0.160229,0.14734,0.164099,0.173422,0.138611,0.0 83 | 23,0.109819,0.108503,0.192634,0.114078,0.205974,0.169323,0.162738,0.199573,0.174041,0.216872,0.209412,0.122169,0.0 84 | 24,0.0284366,0.0265458,0.0375123,0.0287954,0.050621,0.0304326,0.031097,0.0152319,0.0348891,0.0141938,0.0169807,0.026086,0.0 85 | 25,0.0415617,0.0498941,0.0429027,0.0462508,0.0402712,0.038657,0.0267309,0.0260804,0.0390857,0.0244079,0.0314033,0.0283975,0.0 86 | 26,0.0782183,0.0860654,0.0386417,0.093939,0.0311257,0.0336799,0.0271866,0.0474061,0.0377424,0.0348524,0.0339082,0.0186533,0.0 87 | 27,0.515198,0.477969,0.487953,0.512853,0.528621,0.549713,0.519788,0.577133,0.55569,0.587544,0.5528,0.615966,1.0 88 | 28,0.473032,0.506821,0.492473,0.473992,0.498311,0.533195,0.541702,0.561859,0.515185,0.560986,0.564291,0.483087,0.0 89 | 0,0.0515877,0.0512952,0.0584666,0.0541916,0.0599344,0.0588076,0.0666565,0.0681219,0.0560595,0.058072,0.059793,0.0532316,0.0 90 | 1,0.190674,0.191824,0.239068,0.1922,0.237203,0.230217,0.313678,0.349786,0.21474,0.391031,0.344956,0.338265,0.0 91 | 2,0.153426,0.159144,0.14328,0.155737,0.138683,0.131298,0.17126,0.200034,0.1269,0.193841,0.213431,0.197445,0.0 92 | 3,0.121402,0.134652,0.142062,0.137984,0.14124,0.14173,0.212495,0.233223,0.150194,0.246823,0.20004,0.208704,0.0 93 | 4,0.201166,0.212233,0.212998,0.185245,0.237729,0.207711,0.210761,0.192045,0.183674,0.202456,0.220514,0.190291,1.0 94 | 5,0.0475586,0.0462934,0.0499161,0.0456461,0.0472016,0.0497535,0.0605238,0.0464654,0.0517964,0.0455793,0.0509504,0.0402469,0.0 95 | 6,0.305636,0.298276,0.268011,0.317113,0.298493,0.330001,0.433118,0.440794,0.332874,0.382836,0.373078,0.490156,0.0 96 | 7,0.198167,0.144942,0.157391,0.162188,0.195639,0.148722,0.150848,0.150656,0.161302,0.147178,0.160963,0.138738,0.0 97 | 8,0.407063,0.359248,0.306404,0.384878,0.363618,0.287798,0.524089,0.383478,0.305048,0.399877,0.372271,0.518678,1.0 98 | 9,0.129071,0.126445,0.0511994,0.118964,0.059193,0.0565401,0.00705537,0.0207395,0.0631221,0.0507373,0.0542783,0.00419731,0.0 99 | 10,0.440347,0.462728,0.262948,0.464676,0.22216,0.166368,0.0593336,0.179087,0.201939,0.162554,0.189122,0.0122727,0.0 100 | 11,0.228471,0.130217,0.0168495,0.199576,0.00848224,0.00950886,0.00171805,0.206851,0.014308,0.0779484,0.087863,0.00087822,0.0 101 | 12,0.418847,0.991691,0.96683,0.43619,0.859727,0.925833,0.978079,0.981086,0.975378,0.956581,0.964555,0.971586,1.0 102 | 13,0.671055,0.617605,0.606565,0.631376,0.602544,0.668022,0.876672,0.721964,0.698678,0.754485,0.764871,0.901184,1.0 103 | 
14,0.130901,0.0756133,0.0853984,0.0991483,0.10652,0.0944819,0.0596014,0.0475533,0.104515,0.0442111,0.0569756,0.0445175,0.0 104 | 15,0.24861,0.269577,0.233931,0.268359,0.22982,0.275756,0.245144,0.310737,0.26588,0.321166,0.309314,0.231232,0.0 105 | 16,0.221161,0.14135,0.175787,0.184226,0.185819,0.232389,0.236772,0.274849,0.234639,0.23912,0.227355,0.211311,0.0 106 | 17,0.303925,0.274397,0.231182,0.266895,0.286691,0.270527,0.228943,0.301131,0.266436,0.335565,0.307768,0.202424,0.0 107 | 18,0.122112,0.106457,0.0938463,0.114249,0.0944101,0.0881247,0.104559,0.141754,0.0751357,0.152233,0.153261,0.0842195,1.0 108 | 19,0.29682,0.337259,0.348353,0.303856,0.346684,0.33906,0.351329,0.360309,0.315823,0.338081,0.387706,0.361115,1.0 109 | 20,0.117618,0.119904,0.112298,0.105533,0.0971093,0.0989348,0.0775649,0.0663834,0.102068,0.0495816,0.0624868,0.0806681,0.0 110 | 21,0.301816,0.24819,0.309643,0.261582,0.37596,0.332394,0.326399,0.29292,0.342858,0.31686,0.341505,0.316123,0.0 111 | 22,0.15122,0.136581,0.138819,0.145024,0.165039,0.171084,0.144622,0.160229,0.14734,0.164099,0.173422,0.138611,0.0 112 | 23,0.109819,0.108503,0.192634,0.114078,0.205974,0.169323,0.162738,0.199573,0.174041,0.216872,0.209412,0.122169,0.0 113 | 24,0.0284366,0.0265458,0.0375123,0.0287954,0.050621,0.0304326,0.031097,0.0152319,0.0348891,0.0141938,0.0169807,0.026086,0.0 114 | 25,0.0415617,0.0498941,0.0429027,0.0462508,0.0402712,0.038657,0.0267309,0.0260804,0.0390857,0.0244079,0.0314033,0.0283975,0.0 115 | 26,0.0782183,0.0860654,0.0386417,0.093939,0.0311257,0.0336799,0.0271866,0.0474061,0.0377424,0.0348524,0.0339082,0.0186533,0.0 116 | 27,0.515198,0.477969,0.487953,0.512853,0.528621,0.549713,0.519788,0.577133,0.55569,0.587544,0.5528,0.615966,1.0 117 | 28,0.473032,0.506821,0.492473,0.473992,0.498311,0.533195,0.541702,0.561859,0.515185,0.560986,0.564291,0.483087,0.0 118 | -------------------------------------------------------------------------------- /nn/sub.sh: -------------------------------------------------------------------------------- 1 | python main.py --train data/sub_train.list --display_train ../../better_split/data/clicks_va.csv --fea_limit 5 --test data/sub_test.list --display_test ../../input/clicks_test.csv --learning_rate 0.001 --batch_size 64 --sub sub.csv --num_epochs 2 --acc_period 10 2 | -------------------------------------------------------------------------------- /nn/util/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daxiongshu/stack-nn-tensorflow/8218247ce49d54db75a33edec816367de9949f23/nn/util/__init__.py -------------------------------------------------------------------------------- /nn/util/data_util.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import h5py 4 | import os 5 | import csv 6 | import gc 7 | from sklearn.utils import shuffle as sk_shuffle 8 | import random 9 | 10 | class DataSet: 11 | def __init__(self, metalist, display, baselist = '', cache='cache', shuffle=False, limit = 5, normalize = 1, softmax=0,mean=None,std=None,num_big=10): 12 | self.num_big = num_big 13 | self.softmax = 0#softmax # perform softmax transformation online 14 | self.files, self.columns = self._parse(metalist) 15 | self.baselist = baselist 16 | if self.baselist != '': 17 | self.bfiles, self.bcolumns = self._parse(baselist) 18 | self.normalize = normalize 19 | self.display = display 20 | self.cache = cache 21 | self.limit = limit # max number of features, above 
the limit will call online code 22 | #self.files # a list of file names: ['xx/m1.csv', 'yy/m2.csv' ...] 23 | #self.columns # column names of each file [['clicked'], ['ffm1','ffm2'], ...] 24 | 25 | self.shuffle = shuffle 26 | self._read() 27 | self.mean, self.std = mean, std 28 | if normalize and mean is None: 29 | self._normalize() 30 | self.reset() 31 | 32 | def _normalize(self): 33 | print "run normalization ..." 34 | sum1, sum2 = None, None 35 | num = 0 36 | for i in range(self.num_big): 37 | self._get_big_batch(batch_id = i) 38 | num += self.big_X.shape[0] 39 | if i==0: 40 | sum1 = np.sum(self.big_X,axis=0) 41 | sum2 = np.sum(self.big_X*self.big_X,axis=0) 42 | else: 43 | sum1 += np.sum(self.big_X,axis=0) 44 | sum2 += np.sum(self.big_X*self.big_X,axis=0) 45 | del self.big_X 46 | gc.collect() 47 | mean1,mean2 = sum1/num, sum2/num 48 | std = np.sqrt(mean2-mean1*mean1) 49 | self.mean = mean1 50 | self.std = std 51 | print self.mean.shape, self.std.shape 52 | 53 | def reset(self): 54 | self.current_big_batch = 0 # 0~9 55 | self.current_mini_batch = 0 56 | self.big_X = None 57 | self.big_Y = None 58 | self.big_order = range(self.num_big) 59 | if self.shuffle: 60 | random.shuffle(self.big_order) 61 | 62 | def _load_row_array(self): 63 | name = self.display.split('/')[-1] 64 | cache = self.cache 65 | cname = '%s/%s.row_array.bin'%(cache, name) 66 | h5f=h5py.File(cname,'r') 67 | self.row_array=h5f['dataset_1'][:] 68 | h5f.close() 69 | 70 | def next_batch(self, batch_size): 71 | shuffle = self.shuffle 72 | if self.big_X is None: 73 | self._get_big_batch(batch_id = self.big_order[self.current_big_batch]) 74 | self.tmp_list = range(len(self.group_idx)-1) 75 | self._base_get_big_batch(batch_id = self.big_order[self.current_big_batch]) 76 | if shuffle: 77 | random.shuffle(self.tmp_list) 78 | #print self.row_array 79 | #print self.group_idx, self.tmp_list 80 | mini_batch_id = self.current_mini_batch 81 | next_batch = mini_batch_id + batch_size 82 | next_batch = min(next_batch, len(self.tmp_list)) 83 | #print self.current_mini_batch, next_batch 84 | X, y, G, R = self._extract_batch(self.current_mini_batch, next_batch) 85 | done = False 86 | if self.baselist != '': 87 | Xb = self._base_extract_batch(self.current_mini_batch, next_batch) 88 | if next_batch == len(self.tmp_list): 89 | del self.big_X 90 | gc.collect() 91 | self.big_X = None 92 | self.big_base_X = None 93 | self.current_big_batch += 1 94 | if self.current_big_batch==self.num_big: 95 | self.reset() 96 | done = True 97 | self.current_mini_batch = 0 98 | else: 99 | self.current_mini_batch = next_batch 100 | #X = (X - np.mean(X,0))/np.std(X,0) 101 | if self.baselist == '': 102 | return X, y, G, R, done 103 | else: 104 | return X, y, G, R, done, Xb 105 | 106 | def _softmax(self, x): 107 | # perform softmax on the 2nd dimenstion 108 | for i in range(x.shape[1]): 109 | #tmpmax = np.max(x[:,i]) 110 | tmp = np.exp(x[:,i]) 111 | sumtmp = np.sum(tmp) 112 | x[:,i] = tmp/sumtmp 113 | return x 114 | 115 | def _base_extract_batch(self, start, end): 116 | X, R = [], [0] 117 | for i in range(start,end): 118 | idx = self.tmp_list[i] 119 | #idx = self.group_idx[idx] 120 | s_, e_ = self.group_idx[idx], self.group_idx[idx+1] 121 | R.append(e_-s_+R[-1]) 122 | #print s_, e_ 123 | if self.softmax: 124 | X.append(self._softmax(self.big_base_X[s_:e_,:])) 125 | else: 126 | X.append(self.big_base_X[s_:e_,:]) 127 | X = np.vstack(X) 128 | return X 129 | 130 | def _extract_batch(self, start, end): 131 | X, y, G, R = [],[],[],[0] 132 | for i in range(start,end): 133 | idx 
= self.tmp_list[i] 134 | #idx = self.group_idx[idx] 135 | s_, e_ = self.group_idx[idx], self.group_idx[idx+1] 136 | R.append(e_-s_+R[-1]) 137 | #print s_, e_ 138 | if self.softmax: 139 | X.append(self._softmax(self.big_X[s_:e_,:])) 140 | else: 141 | X.append(self.big_X[s_:e_,:]) 142 | if len(self.big_Y.shape) == 2: # 2-D big_Y holds [display_id, clicked]; 1-D means no label, just group 143 | y.append(self.big_Y[s_:e_,1]) 144 | G.append(self.big_Y[s_:e_,0]) 145 | else: 146 | G.append(self.big_Y[s_:e_]) 147 | X = np.vstack(X) 148 | if len(y) == 0: 149 | y = np.zeros(X.shape[0]) 150 | else: 151 | y = np.concatenate(y) 152 | G = np.concatenate(G) 153 | return X, y, G, R 154 | 155 | def sanity_check(self): 156 | print (self.row_array) 157 | print () 158 | for i in range(self.num_big): 159 | self._get_big_batch(i) 160 | print (i) 161 | #print self.big_X 162 | print (self.big_Y) 163 | print (self.group_idx) 164 | print () 165 | def _base_get_big_batch(self, batch_id): 166 | if self.baselist=='': 167 | self.big_base_X = None 168 | return 169 | X = [] 170 | cache = self.cache 171 | for f in self.bfiles: 172 | name = f.split('/')[-1] 173 | cname = '%s/%s_%d.bin'%(cache,name,batch_id) 174 | assert os.path.exists(cname), "Meta bin data does not exist!" 175 | h5f=h5py.File(cname,'r') 176 | train=h5f['dataset_1'][:] 177 | h5f.close() 178 | if len(train.shape)==1: 179 | train = np.reshape(train, [train.shape[0], 1]) 180 | X.append(train) 181 | self.big_base_X = np.hstack(X) 182 | 183 | def _get_big_batch(self, batch_id): 184 | X = [] 185 | cache = self.cache 186 | for f in self.files: 187 | name = f.split('/')[-1] 188 | cname = '%s/%s_%d.bin'%(cache,name,batch_id) 189 | assert os.path.exists(cname), "Meta bin data does not exist!" 190 | h5f=h5py.File(cname,'r') 191 | train=h5f['dataset_1'][:] 192 | h5f.close() 193 | if len(train.shape)==1: 194 | train = np.reshape(train, [train.shape[0], 1]) 195 | X.append(train) 196 | del train 197 | self.big_X = np.hstack(X) 198 | if self.normalize and self.mean is not None: 199 | #self.big_X = (self.big_X - np.mean(self.big_X,0))/np.std(self.big_X,0) 200 | self.big_X = (self.big_X - self.mean)/self.std 201 | #print "Load big batch", batch_id, self.big_X.shape 202 | del X 203 | gc.collect() 204 | 205 | display = self.display 206 | name = display.split('/')[-1] 207 | cname = '%s/%s_%d.bin'%(cache, name, batch_id) 208 | assert os.path.exists(cname), "Display bin data does not exist!" 209 | h5f=h5py.File(cname,'r') 210 | self.big_Y=h5f['dataset_1'][:] 211 | h5f.close() 212 | 213 | cname = '%s/%s_%d.group.bin'%(cache,name, batch_id) 214 | h5f=h5py.File(cname,'r') 215 | self.group_idx=h5f['dataset_1'][:] 216 | h5f.close() 217 | 218 | 219 | 220 | def _parse(self, metalist): 221 | files, columns = [], [] 222 | with open(metalist,'r') as f: 223 | for c,row in enumerate(csv.DictReader(f)): 224 | files.append(row['name']) 225 | columns.append(row['columns'].split(',')) 226 | return files, columns 227 | 228 | def _read(self): 229 | cache = self.cache 230 | display = self.display 231 | 232 | name = display.split('/')[-1] 233 | cname = '%s/%s_0.bin'%(cache, name) 234 | if os.path.exists(cname) == False: 235 | self._RW_display_to_bin() 236 | cname = '%s/%s.group'%(cache, name) 237 | if True: 238 | with open(cname,'r') as f: 239 | self.num_groups = int(f.readline().strip()) 240 | print ("Total number of groups", self.num_groups) 241 | for f,col in zip(self.files, self.columns): 242 | name = f.split('/')[-1] 243 | for i in range(self.num_big): 244 | cname = '%s/%s_%d.bin'%(cache,name,i) 245 | #print cname 246 | if os.path.exists(cname) == 
False: 247 | if len(col)self.num_big-1: 327 | idx = self.num_big-1 328 | #groups[idx].append(dc-1) 329 | if last!='': 330 | groups[last_idx].append(local_row_count + groups[last_idx][-1]) 331 | local_row_count = 0 332 | last_idx = idx 333 | last = row['display_id'] 334 | local_row_count += 1 335 | cname = '%s/%s_%d.bin'%(cache,name,idx) 336 | 337 | if os.path.exists(cname) == False: 338 | if 'clicked' in row: 339 | dids[idx].append([int(row['display_id']), int(row['clicked'])]) 340 | else: 341 | dids[idx].append(int(row['display_id'])) 342 | groups[last_idx].append(local_row_count + groups[last_idx][-1]) 343 | for i in range(self.num_big): 344 | cname = '%s/%s_%d.bin'%(cache,name,i) 345 | if os.path.exists(cname) == False: 346 | did = np.array(dids[i]) 347 | h5f=h5py.File(cname,'w') 348 | h5f.create_dataset('dataset_1', data=did) 349 | h5f.close() 350 | group = np.array(groups[i]) 351 | cname = '%s/%s_%d.group.bin'%(cache,name,i) 352 | h5f=h5py.File(cname,'w') 353 | h5f.create_dataset('dataset_1', data=group) 354 | h5f.close() 355 | 356 | print ("read", display, did.shape, i, 'done') 357 | total.append(did.shape[0]) 358 | print ("Total:", sum(total)) 359 | 360 | def _RW_meta_to_bin(self, inputname,column): 361 | display = self.display 362 | cache = self.cache 363 | 364 | shuffle = self.shuffle 365 | if not shuffle: 366 | num_groups = self.num_groups 367 | name = inputname.split('/')[-1] 368 | yps = [] 369 | for i in range(self.num_big): 370 | yps.append([]) 371 | 372 | if True: 373 | with open(display, 'r') as d: 374 | with open(inputname, 'r') as f: 375 | last = '' 376 | dc = 0 377 | dreader = csv.DictReader(d) 378 | yp = [] 379 | for c,row in enumerate(csv.DictReader(f)): 380 | drow = dreader.next() 381 | if last!=drow['display_id']: 382 | dc += 1 383 | last = drow['display_id'] 384 | if shuffle : 385 | idx = dc%self.num_big 386 | else: 387 | step = (num_groups/self.num_big) 388 | if step<1: 389 | step = 1 390 | idx = int(dc / step) 391 | if idx>self.num_big-1: 392 | idx = self.num_big-1 393 | cname = '%s/%s_%d.bin'%(cache,name,idx) 394 | if os.path.exists(cname) == False: 395 | tmp = [float(row[x]) for x in column] 396 | yps[idx].append(tmp) 397 | total = [] 398 | for i in range(self.num_big): 399 | cname = '%s/%s_%d.bin'%(cache,name,i) 400 | if os.path.exists(cname) == False: 401 | yp = np.array(yps[i]) 402 | print (inputname, i, yp.shape) 403 | h5f=h5py.File(cname,'w') 404 | h5f.create_dataset('dataset_1', data=yp) 405 | h5f.close() 406 | total.append(yp.shape[0]) 407 | print ("Total:", sum(total)) 408 | 409 | 410 | def _RW_meta_to_bin_online(self, inputname, column): 411 | display = self.display 412 | cache = self.cache 413 | 414 | shuffle = self.shuffle 415 | if not shuffle: 416 | num_groups = self.num_groups 417 | name = inputname.split('/')[-1] 418 | total = [] 419 | for i in range(self.num_big): 420 | cname = '%s/%s_%d.bin'%(cache,name,i) 421 | if os.path.exists(cname) == True: 422 | continue 423 | with open(display, 'r') as d: 424 | with open(inputname, 'r') as f: 425 | last = '' 426 | dc = 0 427 | dreader = csv.DictReader(d) 428 | yp = [] 429 | for c,row in enumerate(csv.DictReader(f)): 430 | drow = dreader.next() 431 | if last!=drow['display_id']: 432 | dc += 1 433 | last = drow['display_id'] 434 | if shuffle and dc%self.num_big != i: 435 | continue 436 | if not shuffle: 437 | step = (num_groups/self.num_big) 438 | if step<1: 439 | step = 1 440 | 441 | if dc < step*i: 442 | continue 443 | elif i < self.num_big-1: 444 | if dc >= step*(i+1): 445 | break 446 | 447 | tmp = 
[float(row[x]) for x in column] 448 | yp.append(tmp) 449 | yp = np.array(yp) 450 | print (inputname, i, yp.shape) 451 | h5f=h5py.File(cname,'w') 452 | h5f.create_dataset('dataset_1', data=yp) 453 | h5f.close() 454 | total.append(yp.shape[0]) 455 | print ("Total:", sum(total)) 456 | 457 | def get_num_fea( metalist): 458 | columns = [] 459 | with open(metalist,'r') as f: 460 | for c,row in enumerate(csv.DictReader(f)): 461 | columns.append(len(row['columns'].split(','))) 462 | return sum(columns) 463 | 464 | def write_sub(yp,name): 465 | s = pd.DataFrame({"clicked":yp}) 466 | s.to_csv(name,index=False) 467 | 468 | if __name__ == '__main__': 469 | ds = DataSet(display = '../../../input/clicks_test.csv', files=[], columns=[], cache='../cache', shuffle=False) 470 | -------------------------------------------------------------------------------- /nn/util/evaluate.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def apk12(batch, prediction): 4 | if len(batch) == 5: 5 | _, y, g, r, _ = batch 6 | else: 7 | _, y, g, r, _, _ = batch 8 | yp = [] 9 | result = [] 10 | if True: 11 | for i in range(len(r)-1): 12 | start, end = r[i], r[i+1] 13 | yp.append(prediction[i,0:end-start]) 14 | act = [c for c,i in enumerate(y[start:end]) if i>0] 15 | pred = {i:c for c,i in enumerate(yp[-1])} 16 | pred = [pred[i] for i in sorted(yp[-1],reverse=True)] 17 | result.append(apk(act,pred,k=12)) 18 | yp = np.concatenate(yp) 19 | assert(len(y)==len(yp)) 20 | return np.mean(result), yp 21 | 22 | 23 | 24 | def apk(actual, predicted, k=10): 25 | """ 26 | Computes the average precision at k. 27 | This function computes the average prescision at k between two lists of 28 | items. 29 | Parameters 30 | ---------- 31 | actual : list 32 | A list of elements that are to be predicted (order doesn't matter) 33 | predicted : list 34 | A list of predicted elements (order does matter) 35 | k : int, optional 36 | The maximum number of predicted elements 37 | Returns 38 | ------- 39 | score : double 40 | The average precision at k over the input lists 41 | """ 42 | if len(predicted)>k: 43 | predicted = predicted[:k] 44 | if not actual: 45 | return 0.0 46 | 47 | score = 0.0 48 | num_hits = 0.0 49 | 50 | for i,p in enumerate(predicted): 51 | if p in actual and p not in predicted[:i]: 52 | num_hits += 1.0 53 | score += num_hits / (i+1.0) 54 | 55 | 56 | return score / min(len(actual), k) 57 | 58 | -------------------------------------------------------------------------------- /nn/util/nn_util.py: -------------------------------------------------------------------------------- 1 | import math 2 | import tensorflow as tf 3 | import numpy as np 4 | 5 | 6 | def weight(name, shape, init='he', range=None): 7 | """ Initializes weight. 8 | :param name: Variable name 9 | :param shape: Tensor shape 10 | :param init: Init mode. 
xavier / normal / uniform / he (default is 'he') 11 | :param range: 12 | :return: Variable 13 | """ 14 | initializer = tf.constant_initializer() 15 | if init == 'xavier': 16 | fan_in, fan_out = _get_dims(shape) 17 | range = math.sqrt(6.0 / (fan_in + fan_out)) 18 | initializer = tf.random_uniform_initializer(-range, range) 19 | 20 | elif init == 'he': 21 | fan_in, _ = _get_dims(shape) 22 | std = math.sqrt(2.0 / fan_in) 23 | initializer = tf.random_normal_initializer(stddev=std) 24 | 25 | elif init == 'normal': 26 | initializer = tf.random_normal_initializer(stddev=0.1) 27 | 28 | elif init == 'uniform': 29 | if range is None: 30 | raise ValueError("range must not be None if uniform init is used.") 31 | initializer = tf.random_uniform_initializer(-range, range) 32 | 33 | var = tf.get_variable(name, shape, initializer=initializer) 34 | tf.add_to_collection('l2', tf.nn.l2_loss(var)) # Add L2 Loss 35 | return var 36 | 37 | 38 | def _get_dims(shape): 39 | fan_in = shape[0] if len(shape) == 2 else np.prod(shape[:-1]) 40 | fan_out = shape[1] if len(shape) == 2 else shape[-1] 41 | return int(fan_in), int(fan_out) 42 | 43 | 44 | def bias(name, dim, initial_value=0.0): 45 | """ Initializes bias parameter. 46 | :param name: Variable name 47 | :param dim: Tensor size (list or int) 48 | :param initial_value: Initial bias term 49 | :return: Variable 50 | """ 51 | dims = dim if isinstance(dim, list) else [dim] 52 | return tf.get_variable(name, dims, initializer=tf.constant_initializer(initial_value)) 53 | 54 | 55 | def batch_norm(x, is_training, default = False): 56 | """ Batch normalization. 57 | :param x: Tensor 58 | :param is_training: boolean tf.Variable, true indicates training phase 59 | :return: batch-normalized tensor 60 | """ 61 | with tf.variable_scope('BatchNorm'): 62 | # calculate dimensions (from tf.contrib.layers.batch_norm) 63 | inputs_shape = x.get_shape() 64 | axis = list(range(len(inputs_shape) - 1)) 65 | param_shape = inputs_shape[-1:] 66 | 67 | beta = tf.get_variable('beta', param_shape, initializer=tf.constant_initializer(0.)) 68 | gamma = tf.get_variable('gamma', param_shape, initializer=tf.constant_initializer(1.)) 69 | batch_mean, batch_var = tf.nn.moments(x, axis) 70 | ema = tf.train.ExponentialMovingAverage(decay=0.5) 71 | 72 | def mean_var_with_update(): 73 | ema_apply_op = ema.apply([batch_mean, batch_var]) 74 | with tf.control_dependencies([ema_apply_op]): 75 | return tf.identity(batch_mean), tf.identity(batch_var) 76 | 77 | if not default: 78 | mean, var = tf.cond(is_training, 79 | mean_var_with_update, 80 | lambda: (ema.average(batch_mean), ema.average(batch_var))) 81 | else: 82 | mean, var = mean_var_with_update() 83 | normed = tf.nn.batch_normalization(x, mean, var, beta, gamma, 1e-3) 84 | return normed 85 | 86 | 87 | def dropout(x, keep_prob, is_training): 88 | """ Apply dropout. 89 | :param x: Tensor 90 | :param keep_prob: float, Dropout rate. 
91 | :param is_training: boolean tf.Variable, true indicates training phase 92 | :return: dropout applied tensor 93 | """ 94 | return tf.cond(is_training, lambda: tf.nn.dropout(x, keep_prob), lambda: x) 95 | 96 | 97 | def conv(x, filter, is_training): 98 | l = tf.nn.conv2d(x, filter, strides=[1, 1, 1, 1], padding='SAME') 99 | l = batch_norm(l, is_training) 100 | return tf.nn.relu(l) 101 | 102 | 103 | def flatten(x): 104 | return tf.reshape(x, [-1]) 105 | 106 | 107 | def fully_connected(input, num_neurons, name, use_batch_norm=False, use_drop_out=False, keep_prob = 1.0, is_training = True , activation = 'relu', default_batch = 0): 108 | input_size = input.get_shape()[1] 109 | w = weight(name, [input_size, num_neurons], init='he') 110 | l = tf.matmul(input, w) 111 | if use_batch_norm: 112 | l = batch_norm(l, is_training, default = default_batch) 113 | if use_drop_out: 114 | l = dropout(l, keep_prob, is_training) 115 | if activation == 'relu': 116 | l = tf.nn.relu(l) 117 | elif activation == 'sigmoid': 118 | l = tf.nn.sigmoid(l) 119 | elif activation == 'None': 120 | pass 121 | else: 122 | raise NotImplementedError 123 | return l 124 | -------------------------------------------------------------------------------- /split.py: -------------------------------------------------------------------------------- 1 | import csv 2 | 3 | def split(inputname, display, outputdir,name=None): 4 | if name is None: 5 | name = inputname.split('/')[-1] 6 | 7 | ftr = [open('%s/%s.tr.%d'%(outputdir,name,i),'w') for i in range(3)] 8 | fva = [open('%s/%s.va.%d'%(outputdir,name,i),'w') for i in range(3)] 9 | 10 | 11 | with open(inputname, 'r') as f: 12 | head = f.readline() 13 | for i in range(3): 14 | ftr[i].write(head) 15 | fva[i].write(head) 16 | with open(display, 'r') as fd: 17 | for c,row in enumerate(csv.DictReader(fd)): 18 | line = f.readline() 19 | for i in range(3): 20 | if row['fold%d'%(i+1)]=='0': 21 | ftr[i].write(line) 22 | elif row['fold%d'%(i+1)]=='1': 23 | fva[i].write(line) 24 | if c%1000000 == 0: 25 | print c 26 | for i in range(3): 27 | ftr[i].close() 28 | fva[i].close() 29 | 30 | def split_display(display, outputdir): 31 | name = display.split('/')[-1] 32 | ftr = [open('%s/%s.display.tr.%d'%(outputdir,name,i),'w') for i in range(3)] 33 | fva = [open('%s/%s.display.va.%d'%(outputdir,name,i),'w') for i in range(3)] 34 | head = 'display_id,clicked\n' 35 | for i in range(3): 36 | ftr[i].write(head) 37 | fva[i].write(head) 38 | 39 | with open(display, 'r') as fd: 40 | for c,row in enumerate(csv.DictReader(fd)): 41 | line='%s,%s\n'%(row['display_id'],row.get('clicked','0')) # display_id plus label ('0' if clicked is absent) 42 | for i in range(3): 43 | if row['fold%d'%(i+1)]=='0': 44 | ftr[i].write(line) 45 | elif row['fold%d'%(i+1)]=='1': 46 | fva[i].write(line) 47 | if c%1000000 == 0: 48 | print c 49 | for i in range(3): 50 | ftr[i].close() 51 | fva[i].close() 52 | 53 | 54 | 55 | if __name__ == '__main__': 56 | #split(inputname='../stack/data/cv_0.691775_lb_0.69167/train_meta.csv', display='data/stack_split2.csv', outputdir='cvdata') 57 | #split_display(display='data/stack_split2.csv', outputdir='cvdata') 58 | #split(inputname='../better_split/data/clicks_va.csv', display='data/stack_split2.csv', outputdir='cvdata') 59 | #split(inputname='../better_split/good/cv_0.691873_lb_0.69122/cv_0.691873/cv.csv', display='data/stack_split2.csv', outputdir='cvdata',name='cv_0.691873') 60 | #split(inputname='../better_split/good/cv_0.690716/cv_0.690716/cv.csv', display='data/stack_split2.csv', outputdir='cvdata',name='cv_0.690716') 61 | 
#split(inputname='../better_split/good/cv_0.690598/cv_0.690598/cv.csv', display='data/stack_split2.csv', outputdir='cvdata',name='cv_0.690598') 62 | #split(inputname='data/ffm2_valid_k16_eta0.050.csv', display='data/stack_split2.csv', outputdir='cvdata',name='ffm2_valid_k16_eta0.050') 63 | split(inputname='data/ftrl_va_group.csv', display='data/stack_split2.csv', outputdir='cvdata',name='ftrl_va_group') 64 | split(inputname='data/mt_cv_0.681601/cv.csv', display='data/stack_split2.csv', outputdir='cvdata',name='mt_cv_0.681601') 65 | -------------------------------------------------------------------------------- /stack_nn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daxiongshu/stack-nn-tensorflow/8218247ce49d54db75a33edec816367de9949f23/stack_nn.png -------------------------------------------------------------------------------- /write_base_list.py: -------------------------------------------------------------------------------- 1 | def write_list(names,columns): 2 | for i in range(3): 3 | for j in ['train','test']: 4 | fo = open('nn/data/%s%d.list.base'%(j,i),'w') 5 | fo.write('name,columns\n') 6 | tag = 'tr' if j=='train' else 'va' 7 | for name,col in zip(names,columns): 8 | fo.write('"%s.%s.%d","%s"\n'%(name,tag,i,col)) 9 | fo.close() 10 | 11 | names = [#"../cvdata/train_meta.csv", 12 | #"../cvdata/cv_0.694441", 13 | #"../cvdata/cv_0.694441_cv_leak", 14 | "../cvdata/takuya1" 15 | #"../cvdata/xgb_cv_0.691432_lb_0.69531" 16 | ] 17 | cols = [#"sffm0,sffm1,sffm2,sffm3,sffm4,sffm5,sffm6,sffm7,sffm8,sffm9,ffm0", 18 | "clicked", 19 | ] 20 | 21 | write_list(names,cols) 22 | 23 | -------------------------------------------------------------------------------- /write_list.py: -------------------------------------------------------------------------------- 1 | def write_list(names,columns): 2 | for i in range(3): 3 | for j in ['train','test']: 4 | fo = open('nn/data/%s%d.list'%(j,i),'w') 5 | fo.write('name,columns\n') 6 | tag = 'tr' if j=='train' else 'va' 7 | for name,col in zip(names,columns): 8 | fo.write('"%s.%s.%d","%s"\n'%(name,tag,i,col)) 9 | fo.close() 10 | 11 | names = ["../cvdata/train_meta.csv", 12 | 13 | "../cvdata/cv_0.690716", 14 | "../cvdata/cv_0.690598", 15 | "../cvdata/ffm2_valid_k16_eta0.050", 16 | "../cvdata/mt_cv_0.681601", 17 | "../cvdata/ftrl_va_group", 18 | 19 | "../cvdata/2way_try1", 20 | "../cvdata/2way_try2", 21 | 22 | "../cvdata/cv_0.692248", 23 | "../cvdata/cv_0.692143", 24 | #"../cvdata/lat1", 25 | "../cvdata/cv_0.694441", 26 | "../cvdata/takuya1", 27 | #"../cvdata/takuya_features", 28 | "../cvdata/takuya2", 29 | "../cvdata/takuya3", 30 | "../cvdata/takuya4", 31 | # "../cvdata/fm_0.693821_cv_13", 32 | # "../cvdata/fm_0.693821_cv_14", 33 | # "../cvdata/fm_0.693821_cv_15", 34 | # "../cvdata/fm_0.693821_cv_16", 35 | # "../cvdata/fm_0.693821_cv_17", 36 | # "../cvdata/fm_0.693821_cv_18", 37 | # "../cvdata/fm_0.693821_cv_19", 38 | # "../cvdata/fm_0.693821_cv_20", 39 | # "../cvdata/fm_0.693821_cv_21", 40 | # "../cvdata/fm_0.693821_cv_22", 41 | # "../cvdata/fm_0.693821_cv_23", 42 | #"../cvdata/fm_0.693821_cv_24", 43 | ] 44 | cols = ["sffm0,sffm1,sffm2,sffm3,sffm4,sffm5,sffm6,sffm7,sffm8,sffm9,ffm0", 45 | "clicked","clicked","clicked","clicked","clicked", 46 | "neighbor_ad_document_id,neighbor_ad_leak,neighbor_ad_doc_after_click,ad_id_document_id,ad_id_leak,ad_id_doc_after_click,document_idx_document_id,document_idx_leak,document_idx_doc_after_click", 47 | 
"ad_id_category_id,ad_id_entity_id,ad_id_source_id,ad_id_publisher_id,campaign_id_category_id,campaign_id_entity_id,campaign_id_source_id,campaign_id_publisher_id,advertiser_id_category_id,advertiser_id_entity_id,advertiser_id_source_id,advertiser_id_publisher_id", 48 | "clicked", 49 | "clicked", 50 | #"lat0", 51 | "clicked", 52 | "clicked", 53 | #"doc_dot_doc,doc_dot_doc1,doc_dot_doc_categories_topics,doc_dot_doc_categories_entities,doc_dot_doc_topics_entities,doc_dot_doc_topics_entities_entities,doc_dot_doc_source_id,doc_dot_doc_publisher_id,doc_dot_doc_topics,doc_dot_doc_categories,doc_dot_doc_entities,user_dot_doc,user_dot_doc1,user_dot_doc_categories_topics,user_dot_doc_categories_entities,user_dot_doc_topics_entities,user_dot_doc_topics_entities_entities,user_dot_doc_source_id,user_dot_doc_publisher_id,user_dot_doc_topics,user_dot_doc_categories,user_dot_doc_entities,norm_user_dot_doc,norm_user_dot_doc1,norm_user_dot_doc_categories_topics,norm_user_dot_doc_categories_entities,norm_user_dot_doc_topics_entities,norm_user_dot_doc_topics_entities_entities,norm_user_dot_doc_source_id,norm_user_dot_doc_publisher_id,norm_user_dot_doc_topics,norm_user_dot_doc_categories,norm_user_dot_doc_entities", 54 | "clicked", 55 | "clicked", 56 | "clicked", 57 | #"document_id-ad_id,document_id-document_idx,document_id-campaign_id,document_id-advertiser_id,document_id-entity_idx,document_id-source_idx,document_id-publisher_idx,document_id-category_idx,document_id-topic_idx,document_id-source_id_leak,document_id-publisher_id_leak,document_id-leak", 58 | #"platform-ad_id,platform-document_idx,platform-campaign_id,platform-advertiser_id,platform-entity_idx,platform-source_idx,platform-publisher_idx,platform-category_idx,platform-topic_idx,platform-source_id_leak,platform-publisher_id_leak,platform-leak", 59 | #"geo_location-ad_id,geo_location-document_idx,geo_location-campaign_id,geo_location-advertiser_id,geo_location-entity_idx,geo_location-source_idx,geo_location-publisher_idx,geo_location-category_idx,geo_location-topic_idx,geo_location-source_id_leak,geo_location-publisher_id_leak,geo_location-leak", 60 | #"entity_id-ad_id,entity_id-document_idx,entity_id-campaign_id,entity_id-advertiser_id,entity_id-entity_idx,entity_id-source_idx,entity_id-publisher_idx,entity_id-category_idx,entity_id-topic_idx,entity_id-source_id_leak,entity_id-publisher_id_leak,entity_id-leak", 61 | #"source_id-ad_id,source_id-document_idx,source_id-campaign_id,source_id-advertiser_id,source_id-entity_idx,source_id-source_idx,source_id-publisher_idx,source_id-category_idx,source_id-topic_idx,source_id-source_id_leak,source_id-publisher_id_leak,source_id-leak", 62 | #"publisher_id-ad_id,publisher_id-document_idx,publisher_id-campaign_id,publisher_id-advertiser_id,publisher_id-entity_idx,publisher_id-source_idx,publisher_id-publisher_idx,publisher_id-category_idx,publisher_id-topic_idx,publisher_id-source_id_leak,publisher_id-publisher_id_leak,publisher_id-leak", 63 | #"category_id-ad_id,category_id-document_idx,category_id-campaign_id,category_id-advertiser_id,category_id-entity_idx,category_id-source_idx,category_id-publisher_idx,category_id-category_idx,category_id-topic_idx,category_id-source_id_leak,category_id-publisher_id_leak,category_id-leak", 64 | #"topic_id-ad_id,topic_id-document_idx,topic_id-campaign_id,topic_id-advertiser_id,topic_id-entity_idx,topic_id-source_idx,topic_id-publisher_idx,topic_id-category_idx,topic_id-topic_idx,topic_id-source_id_leak,topic_id-publisher_id_leak,topic_id-leak", 65 | 
#"day-ad_id,day-document_idx,day-campaign_id,day-advertiser_id,day-entity_idx,day-source_idx,day-publisher_idx,day-category_idx,day-topic_idx,day-source_id_leak,day-publisher_id_leak,day-leak", 66 | #"hour-ad_id,hour-document_idx,hour-campaign_id,hour-advertiser_id,hour-entity_idx,hour-source_idx,hour-publisher_idx,hour-category_idx,hour-topic_idx,hour-source_id_leak,hour-publisher_id_leak,hour-leak", 67 | #"weekday-ad_id,weekday-document_idx,weekday-campaign_id,weekday-advertiser_id,weekday-entity_idx,weekday-source_idx,weekday-publisher_idx,weekday-category_idx,weekday-topic_idx,weekday-source_id_leak,weekday-publisher_id_leak,weekday-leak", 68 | #"doc_after_click-ad_id,doc_after_click-document_idx,doc_after_click-campaign_id,doc_after_click-advertiser_id,doc_after_click-entity_idx,doc_after_click-source_idx,doc_after_click-publisher_idx,doc_after_click-category_idx,doc_after_click-topic_idx,doc_after_click-source_id_leak,doc_after_click-publisher_id_leak,doc_after_click-leak", 69 | ] 70 | 71 | write_list(names,cols) 72 | 73 | -------------------------------------------------------------------------------- /write_sub_base_list.py: -------------------------------------------------------------------------------- 1 | def write_list(names,columns): 2 | if True: 3 | for c,j in enumerate(['train','test']): 4 | fo = open('nn/data/sub_%s.list.base'%(j),'w') 5 | fo.write('name,columns\n') 6 | for name,col in zip(names[c],columns): 7 | fo.write('"%s","%s"\n'%(name,col)) 8 | fo.close() 9 | 10 | names1 =[ 11 | "../data/meta/ffm-train-dataWeight4-1__406-nextView-nextViewMulti-nextViewDot-nextViewMultiDot_Wleak_R0.3_K8_bag1.out" 12 | #"../data/meta/cv_0.694441_cv.csv", 13 | ] 14 | names2 =[ 15 | "../data/meta/ffm-train-dataWeight4-1__406-nextView-nextViewMulti-nextViewDot-nextViewMultiDot_Wleak_R0.3_K8_T12_bag1.out" 16 | #"../data/meta/cv_0.694441_sub.csv", 17 | ] 18 | names = (names1,names2) 19 | cols = ['clicked'] 20 | write_list(names,cols) 21 | 22 | -------------------------------------------------------------------------------- /write_sub_list.py: -------------------------------------------------------------------------------- 1 | def write_list(names,columns): 2 | if True: 3 | for c,j in enumerate(['train','test']): 4 | fo = open('nn/data/sub_%s.list'%(j),'w') 5 | fo.write('name,columns\n') 6 | for name,col in zip(names[c],columns): 7 | fo.write('"%s","%s"\n'%(name,col)) 8 | fo.close() 9 | 10 | names1 = ["../../stack/data/cv_0.691775_lb_0.69167/train_meta.csv", 11 | 12 | "../data/meta/cv_0.690598_cv.csv", 13 | "../data/meta/cv_0.690716_cv.csv", 14 | "../data/meta/cv_0.692248_cv.csv", 15 | "../data/meta/cv_0.692143_cv.csv", 16 | "../data/meta/mt_cv_0.681601_cv.csv", 17 | "../data/meta/ftrl_va_group.csv", 18 | "../data/meta/ffm2_valid_k16_eta0.050.csv", 19 | #"../data/meta/leak_meta_cv.csv", 20 | '../data/meta/ffm-train-dataWeight4-1__406-nextView_Wleak_R0.3_K8_bag1.out', 21 | '../data/meta/ffm-train-dataWeight4-1__407-1.5-nextView-nextViewMulti-nextViewDot-nextViewMultiDot_Wleak_R0.4_K8_bag1.out', 22 | '../data/meta/va_xgb.csv', 23 | '../data/meta/cv_0.694441_cv.csv', 24 | "../data/meta/ffm-train-dataWeight4-1__406-nextView-nextViewMulti-nextViewDot-nextViewMultiDot_Wleak_R0.3_K8_bag1.out", 25 | "../data/meta/full_try_1va.fm_2way.target", 26 | "../data/meta/full_try_2va.fm_2way.target", 27 | ] 28 | names2 = ["../../stack/data/cv_0.691775_lb_0.69167/test_meta.csv", 29 | 30 | "../data/meta/cv_0.690598_sub.csv", 31 | "../data/meta/cv_0.690716_sub.csv", 32 | "../data/meta/cv_0.692248_sub.csv", 33 | 
"../data/meta/cv_0.692143_sub.csv", 34 | "../data/meta/mt_cv_0.681601_sub.csv", 35 | "../data/meta/ftrl_test_group.csv", 36 | "../data/meta/ffm2_pred_k16_eta0.050.csv", 37 | #"../data/meta/leak_meta_sub.csv", 38 | '../data/meta/ffm-train-dataWeight4-1__406-nextView_Wleak_R0.3_K8_T12_bag1.out', 39 | '../data/meta/ffm-train-dataWeight4-1__407-1.5-nextView-nextViewMulti-nextViewDot-nextViewMultiDot_Wleak_R0.4_K8_T23_bag1.out', 40 | '../data/meta/test_xgb.csv', 41 | '../data/meta/cv_0.694441_sub.csv', 42 | "../data/meta/ffm-train-dataWeight4-1__406-nextView-nextViewMulti-nextViewDot-nextViewMultiDot_Wleak_R0.3_K8_T12_bag1.out", 43 | "../data/meta/full_try_1test.fm_2way.target", 44 | "../data/meta/full_try_2test.fm_2way.target", 45 | ] 46 | 47 | names = (names1,names2) 48 | cols = ["sffm0,sffm1,sffm2,sffm3,sffm4,sffm5,sffm6,sffm7,sffm8,sffm9,ffm0", 49 | "clicked","clicked","clicked","clicked","clicked","clicked","clicked", 50 | #"source_id_leak,publisher_id_leak", 51 | "clicked","clicked","clicked","clicked","clicked", 52 | "neighbor_ad_document_id,neighbor_ad_leak,neighbor_ad_doc_after_click,ad_id_document_id,ad_id_leak,ad_id_doc_after_click,document_idx_document_id,document_idx_leak,document_idx_doc_after_click", 53 | "ad_id_category_id,ad_id_entity_id,ad_id_source_id,ad_id_publisher_id,campaign_id_category_id,campaign_id_entity_id,campaign_id_source_id,campaign_id_publisher_id,advertiser_id_category_id,advertiser_id_entity_id,advertiser_id_source_id,advertiser_id_publisher_id", 54 | ] 55 | 56 | write_list(names,cols) 57 | 58 | --------------------------------------------------------------------------------