├── .gitignore
├── README.md
├── nn
├── bag.py
├── cv.sh
├── main.py
├── models
│ ├── __init__.py
│ ├── base_model.py
│ ├── fc_net
│ │ ├── __init__.py
│ │ └── fc_net.py
│ ├── lambda_net
│ │ ├── __init__.py
│ │ └── lambda_net.py
│ └── pgs_net
│ │ ├── __init__.py
│ │ ├── pgs_bypass.py
│ │ ├── pgs_leak.py
│ │ ├── pgs_mix.py
│ │ ├── pgs_net.py
│ │ └── pgs_net_with_base.py
├── small.sh
├── small
│ ├── clicks_tr.sample
│ ├── clicks_va.sample
│ ├── test.list
│ ├── test.sample
│ ├── train.list
│ └── train.sample
├── sub.sh
└── util
│ ├── __init__.py
│ ├── data_util.py
│ ├── evaluate.py
│ └── nn_util.py
├── split.py
├── stack_nn.png
├── write_base_list.py
├── write_list.py
├── write_sub_base_list.py
└── write_sub_list.py
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # stack-nn-tensorflow
2 | ![Alt text](/stack_nn.png?raw=true "Optional Title")
--------------------------------------------------------------------------------
/nn/bag.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pandas as pd
3 | import numpy as np
4 | for i in range(5):
5 |     cmd = 'python main.py --train data/sub_train.list --display_train ../../better_split/data/clicks_va.csv --fea_limit 5 --test data/sub_test.list --display_test ../../input/clicks_test.csv --learning_rate 0.0005 --weight_decay 0.001 --batch_size 96 --sub bag/sub%d.csv --num_epochs 3 --acc_period 100 --normalize 1 --base_train data/sub_train.list.base --base_test data/sub_test.list.base --model pgs_wb'%i
6 |     os.system(cmd)
7 | s = []
8 | for i in range(5):
9 |     s.append(pd.read_csv('bag/sub%d.csv'%i))
10 | s = pd.concat(s, axis=1).values
11 | s = np.mean(s,axis=1)
12 | np.savetxt("bag/sub_ave.csv",s,header='clicked')
13 | 
14 | 
--------------------------------------------------------------------------------
/nn/cv.sh:
--------------------------------------------------------------------------------
1 | #python main.py --train data/train0.list --display_train ../cvdata/clicks_va.csv.tr.0 --fea_limit 5 --test data/test0.list --display_test ../cvdata/clicks_va.csv.va.0 --learning_rate 0.0005 --weight_decay 0.001 --batch_size 96 --sub results/cv0.csv --num_epochs 3 --acc_period 1 --normalize 0 --base_train data/train0.list.base --base_test data/test0.list.base --model pgs_wb
2 | #python main.py --train data/train1.list --display_train ../cvdata/clicks_va.csv.tr.1 --fea_limit 5 --test data/test1.list --display_test ../cvdata/clicks_va.csv.va.1 --learning_rate 0.0005 --weight_decay 0.001 --batch_size 96 --sub results/cv1.csv --num_epochs 3 --acc_period 1 --normalize 0 --base_train data/train1.list.base --base_test data/test1.list.base --model pgs_wb
3 | python main.py --train data/train2.list --display_train ../cvdata/clicks_va.csv.tr.2 --fea_limit 5 --test data/test2.list --display_test ../cvdata/clicks_va.csv.va.2 --learning_rate 0.0005 --weight_decay 0.001 --batch_size 96 --sub results/cv2.csv --num_epochs 3 --acc_period 1 --normalize 0 --base_train data/train2.list.base --base_test data/test2.list.base --model pgs_wb
4 | 
--------------------------------------------------------------------------------
/nn/main.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from util.data_util import 
DataSet,get_num_fea,write_sub 3 | from models.pgs_net.pgs_net import PGS_NET 4 | 5 | flags = tf.app.flags 6 | flags.DEFINE_string('save_dir', 'save', 'Save path [save]') 7 | flags.DEFINE_integer('num_epochs', 10, 'Number of epochs for training [256]') 8 | flags.DEFINE_float('learning_rate', 0.0001, 'Learning rate [0.002]') 9 | flags.DEFINE_integer('acc_period', 1, 'Accuracy display period [10]') 10 | flags.DEFINE_string('task', 'cv', 'cv or test') 11 | flags.DEFINE_string('train', 'data/train.list', 'meta list of train') 12 | flags.DEFINE_string('display_train', '', 'display of train') 13 | flags.DEFINE_string('test', 'data/test.list', 'meta list of test') 14 | flags.DEFINE_string('display_test', '', 'display of test') 15 | flags.DEFINE_string('cache', 'cache', 'cache path') 16 | flags.DEFINE_integer('fea_limit', 5, 'Max number of features, above this value will call online code in preprocessing') 17 | flags.DEFINE_integer('batch_size', 2, 'batch size') 18 | flags.DEFINE_integer('max_ads', 18, 'maximum ads per display') 19 | flags.DEFINE_integer('meta_features', 11, 'number of features') 20 | flags.DEFINE_float('weight_decay', 0.001, 'Weight decay - 0 to turn off L2 regularization [0.001]') 21 | flags.DEFINE_string('sub', 'sub.csv', 'path of submission') 22 | FLAGS = flags.FLAGS 23 | 24 | def main(_): 25 | 26 | FLAGS.meta_features = get_num_fea(FLAGS.train) 27 | print ("number of meta features", FLAGS.meta_features) 28 | train = DataSet(metalist=FLAGS.train, display=FLAGS.display_train, cache=FLAGS.cache, shuffle=True, limit = FLAGS.fea_limit) 29 | test = DataSet(metalist=FLAGS.test, display=FLAGS.display_test, cache=FLAGS.cache, shuffle=False, limit = FLAGS.fea_limit) 30 | #train.sanity_check() 31 | # test's row order doesn't change!!! 32 | with tf.Session() as sess: 33 | model = PGS_NET(FLAGS) 34 | sess.run(tf.initialize_all_variables()) 35 | model.train(sess, train, test) 36 | preds = model.eval(sess, test, is_va = False) 37 | write_sub(preds,FLAGS.sub) 38 | if __name__ == '__main__': 39 | tf.app.run() 40 | 41 | -------------------------------------------------------------------------------- /nn/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daxiongshu/stack-nn-tensorflow/8218247ce49d54db75a33edec816367de9949f23/nn/models/__init__.py -------------------------------------------------------------------------------- /nn/models/base_model.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tqdm import tqdm 3 | from termcolor import colored 4 | import numpy as np 5 | from util.evaluate import apk12 6 | 7 | class BaseModel(object): 8 | 9 | def __init__(self, params): 10 | self.params = params 11 | self.save_dir = params.save_dir 12 | 13 | with tf.variable_scope("Stack_NN"): 14 | #print("building stack nn...") 15 | self.global_step = tf.Variable(0, name='global_step', trainable=False) 16 | self.build() 17 | 18 | ############################################### 19 | # Start: virtual functions to be implemented 20 | ############################################### 21 | def build(self): 22 | #pass 23 | raise NotImplementedError() 24 | 25 | def get_feed_dict(self, batch, is_train): 26 | raise NotImplementedError() 27 | 28 | def preprocess_batch(self, batch): 29 | raise NotImplementedError() 30 | ############################################### 31 | # End: virtual functions to be implemented 32 | ############################################### 33 
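# Contract assumed by the common methods below (inferred from this file rather
# than a separate spec): a subclass's build() constructs the graph and assigns
# self.predictions, self.loss, self.total_loss and self.opt_op, while
# get_feed_dict(batch, is_train) maps a DataSet batch tuple onto the
# placeholders build() created, using preprocess_batch() for the per-display
# padding.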
| 34 | 35 | ############################################### 36 | # Start: common functions to be inherited 37 | ############################################### 38 | 39 | def train_batch(self, sess, batch): 40 | feed_dict = self.get_feed_dict(batch, is_train=True) 41 | return sess.run([self.opt_op, self.global_step], feed_dict=feed_dict) 42 | 43 | def test_batch(self, sess, batch): 44 | feed_dict = self.get_feed_dict(batch, is_train=False) 45 | return sess.run([self.loss, self.global_step, self.predictions], feed_dict=feed_dict) 46 | 47 | def train(self, sess, train_data, val_data=None): 48 | params = self.params 49 | num_epochs = params.num_epochs 50 | num_batches = (train_data.num_groups + self.params.batch_size -1)/self.params.batch_size 51 | 52 | print("Training %d epochs ..." % num_epochs) 53 | for epoch_no in tqdm(range(num_epochs), desc='Epoch', maxinterval=86400, ncols=100): 54 | losses = [] 55 | #for i in range(num_batches): 56 | while True: 57 | batch = train_data.next_batch(self.params.batch_size) # random shuffled batch 58 | self.train_batch(sess, batch) 59 | if batch[4]: 60 | break 61 | #losses.append(loss) 62 | #train_data.reset() 63 | 64 | if (epoch_no + 1) % params.acc_period == 0: 65 | print() # Newline for TQDM 66 | #print("[Train] step %d: Loss = %.4f" % \ 67 | #( global_step, np.mean(losses))) 68 | if val_data: 69 | self.eval(sess, val_data, is_va = True) 70 | train_data.reset() 71 | 72 | def eval(self, sess, data, is_va = False): 73 | data.reset() 74 | num_batches = (data.num_groups + self.params.batch_size -1)/self.params.batch_size 75 | name = 'Validation' if is_va else 'Test' 76 | apk_results = [] 77 | predictions = [] 78 | losses = [] 79 | #for _ in range(num_batches): 80 | while True: 81 | batch = data.next_batch(self.params.batch_size) # continuous batch 82 | # batch is a tuple (X, y, dispaly_id) 83 | loss, global_step, prediction = self.test_batch(sess, batch) 84 | apk_result, ypred = apk12(batch, prediction) 85 | apk_results.append(apk_result) 86 | predictions.append(ypred) 87 | losses.append(loss) 88 | if batch[4]: 89 | break 90 | print(colored("[%s] step %d: APK-12 = %.4f, Loss = %.4f"%(name, global_step, np.mean(apk_results), np.mean(losses)), 'green')) 91 | #data.reset() 92 | return np.concatenate(predictions) # row order is the same as input test 93 | 94 | def save(self, sess): 95 | print("Saving model to %s" % self.save_dir) 96 | self.saver.save(sess, self.save_dir, self.global_step) 97 | 98 | def load(self, sess): 99 | print("Loading model ...") 100 | checkpoint = tf.train.get_checkpoint_state(self.save_dir) 101 | if checkpoint is None: 102 | print("Error: No saved model found. 
Please train first.") 103 | sys.exit(0) 104 | self.saver.restore(sess, checkpoint.model_checkpoint_path) 105 | 106 | 107 | ############################################### 108 | # End: common functions to be inherited 109 | ############################################### 110 | -------------------------------------------------------------------------------- /nn/models/fc_net/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daxiongshu/stack-nn-tensorflow/8218247ce49d54db75a33edec816367de9949f23/nn/models/fc_net/__init__.py -------------------------------------------------------------------------------- /nn/models/fc_net/fc_net.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from util.nn_util import fully_connected 4 | from models.base_model import BaseModel 5 | 6 | class FC_NET(BaseModel): 7 | # Per-group-softmax net 8 | def build(self): 9 | params = self.params 10 | N = params.batch_size # number of groups/display_ids per batch 11 | A = params.max_ads # maximum number of Ads per display_id 12 | F = params.meta_features # number of meta features per Ad 13 | 14 | X = tf.placeholder('float32', shape=[N, A, F], name='x') # zero padding 15 | Xmask = tf.placeholder('float32', shape=[N, A], name='xmask') # in {-e10, 1}, 1 for real Ads and -e10 for padding Ads 16 | #Xads = tf.placeholder('float32', shape=[N], name='xads') # number of Ads per display_id 17 | y = tf.placeholder('float32', shape=[N, A], name='y') # y in {0, 1} with zero padding 18 | is_training = tf.placeholder(tf.bool) 19 | 20 | if self.params.softmax_transform: 21 | print("softmax_transform") 22 | Xtmp = X + tf.reshape(Xmask, [N, A, 1]) 23 | Xtmp = tf.exp(Xtmp) 24 | stmp = tf.reduce_sum(Xtmp, 1, keep_dims=True)+1e-5 25 | Xtmp = Xtmp/stmp 26 | else: 27 | Xtmp = X 28 | 29 | with tf.name_scope("Fully-connected"): 30 | Xtmp = tf.reshape(Xtmp, [N*A, F]) 31 | with tf.variable_scope("Layer1"): 32 | ytmp = fully_connected(Xtmp, num_neurons=50, name='W1', is_training = is_training, use_batch_norm=True, use_drop_out=False, keep_prob = 0.7, activation = 'sigmoid') 33 | with tf.variable_scope("Layer2"): 34 | ytmp = fully_connected(ytmp, num_neurons=25, name='W2', is_training = is_training, use_batch_norm=True, use_drop_out=False, keep_prob = 1.0, activation = 'sigmoid') 35 | with tf.variable_scope("Layer3"): 36 | ytmp = fully_connected(ytmp, num_neurons=1, name='W3', is_training = is_training, use_batch_norm=True, use_drop_out=False, keep_prob = 1, activation = 'None') 37 | 38 | # ytmp is [N*A, 1] now 39 | 40 | yp = tf.reshape(ytmp,[N, A]) * Xmask # masking the padding Ads 41 | # yp is [N, A] now 42 | 43 | with tf.name_scope('Loss'): 44 | # Cross-Entropy loss 45 | #yp = ytmp * tf.reshape(Xmask,[N*A, 1]) 46 | #y = tf.reshape(y,) 47 | #cross_entropy = tf.nn.softmax_cross_entropy_with_logits(yp, y) 48 | yp = tf.maximum(yp,1e-5) 49 | yp = tf.minimum(yp,1-1e-5) 50 | cross_entropy = -(y*tf.log(yp)+(1-y)*tf.log(1-yp)) 51 | loss = tf.reduce_mean(cross_entropy) 52 | total_loss = loss + params.weight_decay * tf.add_n(tf.get_collection('l2')) 53 | 54 | with tf.name_scope('Predict'): 55 | pred = yp #tf.nn.softmax(yp) 56 | 57 | optimizer = tf.train.AdamOptimizer(params.learning_rate) 58 | opt_op = optimizer.minimize(total_loss, global_step=self.global_step) 59 | 60 | 61 | self.predictions = pred 62 | self.loss = cross_entropy 63 | self.total_loss = total_loss 64 | self.opt_op = opt_op 65 | 
66 | self.x = X 67 | self.y = y 68 | self.xmask = Xmask 69 | self.is_train = is_training 70 | 71 | def preprocess_batch(self, batch): 72 | # batch = (x, y, g, r) 73 | params = self.params 74 | N = params.batch_size # number of groups/display_ids per batch 75 | A = params.max_ads # maximum number of Ads per display_id 76 | F = params.meta_features # number of meta features per Ad 77 | #print N,A,F 78 | x, y, g, r, _ = batch 79 | #print "batch.r", r 80 | X = np.zeros([N,A,F]) 81 | #print "batch.x", x.shape, "X", X.shape 82 | 83 | Y = np.zeros([N,A]) 84 | Xmask = np.zeros([N,A]) 85 | for i in range(N): 86 | if i+1 >= len(r): 87 | break 88 | start, end = r[i], r[i+1] 89 | #rtmp = range(start,end) 90 | #print i, start, end, X[i,start:end,:].shape, x[start:end,:].shape, X.shape, x.shape, y.shape 91 | X[i,0:end-start,:] = x[start:end,:] 92 | Y[i,0:end-start] = y[start:end] 93 | Xmask[i,0:end-start] = 1 94 | #print rtmp, X.shape, Y.shape, y.shape 95 | #X[i,rtmp,:] = x[rtmp,:] 96 | #Y[i,rtmp,:] = y[np.array(rtmp)] 97 | #Xmask[i,rtmp,:] = 0 98 | 99 | return X, Y, Xmask 100 | 101 | def get_feed_dict(self, batch, is_train): 102 | X, Y, Xmask = self.preprocess_batch(batch) 103 | return { 104 | self.x: X, 105 | self.xmask: Xmask, 106 | self.y: Y, 107 | self.is_train: is_train 108 | } 109 | 110 | -------------------------------------------------------------------------------- /nn/models/lambda_net/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daxiongshu/stack-nn-tensorflow/8218247ce49d54db75a33edec816367de9949f23/nn/models/lambda_net/__init__.py -------------------------------------------------------------------------------- /nn/models/lambda_net/lambda_net.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from util.nn_util import fully_connected 4 | from models.base_model import BaseModel 5 | 6 | class LAMBDA_NET(BaseModel): 7 | # Per-group-softmax net 8 | def build(self): 9 | print "build LAMBDA_NET" 10 | params = self.params 11 | N = params.batch_size # number of groups/display_ids per batch 12 | A = params.max_ads # maximum number of Ads per display_id 13 | F = params.meta_features # number of meta features per Ad 14 | 15 | X = tf.placeholder('float32', shape=[N, A, F], name='x') # zero padding 16 | Xmask = tf.placeholder('float32', shape=[N, A], name='xmask') # in {-e10, 1}, 1 for real Ads and -e10 for padding Ads 17 | #Xads = tf.placeholder('float32', shape=[N], name='xads') # number of Ads per display_id 18 | y = tf.placeholder('float32', shape=[N, A], name='y') # y in {0, 1} with zero padding 19 | is_training = tf.placeholder(dtype=tf.bool) 20 | 21 | if self.params.softmax_transform: 22 | print("softmax_transform") 23 | Xtmp = X + tf.reshape(Xmask, [N, A, 1]) 24 | Xtmp = tf.exp(Xtmp) 25 | stmp = tf.reduce_sum(Xtmp, 1, keep_dims=True)+1e-5 26 | Xtmp = Xtmp/stmp 27 | else: 28 | Xtmp = X 29 | 30 | with tf.name_scope("Fully-connected"): 31 | Xtmp = tf.reshape(Xtmp, [N*A, F]) 32 | with tf.variable_scope("Layer1"): 33 | ytmp = fully_connected(Xtmp, num_neurons=50, name='W1', is_training = is_training, use_batch_norm=True, use_drop_out=False, keep_prob = 0.7, activation = 'sigmoid', default_batch = params.default_batch) 34 | with tf.variable_scope("Layer2"): 35 | ytmp = fully_connected(ytmp, num_neurons=25, name='W2', is_training = is_training, use_batch_norm=True, use_drop_out=False, keep_prob = 1.0, activation = 'sigmoid', 
default_batch = params.default_batch) 36 | with tf.variable_scope("Layer3"): 37 | ytmp = fully_connected(ytmp, num_neurons=1, name='W3', is_training = is_training, use_batch_norm=True, use_drop_out=False, keep_prob = 1, activation = 'None', default_batch = params.default_batch) 38 | 39 | # ytmp is [N*A, 1] now 40 | 41 | yp = tf.reshape(ytmp,[N, A])# + Xmask # masking the padding Ads 42 | # yp is [N, A] now 43 | ypos = tf.matmul(tf.reduce_sum(yp*y,1,keep_dims=True),tf.constant(1,type="float32",shape=[1,N])) 44 | with tf.name_scope('Loss'): 45 | # Cross-Entropy loss 46 | #cross_entropy = tf.nn.softmax_cross_entropy_with_logits(yp, y) 47 | loss = tf.reduce_mean(cross_entropy) 48 | total_loss = loss + params.weight_decay * tf.add_n(tf.get_collection('l2')) 49 | 50 | with tf.name_scope('Predict'): 51 | pred = tf.nn.softmax(yp) 52 | 53 | if self.params.opt == 'adam': 54 | optimizer = tf.train.AdamOptimizer(params.learning_rate) 55 | elif self.params.opt == 'sgd': 56 | optimizer = tf.train.GradientDescentOptimizer(params.learning_rate) 57 | elif self.params.opt == 'ada': 58 | optimizer = tf.train.AdagradOptimizer(params.learning_rate) 59 | elif self.params.opt == 'rmsprop': 60 | optimizer = tf.train.RMSPropOptimizer(params.learning_rate) 61 | opt_op = optimizer.minimize(total_loss, global_step=self.global_step) 62 | 63 | 64 | self.predictions = pred 65 | self.loss = cross_entropy 66 | self.total_loss = total_loss 67 | self.opt_op = opt_op 68 | 69 | self.x = X 70 | self.y = y 71 | self.xmask = Xmask 72 | self.is_train = is_training 73 | 74 | def preprocess_batch(self, batch): 75 | # batch = (x, y, g, r) 76 | params = self.params 77 | N = params.batch_size # number of groups/display_ids per batch 78 | A = params.max_ads # maximum number of Ads per display_id 79 | F = params.meta_features # number of meta features per Ad 80 | #print N,A,F 81 | x, y, g, r, _ = batch 82 | #print "batch.r", r 83 | X = np.zeros([N,A,F]) 84 | #print "batch.x", x.shape, "X", X.shape 85 | 86 | Y = np.zeros([N,A]) 87 | Xmask = np.ones([N,A])*(-1e10) 88 | for i in range(N): 89 | if i+1 >= len(r): 90 | break 91 | start, end = r[i], r[i+1] 92 | #rtmp = range(start,end) 93 | #print i, start, end, X[i,start:end,:].shape, x[start:end,:].shape, X.shape, x.shape, y.shape 94 | X[i,0:end-start,:] = x[start:end,:] 95 | Y[i,0:end-start] = y[start:end] 96 | Xmask[i,0:end-start] = 0 97 | #print rtmp, X.shape, Y.shape, y.shape 98 | #X[i,rtmp,:] = x[rtmp,:] 99 | #Y[i,rtmp,:] = y[np.array(rtmp)] 100 | #Xmask[i,rtmp,:] = 0 101 | 102 | return X, Y, Xmask 103 | 104 | def get_feed_dict(self, batch, is_train): 105 | X, Y, Xmask = self.preprocess_batch(batch) 106 | return { 107 | self.x: X, 108 | self.xmask: Xmask, 109 | self.y: Y, 110 | self.is_train: is_train 111 | } 112 | 113 | -------------------------------------------------------------------------------- /nn/models/pgs_net/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daxiongshu/stack-nn-tensorflow/8218247ce49d54db75a33edec816367de9949f23/nn/models/pgs_net/__init__.py -------------------------------------------------------------------------------- /nn/models/pgs_net/pgs_bypass.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from util.nn_util import fully_connected 4 | from models.base_model import BaseModel 5 | 6 | class PGS_BYPASS_NET(BaseModel): 7 | # Per-group-softmax net 8 | def build(self): 9 | print "build 
PGS_BYPASS_NET" 10 | params = self.params 11 | N = params.batch_size # number of groups/display_ids per batch 12 | A = params.max_ads # maximum number of Ads per display_id 13 | F = params.meta_features # number of meta features per Ad 14 | 15 | X = tf.placeholder('float32', shape=[N, A, F], name='x') # zero padding 16 | Xmask = tf.placeholder('float32', shape=[N, A], name='xmask') # in {-e10, 1}, 1 for real Ads and -e10 for padding Ads 17 | #Xads = tf.placeholder('float32', shape=[N], name='xads') # number of Ads per display_id 18 | y = tf.placeholder('float32', shape=[N, A], name='y') # y in {0, 1} with zero padding 19 | is_training = tf.placeholder(dtype=tf.bool) 20 | 21 | if self.params.softmax_transform: 22 | print("softmax_transform") 23 | Xtmp = X + tf.reshape(Xmask, [N, A, 1]) 24 | Xtmp = tf.exp(Xtmp) 25 | stmp = tf.reduce_sum(Xtmp, 1, keep_dims=True)+1e-5 26 | Xtmp = Xtmp/stmp 27 | else: 28 | Xtmp = X 29 | 30 | with tf.name_scope("Fully-connected"): 31 | Xtmp = tf.reshape(Xtmp, [N*A, F]) 32 | with tf.variable_scope("Layer1"): 33 | ytmp = fully_connected(Xtmp, num_neurons=50, name='W1', is_training = is_training, use_batch_norm=True, use_drop_out=False, keep_prob = 0.7, activation = 'sigmoid', default_batch = params.default_batch) 34 | with tf.variable_scope("Layer2"): 35 | ytmp = fully_connected(ytmp, num_neurons=25, name='W2', is_training = is_training, use_batch_norm=True, use_drop_out=False, keep_prob = 1.0, activation = 'sigmoid', default_batch = params.default_batch) 36 | with tf.variable_scope("Layer3"): 37 | ytmp = fully_connected(ytmp, num_neurons=1, name='W3', is_training = is_training, use_batch_norm=True, use_drop_out=False, keep_prob = 1, activation = 'None', default_batch = params.default_batch) 38 | 39 | ytmp = ytmp + fully_connected(Xtmp, num_neurons=1, name='W4', is_training = is_training, use_batch_norm=True, use_drop_out=False, keep_prob = 0.7, activation = 'sigmoid', default_batch = params.default_batch) 40 | 41 | # ytmp is [N*A, 1] now 42 | 43 | yp = tf.reshape(ytmp,[N, A]) + Xmask # masking the padding Ads 44 | # yp is [N, A] now 45 | 46 | with tf.name_scope('Loss'): 47 | # Cross-Entropy loss 48 | cross_entropy = tf.nn.softmax_cross_entropy_with_logits(yp, y) 49 | loss = tf.reduce_mean(cross_entropy) 50 | total_loss = loss + params.weight_decay * tf.add_n(tf.get_collection('l2')) 51 | 52 | with tf.name_scope('Predict'): 53 | pred = tf.nn.softmax(yp) 54 | 55 | if self.params.opt == 'adam': 56 | optimizer = tf.train.AdamOptimizer(params.learning_rate) 57 | elif self.params.opt == 'sgd': 58 | optimizer = tf.train.GradientDescentOptimizer(params.learning_rate) 59 | elif self.params.opt == 'ada': 60 | optimizer = tf.train.AdagradOptimizer(params.learning_rate) 61 | elif self.params.opt == 'rmsprop': 62 | optimizer = tf.train.RMSPropOptimizer(params.learning_rate) 63 | opt_op = optimizer.minimize(total_loss, global_step=self.global_step) 64 | 65 | 66 | self.predictions = pred 67 | self.loss = cross_entropy 68 | self.total_loss = total_loss 69 | self.opt_op = opt_op 70 | 71 | self.x = X 72 | self.y = y 73 | self.xmask = Xmask 74 | self.is_train = is_training 75 | 76 | def preprocess_batch(self, batch): 77 | # batch = (x, y, g, r) 78 | params = self.params 79 | N = params.batch_size # number of groups/display_ids per batch 80 | A = params.max_ads # maximum number of Ads per display_id 81 | F = params.meta_features # number of meta features per Ad 82 | #print N,A,F 83 | x, y, g, r, _ = batch 84 | #print "batch.r", r 85 | X = np.zeros([N,A,F]) 86 | #print "batch.x", 
x.shape, "X", X.shape 87 | 88 | Y = np.zeros([N,A]) 89 | Xmask = np.ones([N,A])*(-1e10) 90 | for i in range(N): 91 | if i+1 >= len(r): 92 | break 93 | start, end = r[i], r[i+1] 94 | #rtmp = range(start,end) 95 | #print i, start, end, X[i,start:end,:].shape, x[start:end,:].shape, X.shape, x.shape, y.shape 96 | X[i,0:end-start,:] = x[start:end,:] 97 | Y[i,0:end-start] = y[start:end] 98 | Xmask[i,0:end-start] = 0 99 | #print rtmp, X.shape, Y.shape, y.shape 100 | #X[i,rtmp,:] = x[rtmp,:] 101 | #Y[i,rtmp,:] = y[np.array(rtmp)] 102 | #Xmask[i,rtmp,:] = 0 103 | 104 | return X, Y, Xmask 105 | 106 | def get_feed_dict(self, batch, is_train): 107 | X, Y, Xmask = self.preprocess_batch(batch) 108 | return { 109 | self.x: X, 110 | self.xmask: Xmask, 111 | self.y: Y, 112 | self.is_train: is_train 113 | } 114 | 115 | -------------------------------------------------------------------------------- /nn/models/pgs_net/pgs_leak.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from util.nn_util import fully_connected 4 | from models.base_model import BaseModel 5 | 6 | class PGS_LEAK(BaseModel): 7 | # Per-group-softmax net 8 | def build(self): 9 | print "build PGS_LEAK" 10 | params = self.params 11 | N = params.batch_size # number of groups/display_ids per batch 12 | A = params.max_ads # maximum number of Ads per display_id 13 | F = params.meta_features # number of meta features per Ad 14 | 15 | yb = tf.placeholder('float32', shape=[N, A], name='yb') 16 | X = tf.placeholder('float32', shape=[N, A, F], name='x') # zero padding 17 | Xmask = tf.placeholder('float32', shape=[N, A], name='xmask') # in {-e10, 1}, 1 for real Ads and -e10 for padding Ads 18 | #Xads = tf.placeholder('float32', shape=[N], name='xads') # number of Ads per display_id 19 | y = tf.placeholder('float32', shape=[N, A], name='y') # y in {0, 1} with zero padding 20 | is_training = tf.placeholder(dtype=tf.bool) 21 | 22 | if self.params.softmax_transform: 23 | print("softmax_transform") 24 | Xtmp = X + tf.reshape(Xmask, [N, A, 1]) 25 | Xtmp = tf.exp(Xtmp) 26 | stmp = tf.reduce_sum(Xtmp, 1, keep_dims=True)+1e-5 27 | Xtmp = Xtmp/stmp 28 | else: 29 | Xtmp = X 30 | 31 | with tf.name_scope("Fully-connected"): 32 | Xtmp = tf.reshape(Xtmp, [N*A, F]) 33 | with tf.variable_scope("Layer1"): 34 | ytmp = fully_connected(Xtmp, num_neurons=50, name='W1', is_training = is_training, use_batch_norm=True, use_drop_out=False, keep_prob = 0.7, activation = 'relu', default_batch = params.default_batch) 35 | with tf.variable_scope("Layer2"): 36 | ytmp = fully_connected(ytmp, num_neurons=25, name='W2', is_training = is_training, use_batch_norm=True, use_drop_out=False, keep_prob = 1.0, activation = 'relu', default_batch = params.default_batch) 37 | #with tf.variable_scope("Layer4"): 38 | # ytmp = fully_connected(ytmp, num_neurons=10, name='W4', is_training = is_training, use_batch_norm=True, use_drop_out=False, keep_prob = 1.0, activation = 'relu', default_batch = params.default_batch) 39 | 40 | with tf.variable_scope("Layer3"): 41 | ytmp = fully_connected(ytmp, num_neurons=1, name='W3', is_training = is_training, use_batch_norm=True, use_drop_out=False, keep_prob = 1, activation = 'None', default_batch = params.default_batch) 42 | 43 | # ytmp is [N*A, 1] now 44 | 45 | yp = tf.reshape(ytmp,[N, A])+yb*10 + Xmask #+ yb# masking the padding Ads 46 | # yp is [N, A] now 47 | 48 | with tf.name_scope('Loss'): 49 | # Cross-Entropy loss 50 | cross_entropy = 
tf.nn.softmax_cross_entropy_with_logits(yp, y) 51 | loss = tf.reduce_mean(cross_entropy) 52 | total_loss = loss + params.weight_decay * tf.add_n(tf.get_collection('l2')) 53 | 54 | with tf.name_scope('Predict'): 55 | pred = tf.nn.softmax(yp) 56 | 57 | if self.params.opt == 'adam': 58 | optimizer = tf.train.AdamOptimizer(params.learning_rate) 59 | elif self.params.opt == 'sgd': 60 | optimizer = tf.train.GradientDescentOptimizer(params.learning_rate) 61 | elif self.params.opt == 'ada': 62 | optimizer = tf.train.AdagradOptimizer(params.learning_rate) 63 | elif self.params.opt == 'rmsprop': 64 | optimizer = tf.train.RMSPropOptimizer(params.learning_rate) 65 | opt_op = optimizer.minimize(total_loss, global_step=self.global_step) 66 | 67 | 68 | self.predictions = pred 69 | self.loss = cross_entropy 70 | self.total_loss = total_loss 71 | self.opt_op = opt_op 72 | 73 | self.yb = yb 74 | self.x = X 75 | self.y = y 76 | self.xmask = Xmask 77 | self.is_train = is_training 78 | 79 | def preprocess_batch(self, batch): 80 | # batch = (x, y, g, r) 81 | params = self.params 82 | N = params.batch_size # number of groups/display_ids per batch 83 | A = params.max_ads # maximum number of Ads per display_id 84 | F = params.meta_features # number of meta features per Ad 85 | #print N,A,F 86 | x, y, g, r, _, b = batch 87 | #print "batch.r", r 88 | X = np.zeros([N,A,F]) 89 | #print "batch.x", x.shape, "X", X.shape 90 | 91 | Y = np.zeros([N,A]) 92 | Xb = np.zeros([N,A]) 93 | Xmask = np.ones([N,A])*(-1e10) 94 | for i in range(N): 95 | if i+1 >= len(r): 96 | break 97 | start, end = r[i], r[i+1] 98 | #rtmp = range(start,end) 99 | #print i, start, end, X[i,start:end,:].shape, x[start:end,:].shape, X.shape, x.shape, y.shape 100 | X[i,0:end-start,:] = x[start:end,:] 101 | Y[i,0:end-start] = y[start:end] 102 | Xb[i,0:end-start] = b[start:end,0] 103 | Xmask[i,0:end-start] = 0 104 | #print rtmp, X.shape, Y.shape, y.shape 105 | #X[i,rtmp,:] = x[rtmp,:] 106 | #Y[i,rtmp,:] = y[np.array(rtmp)] 107 | #Xmask[i,rtmp,:] = 0 108 | 109 | return X, Y, Xmask,Xb 110 | 111 | def get_feed_dict(self, batch, is_train): 112 | X, Y, Xmask, Xb = self.preprocess_batch(batch) 113 | return { 114 | self.x: X, 115 | self.xmask: Xmask, 116 | self.y: Y, 117 | self.is_train: is_train, 118 | self.yb: Xb 119 | } 120 | 121 | -------------------------------------------------------------------------------- /nn/models/pgs_net/pgs_mix.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from util.nn_util import fully_connected 4 | from models.base_model import BaseModel 5 | 6 | class PGS_MIX(BaseModel): 7 | # Per-group-softmax net 8 | def build(self): 9 | print "build PGS_MIX_NET" 10 | params = self.params 11 | N = params.batch_size # number of groups/display_ids per batch 12 | A = params.max_ads # maximum number of Ads per display_id 13 | F = params.meta_features # number of meta features per Ad 14 | 15 | X = tf.placeholder('float32', shape=[N, A, F], name='x') # zero padding 16 | Xmask = tf.placeholder('float32', shape=[N, A], name='xmask') # in {-e10, 1}, 1 for real Ads and -e10 for padding Ads 17 | Xmask2 = tf.placeholder('float32', shape=[N, A], name='xmask2') 18 | #Xads = tf.placeholder('float32', shape=[N], name='xads') # number of Ads per display_id 19 | y = tf.placeholder('float32', shape=[N, A], name='y') # y in {0, 1} with zero padding 20 | is_training = tf.placeholder(dtype=tf.bool) 21 | 22 | if self.params.softmax_transform: 23 | print("softmax_transform") 24 
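# Reading of the lines below: adding the -1e10 mask before exp() drives padded
# ad slots to ~0, and dividing by the per-display sum turns each raw
# meta-feature into a within-display softmax before the fully connected layers.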
| Xtmp = X + tf.reshape(Xmask, [N, A, 1]) 25 | Xtmp = tf.exp(Xtmp) 26 | stmp = tf.reduce_sum(Xtmp, 1, keep_dims=True)+1e-5 27 | Xtmp = Xtmp/stmp 28 | else: 29 | Xtmp = X 30 | 31 | with tf.name_scope("Fully-connected"): 32 | Xtmp = tf.reshape(Xtmp, [N*A, F]) 33 | with tf.variable_scope("Layer1"): 34 | ytmp = fully_connected(Xtmp, num_neurons=100, name='W1', is_training = is_training, use_batch_norm=True, use_drop_out=False, keep_prob = 0.7, activation = 'relu', default_batch = params.default_batch) 35 | with tf.variable_scope("Layer2"): 36 | ytmp = fully_connected(ytmp, num_neurons=50, name='W2', is_training = is_training, use_batch_norm=True, use_drop_out=False, keep_prob = 1.0, activation = 'sigmoid', default_batch = params.default_batch) 37 | with tf.variable_scope("Layer4"): 38 | ytmp = fully_connected(ytmp, num_neurons=25, name='W4', is_training = is_training, use_batch_norm=True, use_drop_out=False, keep_prob = 1.0, activation = 'sigmoid', default_batch = params.default_batch) 39 | with tf.variable_scope("Layer3"): 40 | ytmp = fully_connected(ytmp, num_neurons=1, name='W3', is_training = is_training, use_batch_norm=True, use_drop_out=False, keep_prob = 1, activation = 'None', default_batch = params.default_batch) 41 | 42 | # ytmp is [N*A, 1] now 43 | 44 | yp = tf.reshape(ytmp,[N, A]) + Xmask # masking the padding Ads 45 | # yp is [N, A] now 46 | 47 | with tf.name_scope('LogLoss'): 48 | ypx = tf.reshape(ytmp,[N, A]) * Xmask2 49 | ypx = tf.maximum(ypx,1e-5) 50 | ypx = tf.minimum(ypx,1-1e-5) 51 | lossx = -(y*tf.log(ypx)+(1-y)*tf.log(1-ypx)) 52 | lossx = tf.reduce_mean(lossx) 53 | 54 | with tf.name_scope('Loss'): 55 | # Cross-Entropy loss 56 | cross_entropy = tf.nn.softmax_cross_entropy_with_logits(yp, y) 57 | loss = tf.reduce_mean(cross_entropy) 58 | total_loss = loss + params.logweight*lossx + params.weight_decay * tf.add_n(tf.get_collection('l2')) 59 | 60 | with tf.name_scope('Predict'): 61 | pred = tf.nn.softmax(yp) 62 | 63 | if self.params.opt == 'adam': 64 | optimizer = tf.train.AdamOptimizer(params.learning_rate) 65 | elif self.params.opt == 'sgd': 66 | optimizer = tf.train.GradientDescentOptimizer(params.learning_rate) 67 | elif self.params.opt == 'ada': 68 | optimizer = tf.train.AdagradOptimizer(params.learning_rate) 69 | elif self.params.opt == 'rmsprop': 70 | optimizer = tf.train.RMSPropOptimizer(params.learning_rate) 71 | opt_op = optimizer.minimize(total_loss, global_step=self.global_step) 72 | 73 | 74 | self.predictions = pred 75 | self.loss = cross_entropy 76 | self.total_loss = total_loss 77 | self.opt_op = opt_op 78 | 79 | self.x = X 80 | self.y = y 81 | self.xmask = Xmask 82 | self.xmask2 = Xmask2 83 | self.is_train = is_training 84 | 85 | def preprocess_batch(self, batch): 86 | # batch = (x, y, g, r) 87 | params = self.params 88 | N = params.batch_size # number of groups/display_ids per batch 89 | A = params.max_ads # maximum number of Ads per display_id 90 | F = params.meta_features # number of meta features per Ad 91 | #print N,A,F 92 | x, y, g, r, _ = batch 93 | #print "batch.r", r 94 | X = np.zeros([N,A,F]) 95 | #print "batch.x", x.shape, "X", X.shape 96 | 97 | Y = np.zeros([N,A]) 98 | Xmask = np.ones([N,A])*(-1e10) 99 | Xmask2 = np.zeros([N,A]) 100 | for i in range(N): 101 | if i+1 >= len(r): 102 | break 103 | start, end = r[i], r[i+1] 104 | #rtmp = range(start,end) 105 | #print i, start, end, X[i,start:end,:].shape, x[start:end,:].shape, X.shape, x.shape, y.shape 106 | X[i,0:end-start,:] = x[start:end,:] 107 | Y[i,0:end-start] = y[start:end] 108 | 
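# r holds the row offset of each display_id in the flat batch, so rows
# r[i]:r[i+1] belong to display i; only those positions are un-masked below,
# while padded slots keep -1e10 so the per-group softmax ignores them.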
Xmask[i,0:end-start] = 0 109 | Xmask2[i,0:end-start] = 0 110 | #print rtmp, X.shape, Y.shape, y.shape 111 | #X[i,rtmp,:] = x[rtmp,:] 112 | #Y[i,rtmp,:] = y[np.array(rtmp)] 113 | #Xmask[i,rtmp,:] = 0 114 | 115 | return X, Y, Xmask, Xmask2 116 | 117 | def get_feed_dict(self, batch, is_train): 118 | X, Y, Xmask, Xmask2 = self.preprocess_batch(batch) 119 | return { 120 | self.x: X, 121 | self.xmask: Xmask, 122 | self.xmask2: Xmask2, 123 | self.y: Y, 124 | self.is_train: is_train 125 | } 126 | 127 | -------------------------------------------------------------------------------- /nn/models/pgs_net/pgs_net.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from util.nn_util import fully_connected 4 | from models.base_model import BaseModel 5 | 6 | class PGS_NET(BaseModel): 7 | # Per-group-softmax net 8 | def build(self): 9 | print "build PGS_NET" 10 | params = self.params 11 | N = params.batch_size # number of groups/display_ids per batch 12 | A = params.max_ads # maximum number of Ads per display_id 13 | F = params.meta_features # number of meta features per Ad 14 | 15 | X = tf.placeholder('float32', shape=[N, A, F], name='x') # zero padding 16 | Xmask = tf.placeholder('float32', shape=[N, A], name='xmask') # in {-e10, 1}, 1 for real Ads and -e10 for padding Ads 17 | #Xads = tf.placeholder('float32', shape=[N], name='xads') # number of Ads per display_id 18 | y = tf.placeholder('float32', shape=[N, A], name='y') # y in {0, 1} with zero padding 19 | is_training = tf.placeholder(dtype=tf.bool) 20 | 21 | if self.params.softmax_transform: 22 | print("softmax_transform") 23 | Xtmp = X + tf.reshape(Xmask, [N, A, 1]) 24 | Xtmp = tf.exp(Xtmp) 25 | stmp = tf.reduce_sum(Xtmp, 1, keep_dims=True)+1e-5 26 | Xtmp = Xtmp/stmp 27 | else: 28 | Xtmp = X 29 | 30 | with tf.name_scope("Fully-connected"): 31 | Xtmp = tf.reshape(Xtmp, [N*A, F]) 32 | with tf.variable_scope("Layer1"): 33 | ytmp = fully_connected(Xtmp, num_neurons=50, name='W1', is_training = is_training, use_batch_norm=True, use_drop_out=False, keep_prob = 0.7, activation = 'sigmoid', default_batch = params.default_batch) 34 | with tf.variable_scope("Layer2"): 35 | ytmp = fully_connected(ytmp, num_neurons=25, name='W2', is_training = is_training, use_batch_norm=True, use_drop_out=False, keep_prob = 1.0, activation = 'sigmoid', default_batch = params.default_batch) 36 | with tf.variable_scope("Layer3"): 37 | ytmp = fully_connected(ytmp, num_neurons=1, name='W3', is_training = is_training, use_batch_norm=True, use_drop_out=False, keep_prob = 1, activation = 'None', default_batch = params.default_batch) 38 | 39 | # ytmp is [N*A, 1] now 40 | 41 | yp = tf.reshape(ytmp,[N, A]) + Xmask # masking the padding Ads 42 | # yp is [N, A] now 43 | 44 | with tf.name_scope('Loss'): 45 | # Cross-Entropy loss 46 | cross_entropy = tf.nn.softmax_cross_entropy_with_logits(yp, y) 47 | loss = tf.reduce_mean(cross_entropy) 48 | total_loss = loss + params.weight_decay * tf.add_n(tf.get_collection('l2')) 49 | 50 | with tf.name_scope('Predict'): 51 | pred = tf.nn.softmax(yp) 52 | 53 | if self.params.opt == 'adam': 54 | optimizer = tf.train.AdamOptimizer(params.learning_rate) 55 | elif self.params.opt == 'sgd': 56 | optimizer = tf.train.GradientDescentOptimizer(params.learning_rate) 57 | elif self.params.opt == 'ada': 58 | optimizer = tf.train.AdagradOptimizer(params.learning_rate) 59 | elif self.params.opt == 'rmsprop': 60 | optimizer = tf.train.RMSPropOptimizer(params.learning_rate) 61 | 
opt_op = optimizer.minimize(total_loss, global_step=self.global_step) 62 | 63 | 64 | self.predictions = pred 65 | self.loss = cross_entropy 66 | self.total_loss = total_loss 67 | self.opt_op = opt_op 68 | 69 | self.x = X 70 | self.y = y 71 | self.xmask = Xmask 72 | self.is_train = is_training 73 | 74 | def preprocess_batch(self, batch): 75 | # batch = (x, y, g, r) 76 | params = self.params 77 | N = params.batch_size # number of groups/display_ids per batch 78 | A = params.max_ads # maximum number of Ads per display_id 79 | F = params.meta_features # number of meta features per Ad 80 | #print N,A,F 81 | x, y, g, r, _ = batch 82 | #print "batch.r", r 83 | X = np.zeros([N,A,F]) 84 | #print "batch.x", x.shape, "X", X.shape 85 | 86 | Y = np.zeros([N,A]) 87 | Xmask = np.ones([N,A])*(-1e10) 88 | for i in range(N): 89 | if i+1 >= len(r): 90 | break 91 | start, end = r[i], r[i+1] 92 | #rtmp = range(start,end) 93 | #print i, start, end, X[i,start:end,:].shape, x[start:end,:].shape, X.shape, x.shape, y.shape 94 | X[i,0:end-start,:] = x[start:end,:] 95 | Y[i,0:end-start] = y[start:end] 96 | Xmask[i,0:end-start] = 0 97 | #print rtmp, X.shape, Y.shape, y.shape 98 | #X[i,rtmp,:] = x[rtmp,:] 99 | #Y[i,rtmp,:] = y[np.array(rtmp)] 100 | #Xmask[i,rtmp,:] = 0 101 | 102 | return X, Y, Xmask 103 | 104 | def get_feed_dict(self, batch, is_train): 105 | X, Y, Xmask = self.preprocess_batch(batch) 106 | return { 107 | self.x: X, 108 | self.xmask: Xmask, 109 | self.y: Y, 110 | self.is_train: is_train 111 | } 112 | 113 | -------------------------------------------------------------------------------- /nn/models/pgs_net/pgs_net_with_base.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from util.nn_util import fully_connected 4 | from models.base_model import BaseModel 5 | 6 | class PGS_NET_WB(BaseModel): 7 | # Per-group-softmax net 8 | def build(self): 9 | print "build PGS_NET_WB" 10 | params = self.params 11 | N = params.batch_size # number of groups/display_ids per batch 12 | A = params.max_ads # maximum number of Ads per display_id 13 | F = params.meta_features # number of meta features per Ad 14 | 15 | yb = tf.placeholder('float32', shape=[N, A], name='yb') 16 | X = tf.placeholder('float32', shape=[N, A, F], name='x') # zero padding 17 | Xmask = tf.placeholder('float32', shape=[N, A], name='xmask') # in {-e10, 1}, 1 for real Ads and -e10 for padding Ads 18 | #Xads = tf.placeholder('float32', shape=[N], name='xads') # number of Ads per display_id 19 | y = tf.placeholder('float32', shape=[N, A], name='y') # y in {0, 1} with zero padding 20 | is_training = tf.placeholder(dtype=tf.bool) 21 | 22 | if self.params.softmax_transform: 23 | print("softmax_transform") 24 | Xtmp = X + tf.reshape(Xmask, [N, A, 1]) 25 | Xtmp = tf.exp(Xtmp) 26 | stmp = tf.reduce_sum(Xtmp, 1, keep_dims=True)+1e-5 27 | Xtmp = Xtmp/stmp 28 | else: 29 | Xtmp = X 30 | 31 | with tf.name_scope("Fully-connected"): 32 | Xtmp = tf.reshape(Xtmp, [N*A, F]) 33 | with tf.variable_scope("Layer1"): 34 | ytmp = fully_connected(Xtmp, num_neurons=50, name='W1', is_training = is_training, use_batch_norm=True, use_drop_out=False, keep_prob = 0.7, activation = 'relu', default_batch = params.default_batch) 35 | with tf.variable_scope("Layer2"): 36 | ytmp = fully_connected(ytmp, num_neurons=25, name='W2', is_training = is_training, use_batch_norm=True, use_drop_out=False, keep_prob = 1.0, activation = 'relu', default_batch = params.default_batch) 37 | #with 
tf.variable_scope("Layer4"): 38 | # ytmp = fully_connected(ytmp, num_neurons=10, name='W4', is_training = is_training, use_batch_norm=True, use_drop_out=False, keep_prob = 1.0, activation = 'relu', default_batch = params.default_batch) 39 | 40 | with tf.variable_scope("Layer3"): 41 | ytmp = fully_connected(ytmp, num_neurons=1, name='W3', is_training = is_training, use_batch_norm=True, use_drop_out=False, keep_prob = 1, activation = 'None', default_batch = params.default_batch) 42 | 43 | # ytmp is [N*A, 1] now 44 | 45 | yp = tf.reshape(ytmp,[N, A])*yb + Xmask #+ yb# masking the padding Ads 46 | # yp is [N, A] now 47 | 48 | with tf.name_scope('Loss'): 49 | # Cross-Entropy loss 50 | cross_entropy = tf.nn.softmax_cross_entropy_with_logits(yp, y) 51 | loss = tf.reduce_mean(cross_entropy) 52 | total_loss = loss + params.weight_decay * tf.add_n(tf.get_collection('l2')) 53 | 54 | with tf.name_scope('Predict'): 55 | pred = tf.nn.softmax(yp) 56 | 57 | if self.params.opt == 'adam': 58 | optimizer = tf.train.AdamOptimizer(params.learning_rate) 59 | elif self.params.opt == 'sgd': 60 | optimizer = tf.train.GradientDescentOptimizer(params.learning_rate) 61 | elif self.params.opt == 'ada': 62 | optimizer = tf.train.AdagradOptimizer(params.learning_rate) 63 | elif self.params.opt == 'rmsprop': 64 | optimizer = tf.train.RMSPropOptimizer(params.learning_rate) 65 | opt_op = optimizer.minimize(total_loss, global_step=self.global_step) 66 | 67 | 68 | self.predictions = pred 69 | self.loss = cross_entropy 70 | self.total_loss = total_loss 71 | self.opt_op = opt_op 72 | 73 | self.yb = yb 74 | self.x = X 75 | self.y = y 76 | self.xmask = Xmask 77 | self.is_train = is_training 78 | 79 | def preprocess_batch(self, batch): 80 | # batch = (x, y, g, r) 81 | params = self.params 82 | N = params.batch_size # number of groups/display_ids per batch 83 | A = params.max_ads # maximum number of Ads per display_id 84 | F = params.meta_features # number of meta features per Ad 85 | #print N,A,F 86 | x, y, g, r, _, b = batch 87 | #print "batch.r", r 88 | X = np.zeros([N,A,F]) 89 | #print "batch.x", x.shape, "X", X.shape 90 | 91 | Y = np.zeros([N,A]) 92 | Xb = np.zeros([N,A]) 93 | Xmask = np.ones([N,A])*(-1e10) 94 | for i in range(N): 95 | if i+1 >= len(r): 96 | break 97 | start, end = r[i], r[i+1] 98 | #rtmp = range(start,end) 99 | #print i, start, end, X[i,start:end,:].shape, x[start:end,:].shape, X.shape, x.shape, y.shape 100 | X[i,0:end-start,:] = x[start:end,:] 101 | Y[i,0:end-start] = y[start:end] 102 | Xb[i,0:end-start] = b[start:end,0] 103 | Xmask[i,0:end-start] = 0 104 | #print rtmp, X.shape, Y.shape, y.shape 105 | #X[i,rtmp,:] = x[rtmp,:] 106 | #Y[i,rtmp,:] = y[np.array(rtmp)] 107 | #Xmask[i,rtmp,:] = 0 108 | 109 | return X, Y, Xmask,Xb 110 | 111 | def get_feed_dict(self, batch, is_train): 112 | X, Y, Xmask, Xb = self.preprocess_batch(batch) 113 | return { 114 | self.x: X, 115 | self.xmask: Xmask, 116 | self.y: Y, 117 | self.is_train: is_train, 118 | self.yb: Xb 119 | } 120 | 121 | -------------------------------------------------------------------------------- /nn/small.sh: -------------------------------------------------------------------------------- 1 | python main.py --train small/train.list --display_train small/clicks_tr.sample --fea_limit 5 --test small/test.list --display_test small/clicks_va.sample --learning_rate 0.005 2 | -------------------------------------------------------------------------------- /nn/small/clicks_tr.sample: 
-------------------------------------------------------------------------------- 1 | display_id,ad_id,clicked 2 | 3,71547,0 3 | 3,95814,0 4 | 3,152141,0 5 | 3,183846,0 6 | 3,228657,1 7 | 3,250082,0 8 | 13,121703,0 9 | 13,143294,0 10 | 13,149541,1 11 | 16,104208,0 12 | 16,172888,0 13 | 16,235104,0 14 | 16,273567,1 15 | 26,152193,1 16 | 26,285992,0 17 | 32,111697,0 18 | 32,185710,0 19 | 32,188665,0 20 | 32,333874,1 21 | 34,14096,1 22 | 34,23522,0 23 | 34,101761,0 24 | 34,151209,0 25 | 34,162129,0 26 | 34,220315,0 27 | 34,228566,0 28 | 34,260399,0 29 | 42,29046,1 30 | 42,276172,0 31 | 3,71547,0 32 | 3,95814,0 33 | 3,152141,0 34 | 3,183846,0 35 | 3,228657,1 36 | 3,250082,0 37 | 13,121703,0 38 | 13,143294,0 39 | 13,149541,1 40 | 16,104208,0 41 | 16,172888,0 42 | 16,235104,0 43 | 16,273567,1 44 | 26,152193,1 45 | 26,285992,0 46 | 32,111697,0 47 | 32,185710,0 48 | 32,188665,0 49 | 32,333874,1 50 | 34,14096,1 51 | 34,23522,0 52 | 34,101761,0 53 | 34,151209,0 54 | 34,162129,0 55 | 34,220315,0 56 | 34,228566,0 57 | 34,260399,0 58 | 42,29046,1 59 | 42,276172,0 60 | 3,71547,0 61 | 3,95814,0 62 | 3,152141,0 63 | 3,183846,0 64 | 3,228657,1 65 | 3,250082,0 66 | 13,121703,0 67 | 13,143294,0 68 | 13,149541,1 69 | 16,104208,0 70 | 16,172888,0 71 | 16,235104,0 72 | 16,273567,1 73 | 26,152193,1 74 | 26,285992,0 75 | 32,111697,0 76 | 32,185710,0 77 | 32,188665,0 78 | 32,333874,1 79 | 34,14096,1 80 | 34,23522,0 81 | 34,101761,0 82 | 34,151209,0 83 | 34,162129,0 84 | 34,220315,0 85 | 34,228566,0 86 | 34,260399,0 87 | 42,29046,1 88 | 42,276172,0 89 | 3,71547,0 90 | 3,95814,0 91 | 3,152141,0 92 | 3,183846,0 93 | 3,228657,1 94 | 3,250082,0 95 | 13,121703,0 96 | 13,143294,0 97 | 13,149541,1 98 | 16,104208,0 99 | 16,172888,0 100 | 16,235104,0 101 | 16,273567,1 102 | 26,152193,1 103 | 26,285992,0 104 | 32,111697,0 105 | 32,185710,0 106 | 32,188665,0 107 | 32,333874,1 108 | 34,14096,1 109 | 34,23522,0 110 | 34,101761,0 111 | 34,151209,0 112 | 34,162129,0 113 | 34,220315,0 114 | 34,228566,0 115 | 34,260399,0 116 | 42,29046,1 117 | 42,276172,0 118 | -------------------------------------------------------------------------------- /nn/small/clicks_va.sample: -------------------------------------------------------------------------------- 1 | display_id,ad_id,clicked 2 | 3,71547,0 3 | 3,95814,0 4 | 3,152141,0 5 | 3,183846,0 6 | 3,228657,1 7 | 3,250082,0 8 | 13,121703,0 9 | 13,143294,0 10 | 13,149541,1 11 | 16,104208,0 12 | 16,172888,0 13 | 16,235104,0 14 | 16,273567,1 15 | 26,152193,1 16 | 26,285992,0 17 | 32,111697,0 18 | 32,185710,0 19 | 32,188665,0 20 | 32,333874,1 21 | 34,14096,1 22 | 34,23522,0 23 | 34,101761,0 24 | 34,151209,0 25 | 34,162129,0 26 | 34,220315,0 27 | 34,228566,0 28 | 34,260399,0 29 | 42,29046,1 30 | 42,276172,0 31 | 3,71547,0 32 | 3,95814,0 33 | 3,152141,0 34 | 3,183846,0 35 | 3,228657,1 36 | 3,250082,0 37 | 13,121703,0 38 | 13,143294,0 39 | 13,149541,1 40 | 16,104208,0 41 | 16,172888,0 42 | 16,235104,0 43 | 16,273567,1 44 | 26,152193,1 45 | 26,285992,0 46 | 32,111697,0 47 | 32,185710,0 48 | 32,188665,0 49 | 32,333874,1 50 | 34,14096,1 51 | 34,23522,0 52 | 34,101761,0 53 | 34,151209,0 54 | 34,162129,0 55 | 34,220315,0 56 | 34,228566,0 57 | 34,260399,0 58 | 42,29046,1 59 | 42,276172,0 60 | 3,71547,0 61 | 3,95814,0 62 | 3,152141,0 63 | 3,183846,0 64 | 3,228657,1 65 | 3,250082,0 66 | 13,121703,0 67 | 13,143294,0 68 | 13,149541,1 69 | 16,104208,0 70 | 16,172888,0 71 | 16,235104,0 72 | 16,273567,1 73 | 26,152193,1 74 | 26,285992,0 75 | 32,111697,0 76 | 32,185710,0 77 | 32,188665,0 78 | 32,333874,1 79 | 
34,14096,1 80 | 34,23522,0 81 | 34,101761,0 82 | 34,151209,0 83 | 34,162129,0 84 | 34,220315,0 85 | 34,228566,0 86 | 34,260399,0 87 | 42,29046,1 88 | 42,276172,0 89 | 3,71547,0 90 | 3,95814,0 91 | 3,152141,0 92 | 3,183846,0 93 | 3,228657,1 94 | 3,250082,0 95 | 13,121703,0 96 | 13,143294,0 97 | 13,149541,1 98 | 16,104208,0 99 | 16,172888,0 100 | 16,235104,0 101 | 16,273567,1 102 | 26,152193,1 103 | 26,285992,0 104 | 32,111697,0 105 | 32,185710,0 106 | 32,188665,0 107 | 32,333874,1 108 | 34,14096,1 109 | 34,23522,0 110 | 34,101761,0 111 | 34,151209,0 112 | 34,162129,0 113 | 34,220315,0 114 | 34,228566,0 115 | 34,260399,0 116 | 42,29046,1 117 | 42,276172,0 118 | -------------------------------------------------------------------------------- /nn/small/test.list: -------------------------------------------------------------------------------- 1 | name,columns 2 | "data/test.sample","sffm0,sffm1,sffm2,sffm3,sffm4,sffm5,sffm6,sffm7,sffm8,sffm9,sffm10,ffm0" 3 | -------------------------------------------------------------------------------- /nn/small/test.sample: -------------------------------------------------------------------------------- 1 | ,sffm0,sffm1,sffm2,sffm3,sffm4,sffm5,sffm6,sffm7,sffm8,sffm9,sffm10,ffm0,click 2 | 0,0.0515877,0.0512952,0.0584666,0.0541916,0.0599344,0.0588076,0.0666565,0.0681219,0.0560595,0.058072,0.059793,0.0532316,0.0 3 | 1,0.190674,0.191824,0.239068,0.1922,0.237203,0.230217,0.313678,0.349786,0.21474,0.391031,0.344956,0.338265,0.0 4 | 2,0.153426,0.159144,0.14328,0.155737,0.138683,0.131298,0.17126,0.200034,0.1269,0.193841,0.213431,0.197445,0.0 5 | 3,0.121402,0.134652,0.142062,0.137984,0.14124,0.14173,0.212495,0.233223,0.150194,0.246823,0.20004,0.208704,0.0 6 | 4,0.201166,0.212233,0.212998,0.185245,0.237729,0.207711,0.210761,0.192045,0.183674,0.202456,0.220514,0.190291,1.0 7 | 5,0.0475586,0.0462934,0.0499161,0.0456461,0.0472016,0.0497535,0.0605238,0.0464654,0.0517964,0.0455793,0.0509504,0.0402469,0.0 8 | 6,0.305636,0.298276,0.268011,0.317113,0.298493,0.330001,0.433118,0.440794,0.332874,0.382836,0.373078,0.490156,0.0 9 | 7,0.198167,0.144942,0.157391,0.162188,0.195639,0.148722,0.150848,0.150656,0.161302,0.147178,0.160963,0.138738,0.0 10 | 8,0.407063,0.359248,0.306404,0.384878,0.363618,0.287798,0.524089,0.383478,0.305048,0.399877,0.372271,0.518678,1.0 11 | 9,0.129071,0.126445,0.0511994,0.118964,0.059193,0.0565401,0.00705537,0.0207395,0.0631221,0.0507373,0.0542783,0.00419731,0.0 12 | 10,0.440347,0.462728,0.262948,0.464676,0.22216,0.166368,0.0593336,0.179087,0.201939,0.162554,0.189122,0.0122727,0.0 13 | 11,0.228471,0.130217,0.0168495,0.199576,0.00848224,0.00950886,0.00171805,0.206851,0.014308,0.0779484,0.087863,0.00087822,0.0 14 | 12,0.418847,0.991691,0.96683,0.43619,0.859727,0.925833,0.978079,0.981086,0.975378,0.956581,0.964555,0.971586,1.0 15 | 13,0.671055,0.617605,0.606565,0.631376,0.602544,0.668022,0.876672,0.721964,0.698678,0.754485,0.764871,0.901184,1.0 16 | 14,0.130901,0.0756133,0.0853984,0.0991483,0.10652,0.0944819,0.0596014,0.0475533,0.104515,0.0442111,0.0569756,0.0445175,0.0 17 | 15,0.24861,0.269577,0.233931,0.268359,0.22982,0.275756,0.245144,0.310737,0.26588,0.321166,0.309314,0.231232,0.0 18 | 16,0.221161,0.14135,0.175787,0.184226,0.185819,0.232389,0.236772,0.274849,0.234639,0.23912,0.227355,0.211311,0.0 19 | 17,0.303925,0.274397,0.231182,0.266895,0.286691,0.270527,0.228943,0.301131,0.266436,0.335565,0.307768,0.202424,0.0 20 | 18,0.122112,0.106457,0.0938463,0.114249,0.0944101,0.0881247,0.104559,0.141754,0.0751357,0.152233,0.153261,0.0842195,1.0 21 | 
19,0.29682,0.337259,0.348353,0.303856,0.346684,0.33906,0.351329,0.360309,0.315823,0.338081,0.387706,0.361115,1.0 22 | 20,0.117618,0.119904,0.112298,0.105533,0.0971093,0.0989348,0.0775649,0.0663834,0.102068,0.0495816,0.0624868,0.0806681,0.0 23 | 21,0.301816,0.24819,0.309643,0.261582,0.37596,0.332394,0.326399,0.29292,0.342858,0.31686,0.341505,0.316123,0.0 24 | 22,0.15122,0.136581,0.138819,0.145024,0.165039,0.171084,0.144622,0.160229,0.14734,0.164099,0.173422,0.138611,0.0 25 | 23,0.109819,0.108503,0.192634,0.114078,0.205974,0.169323,0.162738,0.199573,0.174041,0.216872,0.209412,0.122169,0.0 26 | 24,0.0284366,0.0265458,0.0375123,0.0287954,0.050621,0.0304326,0.031097,0.0152319,0.0348891,0.0141938,0.0169807,0.026086,0.0 27 | 25,0.0415617,0.0498941,0.0429027,0.0462508,0.0402712,0.038657,0.0267309,0.0260804,0.0390857,0.0244079,0.0314033,0.0283975,0.0 28 | 26,0.0782183,0.0860654,0.0386417,0.093939,0.0311257,0.0336799,0.0271866,0.0474061,0.0377424,0.0348524,0.0339082,0.0186533,0.0 29 | 27,0.515198,0.477969,0.487953,0.512853,0.528621,0.549713,0.519788,0.577133,0.55569,0.587544,0.5528,0.615966,1.0 30 | 28,0.473032,0.506821,0.492473,0.473992,0.498311,0.533195,0.541702,0.561859,0.515185,0.560986,0.564291,0.483087,0.0 31 | 0,0.0515877,0.0512952,0.0584666,0.0541916,0.0599344,0.0588076,0.0666565,0.0681219,0.0560595,0.058072,0.059793,0.0532316,0.0 32 | 1,0.190674,0.191824,0.239068,0.1922,0.237203,0.230217,0.313678,0.349786,0.21474,0.391031,0.344956,0.338265,0.0 33 | 2,0.153426,0.159144,0.14328,0.155737,0.138683,0.131298,0.17126,0.200034,0.1269,0.193841,0.213431,0.197445,0.0 34 | 3,0.121402,0.134652,0.142062,0.137984,0.14124,0.14173,0.212495,0.233223,0.150194,0.246823,0.20004,0.208704,0.0 35 | 4,0.201166,0.212233,0.212998,0.185245,0.237729,0.207711,0.210761,0.192045,0.183674,0.202456,0.220514,0.190291,1.0 36 | 5,0.0475586,0.0462934,0.0499161,0.0456461,0.0472016,0.0497535,0.0605238,0.0464654,0.0517964,0.0455793,0.0509504,0.0402469,0.0 37 | 6,0.305636,0.298276,0.268011,0.317113,0.298493,0.330001,0.433118,0.440794,0.332874,0.382836,0.373078,0.490156,0.0 38 | 7,0.198167,0.144942,0.157391,0.162188,0.195639,0.148722,0.150848,0.150656,0.161302,0.147178,0.160963,0.138738,0.0 39 | 8,0.407063,0.359248,0.306404,0.384878,0.363618,0.287798,0.524089,0.383478,0.305048,0.399877,0.372271,0.518678,1.0 40 | 9,0.129071,0.126445,0.0511994,0.118964,0.059193,0.0565401,0.00705537,0.0207395,0.0631221,0.0507373,0.0542783,0.00419731,0.0 41 | 10,0.440347,0.462728,0.262948,0.464676,0.22216,0.166368,0.0593336,0.179087,0.201939,0.162554,0.189122,0.0122727,0.0 42 | 11,0.228471,0.130217,0.0168495,0.199576,0.00848224,0.00950886,0.00171805,0.206851,0.014308,0.0779484,0.087863,0.00087822,0.0 43 | 12,0.418847,0.991691,0.96683,0.43619,0.859727,0.925833,0.978079,0.981086,0.975378,0.956581,0.964555,0.971586,1.0 44 | 13,0.671055,0.617605,0.606565,0.631376,0.602544,0.668022,0.876672,0.721964,0.698678,0.754485,0.764871,0.901184,1.0 45 | 14,0.130901,0.0756133,0.0853984,0.0991483,0.10652,0.0944819,0.0596014,0.0475533,0.104515,0.0442111,0.0569756,0.0445175,0.0 46 | 15,0.24861,0.269577,0.233931,0.268359,0.22982,0.275756,0.245144,0.310737,0.26588,0.321166,0.309314,0.231232,0.0 47 | 16,0.221161,0.14135,0.175787,0.184226,0.185819,0.232389,0.236772,0.274849,0.234639,0.23912,0.227355,0.211311,0.0 48 | 17,0.303925,0.274397,0.231182,0.266895,0.286691,0.270527,0.228943,0.301131,0.266436,0.335565,0.307768,0.202424,0.0 49 | 18,0.122112,0.106457,0.0938463,0.114249,0.0944101,0.0881247,0.104559,0.141754,0.0751357,0.152233,0.153261,0.0842195,1.0 50 | 
19,0.29682,0.337259,0.348353,0.303856,0.346684,0.33906,0.351329,0.360309,0.315823,0.338081,0.387706,0.361115,1.0 51 | 20,0.117618,0.119904,0.112298,0.105533,0.0971093,0.0989348,0.0775649,0.0663834,0.102068,0.0495816,0.0624868,0.0806681,0.0 52 | 21,0.301816,0.24819,0.309643,0.261582,0.37596,0.332394,0.326399,0.29292,0.342858,0.31686,0.341505,0.316123,0.0 53 | 22,0.15122,0.136581,0.138819,0.145024,0.165039,0.171084,0.144622,0.160229,0.14734,0.164099,0.173422,0.138611,0.0 54 | 23,0.109819,0.108503,0.192634,0.114078,0.205974,0.169323,0.162738,0.199573,0.174041,0.216872,0.209412,0.122169,0.0 55 | 24,0.0284366,0.0265458,0.0375123,0.0287954,0.050621,0.0304326,0.031097,0.0152319,0.0348891,0.0141938,0.0169807,0.026086,0.0 56 | 25,0.0415617,0.0498941,0.0429027,0.0462508,0.0402712,0.038657,0.0267309,0.0260804,0.0390857,0.0244079,0.0314033,0.0283975,0.0 57 | 26,0.0782183,0.0860654,0.0386417,0.093939,0.0311257,0.0336799,0.0271866,0.0474061,0.0377424,0.0348524,0.0339082,0.0186533,0.0 58 | 27,0.515198,0.477969,0.487953,0.512853,0.528621,0.549713,0.519788,0.577133,0.55569,0.587544,0.5528,0.615966,1.0 59 | 28,0.473032,0.506821,0.492473,0.473992,0.498311,0.533195,0.541702,0.561859,0.515185,0.560986,0.564291,0.483087,0.0 60 | 0,0.0515877,0.0512952,0.0584666,0.0541916,0.0599344,0.0588076,0.0666565,0.0681219,0.0560595,0.058072,0.059793,0.0532316,0.0 61 | 1,0.190674,0.191824,0.239068,0.1922,0.237203,0.230217,0.313678,0.349786,0.21474,0.391031,0.344956,0.338265,0.0 62 | 2,0.153426,0.159144,0.14328,0.155737,0.138683,0.131298,0.17126,0.200034,0.1269,0.193841,0.213431,0.197445,0.0 63 | 3,0.121402,0.134652,0.142062,0.137984,0.14124,0.14173,0.212495,0.233223,0.150194,0.246823,0.20004,0.208704,0.0 64 | 4,0.201166,0.212233,0.212998,0.185245,0.237729,0.207711,0.210761,0.192045,0.183674,0.202456,0.220514,0.190291,1.0 65 | 5,0.0475586,0.0462934,0.0499161,0.0456461,0.0472016,0.0497535,0.0605238,0.0464654,0.0517964,0.0455793,0.0509504,0.0402469,0.0 66 | 6,0.305636,0.298276,0.268011,0.317113,0.298493,0.330001,0.433118,0.440794,0.332874,0.382836,0.373078,0.490156,0.0 67 | 7,0.198167,0.144942,0.157391,0.162188,0.195639,0.148722,0.150848,0.150656,0.161302,0.147178,0.160963,0.138738,0.0 68 | 8,0.407063,0.359248,0.306404,0.384878,0.363618,0.287798,0.524089,0.383478,0.305048,0.399877,0.372271,0.518678,1.0 69 | 9,0.129071,0.126445,0.0511994,0.118964,0.059193,0.0565401,0.00705537,0.0207395,0.0631221,0.0507373,0.0542783,0.00419731,0.0 70 | 10,0.440347,0.462728,0.262948,0.464676,0.22216,0.166368,0.0593336,0.179087,0.201939,0.162554,0.189122,0.0122727,0.0 71 | 11,0.228471,0.130217,0.0168495,0.199576,0.00848224,0.00950886,0.00171805,0.206851,0.014308,0.0779484,0.087863,0.00087822,0.0 72 | 12,0.418847,0.991691,0.96683,0.43619,0.859727,0.925833,0.978079,0.981086,0.975378,0.956581,0.964555,0.971586,1.0 73 | 13,0.671055,0.617605,0.606565,0.631376,0.602544,0.668022,0.876672,0.721964,0.698678,0.754485,0.764871,0.901184,1.0 74 | 14,0.130901,0.0756133,0.0853984,0.0991483,0.10652,0.0944819,0.0596014,0.0475533,0.104515,0.0442111,0.0569756,0.0445175,0.0 75 | 15,0.24861,0.269577,0.233931,0.268359,0.22982,0.275756,0.245144,0.310737,0.26588,0.321166,0.309314,0.231232,0.0 76 | 16,0.221161,0.14135,0.175787,0.184226,0.185819,0.232389,0.236772,0.274849,0.234639,0.23912,0.227355,0.211311,0.0 77 | 17,0.303925,0.274397,0.231182,0.266895,0.286691,0.270527,0.228943,0.301131,0.266436,0.335565,0.307768,0.202424,0.0 78 | 18,0.122112,0.106457,0.0938463,0.114249,0.0944101,0.0881247,0.104559,0.141754,0.0751357,0.152233,0.153261,0.0842195,1.0 79 | 
19,0.29682,0.337259,0.348353,0.303856,0.346684,0.33906,0.351329,0.360309,0.315823,0.338081,0.387706,0.361115,1.0 80 | 20,0.117618,0.119904,0.112298,0.105533,0.0971093,0.0989348,0.0775649,0.0663834,0.102068,0.0495816,0.0624868,0.0806681,0.0 81 | 21,0.301816,0.24819,0.309643,0.261582,0.37596,0.332394,0.326399,0.29292,0.342858,0.31686,0.341505,0.316123,0.0 82 | 22,0.15122,0.136581,0.138819,0.145024,0.165039,0.171084,0.144622,0.160229,0.14734,0.164099,0.173422,0.138611,0.0 83 | 23,0.109819,0.108503,0.192634,0.114078,0.205974,0.169323,0.162738,0.199573,0.174041,0.216872,0.209412,0.122169,0.0 84 | 24,0.0284366,0.0265458,0.0375123,0.0287954,0.050621,0.0304326,0.031097,0.0152319,0.0348891,0.0141938,0.0169807,0.026086,0.0 85 | 25,0.0415617,0.0498941,0.0429027,0.0462508,0.0402712,0.038657,0.0267309,0.0260804,0.0390857,0.0244079,0.0314033,0.0283975,0.0 86 | 26,0.0782183,0.0860654,0.0386417,0.093939,0.0311257,0.0336799,0.0271866,0.0474061,0.0377424,0.0348524,0.0339082,0.0186533,0.0 87 | 27,0.515198,0.477969,0.487953,0.512853,0.528621,0.549713,0.519788,0.577133,0.55569,0.587544,0.5528,0.615966,1.0 88 | 28,0.473032,0.506821,0.492473,0.473992,0.498311,0.533195,0.541702,0.561859,0.515185,0.560986,0.564291,0.483087,0.0 89 | 0,0.0515877,0.0512952,0.0584666,0.0541916,0.0599344,0.0588076,0.0666565,0.0681219,0.0560595,0.058072,0.059793,0.0532316,0.0 90 | 1,0.190674,0.191824,0.239068,0.1922,0.237203,0.230217,0.313678,0.349786,0.21474,0.391031,0.344956,0.338265,0.0 91 | 2,0.153426,0.159144,0.14328,0.155737,0.138683,0.131298,0.17126,0.200034,0.1269,0.193841,0.213431,0.197445,0.0 92 | 3,0.121402,0.134652,0.142062,0.137984,0.14124,0.14173,0.212495,0.233223,0.150194,0.246823,0.20004,0.208704,0.0 93 | 4,0.201166,0.212233,0.212998,0.185245,0.237729,0.207711,0.210761,0.192045,0.183674,0.202456,0.220514,0.190291,1.0 94 | 5,0.0475586,0.0462934,0.0499161,0.0456461,0.0472016,0.0497535,0.0605238,0.0464654,0.0517964,0.0455793,0.0509504,0.0402469,0.0 95 | 6,0.305636,0.298276,0.268011,0.317113,0.298493,0.330001,0.433118,0.440794,0.332874,0.382836,0.373078,0.490156,0.0 96 | 7,0.198167,0.144942,0.157391,0.162188,0.195639,0.148722,0.150848,0.150656,0.161302,0.147178,0.160963,0.138738,0.0 97 | 8,0.407063,0.359248,0.306404,0.384878,0.363618,0.287798,0.524089,0.383478,0.305048,0.399877,0.372271,0.518678,1.0 98 | 9,0.129071,0.126445,0.0511994,0.118964,0.059193,0.0565401,0.00705537,0.0207395,0.0631221,0.0507373,0.0542783,0.00419731,0.0 99 | 10,0.440347,0.462728,0.262948,0.464676,0.22216,0.166368,0.0593336,0.179087,0.201939,0.162554,0.189122,0.0122727,0.0 100 | 11,0.228471,0.130217,0.0168495,0.199576,0.00848224,0.00950886,0.00171805,0.206851,0.014308,0.0779484,0.087863,0.00087822,0.0 101 | 12,0.418847,0.991691,0.96683,0.43619,0.859727,0.925833,0.978079,0.981086,0.975378,0.956581,0.964555,0.971586,1.0 102 | 13,0.671055,0.617605,0.606565,0.631376,0.602544,0.668022,0.876672,0.721964,0.698678,0.754485,0.764871,0.901184,1.0 103 | 14,0.130901,0.0756133,0.0853984,0.0991483,0.10652,0.0944819,0.0596014,0.0475533,0.104515,0.0442111,0.0569756,0.0445175,0.0 104 | 15,0.24861,0.269577,0.233931,0.268359,0.22982,0.275756,0.245144,0.310737,0.26588,0.321166,0.309314,0.231232,0.0 105 | 16,0.221161,0.14135,0.175787,0.184226,0.185819,0.232389,0.236772,0.274849,0.234639,0.23912,0.227355,0.211311,0.0 106 | 17,0.303925,0.274397,0.231182,0.266895,0.286691,0.270527,0.228943,0.301131,0.266436,0.335565,0.307768,0.202424,0.0 107 | 18,0.122112,0.106457,0.0938463,0.114249,0.0944101,0.0881247,0.104559,0.141754,0.0751357,0.152233,0.153261,0.0842195,1.0 108 | 
19,0.29682,0.337259,0.348353,0.303856,0.346684,0.33906,0.351329,0.360309,0.315823,0.338081,0.387706,0.361115,1.0 109 | 20,0.117618,0.119904,0.112298,0.105533,0.0971093,0.0989348,0.0775649,0.0663834,0.102068,0.0495816,0.0624868,0.0806681,0.0 110 | 21,0.301816,0.24819,0.309643,0.261582,0.37596,0.332394,0.326399,0.29292,0.342858,0.31686,0.341505,0.316123,0.0 111 | 22,0.15122,0.136581,0.138819,0.145024,0.165039,0.171084,0.144622,0.160229,0.14734,0.164099,0.173422,0.138611,0.0 112 | 23,0.109819,0.108503,0.192634,0.114078,0.205974,0.169323,0.162738,0.199573,0.174041,0.216872,0.209412,0.122169,0.0 113 | 24,0.0284366,0.0265458,0.0375123,0.0287954,0.050621,0.0304326,0.031097,0.0152319,0.0348891,0.0141938,0.0169807,0.026086,0.0 114 | 25,0.0415617,0.0498941,0.0429027,0.0462508,0.0402712,0.038657,0.0267309,0.0260804,0.0390857,0.0244079,0.0314033,0.0283975,0.0 115 | 26,0.0782183,0.0860654,0.0386417,0.093939,0.0311257,0.0336799,0.0271866,0.0474061,0.0377424,0.0348524,0.0339082,0.0186533,0.0 116 | 27,0.515198,0.477969,0.487953,0.512853,0.528621,0.549713,0.519788,0.577133,0.55569,0.587544,0.5528,0.615966,1.0 117 | 28,0.473032,0.506821,0.492473,0.473992,0.498311,0.533195,0.541702,0.561859,0.515185,0.560986,0.564291,0.483087,0.0 118 | -------------------------------------------------------------------------------- /nn/small/train.list: -------------------------------------------------------------------------------- 1 | name,columns 2 | "data/train.sample","sffm0,sffm1,sffm2,sffm3,sffm4,sffm5,sffm6,sffm7,sffm8,sffm9,sffm10,ffm0" 3 | -------------------------------------------------------------------------------- /nn/small/train.sample: -------------------------------------------------------------------------------- 1 | ,sffm0,sffm1,sffm2,sffm3,sffm4,sffm5,sffm6,sffm7,sffm8,sffm9,sffm10,ffm0,click 2 | 0,0.0515877,0.0512952,0.0584666,0.0541916,0.0599344,0.0588076,0.0666565,0.0681219,0.0560595,0.058072,0.059793,0.0532316,0.0 3 | 1,0.190674,0.191824,0.239068,0.1922,0.237203,0.230217,0.313678,0.349786,0.21474,0.391031,0.344956,0.338265,0.0 4 | 2,0.153426,0.159144,0.14328,0.155737,0.138683,0.131298,0.17126,0.200034,0.1269,0.193841,0.213431,0.197445,0.0 5 | 3,0.121402,0.134652,0.142062,0.137984,0.14124,0.14173,0.212495,0.233223,0.150194,0.246823,0.20004,0.208704,0.0 6 | 4,0.201166,0.212233,0.212998,0.185245,0.237729,0.207711,0.210761,0.192045,0.183674,0.202456,0.220514,0.190291,1.0 7 | 5,0.0475586,0.0462934,0.0499161,0.0456461,0.0472016,0.0497535,0.0605238,0.0464654,0.0517964,0.0455793,0.0509504,0.0402469,0.0 8 | 6,0.305636,0.298276,0.268011,0.317113,0.298493,0.330001,0.433118,0.440794,0.332874,0.382836,0.373078,0.490156,0.0 9 | 7,0.198167,0.144942,0.157391,0.162188,0.195639,0.148722,0.150848,0.150656,0.161302,0.147178,0.160963,0.138738,0.0 10 | 8,0.407063,0.359248,0.306404,0.384878,0.363618,0.287798,0.524089,0.383478,0.305048,0.399877,0.372271,0.518678,1.0 11 | 9,0.129071,0.126445,0.0511994,0.118964,0.059193,0.0565401,0.00705537,0.0207395,0.0631221,0.0507373,0.0542783,0.00419731,0.0 12 | 10,0.440347,0.462728,0.262948,0.464676,0.22216,0.166368,0.0593336,0.179087,0.201939,0.162554,0.189122,0.0122727,0.0 13 | 11,0.228471,0.130217,0.0168495,0.199576,0.00848224,0.00950886,0.00171805,0.206851,0.014308,0.0779484,0.087863,0.00087822,0.0 14 | 12,0.418847,0.991691,0.96683,0.43619,0.859727,0.925833,0.978079,0.981086,0.975378,0.956581,0.964555,0.971586,1.0 15 | 13,0.671055,0.617605,0.606565,0.631376,0.602544,0.668022,0.876672,0.721964,0.698678,0.754485,0.764871,0.901184,1.0 16 | 
14,0.130901,0.0756133,0.0853984,0.0991483,0.10652,0.0944819,0.0596014,0.0475533,0.104515,0.0442111,0.0569756,0.0445175,0.0 17 | 15,0.24861,0.269577,0.233931,0.268359,0.22982,0.275756,0.245144,0.310737,0.26588,0.321166,0.309314,0.231232,0.0 18 | 16,0.221161,0.14135,0.175787,0.184226,0.185819,0.232389,0.236772,0.274849,0.234639,0.23912,0.227355,0.211311,0.0 19 | 17,0.303925,0.274397,0.231182,0.266895,0.286691,0.270527,0.228943,0.301131,0.266436,0.335565,0.307768,0.202424,0.0 20 | 18,0.122112,0.106457,0.0938463,0.114249,0.0944101,0.0881247,0.104559,0.141754,0.0751357,0.152233,0.153261,0.0842195,1.0 21 | 19,0.29682,0.337259,0.348353,0.303856,0.346684,0.33906,0.351329,0.360309,0.315823,0.338081,0.387706,0.361115,1.0 22 | 20,0.117618,0.119904,0.112298,0.105533,0.0971093,0.0989348,0.0775649,0.0663834,0.102068,0.0495816,0.0624868,0.0806681,0.0 23 | 21,0.301816,0.24819,0.309643,0.261582,0.37596,0.332394,0.326399,0.29292,0.342858,0.31686,0.341505,0.316123,0.0 24 | 22,0.15122,0.136581,0.138819,0.145024,0.165039,0.171084,0.144622,0.160229,0.14734,0.164099,0.173422,0.138611,0.0 25 | 23,0.109819,0.108503,0.192634,0.114078,0.205974,0.169323,0.162738,0.199573,0.174041,0.216872,0.209412,0.122169,0.0 26 | 24,0.0284366,0.0265458,0.0375123,0.0287954,0.050621,0.0304326,0.031097,0.0152319,0.0348891,0.0141938,0.0169807,0.026086,0.0 27 | 25,0.0415617,0.0498941,0.0429027,0.0462508,0.0402712,0.038657,0.0267309,0.0260804,0.0390857,0.0244079,0.0314033,0.0283975,0.0 28 | 26,0.0782183,0.0860654,0.0386417,0.093939,0.0311257,0.0336799,0.0271866,0.0474061,0.0377424,0.0348524,0.0339082,0.0186533,0.0 29 | 27,0.515198,0.477969,0.487953,0.512853,0.528621,0.549713,0.519788,0.577133,0.55569,0.587544,0.5528,0.615966,1.0 30 | 28,0.473032,0.506821,0.492473,0.473992,0.498311,0.533195,0.541702,0.561859,0.515185,0.560986,0.564291,0.483087,0.0 31 | 0,0.0515877,0.0512952,0.0584666,0.0541916,0.0599344,0.0588076,0.0666565,0.0681219,0.0560595,0.058072,0.059793,0.0532316,0.0 32 | 1,0.190674,0.191824,0.239068,0.1922,0.237203,0.230217,0.313678,0.349786,0.21474,0.391031,0.344956,0.338265,0.0 33 | 2,0.153426,0.159144,0.14328,0.155737,0.138683,0.131298,0.17126,0.200034,0.1269,0.193841,0.213431,0.197445,0.0 34 | 3,0.121402,0.134652,0.142062,0.137984,0.14124,0.14173,0.212495,0.233223,0.150194,0.246823,0.20004,0.208704,0.0 35 | 4,0.201166,0.212233,0.212998,0.185245,0.237729,0.207711,0.210761,0.192045,0.183674,0.202456,0.220514,0.190291,1.0 36 | 5,0.0475586,0.0462934,0.0499161,0.0456461,0.0472016,0.0497535,0.0605238,0.0464654,0.0517964,0.0455793,0.0509504,0.0402469,0.0 37 | 6,0.305636,0.298276,0.268011,0.317113,0.298493,0.330001,0.433118,0.440794,0.332874,0.382836,0.373078,0.490156,0.0 38 | 7,0.198167,0.144942,0.157391,0.162188,0.195639,0.148722,0.150848,0.150656,0.161302,0.147178,0.160963,0.138738,0.0 39 | 8,0.407063,0.359248,0.306404,0.384878,0.363618,0.287798,0.524089,0.383478,0.305048,0.399877,0.372271,0.518678,1.0 40 | 9,0.129071,0.126445,0.0511994,0.118964,0.059193,0.0565401,0.00705537,0.0207395,0.0631221,0.0507373,0.0542783,0.00419731,0.0 41 | 10,0.440347,0.462728,0.262948,0.464676,0.22216,0.166368,0.0593336,0.179087,0.201939,0.162554,0.189122,0.0122727,0.0 42 | 11,0.228471,0.130217,0.0168495,0.199576,0.00848224,0.00950886,0.00171805,0.206851,0.014308,0.0779484,0.087863,0.00087822,0.0 43 | 12,0.418847,0.991691,0.96683,0.43619,0.859727,0.925833,0.978079,0.981086,0.975378,0.956581,0.964555,0.971586,1.0 44 | 13,0.671055,0.617605,0.606565,0.631376,0.602544,0.668022,0.876672,0.721964,0.698678,0.754485,0.764871,0.901184,1.0 45 | 
14,0.130901,0.0756133,0.0853984,0.0991483,0.10652,0.0944819,0.0596014,0.0475533,0.104515,0.0442111,0.0569756,0.0445175,0.0 46 | 15,0.24861,0.269577,0.233931,0.268359,0.22982,0.275756,0.245144,0.310737,0.26588,0.321166,0.309314,0.231232,0.0 47 | 16,0.221161,0.14135,0.175787,0.184226,0.185819,0.232389,0.236772,0.274849,0.234639,0.23912,0.227355,0.211311,0.0 48 | 17,0.303925,0.274397,0.231182,0.266895,0.286691,0.270527,0.228943,0.301131,0.266436,0.335565,0.307768,0.202424,0.0 49 | 18,0.122112,0.106457,0.0938463,0.114249,0.0944101,0.0881247,0.104559,0.141754,0.0751357,0.152233,0.153261,0.0842195,1.0 50 | 19,0.29682,0.337259,0.348353,0.303856,0.346684,0.33906,0.351329,0.360309,0.315823,0.338081,0.387706,0.361115,1.0 51 | 20,0.117618,0.119904,0.112298,0.105533,0.0971093,0.0989348,0.0775649,0.0663834,0.102068,0.0495816,0.0624868,0.0806681,0.0 52 | 21,0.301816,0.24819,0.309643,0.261582,0.37596,0.332394,0.326399,0.29292,0.342858,0.31686,0.341505,0.316123,0.0 53 | 22,0.15122,0.136581,0.138819,0.145024,0.165039,0.171084,0.144622,0.160229,0.14734,0.164099,0.173422,0.138611,0.0 54 | 23,0.109819,0.108503,0.192634,0.114078,0.205974,0.169323,0.162738,0.199573,0.174041,0.216872,0.209412,0.122169,0.0 55 | 24,0.0284366,0.0265458,0.0375123,0.0287954,0.050621,0.0304326,0.031097,0.0152319,0.0348891,0.0141938,0.0169807,0.026086,0.0 56 | 25,0.0415617,0.0498941,0.0429027,0.0462508,0.0402712,0.038657,0.0267309,0.0260804,0.0390857,0.0244079,0.0314033,0.0283975,0.0 57 | 26,0.0782183,0.0860654,0.0386417,0.093939,0.0311257,0.0336799,0.0271866,0.0474061,0.0377424,0.0348524,0.0339082,0.0186533,0.0 58 | 27,0.515198,0.477969,0.487953,0.512853,0.528621,0.549713,0.519788,0.577133,0.55569,0.587544,0.5528,0.615966,1.0 59 | 28,0.473032,0.506821,0.492473,0.473992,0.498311,0.533195,0.541702,0.561859,0.515185,0.560986,0.564291,0.483087,0.0 60 | 0,0.0515877,0.0512952,0.0584666,0.0541916,0.0599344,0.0588076,0.0666565,0.0681219,0.0560595,0.058072,0.059793,0.0532316,0.0 61 | 1,0.190674,0.191824,0.239068,0.1922,0.237203,0.230217,0.313678,0.349786,0.21474,0.391031,0.344956,0.338265,0.0 62 | 2,0.153426,0.159144,0.14328,0.155737,0.138683,0.131298,0.17126,0.200034,0.1269,0.193841,0.213431,0.197445,0.0 63 | 3,0.121402,0.134652,0.142062,0.137984,0.14124,0.14173,0.212495,0.233223,0.150194,0.246823,0.20004,0.208704,0.0 64 | 4,0.201166,0.212233,0.212998,0.185245,0.237729,0.207711,0.210761,0.192045,0.183674,0.202456,0.220514,0.190291,1.0 65 | 5,0.0475586,0.0462934,0.0499161,0.0456461,0.0472016,0.0497535,0.0605238,0.0464654,0.0517964,0.0455793,0.0509504,0.0402469,0.0 66 | 6,0.305636,0.298276,0.268011,0.317113,0.298493,0.330001,0.433118,0.440794,0.332874,0.382836,0.373078,0.490156,0.0 67 | 7,0.198167,0.144942,0.157391,0.162188,0.195639,0.148722,0.150848,0.150656,0.161302,0.147178,0.160963,0.138738,0.0 68 | 8,0.407063,0.359248,0.306404,0.384878,0.363618,0.287798,0.524089,0.383478,0.305048,0.399877,0.372271,0.518678,1.0 69 | 9,0.129071,0.126445,0.0511994,0.118964,0.059193,0.0565401,0.00705537,0.0207395,0.0631221,0.0507373,0.0542783,0.00419731,0.0 70 | 10,0.440347,0.462728,0.262948,0.464676,0.22216,0.166368,0.0593336,0.179087,0.201939,0.162554,0.189122,0.0122727,0.0 71 | 11,0.228471,0.130217,0.0168495,0.199576,0.00848224,0.00950886,0.00171805,0.206851,0.014308,0.0779484,0.087863,0.00087822,0.0 72 | 12,0.418847,0.991691,0.96683,0.43619,0.859727,0.925833,0.978079,0.981086,0.975378,0.956581,0.964555,0.971586,1.0 73 | 13,0.671055,0.617605,0.606565,0.631376,0.602544,0.668022,0.876672,0.721964,0.698678,0.754485,0.764871,0.901184,1.0 74 | 
14,0.130901,0.0756133,0.0853984,0.0991483,0.10652,0.0944819,0.0596014,0.0475533,0.104515,0.0442111,0.0569756,0.0445175,0.0 75 | 15,0.24861,0.269577,0.233931,0.268359,0.22982,0.275756,0.245144,0.310737,0.26588,0.321166,0.309314,0.231232,0.0 76 | 16,0.221161,0.14135,0.175787,0.184226,0.185819,0.232389,0.236772,0.274849,0.234639,0.23912,0.227355,0.211311,0.0 77 | 17,0.303925,0.274397,0.231182,0.266895,0.286691,0.270527,0.228943,0.301131,0.266436,0.335565,0.307768,0.202424,0.0 78 | 18,0.122112,0.106457,0.0938463,0.114249,0.0944101,0.0881247,0.104559,0.141754,0.0751357,0.152233,0.153261,0.0842195,1.0 79 | 19,0.29682,0.337259,0.348353,0.303856,0.346684,0.33906,0.351329,0.360309,0.315823,0.338081,0.387706,0.361115,1.0 80 | 20,0.117618,0.119904,0.112298,0.105533,0.0971093,0.0989348,0.0775649,0.0663834,0.102068,0.0495816,0.0624868,0.0806681,0.0 81 | 21,0.301816,0.24819,0.309643,0.261582,0.37596,0.332394,0.326399,0.29292,0.342858,0.31686,0.341505,0.316123,0.0 82 | 22,0.15122,0.136581,0.138819,0.145024,0.165039,0.171084,0.144622,0.160229,0.14734,0.164099,0.173422,0.138611,0.0 83 | 23,0.109819,0.108503,0.192634,0.114078,0.205974,0.169323,0.162738,0.199573,0.174041,0.216872,0.209412,0.122169,0.0 84 | 24,0.0284366,0.0265458,0.0375123,0.0287954,0.050621,0.0304326,0.031097,0.0152319,0.0348891,0.0141938,0.0169807,0.026086,0.0 85 | 25,0.0415617,0.0498941,0.0429027,0.0462508,0.0402712,0.038657,0.0267309,0.0260804,0.0390857,0.0244079,0.0314033,0.0283975,0.0 86 | 26,0.0782183,0.0860654,0.0386417,0.093939,0.0311257,0.0336799,0.0271866,0.0474061,0.0377424,0.0348524,0.0339082,0.0186533,0.0 87 | 27,0.515198,0.477969,0.487953,0.512853,0.528621,0.549713,0.519788,0.577133,0.55569,0.587544,0.5528,0.615966,1.0 88 | 28,0.473032,0.506821,0.492473,0.473992,0.498311,0.533195,0.541702,0.561859,0.515185,0.560986,0.564291,0.483087,0.0 89 | 0,0.0515877,0.0512952,0.0584666,0.0541916,0.0599344,0.0588076,0.0666565,0.0681219,0.0560595,0.058072,0.059793,0.0532316,0.0 90 | 1,0.190674,0.191824,0.239068,0.1922,0.237203,0.230217,0.313678,0.349786,0.21474,0.391031,0.344956,0.338265,0.0 91 | 2,0.153426,0.159144,0.14328,0.155737,0.138683,0.131298,0.17126,0.200034,0.1269,0.193841,0.213431,0.197445,0.0 92 | 3,0.121402,0.134652,0.142062,0.137984,0.14124,0.14173,0.212495,0.233223,0.150194,0.246823,0.20004,0.208704,0.0 93 | 4,0.201166,0.212233,0.212998,0.185245,0.237729,0.207711,0.210761,0.192045,0.183674,0.202456,0.220514,0.190291,1.0 94 | 5,0.0475586,0.0462934,0.0499161,0.0456461,0.0472016,0.0497535,0.0605238,0.0464654,0.0517964,0.0455793,0.0509504,0.0402469,0.0 95 | 6,0.305636,0.298276,0.268011,0.317113,0.298493,0.330001,0.433118,0.440794,0.332874,0.382836,0.373078,0.490156,0.0 96 | 7,0.198167,0.144942,0.157391,0.162188,0.195639,0.148722,0.150848,0.150656,0.161302,0.147178,0.160963,0.138738,0.0 97 | 8,0.407063,0.359248,0.306404,0.384878,0.363618,0.287798,0.524089,0.383478,0.305048,0.399877,0.372271,0.518678,1.0 98 | 9,0.129071,0.126445,0.0511994,0.118964,0.059193,0.0565401,0.00705537,0.0207395,0.0631221,0.0507373,0.0542783,0.00419731,0.0 99 | 10,0.440347,0.462728,0.262948,0.464676,0.22216,0.166368,0.0593336,0.179087,0.201939,0.162554,0.189122,0.0122727,0.0 100 | 11,0.228471,0.130217,0.0168495,0.199576,0.00848224,0.00950886,0.00171805,0.206851,0.014308,0.0779484,0.087863,0.00087822,0.0 101 | 12,0.418847,0.991691,0.96683,0.43619,0.859727,0.925833,0.978079,0.981086,0.975378,0.956581,0.964555,0.971586,1.0 102 | 13,0.671055,0.617605,0.606565,0.631376,0.602544,0.668022,0.876672,0.721964,0.698678,0.754485,0.764871,0.901184,1.0 103 | 
14,0.130901,0.0756133,0.0853984,0.0991483,0.10652,0.0944819,0.0596014,0.0475533,0.104515,0.0442111,0.0569756,0.0445175,0.0 104 | 15,0.24861,0.269577,0.233931,0.268359,0.22982,0.275756,0.245144,0.310737,0.26588,0.321166,0.309314,0.231232,0.0 105 | 16,0.221161,0.14135,0.175787,0.184226,0.185819,0.232389,0.236772,0.274849,0.234639,0.23912,0.227355,0.211311,0.0 106 | 17,0.303925,0.274397,0.231182,0.266895,0.286691,0.270527,0.228943,0.301131,0.266436,0.335565,0.307768,0.202424,0.0 107 | 18,0.122112,0.106457,0.0938463,0.114249,0.0944101,0.0881247,0.104559,0.141754,0.0751357,0.152233,0.153261,0.0842195,1.0 108 | 19,0.29682,0.337259,0.348353,0.303856,0.346684,0.33906,0.351329,0.360309,0.315823,0.338081,0.387706,0.361115,1.0 109 | 20,0.117618,0.119904,0.112298,0.105533,0.0971093,0.0989348,0.0775649,0.0663834,0.102068,0.0495816,0.0624868,0.0806681,0.0 110 | 21,0.301816,0.24819,0.309643,0.261582,0.37596,0.332394,0.326399,0.29292,0.342858,0.31686,0.341505,0.316123,0.0 111 | 22,0.15122,0.136581,0.138819,0.145024,0.165039,0.171084,0.144622,0.160229,0.14734,0.164099,0.173422,0.138611,0.0 112 | 23,0.109819,0.108503,0.192634,0.114078,0.205974,0.169323,0.162738,0.199573,0.174041,0.216872,0.209412,0.122169,0.0 113 | 24,0.0284366,0.0265458,0.0375123,0.0287954,0.050621,0.0304326,0.031097,0.0152319,0.0348891,0.0141938,0.0169807,0.026086,0.0 114 | 25,0.0415617,0.0498941,0.0429027,0.0462508,0.0402712,0.038657,0.0267309,0.0260804,0.0390857,0.0244079,0.0314033,0.0283975,0.0 115 | 26,0.0782183,0.0860654,0.0386417,0.093939,0.0311257,0.0336799,0.0271866,0.0474061,0.0377424,0.0348524,0.0339082,0.0186533,0.0 116 | 27,0.515198,0.477969,0.487953,0.512853,0.528621,0.549713,0.519788,0.577133,0.55569,0.587544,0.5528,0.615966,1.0 117 | 28,0.473032,0.506821,0.492473,0.473992,0.498311,0.533195,0.541702,0.561859,0.515185,0.560986,0.564291,0.483087,0.0 118 | -------------------------------------------------------------------------------- /nn/sub.sh: -------------------------------------------------------------------------------- 1 | python main.py --train data/sub_train.list --display_train ../../better_split/data/clicks_va.csv --fea_limit 5 --test data/sub_test.list --display_test ../../input/clicks_test.csv --learning_rate 0.001 --batch_size 64 --sub sub.csv --num_epochs 2 --acc_period 10 2 | -------------------------------------------------------------------------------- /nn/util/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daxiongshu/stack-nn-tensorflow/8218247ce49d54db75a33edec816367de9949f23/nn/util/__init__.py -------------------------------------------------------------------------------- /nn/util/data_util.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import h5py 4 | import os 5 | import csv 6 | import gc 7 | from sklearn.utils import shuffle as sk_shuffle 8 | import random 9 | 10 | class DataSet: 11 | def __init__(self, metalist, display, baselist = '', cache='cache', shuffle=False, limit = 5, normalize = 1, softmax=0,mean=None,std=None,num_big=10): 12 | self.num_big = num_big 13 | self.softmax = 0#softmax # perform softmax transformation online 14 | self.files, self.columns = self._parse(metalist) 15 | self.baselist = baselist 16 | if self.baselist != '': 17 | self.bfiles, self.bcolumns = self._parse(baselist) 18 | self.normalize = normalize 19 | self.display = display 20 | self.cache = cache 21 | self.limit = limit # max number of features, above 
the limit will call online code 22 | #self.files # a list of file names: ['xx/m1.csv', 'yy/m2.csv' ...] 23 | #self.columns # column names of each file [['clicked'], ['ffm1','ffm2'], ...] 24 | 25 | self.shuffle = shuffle 26 | self._read() 27 | self.mean, self.std = mean, std 28 | if normalize and mean is None: 29 | self._normalize() 30 | self.reset() 31 | 32 | def _normalize(self): 33 | print "run normalization ..." 34 | sum1, sum2 = None, None 35 | num = 0 36 | for i in range(self.num_big): 37 | self._get_big_batch(batch_id = i) 38 | num += self.big_X.shape[0] 39 | if i==0: 40 | sum1 = np.sum(self.big_X,axis=0) 41 | sum2 = np.sum(self.big_X*self.big_X,axis=0) 42 | else: 43 | sum1 += np.sum(self.big_X,axis=0) 44 | sum2 += np.sum(self.big_X*self.big_X,axis=0) 45 | del self.big_X 46 | gc.collect() 47 | mean1,mean2 = sum1/num, sum2/num 48 | std = np.sqrt(mean2-mean1*mean1) 49 | self.mean = mean1 50 | self.std = std 51 | print self.mean.shape, self.std.shape 52 | 53 | def reset(self): 54 | self.current_big_batch = 0 # 0~9 55 | self.current_mini_batch = 0 56 | self.big_X = None 57 | self.big_Y = None 58 | self.big_order = range(self.num_big) 59 | if self.shuffle: 60 | random.shuffle(self.big_order) 61 | 62 | def _load_row_array(self): 63 | name = self.display.split('/')[-1] 64 | cache = self.cache 65 | cname = '%s/%s.row_array.bin'%(cache, name) 66 | h5f=h5py.File(cname,'r') 67 | self.row_array=h5f['dataset_1'][:] 68 | h5f.close() 69 | 70 | def next_batch(self, batch_size): 71 | shuffle = self.shuffle 72 | if self.big_X is None: 73 | self._get_big_batch(batch_id = self.big_order[self.current_big_batch]) 74 | self.tmp_list = range(len(self.group_idx)-1) 75 | self._base_get_big_batch(batch_id = self.big_order[self.current_big_batch]) 76 | if shuffle: 77 | random.shuffle(self.tmp_list) 78 | #print self.row_array 79 | #print self.group_idx, self.tmp_list 80 | mini_batch_id = self.current_mini_batch 81 | next_batch = mini_batch_id + batch_size 82 | next_batch = min(next_batch, len(self.tmp_list)) 83 | #print self.current_mini_batch, next_batch 84 | X, y, G, R = self._extract_batch(self.current_mini_batch, next_batch) 85 | done = False 86 | if self.baselist != '': 87 | Xb = self._base_extract_batch(self.current_mini_batch, next_batch) 88 | if next_batch == len(self.tmp_list): 89 | del self.big_X 90 | gc.collect() 91 | self.big_X = None 92 | self.big_base_X = None 93 | self.current_big_batch += 1 94 | if self.current_big_batch==self.num_big: 95 | self.reset() 96 | done = True 97 | self.current_mini_batch = 0 98 | else: 99 | self.current_mini_batch = next_batch 100 | #X = (X - np.mean(X,0))/np.std(X,0) 101 | if self.baselist == '': 102 | return X, y, G, R, done 103 | else: 104 | return X, y, G, R, done, Xb 105 | 106 | def _softmax(self, x): 107 | # perform softmax on the 2nd dimenstion 108 | for i in range(x.shape[1]): 109 | #tmpmax = np.max(x[:,i]) 110 | tmp = np.exp(x[:,i]) 111 | sumtmp = np.sum(tmp) 112 | x[:,i] = tmp/sumtmp 113 | return x 114 | 115 | def _base_extract_batch(self, start, end): 116 | X, R = [], [0] 117 | for i in range(start,end): 118 | idx = self.tmp_list[i] 119 | #idx = self.group_idx[idx] 120 | s_, e_ = self.group_idx[idx], self.group_idx[idx+1] 121 | R.append(e_-s_+R[-1]) 122 | #print s_, e_ 123 | if self.softmax: 124 | X.append(self._softmax(self.big_base_X[s_:e_,:])) 125 | else: 126 | X.append(self.big_base_X[s_:e_,:]) 127 | X = np.vstack(X) 128 | return X 129 | 130 | def _extract_batch(self, start, end): 131 | X, y, G, R = [],[],[],[0] 132 | for i in range(start,end): 133 | idx 
= self.tmp_list[i] 134 | #idx = self.group_idx[idx] 135 | s_, e_ = self.group_idx[idx], self.group_idx[idx+1] 136 | R.append(e_-s_+R[-1]) 137 | #print s_, e_ 138 | if self.softmax: 139 | X.append(self._softmax(self.big_X[s_:e_,:])) 140 | else: 141 | X.append(self.big_X[s_:e_,:]) 142 | if len(self.big_Y.shape) == 2: # 2-D big_Y holds [display_id, clicked]; 1-D means no label, just group 143 | y.append(self.big_Y[s_:e_,1]) 144 | G.append(self.big_Y[s_:e_,0]) 145 | else: 146 | G.append(self.big_Y[s_:e_]) 147 | X = np.vstack(X) 148 | if len(y) == 0: 149 | y = np.zeros(X.shape[0]) 150 | else: 151 | y = np.concatenate(y) 152 | G = np.concatenate(G) 153 | return X, y, G, R 154 | 155 | def sanity_check(self): 156 | print (self.row_array) 157 | print () 158 | for i in range(self.num_big): 159 | self._get_big_batch(i) 160 | print (i) 161 | #print self.big_X 162 | print (self.big_Y) 163 | print (self.group_idx) 164 | print () 165 | def _base_get_big_batch(self, batch_id): 166 | if self.baselist=='': 167 | self.big_base_X = None 168 | return 169 | X = [] 170 | cache = self.cache 171 | for f in self.bfiles: 172 | name = f.split('/')[-1] 173 | cname = '%s/%s_%d.bin'%(cache,name,batch_id) 174 | assert os.path.exists(cname), "Meta bin data does not exist!" 175 | h5f=h5py.File(cname,'r') 176 | train=h5f['dataset_1'][:] 177 | h5f.close() 178 | if len(train.shape)==1: 179 | train = np.reshape(train, [train.shape[0], 1]) 180 | X.append(train) 181 | self.big_base_X = np.hstack(X) 182 | 183 | def _get_big_batch(self, batch_id): 184 | X = [] 185 | cache = self.cache 186 | for f in self.files: 187 | name = f.split('/')[-1] 188 | cname = '%s/%s_%d.bin'%(cache,name,batch_id) 189 | assert os.path.exists(cname), "Meta bin data does not exist!" 190 | h5f=h5py.File(cname,'r') 191 | train=h5f['dataset_1'][:] 192 | h5f.close() 193 | if len(train.shape)==1: 194 | train = np.reshape(train, [train.shape[0], 1]) 195 | X.append(train) 196 | del train 197 | self.big_X = np.hstack(X) 198 | if self.normalize and self.mean is not None: 199 | #self.big_X = (self.big_X - np.mean(self.big_X,0))/np.std(self.big_X,0) 200 | self.big_X = (self.big_X - self.mean)/self.std 201 | #print "Load big batch", batch_id, self.big_X.shape 202 | del X 203 | gc.collect() 204 | 205 | display = self.display 206 | name = display.split('/')[-1] 207 | cname = '%s/%s_%d.bin'%(cache, name, batch_id) 208 | assert os.path.exists(cname), "Display bin data does not exist!" 209 | h5f=h5py.File(cname,'r') 210 | self.big_Y=h5f['dataset_1'][:] 211 | h5f.close() 212 | 213 | cname = '%s/%s_%d.group.bin'%(cache,name, batch_id) 214 | h5f=h5py.File(cname,'r') 215 | self.group_idx=h5f['dataset_1'][:] 216 | h5f.close() 217 | 218 | 219 | 220 | def _parse(self, metalist): 221 | files, columns = [], [] 222 | with open(metalist,'r') as f: 223 | for c,row in enumerate(csv.DictReader(f)): 224 | files.append(row['name']) 225 | columns.append(row['columns'].split(',')) 226 | return files, columns 227 | 228 | def _read(self): 229 | cache = self.cache 230 | display = self.display 231 | 232 | name = display.split('/')[-1] 233 | cname = '%s/%s_0.bin'%(cache, name) 234 | if os.path.exists(cname) == False: 235 | self._RW_display_to_bin() 236 | cname = '%s/%s.group'%(cache, name) 237 | if True: 238 | with open(cname,'r') as f: 239 | self.num_groups = int(f.readline().strip()) 240 | print ("Total number of groups", self.num_groups) 241 | for f,col in zip(self.files, self.columns): 242 | name = f.split('/')[-1] 243 | for i in range(self.num_big): 244 | cname = '%s/%s_%d.bin'%(cache,name,i) 245 | #print cname 246 | if os.path.exists(cname) == 
False: 247 | if len(col)self.num_big-1: 327 | idx = self.num_big-1 328 | #groups[idx].append(dc-1) 329 | if last!='': 330 | groups[last_idx].append(local_row_count + groups[last_idx][-1]) 331 | local_row_count = 0 332 | last_idx = idx 333 | last = row['display_id'] 334 | local_row_count += 1 335 | cname = '%s/%s_%d.bin'%(cache,name,idx) 336 | 337 | if os.path.exists(cname) == False: 338 | if 'clicked' in row: 339 | dids[idx].append([int(row['display_id']), int(row['clicked'])]) 340 | else: 341 | dids[idx].append(int(row['display_id'])) 342 | groups[last_idx].append(local_row_count + groups[last_idx][-1]) 343 | for i in range(self.num_big): 344 | cname = '%s/%s_%d.bin'%(cache,name,i) 345 | if os.path.exists(cname) == False: 346 | did = np.array(dids[i]) 347 | h5f=h5py.File(cname,'w') 348 | h5f.create_dataset('dataset_1', data=did) 349 | h5f.close() 350 | group = np.array(groups[i]) 351 | cname = '%s/%s_%d.group.bin'%(cache,name,i) 352 | h5f=h5py.File(cname,'w') 353 | h5f.create_dataset('dataset_1', data=group) 354 | h5f.close() 355 | 356 | print ("read", display, did.shape, i, 'done') 357 | total.append(did.shape[0]) 358 | print ("Total:", sum(total)) 359 | 360 | def _RW_meta_to_bin(self, inputname,column): 361 | display = self.display 362 | cache = self.cache 363 | 364 | shuffle = self.shuffle 365 | if not shuffle: 366 | num_groups = self.num_groups 367 | name = inputname.split('/')[-1] 368 | yps = [] 369 | for i in range(self.num_big): 370 | yps.append([]) 371 | 372 | if True: 373 | with open(display, 'r') as d: 374 | with open(inputname, 'r') as f: 375 | last = '' 376 | dc = 0 377 | dreader = csv.DictReader(d) 378 | yp = [] 379 | for c,row in enumerate(csv.DictReader(f)): 380 | drow = dreader.next() 381 | if last!=drow['display_id']: 382 | dc += 1 383 | last = drow['display_id'] 384 | if shuffle : 385 | idx = dc%self.num_big 386 | else: 387 | step = (num_groups/self.num_big) 388 | if step<1: 389 | step = 1 390 | idx = int(dc / step) 391 | if idx>self.num_big-1: 392 | idx = self.num_big-1 393 | cname = '%s/%s_%d.bin'%(cache,name,idx) 394 | if os.path.exists(cname) == False: 395 | tmp = [float(row[x]) for x in column] 396 | yps[idx].append(tmp) 397 | total = [] 398 | for i in range(self.num_big): 399 | cname = '%s/%s_%d.bin'%(cache,name,i) 400 | if os.path.exists(cname) == False: 401 | yp = np.array(yps[i]) 402 | print (inputname, i, yp.shape) 403 | h5f=h5py.File(cname,'w') 404 | h5f.create_dataset('dataset_1', data=yp) 405 | h5f.close() 406 | total.append(yp.shape[0]) 407 | print ("Total:", sum(total)) 408 | 409 | 410 | def _RW_meta_to_bin_online(self, inputname, column): 411 | display = self.display 412 | cache = self.cache 413 | 414 | shuffle = self.shuffle 415 | if not shuffle: 416 | num_groups = self.num_groups 417 | name = inputname.split('/')[-1] 418 | total = [] 419 | for i in range(self.num_big): 420 | cname = '%s/%s_%d.bin'%(cache,name,i) 421 | if os.path.exists(cname) == True: 422 | continue 423 | with open(display, 'r') as d: 424 | with open(inputname, 'r') as f: 425 | last = '' 426 | dc = 0 427 | dreader = csv.DictReader(d) 428 | yp = [] 429 | for c,row in enumerate(csv.DictReader(f)): 430 | drow = dreader.next() 431 | if last!=drow['display_id']: 432 | dc += 1 433 | last = drow['display_id'] 434 | if shuffle and dc%self.num_big != i: 435 | continue 436 | if not shuffle: 437 | step = (num_groups/self.num_big) 438 | if step<1: 439 | step = 1 440 | 441 | if dc < step*i: 442 | continue 443 | elif i < self.num_big-1: 444 | if dc >= step*(i+1): 445 | break 446 | 447 | tmp = 
[float(row[x]) for x in column] 448 | yp.append(tmp) 449 | yp = np.array(yp) 450 | print (inputname, i, yp.shape) 451 | h5f=h5py.File(cname,'w') 452 | h5f.create_dataset('dataset_1', data=yp) 453 | h5f.close() 454 | total.append(yp.shape[0]) 455 | print ("Total:", sum(total)) 456 | 457 | def get_num_fea( metalist): 458 | columns = [] 459 | with open(metalist,'r') as f: 460 | for c,row in enumerate(csv.DictReader(f)): 461 | columns.append(len(row['columns'].split(','))) 462 | return sum(columns) 463 | 464 | def write_sub(yp,name): 465 | s = pd.DataFrame({"clicked":yp}) 466 | s.to_csv(name,index=False) 467 | 468 | if __name__ == '__main__': 469 | ds = DataSet(display = '../../../input/clicks_test.csv', files=[], columns=[], cache='../cache', shuffle=False) 470 | -------------------------------------------------------------------------------- /nn/util/evaluate.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def apk12(batch, prediction): 4 | if len(batch) == 5: 5 | _, y, g, r, _ = batch 6 | else: 7 | _, y, g, r, _, _ = batch 8 | yp = [] 9 | result = [] 10 | if True: 11 | for i in range(len(r)-1): 12 | start, end = r[i], r[i+1] 13 | yp.append(prediction[i,0:end-start]) 14 | act = [c for c,i in enumerate(y[start:end]) if i>0] 15 | pred = {i:c for c,i in enumerate(yp[-1])} 16 | pred = [pred[i] for i in sorted(yp[-1],reverse=True)] 17 | result.append(apk(act,pred,k=12)) 18 | yp = np.concatenate(yp) 19 | assert(len(y)==len(yp)) 20 | return np.mean(result), yp 21 | 22 | 23 | 24 | def apk(actual, predicted, k=10): 25 | """ 26 | Computes the average precision at k. 27 | This function computes the average prescision at k between two lists of 28 | items. 29 | Parameters 30 | ---------- 31 | actual : list 32 | A list of elements that are to be predicted (order doesn't matter) 33 | predicted : list 34 | A list of predicted elements (order does matter) 35 | k : int, optional 36 | The maximum number of predicted elements 37 | Returns 38 | ------- 39 | score : double 40 | The average precision at k over the input lists 41 | """ 42 | if len(predicted)>k: 43 | predicted = predicted[:k] 44 | if not actual: 45 | return 0.0 46 | 47 | score = 0.0 48 | num_hits = 0.0 49 | 50 | for i,p in enumerate(predicted): 51 | if p in actual and p not in predicted[:i]: 52 | num_hits += 1.0 53 | score += num_hits / (i+1.0) 54 | 55 | 56 | return score / min(len(actual), k) 57 | 58 | -------------------------------------------------------------------------------- /nn/util/nn_util.py: -------------------------------------------------------------------------------- 1 | import math 2 | import tensorflow as tf 3 | import numpy as np 4 | 5 | 6 | def weight(name, shape, init='he', range=None): 7 | """ Initializes weight. 8 | :param name: Variable name 9 | :param shape: Tensor shape 10 | :param init: Init mode. 
xavier / normal / uniform / he (default is 'he') 11 | :param range: 12 | :return: Variable 13 | """ 14 | initializer = tf.constant_initializer() 15 | if init == 'xavier': 16 | fan_in, fan_out = _get_dims(shape) 17 | range = math.sqrt(6.0 / (fan_in + fan_out)) 18 | initializer = tf.random_uniform_initializer(-range, range) 19 | 20 | elif init == 'he': 21 | fan_in, _ = _get_dims(shape) 22 | std = math.sqrt(2.0 / fan_in) 23 | initializer = tf.random_normal_initializer(stddev=std) 24 | 25 | elif init == 'normal': 26 | initializer = tf.random_normal_initializer(stddev=0.1) 27 | 28 | elif init == 'uniform': 29 | if range is None: 30 | raise ValueError("range must not be None if uniform init is used.") 31 | initializer = tf.random_uniform_initializer(-range, range) 32 | 33 | var = tf.get_variable(name, shape, initializer=initializer) 34 | tf.add_to_collection('l2', tf.nn.l2_loss(var)) # Add L2 Loss 35 | return var 36 | 37 | 38 | def _get_dims(shape): 39 | fan_in = shape[0] if len(shape) == 2 else np.prod(shape[:-1]) 40 | fan_out = shape[1] if len(shape) == 2 else shape[-1] 41 | return int(fan_in), int(fan_out) 42 | 43 | 44 | def bias(name, dim, initial_value=0.0): 45 | """ Initializes bias parameter. 46 | :param name: Variable name 47 | :param dim: Tensor size (list or int) 48 | :param initial_value: Initial bias term 49 | :return: Variable 50 | """ 51 | dims = dim if isinstance(dim, list) else [dim] 52 | return tf.get_variable(name, dims, initializer=tf.constant_initializer(initial_value)) 53 | 54 | 55 | def batch_norm(x, is_training, default = False): 56 | """ Batch normalization. 57 | :param x: Tensor 58 | :param is_training: boolean tf.Variable, true indicates training phase 59 | :return: batch-normalized tensor 60 | """ 61 | with tf.variable_scope('BatchNorm'): 62 | # calculate dimensions (from tf.contrib.layers.batch_norm) 63 | inputs_shape = x.get_shape() 64 | axis = list(range(len(inputs_shape) - 1)) 65 | param_shape = inputs_shape[-1:] 66 | 67 | beta = tf.get_variable('beta', param_shape, initializer=tf.constant_initializer(0.)) 68 | gamma = tf.get_variable('gamma', param_shape, initializer=tf.constant_initializer(1.)) 69 | batch_mean, batch_var = tf.nn.moments(x, axis) 70 | ema = tf.train.ExponentialMovingAverage(decay=0.5) 71 | 72 | def mean_var_with_update(): 73 | ema_apply_op = ema.apply([batch_mean, batch_var]) 74 | with tf.control_dependencies([ema_apply_op]): 75 | return tf.identity(batch_mean), tf.identity(batch_var) 76 | 77 | if not default: 78 | mean, var = tf.cond(is_training, 79 | mean_var_with_update, 80 | lambda: (ema.average(batch_mean), ema.average(batch_var))) 81 | else: 82 | mean, var = mean_var_with_update() 83 | normed = tf.nn.batch_normalization(x, mean, var, beta, gamma, 1e-3) 84 | return normed 85 | 86 | 87 | def dropout(x, keep_prob, is_training): 88 | """ Apply dropout. 89 | :param x: Tensor 90 | :param keep_prob: float, Dropout rate. 
91 | :param is_training: boolean tf.Variable, true indicates training phase 92 | :return: dropout applied tensor 93 | """ 94 | return tf.cond(is_training, lambda: tf.nn.dropout(x, keep_prob), lambda: x) 95 | 96 | 97 | def conv(x, filter, is_training): 98 | l = tf.nn.conv2d(x, filter, strides=[1, 1, 1, 1], padding='SAME') 99 | l = batch_norm(l, is_training) 100 | return tf.nn.relu(l) 101 | 102 | 103 | def flatten(x): 104 | return tf.reshape(x, [-1]) 105 | 106 | 107 | def fully_connected(input, num_neurons, name, use_batch_norm=False, use_drop_out=False, keep_prob = 1.0, is_training = True , activation = 'relu', default_batch = 0): 108 | input_size = input.get_shape()[1] 109 | w = weight(name, [input_size, num_neurons], init='he') 110 | l = tf.matmul(input, w) 111 | if use_batch_norm: 112 | l = batch_norm(l, is_training, default = default_batch) 113 | if use_drop_out: 114 | l = dropout(l, keep_prob, is_training) 115 | if activation == 'relu': 116 | l = tf.nn.relu(l) 117 | elif activation == 'sigmoid': 118 | l = tf.nn.sigmoid(l) 119 | elif activation == 'None': 120 | pass 121 | else: 122 | raise NotImplementedError 123 | return l 124 | -------------------------------------------------------------------------------- /split.py: -------------------------------------------------------------------------------- 1 | import csv 2 | 3 | def split(inputname, display, outputdir,name=None): 4 | if name is None: 5 | name = inputname.split('/')[-1] 6 | 7 | ftr = [open('%s/%s.tr.%d'%(outputdir,name,i),'w') for i in range(3)] 8 | fva = [open('%s/%s.va.%d'%(outputdir,name,i),'w') for i in range(3)] 9 | 10 | 11 | with open(inputname, 'r') as f: 12 | head = f.readline() 13 | for i in range(3): 14 | ftr[i].write(head) 15 | fva[i].write(head) 16 | with open(display, 'r') as fd: 17 | for c,row in enumerate(csv.DictReader(fd)): 18 | line = f.readline() 19 | for i in range(3): 20 | if row['fold%d'%(i+1)]=='0': 21 | ftr[i].write(line) 22 | elif row['fold%d'%(i+1)]=='1': 23 | fva[i].write(line) 24 | if c%1000000 == 0: 25 | print c 26 | for i in range(3): 27 | ftr[i].close() 28 | fva[i].close() 29 | 30 | def split_display(display, outputdir): 31 | name = display.split('/')[-1] 32 | ftr = [open('%s/%s.display.tr.%d'%(outputdir,name,i),'w') for i in range(3)] 33 | fva = [open('%s/%s.display.va.%d'%(outputdir,name,i),'w') for i in range(3)] 34 | head = 'display_id,clicked\n' 35 | for i in range(3): 36 | ftr[i].write(head) 37 | fva[i].write(head) 38 | 39 | with open(display, 'r') as fd: 40 | for c,row in enumerate(csv.DictReader(fd)): 41 | line='%s,%s\n'%(row['display_id'],row.get('clicked','0')) # display_id plus label ('0' if clicked is absent) 42 | for i in range(3): 43 | if row['fold%d'%(i+1)]=='0': 44 | ftr[i].write(line) 45 | elif row['fold%d'%(i+1)]=='1': 46 | fva[i].write(line) 47 | if c%1000000 == 0: 48 | print c 49 | for i in range(3): 50 | ftr[i].close() 51 | fva[i].close() 52 | 53 | 54 | 55 | if __name__ == '__main__': 56 | #split(inputname='../stack/data/cv_0.691775_lb_0.69167/train_meta.csv', display='data/stack_split2.csv', outputdir='cvdata') 57 | #split_display(display='data/stack_split2.csv', outputdir='cvdata') 58 | #split(inputname='../better_split/data/clicks_va.csv', display='data/stack_split2.csv', outputdir='cvdata') 59 | #split(inputname='../better_split/good/cv_0.691873_lb_0.69122/cv_0.691873/cv.csv', display='data/stack_split2.csv', outputdir='cvdata',name='cv_0.691873') 60 | #split(inputname='../better_split/good/cv_0.690716/cv_0.690716/cv.csv', display='data/stack_split2.csv', outputdir='cvdata',name='cv_0.690716') 61 | 
#split(inputname='../better_split/good/cv_0.690598/cv_0.690598/cv.csv', display='data/stack_split2.csv', outputdir='cvdata',name='cv_0.690598') 62 | #split(inputname='data/ffm2_valid_k16_eta0.050.csv', display='data/stack_split2.csv', outputdir='cvdata',name='ffm2_valid_k16_eta0.050') 63 | split(inputname='data/ftrl_va_group.csv', display='data/stack_split2.csv', outputdir='cvdata',name='ftrl_va_group') 64 | split(inputname='data/mt_cv_0.681601/cv.csv', display='data/stack_split2.csv', outputdir='cvdata',name='mt_cv_0.681601') 65 | -------------------------------------------------------------------------------- /stack_nn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daxiongshu/stack-nn-tensorflow/8218247ce49d54db75a33edec816367de9949f23/stack_nn.png -------------------------------------------------------------------------------- /write_base_list.py: -------------------------------------------------------------------------------- 1 | def write_list(names,columns): 2 | for i in range(3): 3 | for j in ['train','test']: 4 | fo = open('nn/data/%s%d.list.base'%(j,i),'w') 5 | fo.write('name,columns\n') 6 | tag = 'tr' if j=='train' else 'va' 7 | for name,col in zip(names,columns): 8 | fo.write('"%s.%s.%d","%s"\n'%(name,tag,i,col)) 9 | fo.close() 10 | 11 | names = [#"../cvdata/train_meta.csv", 12 | #"../cvdata/cv_0.694441", 13 | #"../cvdata/cv_0.694441_cv_leak", 14 | "../cvdata/takuya1" 15 | #"../cvdata/xgb_cv_0.691432_lb_0.69531" 16 | ] 17 | cols = [#"sffm0,sffm1,sffm2,sffm3,sffm4,sffm5,sffm6,sffm7,sffm8,sffm9,ffm0", 18 | "clicked", 19 | ] 20 | 21 | write_list(names,cols) 22 | 23 | -------------------------------------------------------------------------------- /write_list.py: -------------------------------------------------------------------------------- 1 | def write_list(names,columns): 2 | for i in range(3): 3 | for j in ['train','test']: 4 | fo = open('nn/data/%s%d.list'%(j,i),'w') 5 | fo.write('name,columns\n') 6 | tag = 'tr' if j=='train' else 'va' 7 | for name,col in zip(names,columns): 8 | fo.write('"%s.%s.%d","%s"\n'%(name,tag,i,col)) 9 | fo.close() 10 | 11 | names = ["../cvdata/train_meta.csv", 12 | 13 | "../cvdata/cv_0.690716", 14 | "../cvdata/cv_0.690598", 15 | "../cvdata/ffm2_valid_k16_eta0.050", 16 | "../cvdata/mt_cv_0.681601", 17 | "../cvdata/ftrl_va_group", 18 | 19 | "../cvdata/2way_try1", 20 | "../cvdata/2way_try2", 21 | 22 | "../cvdata/cv_0.692248", 23 | "../cvdata/cv_0.692143", 24 | #"../cvdata/lat1", 25 | "../cvdata/cv_0.694441", 26 | "../cvdata/takuya1", 27 | #"../cvdata/takuya_features", 28 | "../cvdata/takuya2", 29 | "../cvdata/takuya3", 30 | "../cvdata/takuya4", 31 | # "../cvdata/fm_0.693821_cv_13", 32 | # "../cvdata/fm_0.693821_cv_14", 33 | # "../cvdata/fm_0.693821_cv_15", 34 | # "../cvdata/fm_0.693821_cv_16", 35 | # "../cvdata/fm_0.693821_cv_17", 36 | # "../cvdata/fm_0.693821_cv_18", 37 | # "../cvdata/fm_0.693821_cv_19", 38 | # "../cvdata/fm_0.693821_cv_20", 39 | # "../cvdata/fm_0.693821_cv_21", 40 | # "../cvdata/fm_0.693821_cv_22", 41 | # "../cvdata/fm_0.693821_cv_23", 42 | #"../cvdata/fm_0.693821_cv_24", 43 | ] 44 | cols = ["sffm0,sffm1,sffm2,sffm3,sffm4,sffm5,sffm6,sffm7,sffm8,sffm9,ffm0", 45 | "clicked","clicked","clicked","clicked","clicked", 46 | "neighbor_ad_document_id,neighbor_ad_leak,neighbor_ad_doc_after_click,ad_id_document_id,ad_id_leak,ad_id_doc_after_click,document_idx_document_id,document_idx_leak,document_idx_doc_after_click", 47 | 
"ad_id_category_id,ad_id_entity_id,ad_id_source_id,ad_id_publisher_id,campaign_id_category_id,campaign_id_entity_id,campaign_id_source_id,campaign_id_publisher_id,advertiser_id_category_id,advertiser_id_entity_id,advertiser_id_source_id,advertiser_id_publisher_id", 48 | "clicked", 49 | "clicked", 50 | #"lat0", 51 | "clicked", 52 | "clicked", 53 | #"doc_dot_doc,doc_dot_doc1,doc_dot_doc_categories_topics,doc_dot_doc_categories_entities,doc_dot_doc_topics_entities,doc_dot_doc_topics_entities_entities,doc_dot_doc_source_id,doc_dot_doc_publisher_id,doc_dot_doc_topics,doc_dot_doc_categories,doc_dot_doc_entities,user_dot_doc,user_dot_doc1,user_dot_doc_categories_topics,user_dot_doc_categories_entities,user_dot_doc_topics_entities,user_dot_doc_topics_entities_entities,user_dot_doc_source_id,user_dot_doc_publisher_id,user_dot_doc_topics,user_dot_doc_categories,user_dot_doc_entities,norm_user_dot_doc,norm_user_dot_doc1,norm_user_dot_doc_categories_topics,norm_user_dot_doc_categories_entities,norm_user_dot_doc_topics_entities,norm_user_dot_doc_topics_entities_entities,norm_user_dot_doc_source_id,norm_user_dot_doc_publisher_id,norm_user_dot_doc_topics,norm_user_dot_doc_categories,norm_user_dot_doc_entities", 54 | "clicked", 55 | "clicked", 56 | "clicked", 57 | #"document_id-ad_id,document_id-document_idx,document_id-campaign_id,document_id-advertiser_id,document_id-entity_idx,document_id-source_idx,document_id-publisher_idx,document_id-category_idx,document_id-topic_idx,document_id-source_id_leak,document_id-publisher_id_leak,document_id-leak", 58 | #"platform-ad_id,platform-document_idx,platform-campaign_id,platform-advertiser_id,platform-entity_idx,platform-source_idx,platform-publisher_idx,platform-category_idx,platform-topic_idx,platform-source_id_leak,platform-publisher_id_leak,platform-leak", 59 | #"geo_location-ad_id,geo_location-document_idx,geo_location-campaign_id,geo_location-advertiser_id,geo_location-entity_idx,geo_location-source_idx,geo_location-publisher_idx,geo_location-category_idx,geo_location-topic_idx,geo_location-source_id_leak,geo_location-publisher_id_leak,geo_location-leak", 60 | #"entity_id-ad_id,entity_id-document_idx,entity_id-campaign_id,entity_id-advertiser_id,entity_id-entity_idx,entity_id-source_idx,entity_id-publisher_idx,entity_id-category_idx,entity_id-topic_idx,entity_id-source_id_leak,entity_id-publisher_id_leak,entity_id-leak", 61 | #"source_id-ad_id,source_id-document_idx,source_id-campaign_id,source_id-advertiser_id,source_id-entity_idx,source_id-source_idx,source_id-publisher_idx,source_id-category_idx,source_id-topic_idx,source_id-source_id_leak,source_id-publisher_id_leak,source_id-leak", 62 | #"publisher_id-ad_id,publisher_id-document_idx,publisher_id-campaign_id,publisher_id-advertiser_id,publisher_id-entity_idx,publisher_id-source_idx,publisher_id-publisher_idx,publisher_id-category_idx,publisher_id-topic_idx,publisher_id-source_id_leak,publisher_id-publisher_id_leak,publisher_id-leak", 63 | #"category_id-ad_id,category_id-document_idx,category_id-campaign_id,category_id-advertiser_id,category_id-entity_idx,category_id-source_idx,category_id-publisher_idx,category_id-category_idx,category_id-topic_idx,category_id-source_id_leak,category_id-publisher_id_leak,category_id-leak", 64 | #"topic_id-ad_id,topic_id-document_idx,topic_id-campaign_id,topic_id-advertiser_id,topic_id-entity_idx,topic_id-source_idx,topic_id-publisher_idx,topic_id-category_idx,topic_id-topic_idx,topic_id-source_id_leak,topic_id-publisher_id_leak,topic_id-leak", 65 | 
#"day-ad_id,day-document_idx,day-campaign_id,day-advertiser_id,day-entity_idx,day-source_idx,day-publisher_idx,day-category_idx,day-topic_idx,day-source_id_leak,day-publisher_id_leak,day-leak", 66 | #"hour-ad_id,hour-document_idx,hour-campaign_id,hour-advertiser_id,hour-entity_idx,hour-source_idx,hour-publisher_idx,hour-category_idx,hour-topic_idx,hour-source_id_leak,hour-publisher_id_leak,hour-leak", 67 | #"weekday-ad_id,weekday-document_idx,weekday-campaign_id,weekday-advertiser_id,weekday-entity_idx,weekday-source_idx,weekday-publisher_idx,weekday-category_idx,weekday-topic_idx,weekday-source_id_leak,weekday-publisher_id_leak,weekday-leak", 68 | #"doc_after_click-ad_id,doc_after_click-document_idx,doc_after_click-campaign_id,doc_after_click-advertiser_id,doc_after_click-entity_idx,doc_after_click-source_idx,doc_after_click-publisher_idx,doc_after_click-category_idx,doc_after_click-topic_idx,doc_after_click-source_id_leak,doc_after_click-publisher_id_leak,doc_after_click-leak", 69 | ] 70 | 71 | write_list(names,cols) 72 | 73 | -------------------------------------------------------------------------------- /write_sub_base_list.py: -------------------------------------------------------------------------------- 1 | def write_list(names,columns): 2 | if True: 3 | for c,j in enumerate(['train','test']): 4 | fo = open('nn/data/sub_%s.list.base'%(j),'w') 5 | fo.write('name,columns\n') 6 | for name,col in zip(names[c],columns): 7 | fo.write('"%s","%s"\n'%(name,col)) 8 | fo.close() 9 | 10 | names1 =[ 11 | "../data/meta/ffm-train-dataWeight4-1__406-nextView-nextViewMulti-nextViewDot-nextViewMultiDot_Wleak_R0.3_K8_bag1.out" 12 | #"../data/meta/cv_0.694441_cv.csv", 13 | ] 14 | names2 =[ 15 | "../data/meta/ffm-train-dataWeight4-1__406-nextView-nextViewMulti-nextViewDot-nextViewMultiDot_Wleak_R0.3_K8_T12_bag1.out" 16 | #"../data/meta/cv_0.694441_sub.csv", 17 | ] 18 | names = (names1,names2) 19 | cols = ['clicked'] 20 | write_list(names,cols) 21 | 22 | -------------------------------------------------------------------------------- /write_sub_list.py: -------------------------------------------------------------------------------- 1 | def write_list(names,columns): 2 | if True: 3 | for c,j in enumerate(['train','test']): 4 | fo = open('nn/data/sub_%s.list'%(j),'w') 5 | fo.write('name,columns\n') 6 | for name,col in zip(names[c],columns): 7 | fo.write('"%s","%s"\n'%(name,col)) 8 | fo.close() 9 | 10 | names1 = ["../../stack/data/cv_0.691775_lb_0.69167/train_meta.csv", 11 | 12 | "../data/meta/cv_0.690598_cv.csv", 13 | "../data/meta/cv_0.690716_cv.csv", 14 | "../data/meta/cv_0.692248_cv.csv", 15 | "../data/meta/cv_0.692143_cv.csv", 16 | "../data/meta/mt_cv_0.681601_cv.csv", 17 | "../data/meta/ftrl_va_group.csv", 18 | "../data/meta/ffm2_valid_k16_eta0.050.csv", 19 | #"../data/meta/leak_meta_cv.csv", 20 | '../data/meta/ffm-train-dataWeight4-1__406-nextView_Wleak_R0.3_K8_bag1.out', 21 | '../data/meta/ffm-train-dataWeight4-1__407-1.5-nextView-nextViewMulti-nextViewDot-nextViewMultiDot_Wleak_R0.4_K8_bag1.out', 22 | '../data/meta/va_xgb.csv', 23 | '../data/meta/cv_0.694441_cv.csv', 24 | "../data/meta/ffm-train-dataWeight4-1__406-nextView-nextViewMulti-nextViewDot-nextViewMultiDot_Wleak_R0.3_K8_bag1.out", 25 | "../data/meta/full_try_1va.fm_2way.target", 26 | "../data/meta/full_try_2va.fm_2way.target", 27 | ] 28 | names2 = ["../../stack/data/cv_0.691775_lb_0.69167/test_meta.csv", 29 | 30 | "../data/meta/cv_0.690598_sub.csv", 31 | "../data/meta/cv_0.690716_sub.csv", 32 | "../data/meta/cv_0.692248_sub.csv", 33 | 
"../data/meta/cv_0.692143_sub.csv", 34 | "../data/meta/mt_cv_0.681601_sub.csv", 35 | "../data/meta/ftrl_test_group.csv", 36 | "../data/meta/ffm2_pred_k16_eta0.050.csv", 37 | #"../data/meta/leak_meta_sub.csv", 38 | '../data/meta/ffm-train-dataWeight4-1__406-nextView_Wleak_R0.3_K8_T12_bag1.out', 39 | '../data/meta/ffm-train-dataWeight4-1__407-1.5-nextView-nextViewMulti-nextViewDot-nextViewMultiDot_Wleak_R0.4_K8_T23_bag1.out', 40 | '../data/meta/test_xgb.csv', 41 | '../data/meta/cv_0.694441_sub.csv', 42 | "../data/meta/ffm-train-dataWeight4-1__406-nextView-nextViewMulti-nextViewDot-nextViewMultiDot_Wleak_R0.3_K8_T12_bag1.out", 43 | "../data/meta/full_try_1test.fm_2way.target", 44 | "../data/meta/full_try_2test.fm_2way.target", 45 | ] 46 | 47 | names = (names1,names2) 48 | cols = ["sffm0,sffm1,sffm2,sffm3,sffm4,sffm5,sffm6,sffm7,sffm8,sffm9,ffm0", 49 | "clicked","clicked","clicked","clicked","clicked","clicked","clicked", 50 | #"source_id_leak,publisher_id_leak", 51 | "clicked","clicked","clicked","clicked","clicked", 52 | "neighbor_ad_document_id,neighbor_ad_leak,neighbor_ad_doc_after_click,ad_id_document_id,ad_id_leak,ad_id_doc_after_click,document_idx_document_id,document_idx_leak,document_idx_doc_after_click", 53 | "ad_id_category_id,ad_id_entity_id,ad_id_source_id,ad_id_publisher_id,campaign_id_category_id,campaign_id_entity_id,campaign_id_source_id,campaign_id_publisher_id,advertiser_id_category_id,advertiser_id_entity_id,advertiser_id_source_id,advertiser_id_publisher_id", 54 | ] 55 | 56 | write_list(names,cols) 57 | 58 | --------------------------------------------------------------------------------