├── python ├── figcrop_single.sh ├── bid_strategy.py ├── mcpc_bid.py ├── extract_click_mp.py ├── model.py ├── integrate_click.py ├── make_output.py ├── opt_bid.py ├── interval_landscape.py ├── config.py ├── rr_model.py ├── statistics.py ├── sqlr_model.py ├── bid_landscape.py ├── integrate_performance.py ├── eu_model.py ├── tool.py ├── test_lr.py ├── test_sqlr.py ├── test_lrlin.py ├── test_rr.py ├── test_eu.py ├── dataset.py ├── replay.py ├── test_em.py ├── lr_model.py └── em_model.py ├── scripts ├── pinyou_stage2.sh ├── run-sqlr.sh ├── run-lr.sh ├── run-eu.sh ├── replay.sh ├── run-rr.sh ├── pinyou_stage2_lrlin.sh ├── integrate_performance.sh ├── run-em.sh └── pinyou_stage1.sh └── README.md /python/figcrop_single.sh: -------------------------------------------------------------------------------- 1 | file=$1 2 | pdfcrop $file $file 3 | -------------------------------------------------------------------------------- /scripts/pinyou_stage2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | props="128 64 32 16 8 4 2 1" 3 | 4 | for prop in $props; do 5 | sh run-em.sh $prop > ../output/$prop.log & 6 | done -------------------------------------------------------------------------------- /scripts/run-sqlr.sh: -------------------------------------------------------------------------------- 1 | camps=$1 2 | steps="1 10 20 30 40 50 60" 3 | 4 | for camp in $camps; do 5 | for step in $steps; do 6 | echo $camp $step 7 | python ../python/test_sqlr.py $camp $step 8 | done 9 | done -------------------------------------------------------------------------------- /scripts/run-lr.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo "run" 3 | camps=$1 4 | steps="1E-4 1E-5 1E-3 5E-3 5E-5 5E-4 1E-2 5E-2 1E-1" 5 | for camp in $camps; do 6 | for step in $steps; do 7 | echo $camp 8 | python ../python/test_lr.py $camp $step 9 | done 10 | done 11 | -------------------------------------------------------------------------------- /python/bid_strategy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | class BidStrategy: 4 | '''Bid strategy with CTR estimation module and bid function module.''' 5 | 6 | def __init__(self, parameters): 7 | pass 8 | 9 | def bid(self): 10 | pass 11 | 12 | def main(): 13 | print "main method." 14 | 15 | if __name__ == '__main__': 16 | main() -------------------------------------------------------------------------------- /scripts/run-eu.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | camps=$1 3 | laplace="3" 4 | scale="10 20 30 40 60 80 90" 5 | ratio="0 0.1 0.5" 6 | 7 | for camp in $camps; do 8 | for lap in $laplace; do 9 | for sca in $scale; do 10 | for rat in $ratio; do 11 | echo $camp $lap $sca $rat 12 | python ../python/test_eu.py $camp $lap $sca $rat 13 | done 14 | done 15 | done 16 | done -------------------------------------------------------------------------------- /scripts/replay.sh: -------------------------------------------------------------------------------- 1 | project_folder="/home/rk/Code/optimal-ctr-bidding/" 2 | cd $project_folder/python 3 | budget_props="1 4 8 16 32" 4 | 5 | name=$1 6 | model=$2 7 | test_file=$3 8 | output_folder=$4 9 | 10 | echo $name 11 | echo $model 12 | echo $test_file 13 | echo $output_folder 14 | 15 | for prop in $budget_props; do 16 | echo $prop 17 | python replay.py -1 $prop $model $test_file $output_folder/$prop\_$name\_perf.csv 18 | done 19 | -------------------------------------------------------------------------------- /python/mcpc_bid.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | from bid_strategy import BidStrategy 3 | 4 | class McpcBid(BidStrategy): 5 | def __init__(self, camp_v): 6 | self.camp_v = camp_v 7 | 8 | def set_camp_value(self, v): 9 | self.camp_v = v 10 | 11 | def bid(self, ctr): 12 | bid_price = int(self.camp_v * ctr * 1E3) 13 | # print "bid price \t" + `bid_price` 14 | return bid_price 15 | 16 | def main(): 17 | print "main method." 18 | 19 | if __name__ == '__main__': 20 | main() -------------------------------------------------------------------------------- /scripts/run-rr.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | camps=$1 3 | laplace="3 10" 4 | scale="0.005 0.01 0.02 0.03 0.04 0.06 0.08 0.1 0.2 0.5" #"0.005 0.01 0.012 0.016 0.032 0.05 0.064 0.128 0.256 0.512 1.0" 5 | ratio="0.1 0.01 0.5" #"0" 6 | 7 | for camp in $camps; do 8 | for lap in $laplace; do 9 | for sca in $scale; do 10 | for rat in $ratio; do 11 | echo $camp $lap $sca $rat 12 | python ../python/test_rr.py $camp $lap $sca $rat 13 | done 14 | done 15 | done 16 | done 17 | -------------------------------------------------------------------------------- /python/extract_click_mp.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import random 3 | import math 4 | import operator 5 | import tool 6 | 7 | if len(sys.argv) < 3: 8 | print "python extract_click_mp.py test.yzx.txt camp_click.txt" 9 | exit(-1) 10 | 11 | test_file = open(sys.argv[1], 'r') 12 | out_file = open(sys.argv[2], 'w') 13 | 14 | for line in test_file: 15 | li = tool.ints(line.replace(':1','').split()) 16 | clk = li[0] 17 | mp = li[1] 18 | out_file.write(`clk` + "\t" + `mp` + "\n") 19 | 20 | test_file.close() 21 | out_file.close() -------------------------------------------------------------------------------- /python/model.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | class Model: 4 | def __init__(self, train_data, test_data): 5 | self.set_train_data(train_data) 6 | self.set_test_data(test_data) 7 | 8 | def set_train_data(self, train_data): 9 | self.train_data = train_data 10 | 11 | def set_test_data(self, test_data): 12 | self.test_data = test_data 13 | 14 | def train(self): 15 | pass 16 | 17 | def converged(self): 18 | pass 19 | return False 20 | 21 | def test(self): 22 | pass 23 | 24 | def calc_performance(self, dataset): 25 | pass 26 | 27 | def main(): 28 | print "main method." 29 | 30 | if __name__ == '__main__': 31 | main() -------------------------------------------------------------------------------- /python/integrate_click.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import random 3 | import math 4 | import operator 5 | import tool 6 | 7 | if len(sys.argv) < 3: 8 | print "python integrate_click.py bid_mp_file click_file output_file" 9 | exit(-1) 10 | 11 | bid_mp_file = open(sys.argv[1], 'r') 12 | click_file = open(sys.argv[2], 'r') 13 | out_file = open(sys.argv[3], 'w') 14 | 15 | bi_list = zip(bid_mp_file, click_file) 16 | 17 | for (line_a, line_b) in bi_list: 18 | li_a = tool.ints(line_a.split()) 19 | li_b = tool.ints(line_b.split()) 20 | clk = li_b[0] 21 | bi = li_a[0] 22 | mp = li_a[1] 23 | out_file.write(`bi` + "\t" + `mp` + "\t" + `clk` + "\n") 24 | 25 | bid_mp_file.close() 26 | click_file.close() 27 | out_file.close() -------------------------------------------------------------------------------- /scripts/pinyou_stage2_lrlin.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | python ../python/test_lrlin.py 1458 0.001 > ../output/lr_1458_0.001.log & 4 | python ../python/test_lrlin.py 2259 0.1 > ../output/lr_2259_0.1.log & 5 | python ../python/test_lrlin.py 2261 0.001 > ../output/lr_2261_0.001.log & 6 | python ../python/test_lrlin.py 2821 0.1 > ../output/lr_2821_0.1.log & 7 | python ../python/test_lrlin.py 2997 0.0005 > ../output/lr_2997_0.0005.log & 8 | python ../python/test_lrlin.py 3358 0.01 > ../output/lr_3358_0.01.log & 9 | python ../python/test_lrlin.py 3386 0.001 > ../output/lr_3386_0.001.log & 10 | python ../python/test_lrlin.py 3427 0.005 > ../output/lr_3427_0.005.log & 11 | python ../python/test_lrlin.py 3476 0.005 > ../output/lr_3476_0.005.log & 12 | -------------------------------------------------------------------------------- /python/make_output.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import os 3 | import sys 4 | 5 | if len(sys.argv) < 2: 6 | print "Usage: python yoyi_make_output.py lr/yoyi(/)" 7 | exit(-1) 8 | 9 | folder = sys.argv[1] 10 | files = os.listdir(folder) 11 | 12 | fo = open(folder + '/integration.txt', 'w') 13 | header = "camp_id\tmodel\tdataset\trevenue\tctr\tcpc\tauc\trmse\tcpm\tbids\timps\tclks\tlaplace\tinterval\tscale\tds_ratio\tbudget_prop" 14 | fo.write(header + '\n') 15 | 16 | for f in files: 17 | if not f.endswith('.csv'): 18 | continue 19 | file_path = os.path.join(folder, f) 20 | fi = open(file_path) 21 | lines = fi.read().split('\n') 22 | if len(lines) < 2: 23 | continue 24 | fo.write(lines[1] + '\n') 25 | fi.close() 26 | 27 | fo.close() -------------------------------------------------------------------------------- /scripts/integrate_performance.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python ../python/integrate_performance.py ../output/selected/ revenue 1458 3 | python ../python/integrate_performance.py ../output/selected/ revenue 2259 4 | python ../python/integrate_performance.py ../output/selected/ revenue 2261 5 | python ../python/integrate_performance.py ../output/selected/ revenue 2821 6 | python ../python/integrate_performance.py ../output/selected/ revenue 2997 7 | python ../python/integrate_performance.py ../output/selected/ revenue 3358 8 | python ../python/integrate_performance.py ../output/selected/ revenue 3386 9 | python ../python/integrate_performance.py ../output/selected/ revenue 3427 10 | python ../python/integrate_performance.py ../output/selected/ revenue 3476 -------------------------------------------------------------------------------- /python/opt_bid.py: -------------------------------------------------------------------------------- 1 | from bid_strategy import BidStrategy 2 | import config 3 | 4 | class OptBid(BidStrategy): 5 | def __init__(self, camp_v, mu): 6 | self.mu = mu 7 | self.phi = 1.0 / (1.0 + self.mu) 8 | self.camp_v = camp_v 9 | 10 | def set_camp_value(self, v): 11 | self.camp_v = v 12 | 13 | def set_mu(self, mu): 14 | self.mu = mu 15 | self.phi = 1.0 / (1.0 + self.mu) 16 | 17 | def calibrate(self, ctr): 18 | ctr_calib = ctr / (ctr + (1 - ctr) / config.ds_ratio) 19 | return ctr_calib 20 | 21 | def bid_calib(self, camp_v, mu, ctr): 22 | bid_price = int(camp_v * self.calibrate(ctr) / (1.0 + mu) * 1E3) 23 | return bid_price 24 | 25 | # b = 1.0 / (1.0 + mu) * ctr 26 | def bid(self, ctr): 27 | bid_price = int(1.0 / (1.0 + self.mu) * self.camp_v * ctr * 1E3) 28 | return bid_price -------------------------------------------------------------------------------- /python/interval_landscape.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | from bid_landscape import BidLandscape 3 | 4 | class IntervalLandscape(BidLandscape): 5 | '''The interval style bid landscape.''' 6 | 7 | def __init__(self, dataset, campaign_id, laplace=1, interval=2): 8 | BidLandscape.__init__(self, dataset, campaign_id, laplace) 9 | self.interval = interval if not (interval > self.max_price) else (self.max_price + 1) 10 | 11 | #TODO test the interval mode 12 | def get_probability(self, price): 13 | if price > self.max_price: 14 | return self.get_probability(self.max_price) 15 | left_index = int(price / self.interval) * self.interval 16 | idx = left_index + self.interval 17 | right_index = len(self.distribution) if len(self.distribution) < idx else idx 18 | probability = 0.0 19 | for p in range(left_index, right_index): 20 | probability += self.distribution[p] 21 | return probability 22 | 23 | def main(): 24 | print "main method." 25 | 26 | if __name__ == '__main__': 27 | main() -------------------------------------------------------------------------------- /scripts/run-em.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | prop=$1 4 | #"64 32 16 8 4 2 1" 5 | 6 | python ../python/test_em.py 1458 rr 3 0.512 0 $prop 7 | python ../python/test_em.py 1458 eu 3 90 0.1 $prop 8 | 9 | python ../python/test_em.py 2259 eu 3 80 0.5 $prop 10 | python ../python/test_em.py 2259 rr 3 2E-1 0.5 $prop 11 | 12 | python ../python/test_em.py 2261 rr 10 0.01 0.5 $prop 13 | python ../python/test_em.py 2261 eu 3 20 0.5 $prop 14 | 15 | python ../python/test_em.py 2821 rr 3 0.128 0 $prop 16 | python ../python/test_em.py 2821 eu 3 40 0.5 $prop 17 | 18 | python ../python/test_em.py 2997 eu 3 80 0.5 $prop 19 | python ../python/test_em.py 2997 rr 10 0.256 0 $prop 20 | 21 | python ../python/test_em.py 3358 rr 10 0.016 0 $prop 22 | python ../python/test_em.py 3358 eu 3 90 0.1 $prop 23 | 24 | python ../python/test_em.py 3386 rr 10 0.04 0.5 $prop 25 | python ../python/test_em.py 3386 eu 3 80 0.1 $prop 26 | 27 | python ../python/test_em.py 3427 rr 3 0.5 0.5 $prop 28 | python ../python/test_em.py 3427 eu 3 90 0.1 $prop 29 | 30 | python ../python/test_em.py 3476 rr 3 0.032 0 $prop 31 | python ../python/test_em.py 3476 eu 3 40 0.1 $prop 32 | -------------------------------------------------------------------------------- /scripts/pinyou_stage1.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd /home/rk/Code/optimal-ctr-bidding/scripts/ 4 | pwd 5 | 6 | echo "run" 7 | sh run-lr.sh "1458 2261 2821" > ../output/log_parallel/lr_1458_2261_2821.log & 8 | sh run-rr.sh "1458 2261 2821" > ../output/log_parallel/rr_1458_2261_2821.log & 9 | sh run-eu.sh "1458 2261 2821" > ../output/log_parallel/eu_1458_2261_2821.log & 10 | sh run-lr.sh "2997 3358 3386" > ../output/log_parallel/lr_2997_3358_3386.log & 11 | sh run-rr.sh "2997 3358 3386" > ../output/log_parallel/rr_2997_3358_3386.log & 12 | sh run-eu.sh "2997 3358 3386" > ../output/log_parallel/eu_2997_3358_3386.log & 13 | sh run-lr.sh "3427 3476 2259" > ../output/log_parallel/lr_3427_3476_2259.log & 14 | sh run-rr.sh "3427 3476 2259" > ../output/log_parallel/rr_3427_3476_2259.log & 15 | sh run-eu.sh "3427 3476 2259" > ../output/log_parallel/eu_3427_3476_2259.log & 16 | #sqlr 17 | sh run-sqlr.sh "1458 2261 2821" > ../output/log_parallel/sqlr_1458_2261_2821.log & 18 | sh run-sqlr.sh "2997 3358 3386" > ../output/log_parallel/sqlr_2997_3358_3386.log & 19 | sh run-sqlr.sh "3427 3476 2259" > ../output/log_parallel/sqlr_3427_3476_2259.log & 20 | 21 | echo "done" -------------------------------------------------------------------------------- /python/config.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | INTVL = False 4 | 5 | #--- data folder ---# 6 | data_folder = "../../make-ipinyou-data/" 7 | train_postfix = "/train.yzx.txt" 8 | test_postfix = "/test.yzx.txt" 9 | 10 | output_folder = "../output/" 11 | 12 | campaign_list = [1458, 2259, 2261, 2821, 2997, 3358, 3386, 3427, 3476] 13 | campaign = 100000 14 | 15 | #--- training hyper parameter ---# 16 | model_list = ['rr', 'eu'] 17 | model_name = '' 18 | laplace = 3 19 | interval = 5 20 | budget_prop = 1 21 | 22 | lr_train_round = 20 23 | lr_alpha = 5E-3 24 | lr_lambda = 1E-4 25 | eu_lambda = 1E-2 26 | 27 | eu_train_round = 30 28 | eu_scale = 10 29 | ds_ratio = 0 30 | mu_range = np.arange(-0.99, 0.99, 0.01) 31 | #np.arange(-0.8, 0.1, 0.1).extend(np.arange(-0.1, 0.1, 0.01).extend(np.arange(0.1, 0.9, 0.1))) 32 | 33 | em_scale = 1E-3 34 | em_round = 30 35 | 36 | #--- debug parameter ---# 37 | math_err_num = 0 38 | 39 | 40 | #--- replay parameter ---# 41 | test_progress_unit = 0.1 42 | train_progress_unit = 0.25 43 | budget_props = [128, 64, 32, 16, 8, 4, 2, 1] 44 | 45 | #--- draw parameter ---# 46 | colors = {'lr':'cx--', 'rr':'or-', 'eu':'kp-', 'sqlr':'*b--'} 47 | -------------------------------------------------------------------------------- /python/rr_model.py: -------------------------------------------------------------------------------- 1 | from eu_model import EuModel 2 | from bid_landscape import BidLandscape 3 | from bid_strategy import BidStrategy 4 | from dataset import Dataset 5 | import math 6 | import random 7 | import tool 8 | import config 9 | 10 | class RrModel(EuModel): 11 | def __init__(self, train_data, test_data): 12 | EuModel.__init__(self, train_data, test_data) 13 | 14 | def train(self): 15 | random.seed(10) 16 | train_data = self.train_data 17 | progress = 0.0 18 | iter_id = train_data.init_index() 19 | while not train_data.reached_tail(iter_id): 20 | data = train_data.get_next_data(iter_id) 21 | y = data[0] 22 | feature = data[2:len(data)] 23 | ctr = tool.estimate_ctr(self.weight, feature, train_flag=True) 24 | phi = 1.0 / (1.0 + self.mu) 25 | bp = self.bid_strategy.bid(ctr) 26 | pz = self.train_data.landscape.get_probability(bp) 27 | # print `bp` + '\t' + `pz` 28 | scale_x = (phi * ctr - y) * phi * math.pow(self.camp_v, 2) * pz * config.eu_scale 29 | for idx in feature: 30 | self.weight[idx] = self.weight[idx] * self.reg_update_param - config.lr_alpha * scale_x 31 | # prg = train_data.get_progress(iter_id) 32 | # if prg < 0.9 and prg > (progress + config.train_progress_unit - 1E-3): 33 | # self.test() 34 | # progress += config.train_progress_unit 35 | -------------------------------------------------------------------------------- /python/statistics.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | from dataset import Dataset 3 | import config 4 | import sys 5 | 6 | if len(sys.argv) > 1: 7 | config.campaign_list = [int(sys.argv[1])] 8 | 9 | fo = open("../output/statistics.csv", 'w') 10 | header = "camp_id\tdataset\tmax_price\tctr\tecpc\tecpm\tclk_sum\tcost_sum\tsize\n" 11 | 12 | fo.write(header) 13 | 14 | for camp_id in config.campaign_list: 15 | train_dataset = Dataset(config.data_folder+`camp_id`+config.train_postfix, camp_id) 16 | tr_stat = train_dataset.get_statistics() 17 | 18 | test_dataset = Dataset(config.data_folder+`camp_id`+config.test_postfix, camp_id) 19 | te_stat = test_dataset.get_statistics() 20 | 21 | tr_line = "%d\t%s\t%d\t%f\t%d\t%d\t%d\t%d\t%d\n" % (camp_id, "train", tr_stat['max_price'], tr_stat['ctr'], 22 | tr_stat['ecpc'], tr_stat['ecpm'], tr_stat['clk_sum'], 23 | tr_stat['cost_sum'], tr_stat['size']) 24 | te_line = "%d\t%s\t%d\t%f\t%d\t%d\t%d\t%d\t%d\n" % (camp_id, "test", te_stat['max_price'], te_stat['ctr'], 25 | te_stat['ecpc'], te_stat['ecpm'], te_stat['clk_sum'], 26 | te_stat['cost_sum'], te_stat['size']) 27 | 28 | fo.write(tr_line) 29 | fo.write(te_line) 30 | 31 | del train_dataset 32 | del test_dataset 33 | 34 | print "Deleted " + `camp_id` 35 | fo.close() 36 | 37 | print "done" -------------------------------------------------------------------------------- /python/sqlr_model.py: -------------------------------------------------------------------------------- 1 | from lr_model import LrModel 2 | from bid_landscape import BidLandscape 3 | from opt_bid import OptBid 4 | from dataset import Dataset 5 | import math 6 | import random 7 | import tool 8 | import config 9 | 10 | class SqlrModel(LrModel): 11 | def __init__(self, train_data, test_data): 12 | LrModel.__init__(self, train_data, test_data) 13 | 14 | def train(self): # train with one traversal of the full train_data 15 | random.seed(10) 16 | train_data = self.train_data 17 | # print "Train data \t" + `train_data` + "\tsize \t" + `train_data.get_size()` 18 | progress = 0.0 19 | iter_id = train_data.init_index() 20 | while not train_data.reached_tail(iter_id): 21 | data = train_data.get_next_data(iter_id) 22 | y = data[0] 23 | feature = data[2:len(data)] 24 | ctr = self.estimate_ctr(self.weight, feature, train_flag=True, ctr_avg=train_data.get_statistics()['ctr']) 25 | for idx in feature: # update 26 | self.weight[idx] = self.weight[idx] * self.reg_update_param - config.lr_alpha * (ctr - y) * ctr * (1-ctr) 27 | # prg = train_data.get_progress(iter_id) 28 | # if prg < 0.9 and prg > (progress + config.train_progress_unit - 1E-3): 29 | # self.test() 30 | # progress += config.train_progress_unit 31 | 32 | def estimate_ctr(self, weight, feature, train_flag = False, ctr_avg=0.125): 33 | value = 0.0 34 | for idx in feature: 35 | if idx in weight: 36 | value += weight[idx] 37 | elif train_flag: 38 | if idx == 0: 39 | weight[idx] = - math.log(1.0 / (ctr_avg) - 1.0) 40 | else: 41 | weight[idx] = tool.next_init_weight() 42 | ctr = tool.sigmoid(value) 43 | # print "Estimated CTR \t" + `ctr` 44 | return ctr -------------------------------------------------------------------------------- /python/bid_landscape.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | class BidLandscape: 4 | '''The landscape making and storage class.''' 5 | 6 | def __init__(self, dataset, camp_id, laplace=1): 7 | self.dataset = dataset 8 | self.dataset.init_landscape(self) 9 | self.camp_id = camp_id 10 | self.laplace = laplace if laplace>1 else 1 11 | self.init_distribution() 12 | self.make_distribution() 13 | print "Inited Bid Landscape." 14 | 15 | def get_campaign_id(self): 16 | return self.camp_id 17 | 18 | def init_distribution(self): 19 | self.max_price = self.dataset.get_max_price() 20 | self.distribution = [0.0*i for i in range(0, self.max_price+1)] 21 | 22 | def get_distribution(self): 23 | return self.distribution 24 | 25 | def make_distribution(self): # make the original distribution with laplace smoothing 26 | mp_dict = {} 27 | iter_id = self.dataset.init_index() 28 | while not self.dataset.reached_tail(iter_id): 29 | data = self.dataset.get_next_data(iter_id) 30 | mp = data[1] 31 | if mp in mp_dict: 32 | mp_dict[mp] = mp_dict[mp] + 1 33 | else: 34 | mp_dict[mp] = 1 35 | total_num = self.dataset.get_size() + (self.max_price + 1) * self.laplace 36 | for p in range(0, self.max_price+1): 37 | if p not in mp_dict: 38 | self.distribution[p] = 1.0 * self.laplace / total_num 39 | else: 40 | self.distribution[p] = 1.0 * (mp_dict[p] + self.laplace) / total_num 41 | print "Landscape made." 42 | 43 | def get_probability(self, price): # get the probability of the given price in the landscape 44 | price = int(price) 45 | probability = 0.0 46 | if price > self.max_price: 47 | probability = self.distribution[self.max_price] 48 | elif price < 0: 49 | probability = self.distribution[0] 50 | else: 51 | probability = self.distribution[price] 52 | return probability 53 | 54 | 55 | 56 | 57 | def main(): 58 | print "main method." 59 | 60 | if __name__ == '__main__': 61 | main() -------------------------------------------------------------------------------- /python/integrate_performance.py: -------------------------------------------------------------------------------- 1 | import tool 2 | import config 3 | import sys 4 | import os 5 | import matplotlib.pyplot as pl 6 | 7 | def draw(camp, metric, performances, folder): 8 | print camp 9 | pl.figure(figsize=(5, 5)) 10 | legend = [] 11 | min_y = 1E10 12 | max_y = 0 13 | for model in performances: 14 | perf = performances[model] 15 | legend.append(model.upper()) 16 | pl.plot(range(0, len(perf), 1), perf, config.colors[model]) 17 | mi = min(perf) 18 | ma = max(perf) 19 | min_y = mi if mi < min_y else min_y 20 | max_y = ma if ma > max_y else max_y 21 | pl.xlabel("Training Rounds") 22 | pl.ylabel(metric) 23 | min_y = min_y - abs(int(0.05*min_y)) 24 | max_y = max_y + abs(int(0.05*max_y)) 25 | pl.ylim([min_y, max_y]) 26 | pl.title('Learning curve in Camp. ' + camp) 27 | path = os.path.join(folder, camp+"_"+metric+".pdf") 28 | pl.grid(True) 29 | pl.legend(legend, loc = 'lower right') 30 | # pl.show() 31 | pl.savefig(path, dpi=300) 32 | pl.close() 33 | 34 | def read_values(file_path, metric): 35 | if metric == 'revenue': 36 | metric = 'Test' 37 | fi = open(file_path, 'r') 38 | lines = fi.read().split('\n') 39 | fi.close() 40 | count_flag = False 41 | metric_index = 1 # defaultly count on revenue 42 | perf_list = [] 43 | for line in lines: 44 | args = line.split('\t') 45 | if args[0] == 'Round': 46 | count_flag = True 47 | try: 48 | metric_index = args.index(metric) 49 | except ValueError: 50 | print "No such metric name." 51 | print ValueError 52 | exit(-1) 53 | continue 54 | if count_flag and not args[0] == '': 55 | perf_list.append(float(args[metric_index])) 56 | return perf_list 57 | 58 | def main(): 59 | if len(sys.argv) < 4: 60 | print "Usage: python draw_camp_perf.py ../output revenue 1458" 61 | exit(-1) 62 | 63 | folder = sys.argv[1] # '../output/' 64 | metric = sys.argv[2] # 'revenue' 65 | camp = sys.argv[3] # '1458' 66 | print camp 67 | files = os.listdir(folder) 68 | 69 | performances = {} 70 | 71 | for f in files: 72 | if not f.endswith('.csv'): 73 | continue 74 | params = f.split('_') 75 | camp_id = params[0] 76 | if not camp_id == camp: 77 | continue 78 | model = params[1] 79 | perf_list = read_values(os.path.join(folder, f), metric) 80 | performances[model] = perf_list 81 | 82 | print performances 83 | # folder = "./" 84 | # camp = "1458" 85 | # metric = "revenue" 86 | # performances = {'eu': [1,3,5,7,9], 87 | # 'rr': [2,2,6,8,19,13], 88 | # 'lr': [1,2,5,5], 89 | # 'sqlr': [0,3,4,5,8]} 90 | # print performances 91 | # draw(camp, metric, performances, folder) 92 | 93 | if __name__ == '__main__': 94 | main() -------------------------------------------------------------------------------- /python/eu_model.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on Dec 14, 2015 3 | 4 | @author: rk 5 | ''' 6 | from lr_model import LrModel 7 | from bid_landscape import BidLandscape 8 | from opt_bid import OptBid 9 | from dataset import Dataset 10 | import math 11 | import random 12 | import tool 13 | import config 14 | 15 | class EuModel(LrModel): 16 | def __init__(self, train_data, test_data): 17 | LrModel.__init__(self, train_data, test_data) 18 | 19 | def init_parameters(self): 20 | self.camp_v = self.train_data.get_statistics()['ecpc'] 21 | if config.ds_ratio > 0: 22 | self.ori_camp_v = self.train_data.get_statistics()['ori_ecpc'] 23 | self.budget = int(self.test_data.get_statistics()['cost_sum'] / config.budget_prop) 24 | self.mu = 0.0 25 | 26 | def init_bid_strategy(self): 27 | self.bid_strategy = OptBid(self.camp_v, self.mu) 28 | 29 | def train(self): 30 | random.seed(10) 31 | train_data = self.train_data 32 | progress = 0.0 33 | iter_id = train_data.init_index() 34 | while not train_data.reached_tail(iter_id): 35 | data = train_data.get_next_data(iter_id) 36 | y = data[0] 37 | feature = data[2:len(data)] 38 | ctr = self.estimate_ctr(self.weight, feature, train_flag=True, ctr_avg=train_data.get_statistics()['ctr']) 39 | # tool.estimate_ctr(self.weight, feature, train_flag=True) 40 | phi = 1.0 / (1.0 + self.mu) 41 | bp = self.bid_strategy.bid(ctr) 42 | pz = self.train_data.landscape.get_probability(bp) 43 | # print `bp` + '\t' + `pz` 44 | scale_x = (phi * ctr - y) * phi * math.pow(self.camp_v, 2) * pz * ctr * (1 - ctr) * config.eu_scale 45 | for idx in feature: 46 | self.weight[idx] = self.weight[idx] * self.reg_update_param - config.lr_alpha * scale_x 47 | # prg = train_data.get_progress(iter_id) 48 | # if prg < 0.9 and prg > (progress + config.train_progress_unit - 1E-3): 49 | # self.test() 50 | # progress += config.train_progress_unit 51 | 52 | def estimate_ctr(self, weight, feature, train_flag = False, ctr_avg=0.125): 53 | value = 0.0 54 | for idx in feature: 55 | if idx in weight: 56 | value += weight[idx] 57 | elif train_flag: 58 | if idx == 0: 59 | weight[idx] = - math.log(1.0 / (ctr_avg) - 1.0) 60 | else: 61 | weight[idx] = tool.next_init_weight() 62 | ctr = tool.sigmoid(value) 63 | # print "Estimated CTR \t" + `ctr` 64 | return ctr -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Optimal CTR bidding 2 | An experimental framework to support experiments in CIKM 2016 paper "User Response Learning for Directly Optimizing Campaign Performance in Display Advertising". [PDF](http://apex.sjtu.edu.cn/public/files/papers/20160817/opt-ctr-bid.pdf) 3 | 4 | If you have any problem, please [send an E-mail](mailto:kren@apex.sjtu.edu.cn) to [Kan Ren](http://apex.sjtu.edu.cn/members/kren). 5 | 6 | ## Datasets 7 | * `iPinYou` has been decribed in [this page](https://github.com/wnzhang/make-ipinyou-data). 8 | * `YOYI` is the newly published dataset in our CIKM paper. The detail of this dataset is [here](http://apex.sjtu.edu.cn/datasets/7). 9 | 10 | ## Format of data 11 | We use `yzx` data structure to formalize bidding logs. 12 | Each record contains 13 | * `y`: true label of user response (1 for positive and 0 otherwise). 14 | * `z`: the market price of this sample. 15 | * `x`: pre-processed features of the bid request. 16 | 17 | Other details of `yzx` data can be found in [this benchmarking paper](http://arxiv.org/abs/1407.7073) 18 | 19 | ## Prepare the dataset 20 | * Clone and prepare `iPinYou` dataset as described [here](https://github.com/wnzhang/make-ipinyou-data). Note that, please put `make-ipinyou-data` folder in the same parent folder as `optimal-ctr-bidding` project. 21 | ``` 22 | |-- code-folder 23 | ----|-- make-ipinyou-data 24 | --------|-- yoyi-data 25 | --------|-- 1458 26 | --------|-- 2259 27 | --------... 28 | ----|-- optimal-ctr-bidding 29 | --------|-- python 30 | --------|-- scripts 31 | --------|-- README.md 32 | ``` 33 | * (optional) Download `YOYI` dataset and put the folder in `make-ipinyou-data`. 34 | 35 | ## Run the code 36 | * Go to `script` folder and execute `run_MODEL` scripts, where `MODEL` is a placeholder of model names including "lr", "sqlr", "eu" and "rr". Details of the models can be found in our paper. 37 | * Example: ```sh run-lr.sh "1458 2261 2821"``` 38 | 39 | ## Citation 40 | ``` 41 | @inproceedings{ren2016user, 42 | title={User response learning for directly optimizing campaign performance in display advertising}, 43 | author={Ren, Kan and Zhang, Weinan and Rong, Yifei and Zhang, Haifeng and Yu, Yong and Wang, Jun}, 44 | booktitle={Proceedings of the 25th ACM International on Conference on Information and Knowledge Management}, 45 | pages={679--688}, 46 | year={2016}, 47 | organization={ACM} 48 | } 49 | ``` 50 | 51 | ``` 52 | @article{ren2018bidding, 53 | title={Bidding Machine: Learning to Bid for Directly Optimizing Profits in Display Advertising}, 54 | author={Ren, Kan and Zhang, Weinan and Chang, Ke and Rong, Yifei and Yu, Yong and Wang, Jun}, 55 | journal={IEEE Transactions on Knowledge and Data Engineering}, 56 | volume={30}, 57 | number={4}, 58 | pages={645--659}, 59 | year={2018}, 60 | publisher={IEEE} 61 | } 62 | ``` 63 | -------------------------------------------------------------------------------- /python/tool.py: -------------------------------------------------------------------------------- 1 | #--- tool package ---# 2 | import os 3 | import random 4 | import math 5 | import config 6 | 7 | init_weight = 0.05 8 | # random.seed(10) 9 | 10 | def next_init_weight(): 11 | return (random.random() - 0.5) * init_weight 12 | 13 | # convert string list to integer array [yzx] 14 | def ints(data): 15 | int_array = [] 16 | for d in data: 17 | int_array.append(int(d)) 18 | return int_array 19 | 20 | # convert to string list 21 | def strings(data): 22 | str_array = [] 23 | for d in data: 24 | str_array.append(`d`) 25 | return str_array 26 | 27 | # sigmoid function 28 | def sigmoid(z): 29 | value = 0.5 30 | try: 31 | value = 1.0 / (1.0 + math.exp(-z)) 32 | except: 33 | # print "Math Out of Range. " + `z` 34 | value = 1E-9 35 | return value 36 | 37 | def estimate_ctr(weight, feature, train_flag = False): 38 | value = 0.0 39 | for idx in feature: 40 | if idx in weight: 41 | value += weight[idx] 42 | elif train_flag: 43 | weight[idx] = next_init_weight() 44 | ctr = sigmoid(value) 45 | # print "Estimated CTR \t" + `ctr` 46 | return ctr 47 | 48 | def calibrate_ctr(pctr, ds_ratio): 49 | cal_pctr = pctr / (pctr + (1 - pctr) / ds_ratio) 50 | return cal_pctr 51 | 52 | def gen_performance_line(log): 53 | performance = log['performance'] 54 | line = `performance['revenue']` + "\t" \ 55 | + `performance['roi']` + "\t" \ 56 | + `performance['ctr']` + "\t" \ 57 | + `performance['cpc']` + "\t" \ 58 | + `performance['auc']` + "\t" \ 59 | + `performance['rmse']` + "\t" \ 60 | + `performance['cpm']` + "\t" \ 61 | + `performance['bids']` + "\t" \ 62 | + `performance['imps']` + "\t" \ 63 | + `performance['clks']` 64 | return line 65 | 66 | def judge_stop(logs): 67 | stop = False 68 | # step = int(1/config.train_progress_unit) 69 | step = 1 70 | curr_loop = len(logs) - 1 # the latest record id 71 | if curr_loop >= 2*step: 72 | current_r = logs[curr_loop]['performance']['revenue'] 73 | last_r = logs[curr_loop - step]['performance']['revenue'] 74 | last_2_r = logs[curr_loop - 2*step]['performance']['revenue'] 75 | # print "Curr:last:last_2 = " + `current_r` + ":" + `last_r` + ":" + `last_2_r` 76 | if current_r < last_r and last_r < last_2_r: 77 | stop = True 78 | return stop 79 | 80 | def extend_judge_stop(logs): 81 | stop = False 82 | if len(logs) < 10: 83 | stop = False 84 | else: 85 | stop = judge_stop(logs) 86 | return stop 87 | 88 | def get_last_log(logs): 89 | return logs[len(logs)-1] 90 | 91 | #--- no use below ---# 92 | 93 | # load data from file as [[yzx]] 94 | def load_data(file_path): 95 | dataset = [] 96 | if not os.path.isfile(file_path): 97 | print "ERROR: file not exist. " + file_path 98 | else: 99 | fi = open(file_path, 'r') 100 | for line in fi: 101 | li = ints(line.replace(':1','').split()) 102 | dataset.append(li) 103 | fi.close() 104 | return dataset 105 | -------------------------------------------------------------------------------- /python/test_lr.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import config 3 | import tool 4 | from dataset import Dataset 5 | from bid_landscape import BidLandscape 6 | from interval_landscape import IntervalLandscape 7 | from lr_model import LrModel 8 | import sys 9 | 10 | 11 | def main(): 12 | if len(sys.argv) < 3: 13 | print "Usage python test_lr.py campaign_id learn_rate (budget_prop)" 14 | exit(-1) 15 | data_folder = "../../make-ipinyou-data/" 16 | config.campaign_id = int(sys.argv[1]) 17 | # print config.campaign 18 | # print config.campaign_id 19 | # exit(-1) 20 | config.lr_alpha = float(sys.argv[2]) 21 | if len(sys.argv) == 4: 22 | config.budget_prop = int(sys.argv[3]) 23 | train_path = data_folder + `config.campaign_id` + "/train.yzx.txt" 24 | test_path = data_folder + `config.campaign_id` + "/test.yzx.txt" 25 | 26 | train_data = Dataset(train_path, config.campaign_id) 27 | train_data.shuffle() # make train data shuffled 28 | test_data = Dataset(test_path, config.campaign_id) 29 | print "Load done." 30 | 31 | lr_model = LrModel(train_data, test_data) 32 | print "campaign v = " + `lr_model.camp_v` 33 | print "learn_rate = " + `config.lr_alpha` 34 | print "budget = " + `lr_model.budget` 35 | 36 | if config.ds_ratio > 0: 37 | print "Need calibration." 38 | else: 39 | print "No calibration." 40 | 41 | print "Begin training ..." 42 | for i in range(0, config.lr_train_round): 43 | lr_model.train() 44 | lr_model.test() 45 | print "Round " + `i+1` + "\t" + `tool.get_last_log(lr_model.test_log)['performance']` 46 | if tool.judge_stop(lr_model.test_log): 47 | break; 48 | print "Train done." 49 | 50 | 51 | log_file = `config.campaign_id` + "_lr_" + `config.lr_alpha` + "_" + `config.budget_prop` + ".csv" 52 | fo = open("../output/"+log_file, 'w') 53 | 54 | print "Begin log ..." 55 | header = "camp_id\tmodel\tdataset\trevenue\tctr\tcpc\tauc\trmse\tcpm\tbids\timps\tclks\tlaplace\tinterval\tlearn_rate\tnds_ratio\tbudget_prop" 56 | best_test_log = lr_model.get_best_test_log() 57 | best_test_line = `config.campaign_id` + "\t" + "LR\ttest\t" \ 58 | + tool.gen_performance_line(best_test_log) + "\t" \ 59 | + 'None' + "\t" + "None" + "\t" + `config.lr_alpha` + "\t" \ 60 | + "None" + "\t" + `config.budget_prop` 61 | fo.write(header+"\n") 62 | fo.write(best_test_line+"\n") 63 | 64 | fo.write("\n") 65 | 66 | fo.write("Round\tTest\tctr\tcpc\tauc\trmse\tcpm\tclks\timps\tbids\n") 67 | for i in range(0, len(lr_model.test_log)): 68 | test_log = lr_model.test_log[i] 69 | line = `i+1` + "\t" + `test_log['performance']['revenue']` \ 70 | + "\t" + `test_log['performance']['ctr']` \ 71 | + "\t" + `test_log['performance']['cpc']` \ 72 | + "\t" + `test_log['performance']['auc']` \ 73 | + "\t" + `test_log['performance']['rmse']` \ 74 | + "\t" + `test_log['performance']['cpm']` \ 75 | + "\t" + `test_log['performance']['clks']` \ 76 | + "\t" + `test_log['performance']['imps']` \ 77 | + "\t" + `test_log['performance']['bids']` 78 | fo.write(line + "\n") 79 | fo.close() 80 | print "Log done." 81 | 82 | weight_path = `config.campaign_id` + "_" + "lr_best_weight" \ 83 | + "_" + `config.lr_alpha` + "_" + `config.budget_prop` \ 84 | + ".weight" 85 | lr_model.output_weight(best_test_log['weight'], "../output/" + weight_path) 86 | 87 | if __name__ == '__main__': 88 | main() -------------------------------------------------------------------------------- /python/test_sqlr.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import config 3 | import tool 4 | from dataset import Dataset 5 | from bid_landscape import BidLandscape 6 | from interval_landscape import IntervalLandscape 7 | from sqlr_model import SqlrModel 8 | import sys 9 | 10 | 11 | def main(): 12 | if len(sys.argv) < 3: 13 | print "Usage python test_sqlr.py campaign_id learn_rate (budget_prop)" 14 | exit(-1) 15 | data_folder = "../../make-ipinyou-data/" 16 | config.campaign_id = int(sys.argv[1]) 17 | config.lr_alpha = float(sys.argv[2]) 18 | if len(sys.argv) == 4: 19 | config.budget_prop = int(sys.argv[3]) 20 | train_path = data_folder + `config.campaign_id` + "/train.yzx.txt" 21 | test_path = data_folder + `config.campaign_id` + "/test.yzx.txt" 22 | print "Camp_id\tlearn_alpha" 23 | print `config.campaign_id` + "\t" + `config.lr_alpha` 24 | 25 | train_data = Dataset(train_path, config.campaign_id) 26 | train_data.shuffle() 27 | test_data = Dataset(test_path, config.campaign_id) 28 | print "Load done." 29 | 30 | lr_model = SqlrModel(train_data, test_data) 31 | print "campaign v = " + `lr_model.camp_v` 32 | print "budget = " + `lr_model.budget` 33 | 34 | log_file = `config.campaign_id` + "_sqlr_" + `config.lr_alpha` + "_" + `config.budget_prop` + ".csv" 35 | fo = open("../output/"+log_file, 'w') 36 | 37 | print "Begin training ..." 38 | for i in range(0, config.lr_train_round): 39 | lr_model.train() 40 | lr_model.test() 41 | print "Round " + `i+1` + "\t" + `tool.get_last_log(lr_model.test_log)['performance']` 42 | if tool.judge_stop(lr_model.test_log): 43 | break; 44 | print "Train done." 45 | 46 | print "Begin log ..." 47 | header = "camp_id\tmodel\tdataset\trevenue\tctr\tcpc\tauc\trmse\tcpm\tbids\timps\tclks\tlaplace\tinterval\tlearn_rate\tnds_ratio" 48 | best_test_log = lr_model.get_best_test_log() 49 | best_test_line = `config.campaign_id` + "\t" + "SQ\ttest\t" \ 50 | + tool.gen_performance_line(best_test_log) + "\t" \ 51 | + `config.laplace` + "\t" + "None" + "\t" \ 52 | + `config.lr_alpha` + "\t" + "None" 53 | fo.write(header+"\n") 54 | fo.write(best_test_line+"\n") 55 | 56 | fo.write("\n") 57 | 58 | fo.write("Round\tTest\tctr\tcpc\tauc\trmse\tcpm\tclks\timps\tbids\n") 59 | for i in range(0, len(lr_model.test_log)): 60 | test_log = lr_model.test_log[i] 61 | line = `i+1` + "\t" + `test_log['performance']['revenue']` \ 62 | + "\t" + `test_log['performance']['ctr']` \ 63 | + "\t" + `test_log['performance']['cpc']` \ 64 | + "\t" + `test_log['performance']['auc']` \ 65 | + "\t" + `test_log['performance']['rmse']` \ 66 | + "\t" + `test_log['performance']['cpm']` \ 67 | + "\t" + `test_log['performance']['clks']` \ 68 | + "\t" + `test_log['performance']['imps']` \ 69 | + "\t" + `test_log['performance']['bids']` 70 | fo.write(line + "\n") 71 | fo.close() 72 | print "Log done." 73 | 74 | # weight_path = `config.campaign_id` + "_sqlr_best_weight_" + `config.lr_alpha` + "_" + `config.budget_prop` + ".txt" 75 | # lr_model.output_weight(best_test_log['weight'], "../output/" + weight_path) 76 | 77 | weight_path = `config.campaign_id` + "_" + "sqlr_best_weight" \ 78 | + "_" + `config.laplace` \ 79 | + "_" + `config.eu_scale` \ 80 | + "_" + `config.ds_ratio` \ 81 | + ".weight" 82 | lr_model.output_weight(best_test_log['weight'], "../output/" + weight_path) 83 | 84 | 85 | if __name__ == '__main__': 86 | main() 87 | -------------------------------------------------------------------------------- /python/test_lrlin.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import config 3 | import tool 4 | from dataset import Dataset 5 | from bid_landscape import BidLandscape 6 | from interval_landscape import IntervalLandscape 7 | from lr_model import LrModel 8 | import sys 9 | 10 | 11 | def main(): 12 | if len(sys.argv) < 3: 13 | print "Usage python test_lr.py campaign_id learn_rate (budget_prop)" 14 | exit(-1) 15 | data_folder = "../../make-ipinyou-data/" 16 | config.campaign_id = int(sys.argv[1]) 17 | # print config.campaign 18 | # print config.campaign_id 19 | # exit(-1) 20 | config.lr_alpha = float(sys.argv[2]) 21 | if len(sys.argv) == 4: 22 | config.budget_prop = int(sys.argv[3]) 23 | train_path = data_folder + `config.campaign_id` + "/train.yzx.txt" 24 | test_path = data_folder + `config.campaign_id` + "/test.yzx.txt" 25 | 26 | train_data = Dataset(train_path, config.campaign_id) 27 | train_data.shuffle() # make train data shuffled 28 | test_data = Dataset(test_path, config.campaign_id) 29 | print "Load done." 30 | 31 | lr_model = LrModel(train_data, test_data) 32 | print "campaign v = " + `lr_model.camp_v` 33 | print "learn_rate = " + `config.lr_alpha` 34 | print "budget = " + `lr_model.budget` 35 | 36 | if config.ds_ratio > 0: 37 | print "Need calibration." 38 | else: 39 | print "No calibration." 40 | 41 | print "Begin training ..." 42 | for i in range(0, config.lr_train_round): 43 | lr_model.train() 44 | lr_model.test() 45 | print "Round " + `i+1` + "\t" + `tool.get_last_log(lr_model.test_log)['performance']` 46 | if tool.judge_stop(lr_model.test_log): 47 | break; 48 | print "Train done." 49 | 50 | log_file = `config.campaign_id` + "_lrlin_" + `config.lr_alpha` + "_" + `config.budget_prop` + ".csv" 51 | fo = open("../output/"+log_file, 'w') 52 | 53 | print "Begin log ..." 54 | header = "camp_id\tmodel\tdataset\trevenue\tctr\tcpc\tauc\trmse\tcpm\tbids\timps\tclks\tlaplace\tinterval\tlearn_rate\tnds_ratio\tbudget_prop" 55 | best_test_log = lr_model.get_best_test_log() 56 | best_test_line = `config.campaign_id` + "\t" + "LR\ttest\t" \ 57 | + tool.gen_performance_line(best_test_log) + "\t" \ 58 | + 'None' + "\t" + "None" + "\t" + `config.lr_alpha` + "\t" \ 59 | + "None" + "\t" + `config.budget_prop` 60 | fo.write(header+"\n") 61 | fo.write(best_test_line+"\n") 62 | 63 | # search for best linear parameter 64 | opt_param = lr_model.lin_bid(best_test_log['weight']) 65 | fo.write("prop\trevenue\troi\tctr\tcpc\tauc\trmse\tcpm\timps\tclks\tlin_param\n") 66 | for prop in config.budget_props: 67 | performance = lr_model.replay(best_test_log['weight'], lr_model.test_data, prop) 68 | fo.write(`prop`); fo.write("\t") 69 | fo.write(`performance['revenue']`); fo.write("\t") 70 | fo.write(`performance['roi']`); fo.write("\t") 71 | fo.write(`performance['ctr']`); fo.write("\t") 72 | fo.write(`performance['cpc']`); fo.write("\t") 73 | fo.write(`performance['auc']`); fo.write("\t") 74 | fo.write(`performance['rmse']`); fo.write("\t") 75 | fo.write(`performance['cpm']`); fo.write("\t") 76 | fo.write(`performance['imps']`); fo.write("\t") 77 | fo.write(`performance['clks']`); fo.write("\t") 78 | fo.write(`opt_param`) 79 | fo.write("\n") 80 | 81 | 82 | fo.write("\n") 83 | 84 | fo.write("Round\tTest\tctr\tcpc\tauc\trmse\tcpm\tclks\timps\tbids\n") 85 | for i in range(0, len(lr_model.test_log)): 86 | test_log = lr_model.test_log[i] 87 | line = `i+1` + "\t" + `test_log['performance']['revenue']` \ 88 | + "\t" + `test_log['performance']['ctr']` \ 89 | + "\t" + `test_log['performance']['cpc']` \ 90 | + "\t" + `test_log['performance']['auc']` \ 91 | + "\t" + `test_log['performance']['rmse']` \ 92 | + "\t" + `test_log['performance']['cpm']` \ 93 | + "\t" + `test_log['performance']['clks']` \ 94 | + "\t" + `test_log['performance']['imps']` \ 95 | + "\t" + `test_log['performance']['bids']` 96 | fo.write(line + "\n") 97 | fo.close() 98 | print "Log done." 99 | 100 | weight_path = `config.campaign_id` + "_" + "lrlin_best_weight" \ 101 | + "_" + `config.lr_alpha` + "_" + `config.budget_prop` \ 102 | + ".weight" 103 | lr_model.output_weight(best_test_log['weight'], "../output/" + weight_path) 104 | 105 | if __name__ == '__main__': 106 | main() -------------------------------------------------------------------------------- /python/test_rr.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | from dataset import Dataset 3 | from bid_landscape import BidLandscape 4 | from rr_model import RrModel 5 | import sys 6 | import config 7 | import tool 8 | 9 | def main(): 10 | if len(sys.argv) < 5: 11 | print "Usage: python test_eu.py campaign_id laplace eu_scale ds_ratio" 12 | exit(-1) 13 | 14 | config.campaign_id = int(sys.argv[1]) if int(sys.argv[1]) in config.campaign_list else config.campaign_id 15 | config.laplace = int(sys.argv[2]) if int(sys.argv[2])>0 else config.laplace 16 | config.eu_scale = float(sys.argv[3]) if float(sys.argv[3])>0 else config.eu_scale 17 | config.ds_ratio = float(sys.argv[4]) if float(sys.argv[4])>0 else 0 18 | print "camp_id\tlaplace\tscale\tds_ratio" 19 | print `config.campaign_id` + "\t" + `config.laplace` + "\t" + `config.eu_scale` + "\t" + `config.ds_ratio` 20 | 21 | train_path = config.data_folder + `config.campaign_id` + "/train.yzx.txt" 22 | test_path = config.data_folder + `config.campaign_id` + "/test.yzx.txt" 23 | train_data = Dataset(train_path, config.campaign_id) 24 | train_data.shuffle() # make train data shuffled 25 | test_data = Dataset(test_path, config.campaign_id) 26 | if config.INTVL: 27 | IntervalLandscape(train_data, train_data.get_camp_id(), config.laplace, 3) 28 | IntervalLandscape(test_data, test_data.get_camp_id(), config.laplace, 3) 29 | else: 30 | BidLandscape(train_data, train_data.get_camp_id(), config.laplace) 31 | BidLandscape(test_data, test_data.get_camp_id(), config.laplace) 32 | print "Load done." 33 | 34 | # downsampling 35 | train_data_ds = train_data.down_sampling(config.ds_ratio) if config.ds_ratio>0 else train_data 36 | print train_data_ds.get_statistics() 37 | print "Down sampled." 38 | 39 | rr_model = RrModel(train_data_ds, test_data) 40 | print "campaign v = " + `rr_model.camp_v` 41 | 42 | # train 43 | print "Begin training ..." 44 | for i in range(0, config.eu_train_round): 45 | rr_model.train() 46 | rr_model.test() 47 | print "Round " + `i+1` + "\t" + `tool.get_last_log(rr_model.test_log)['performance']` 48 | if tool.judge_stop(rr_model.test_log): 49 | break; 50 | print "Train done." 51 | 52 | # rr_2997_3_0.1_0.05.csv 53 | log_file = `config.campaign_id` + "_rr" \ 54 | + "_" + `config.laplace` \ 55 | + "_" + `config.eu_scale` \ 56 | + "_" + `config.ds_ratio` \ 57 | + ".csv" 58 | fo = open("../output/"+log_file, 'w') 59 | 60 | print "Being log ..." 61 | header = "camp_id\tmodel\tdataset\trevenue\tctr\tcpc\tauc\trmse\tcpm\tbids\timps\tclks\tlaplace\tinterval\tscale\tds_ratio" 62 | best_test_log = rr_model.get_best_test_log() 63 | best_test_line = `config.campaign_id` + "\t" + "RR\ttest\t" \ 64 | + tool.gen_performance_line(best_test_log) + "\t" \ 65 | + `config.laplace` + "\t" + "None" + "\t" + `config.eu_scale` + "\t" + (`config.ds_ratio` if config.ds_ratio>0 else "None") 66 | fo.write(header+"\n") 67 | fo.write(best_test_line+"\n") 68 | 69 | fo.write("\n") 70 | 71 | fo.write("Round\tTest\tctr\tcpc\tauc\trmse\tcpm\tclks\timps\tbids\n") 72 | for i in range(0, len(rr_model.test_log)): 73 | test_log = rr_model.test_log[i] 74 | line = `i+1` + "\t" + `test_log['performance']['revenue']` \ 75 | + "\t" + `test_log['performance']['ctr']` \ 76 | + "\t" + `test_log['performance']['cpc']` \ 77 | + "\t" + `test_log['performance']['auc']` \ 78 | + "\t" + `test_log['performance']['rmse']` \ 79 | + "\t" + `test_log['performance']['cpm']` \ 80 | + "\t" + `test_log['performance']['clks']` \ 81 | + "\t" + `test_log['performance']['imps']` \ 82 | + "\t" + `test_log['performance']['bids']` 83 | fo.write(line + "\n") 84 | fo.close() 85 | print "Log done." 86 | 87 | weight_path = `config.campaign_id` + "_" + "rr_best_weight" \ 88 | + "_" + `config.laplace` \ 89 | + "_" + `config.eu_scale` \ 90 | + "_" + `config.ds_ratio` \ 91 | + ".weight" 92 | rr_model.output_weight(best_test_log['weight'], "../output/" + weight_path) 93 | 94 | 95 | 96 | if __name__ == '__main__': 97 | main() 98 | -------------------------------------------------------------------------------- /python/test_eu.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | from dataset import Dataset 3 | from bid_landscape import BidLandscape 4 | from eu_model import EuModel 5 | import sys 6 | import config 7 | import tool 8 | 9 | def main(): 10 | if len(sys.argv) < 5: 11 | print "Usage: python test_eu.py campaign_id laplace eu_scale ds_ratio" 12 | exit(-1) 13 | 14 | config.campaign_id = int(sys.argv[1]) if int(sys.argv[1]) in config.campaign_list else config.campaign_id 15 | config.laplace = int(sys.argv[2]) if int(sys.argv[2])>0 else config.laplace 16 | config.eu_scale = float(sys.argv[3]) if float(sys.argv[3])>0 else config.eu_scale 17 | config.ds_ratio = float(sys.argv[4]) if float(sys.argv[4])>0 else 0 18 | print "camp_id\tlaplace\tscale\tds_ratio" 19 | print `config.campaign_id` + "\t" + `config.laplace` + "\t" + `config.eu_scale` + "\t" + `config.ds_ratio` 20 | 21 | train_path = config.data_folder + `config.campaign_id` + "/train.yzx.txt" 22 | test_path = config.data_folder + `config.campaign_id` + "/test.yzx.txt" 23 | train_data = Dataset(train_path, config.campaign_id) 24 | train_data.shuffle() # make train data shuffled 25 | test_data = Dataset(test_path, config.campaign_id) 26 | if config.INTVL: 27 | IntervalLandscape(train_data, train_data.get_camp_id(), config.laplace, 3) 28 | IntervalLandscape(test_data, test_data.get_camp_id(), config.laplace, 3) 29 | else: 30 | BidLandscape(train_data, train_data.get_camp_id(), config.laplace) 31 | BidLandscape(test_data, test_data.get_camp_id(), config.laplace) 32 | print "Load done." 33 | 34 | # downsampling 35 | if config.ds_ratio>0: 36 | train_data_ds = train_data.down_sampling(config.ds_ratio) 37 | else: 38 | train_data_ds = train_data 39 | print "Down sampled." 40 | print train_data_ds.get_statistics() 41 | 42 | eu_model = EuModel(train_data_ds, test_data) 43 | print "campaign v = " + `eu_model.camp_v` 44 | 45 | # train 46 | print "Begin training ..." 47 | for i in range(0, config.eu_train_round): 48 | eu_model.train() 49 | eu_model.test() 50 | print "Round " + `i+1` + "\t" + `tool.get_last_log(eu_model.test_log)['performance']` 51 | if tool.judge_stop(eu_model.test_log): 52 | break; 53 | print "Train done." 54 | 55 | # eu_2997_3_0.1_0.05.csv 56 | log_file = `config.campaign_id` + "_eu" \ 57 | + "_" + `config.laplace` \ 58 | + "_" + `config.eu_scale` \ 59 | + "_" + `config.ds_ratio` \ 60 | + ".csv" 61 | fo = open("../output/"+log_file, 'w') 62 | 63 | print "Begin log ..." 64 | header = "camp_id\tmodel\tdataset\trevenue\tctr\tcpc\tauc\trmse\tcpm\tbids\timps\tclks\tlaplace\tinterval\teu_scale\tnds_ratio" 65 | best_test_log = eu_model.get_best_test_log() 66 | best_test_line = `config.campaign_id` + "\t" + "EU\ttest\t" \ 67 | + tool.gen_performance_line(best_test_log) + "\t" \ 68 | + `config.laplace` + "\t" + "None" + "\t" + `config.eu_scale` + "\t" + `config.ds_ratio` 69 | fo.write(header+"\n") 70 | fo.write(best_test_line+"\n") 71 | 72 | fo.write("\n") 73 | 74 | fo.write("Round\tTest\tctr\tcpc\tauc\trmse\tcpm\tclks\timps\tbids\n") 75 | for i in range(0, len(eu_model.test_log)): 76 | test_log = eu_model.test_log[i] 77 | line = `i+1` + "\t" + `test_log['performance']['revenue']` \ 78 | + "\t" + `test_log['performance']['ctr']` \ 79 | + "\t" + `test_log['performance']['cpc']` \ 80 | + "\t" + `test_log['performance']['auc']` \ 81 | + "\t" + `test_log['performance']['rmse']` \ 82 | + "\t" + `test_log['performance']['cpm']` \ 83 | + "\t" + `test_log['performance']['clks']` \ 84 | + "\t" + `test_log['performance']['imps']` \ 85 | + "\t" + `test_log['performance']['bids']` 86 | fo.write(line + "\n") 87 | fo.close() 88 | print "Log done." 89 | 90 | weight_path = `config.campaign_id` + "_" + "eu_best_weight" \ 91 | + "_" + `config.laplace` \ 92 | + "_" + `config.eu_scale` \ 93 | + "_" + `config.ds_ratio` \ 94 | + ".weight" 95 | eu_model.output_weight(best_test_log['weight'], "../output/" + weight_path) 96 | 97 | 98 | if __name__ == '__main__': 99 | main() -------------------------------------------------------------------------------- /python/dataset.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import os 3 | import tool 4 | import copy 5 | import random 6 | 7 | class Dataset: 8 | '''The class for data loading and storage.''' 9 | 10 | def __init__(self, file_path, camp_id): 11 | self.file_path = file_path 12 | self.camp_id = camp_id 13 | self.init_statistics() 14 | self.load() 15 | self.iterators = [] 16 | 17 | def load(self): # load data from the specified file path 18 | print "Loading data ..." 19 | self.dataset = [] 20 | if not os.path.isfile(self.file_path): 21 | print "ERROR: file not exist. " + self.file_path 22 | exit(-1) 23 | size = 0 24 | cost_sum = 0 25 | clk_sum = 0 26 | max_price = -1 27 | fi = open(self.file_path, 'r') 28 | for line in fi: 29 | li = tool.ints(line.replace(':1','').split()) 30 | if self.camp_id < 0: 31 | li.append(-1) 32 | self.dataset.append(li) 33 | y = li[0] 34 | mp = li[1] 35 | size += 1 36 | cost_sum += mp 37 | max_price = mp if mp > max_price else max_price 38 | clk_sum += y 39 | fi.close() 40 | self.statistics['size'] = size 41 | self.statistics['cost_sum'] = cost_sum 42 | self.statistics['clk_sum'] = clk_sum 43 | self.statistics['ecpm'] = 1.0 * cost_sum / size 44 | self.statistics['ecpc'] = int(cost_sum / clk_sum * 1E-3) 45 | self.statistics['ctr'] = 1.0 * clk_sum / size 46 | self.statistics['max_price'] = max_price 47 | print "Loaded." 48 | print self.get_statistics() 49 | 50 | def shuffle(self): 51 | random.seed(200) 52 | random.shuffle(self.dataset) 53 | 54 | def init_statistics(self): # init all the statistic elements 55 | self.statistics = {'size':0, 'cost_sum':0, 'clk_sum':0, 56 | 'ecpm':0, 'ecpc':0, 'ctr':0.0, 'max_price':0} 57 | 58 | def update_statistics(self): 59 | # print "update statistics \t" + `self` 60 | size = 0 61 | cost_sum = 0 62 | clk_sum = 0 63 | max_price = -1 64 | for data in self.dataset: 65 | y = data[0] 66 | mp = data[1] 67 | size += 1 68 | cost_sum += mp 69 | max_price = mp if mp > max_price else max_price 70 | clk_sum += y 71 | self.statistics['size'] = size 72 | self.statistics['cost_sum'] = cost_sum 73 | self.statistics['clk_sum'] = clk_sum 74 | self.statistics['ecpm'] = 1.0 * cost_sum / size 75 | self.statistics['ori_ecpc'] = self.statistics['ecpc'] 76 | self.statistics['ecpc'] = cost_sum / clk_sum * 1E-3 77 | self.statistics['ctr'] = 1.0 * clk_sum / size 78 | self.statistics['max_price'] = max_price 79 | 80 | def init_landscape(self, landscape): # record the bid landscape into the dataset instance 81 | self.landscape = landscape 82 | 83 | def down_sampling(self, ratio): 84 | # print "original dataset \t " + `self` 85 | ds_dataset = copy.deepcopy(self) 86 | # print "downsampled dataset \t" + `ds_dataset` 87 | ds_dataset.self_down_sampling(ratio) 88 | return ds_dataset 89 | 90 | def self_down_sampling(self, ratio): 91 | random.seed(20) 92 | ds_dataset = [] 93 | neg_dataset = [] 94 | pos_num = self.statistics['clk_sum'] 95 | neg_num = self.get_size() - pos_num 96 | desired_neg_num = int(neg_num * ratio) # desired_neg_num if desired_neg_num < neg_num else neg_num 97 | for data in self.dataset: 98 | y = data[0] 99 | if y == 1: 100 | ds_dataset.append(copy.deepcopy(data)) 101 | else: 102 | neg_dataset.append(copy.deepcopy(data)) 103 | ds_dataset += random.sample(neg_dataset, desired_neg_num) 104 | #TODO update statistics, e.g. size 105 | self.dataset = ds_dataset 106 | self.update_statistics() 107 | random.shuffle(ds_dataset) 108 | self.init_all_iterators() 109 | 110 | def get_camp_id(self): 111 | return self.camp_id 112 | 113 | def get_statistics(self): 114 | return self.statistics 115 | 116 | def get_landscape(self): 117 | if self.landscape == None: 118 | print "ERROR: Please init landscape first. [Dataset.init_landscape(landscape)]" 119 | return self.landscape 120 | 121 | def get_dataset(self): 122 | return self.dataset 123 | 124 | def init_index(self): # initialize an iterator and store it 125 | self.iterators.append(0) 126 | iter_id = len(self.iterators) - 1 127 | return iter_id 128 | 129 | def init_all_iterators(self): 130 | iter_num = len(self.iterators) 131 | if iter_num > 0: 132 | for idx in range(0, iter_num): 133 | self.iterators[idx] = 0 134 | 135 | def get_next_data(self, iter_id): # get the next data in the dataset 136 | if self.iterators[iter_id] >= self.get_size(): 137 | self.iterators[iter_id] = 0 138 | data = self.dataset[self.iterators[iter_id]] 139 | self.iterators[iter_id] = self.iterators[iter_id] + 1 140 | return data 141 | 142 | def get_progress(self, iter_id): 143 | progress = 1.0 * self.iterators[iter_id] / self.get_size() 144 | return progress 145 | 146 | def get_size(self): # get the volume size of the dataset 147 | return self.statistics['size'] 148 | 149 | def get_max_price(self): 150 | return self.statistics['max_price'] 151 | 152 | def reached_tail(self, iter_id): # judge whether the last data have been reached 153 | flag = (self.iterators[iter_id] >= self.get_size()) 154 | return flag 155 | 156 | 157 | def main(): 158 | print "main method." 159 | 160 | if __name__ == '__main__': 161 | main() 162 | -------------------------------------------------------------------------------- /python/replay.py: -------------------------------------------------------------------------------- 1 | import config 2 | from dataset import Dataset 3 | from opt_bid import OptBid 4 | import sys 5 | import os 6 | import tool 7 | from sklearn.metrics import roc_auc_score 8 | from sklearn.metrics import mean_squared_error 9 | import math 10 | 11 | nds_ratio = 0.01 12 | camp_v = 36000 13 | 14 | # header_dataset = "camp_id\tnds_ratio\tcamp_v\tsize\tcost_sum\tclk_sum\tecpc\tcpm\tctr\tmax_price\n" 15 | def make_dataset_record(dataset, camp_id): 16 | stat = dataset.get_statistics() 17 | line = `camp_id` + "\t" + `nds_ratio` + "\t" + \ 18 | `camp_v` + "\t" + `stat['size']` + "\t" + \ 19 | `stat['cost_sum']` + "\t" + `stat['clk_sum']` + "\t" + \ 20 | `stat['ecpc']` + "\t" + `stat['ecpm']` + "\t" + \ 21 | `stat['ctr']` + "\t" + `stat['max_price']` + "\n" 22 | return line 23 | 24 | # header_log = "progress\trevenue\tctr\twin_rate\tauc\trmse\tecpc\tcpm\tclk_sum\timp_sum\tbid_sum\n" 25 | def make_log_record(log): 26 | line = `log['progress']` + "\t" + `log['revenue']` + "\t" + \ 27 | `log['ctr']` + "\t" + `log['win_rate']` + "\t" + \ 28 | `log['auc']` + "\t" + `log['rmse']` + "\t" + \ 29 | `log['cpc']` + "\t" + `log['cpm']` + "\t" + \ 30 | `log['clks']` + "\t" + `log['imps']` + "\t" + \ 31 | `log['bids']` + "\n" 32 | return line 33 | 34 | def make_performance(progress, bid_sum, cost_sum, imp_sum, clk_sum, revenue_sum, labels, p_labels): 35 | log = {} 36 | log['progress'] = progress 37 | log['bids'] = bid_sum 38 | log['imps'] = imp_sum 39 | log['clks'] = clk_sum 40 | log['revenue'] = revenue_sum 41 | log['auc'] = roc_auc_score(labels, p_labels) 42 | log['rmse'] = math.sqrt(mean_squared_error(labels, p_labels)) 43 | log['cpc'] = 0.0 if clk_sum == 0 else 1.0 * cost_sum / clk_sum * 1E-3 44 | log['cpm'] = 0.0 if imp_sum == 0 else 1.0 * cost_sum / imp_sum 45 | log['ctr'] = 0.0 if imp_sum == 0 else 1.0 * clk_sum / imp_sum 46 | log['win_rate'] = 0.0 if bid_sum == 0 else 1.0 * imp_sum / bid_sum 47 | print log 48 | return log 49 | 50 | def calibrate_ctr(pctr): 51 | cal_pctr = pctr / (pctr + (1 - pctr) / nds_ratio) 52 | return cal_pctr 53 | 54 | def bid_cal(ctr): 55 | cal_ctr = calibrate_ctr(ctr) 56 | bid_price = int(camp_v * cal_ctr * 1E3) 57 | return bid_price 58 | 59 | def bid(ctr): 60 | bid_price = int(camp_v * ctr * 1E3) 61 | print camp_v 62 | return bid_price 63 | 64 | def check_file(path): 65 | if not os.path.isfile(path): 66 | print "ERROR: file not exist. " + path 67 | exit(-1) 68 | 69 | def read_weight(path): 70 | weight = {} 71 | check_file(path) 72 | fi = open(path, 'r') 73 | for line in fi: 74 | k_v = line.split() 75 | key = int(k_v[0]) 76 | value = float(k_v[1]) 77 | weight[key] = value 78 | return weight 79 | 80 | def main(): 81 | if len(sys.argv) < 6: 82 | print "Usage: python replay.py camp_id(yoyi=-1) budget_prop weight.txt test.yzx.txt log.csv (calib)" 83 | exit(-1) 84 | 85 | camp_id = int(sys.argv[1]) 86 | print "Campaign ID = " + `camp_id` 87 | budget_prop = int(sys.argv[2]) 88 | weight_path = sys.argv[3] 89 | data_path = sys.argv[4] 90 | log_path = sys.argv[5] 91 | global nds_ratio 92 | global camp_v 93 | if len(sys.argv) == 7: 94 | nds_ratio = float(sys.argv[6]) 95 | if not nds_ratio > 0: 96 | print "No calibration." 97 | 98 | dataset = Dataset(data_path, camp_id) 99 | weight = read_weight(weight_path) 100 | budget = int(dataset.get_statistics()['cost_sum'] / budget_prop) 101 | if camp_id > 0: 102 | camp_v = dataset.get_statistics()['ecpc'] 103 | 104 | # init the metrics 105 | logs = [] 106 | labels = [] 107 | p_labels = [] 108 | bid_sum = 0 109 | cost_sum = 0 110 | imp_sum = 0 111 | clk_sum = 0 112 | revenue_sum = 0 113 | 114 | detail_fo = open("../detail/" + `camp_id` + ".txt", 'w') 115 | 116 | # replay 117 | progress = 0.0 118 | total_num = dataset.get_statistics()['size'] 119 | iter_id = dataset.init_index() 120 | while not dataset.reached_tail(iter_id): 121 | data = dataset.get_next_data(iter_id) 122 | bid_sum += 1 123 | y = data[0] 124 | mp = data[1] 125 | feature = data[2:len(data)] 126 | ctr = tool.estimate_ctr(weight, feature, train_flag=False) 127 | labels.append(y) 128 | p_labels.append(ctr) 129 | if camp_id < 0 or nds_ratio > 0: 130 | bid_price = bid_cal(ctr) 131 | else: 132 | bid_price = bid(ctr) 133 | if bid_price > mp: 134 | cost_sum += mp 135 | imp_sum += 1 136 | clk_sum += y 137 | revenue_sum = revenue_sum - mp + int(camp_v * y * 1E3) 138 | detail_fo.write(`bid_price` + "\t" + `mp`+"\n") 139 | # prg = 1.0 * bid_sum / total_num 140 | # if prg > (progress + config.test_progress_unit - 1E-5): 141 | # progress += config.test_progress_unit 142 | # performance = make_performance(prg, bid_sum, cost_sum, imp_sum, clk_sum, revenue_sum, labels, p_labels) 143 | # logs.append(performance) 144 | if cost_sum > budget: 145 | performance = make_performance(prg, bid_sum, cost_sum, imp_sum, clk_sum, revenue_sum, labels, p_labels) 146 | logs.append(performance) 147 | break 148 | performance = make_performance(1.0, bid_sum, cost_sum, imp_sum, clk_sum, revenue_sum, labels, p_labels) 149 | logs.append(performance) 150 | 151 | detail_fo.close() 152 | 153 | # make record 154 | log_file = open(log_path, 'w') 155 | header_dataset = "camp_id\tnds_ratio\tcamp_v\tsize\tcost_sum\tclk_sum\tecpc\tcpm\tctr\tmax_price\n" 156 | header_log = "progress\trevenue\tctr\twin_rate\tauc\trmse\tecpc\tcpm\tclk_sum\timp_sum\tbid_sum\n" 157 | log_file.write(header_dataset) 158 | log_file.write(make_dataset_record(dataset, camp_id)) 159 | log_file.write(header_log) 160 | for log in logs: 161 | log_file.write(make_log_record(log)) 162 | 163 | 164 | if __name__ == '__main__': 165 | main() 166 | -------------------------------------------------------------------------------- /python/test_em.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | from dataset import Dataset 3 | from bid_landscape import BidLandscape 4 | from em_model import EmModel 5 | import sys 6 | import config 7 | import tool 8 | 9 | def main(): 10 | if len(sys.argv) < 7: 11 | print "Usage: python test_em.py camp_id model_name laplace x_scale ds_ratio budget_prop" 12 | exit(-1) 13 | 14 | config.campaign_id = int(sys.argv[1]) 15 | model_name = sys.argv[2] 16 | if not model_name in config.model_list: 17 | print "Wrong model name." 18 | exit(-1) 19 | config.model_name = model_name 20 | config.laplace = int(sys.argv[3]) 21 | config.em_scale = float(sys.argv[4]) 22 | config.ds_ratio = float(sys.argv[5]) if float(sys.argv[5]) > 0 else 0 23 | config.budget_prop = int(sys.argv[6]) 24 | print "camp_id\tmodel\tlaplace\tscale\tds_ratio\tbudget_prop" 25 | print `config.campaign_id` + "\t" + `model_name` \ 26 | + "\t" + `config.laplace` + "\t" + `config.em_scale` \ 27 | + "\t" + `config.ds_ratio` + "\t" + `config.budget_prop` 28 | 29 | train_path = config.data_folder + `config.campaign_id` + "/train.yzx.txt" 30 | test_path = config.data_folder + `config.campaign_id` + "/test.yzx.txt" 31 | train_data = Dataset(train_path, config.campaign_id) 32 | train_data.shuffle() # make train data shuffled 33 | test_data = Dataset(test_path, config.campaign_id) 34 | 35 | # no interval setting 36 | BidLandscape(train_data, train_data.get_camp_id(), config.laplace) 37 | BidLandscape(test_data, test_data.get_camp_id(), config.laplace) 38 | print "Load done." 39 | 40 | # downsampling 41 | train_data_ds = train_data.down_sampling(config.ds_ratio) if config.ds_ratio>0 else train_data 42 | print train_data_ds.get_statistics() 43 | print "Down sampled." 44 | 45 | em_model = EmModel(train_data_ds, test_data, model_name) 46 | print "campaign v = " + `em_model.camp_v` 47 | 48 | # train 49 | print "Begin training ..." 50 | for i in range(0, config.em_round): 51 | em_model.train() 52 | print "EM Round " + `i+1` + "\t" + `tool.get_last_log(em_model.em_log)['performance']` 53 | if tool.judge_stop(em_model.em_log): 54 | break; 55 | print "Train done." 56 | 57 | # em_rr_2997_3_0.1_0.csv 58 | log_file = "em_" + model_name \ 59 | + "_" + `config.campaign_id` \ 60 | + "_" + `config.budget_prop` \ 61 | + "_" + `config.laplace` \ 62 | + "_" + `config.em_scale` \ 63 | + "_" + `config.ds_ratio` \ 64 | + ".csv" 65 | fo = open("../output/" + log_file, 'w') 66 | 67 | print "Begin log ..." 68 | header = "camp_id\tmodel\tdataset\trevenue\troi\tctr\tcpc\tauc\trmse\tcpm\tbids\timps\tclks\tlaplace\tinterval\tscale\tds_ratio\tbudget_prop\tem_round\tmu" 69 | best_em_log = em_model.get_best_log(em_model.em_log) 70 | best_em_line = `config.campaign_id` + "\t" + "em"+model_name + "\ttest\t" \ 71 | + tool.gen_performance_line(best_em_log) + "\t" \ 72 | + `config.laplace` + "\t" + "None" + "\t" + `config.em_scale` + "\t" \ 73 | + (`config.ds_ratio` if config.ds_ratio>0 else "None") + "\t" \ 74 | + `config.budget_prop` +"\t" \ 75 | + `len(em_model.em_log)` + "\t" + `best_em_log['mu']` 76 | 77 | fo.write(header + "\n") 78 | fo.write(best_em_line + "\n") 79 | 80 | fo.write("Test with Budget Constraints\n") 81 | 82 | # # reset mu 83 | # em_model.mu = best_em_log['mu'] 84 | # em_model.bid_strategy.set_mu(em_model.mu) 85 | # # replay 86 | # fo.write("prop\trevenue\troi\tctr\tcpc\tauc\trmse\tcpm\timps\tclks\n") 87 | # for prop in config.budget_props: 88 | # performance = em_model.replay(best_em_log['weight'], em_model.test_data, prop) 89 | # fo.write(`prop`); fo.write("\t") 90 | # fo.write(`performance['revenue']`); fo.write("\t") 91 | # fo.write(`performance['roi']`); fo.write("\t") 92 | # fo.write(`performance['ctr']`); fo.write("\t") 93 | # fo.write(`performance['cpc']`); fo.write("\t") 94 | # fo.write(`performance['auc']`); fo.write("\t") 95 | # fo.write(`performance['rmse']`); fo.write("\t") 96 | # fo.write(`performance['cpm']`); fo.write("\t") 97 | # fo.write(`performance['imps']`); fo.write("\t") 98 | # fo.write(`performance['clks']`); fo.write("\t") 99 | # fo.write("\n") 100 | 101 | 102 | fo.write("\n") 103 | 104 | fo.write("Round\trevenue\troi\tcpc\tctr\tauc\trmse\timps\ttruncate\tmu\n") 105 | for i in range(0, len(em_model.em_log)): 106 | em_log = em_model.em_log[i] 107 | line = `i+1` + "\t" + `em_log['performance']['revenue']` + "\t" \ 108 | + `em_log['performance']['roi']` + "\t" \ 109 | + `em_log['performance']['cpc']` + "\t" \ 110 | + `em_log['performance']['ctr']` + "\t" \ 111 | + `em_log['performance']['auc']` + "\t" \ 112 | + `em_log['performance']['rmse']` + "\t" \ 113 | + `em_log['performance']['imps']` + "\t" \ 114 | + `em_log['weight'][0]` + "\t" \ 115 | + `em_log['mu']` 116 | fo.write(line + "\n") 117 | fo.write("\n") 118 | for i in range(0, len(em_model.test_log)): 119 | test_log = em_model.test_log[i] 120 | line = `i+1` + "\t" + `test_log['performance']['revenue']` + "\t" \ 121 | + `test_log['performance']['roi']` + "\t" \ 122 | + `test_log['performance']['cpc']` + "\t" \ 123 | + `test_log['performance']['ctr']` + "\t" \ 124 | + `test_log['performance']['auc']` + "\t" \ 125 | + `test_log['performance']['rmse']` + "\t" \ 126 | + `test_log['performance']['imps']` + "\t" \ 127 | + `test_log['weight'][0]` 128 | if 'm' in test_log: 129 | line = line + "\tm" 130 | fo.write(line + "\n") 131 | 132 | fo.close() 133 | print "Log done." 134 | 135 | if __name__ == '__main__': 136 | main() 137 | -------------------------------------------------------------------------------- /python/lr_model.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import copy 3 | import random 4 | from model import Model 5 | from mcpc_bid import McpcBid 6 | import tool 7 | import config 8 | from sklearn.metrics import roc_auc_score 9 | from sklearn.metrics import mean_squared_error 10 | import math 11 | 12 | class LrModel(Model): 13 | def __init__(self, train_data, test_data): 14 | Model.__init__(self, train_data, test_data) 15 | self.init_parameters() 16 | self.init_weight() 17 | self.init_bid_strategy() 18 | self.reg_update_param = 1 - config.lr_alpha * config.lr_lambda 19 | # print self.reg_update_param 20 | self.train_log = [] 21 | self.test_log = [] 22 | 23 | def init_weight(self): 24 | self.weight = {} 25 | self.best_weight = {} 26 | 27 | def init_bid_strategy(self): 28 | self.bid_strategy = McpcBid(self.camp_v) 29 | 30 | def init_parameters(self): 31 | self.camp_v = self.train_data.get_statistics()['ecpc'] 32 | self.mu = 0.0 33 | self.budget = int(self.test_data.get_statistics()['cost_sum'] / config.budget_prop) 34 | # print "camp_v \t " + `self.camp_v` 35 | 36 | def train(self): # train with one traversal of the full train_data 37 | random.seed(10) 38 | train_data = self.train_data 39 | # print "Train data \t" + `train_data` + "\tsize \t" + `train_data.get_size()` 40 | progress = 0.0 41 | iter_id = train_data.init_index() 42 | while not train_data.reached_tail(iter_id): 43 | data = train_data.get_next_data(iter_id) 44 | y = data[0] 45 | feature = data[2:len(data)] 46 | ctr = tool.estimate_ctr(self.weight, feature, train_flag=True) 47 | for idx in feature: # update 48 | self.weight[idx] = self.weight[idx] * self.reg_update_param - config.lr_alpha * (ctr - y) 49 | # prg = train_data.get_progress(iter_id) 50 | # if prg < 0.9 and prg > (progress + config.train_progress_unit - 1E-3): 51 | # self.test() 52 | # progress += config.train_progress_unit 53 | 54 | def test(self): 55 | parameters = {'weight':self.weight} 56 | performance = self.calc_performance(self.test_data, parameters) 57 | # record performance 58 | log = self.make_log(self.weight, performance) 59 | self.test_log.append(log) 60 | 61 | def make_log(self, weight, performance): 62 | log = {} 63 | log['weight'] = copy.deepcopy(weight) 64 | log['performance'] = copy.deepcopy(performance) 65 | log['mu'] = self.mu 66 | return log 67 | 68 | def calc_performance(self, dataset, parameters): # calculate the performance w.r.t. the given dataset and parameters 69 | weight = parameters['weight'] 70 | # budget = parameters['budget'] 71 | bid_sum = 0 72 | cost_sum = 0 73 | imp_sum = 0 74 | clk_sum = 0 75 | revenue_sum = 0 76 | labels = [] 77 | p_labels = [] 78 | iter_id = dataset.init_index() 79 | while not dataset.reached_tail(iter_id): #TODO no budget set 80 | bid_sum += 1 81 | data = dataset.get_next_data(iter_id) 82 | y = data[0] 83 | market_price = data[1] 84 | feature = data[2:len(data)] 85 | ctr = tool.estimate_ctr(weight, feature, train_flag=False) 86 | labels.append(y) 87 | p_labels.append(ctr) 88 | if config.ds_ratio > 0: # down sampled, needs to calibrate 89 | bid_price = self.bid_strategy.bid_calib(self.ori_camp_v, self.mu, ctr) 90 | else: 91 | bid_price = self.bid_strategy.bid(ctr) 92 | if bid_price > market_price: 93 | cost_sum += market_price 94 | imp_sum += 1 95 | clk_sum += y 96 | if config.ds_ratio > 0: 97 | revenue_sum = int(revenue_sum - market_price + y * self.ori_camp_v * 1E3) 98 | else: 99 | revenue_sum = int(revenue_sum - market_price + y * self.camp_v * 1E3) 100 | if cost_sum >= self.budget: 101 | break 102 | cpc = 0.0 if clk_sum == 0 else 1.0 * cost_sum / clk_sum * 1E-3 103 | cpm = 0.0 if imp_sum == 0 else 1.0 * cost_sum / imp_sum 104 | ctr = 0.0 if imp_sum == 0 else 1.0 * clk_sum / imp_sum 105 | roi = 0.0 if cost_sum == 0 else 1.0 * (revenue_sum) / cost_sum 106 | auc = roc_auc_score(labels, p_labels) 107 | rmse = math.sqrt(mean_squared_error(labels, p_labels)) 108 | performance = {'bids':bid_sum, 'cpc':cpc, 'cpm':cpm, 109 | 'ctr': ctr, 'revenue':revenue_sum, 110 | 'imps':imp_sum, 'clks':clk_sum, 111 | 'auc': auc, 'rmse': rmse, 112 | 'roi': roi} 113 | return performance 114 | 115 | def get_best_train_log(self): 116 | return self.get_best_log(self.train_log) 117 | 118 | def get_best_test_log(self): 119 | return self.get_best_log(self.test_log) 120 | 121 | def get_best_log(self, logs): 122 | best_log = {} 123 | if len(logs) == 0: 124 | print "ERROR: no record in the log." 125 | else: 126 | best_revenue = -1E10 127 | for log in logs: 128 | revenue = log['performance']['revenue'] 129 | if revenue > best_revenue: 130 | best_revenue = revenue 131 | best_log = log 132 | return best_log 133 | 134 | def output_weight(self, weight, path): 135 | fo = open(path, 'w') 136 | for idx in weight: 137 | fo.write(`idx` + '\t' + `weight[idx]` + '\n') 138 | fo.close() 139 | 140 | def lin_bid(self, weight): 141 | params = range(30, 100, 5) + range(100, 400, 10) + range(400, 800, 50) 142 | base_ctr = self.train_data.get_statistics()['ctr'] 143 | dataset = self.test_data 144 | opt_param = 3000 145 | opt_revenue = -1E10 146 | for param in params: 147 | bid_sum = 0 148 | cost_sum = 0 149 | imp_sum = 0 150 | clk_sum = 0 151 | revenue_sum = 0 152 | labels = [] 153 | p_labels = [] 154 | iter_id = dataset.init_index() 155 | while not dataset.reached_tail(iter_id): #TODO no budget set 156 | bid_sum += 1 157 | data = dataset.get_next_data(iter_id) 158 | y = data[0] 159 | market_price = data[1] 160 | feature = data[2:len(data)] 161 | ctr = tool.estimate_ctr(weight, feature, train_flag=False) 162 | labels.append(y) 163 | p_labels.append(ctr) 164 | bid_price = int(param * ctr / base_ctr) 165 | if bid_price > market_price: 166 | cost_sum += market_price 167 | imp_sum += 1 168 | clk_sum += y 169 | revenue_sum = int(revenue_sum - market_price + y * self.camp_v * 1E3) 170 | if cost_sum >= self.budget: 171 | break 172 | cpc = 0.0 if clk_sum == 0 else 1.0 * cost_sum / clk_sum * 1E-3 173 | cpm = 0.0 if imp_sum == 0 else 1.0 * cost_sum / imp_sum 174 | ctr = 0.0 if imp_sum == 0 else 1.0 * clk_sum / imp_sum 175 | roi = 0.0 if cost_sum == 0 else 1.0 * (revenue_sum) / cost_sum 176 | auc = roc_auc_score(labels, p_labels) 177 | rmse = math.sqrt(mean_squared_error(labels, p_labels)) 178 | performance = {'bids':bid_sum, 'cpc':cpc, 'cpm':cpm, 179 | 'ctr': ctr, 'revenue':revenue_sum, 180 | 'imps':imp_sum, 'clks':clk_sum, 181 | 'auc': auc, 'rmse': rmse, 182 | 'roi': roi} 183 | if performance['revenue'] > opt_revenue: 184 | opt_revenue = performance['revenue'] 185 | opt_param = param 186 | self.opt_param = opt_param 187 | return opt_param 188 | 189 | def replay(self, weight, test_data, budget_prop): 190 | budget = int(1.0 * test_data.get_statistics()['cost_sum'] / budget_prop) 191 | base_ctr = self.train_data.get_statistics()['ctr'] 192 | label = [] 193 | p_labels = [] 194 | bid_sum = 0 195 | cost_sum = 0 196 | imp_sum = 0 197 | clk_sum = 0 198 | revenue_sum = 0 199 | labels = [] 200 | p_labels = [] 201 | iter_id = test_data.init_index() 202 | while not test_data.reached_tail(iter_id): 203 | data = test_data.get_next_data(iter_id) 204 | bid_sum += 1 205 | y = data[0] 206 | mp = data[1] 207 | feature = data[2:len(data)] 208 | ctr = tool.estimate_ctr(weight, feature, train_flag=False) 209 | labels.append(y) 210 | p_labels.append(ctr) 211 | bp = int(self.opt_param * ctr / base_ctr) 212 | # bp = self.bid_strategy.bid(ctr) 213 | if bp > mp: 214 | cost_sum += mp 215 | imp_sum += 1 216 | clk_sum += y 217 | revenue_sum = int(revenue_sum - mp + y * self.camp_v * 1E3) 218 | if cost_sum >= budget: 219 | break 220 | cpc = 0.0 if clk_sum == 0 else 1.0 * cost_sum / clk_sum * 1E-3 221 | cpm = 0.0 if imp_sum == 0 else 1.0 * cost_sum / imp_sum 222 | ctr = 0.0 if imp_sum == 0 else 1.0 * clk_sum / imp_sum 223 | roi = 0.0 if cost_sum == 0 else 1.0 * revenue_sum / cost_sum 224 | auc = roc_auc_score(labels, p_labels) 225 | rmse = math.sqrt(mean_squared_error(labels, p_labels)) 226 | performance = {'bids':bid_sum, 'cpc':cpc, 'cpm':cpm, 227 | 'auc': auc, 'rmse': rmse, 228 | 'ctr': ctr, 'revenue':revenue_sum, 229 | 'imps':imp_sum, 'clks':clk_sum, 230 | 'roi': roi} 231 | return performance 232 | -------------------------------------------------------------------------------- /python/em_model.py: -------------------------------------------------------------------------------- 1 | from lr_model import LrModel 2 | from opt_bid import OptBid 3 | from dataset import Dataset 4 | import math 5 | import random 6 | import copy 7 | import tool 8 | import config 9 | from sklearn.metrics import roc_auc_score 10 | from sklearn.metrics import mean_squared_error 11 | 12 | 13 | class EmModel(LrModel): 14 | def __init__(self, train_data, test_data, model): 15 | LrModel.__init__(self, train_data, test_data) 16 | if not model in config.model_list: 17 | print "Wrong model name when initializing EM model." 18 | exit(-1) 19 | self.model = model 20 | self.em_log = [] 21 | 22 | def init_parameters(self): 23 | self.camp_v = self.train_data.get_statistics()['ecpc'] 24 | if config.ds_ratio > 0: 25 | self.ori_camp_v = self.train_data.get_statistics()['ori_ecpc'] 26 | self.mu = 0.0 27 | self.budget = int(self.test_data.get_statistics()['cost_sum'] / config.budget_prop) 28 | # budget is only used in test phase or M-step 29 | 30 | def init_bid_strategy(self): 31 | self.bid_strategy = OptBid(self.camp_v, self.mu) 32 | 33 | def train(self): 34 | e_stop = False 35 | loop = 0 36 | while not e_stop: 37 | self.e_step() 38 | self.test() 39 | print "E step loop " + `loop+1` + "\t" + `self.test_log[len(self.test_log)-1]['performance']` 40 | e_stop = tool.judge_stop(self.test_log) 41 | loop += 1 42 | # self.test_log.pop(); self.test_log.pop() # delete the last two points 43 | best_log = self.get_best_e_log(self.test_log) 44 | print "Changed weight to the best one. the best revenue in last E phase is " + `best_log['performance']['revenue']` 45 | self.weight = best_log['weight'] 46 | print "E step done." 47 | self.m_step() 48 | print "Optimal mu = " + `self.mu` 49 | print "M step done." 50 | 51 | def e_step(self): 52 | random.seed(10) 53 | train_data = self.train_data 54 | progress = 0.0 55 | iter_id = train_data.init_index() 56 | while not train_data.reached_tail(iter_id): 57 | data = train_data.get_next_data(iter_id) 58 | y = data[0] 59 | feature = data[2:len(data)] 60 | ctr = tool.estimate_ctr(self.weight, feature, train_flag=True) 61 | phi = 1.0 / (1.0 + self.mu) 62 | bp = self.bid_strategy.bid(ctr) 63 | pz = self.train_data.landscape.get_probability(bp) 64 | scale_x = (phi * ctr - y) * phi * math.pow(self.camp_v, 2) * pz * config.em_scale 65 | if config.model_name == 'eu': 66 | scale_x = ctr * (1 - ctr) * scale_x 67 | for idx in feature: 68 | self.weight[idx] = self.weight[idx] * self.reg_update_param - config.lr_alpha * scale_x 69 | # prg = train_data.get_progress(iter_id) 70 | # if prg < 0.9 and prg > (progress + config.train_progress_unit - 1E-3): 71 | # self.test() 72 | # progress += config.train_progress_unit 73 | 74 | def m_step(self): 75 | opt_mu = self.mu 76 | opt_revenue = -1E10 77 | opt_performance = {} 78 | test_data = self.test_data 79 | for mu in config.mu_range: 80 | bid_sum = 0 81 | cost_sum = 0 82 | imp_sum = 0 83 | clk_sum = 0 84 | revenue_sum = 0 85 | labels = [] 86 | p_labels = [] 87 | self.bid_strategy.set_mu(mu) 88 | iter_id = test_data.init_index() 89 | while not test_data.reached_tail(iter_id): 90 | data = test_data.get_next_data(iter_id) 91 | bid_sum += 1 92 | y = data[0] 93 | mp = data[1] 94 | feature = data[2:len(data)] 95 | ctr = tool.estimate_ctr(self.weight, feature, train_flag=False) 96 | labels.append(y) 97 | p_labels.append(ctr) 98 | if config.ds_ratio > 0: # down sampled, needs to calibrate 99 | bp = self.bid_strategy.bid_calib(self.ori_camp_v, mu, ctr) 100 | else: 101 | bp = self.bid_strategy.bid(ctr) 102 | # bp = self.bid_strategy.bid(ctr) 103 | if bp > mp: 104 | cost_sum += mp 105 | imp_sum += 1 106 | clk_sum += y 107 | # revenue_sum = int(revenue_sum - mp + y * self.camp_v * 1E3)\ 108 | if config.ds_ratio > 0: 109 | revenue_sum = int(revenue_sum - mp + y * self.ori_camp_v * 1E3) 110 | else: 111 | revenue_sum = int(revenue_sum - mp + y * self.camp_v * 1E3) 112 | if cost_sum >= self.budget: 113 | break 114 | cpc = 0.0 if clk_sum == 0 else 1.0 * cost_sum / clk_sum * 1E-3 115 | cpm = 0.0 if imp_sum == 0 else 1.0 * cost_sum / imp_sum 116 | ctr = 0.0 if imp_sum == 0 else 1.0 * clk_sum / imp_sum 117 | roi = 0.0 if cost_sum == 0 else 1.0 * revenue_sum / cost_sum 118 | auc = roc_auc_score(labels, p_labels) 119 | rmse = math.sqrt(mean_squared_error(labels, p_labels)) 120 | performance = {'bids':bid_sum, 'cpc':cpc, 'cpm':cpm, 121 | 'auc': auc, 'rmse': rmse, 122 | 'ctr': ctr, 'revenue':revenue_sum, 123 | 'imps':imp_sum, 'clks':clk_sum, 124 | 'roi': roi} 125 | print "current mu = " + `mu` + "\t" + `performance` 126 | if performance['revenue'] > opt_revenue: 127 | opt_revenue = performance['revenue'] 128 | opt_performance = performance 129 | opt_mu = mu 130 | # reset the value of mu in both bidding function and model inner parameter 131 | self.bid_strategy.set_mu(opt_mu) 132 | self.mu = opt_mu 133 | log = self.make_log(self.weight, opt_performance) 134 | log['m'] = True 135 | self.test_log.append(log) 136 | self.em_log.append(log) 137 | 138 | def make_log(self, weight, performance): 139 | log = {} 140 | log['weight'] = copy.deepcopy(weight) 141 | log['performance'] = copy.deepcopy(performance) 142 | log['mu'] = self.mu 143 | return log 144 | 145 | def get_best_e_log(self, logs): 146 | best_log = {} 147 | if len(logs) == 0: 148 | print "ERROR: no record in the log." 149 | else: 150 | best_revenue = -1E10 151 | idx = len(logs)-1 152 | while idx>=0 and not 'm' in logs[idx]: 153 | log = logs[idx] 154 | revenue = log['performance']['revenue'] 155 | if revenue > best_revenue: 156 | best_revenue = revenue 157 | best_log = log 158 | idx -= 1 159 | return best_log 160 | 161 | # def replay(self, weight, test_data, budget_prop): 162 | # budget = int(1.0 * test_data.get_statistics()['cost_sum'] / budget_prop) 163 | # mu = self.mu 164 | # label = [] 165 | # p_labels = [] 166 | # bid_sum = 0 167 | # cost_sum = 0 168 | # imp_sum = 0 169 | # clk_sum = 0 170 | # revenue_sum = 0 171 | # labels = [] 172 | # p_labels = [] 173 | # iter_id = test_data.init_index() 174 | # while not test_data.reached_tail(iter_id): 175 | # data = test_data.get_next_data(iter_id) 176 | # bid_sum += 1 177 | # y = data[0] 178 | # mp = data[1] 179 | # feature = data[2:len(data)] 180 | # ctr = tool.estimate_ctr(weight, feature, train_flag=False) 181 | # labels.append(y) 182 | # p_labels.append(ctr) 183 | # if config.ds_ratio > 0: # down sampled, needs to calibrate 184 | # bp = self.bid_strategy.bid_calib(self.ori_camp_v, mu, ctr) 185 | # else: 186 | # bp = self.bid_strategy.bid(ctr) 187 | # # bp = self.bid_strategy.bid(ctr) 188 | # if bp > mp: 189 | # cost_sum += mp 190 | # imp_sum += 1 191 | # clk_sum += y 192 | # # revenue_sum = int(revenue_sum - mp + y * self.camp_v * 1E3)\ 193 | # if config.ds_ratio > 0: 194 | # revenue_sum = int(revenue_sum - mp + y * self.ori_camp_v * 1E3) 195 | # else: 196 | # revenue_sum = int(revenue_sum - mp + y * self.camp_v * 1E3) 197 | # if cost_sum >= budget: 198 | # break 199 | # cpc = 0.0 if clk_sum == 0 else 1.0 * cost_sum / clk_sum * 1E-3 200 | # cpm = 0.0 if imp_sum == 0 else 1.0 * cost_sum / imp_sum 201 | # ctr = 0.0 if imp_sum == 0 else 1.0 * clk_sum / imp_sum 202 | # roi = 0.0 if cost_sum == 0 else 1.0 * revenue_sum / cost_sum 203 | # auc = roc_auc_score(labels, p_labels) 204 | # rmse = math.sqrt(mean_squared_error(labels, p_labels)) 205 | # performance = {'bids':bid_sum, 'cpc':cpc, 'cpm':cpm, 206 | # 'auc': auc, 'rmse': rmse, 207 | # 'ctr': ctr, 'revenue':revenue_sum, 208 | # 'imps':imp_sum, 'clks':clk_sum, 209 | # 'roi': roi} 210 | # return performance 211 | --------------------------------------------------------------------------------