├── LICENSE
├── README.md
├── exp-data
    ├── data.txt
    ├── test.txt
    └── train.txt
├── python
    ├── control-awr-pid.py
    ├── control-awr-waterlevel-budget.py
    ├── control-awr-waterlevel.py
    ├── control-ecpc-multiex-pid-bid-optimisation.py
    ├── control-ecpc-multiex-pid-eco.py
    ├── control-ecpc-pid-example.py
    ├── control-ecpc-pid.py
    ├── control-ecpc-waterlevel.py
    ├── lryzx.py
    └── make-yzpc.py
├── report
    ├── example-report-single.tsv
    └── example-report-test.tsv
└── scripts
    ├── pid-bid-optimisation.sh
    ├── run_demo_example.sh
    ├── run_ipinyou_campaign.sh
    └── run_lr.sh


/LICENSE:
--------------------------------------------------------------------------------
 1 | 1. INTENT/PURPOSE:
 2 | Non-commercial license of this software (the rtb control code) that was written by Weinan Zhang and Yifei Rong, and is copyrighted by both of them.
 3 | 2. LICENSEE:
 4 | Any person or organization who receives the software with a copy of this license.
 5 | 3. INTELLECTUAL PROPERTY LICENSED:
 6 | The rights to use, copy, modify, and compile the software, subject to the restrictions described in the present document.
 7 | 5. SCOPE OF THE LICENSE
 8 | NON-COMMERCIAL license for research and evaluation purposes ONLY.
 9 | NO right to commercialize the software, or any derivative work, without separate agreement with the copyright owners.
10 | 6. MODIFICATION
11 | License permits licensee to modify the software for their research and evaluation purposes.
12 | 7. REDISTRIBUTION
13 | License permits licensee to redistribute verbatim copies of the software, accompanied with a copy of this license.
14 | License does not permit licensee to redistribute modified versions of the software.
15 | License does not permit licensee to commercialize the software or any derivative work of the software.
16 | 8. FEE/ROYALTY
17 | Licensee pays no royalty for non-commercial license
18 | Licensee and any third parties must enter a new agreement for any use beyond scope of license.
19 | 9. NO WARRANTY
20 | The software is provided "as is" without warranty of any kind, either expressed or implied, including, but not limited to, the implied warranties of merchantability and fitness for a particular purpose. The entire risk as to the quality and performance of the program is with the Licensee.
21 | 10. NO LIABILITY
22 | In no event unless required by applicable law or agreed to in writing will any copyright owner be liable to Licensee for damages, including any general, special, incidental or consequential damages arising out of the use or inability to use the program (including but not limited to loss of data or data being rendered inaccurate or losses sustained by Licensee or third parties or a failure of the program to operate with other programs), even if such holder has been advised of the possibility of such damages.
23 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | Experiment Code for RTB Feedback Control Techniques
  2 | ===========
  3 | 
  4 | This is a repository of the experiment code supporting the paper "Feedback Control of Real-Time Display Advertising" submitted to WSDM 2016.
  5 | 
  6 | For any problems, please report the issues here or contact [Weinan Zhang](http://www0.cs.ucl.ac.uk/staff/w.zhang/).
  7 | 
  8 | After cloning the repository, you can start by running the demo in the `scripts` folder:
  9 | ```
 10 | $ bash run_demo_example.sh
 11 | ```
 12 | You should get the following experiment results:
 13 | ```
 14 | Example of PID control eCPC.
 15 | Data sample from campaign 1458 from iPinYou dataset.
 16 | Reference eCPC: 40000
 17 | test performance:
 18 | round	ecpc	phi	total_click	click_ratio	win_ratio	total_cost	ref
 19 | 0	54062.0000	0.0000	2	0.0308	0.0250	162186.0000	40000.0
 20 | 1	54230.3333	-2.0000	2	0.0308	0.0251	162691.0000	40000.0
 21 | 2	40755.2500	-2.0000	3	0.0462	0.0254	163021.0000	40000.0
 22 | 3	51294.7500	0.9408	3	0.0462	0.0343	205179.0000	40000.0
 23 | 4	51399.7500	-2.0000	3	0.0462	0.0347	205599.0000	40000.0
 24 | 5	51478.0000	-2.0000	3	0.0462	0.0349	205912.0000	40000.0
 25 | 6	51557.2500	-2.0000	3	0.0462	0.0352	206229.0000	40000.0
 26 | 7	51574.7500	-2.0000	3	0.0462	0.0353	206299.0000	40000.0
 27 | 8	51626.0000	-2.0000	3	0.0462	0.0354	206504.0000	40000.0
 28 | 9	51692.7500	-2.0000	3	0.0462	0.0357	206771.0000	40000.0
 29 | 10	51809.0000	-2.0000	3	0.0462	0.0359	207236.0000	40000.0
 30 | 11	51882.5000	-2.0000	3	0.0462	0.0361	207530.0000	40000.0
 31 | 12	41629.4000	-2.0000	4	0.0615	0.0363	208147.0000	40000.0
 32 | 13	43198.4000	0.0756	4	0.0615	0.0389	215992.0000	40000.0
 33 | 14	43286.2000	-1.8943	4	0.0615	0.0391	216431.0000	40000.0
 34 | 15	43337.6000	-1.7934	4	0.0615	0.0393	216688.0000	40000.0
 35 | 16	43408.8000	-1.8188	4	0.0615	0.0394	217044.0000	40000.0
 36 | 17	43477.0000	-1.8597	4	0.0615	0.0398	217385.0000	40000.0
 37 | 18	43581.4000	-1.8970	4	0.0615	0.0401	217907.0000	40000.0
 38 | 19	36385.5000	-1.9564	5	0.0769	0.0403	218313.0000	40000.0
 39 | 20	49287.1429	2.3752	6	0.0923	0.0602	345010.0000	40000.0
 40 | 21	49311.4286	-2.0000	6	0.0923	0.0603	345180.0000	40000.0
 41 | 22	49376.1429	-2.0000	6	0.0923	0.0605	345633.0000	40000.0
 42 | 23	49387.5714	-2.0000	6	0.0923	0.0607	345713.0000	40000.0
 43 | 24	49407.2857	-2.0000	6	0.0923	0.0609	345851.0000	40000.0
 44 | 25	49461.4286	-2.0000	6	0.0923	0.0612	346230.0000	40000.0
 45 | 26	49508.4286	-2.0000	6	0.0923	0.0616	346559.0000	40000.0
 46 | 27	43407.2500	-2.0000	7	0.1077	0.0619	347258.0000	40000.0
 47 | 28	43491.2500	-1.3143	7	0.1077	0.0624	347930.0000	40000.0
 48 | 29	43528.3750	-1.9783	7	0.1077	0.0626	348227.0000	40000.0
 49 | 30	43551.6250	-1.9957	7	0.1077	0.0628	348413.0000	40000.0
 50 | 31	38747.3333	-2.0000	8	0.1231	0.0631	348726.0000	40000.0
 51 | 32	43250.1111	0.8766	8	0.1231	0.0727	389251.0000	40000.0
 52 | 33	38951.5000	-2.0000	9	0.1385	0.0730	389515.0000	40000.0
 53 | 34	35326.5000	0.7218	11	0.1692	0.0810	423918.0000	40000.0
 54 | 35	34818.2500	2.4716	15	0.2308	0.1020	557092.0000	40000.0
 55 | 36	39457.0588	2.4192	16	0.2462	0.1227	670770.0000	40000.0
 56 | 37	39675.2353	-0.4143	16	0.2462	0.1241	674479.0000	40000.0
 57 | 38	40023.9412	-0.0810	16	0.2462	0.1274	680407.0000	40000.0
 58 | 39	40259.4706	-0.2685	16	0.2462	0.1288	684411.0000	40000.0
 59 | 
 60 | train performance:
 61 | round	ecpc	phi	total_click	click_ratio	win_ratio	total_cost	ref
 62 | 0	39157.0000	0.0000	3	0.0380	0.0250	156628.0000	40000.0
 63 | 1	44887.2500	0.4223	3	0.0380	0.0309	179549.0000	40000.0
 64 | 2	44999.7500	-2.0000	3	0.0380	0.0312	179999.0000	40000.0
 65 | 3	45053.7500	-2.0000	3	0.0380	0.0314	180215.0000	40000.0
 66 | 4	45189.2500	-2.0000	3	0.0380	0.0316	180757.0000	40000.0
 67 | 5	36220.8000	-2.0000	4	0.0506	0.0319	181104.0000	40000.0
 68 | 6	43019.8571	2.7709	6	0.0759	0.0542	301139.0000	40000.0
 69 | 7	43100.8571	-2.0000	6	0.0759	0.0546	301706.0000	40000.0
 70 | 8	43236.7143	-1.5802	6	0.0759	0.0555	302657.0000	40000.0
 71 | 9	37924.2500	-1.6568	7	0.0886	0.0563	303394.0000	40000.0
 72 | 10	42490.0000	1.5463	8	0.1013	0.0746	382410.0000	40000.0
 73 | 11	42567.1111	-1.7269	8	0.1013	0.0754	383104.0000	40000.0
 74 | 12	42702.2222	-1.3191	8	0.1013	0.0764	384320.0000	40000.0
 75 | 13	42793.2222	-1.3952	8	0.1013	0.0771	385139.0000	40000.0
 76 | 14	38672.3000	-1.4391	9	0.1139	0.0776	386723.0000	40000.0
 77 | 15	32223.0000	1.0439	13	0.1646	0.0918	451122.0000	40000.0
 78 | 16	37882.1250	4.5092	15	0.1899	0.1165	606114.0000	40000.0
 79 | 17	35758.8333	0.4709	17	0.2152	0.1259	643659.0000	40000.0
 80 | 18	41013.5263	2.3150	18	0.2278	0.1488	779257.0000	40000.0
 81 | 19	39210.5500	-1.0511	19	0.2405	0.1513	784211.0000	40000.0
 82 | 20	38076.5000	0.5569	21	0.2658	0.1640	837683.0000	40000.0
 83 | 21	38038.5652	1.0590	22	0.2785	0.1733	874887.0000	40000.0
 84 | 22	37106.1600	0.9703	24	0.3038	0.1849	927654.0000	40000.0
 85 | 23	38529.9231	1.5288	25	0.3165	0.2015	1001778.0000	40000.0
 86 | 24	36818.7857	0.5828	27	0.3418	0.2105	1030926.0000	40000.0
 87 | 25	40309.0357	1.7550	27	0.3418	0.2285	1128653.0000	40000.0
 88 | 26	40427.8214	-0.5105	27	0.3418	0.2307	1131979.0000	40000.0
 89 | 27	40534.9643	-0.2332	27	0.3418	0.2319	1134979.0000	40000.0
 90 | 28	40656.0714	-0.2861	27	0.3418	0.2334	1138370.0000	40000.0
 91 | 29	40795.9286	-0.3487	27	0.3418	0.2359	1142286.0000	40000.0
 92 | 30	40926.1786	-0.4213	27	0.3418	0.2386	1145933.0000	40000.0
 93 | 31	41039.1429	-0.4864	27	0.3418	0.2404	1149096.0000	40000.0
 94 | 32	41107.2857	-0.5422	27	0.3418	0.2415	1151004.0000	40000.0
 95 | 33	41178.2857	-0.5729	27	0.3418	0.2429	1152992.0000	40000.0
 96 | 34	41251.7143	-0.6099	27	0.3418	0.2445	1155048.0000	40000.0
 97 | 35	41313.7857	-0.6481	27	0.3418	0.2457	1156786.0000	40000.0
 98 | 36	41369.9286	-0.6793	27	0.3418	0.2467	1158358.0000	40000.0
 99 | 37	41422.4286	-0.7082	27	0.3418	0.2473	1159828.0000	40000.0
100 | 38	40045.8276	-0.7355	28	0.3544	0.2484	1161329.0000	40000.0
101 | 39	39000.8000	0.0957	29	0.3671	0.2506	1170024.0000	40000.0
102 | ```
103 | This is a demo of controlling eCPC to 40,000 (RMB cent in CPM) with a PID controller. The example data files `exp-data/train.txt` and `exp-data/test.txt` are sampled from campaign 1458 of the iPinYou dataset. We can observe that the eCPC successfully converges to within the error band [36000, 44000] in both the train and test stages.
104 | 
105 | The current version of repository contains the code supporting the experiment Sections 4.2, 4.3 and 4.5.
106 | 
107 | ### Feature Engineering for Large-scale Experiment
108 | For running further large-scale experiments, you will rely on another repository which is written for [iPinYou dataset](http://data.computational-advertising.org) feature engineering.
109 | 
110 | Please check our GitHub project [make-ipinyou-data](https://github.com/wnzhang/make-ipinyou-data). After downloading the dataset, simply run `make all` to generate the standardised data that is used in the bid optimisation tasks.
111 | 


--------------------------------------------------------------------------------
/python/control-awr-pid.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | import sys
  3 | import random
  4 | import math
  5 | 
  6 | advs_train_bids = {"1458": 3083056, "2259": 835556, "2261": 687617, "2821": 1322561, "2997": 312437, "3358": 1742104, "3386": 2847802, "3427": 2593765, "3476": 1970360}
  7 | advs_test_bids = {"1458": 614638, "2259": 417197, "2261": 343862, "2821": 661964, "2997": 156063, "3358": 300928, "3386": 545421, "3427": 536795, "3476": 523848}
  8 | advs_train_clicks = {"1458": 2454, "2259": 280, "2261": 207, "2821": 843, "2997": 1386, "3358": 1358, "3386": 2076, "3427": 1926, "3476": 1027}
  9 | advs_test_clicks = {"1458": 543, "2259": 131, "2261": 97, "2821": 394, "2997": 533, "3358": 339, "3386": 496, "3427": 395, "3476": 302}
 10 | 
 11 | advertiser = "2259"
 12 | mode = "single"
 13 | basebid = 93
 14 | ref = 0.6
 15 | print "%s\t%s\t%d\t%f" % (advertiser, mode, basebid, ref)
 16 | 
 17 | # parameter setting
 18 | minbid = 5
 19 | cntr_rounds = 40
 20 | para_p = 20
 21 | para_i = 8
 22 | para_d = 3
 23 | div = 1e-6
 24 | para_ps = range(0, 40, 5)
 25 | para_is = range(0, 25, 5)
 26 | para_ds = range(0, 25, 5)
 27 | settle_con = 0.1
 28 | rise_con = 0.9
 29 | min_phi = -2.0
 30 | max_phi = 100
 31 | 
 32 | def ints(s):
 33 |     res = []
 34 |     for ss in s:
 35 |         res.append(int(ss))
 36 |     return res
 37 | 
 38 | def sigmoid(p):
 39 |     return 1.0 / (1.0 + math.exp(-p))
 40 | 
 41 | def estimator_lr(feats):
 42 |     pred = 0.0
 43 |     for feat in feats:
 44 |         if feat in featWeight:
 45 |             pred += featWeight[feat]
 46 |     pred = sigmoid(pred)
 47 |     return pred
 48 | 
 49 | # bidding functions
 50 | def lin(pctr, basectr, basebid):
 51 |     return int(pctr *  basebid / basectr)
 52 | 
 53 | # calculate settling time
 54 | def cal_settling_time(winrs, ref):
 55 |     settled = False
 56 |     settling_time = 0
 57 |     for key, value in winrs.iteritems():
 58 |         error = ref - value
 59 |         if abs(error) / ref <= settle_con and settled == False:
 60 |             settled = True
 61 |             settling_time = key
 62 |         elif abs(error) / ref > settle_con:
 63 |             settled = False
 64 |             settling_time = cntr_rounds
 65 |     return settling_time
 66 | 
 67 | # # calculate steady-state error
 68 | def cal_rmse_ss(winrs, ref):
 69 |     settling_time = cal_settling_time(winrs, ref)
 70 |     rmse = 0.0
 71 |     if settling_time >= cntr_rounds:
 72 |         settling_time = cntr_rounds - 1
 73 |     for round in range(settling_time, cntr_rounds):
 74 |         rmse += (winrs[round] - ref) * (winrs[round] - ref)
 75 |     rmse /= (cntr_rounds - settling_time)
 76 |     rmse = math.sqrt(rmse) / ref # weinan: relative rmse
 77 |     return rmse
 78 | 
 79 | # # calculate steady-state standard deviation
 80 | def cal_sd_ss(winrs, ref):
 81 |     settling_time = cal_settling_time(winrs, ref)
 82 |     if settling_time >= cntr_rounds:
 83 |         settling_time = cntr_rounds - 1
 84 |     sum2 = 0.0
 85 |     sum = 0.0
 86 |     for round in range(settling_time, cntr_rounds):
 87 |         sum2 += winrs[round] * winrs[round]
 88 |         sum += winrs[round]
 89 |     n = cntr_rounds - settling_time
 90 |     mean = sum / n
 91 |     sd = math.sqrt(sum2 / n - mean * mean) / mean # weinan: relative sd
 92 |     return sd
 93 | 
 94 | # calculate rise time
 95 | def cal_rise_time(winrs, ref, rise_con):
 96 |     rise_time = 0
 97 |     for key, value in winrs.iteritems():
 98 |         error = ref - value
 99 |         if abs(error) / ref <= (1 - rise_con):
100 |             rise_time = key
101 |             break
102 |     return rise_time
103 | 
104 | # calculate percentage overshoot
def cal_overshoot(winrs, ref):
    """Percentage overshoot of the series `winrs` relative to `ref`.

    `winrs` maps round index -> value and must contain round 0.
    - Starting above `ref`: how far the minimum dips below `ref`, as a
      percentage of `ref`; 0.0 if it never does.
    - Starting below `ref`: how far the maximum rises above `ref`, as a
      percentage of `ref`; 0.0 if it never does.
    - Starting exactly at `ref`: the largest absolute deviation from `ref`,
      as a percentage of `ref`. The previous code compared an absolute
      deviation against a stored raw value, which gave a meaningless result.
    """
    start = winrs[0]
    values = list(winrs.values())
    if start > ref:
        lowest = min(values)
        if lowest < ref:
            return (ref - lowest) * 100.0 / ref
        return 0.0
    if start < ref:
        highest = max(values)
        if highest > ref:
            return (highest - ref) * 100.0 / ref
        return 0.0
    largest_deviation = max(abs(v - ref) for v in values)
    return largest_deviation * 100.0 / ref
130 | 
131 | # control function
def _run_pid_stage(clicks, pctrs, prices, click_total, bid_total,
                   cntr_rounds, ref, para_p, para_i, para_d, emit):
    """Replay one bid log under PID control of the cumulative win rate.

    The log (parallel lists `clicks`, `pctrs`, `prices`) is cut into
    `cntr_rounds` consecutive slices; the last slice also takes the
    remainder. Before each round the control signal phi is recomputed from
    the cumulative win rate of the previous rounds:
      round 0   phi = 0 and every bid is a flat 1000.0
      round 1   P and I terms only
      round 2+  P, I and D terms (D uses the last two win rates)
    phi is clamped to the module-level [min_phi, max_phi]. The bid is
    max(minbid, lin(pctr, basectr, basebid) * exp(phi)), and it wins when it
    is strictly above the logged price.

    After every round `emit(round, winr, phi, total_clks, click_ratio,
    win_ratio, total_cost, ecpc)` is called with the cumulative figures;
    click_ratio and win_ratio are relative to `click_total` / `bid_total`.

    Returns (winrs, cost_by_round), both dicts keyed by round index.
    """
    winrs = {}
    cost_by_round = {}
    n_bids = len(pctrs)
    cntr_size = n_bids // cntr_rounds
    bid_count = 0
    error_sum = 0.0
    total_cost = 0.0
    total_clks = 0
    total_wins = 0
    for rnd in range(cntr_rounds):
        if rnd == 0:
            phi = 0.0
        else:
            error = ref - winrs[rnd - 1]
            error_sum += error
            phi = para_p * error + para_i * error_sum
            if rnd >= 2:
                phi += para_d * (winrs[rnd - 2] - winrs[rnd - 1])

        # clamp the control signal to [min_phi, max_phi]
        if phi <= min_phi:
            phi = min_phi
        elif phi >= max_phi:
            phi = max_phi

        begin = rnd * cntr_size
        # the last round also covers the remainder of the log
        end = n_bids if rnd == cntr_rounds - 1 else begin + cntr_size

        scale = math.exp(phi)  # constant within a round
        for i in range(begin, end):
            bid_count += 1
            if rnd == 0:
                bid = 1000.0
            else:
                bid = max(minbid, lin(pctrs[i], basectr, basebid) * scale)
            if bid > prices[i]:
                total_wins += 1
                total_clks += clicks[i]
                total_cost += prices[i]

        cost_by_round[rnd] = total_cost
        winrs[rnd] = total_wins * 1.0 / bid_count
        ecpc = total_cost / (total_clks + 1)  # +1 avoids dividing by zero clicks
        emit(rnd, winrs[rnd], phi, total_clks,
             total_clks * 1.0 / click_total,
             total_wins * 1.0 / bid_total,
             total_cost, ecpc)
    return winrs, cost_by_round


def _record_metrics(winrs, ref):
    """Append the control-quality metrics of `winrs` to the report lists."""
    overshoot.append(cal_overshoot(winrs, ref))
    settling_time.append(cal_settling_time(winrs, ref))
    rise_time.append(cal_rise_time(winrs, ref, rise_con))
    rmse_ss.append(cal_rmse_ss(winrs, ref))
    sd_ss.append(cal_sd_ss(winrs, ref))


def control(cntr_rounds, ref, para_p, para_i, para_d, outfile):
    """Run the PID replay on the test and train logs and write a TSV trace.

    `outfile` receives a header, one row per test round ("test"), the
    matching "test-ref" rows, then the same for the train log ("train",
    "train-ref"). The metrics of the test run (not the train run) are
    appended to the module-level report lists.

    The file is opened in a `with` block so it is closed even if the replay
    raises.
    """
    row_fmt = "%d\t%.4f\t%s\t%.4f\t%d\t%.4f\t%.4f\t%.4f\t%.4f\t%.1f\n"
    with open(outfile, 'w') as fo:
        fo.write("round\twinr\tstage\tphi\ttotal_click\tclick_ratio\twin_ratio\ttotal_cost\tecpc\tref\n")

        def row_writer(stage):
            def emit(rnd, winr, phi, clks, click_ratio, win_ratio, cost, ecpc):
                fo.write(row_fmt % (rnd, winr, stage, phi, clks, click_ratio,
                                    win_ratio, cost, ecpc, ref))
            return emit

        def write_ref_rows(stage, cost_by_round):
            for rnd in range(0, cntr_rounds):
                fo.write(row_fmt % (rnd, ref, stage, 0.0, 0, 0.0, 0.0, 0.0,
                                    cost_by_round[rnd], ref))

        winrs, tc = _run_pid_stage(
            y, yp, mplist,
            advs_test_clicks[advertiser], advs_test_bids[advertiser],
            cntr_rounds, ref, para_p, para_i, para_d, row_writer("test"))
        write_ref_rows("test-ref", tc)
        _record_metrics(winrs, ref)

        # train
        _, tc_train = _run_pid_stage(
            y_train, yp_train, mplist_train,
            advs_train_clicks[advertiser], advs_train_bids[advertiser],
            cntr_rounds, ref, para_p, para_i, para_d, row_writer("train"))
        write_ref_rows("train-ref", tc_train)


def control_test(cntr_rounds, ref, para_p, para_i , para_d):
    """Run the PID replay on the test and train logs, printing each round.

    Prints one tab-separated line per round for the test log and then for
    the train log. The metrics of the test run are appended to the
    module-level report lists.
    """
    def emit(rnd, winr, phi, clks, click_ratio, win_ratio, cost, ecpc):
        print("%d\t%.4f\t%.4f\t%d\t%.4f\t%.4f\t%.1f\t%.4f\t%.4f" % (
            rnd, winr, phi, clks, click_ratio, win_ratio, ref, ecpc, cost))

    winrs, _ = _run_pid_stage(
        y, yp, mplist,
        advs_test_clicks[advertiser], advs_test_bids[advertiser],
        cntr_rounds, ref, para_p, para_i, para_d, emit)
    _record_metrics(winrs, ref)

    # train
    _run_pid_stage(
        y_train, yp_train, mplist_train,
        advs_train_clicks[advertiser], advs_train_bids[advertiser],
        cntr_rounds, ref, para_p, para_i, para_d, emit)
392 | 
random.seed(10)

# if len(sys.argv) != 3:
#     print 'campaignId mode'
#     exit(-1)

# Bid logs as parallel lists: market price, click label, predicted CTR.
mplist = []
y = []
yp = []
mplist_train = []
y_train = []
yp_train = []
featWeight = {}  # feature id -> logistic-regression weight

data_dir = "../../make-ipinyou-data/" + advertiser + "/"

# initialize the lr
# Each line is "<feature id> <weight>"; blank lines are skipped.
with open(data_dir + "train.yzx.txt.lr.weight", 'r') as fi:
    for line in fi:
        s = line.strip().split()
        if not s:
            continue
        featWeight[int(s[0])] = float(s[1])


def _load_yzx(path, clicks, preds, prices):
    """Append one entry per line of the yzx file at `path` to the three lists.

    After stripping every ":1" substring, a line is whitespace-separated
    integers: click label, market price, then feature ids. The prediction is
    estimator_lr() of the feature ids. Blank lines are skipped. The file is
    closed even when a line fails to parse.
    """
    with open(path, 'r') as fi:
        for line in fi:
            data = ints(line.strip().replace(":1", "").split())
            if not data:
                continue
            fsid = 2  # feature start id
            pred = estimator_lr(data[fsid:])
            clicks.append(data[0])
            preds.append(pred)
            prices.append(data[1])


_load_yzx(data_dir + "test.yzx.txt", y, yp, mplist)
_load_yzx(data_dir + "train.yzx.txt", y_train, yp_train, mplist_train)

# average predicted CTR on the train log; lin() scales bids relative to it
basectr = sum(yp_train) / float(len(yp_train))
444 | 
445 | # for reporting
# Parallel lists, one entry per control run: control()/control_test() append
# the metrics, the dispatch below appends the matching parameter row.
parameters = []
overshoot = []
settling_time = []
rise_time = []
rmse_ss = []
sd_ss = []
report_path = ""


def _parameter_row(p, i, d):
    """Tab-separated run description for the given PID gains."""
    return "\t".join([advertiser, str(cntr_rounds), str(basebid), str(ref),
                      str(p), str(i), str(d), str(settle_con), str(rise_con)])


def _trace_path(p, i, d):
    """Path of the per-round TSV trace written by control() for these gains."""
    return "../exp-data/win_" + advertiser + "_ref=" + str(ref) + "_p=" + \
           str(p) + "_i=" + str(i) + "_d=" + str(d) + ".tsv"


def _write_report(path):
    """Write the header and one line per recorded run to `path`.

    Every row ends with a newline; without it the rows of a batch sweep all
    ran together on one line.
    """
    with open(path, 'w') as rout:
        rout.write("campaign\ttotal-rounds\tbase-bid\tref\tp\ti\td\tsettle-con\trise-con\trise-time\tsettling-time\tovershoot\trmse-ss\tsd-ss\n")
        for idx, val in enumerate(parameters):
            rout.write(val + "\t" + str(rise_time[idx]) + "\t" + str(settling_time[idx]) + "\t" +
                       str(overshoot[idx]) + "\t" + str(rmse_ss[idx]) + "\t" + str(sd_ss[idx]) + "\n")


if mode == "test": # test mode
    report_path = "../report/report-win-test.tsv"
    parameters.append(_parameter_row(para_p, para_i, para_d))
    control_test(cntr_rounds, ref, para_p, para_i, para_d)
    _write_report(report_path)
elif mode == "batch": # batch mode
    report_path = "../report/report-win-batch.tsv"
    for temp_p in para_ps:
        for temp_i in para_is:
            for temp_d in para_ds:
                para_p = temp_p * 1.0 * div
                para_i = temp_i * 1.0 * div
                para_d = temp_d * 1.0 * div
                out_path = _trace_path(para_p, para_i, para_d)
                control(cntr_rounds, ref, para_p, para_i, para_d, out_path)
                parameters.append(_parameter_row(para_p, para_i, para_d))
    _write_report(report_path)
elif mode == "single": # single mode
    out_path = _trace_path(para_p, para_i, para_d)
    control(cntr_rounds, ref, para_p, para_i, para_d, out_path)
    report_path = "../report/report-win-single.tsv"
    parameters.append(_parameter_row(para_p, para_i, para_d))
    _write_report(report_path)
else:
    print("wrong mode entered")


--------------------------------------------------------------------------------
/python/control-awr-waterlevel-budget.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | import sys
  3 | import random
  4 | import math
  5 | 
  6 | advs_train_bids = {"1458": 3083056, "2259": 835556, "2261": 687617, "2821": 1322561, "2997": 312437, "3358": 1742104, "3386": 2847802, "3427": 2593765, "3476": 1970360}
  7 | advs_test_bids = {"1458": 614638, "2259": 417197, "2261": 343862, "2821": 661964, "2997": 156063, "3358": 300928, "3386": 545421, "3427": 536795, "3476": 523848}
  8 | advs_train_clicks = {"1458": 2454, "2259": 280, "2261": 207, "2821": 843, "2997": 1386, "3358": 1358, "3386": 2076, "3427": 1926, "3476": 1027}
  9 | advs_test_clicks = {"1458": 543, "2259": 131, "2261": 97, "2821": 394, "2997": 533, "3358": 339, "3386": 496, "3427": 395, "3476": 302}
 10 | advs_train_cost = {"1458": 212400000, "2259": 77754000, "2261": 61610000, "2821": 118082000, "2997": 19689000, "3358": 160943000, "3386": 219066000, "3427": 210239000, "3476": 156088000}
 11 | advs_test_cost = {"1458": 45216000, "2259": 43497000, "2261": 28795000, "2821": 68257000, "2997": 8617000, "3358": 34159000, "3386": 45715000, "3427": 46356000, "3476": 43627000}
 12 | 
 13 | advertiser = "2997"
 14 | mode = "test"
 15 | basebid = 63
 16 | ref = 0.6
 17 | temp = 0.6
 18 | print "%s\t%s\t%d\t%f" % (advertiser, mode, basebid, ref)
 19 | 
 20 | # parameter setting
 21 | minbid = 5
 22 | cntr_rounds = 40
 23 | para_gamma = 60
 24 | div = 1
 25 | para_gammas = range(60, 120, 5)
 26 | settle_con = 0.1
 27 | rise_con = 0.9
 28 | min_phi = -100
 29 | max_phi = 1.5
 30 | damping = 0.25
 31 | budget_train = damping * advs_train_cost[advertiser]
 32 | budget_test = damping * advs_test_cost[advertiser]
 33 | print "test-budget: " + str(budget_test)
 34 | print "train-budget: " + str(budget_train)
 35 | max_ref = 1.2 * ref
 36 | min_ref = ref / 1.2
 37 | max_ref = ref * 1.05
 38 | min_ref = ref * 0.95
 39 | para_z = 20.0
 40 | 
 41 | def ints(s):
 42 |     res = []
 43 |     for ss in s:
 44 |         res.append(int(ss))
 45 |     return res
 46 | 
 47 | def sigmoid(p):
 48 |     return 1.0 / (1.0 + math.exp(-p))
 49 | 
 50 | def estimator_lr(feats):
 51 |     pred = 0.0
 52 |     for feat in feats:
 53 |         if feat in featWeight:
 54 |             pred += featWeight[feat]
 55 |     pred = sigmoid(pred)
 56 |     return pred
 57 | 
 58 | # bidding functions
 59 | def lin(pctr, basectr, basebid):
 60 |     return int(pctr *  basebid / basectr)
 61 | 
 62 | # calculate settling time
 63 | def cal_settling_time(winrs, ref):
 64 |     settled = False
 65 |     settling_time = 0
 66 |     for key, value in winrs.iteritems():
 67 |         error = ref - value
 68 |         if abs(error) / ref <= settle_con and settled == False:
 69 |             settled = True
 70 |             settling_time = key
 71 |         elif abs(error) / ref > settle_con:
 72 |             settled = False
 73 |             settling_time = cntr_rounds
 74 |     return settling_time
 75 | 
 76 | # # calculate steady-state error
 77 | def cal_rmse_ss(winrs, ref):
 78 |     settling_time = cal_settling_time(winrs, ref)
 79 |     rmse = 0.0
 80 |     if settling_time >= cntr_rounds:
 81 |         settling_time = cntr_rounds - 1
 82 |     for round in range(settling_time, cntr_rounds):
 83 |         rmse += (winrs[round] - ref) * (winrs[round] - ref)
 84 |     rmse /= (cntr_rounds - settling_time)
 85 |     rmse = math.sqrt(rmse) / ref # weinan: relative rmse
 86 |     return rmse
 87 | 
 88 | # # calculate steady-state standard deviation
 89 | def cal_sd_ss(winrs, ref):
 90 |     settling_time = cal_settling_time(winrs, ref)
 91 |     if settling_time >= cntr_rounds:
 92 |         settling_time = cntr_rounds - 1
 93 |     sum2 = 0.0
 94 |     sum = 0.0
 95 |     for round in range(settling_time, cntr_rounds):
 96 |         sum2 += winrs[round] * winrs[round]
 97 |         sum += winrs[round]
 98 |     n = cntr_rounds - settling_time
 99 |     mean = sum / n
100 |     sd = math.sqrt(sum2 / n - mean * mean) / mean # weinan: relative sd
101 |     return sd
102 | 
103 | # calculate rise time
def cal_rise_time(winrs, ref, rise_con):
    """Return the first round whose win rate is within (1 - rise_con) of `ref`.

    winrs maps round number -> win rate; a round qualifies when
    abs(ref - value) / ref <= 1 - rise_con. Rounds are examined in ascending
    order of their number (the original relied on dict iteration order, so the
    "first" match was not guaranteed to be the earliest round).

    Returns 0 when no round qualifies, which is indistinguishable from a
    match at round 0 -- kept for compatibility with the existing reports.
    """
    tolerance = 1 - rise_con
    for key in sorted(winrs):
        if abs(ref - winrs[key]) / ref <= tolerance:
            return key
    return 0
112 | 
113 | # calculate percentage overshoot
def cal_overshoot(winrs, ref):
    """Percentage overshoot of the win-rate series relative to `ref`.

    winrs maps round number -> win rate and must contain round 0.
    * Series starting above ref: how far the minimum drops below ref
      (0.0 if it never does).
    * Series starting below ref: how far the maximum rises above ref
      (0.0 if it never does).
    * Series starting exactly on ref: the largest absolute deviation from ref.
      The original compared abs(value - ref) against a stored win-rate value
      in this branch, so it mixed two different quantities and usually
      reported 0; it now reports the peak deviation.
    All results are expressed as a percentage of ref.
    """
    start = winrs[0]
    values = list(winrs.values())
    if start > ref:
        lowest = min(values)
        if lowest < ref:
            return (ref - lowest) * 100.0 / ref
        return 0.0
    if start < ref:
        highest = max(values)
        if highest > ref:
            return (highest - ref) * 100.0 / ref
        return 0.0
    peak_deviation = max(abs(value - ref) for value in values)
    return peak_deviation * 100.0 / ref
139 | 
140 | # control function
def control(cntr_rounds, ref, para_gamma, outfile):
    """Replay the test log under budget-aware win-rate control and dump a trace.

    The module-level test data (y, yp, mplist) is split into `cntr_rounds`
    consecutive chunks; the last chunk also takes the remainder. Per round:
      * round 0 bids a flat 1000.0 on every impression (phi = 0);
      * later rounds derive a per-round reference temp_ref from the remaining
        budget: a = (budget_test - cost so far) / (para_z * remaining bids),
        temp_ref = (sqrt(a^2 + 4a) - a) / 2, i.e. the positive root of
        x^2 + a*x - a = 0, clamped to [min_ref, max_ref];
      * error = previous round's wins / (temp_ref * total bids) - 1 / cntr_rounds
        and phi = -para_gamma * error, clamped to [min_phi, max_phi];
      * each bid is max(minbid, lin(pctr, basectr, basebid) * exp(phi)) and an
        impression is won when the bid is strictly above its market price.

    Arguments:
      cntr_rounds -- number of control rounds
      ref         -- reference win rate used for the reported metrics
      para_gamma  -- controller gain
      outfile     -- path of the TSV trace to (over)write

    Side effects: writes `outfile` (one "test" row per round followed by one
    "test-ref" row per round) and appends one value each to the module-level
    overshoot, settling_time, rise_time, rmse_ss and sd_ss lists.

    Changes from the previous version: the second header column is now
    "winr" (it was labelled "ecpc" although it holds the cumulative win rate
    and the real ecpc column comes later); the file is closed even when an
    error occurs mid-run; unused per-round cost/click accumulators are gone.
    """
    row_fmt = "%d\t%.4f\t%s\t%.4f\t%d\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f\n"
    winrs = {}        # round -> cumulative win rate
    ecpcs = {}        # round -> cumulative cost / (clicks + 1)
    win_nums = {}     # round -> wins inside that round only
    temp_ref = {0: temp}
    tc = {}           # round -> cumulative cost
    bid_count = 0
    total_cost = 0.0
    total_clks = 0
    total_wins = 0
    cntr_size = int(len(yp) / cntr_rounds)

    fo = open(outfile, 'w')
    try:
        fo.write("round\twinr\tstage\tphi\ttotal_click\tclick_ratio\twin_ratio\ttotal_cost\tecpc\tref\n")
        for rnd in range(0, cntr_rounds):
            if rnd == 0:
                phi = 0.0
            else:
                a = (budget_test - tc[rnd - 1]) / (para_z * (len(yp) - bid_count))
                if a < 0:
                    # budget already exhausted; the clamp below lifts this to min_ref
                    temp_ref[rnd] = 0
                else:
                    temp_ref[rnd] = (math.sqrt(a * a + 4 * a) - a) / 2
                if temp_ref[rnd] <= min_ref:
                    temp_ref[rnd] = min_ref
                elif temp_ref[rnd] >= max_ref:
                    temp_ref[rnd] = max_ref

                error = win_nums[rnd - 1] * 1.0 / (temp_ref[rnd] * len(yp)) - 1.0 / cntr_rounds
                phi = para_gamma * (-1) * error

            # keep the bid-scaling exponent inside its allowed range
            if phi <= min_phi:
                phi = min_phi
            elif phi >= max_phi:
                phi = max_phi

            imp_index = (rnd + 1) * cntr_size
            if rnd == cntr_rounds - 1:
                # the last round also takes the impressions left over by the integer split
                imp_index = imp_index + (len(yp) - cntr_size * cntr_rounds)

            scale = math.exp(phi)  # constant within a round
            win = 0
            for i in range(rnd * cntr_size, imp_index):
                bid_count += 1
                mp = mplist[i]
                if rnd == 0:
                    bid = 1000.0
                else:
                    bid = max(minbid, lin(yp[i], basectr, basebid) * scale)
                if bid > mp:
                    win += 1
                    total_wins += 1
                    total_clks += y[i]
                    total_cost += mp

            winrs[rnd] = total_wins * 1.0 / bid_count
            win_nums[rnd] = win
            tc[rnd] = total_cost
            ecpcs[rnd] = total_cost / (total_clks + 1)
            click_ratio = total_clks * 1.0 / advs_test_clicks[advertiser]
            win_ratio = total_wins * 1.0 / advs_test_bids[advertiser]
            fo.write(row_fmt % (rnd, winrs[rnd], "test", phi, total_clks, click_ratio,
                                win_ratio, total_cost, ecpcs[rnd], temp_ref[rnd]))

        # companion rows carrying the per-round reference instead of the win rate
        for rnd in range(0, cntr_rounds):
            fo.write(row_fmt % (rnd, temp_ref[rnd], "test-ref", 0.0, 0, 0.0, 0.0,
                                tc[rnd], 0.0, temp_ref[rnd]))
    finally:
        fo.close()

    overshoot.append(cal_overshoot(winrs, ref))
    settling_time.append(cal_settling_time(winrs, ref))
    rise_time.append(cal_rise_time(winrs, ref, rise_con))
    rmse_ss.append(cal_rmse_ss(winrs, ref))
    sd_ss.append(cal_sd_ss(winrs, ref))

    # NOTE: a replay of the training log used to follow here; it was already
    # commented out (never executed) and has been removed. control_test()
    # still replays the training log.
278 | 
def control_test(cntr_rounds, ref, para_gamma):
    """Replay the test log and then the training log, printing one line per round.

    Both replays use the same budget-aware win-rate controller: round 0 bids a
    flat 1000.0; later rounds derive a reference from the remaining budget,
    clamp it to [min_ref, max_ref], turn the previous round's win count into
    phi = -para_gamma * error (clamped to [min_phi, max_phi]) and bid
    max(minbid, lin(pctr, basectr, basebid) * exp(phi)).

    Arguments:
      cntr_rounds -- number of control rounds each log is split into
      ref         -- reference win rate used for the reported metrics
      para_gamma  -- controller gain

    Side effects: prints a tab-separated line per round for each replay and,
    for the test replay only, appends one value each to the module-level
    overshoot, settling_time, rise_time, rmse_ss and sd_ss lists.
    """

    def replay(clicks, pctrs, prices, budget, click_total, bid_total):
        # Run the controller once over one log; returns round -> cumulative win rate.
        n_bids = len(pctrs)
        chunk = int(n_bids / cntr_rounds)
        win_rates = {}
        round_wins = {}
        spent = {}
        targets = {0: temp}
        seen = 0
        cost_sum = 0.0
        click_sum = 0
        win_sum = 0
        for rnd in range(0, cntr_rounds):
            if rnd == 0:
                phi = 0.0
            else:
                a = (budget - spent[rnd - 1]) / (para_z * (n_bids - seen))
                if a < 0:
                    targets[rnd] = 0
                else:
                    # positive root of x^2 + a*x - a = 0
                    targets[rnd] = (math.sqrt(a * a + 4 * a) - a) / 2
                if targets[rnd] <= min_ref:
                    targets[rnd] = min_ref
                elif targets[rnd] >= max_ref:
                    targets[rnd] = max_ref

                error = round_wins[rnd - 1] * 1.0 / (targets[rnd] * n_bids) - 1.0 / cntr_rounds
                phi = para_gamma * (-1) * error

            stop = (rnd + 1) * chunk
            if rnd == cntr_rounds - 1:
                # final round absorbs the remainder of the integer split
                stop = stop + (n_bids - chunk * cntr_rounds)

            # clamp the bid-scaling exponent
            if phi <= min_phi:
                phi = min_phi
            elif phi >= max_phi:
                phi = max_phi

            scale = math.exp(phi)
            wins_here = 0
            for i in range(rnd * chunk, stop):
                seen += 1
                price = prices[i]
                bid = max(minbid, lin(pctrs[i], basectr, basebid) * scale)
                if rnd == 0:
                    bid = 1000.0
                if bid > price:
                    wins_here += 1
                    win_sum += 1
                    click_sum += clicks[i]
                    cost_sum += price

            spent[rnd] = cost_sum
            win_rates[rnd] = win_sum * 1.0 / seen
            ecpc = cost_sum / (click_sum + 1)
            round_wins[rnd] = wins_here
            click_ratio = click_sum * 1.0 / click_total
            win_ratio = win_sum * 1.0 / bid_total
            print("%d\t%.4f\t%.4f\t%d\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f" % (rnd, win_rates[rnd], phi, click_sum, click_ratio, win_ratio, cost_sum, ecpc, targets[rnd]))
        return win_rates

    # test
    winrs = replay(y, yp, mplist, budget_test,
                   advs_test_clicks[advertiser], advs_test_bids[advertiser])
    overshoot.append(cal_overshoot(winrs, ref))
    settling_time.append(cal_settling_time(winrs, ref))
    rise_time.append(cal_rise_time(winrs, ref, rise_con))
    rmse_ss.append(cal_rmse_ss(winrs, ref))
    sd_ss.append(cal_sd_ss(winrs, ref))

    # train (printed only; no metrics are recorded for it)
    replay(y_train, yp_train, mplist_train, budget_train,
           advs_train_clicks[advertiser], advs_train_bids[advertiser])
422 | 
random.seed(10)

# if len(sys.argv) != 3:
#     print 'campaignId mode'
#     exit(-1)

# Directory holding the weight file and the yzx logs of the selected advertiser.
data_dir = "../../make-ipinyou-data/" + advertiser + "/"

# initialize the lr: feature id -> weight, one "<feature> <weight>" pair per line.
# estimator_lr() reads this table, so it must be filled before any log is loaded.
featWeight = {}
with open(data_dir + "train.yzx.txt.lr.weight", 'r') as fi:
    for line in fi:
        s = line.strip().split()
        featWeight[int(s[0])] = float(s[1])


def _load_yzx(path):
    """Read one yzx log; return (clicks, predicted ctrs, market prices).

    Every line is split on whitespace after dropping each ":1" substring:
    field 0 is taken as the click label, field 1 as the market price, and the
    remaining fields as feature ids that are scored with estimator_lr().
    The file is closed even if a line fails to parse.
    """
    clicks = []
    preds = []
    prices = []
    with open(path, 'r') as fi:
        for line in fi:
            data = ints(line.strip().replace(":1", "").split())
            clicks.append(data[0])
            prices.append(data[1])
            preds.append(estimator_lr(data[2:]))  # features start at index 2
    return clicks, preds, prices


y, yp, mplist = _load_yzx(data_dir + "test.yzx.txt")
y_train, yp_train, mplist_train = _load_yzx(data_dir + "train.yzx.txt")

# Mean predicted CTR over the training log; lin() divides by it.
basectr = sum(yp_train) / float(len(yp_train))

# for reporting: the control functions append one entry per run to each list
parameters = []
overshoot = []
settling_time = []
rise_time = []
rmse_ss = []
sd_ss = []
report_path = ""
483 | 
484 | 
# Column header shared by all report files. (The batch header used to contain
# a stray backslash in front of "overshoot".)
report_header = "campaign\ttotal-rounds\tbase-bid\tref\tgamma\tsettle-con\trise-con\trise-time\tsettling-time\tovershoot\trmse-ss\tsd-ss\n"


def _parameter_row(gamma):
    """Return the tab-separated settings columns of one report row."""
    return "\t".join([advertiser, str(cntr_rounds), str(basebid), str(ref),
                      str(gamma), str(settle_con), str(rise_con)])


def _write_report(path):
    """Write the header plus one line per recorded run to `path`.

    Each row now ends with a newline; previously the rows of a batch run were
    written back to back on a single line.
    """
    with open(path, 'w') as rout:
        rout.write(report_header)
        for idx, val in enumerate(parameters):
            rout.write("\t".join([val, str(rise_time[idx]), str(settling_time[idx]),
                                  str(overshoot[idx]), str(rmse_ss[idx]),
                                  str(sd_ss[idx])]) + "\n")


if mode == "test": # test mode
    report_path = "../report/report-win-waterlevel-budget-test.tsv"
    parameters.append(_parameter_row(para_gamma))
    control_test(cntr_rounds, ref, para_gamma)
    _write_report(report_path)
elif mode == "batch": # batch mode: one trace file per gain, one combined report
    report_path = "../report/report-win-waterlevel-budget-batch.tsv"
    for temp_gamma in para_gammas:
        para_gamma = temp_gamma * 1.0 * div
        out_path = "../exp-data/budget_win_waterlevel_"+advertiser+"_ref="+str(ref)+"_gamma=" + str(para_gamma)+".tsv"
        control(cntr_rounds, ref, para_gamma, out_path)
        parameters.append(_parameter_row(para_gamma))
    _write_report(report_path)
elif mode == "single": # single mode
    # budget_test is a float; concatenating it without str() raised TypeError
    out_path = "../exp-data/budget="+str(budget_test)+"_win_waterlevel_"+advertiser+"_ref="+str(ref)+"_gamma="+str(para_gamma)+".tsv"
    control(cntr_rounds, ref, para_gamma, out_path)
    report_path = "../report/report-win-waterlevel-budget-single.tsv"
    parameters.append(_parameter_row(para_gamma))
    _write_report(report_path)
else:
    print("wrong mode entered")


--------------------------------------------------------------------------------
/python/control-awr-waterlevel.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | import sys
  3 | import random
  4 | import math
  5 | 
# Per-campaign totals keyed by advertiser id (string). They are the
# denominators of the click_ratio / win_ratio columns written by the control
# functions.
# NOTE(review): presumably precomputed from the iPinYou train/test logs -- confirm.
advs_train_bids = {"1458": 3083056, "2259": 835556, "2261": 687617, "2821": 1322561, "2997": 312437, "3358": 1742104, "3386": 2847802, "3427": 2593765, "3476": 1970360}
advs_test_bids = {"1458": 614638, "2259": 417197, "2261": 343862, "2821": 661964, "2997": 156063, "3358": 300928, "3386": 545421, "3427": 536795, "3476": 523848}
advs_train_clicks = {"1458": 2454, "2259": 280, "2261": 207, "2821": 843, "2997": 1386, "3358": 1358, "3386": 2076, "3427": 1926, "3476": 1027}
advs_test_clicks = {"1458": 543, "2259": 131, "2261": 97, "2821": 394, "2997": 533, "3358": 339, "3386": 496, "3427": 395, "3476": 302}

# Experiment selection.
advertiser = "2821"
mode = "test"
basebid = 90  # base bid passed to lin()
ref = 0.4  # reference win rate the controller steers towards
print "%s\t%s\t%d\t%f" % (advertiser, mode, basebid, ref)

# parameter setting
minbid = 5  # lower bound applied to every computed bid
cntr_rounds = 40  # number of control rounds each log is split into
para_gamma = 100  # controller gain
div = 1
para_gammas = range(60, 120, 5)
settle_con = 0.1  # relative error band used by cal_settling_time
rise_con = 0.9  # rise criterion passed to cal_rise_time
min_phi = -3.5  # clamp range for the bid-scaling exponent phi
max_phi = 2
 27 | 
 28 | def ints(s):
 29 |     res = []
 30 |     for ss in s:
 31 |         res.append(int(ss))
 32 |     return res
 33 | 
 34 | def sigmoid(p):
 35 |     return 1.0 / (1.0 + math.exp(-p))
 36 | 
 37 | def estimator_lr(feats):
 38 |     pred = 0.0
 39 |     for feat in feats:
 40 |         if feat in featWeight:
 41 |             pred += featWeight[feat]
 42 |     pred = sigmoid(pred)
 43 |     return pred
 44 | 
 45 | # bidding functions
 46 | def lin(pctr, basectr, basebid):
 47 |     return int(pctr *  basebid / basectr)
 48 | 
 49 | # calculate settling time
 50 | def cal_settling_time(winrs, ref):
 51 |     settled = False
 52 |     settling_time = 0
 53 |     for key, value in winrs.iteritems():
 54 |         error = ref - value
 55 |         if abs(error) / ref <= settle_con and settled == False:
 56 |             settled = True
 57 |             settling_time = key
 58 |         elif abs(error) / ref > settle_con:
 59 |             settled = False
 60 |             settling_time = cntr_rounds
 61 |     return settling_time
 62 | 
 63 | # # calculate steady-state error
 64 | def cal_rmse_ss(winrs, ref):
 65 |     settling_time = cal_settling_time(winrs, ref)
 66 |     rmse = 0.0
 67 |     if settling_time >= cntr_rounds:
 68 |         settling_time = cntr_rounds - 1
 69 |     for round in range(settling_time, cntr_rounds):
 70 |         rmse += (winrs[round] - ref) * (winrs[round] - ref)
 71 |     rmse /= (cntr_rounds - settling_time)
 72 |     rmse = math.sqrt(rmse) / ref # weinan: relative rmse
 73 |     return rmse
 74 | 
 75 | # # calculate steady-state standard deviation
 76 | def cal_sd_ss(winrs, ref):
 77 |     settling_time = cal_settling_time(winrs, ref)
 78 |     if settling_time >= cntr_rounds:
 79 |         settling_time = cntr_rounds - 1
 80 |     sum2 = 0.0
 81 |     sum = 0.0
 82 |     for round in range(settling_time, cntr_rounds):
 83 |         sum2 += winrs[round] * winrs[round]
 84 |         sum += winrs[round]
 85 |     n = cntr_rounds - settling_time
 86 |     mean = sum / n
 87 |     sd = math.sqrt(sum2 / n - mean * mean) / mean # weinan: relative sd
 88 |     return sd
 89 | 
 90 | # calculate rise time
 91 | def cal_rise_time(winrs, ref, rise_con):
 92 |     rise_time = 0
 93 |     for key, value in winrs.iteritems():
 94 |         error = ref - value
 95 |         if abs(error) / ref <= (1 - rise_con):
 96 |             rise_time = key
 97 |             break
 98 |     return rise_time
 99 | 
100 | # calculate percentage overshoot
def cal_overshoot(winrs, ref):
    """Percentage overshoot of the win-rate series relative to `ref`.

    winrs maps round number -> win rate and must contain round 0.
    * Start above ref: distance of the minimum below ref (0.0 if the series
      never crosses below ref).
    * Start below ref: distance of the maximum above ref (0.0 if the series
      never crosses above ref).
    * Start exactly on ref: largest absolute deviation from ref. The previous
      code compared abs(value - ref) with a stored win-rate value here, mixing
      two different quantities, and typically reported 0.
    Every result is a percentage of ref.
    """
    first = winrs[0]
    series = list(winrs.values())
    if first > ref:
        trough = min(series)
        if trough < ref:
            return (ref - trough) * 100.0 / ref
        return 0.0
    if first < ref:
        crest = max(series)
        if crest > ref:
            return (crest - ref) * 100.0 / ref
        return 0.0
    widest = max(abs(value - ref) for value in series)
    return widest * 100.0 / ref
126 | 
127 | # control function
def control(cntr_rounds, ref, para_gamma, outfile):
    """Replay the test and training logs under win-rate control; dump a trace.

    Each log is split into `cntr_rounds` consecutive chunks (the last one also
    takes the remainder). Round 0 bids a flat 1000.0 on every impression;
    in later rounds
        error = previous round's wins / (ref * total bids) - 1 / cntr_rounds
        phi   = -para_gamma * error, clamped to [min_phi, max_phi]
        bid   = max(minbid, lin(pctr, basectr, basebid) * exp(phi))
    and an impression is won when the bid is strictly above its market price.

    Arguments:
      cntr_rounds -- number of control rounds
      ref         -- reference win rate
      para_gamma  -- controller gain
      outfile     -- path of the TSV trace to (over)write

    Side effects: writes `outfile` ("test" rows, "test-ref" rows, "train"
    rows, "train-ref" rows, in that order) and appends one value each to the
    module-level overshoot, settling_time, rise_time, rmse_ss and sd_ss lists,
    computed from the test replay only.

    The file is now closed even if a replay or a metric raises, and the two
    formerly copy-pasted replay loops share one helper.
    """
    row_fmt = "%d\t%.4f\t%s\t%.4f\t%d\t%.4f\t%.4f\t%.4f\t%.4f\t%.1f\n"

    def replay(fo, stage, clicks, pctrs, prices, click_total, bid_total):
        # Run the controller over one log, write its rows, return round -> win rate.
        n_bids = len(pctrs)
        cntr_size = int(n_bids / cntr_rounds)
        winrs = {}      # round -> cumulative win rate
        win_nums = {}   # round -> wins inside that round only
        tc = {}         # round -> cumulative cost
        bid_count = 0
        total_cost = 0.0
        total_clks = 0
        total_wins = 0
        for rnd in range(0, cntr_rounds):
            if rnd == 0:
                phi = 0.0
            else:
                error = win_nums[rnd - 1] * 1.0 / (ref * n_bids) - 1.0 / cntr_rounds
                phi = para_gamma * (-1) * error

            # keep the bid-scaling exponent inside its allowed range
            if phi <= min_phi:
                phi = min_phi
            elif phi >= max_phi:
                phi = max_phi

            imp_index = (rnd + 1) * cntr_size
            if rnd == cntr_rounds - 1:
                # the last round also takes the impressions left over by the integer split
                imp_index = imp_index + (n_bids - cntr_size * cntr_rounds)

            scale = math.exp(phi)  # constant within a round
            win = 0
            for i in range(rnd * cntr_size, imp_index):
                bid_count += 1
                mp = prices[i]
                if rnd == 0:
                    bid = 1000.0
                else:
                    bid = max(minbid, lin(pctrs[i], basectr, basebid) * scale)
                if bid > mp:
                    win += 1
                    total_wins += 1
                    total_clks += clicks[i]
                    total_cost += mp

            winrs[rnd] = total_wins * 1.0 / bid_count
            win_nums[rnd] = win
            tc[rnd] = total_cost
            ecpc = total_cost / (total_clks + 1)
            click_ratio = total_clks * 1.0 / click_total
            win_ratio = total_wins * 1.0 / bid_total
            fo.write(row_fmt % (rnd, winrs[rnd], stage, phi, total_clks, click_ratio,
                                win_ratio, total_cost, ecpc, ref))

        # companion rows carrying the reference value instead of the win rate
        for rnd in range(0, cntr_rounds):
            fo.write(row_fmt % (rnd, ref, stage + "-ref", 0.0, 0, 0.0, 0.0,
                                tc[rnd], 0.0, ref))
        return winrs

    fo = open(outfile, 'w')
    try:
        fo.write("round\twinr\tstage\tphi\ttotal_click\tclick_ratio\twin_ratio\ttotal_cost\tecpc\tref\n")

        winrs = replay(fo, "test", y, yp, mplist,
                       advs_test_clicks[advertiser], advs_test_bids[advertiser])
        overshoot.append(cal_overshoot(winrs, ref))
        settling_time.append(cal_settling_time(winrs, ref))
        rise_time.append(cal_rise_time(winrs, ref, rise_con))
        rmse_ss.append(cal_rmse_ss(winrs, ref))
        sd_ss.append(cal_sd_ss(winrs, ref))

        # train
        replay(fo, "train", y_train, yp_train, mplist_train,
               advs_train_clicks[advertiser], advs_train_bids[advertiser])
    finally:
        fo.close()
253 | 
254 | def control_test(cntr_rounds, ref, para_gamma):
255 |     winrs = {}
256 |     win_nums = {}
257 |     ecpcs = {}
258 |     bid_count = 0
259 |     first_round = True
260 |     cntr_size = int(len(yp) / cntr_rounds)
261 |     total_cost = 0.0
262 |     total_clks = 0
263 |     total_wins = 0
264 |     for round in range(0, cntr_rounds):
265 |         if first_round:
266 |             phi = 0.0
267 |             first_round = False
268 |         else:
269 |             error = (win_nums[round-1] * 1.0 / (ref * len(yp))) - (1.0 / cntr_rounds)
270 |             phi = para_gamma * (-1) * error
271 |         cost = 0
272 |         clks = 0
273 |         win = 0
274 | 
275 |         imp_index = ((round+1)*cntr_size)
276 | 
277 |         if round == cntr_rounds - 1:
278 |             imp_index = imp_index + (len(yp) - cntr_size*cntr_rounds)
279 | 
280 |         # fang piao
281 |         if phi <= min_phi:
282 |             phi = min_phi
283 |         elif phi >= max_phi:
284 |             phi = max_phi
285 | 
286 |         for i in range(round*cntr_size, imp_index):
287 |             bid_count += 1
288 |             clk = y[i]
289 |             pctr = yp[i]
290 |             mp = mplist[i]
291 |             bid = max(minbid,lin(pctr, basectr, basebid) * (math.exp(phi)))
292 |             if round == 0:
293 |                 bid = 1000.0
294 | 
295 |             if bid > mp:
296 |                 win += 1
297 |                 total_wins += 1
298 |                 clks += clk
299 |                 total_clks += clk
300 |                 cost += mp
301 |                 total_cost += mp
302 |         winrs[round] = total_wins * 1.0 / bid_count
303 |         ecpcs[round] = total_cost / (total_clks+1)
304 |         win_nums[round] = win
305 |         click_ratio = total_clks * 1.0 / advs_test_clicks[advertiser]
306 |         win_ratio = total_wins * 1.0 / advs_test_bids[advertiser]
307 |         print "%d\t%.4f\t%.4f\t%d\t%.4f\t%.4f\t%.4f\t%.4f\t%.1f" % (round, winrs[round], phi, total_clks, click_ratio, win_ratio, total_cost, ecpcs[round], ref)
308 |     overshoot.append(cal_overshoot(winrs, ref))
309 |     settling_time.append(cal_settling_time(winrs, ref))
310 |     rise_time.append(cal_rise_time(winrs, ref, rise_con))
311 |     rmse_ss.append(cal_rmse_ss(winrs, ref))
312 |     sd_ss.append(cal_sd_ss(winrs, ref))
313 | 
314 |     # train
315 |     winrs_train = {}
316 |     win_nums_train = {}
317 |     ecpcs_train = {}
318 |     bid_count = 0
319 |     first_round = True
320 |     cntr_size = int(len(yp_train) / cntr_rounds)
321 |     total_cost = 0.0
322 |     total_clks = 0
323 |     total_wins = 0
324 |     for round in range(0, cntr_rounds):
325 |         if first_round:
326 |             phi = 0.0
327 |             first_round = False
328 |         else:
329 |             error = (win_nums_train[round-1] * 1.0 / (ref * len(yp_train))) - (1.0 / cntr_rounds)
330 |             phi = para_gamma * (-1) * error
331 |         cost = 0
332 |         clks = 0
333 |         win = 0
334 | 
335 |         imp_index = ((round+1)*cntr_size)
336 | 
337 |         if round == cntr_rounds - 1:
338 |             imp_index = imp_index + (len(yp_train) - cntr_size*cntr_rounds)
339 | 
340 |         # fang piao
341 |         if phi <= min_phi:
342 |             phi = min_phi
343 |         elif phi >= max_phi:
344 |             phi = max_phi
345 | 
346 |         for i in range(round*cntr_size, imp_index):
347 |             bid_count += 1
348 |             clk = y_train[i]
349 |             pctr = yp_train[i]
350 |             mp = mplist_train[i]
351 |             bid = max(minbid,lin(pctr, basectr, basebid) * (math.exp(phi)))
352 |             if round == 0:
353 |                 bid = 1000.0
354 | 
355 |             if bid > mp:
356 |                 win += 1
357 |                 total_wins += 1
358 |                 clks += clk
359 |                 total_clks += clk
360 |                 cost += mp
361 |                 total_cost += mp
362 |         winrs_train[round] = total_wins * 1.0 / bid_count
363 |         ecpcs_train[round] = total_cost / (total_clks+1)
364 |         win_nums_train[round] = win
365 |         click_ratio = total_clks * 1.0 / advs_train_clicks[advertiser]
366 |         win_ratio = total_wins * 1.0 / advs_train_bids[advertiser]
367 |         print "%d\t%.4f\t%.4f\t%d\t%.4f\t%.4f\t%.4f\t%.4f\t%.1f" % (round, winrs_train[round], phi, total_clks, click_ratio, win_ratio, total_cost,  ecpcs_train[round], ref)
368 | 
random.seed(10)  # fixed seed so repeated runs are reproducible

# if len(sys.argv) != 3:
#     print 'campaignId mode'
#     exit(-1)

# Parallel per-record series, filled below from the test and train logs:
#   y / y_train            first field of a record, accumulated as clicks
#   yp / yp_train          CTR predicted by estimator_lr from the record's features
#   mplist / mplist_train  second field of a record; the control loops win an
#                          impression when bid > this value and add it to cost
mplist = []
y = []
yp = []
mplist_train = []
y_train = []
yp_train = []
featWeight = {}  # feature id -> logistic-regression weight

data_dir = "../../make-ipinyou-data/" + advertiser + "/"

# initialize the lr: every non-blank line is "<feature id> <weight>"
# `with` guarantees the handle is closed even if a line fails to parse.
with open(data_dir + "train.yzx.txt.lr.weight", 'r') as fi:
    for line in fi:
        s = line.strip().split()
        if not s:
            continue  # tolerate blank lines instead of failing on s[0]
        feat = int(s[0])
        weight = float(s[1])
        featWeight[feat] = weight

fsid = 2  # feature start id

# The test log is read first, then the train log; both share one record format:
# "<clk> <mp> <feat>:1 <feat>:1 ...". The ":1" suffixes are stripped so that
# every token parses as an integer.
for path, labels, preds, prices in (
        (data_dir + "test.yzx.txt", y, yp, mplist),
        (data_dir + "train.yzx.txt", y_train, yp_train, mplist_train)):
    with open(path, 'r') as fi:
        for line in fi:
            data = ints(line.strip().replace(":1", "").split())
            if not data:
                continue  # tolerate blank lines instead of failing on data[0]
            clk = data[0]
            mp = data[1]
            feats = data[fsid:]
            pred = estimator_lr(feats)
            labels.append(clk)
            preds.append(pred)
            prices.append(mp)

# Mean predicted CTR over the train log; passed to lin() as the base CTR.
basectr = sum(yp_train) / float(len(yp_train))
420 | 
# for reporting
# One entry per control run: `parameters` holds the tab-joined settings of the
# run, the other lists hold the metrics appended for it by the control functions.
parameters = []
overshoot = []
settling_time = []
rise_time = []
rmse_ss = []
sd_ss = []
report_path = ""

# Single header shared by every mode (the batch-mode copy used to contain a
# stray backslash in front of "overshoot").
REPORT_HEADER = "campaign\ttotal-rounds\tbase-bid\tref\tgamma\tsettle-con\trise-con\trise-time\tsettling-time\tovershoot\trmse-ss\tsd-ss\n"


def _run_parameters(gamma):
    """Return the tab-separated parameter columns describing one control run."""
    return "\t".join([advertiser, str(cntr_rounds), str(basebid), str(ref),
                      str(gamma), str(settle_con), str(rise_con)])


def _write_report(path):
    """Write the header plus one newline-terminated row per recorded run to `path`.

    Each row is terminated explicitly; without that, the rows of a batch run
    were concatenated onto a single line.
    """
    with open(path, 'w') as rout:
        rout.write(REPORT_HEADER)
        for idx, val in enumerate(parameters):
            row = [val, str(rise_time[idx]), str(settling_time[idx]),
                   str(overshoot[idx]), str(rmse_ss[idx]), str(sd_ss[idx])]
            rout.write("\t".join(row) + "\n")


if mode == "test":  # test mode
    report_path = "../report/report-win-waterlevel-test.tsv"
    parameters.append(_run_parameters(para_gamma))
    control_test(cntr_rounds, ref, para_gamma)
    _write_report(report_path)
elif mode == "batch":  # batch mode: one control run per candidate gamma
    report_path = "../report/report-win-waterlevel-batch.tsv"
    for temp_gamma in para_gammas:
        para_gamma = temp_gamma * 1.0 * div
        out_path = "../exp-data/win_waterlevel_"+advertiser+"_ref="+str(ref)+"_gamma=" + str(para_gamma)+".tsv"
        control(cntr_rounds, ref, para_gamma, out_path)
        parameters.append(_run_parameters(para_gamma))
    _write_report(report_path)
elif mode == "single":  # single mode
    out_path = "../exp-data/win_waterlevel_"+advertiser+"_ref="+str(ref)+"_gamma="+str(para_gamma)+".tsv"
    control(cntr_rounds, ref, para_gamma, out_path)
    report_path = "../report/report-win-waterlevel-single.tsv"
    parameters.append(_run_parameters(para_gamma))
    _write_report(report_path)
else:
    print("wrong mode entered")


--------------------------------------------------------------------------------
/python/control-ecpc-multiex-pid-bid-optimisation.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | import sys
  3 | import random
  4 | import math
  5 | from sklearn.metrics import roc_auc_score
  6 | from sklearn.metrics import mean_squared_error
  7 | 
# Per-advertiser constants, keyed by advertiser id (a string).
# NOTE(review): the bid/click/cost tables look like pre-computed totals of the
# train and test logs - confirm against the data preparation step.
advs_train_bids = {"1458": 3083056, "2259": 835556, "2261": 687617, "2821": 1322561, "2997": 312437, "3358": 1742104, "3386": 2847802, "3427": 2593765, "3476": 1970360}
advs_test_bids = {"1458": 614638, "2259": 417197, "2261": 343862, "2821": 661964, "2997": 156063, "3358": 300928, "3386": 545421, "3427": 536795, "3476": 523848}
advs_train_clicks = {"1458": 2454, "2259": 280, "2261": 207, "2821": 843, "2997": 1386, "3358": 1358, "3386": 2076, "3427": 1926, "3476": 1027}
# Denominator of the click_ratio column written by control().
advs_test_clicks = {"1458": 543, "2259": 131, "2261": 97, "2821": 394, "2997": 533, "3358": 339, "3386": 496, "3427": 395, "3476": 302}
advs_train_ori_ecpc = {"1458": 86550.0000, "2259": 277700.0000, "2261": 297640.0000, "2821": 140070.0000, "2997": 14210.0000, "3358": 118510.0000, "3386": 105520.0000, "3427": 109160.0000, "3476": 151980.0000}
# Written verbatim into the ori_ecpc column of control()'s output file.
advs_test_ori_ecpc = {"1458": 83270.0000, "2259": 332040.0000, "2261": 296870.0000, "2821": 173240.0000, "2997": 16170.0000, "3358": 100770.0000, "3386": 92170.0000, "3427": 117360.0000, "3476": 144460.0000}
advs_train_cost = {"1458": 212400000, "2259": 77754000, "2261": 61610000, "2821": 118082000, "2997": 19689000, "3358": 160943000, "3386": 219066000, "3427": 210239000, "3476": 156088000}
advs_test_cost = {"1458": 45216000, "2259": 43497000, "2261": 28795000, "2821": 68257000, "2997": 8617000, "3358": 34159000, "3386": 45715000, "3427": 46356000, "3476": 43627000}
# Base bid handed to lin() for the selected advertiser (see `basebid` below).
advs_base_bid = {"1458": 69, "2259": 93, "2261":90, "2821":90, "2997":63, "3358":92, "3386":77, "3427": 81, "3476": 79}

# The budget below is this value multiplied by budget_damping.
# NOTE(review): presumably the test-set cost of the uncontrolled linear bidder - confirm.
advs_test_lin_ori_cost = {"1458": 1630539.0, "2259": 23220744.0, "2261": 14531891.0, "2821": 28152710.0, "2997": 4241173.0, "3358": 2518069.0, "3386": 13076826.0, "3427": 4986476.0, "3476": 9075114.0}
budget_damping = 0.5

# pre-calculated references using economics model
# advertiser id -> {exchange label -> reference eCPC}. control() steers each
# exchange towards its own entry and the uniform ("uni") controller towards 'all'.
advs_adex_ref = {'3386': {'1': 19.650368000000004, 'all': 29.02608458227849, '2': 44.213328000000004, '3': 37.711368000000014},
                 '2997': {'all': 5.983467456521738, 'null': 5.983467456521738},
                 '3476': {'1': 19.33278951724137, 'all': 23.144013680672266, '2': 26.20928985365854, '3': 23.360453999999994},
                 '3427': {'1': 9.885587179487173, 'all': 8.46566368421053, '2': 6.185100000000001, '3': 8.355310526315792},
                 '2821': {'1': 61.942298684210535, 'all': 53.86468627810651, '2': 64.04917959183673, '4': 49.54528928571428, '3': 51.99172861445784},
                 '2259': {'1': 155.06496331797234, 'all': 118.39829060240965, '2': 136.23116210526317, '3': 102.05130064516129},
                 '2261': {'1': 68.34028703703704, 'all': 102.89947283647797, '2': 139.2594528301887, '3': 99.87262888198757},
                 '1458': {'1': 2.580408429906542, 'all': 2.1646681698113244, '2': 1.517053307692311, '3': 2.2326444905660416},
                 '3358': {'1': 8.342108526315794, 'all': 7.09656621428572, '2': 8.860251913043474, '3': 1.9785028543689314}}

# control parameters
# P/I/D gains are stored as integers and scaled by `div` below; min/max phi are
# the bounds control() clamps the control signal phi to.
advs_optimal_p = {"1458": 1500000,   "2259": 100000, "2261":500000,  "2821":40000, "2997":1000000,  "3358":500000,  "3386":50000,  "3427": 500000, "3476": 500000}
advs_optimal_i = {"1458": 100,       "2259": 20000,  "2261":5000,   "2821":200,   "2997":1000,   "3358":100,   "3386":10000,   "3427": 1000,  "3476": 1000}
advs_optimal_d = {"1458": 10000,     "2259": 10000,  "2261":50000,   "2821":100,   "2997":10000,   "3358":10000,   "3386":1000,   "3427": 1000,  "3476": 10005}
advs_optimal_min_phi = {"1458": -3,  "2259": -0.6, "2261":-0.8,     "2821":-0.5,   "2997":-0.5,   "3358":-1,   "3386":-0.45,   "3427": -1,  "3476": -2}
advs_optimal_max_phi = {"1458": 5,  "2259": 4,      "2261":5,       "2821":5,       "2997":5,       "3358":2,   "3386":2,   "3427": 2,  "3476": 3}

# uni control parameters
# Only referenced from the commented-out expressions next to para_*_uni below.
advs_optimal_p_uni = {"1458": 1500000,   "2259": 100000, "2261":500000,  "2821":40000, "2997":1000000,  "3358":500000,  "3386":50000,  "3427": 500000, "3476": 500000}
advs_optimal_i_uni = {"1458": 100,       "2259": 20000,  "2261":5000,   "2821":200,   "2997":1000,   "3358":100,   "3386":10000,   "3427": 1000,  "3476": 1000}
advs_optimal_d_uni = {"1458": 10000,       "2259": 10000,  "2261":50000,   "2821":100,   "2997":10000,   "3358":10000,   "3386":1000,   "3427": 1000,  "3476": 10005}


# NOTE(review): not referenced in the visible part of this file - confirm it is still needed.
advs_filtered_adex = {"2259":[]}

# Advertiser to simulate: first command-line argument, defaulting to "3476".
# An id missing from the tables above fails with KeyError on the lookups below.
advertiser = "3476"
if len(sys.argv) > 1:
    advertiser = sys.argv[1]
mode = "single"  # hard-coded; not read from the command line
basebid = advs_base_bid[advertiser]
print "%s\t%s\t%d" % (advertiser, mode, basebid)

# parameter setting
minbid = 5  # lower bound applied to every computed bid
cntr_rounds = 40  # number of control rounds the impression log is split into
div = 1e-6  # scale factor turning the integer gain tables into gains
para_p = advs_optimal_p[advertiser] * div
para_i = advs_optimal_i[advertiser] * div
para_d = advs_optimal_d[advertiser] * div

# NOTE: this overwrites the per-advertiser gains computed just above with one
# fixed triple (integral gain 0), regardless of the selected advertiser.
(para_p, para_i, para_d) = (2.08434691735,	0,	0.0158489319246)

# The uniform controller currently reuses the same gains as the per-exchange one.
para_p_uni = para_p # advs_optimal_p_uni[advertiser] * div
para_i_uni = para_i # advs_optimal_i_uni[advertiser] * div
para_d_uni = para_d # advs_optimal_d_uni[advertiser] * div

# NOTE(review): presumably candidate gain grids for a parameter sweep; not
# referenced in the visible part of this file - confirm.
para_ps = range(0, 40, 5)
para_is = range(0, 25, 5)
para_ds = range(0, 25, 5)
settle_con = 0.1  # relative error band used by cal_settling_time
rise_con = 0.9  # cal_rise_time looks for a relative error <= 1 - rise_con
min_phi = advs_optimal_min_phi[advertiser]
max_phi = advs_optimal_max_phi[advertiser]
#budget = 9999999999999.0
# Spend cap: control() only counts an impression while total cost + price < budget.
# The same cap is applied separately to the linear and uniform baselines.
budget = advs_test_lin_ori_cost[advertiser] * budget_damping
lin_budget = budget
uni_budget = budget
 79 | 
 80 | def ints(s):
 81 |     res = []
 82 |     for ss in s:
 83 |         res.append(int(ss))
 84 |     return res
 85 | 
 86 | def sigmoid(p):
 87 |     return 1.0 / (1.0 + math.exp(-p))
 88 | 
 89 | def estimator_lr(feats):
 90 |     pred = 0.0
 91 |     for feat in feats:
 92 |         if feat in featWeight:
 93 |             pred += featWeight[feat]
 94 |     pred = sigmoid(pred)
 95 |     return pred
 96 | 
 97 | # bidding functions
 98 | def lin(pctr, basectr, basebid):
 99 |     return int(pctr *  basebid / basectr)
100 | 
101 | # calculate settling time
def cal_settling_time(ecpcs, adex_ref):
    """Return, per exchange, the round at which its eCPC settles on the reference.

    ecpcs    -- {exchange: {round: ecpc}}; the "all" entry is skipped.
    adex_ref -- {exchange: reference ecpc}.

    A round is "in band" when its relative error |ref - ecpc| / ref is at most
    the module-level ``settle_con``. The value reported for an exchange is the
    first round of the final uninterrupted in-band streak, ``cntr_rounds`` if
    the last round is out of band, or 0 if the exchange has no rounds at all.

    Rounds are visited in ascending order explicitly: the result depends on the
    order, which must not be left to dict iteration order.
    """
    settling_time = {}
    for ex in ecpcs:
        if ex == "all":
            continue
        ex_ecpcs = ecpcs[ex]
        settled = False
        settling_time[ex] = 0
        for rnd in sorted(ex_ecpcs):
            rel_error = abs(adex_ref[ex] - ex_ecpcs[rnd]) / adex_ref[ex]
            if rel_error <= settle_con and (not settled):
                # first in-band round of a new streak
                settled = True
                settling_time[ex] = rnd
            elif rel_error > settle_con:
                # left the band again: any earlier streak no longer counts
                settled = False
                settling_time[ex] = cntr_rounds
    return settling_time
118 | 
119 | # # calculate steady-state error
def cal_rmse_ss(ecpcs, adex_ref):
    """Return, per exchange, the relative RMSE of the eCPC against its reference.

    The error is taken over the rounds from the exchange's settling time (as
    given by ``cal_settling_time``, capped at the last round) up to
    ``cntr_rounds``. Each squared error is divided by the squared reference, so
    the result is relative to the reference. The "all" entry is skipped.
    """
    first_settled = cal_settling_time(ecpcs, adex_ref)
    rmse = {}
    for ex in ecpcs:
        if ex == "all":
            continue
        series = ecpcs[ex]
        # an exchange that never settled is scored on the last round only
        start = first_settled[ex]
        if start >= cntr_rounds:
            start = cntr_rounds - 1
        acc = 0.0
        for rnd in range(start, cntr_rounds):
            observed = series[rnd]
            target = adex_ref[ex]
            gap = observed - target
            acc += gap * gap / (target * target)
        acc /= (cntr_rounds - start)
        rmse[ex] = math.sqrt(acc)  # weinan: relative rmse
    return rmse
136 | 
137 | # # calculate steady-state standard deviation
def cal_sd_ss(ecpcs, adex_ref):
    """Return, per exchange, the relative standard deviation of the settled eCPC.

    The statistics cover the rounds from the exchange's settling time (as given
    by ``cal_settling_time``, capped at the last round) up to ``cntr_rounds``.
    The result is the population standard deviation divided by the mean. The
    "all" entry is skipped.

    Two numerical edge cases are handled instead of raising:
    * E[x^2] - E[x]^2 can come out slightly negative through rounding when the
      values are (nearly) constant; it is clamped to 0 before the square root.
    * a zero mean makes the relative deviation undefined; 0.0 is reported.
    """
    settling_time = cal_settling_time(ecpcs, adex_ref)
    sd = {}
    for ex in ecpcs:
        if ex == "all":
            continue
        series = ecpcs[ex]
        # an exchange that never settled is scored on the last round only
        start = min(settling_time[ex], cntr_rounds - 1)
        total_sq = 0.0
        total = 0.0
        for rnd in range(start, cntr_rounds):
            total_sq += series[rnd] * series[rnd]
            total += series[rnd]
        n = cntr_rounds - start
        mean = total / n
        variance = max(0.0, total_sq / n - mean * mean)
        if mean == 0:
            sd[ex] = 0.0
        else:
            sd[ex] = math.sqrt(variance) / mean  # weinan: relative sd
    return sd
157 | 
158 | # calculate rise time
def cal_rise_time(ecpcs, adex_ref):
    """Return, per exchange, the first round whose eCPC is close to the reference.

    ecpcs    -- {exchange: {round: ecpc}}; the "all" entry is skipped.
    adex_ref -- {exchange: reference ecpc}.

    "Close" means a relative error |ref - ecpc| / ref of at most
    ``1 - rise_con`` (module-level setting). Exchanges that never get that
    close are reported as 0.

    Rounds are scanned in ascending order explicitly, so "first" does not depend
    on dict iteration order, and the default is set up front instead of probing
    the result dict through a bare ``except``.
    """
    rise_time = {}
    for ex in ecpcs:
        if ex == "all":
            continue
        ex_ecpcs = ecpcs[ex]
        rise_time[ex] = 0  # default when the threshold is never reached
        for rnd in sorted(ex_ecpcs):
            error = adex_ref[ex] - ex_ecpcs[rnd]
            if abs(error) / adex_ref[ex] <= (1 - rise_con):
                rise_time[ex] = rnd
                break
    return rise_time
174 | 
175 | # calculate percentage overshoot
def cal_overshoot(ecpcs, adex_ref):
    """Return, per exchange, the largest percentage overshoot past the reference.

    ecpcs    -- {exchange: {round: ecpc}}; the "all" entry is skipped.
    adex_ref -- {exchange: reference ecpc}.

    The direction is decided by the round-0 value: a series starting above the
    reference overshoots when it drops to or below it, a series starting below
    overshoots when it rises to or above it, and a series starting exactly on
    the reference counts any deviation. Overshoot is expressed as a percentage
    of the reference; 0.0 means the reference was never crossed.
    """
    overshoot = {}
    for ex, series in ecpcs.items():
        if ex == "all":
            continue
        initial = series[0]
        target = adex_ref[ex]
        peak = 0.0
        if initial > target:
            # approached from above: measure how far the series dips below
            for value in series.values():
                if value <= target:
                    pct = (target - value) * 100.0 / target
                    if pct > peak:
                        peak = pct
        elif initial < target:
            # approached from below: measure how far the series climbs above
            for value in series.values():
                if value >= target:
                    pct = (value - target) * 100.0 / target
                    if pct > peak:
                        peak = pct
        else:
            # started on the reference: any deviation counts
            for value in series.values():
                pct = abs(value - target) * 100.0 / target
                if pct >= peak:
                    peak = pct
        overshoot[ex] = peak
    return overshoot
201 | 
202 | # control function
203 | def control(para_p, para_i, para_d, outfile):
204 |     fo = open(outfile, 'w')
205 |     fo.write("stage\tround\texchange\tecpc\tphi\ttotal_bid_num\ttotal_wins\ttotal_clks\tclick_ratio\ttotal_cost\tbudget\tref\tori_ecpc\n")
206 |     ecpcs = {}
207 |     first_round = True
208 |     sec_round = False
209 |     cntr_size = int(len(yp) / cntr_rounds)
210 |     adex_ref = advs_adex_ref[advertiser]
211 |     error_sum = {}
212 |     phi = {}
213 |     total_cost = {"all":0}
214 |     total_clks = {"all":0}
215 |     total_wins = {"all":0}
216 |     total_bid_num = {"all":0}
217 |     lin_total_wins = {"all":0}
218 |     lin_total_clks = {"all":0}
219 |     lin_total_cost = {"all":0.}
220 |     lin_total_bid_num = {"all":0}
221 |     uni_total_wins = {"all":0}  # uni means uniform reference for all ad exchanges
222 |     uni_total_clks = {"all":0}
223 |     uni_total_cost = {"all":0.}
224 |     uni_total_bid_num = {"all":0}
225 | 
226 |     break_round = False
227 | 
228 |     for val in list(set(exchange)):
229 |         total_clks[val] = 0
230 |         total_cost[val] = 0.0
231 |         total_wins[val] = 0
232 |         total_bid_num[val] = 0
233 |         lin_total_clks[val] = 0
234 |         lin_total_cost[val] = 0.0
235 |         lin_total_wins[val] = 0
236 |         lin_total_bid_num[val] = 0
237 |         uni_total_clks[val] = 0
238 |         uni_total_cost[val] = 0.0
239 |         uni_total_wins[val] = 0
240 |         uni_total_bid_num[val] = 0
241 | 
242 |     for round in range(0, cntr_rounds):
243 |         if first_round and (not sec_round):
244 |             phi["uni"] = 0.0
245 |             error_sum["uni"] = 0.0
246 |             ecpcs["uni"] = {}
247 |             ecpcs["all"] = {}
248 |             for val in list(set(exchange)):
249 |                 phi[val] = 0.0
250 |                 error_sum[val] = 0.0
251 |             first_round = False
252 |             sec_round = True
253 |         elif sec_round and (not first_round):
254 |             error = adex_ref["all"] - ecpcs["uni"][round-1]
255 |             error_sum["uni"] += error
256 |             phi["uni"] = para_p_uni*error + para_i_uni*error_sum["uni"]
257 |             for val in list(set(exchange)):
258 |                 error = adex_ref[val] - ecpcs[val][round-1]
259 |                 error_sum[val] += error
260 |                 phi[val] = para_p*error + para_i*error_sum[val]
261 |             sec_round = False
262 |         else:
263 |             error = adex_ref["all"] - ecpcs["uni"][round-1]
264 |             error_sum["uni"] += error
265 |             phi["uni"] = para_p_uni*error + para_i_uni*error_sum["uni"] + para_d_uni*(ecpcs["uni"][round-2]-ecpcs["uni"][round-1])
266 |             for val in list(set(exchange)):
267 |                 error = adex_ref[val] - ecpcs[val][round-1]
268 |                 error_sum[val] += error
269 |                 phi[val] = para_p*error + para_i*error_sum[val] + para_d*(ecpcs[val][round-2]-ecpcs[val][round-1])
270 | 
271 |         # fang piao
272 |         if phi["uni"] <= min_phi:
273 |             phi["uni"] = min_phi
274 |         elif phi["uni"] >= max_phi:
275 |             phi["uni"] = max_phi
276 |         for val in list(set(exchange)):
277 |             if phi[val] <= min_phi:
278 |                 phi[val] = min_phi
279 |             elif phi[val] >= max_phi:
280 |                 phi[val] = max_phi
281 | 
282 |         imp_index = ((round+1)*cntr_size)
283 | 
284 |         if round == cntr_rounds - 1:
285 |             imp_index = imp_index + (len(yp) - cntr_size*cntr_rounds)
286 | 
287 |         for i in range(round*cntr_size, imp_index):
288 | 
289 |             clk = y[i]
290 |             pctr = yp[i]
291 |             mp = mplist[i]
292 |             bid = int(max(minbid, lin(pctr, basectr, basebid) * (math.exp(phi[exchange[i]]))))
293 |             # bid = int(max(minbid, lin(pctr, basectr, basebid) * (math.exp(0))))
294 | 
295 |             if round == 0:
296 |                 bid = int(max(minbid, lin(pctr, basectr, basebid) * (math.exp(0))))
297 | 
298 |             if total_cost['all'] + mp < budget:
299 |                 total_bid_num["all"] += 1
300 |                 if exchange[i] in total_bid_num:
301 |                     total_bid_num[exchange[i]] += 1
302 |                 else:
303 |                     total_bid_num[exchange[i]] = 1
304 | 
305 |                 if bid > mp:
306 |                     total_wins["all"] += 1
307 |                     total_clks["all"] += clk
308 |                     total_cost["all"] += mp
309 | 
310 |                     if exchange[i] in total_wins:
311 |                         total_wins[exchange[i]] += 1
312 |                     else:
313 |                         total_wins[exchange[i]] = 1
314 | 
315 |                     if exchange[i] in total_clks:
316 |                         total_clks[exchange[i]] += clk
317 |                     else:
318 |                         total_clks[exchange[i]] = clk
319 | 
320 |                     if exchange[i] in total_cost:
321 |                         total_cost[exchange[i]] += mp
322 |                     else:
323 |                         total_cost[exchange[i]] = mp
324 | 
325 |             # lin bid with out any control
326 |             lin_bid = int(max(minbid, lin(pctr, basectr, basebid)))
327 | 
328 |             if lin_total_cost["all"] + mp < lin_budget:
329 |                 lin_total_bid_num["all"] += 1
330 |                 if exchange[i] in lin_total_bid_num:
331 |                     lin_total_bid_num[exchange[i]] += 1
332 |                 else:
333 |                     lin_total_bid_num[exchange[i]] = 1
334 | 
335 |                 if lin_bid > mp:
336 |                     lin_total_clks["all"] += clk
337 |                     lin_total_wins["all"] += 1
338 |                     lin_total_cost["all"] += mp
339 | 
340 |                     if exchange[i] in lin_total_wins:
341 |                         lin_total_wins[exchange[i]] += 1
342 |                     else:
343 |                         lin_total_wins[exchange[i]] = 1
344 | 
345 |                     if exchange[i] in lin_total_clks:
346 |                         lin_total_clks[exchange[i]] += clk
347 |                     else:
348 |                         lin_total_clks[exchange[i]] = clk
349 | 
350 |                     if exchange[i] in lin_total_cost:
351 |                         lin_total_cost[exchange[i]] += mp
352 |                     else:
353 |                         lin_total_cost[exchange[i]] = mp
354 | 
355 |             # bid with uniform control
356 |             uni_bid = int(max(minbid, lin(pctr, basectr, basebid) * (math.exp(phi["uni"]))))
357 |             if uni_total_cost["all"] + mp < uni_budget:
358 |                 uni_total_bid_num["all"] += 1
359 |                 if exchange[i] in uni_total_bid_num:
360 |                     uni_total_bid_num[exchange[i]] += 1
361 |                 else:
362 |                     uni_total_bid_num[exchange[i]] = 1
363 | 
364 |                 if uni_bid > mp:
365 |                     uni_total_clks["all"] += clk
366 |                     uni_total_wins["all"] += 1
367 |                     uni_total_cost["all"] += mp
368 | 
369 |                     if exchange[i] in uni_total_wins:
370 |                         uni_total_wins[exchange[i]] += 1
371 |                     else:
372 |                         uni_total_wins[exchange[i]] = 1
373 | 
374 |                     if exchange[i] in uni_total_clks:
375 |                         uni_total_clks[exchange[i]] += clk
376 |                     else:
377 |                         uni_total_clks[exchange[i]] = clk
378 | 
379 |                     if exchange[i] in uni_total_cost:
380 |                         uni_total_cost[exchange[i]] += mp
381 |                     else:
382 |                         uni_total_cost[exchange[i]] = mp
383 | 
384 |         ecpcs["all"][round] = total_cost["all"] * 1.0 / total_clks["all"] / 1000.0
385 | 
386 |         for val in list(set(exchange)):
387 |             if round == 0:
388 |                 ecpcs[val] = {}
389 |             if total_clks[val] == 0:
390 |                 ecpcs[val][round] = total_cost[val] * 1.0 / (total_clks[val]+1) / 1000.0
391 |             else:
392 |                 ecpcs[val][round] = total_cost[val] * 1.0 / total_clks[val] / 1000.0
393 |             fo.write("%s\t%d\t%s\t%.4f\t%.4f\t%d\t%d\t%d\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f\n" % ("test", round, val, ecpcs[val][round], phi[val], total_bid_num[val], total_wins[val], total_clks[val], total_clks[val] * 1.0/advs_test_clicks[advertiser], total_cost[val], budget, adex_ref[val], advs_test_ori_ecpc[advertiser]))
394 |         fo.write("%s\t%d\t%s\t%.4f\t%.4f\t%d\t%d\t%d\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f\n" % ("test", round, "all", ecpcs["all"][round], 0.0, total_bid_num["all"], total_wins["all"], total_clks["all"], total_clks["all"] * 1.0/advs_test_clicks[advertiser], total_cost["all"], budget, 0.0, advs_test_ori_ecpc[advertiser]))
395 |         if lin_total_clks["all"] == 0:
396 |             lin_total_clks["all"] = 1
397 |         lin_ecpc = lin_total_cost["all"] * 1.0 /lin_total_clks["all"] / 1000.0
398 |         fo.write("%s\t%d\t%s\t%.4f\t%.4f\t%d\t%d\t%d\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f\n" % ("test", round, "lin", lin_ecpc, 0.0, lin_total_bid_num["all"], lin_total_wins["all"], lin_total_clks["all"], lin_total_clks["all"] * 1.0/advs_test_clicks[advertiser], lin_total_cost["all"], lin_budget, 0.0, advs_test_ori_ecpc[advertiser]))
399 |         if uni_total_clks["all"] == 0:
400 |             uni_total_clks["all"] = 1
401 |         ecpcs["uni"][round] = uni_total_cost["all"] * 1.0 /uni_total_clks["all"] / 1000.0
402 |         fo.write("%s\t%d\t%s\t%.4f\t%.4f\t%d\t%d\t%d\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f\n" % ("test", round, "uni", ecpcs["uni"][round], phi["uni"], uni_total_bid_num["all"], uni_total_wins["all"], uni_total_clks["all"], uni_total_clks["all"] * 1.0/advs_test_clicks[advertiser], uni_total_cost["all"], uni_budget, adex_ref["all"], advs_test_ori_ecpc[advertiser]))
403 | 
404 | 
405 |     # print lin and control clicks and cost on each ad exchange
406 |     zfo = open("../report/lin-control-clk-cost-adex-" + advertiser + ".txt", "w")
407 |     zfo.write("campaign\texchange\tlin.bid\tuni.bid\tctrl.bid\tlin.imp\tuni.imp\tctrl.imp\tlin.clk\tuni.clk\tctrl.clk\tlin.cost\tuni.cost\tctrl.cost\tuni.ref\tctrl.ref\n")
408 |     for ex in sorted(total_clks):
409 |         ref = 0.0
410 |         if ex in advs_adex_ref[advertiser]:
411 |             ref = advs_adex_ref[advertiser][ex]
412 |         uni_ref = advs_adex_ref[advertiser]["all"]
413 |         zfo.write("%s\t%s\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%.4f\t%.4f\n" %
414 |                     (advertiser, ex,
415 |                     lin_total_bid_num[ex], uni_total_bid_num[ex], total_bid_num[ex],
416 |                     lin_total_wins[ex], uni_total_wins[ex], total_wins[ex],
417 |                     lin_total_clks[ex], uni_total_clks[ex], total_clks[ex],
418 |                     lin_total_cost[ex], uni_total_cost[ex], total_cost[ex],
419 |                     uni_ref, ref))
420 |     zfo.close()
421 | 
422 |     # print "lin total cost: " + str(lin_total_cost)
423 | 
424 |     # train
425 |     ecpcs = {}
426 |     first_round = True
427 |     sec_round = False
428 |     cntr_size = int(len(yp_train) / cntr_rounds)
429 |     adex_ref = advs_adex_ref[advertiser]
430 |     error_sum = {}
431 |     phi = {}
432 |     total_cost = {}
433 |     total_clks = {}
434 |     total_wins = {}
435 |     total_bid_num = {}
436 |     lin_total_wins = 0
437 |     lin_total_clks = 0
438 |     lin_total_cost = 0
439 |     uni_total_wins = 0
440 |     uni_total_clks = 0
441 |     uni_total_cost = 0
442 | 
443 |     for val in list(set(exchange_train)):
444 |         total_clks[val] = 0
445 |         total_cost[val] = 0.0
446 |         total_wins[val] = 0
447 |         total_bid_num[val] = 0
448 | 
449 |     for round in range(0, cntr_rounds):
450 |         if first_round and (not sec_round):
451 |             ecpcs["all"] = {}
452 |             ecpcs["uni"] = {}
453 |             error_sum["uni"] = 0.0
454 |             phi["uni"] = 0.0
455 |             for val in list(set(exchange_train)):
456 |                 phi[val] = 0.0
457 |                 error_sum[val] = 0.0
458 |             first_round = False
459 |             sec_round = True
460 |         elif sec_round and (not first_round):
461 |             error = adex_ref["all"] - ecpcs["uni"][round-1]
462 |             error_sum["uni"] += error
463 |             phi["uni"] = para_p_uni*error + para_i_uni*error_sum["uni"]
464 |             for val in list(set(exchange_train)):
465 |                 error = adex_ref[val] - ecpcs[val][round-1]
466 |                 error_sum[val] += error
467 |                 phi[val] = para_p*error + para_i*error_sum[val]
468 |             sec_round = False
469 |         else:
470 |             error = adex_ref["all"] - ecpcs["uni"][round-1]
471 |             error_sum["uni"] += error
472 |             phi["uni"] = para_p_uni*error + para_i_uni*error_sum["uni"] + para_d_uni*(ecpcs["uni"][round-2]-ecpcs["uni"][round-1])
473 |             for val in list(set(exchange_train)):
474 |                 error = adex_ref[val] - ecpcs[val][round-1]
475 |                 error_sum[val] += error
476 |                 phi[val] = para_p*error + para_i*error_sum[val] + para_d*(ecpcs[val][round-2]-ecpcs[val][round-1])
477 | 
478 |         # fang piao
479 |         if phi["uni"] <= min_phi:
480 |             phi["uni"] = min_phi
481 |         elif phi["uni"] >= max_phi:
482 |             phi["uni"] = max_phi
483 |         for val in list(set(exchange_train)):
484 |             if phi[val] <= min_phi:
485 |                 phi[val] = min_phi
486 |             elif phi[val] >= max_phi:
487 |                 phi[val] = max_phi
488 | 
489 |         imp_index = ((round+1)*cntr_size)
490 | 
491 |         if round == cntr_rounds - 1:
492 |             imp_index = imp_index + (len(yp) - cntr_size*cntr_rounds)
493 | 
494 |         for i in range(round*cntr_size, imp_index):
495 |             if i == 0:
496 |                 total_bid_num["all"] = 1
497 |             else:
498 |                 total_bid_num["all"] += 1
499 | 
500 |             if exchange_train[i] in total_bid_num:
501 |                 total_bid_num[exchange_train[i]] += 1
502 |             else:
503 |                 total_bid_num[exchange_train[i]] = 1
504 | 
505 |             clk = y_train[i]
506 |             pctr = yp_train[i]
507 |             mp = mplist_train[i]
508 |             bid = max(minbid, lin(pctr, basectr, basebid) * (math.exp(phi[exchange_train[i]])))
509 |             # bid = int(max(minbid, lin(pctr, basectr, basebid) * (math.exp(0))))
510 | 
511 |             if round == 0:
512 |                 bid = int(max(minbid, lin(pctr, basectr, basebid) * (math.exp(0))))
513 | 
514 |             if bid > mp:
515 |                 if not "all" in total_wins:
516 |                     total_wins["all"] = 1
517 |                     total_clks["all"] = clk
518 |                     total_cost["all"] = mp
519 |                 else:
520 |                     total_wins["all"] += 1
521 |                     total_clks["all"] += clk
522 |                     total_cost["all"] += mp
523 | 
524 |                 if exchange_train[i] in total_wins:
525 |                     total_wins[exchange_train[i]] += 1
526 |                 else:
527 |                     total_wins[exchange_train[i]] = 1
528 | 
529 |                 if exchange_train[i] in total_clks:
530 |                     total_clks[exchange_train[i]] += clk
531 |                 else:
532 |                     total_clks[exchange_train[i]] = clk
533 | 
534 |                 if exchange_train[i] in total_cost:
535 |                     total_cost[exchange_train[i]] += mp
536 |                 else:
537 |                     total_cost[exchange_train[i]] = mp
538 | 
539 |             lin_bid = int(max(minbid, lin(pctr, basectr, basebid)))
540 | 
541 |             if lin_bid > mp:
542 |                 lin_total_clks += clk
543 |                 lin_total_wins += 1
544 |                 lin_total_cost += mp
545 | 
546 |             uni_bid = max(minbid, lin(pctr, basectr, basebid) * (math.exp(phi["uni"])))
547 |             if uni_bid > mp:
548 |                 uni_total_clks += clk
549 |                 uni_total_wins += 1
550 |                 uni_total_cost += mp
551 | 
552 | 
553 | 
554 |         ecpcs["all"][round] = total_cost["all"]  * 1.0 / total_clks["all"] / 1000.0
555 | 
556 |         for val in list(set(exchange_train)):
557 |             if round == 0:
558 |                 ecpcs[val] = {}
559 |             if total_clks[val] == 0:
560 |                 ecpcs[val][round] = total_cost[val] * 1.0 / (total_clks[val]+1) / 1000.0
561 |             else:
562 |                 ecpcs[val][round] = total_cost[val] * 1.0 / total_clks[val] / 1000.0
563 |             fo.write("%s\t%d\t%s\t%.4f\t%.4f\t%d\t%d\t%d\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f\n" % ("train", round, val, ecpcs[val][round], phi[val], total_bid_num[val], total_wins[val], total_clks[val], total_clks[val] * 1.0/advs_train_clicks[advertiser], total_cost[val], budget, adex_ref[val], advs_train_ori_ecpc[advertiser]))
564 |         fo.write("%s\t%d\t%s\t%.4f\t%.4f\t%d\t%d\t%d\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f\n" % ("train", round, "all", ecpcs["all"][round], 0.0, total_bid_num["all"], total_wins["all"], total_clks["all"], total_clks["all"] * 1.0/advs_train_clicks[advertiser], total_cost["all"], budget, 0.0, advs_train_ori_ecpc[advertiser]))
565 |         if lin_total_clks == 0:
566 |             lin_total_clks = 1
567 |         lin_ecpc = lin_total_cost * 1.0 /lin_total_clks / 1000.0
568 |         fo.write("%s\t%d\t%s\t%.4f\t%.4f\t%d\t%d\t%d\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f\n" % ("train", round, "lin", lin_ecpc, 0.0, total_bid_num["all"], lin_total_wins, lin_total_clks, lin_total_clks * 1.0/advs_train_clicks[advertiser], lin_total_cost, lin_budget, 0.0, advs_train_ori_ecpc[advertiser]))
569 |         if uni_total_clks == 0:
570 |             uni_total_clks = 1
571 |         uni_ecpc = uni_total_cost * 1.0 /uni_total_clks / 1000.0
572 |         fo.write("%s\t%d\t%s\t%.4f\t%.4f\t%d\t%d\t%d\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f\n" % ("train", round, "uni", uni_ecpc, phi["uni"], total_bid_num["all"], uni_total_wins, uni_total_clks, uni_total_clks * 1.0/advs_train_clicks[advertiser], uni_total_cost, uni_budget, adex_ref["all"], advs_train_ori_ecpc[advertiser]))
573 |         ecpcs["uni"][round] = uni_ecpc
574 | 
575 |     # weinan changes the report from test to train because test has the budget, which leads no settling
576 |     adex_ref["uni"] = adex_ref["all"]
577 | 
578 |     settling_time.append(cal_settling_time(ecpcs, adex_ref))
579 |     rmse_ss.append(cal_rmse_ss(ecpcs, adex_ref))
580 |     sd_ss.append(cal_sd_ss(ecpcs, adex_ref))
581 |     rise_time.append(cal_rise_time(ecpcs, adex_ref))
582 |     overshoot.append(cal_overshoot(ecpcs, adex_ref))
583 | 
584 |     fo.close()
585 | 
586 | 
random.seed(10)

# if len(sys.argv) != 3:
#     print 'campaignId mode'
#     exit(-1)

featWeight = {}


def _load_yzpc(path):
    """Read one tab-separated yzpc file and return (clicks, prices, pctrs, exchanges).

    Column 0 is parsed as int into the first list, column 1 as int into the
    second, column 2 as float into the third and column 3 is kept as a string.
    Rows whose column 3 is listed in advs_filtered_adex for the current
    advertiser are skipped. The file is closed even if a row fails to parse.
    """
    clicks, prices, pctrs, exchanges = [], [], [], []
    with open(path, 'r') as fi:
        for line in fi:
            data = line.strip().split("\t")
            if advertiser in advs_filtered_adex and data[3] in advs_filtered_adex[advertiser]:
                continue  # filtered ad exchange for this advertiser
            clicks.append(int(data[0]))
            prices.append(int(data[1]))
            pctrs.append(float(data[2]))
            exchanges.append(data[3].replace("\n", ""))
    return clicks, prices, pctrs, exchanges


# module-level data sets read by control()
y, mplist, yp, exchange = _load_yzpc("../../make-ipinyou-data/"+advertiser+"/test.yzpc.txt")
y_train, mplist_train, yp_train, exchange_train = _load_yzpc("../../make-ipinyou-data/"+advertiser+"/train.yzpc.txt")

# average predicted CTR over the training rows; used as the bid normaliser
basectr = sum(yp_train) / float(len(yp_train))

# for reporting: control() appends one entry to each metric list per call
parameters = []
overshoot = []
settling_time = []
rise_time = []
rmse_ss = []
sd_ss = []
report_path = ""


if mode == "single": # single mode
    out_path = "../exp-data/"+advertiser+"_p="+str(para_p)+"_i="+str(para_i)+"_d="+str(para_d)+"-eco.tsv"
    control(para_p, para_i, para_d, out_path)
    report_path = "../report/report-single-eco.tsv"
    parameter = ""+advertiser+"\t"+str(cntr_rounds)+"\t"+str(basebid)+"\t" + \
                str(para_p)+"\t"+str(para_i)+"\t"+str(para_d)+"\t"+str(settle_con)+"\t"+str(rise_con)
    parameters.append(parameter)
    # 'with' guarantees the report is flushed and closed even if a metric lookup fails
    with open(report_path, 'w') as rout:
        rout.write("campaign\ttotal-rounds\tbase-bid\tp\ti\td\tsettle-con\trise-con\trise-time\tsettling-time\tovershoot\trmse-ss\tsd-ss\texchange\n")
        # one row per (exchange, parameter set)
        for val in list(set(exchange)):
            for idx, value in enumerate(parameters):
                rout.write(value+"\t"+str(rise_time[idx][val])+"\t"+str(settling_time[idx][val])+"\t"+str(overshoot[idx][val])+"\t" + \
                       str(rmse_ss[idx][val]) + "\t" + str(sd_ss[idx][val]) + "\t" + val +"\n")
        # extra row for the "uni" controller, reported with its own gains
        parameter_uni = ""+advertiser+"\t"+str(cntr_rounds)+"\t"+str(basebid)+"\t" + \
                    str(para_p_uni)+"\t"+str(para_i_uni)+"\t"+str(para_d_uni)+"\t"+str(settle_con)+"\t"+str(rise_con)
        rout.write(parameter_uni+"\t"+str(rise_time[0]["uni"])+"\t"+str(settling_time[0]["uni"])+"\t"+str(overshoot[0]["uni"])+"\t" + \
                       str(rmse_ss[0]["uni"]) + "\t" + str(sd_ss[0]["uni"]) + "\t" + "uni" +"\n")
else:
    print("wrong mode entered")
658 | 
659 | 
660 | 
661 | 
662 | 


--------------------------------------------------------------------------------
/python/control-ecpc-multiex-pid-eco.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | import sys
  3 | import os
  4 | import random
  5 | import math
  6 | from sklearn.metrics import roc_auc_score
  7 | from sklearn.metrics import mean_squared_error
  8 | 
# Hard-coded per-advertiser statistics, keyed by advertiser id (string).
# NOTE(review): presumably precomputed from the train/test logs of each campaign - confirm.
advs_train_bids = {"1458": 3083056, "2259": 835556, "2261": 687617, "2821": 1322561, "2997": 312437, "3358": 1742104, "3386": 2847802, "3427": 2593765, "3476": 1970360}
advs_test_bids = {"1458": 614638, "2259": 417197, "2261": 343862, "2821": 661964, "2997": 156063, "3358": 300928, "3386": 545421, "3427": 536795, "3476": 523848}
advs_train_clicks = {"1458": 2454, "2259": 280, "2261": 207, "2821": 843, "2997": 1386, "3358": 1358, "3386": 2076, "3427": 1926, "3476": 1027}
advs_test_clicks = {"1458": 543, "2259": 131, "2261": 97, "2821": 394, "2997": 533, "3358": 339, "3386": 496, "3427": 395, "3476": 302}
advs_train_ori_ecpc = {"1458": 86550.0000, "2259": 277700.0000, "2261": 297640.0000, "2821": 140070.0000, "2997": 14210.0000, "3358": 118510.0000, "3386": 105520.0000, "3427": 109160.0000, "3476": 151980.0000}
advs_test_ori_ecpc = {"1458": 83270.0000, "2259": 332040.0000, "2261": 296870.0000, "2821": 173240.0000, "2997": 16170.0000, "3358": 100770.0000, "3386": 92170.0000, "3427": 117360.0000, "3476": 144460.0000}
advs_train_cost = {"1458": 212400000, "2259": 77754000, "2261": 61610000, "2821": 118082000, "2997": 19689000, "3358": 160943000, "3386": 219066000, "3427": 210239000, "3476": 156088000}
advs_test_cost = {"1458": 45216000, "2259": 43497000, "2261": 28795000, "2821": 68257000, "2997": 8617000, "3358": 34159000, "3386": 45715000, "3427": 46356000, "3476": 43627000}
# base bid handed to lin() for each advertiser
advs_base_bid = {"1458": 69, "2259": 93, "2261":90, "2821":90, "2997":63, "3358":92, "3386":77, "3427": 81, "3476": 79}

# budget = advs_test_lin_ori_cost[advertiser] * budget_damping (see below)
advs_test_lin_ori_cost = {"1458": 1630539.0, "2259": 23220744.0, "2261": 14531891.0, "2821": 28152710.0, "2997": 4241173.0, "3358": 2518069.0, "3386": 13076826.0, "3427": 4986476.0, "3476": 9075114.0}
budget_damping = 0.5

# pre-calculated references using economics model
# advertiser id -> {exchange id -> reference eCPC the controller steers towards}
advs_adex_ref = {'3386': {'1': 19.650368000000004, '3': 37.711368000000014, '2': 44.213328000000004},
                 '2997': {'null': 5.983467456521738},
                 '3476': {'1': 19.33278951724137, '3': 23.360453999999994, '2': 26.20928985365854},
                 '3427': {'1': 9.885587179487173, '3': 8.355310526315792, '2': 6.185100000000001},
                 '2821': {'1': 61.942298684210535, '3': 51.99172861445784, '2': 64.04917959183673, '4': 49.54528928571428},
                 '2259': {'1': 155.06496331797234, '3': 102.05130064516129, '2': 136.23116210526317},
                 '2261': {'1': 68.34028703703704, '3': 99.87262888198757, '2': 139.2594528301887},
                 '1458': {'1': 2.580408429906542, '3': 2.2326444905660416, '2': 1.517053307692311},
                 '3358': {'1': 8.342108526315794, '3': 1.9785028543689314, '2': 8.860251913043474}}

# control parameters
# PID gains are stored as integers and scaled by `div` (1e-6) below;
# min/max phi are the clamp bounds applied to the control signal phi.
advs_optimal_p = {"1458": 1000000,   "2259": 100000, "2261":500000,  "2821":40000, "2997":1000000,  "3358":500000,  "3386":50000,  "3427": 500000, "3476": 500000}
advs_optimal_i = {"1458": 100,       "2259": 20000,  "2261":5000,   "2821":200,   "2997":1000,   "3358":100,   "3386":10000,   "3427": 1000,  "3476": 1000}
advs_optimal_d = {"1458": 100,       "2259": 10000,  "2261":50000,   "2821":100,   "2997":10000,   "3358":100,   "3386":1000,   "3427": 1000,  "3476": 10005}
advs_optimal_min_phi = {"1458": -2,  "2259": -0.6, "2261":-0.8,     "2821":-0.5,   "2997":-0.5,   "3358":-1,   "3386":-0.45,   "3427": -1,  "3476": -2}
advs_optimal_max_phi = {"1458": 5,  "2259": 4,      "2261":5,       "2821":5,       "2997":5,       "3358":2,   "3386":2,   "3427": 2,  "3476": 3}

# advertiser id -> exchange ids to filter out
# NOTE(review): not referenced in the visible part of this file - confirm where it is applied.
advs_filtered_adex = {"2259":[]}

# advertiser defaults to "2259"; the first command-line argument overrides it
advertiser = "2259"
if len(sys.argv) > 1:
    advertiser = sys.argv[1]
mode = "batch"
basebid = advs_base_bid[advertiser]
print "%s\t%s\t%d" % (advertiser, mode, basebid)

# parameter setting
minbid = 5  # lower bound applied to every bid
cntr_rounds = 40  # number of control rounds the data is split into
div = 1e-6  # scale factor turning the integer gain tables into actual gains
para_p = advs_optimal_p[advertiser] * div
para_i = advs_optimal_i[advertiser] * div
para_d = advs_optimal_d[advertiser] * div
# Geometric grids: base_step_x = (max_x / min_x) ** (1 / x_num).
# NOTE(review): presumably consumed by the batch-mode parameter sweep - confirm.
max_p = advs_optimal_p[advertiser] * 20
min_p = advs_optimal_p[advertiser] / 5
p_num = 20
base_step_p = math.exp(math.log(max_p/min_p) / p_num)
max_i = 50000.
min_i = 25000.
i_num = 4
base_step_i = math.exp(math.log(max_i/min_i) / i_num)
max_d = 20000.
min_d = 10000.
d_num = 4
base_step_d = math.exp(math.log(max_d/min_d) / d_num)

para_ps = range(0, p_num, 1)
para_is = range(0, i_num, 1)
para_ds = range(0, d_num, 1)
settle_con = 0.1  # relative error band used by cal_settling_time
rise_con = 0.9  # cal_rise_time looks for relative error <= 1 - rise_con
min_phi = advs_optimal_min_phi[advertiser]
max_phi = advs_optimal_max_phi[advertiser]
#budget = 9999999999999.0
budget = advs_test_lin_ori_cost[advertiser] * budget_damping
lin_budget = budget  # the linear baseline gets the same budget as the controller
 79 | 
 80 | def ints(s):
 81 |     res = []
 82 |     for ss in s:
 83 |         res.append(int(ss))
 84 |     return res
 85 | 
 86 | def sigmoid(p):
 87 |     return 1.0 / (1.0 + math.exp(-p))
 88 | 
 89 | def estimator_lr(feats):
 90 |     pred = 0.0
 91 |     for feat in feats:
 92 |         if feat in featWeight:
 93 |             pred += featWeight[feat]
 94 |     pred = sigmoid(pred)
 95 |     return pred
 96 | 
 97 | # bidding functions
 98 | def lin(pctr, basectr, basebid):
 99 |     return int(pctr *  basebid / basectr)
100 | 
101 | # calculate settling time
def cal_settling_time(ecpcs, adex_ref):
    """Return {exchange: settling round} for every series except "all".

    ecpcs maps exchange -> {round: ecpc}; adex_ref maps exchange -> reference.
    A round is inside the band when |ref - ecpc| / ref <= settle_con. The
    result is the first round of the final uninterrupted in-band stretch,
    cntr_rounds when the last round is out of band, and 0 for an empty series.

    Rounds are visited in ascending order explicitly: the answer depends on
    the visiting order, which a dict does not guarantee.
    """
    settling_time = {}
    for ex, ex_ecpcs in ecpcs.items():
        if ex == "all":
            continue
        settled = False
        settling_time[ex] = 0
        for key, value in sorted(ex_ecpcs.items()):
            rel_error = abs(adex_ref[ex] - value) / adex_ref[ex]
            if rel_error <= settle_con and (not settled):
                # entering the band: remember where this stretch started
                settled = True
                settling_time[ex] = key
            elif rel_error > settle_con:
                # left the band: any earlier settling is void
                settled = False
                settling_time[ex] = cntr_rounds
    return settling_time
118 | 
119 | # # calculate steady-state error
def cal_rmse_ss(ecpcs, adex_ref):
    """Return {exchange: relative RMSE of the eCPC against its reference}.

    The error is accumulated from the settling round reported by
    cal_settling_time (capped at cntr_rounds - 1) up to the last round.
    Each squared error is divided by the squared reference. The "all"
    series is skipped.
    """
    first_round = cal_settling_time(ecpcs, adex_ref)
    rmse = {}
    for ex in ecpcs:
        if ex == "all":
            continue
        start = min(first_round[ex], cntr_rounds - 1)
        squared = 0.0
        for rnd in range(start, cntr_rounds):
            diff = ecpcs[ex][rnd] - adex_ref[ex]
            squared += diff * diff / (adex_ref[ex] * adex_ref[ex])
        rmse[ex] = math.sqrt(squared / (cntr_rounds - start)) # weinan: relative rmse
    return rmse
136 | 
137 | # # calculate steady-state standard deviation
def cal_sd_ss(ecpcs, adex_ref):
    """Return {exchange: relative standard deviation of the eCPC}.

    The statistic covers the rounds from the settling round reported by
    cal_settling_time (capped at cntr_rounds - 1) up to the last round and is
    the population standard deviation divided by the mean. The "all" series
    is skipped.

    Two numerical guards:
    - E[x^2] - mean^2 can come out slightly negative through rounding (for
      example for a constant series); it is clamped at 0 so math.sqrt does not
      raise a domain error.
    - A zero mean has no defined relative deviation; 0.0 is reported instead
      of raising ZeroDivisionError.
    """
    settling_time = cal_settling_time(ecpcs, adex_ref)
    sd = {}
    for ex in ecpcs:
        if ex == "all":
            continue
        start = min(settling_time[ex], cntr_rounds - 1)
        total = 0.0
        total_sq = 0.0
        for rnd in range(start, cntr_rounds):
            value = ecpcs[ex][rnd]
            total_sq += value * value
            total += value
        n = cntr_rounds - start
        mean = total / n
        variance = max(total_sq / n - mean * mean, 0.0)
        if mean == 0:
            sd[ex] = 0.0
        else:
            sd[ex] = math.sqrt(variance) / mean # weinan: relative sd
    return sd
157 | 
158 | # calculate rise time
def cal_rise_time(ecpcs, adex_ref):
    """Return {exchange: rise round} for every series except "all".

    The rise round is the earliest round whose eCPC satisfies
    |ref - ecpc| / ref <= 1 - rise_con; 0 is reported when no round does.

    Rounds are scanned in ascending order explicitly, because "earliest" is
    only meaningful in round order and a dict does not guarantee it.
    """
    rise_time = {}
    for ex, ex_ecpcs in ecpcs.items():
        if ex == "all":
            continue
        rise_time[ex] = 0  # default when the series never gets close enough
        for key, value in sorted(ex_ecpcs.items()):
            error = adex_ref[ex] - value
            if abs(error) / adex_ref[ex] <= (1 - rise_con):
                rise_time[ex] = key
                break
    return rise_time
174 | 
175 | # calculate percentage overshoot
def cal_overshoot(ecpcs, adex_ref):
    """Return {exchange: largest percentage overshoot} for every series except "all".

    The side that counts as overshoot depends on where round 0 starts:
    - starting above the reference, values at or below it are measured;
    - starting below the reference, values at or above it are measured;
    - starting exactly on the reference, the absolute deviation of every
      value is measured.
    Deviations are percentages of the reference; the result is never below 0.0
    for a positive reference.
    """
    overshoot = {}
    for ex in ecpcs:
        if ex == "all":
            continue
        series = ecpcs[ex]
        start = series[0]
        ref = adex_ref[ex]
        peak = 0.0
        for value in series.values():
            if start > ref:
                if value <= ref:
                    deviation = (ref - value) * 100.0 / ref
                    if deviation > peak:
                        peak = deviation
            elif start < ref:
                if value >= ref:
                    deviation = (value - ref) * 100.0 / ref
                    if deviation > peak:
                        peak = deviation
            else:
                deviation = abs(value - ref) * 100.0 / ref
                if deviation >= peak:
                    peak = deviation
        overshoot[ex] = peak
    return overshoot
201 | 
202 | # control function
def _ecpc(cost, clks):
    """Return cost / clicks / 1000; a zero click count is treated as one click."""
    return cost * 1.0 / max(clks, 1) / 1000.0


def _update_phi(rnd, exchanges, adex_ref, ecpcs, error_sum, phi, para_p, para_i, para_d):
    """Recompute the control signal phi of every exchange before round `rnd` is replayed.

    Round 0 starts from phi = 0 with an empty error integral. Round 1 applies
    the P and I terms on the previous round's eCPC error. From round 2 on the
    D term (difference of the last two eCPCs) is added. phi is then clamped
    to [min_phi, max_phi]. phi and error_sum are updated in place.
    """
    for val in exchanges:
        if rnd == 0:
            phi[val] = 0.0
            error_sum[val] = 0.0
        else:
            error = adex_ref[val] - ecpcs[val][rnd-1]
            error_sum[val] += error
            phi[val] = para_p*error + para_i*error_sum[val]
            if rnd >= 2:
                phi[val] += para_d*(ecpcs[val][rnd-2]-ecpcs[val][rnd-1])
        # clamp the control signal
        if phi[val] <= min_phi:
            phi[val] = min_phi
        elif phi[val] >= max_phi:
            phi[val] = max_phi


def _control_test_phase(fo, para_p, para_i, para_d):
    """Replay the test set under the shared budget and write the "test" rows to fo."""
    row = "%s\t%d\t%s\t%.4f\t%.4f\t%d\t%d\t%d\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f\n"
    exchanges = list(set(exchange))
    adex_ref = advs_adex_ref[advertiser]
    cntr_size = int(len(yp) / cntr_rounds)
    ecpcs = {"all": {}}
    error_sum = {}
    phi = {}
    total_cost = {"all": 0.0}
    total_clks = {"all": 0}
    total_wins = {"all": 0}
    total_bid_num = {"all": 0}
    for val in exchanges:
        ecpcs[val] = {}
        total_clks[val] = 0
        total_cost[val] = 0.0
        total_wins[val] = 0
        total_bid_num[val] = 0
    lin_total_bid_num = 0
    lin_total_wins = 0
    lin_total_clks = 0
    lin_total_cost = 0.0

    for rnd in range(0, cntr_rounds):
        _update_phi(rnd, exchanges, adex_ref, ecpcs, error_sum, phi, para_p, para_i, para_d)

        start = rnd * cntr_size
        # the last round also takes the remainder left by the integer split
        stop = len(yp) if rnd == cntr_rounds - 1 else start + cntr_size

        for i in range(start, stop):
            clk = y[i]
            pctr = yp[i]
            mp = mplist[i]
            ex = exchange[i]
            base = lin(pctr, basectr, basebid)

            # an impression only counts while its price still fits the budget
            affordable = total_cost["all"] + mp < budget
            if affordable:
                total_bid_num["all"] += 1
                total_bid_num[ex] += 1

            if rnd == 0:
                # round 0 always bids without control
                bid = int(max(minbid, base * (math.exp(0))))
            else:
                bid = int(max(minbid, base * (math.exp(phi[ex]))))

            if bid > mp and affordable:
                total_wins["all"] += 1
                total_clks["all"] += clk
                total_cost["all"] += mp
                total_wins[ex] += 1
                total_clks[ex] += clk
                total_cost[ex] += mp

            # uncontrolled linear baseline with its own budget
            lin_bid = int(max(minbid, base))
            lin_affordable = lin_total_cost + mp < lin_budget
            if lin_affordable:
                lin_total_bid_num += 1
            if lin_bid > mp and lin_affordable:
                lin_total_clks += clk
                lin_total_wins += 1
                lin_total_cost += mp

        ecpcs["all"][rnd] = _ecpc(total_cost["all"], total_clks["all"])
        for val in exchanges:
            ecpcs[val][rnd] = _ecpc(total_cost[val], total_clks[val])
            fo.write(row % ("test", rnd, val, ecpcs[val][rnd], phi[val], total_bid_num[val], total_wins[val], total_clks[val], total_clks[val] * 1.0/advs_test_clicks[advertiser], total_cost[val], budget, adex_ref[val], advs_test_ori_ecpc[advertiser]))
        fo.write(row % ("test", rnd, "all", ecpcs["all"][rnd], 0.0, total_bid_num["all"], total_wins["all"], total_clks["all"], total_clks["all"] * 1.0/advs_test_clicks[advertiser], total_cost["all"], budget, 0.0, advs_test_ori_ecpc[advertiser]))
        lin_ecpc = _ecpc(lin_total_cost, lin_total_clks)
        fo.write(row % ("test", rnd, "lin", lin_ecpc, 0.0, lin_total_bid_num, lin_total_wins, lin_total_clks, lin_total_clks * 1.0/advs_test_clicks[advertiser], lin_total_cost, lin_budget, 0.0, advs_test_ori_ecpc[advertiser]))

    bid_nums.append(total_bid_num)
    imp_nums.append(total_wins)
    adex_clks.append(total_clks)
    adex_costs.append(total_cost)
    adex_ecpcs.append(ecpcs)


def _control_train_phase(fo, para_p, para_i, para_d):
    """Replay the training set without a budget, write the "train" rows and record the metrics."""
    row = "%s\t%d\t%s\t%.4f\t%.4f\t%d\t%d\t%d\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f\n"
    exchanges = list(set(exchange_train))
    adex_ref = advs_adex_ref[advertiser]
    cntr_size = int(len(yp_train) / cntr_rounds)
    ecpcs = {"all": {}}
    error_sum = {}
    phi = {}
    # "all" totals exist from the start so a round without wins cannot raise KeyError
    total_cost = {"all": 0.0}
    total_clks = {"all": 0}
    total_wins = {"all": 0}
    total_bid_num = {"all": 0}
    for val in exchanges:
        ecpcs[val] = {}
        total_clks[val] = 0
        total_cost[val] = 0.0
        total_wins[val] = 0
        total_bid_num[val] = 0
    lin_total_wins = 0
    lin_total_clks = 0
    lin_total_cost = 0

    for rnd in range(0, cntr_rounds):
        _update_phi(rnd, exchanges, adex_ref, ecpcs, error_sum, phi, para_p, para_i, para_d)

        start = rnd * cntr_size
        # remainder of the TRAINING set goes to the last round
        stop = len(yp_train) if rnd == cntr_rounds - 1 else start + cntr_size

        for i in range(start, stop):
            clk = y_train[i]
            pctr = yp_train[i]
            mp = mplist_train[i]
            ex = exchange_train[i]
            base = lin(pctr, basectr, basebid)

            total_bid_num["all"] += 1
            total_bid_num[ex] += 1

            if rnd == 0:
                # round 0 always bids without control
                bid = int(max(minbid, base * (math.exp(0))))
            else:
                # NOTE: unlike the test phase, this bid is not truncated to int
                bid = max(minbid, base * (math.exp(phi[ex])))

            if bid > mp:
                total_wins["all"] += 1
                total_clks["all"] += clk
                total_cost["all"] += mp
                total_wins[ex] += 1
                total_clks[ex] += clk
                total_cost[ex] += mp

            # uncontrolled linear baseline
            lin_bid = int(max(minbid, base))
            if lin_bid > mp:
                lin_total_clks += clk
                lin_total_wins += 1
                lin_total_cost += mp

        ecpcs["all"][rnd] = _ecpc(total_cost["all"], total_clks["all"])
        for val in exchanges:
            ecpcs[val][rnd] = _ecpc(total_cost[val], total_clks[val])
            fo.write(row % ("train", rnd, val, ecpcs[val][rnd], phi[val], total_bid_num[val], total_wins[val], total_clks[val], total_clks[val] * 1.0/advs_train_clicks[advertiser], total_cost[val], budget, adex_ref[val], advs_train_ori_ecpc[advertiser]))
        fo.write(row % ("train", rnd, "all", ecpcs["all"][rnd], 0.0, total_bid_num["all"], total_wins["all"], total_clks["all"], total_clks["all"] * 1.0/advs_train_clicks[advertiser], total_cost["all"], budget, 0.0, advs_train_ori_ecpc[advertiser]))
        lin_ecpc = _ecpc(lin_total_cost, lin_total_clks)
        fo.write(row % ("train", rnd, "lin", lin_ecpc, 0.0, total_bid_num["all"], lin_total_wins, lin_total_clks, lin_total_clks * 1.0/advs_train_clicks[advertiser], lin_total_cost, lin_budget, 0.0, advs_train_ori_ecpc[advertiser]))

    # weinan changes the report from test to train because test has the budget, which leads no settling
    settling_time.append(cal_settling_time(ecpcs, adex_ref))
    rmse_ss.append(cal_rmse_ss(ecpcs, adex_ref))
    sd_ss.append(cal_sd_ss(ecpcs, adex_ref))
    rise_time.append(cal_rise_time(ecpcs, adex_ref))
    overshoot.append(cal_overshoot(ecpcs, adex_ref))


def control(para_p, para_i, para_d, outfile):
    """Run the per-exchange PID eCPC controller once and log every round to outfile.

    para_p, para_i, para_d: PID gains applied to the eCPC error of each exchange.
    outfile: path of the TSV log; it is overwritten.

    Two passes are written to the same file:
    - "test" rows replay y / yp / mplist / exchange under `budget`. The
      cumulative totals and eCPCs are appended to bid_nums, imp_nums,
      adex_clks, adex_costs and adex_ecpcs.
    - "train" rows replay the *_train lists without a budget. The control
      metrics of that pass are appended to settling_time, rmse_ss, sd_ss,
      rise_time and overshoot.
    Each pass splits its data into cntr_rounds equal slices, with the
    remainder going to the last slice. Totals are cumulative across rounds.

    Differences from the previous implementation:
    - The last training round is bounded by len(yp_train), not len(yp).
    - The "all" eCPC uses the same zero-click guard as the per-exchange eCPC.
    - The training "all" totals are initialised up front.
    - The linear baseline's click counter is no longer overwritten with 1 when
      it is 0, so the "lin" rows report the real click count.
    - The output file is closed even when a round raises.
    """
    fo = open(outfile, 'w')
    try:
        fo.write("stage\tround\texchange\tecpc\tphi\ttotal_bid_num\ttotal_wins\ttotal_clks\tclick_ratio\ttotal_cost\tbudget\tref\tori_ecpc\n")
        _control_test_phase(fo, para_p, para_i, para_d)
        # print "lin total cost: " + str(lin_total_cost)
        _control_train_phase(fo, para_p, para_i, para_d)
    finally:
        fo.close()
478 | 
479 | # control function - batch mode
def control_batch(para_p, para_i, para_d, outfile, rout, parameter):
    """Run one PID gain setting over the test and train logs (batch mode)
    and append the resulting summary rows to ``rout``.

    The controller keeps one bid-scaling exponent ``phi`` per ad exchange.
    After every control round, ``phi`` is recomputed from the gap between
    the exchange's reference eCPC (``advs_adex_ref[advertiser]``) and the
    cumulative eCPC observed so far, then clamped to [min_phi, max_phi].

    Two passes are made:
      * test pass  - budget constrained (``budget``); its final totals are
                     what gets written to the report and to the module
                     globals ``tmp_clks`` / ``tmp_cost``;
      * train pass - no budget; its per-round eCPC curves are what the
                     control metrics (rise time, settling time, overshoot,
                     rmse, sd) are computed from.

    Parameters:
      para_p, para_i, para_d -- PID gains.
      outfile   -- unused; kept for interface compatibility (per-round TSV
                   output is disabled in batch mode).
      rout      -- open, writable file-like object for the batch report.
      parameter -- pre-formatted, tab-separated prefix for every report row.

    Reads module globals: y, yp, mplist, exchange and their *_train
    counterparts, advertiser, advs_adex_ref, cntr_rounds, budget, minbid,
    basectr, basebid, min_phi, max_phi, lin and the cal_* metric helpers.
    """
    global tmp_clks
    global tmp_cost

    adex_ref = advs_adex_ref[advertiser]
    last_round = cntr_rounds - 1

    # ------------------------------------------------------------------
    # test pass (budget constrained)
    # ------------------------------------------------------------------
    adexes = list(set(exchange))
    cntr_size = int(len(yp) / cntr_rounds)
    ecpcs = {"all": {}}
    error_sum = {}
    phi = {}
    # "all" counters are initialised up front so that a run in which
    # nothing is ever bid on / won cannot hit a KeyError later.
    total_cost = {"all": 0.0}
    total_clks = {"all": 0}
    total_wins = {"all": 0}
    total_bid_num = {"all": 0}

    for val in adexes:
        total_clks[val] = 0
        total_cost[val] = 0.0
        total_wins[val] = 0
        total_bid_num[val] = 0
        ecpcs[val] = {}
        phi[val] = 0.0
        error_sum[val] = 0.0

    for round in range(0, cntr_rounds):
        # Round 0 has no feedback yet (phi stays 0); round 1 has no
        # previous-previous sample, so the derivative term starts at round 2.
        if round > 0:
            for val in adexes:
                error = adex_ref[val] - ecpcs[val][round-1]
                error_sum[val] += error
                phi[val] = para_p*error + para_i*error_sum[val]
                if round > 1:
                    phi[val] += para_d*(ecpcs[val][round-2]-ecpcs[val][round-1])

        # keep phi inside its allowed band
        for val in adexes:
            if phi[val] <= min_phi:
                phi[val] = min_phi
            elif phi[val] >= max_phi:
                phi[val] = max_phi

        # the last round also takes the remainder of the log
        imp_index = (round+1)*cntr_size
        if round == last_round:
            imp_index = len(yp)

        for i in range(round*cntr_size, imp_index):
            clk = y[i]
            pctr = yp[i]
            mp = mplist[i]
            adex = exchange[i]

            # An impression only counts as bid on / winnable while paying
            # its market price would still keep total spend under budget.
            affordable = total_cost["all"] + mp < budget
            if affordable:
                total_bid_num["all"] += 1
                total_bid_num[adex] += 1

            # round 0 bids with the plain linear bid (exp(0) == 1)
            if round == 0:
                scale = math.exp(0)
            else:
                scale = math.exp(phi[adex])
            bid = int(max(minbid, lin(pctr, basectr, basebid) * scale))

            if bid > mp and affordable:
                total_wins["all"] += 1
                total_clks["all"] += clk
                total_cost["all"] += mp
                total_wins[adex] += 1
                total_clks[adex] += clk
                total_cost[adex] += mp

        # Cumulative eCPC; a zero click count is treated as one click so
        # the division is always defined (same rule for "all" and per exchange).
        ecpcs["all"][round] = total_cost["all"] * 1.0 / max(total_clks["all"], 1) / 1000.0
        for val in adexes:
            ecpcs[val][round] = total_cost[val] * 1.0 / max(total_clks[val], 1) / 1000.0

    test_report_bid_num = total_bid_num
    test_report_total_wins = total_wins
    test_report_total_clks = total_clks
    test_report_total_cost = total_cost
    test_report_ecpcs = ecpcs
    tmp_clks = test_report_total_clks["all"]
    tmp_cost = test_report_total_cost["all"]

    # ------------------------------------------------------------------
    # train pass (no budget)
    # ------------------------------------------------------------------
    train_adexes = list(set(exchange_train))
    cntr_size = int(len(yp_train) / cntr_rounds)
    ecpcs = {"all": {}}
    error_sum = {}
    phi = {}
    total_cost = {"all": 0.0}
    total_clks = {"all": 0}
    total_wins = {"all": 0}
    total_bid_num = {"all": 0}

    for val in train_adexes:
        total_clks[val] = 0
        total_cost[val] = 0.0
        total_wins[val] = 0
        total_bid_num[val] = 0
        ecpcs[val] = {}
        phi[val] = 0.0
        error_sum[val] = 0.0

    for round in range(0, cntr_rounds):
        if round > 0:
            for val in train_adexes:
                error = adex_ref[val] - ecpcs[val][round-1]
                error_sum[val] += error
                phi[val] = para_p*error + para_i*error_sum[val]
                if round > 1:
                    phi[val] += para_d*(ecpcs[val][round-2]-ecpcs[val][round-1])

        for val in train_adexes:
            if phi[val] <= min_phi:
                phi[val] = min_phi
            elif phi[val] >= max_phi:
                phi[val] = max_phi

        # The remainder must come from the TRAIN log length; sizing it from
        # the test log either drops train impressions or indexes past the end.
        imp_index = (round+1)*cntr_size
        if round == last_round:
            imp_index = len(yp_train)

        for i in range(round*cntr_size, imp_index):
            clk = y_train[i]
            pctr = yp_train[i]
            mp = mplist_train[i]
            adex = exchange_train[i]

            total_bid_num["all"] += 1
            total_bid_num[adex] += 1

            if round == 0:
                bid = int(max(minbid, lin(pctr, basectr, basebid) * (math.exp(0))))
            else:
                # NOTE(review): unlike the test pass this bid is not
                # truncated with int(); kept as-is so the train curves
                # (and the metrics derived from them) do not change.
                bid = max(minbid, lin(pctr, basectr, basebid) * (math.exp(phi[adex])))

            if bid > mp:
                total_wins["all"] += 1
                total_clks["all"] += clk
                total_cost["all"] += mp
                total_wins[adex] += 1
                total_clks[adex] += clk
                total_cost[adex] += mp

        ecpcs["all"][round] = total_cost["all"] * 1.0 / max(total_clks["all"], 1) / 1000.0
        for val in train_adexes:
            ecpcs[val][round] = total_cost[val] * 1.0 / max(total_clks[val], 1) / 1000.0

    # weinan changes the report from test to train because test has the budget, which leads no settling
    settling_time = cal_settling_time(ecpcs, adex_ref)
    rmse_ss = cal_rmse_ss(ecpcs, adex_ref)
    sd_ss = cal_sd_ss(ecpcs, adex_ref)
    rise_time = cal_rise_time(ecpcs, adex_ref)
    overshoot = cal_overshoot(ecpcs, adex_ref)

    # One row per exchange seen in the test log: metrics from the train
    # pass, volume/cost figures and final-round eCPC from the test pass.
    for val in adexes:
        fields = [parameter,
                  str(rise_time[val]), str(settling_time[val]), str(overshoot[val]),
                  str(rmse_ss[val]), str(sd_ss[val]),
                  str(test_report_bid_num[val]), str(test_report_total_wins[val]),
                  str(test_report_total_clks[val]), str(test_report_total_cost[val]),
                  str(test_report_ecpcs[val][last_round]), str(budget), val]
        rout.write("\t".join(fields) + "\n")

    # Aggregate row: control metrics are not defined for "all", so they
    # are written as 0.0 placeholders.
    fields = [parameter, "0.0", "0.0", "0.0", "0.0", "0.0",
              str(test_report_bid_num["all"]), str(test_report_total_wins["all"]),
              str(test_report_total_clks["all"]), str(test_report_total_cost["all"]),
              str(test_report_ecpcs["all"][last_round]), str(budget), "all"]
    rout.write("\t".join(fields) + "\n")
    rout.flush()
764 | 
765 | random.seed(10)
766 | 
767 | # if len(sys.argv) != 3:
768 | #     print 'campaignId mode'
769 | #     exit(-1)
770 | 
771 | tmp_clks = 0
772 | tmp_cost = 0
773 | 
774 | mplist = []
775 | y = []
776 | yp = []
777 | mplist_train = []
778 | y_train = []
779 | yp_train = []
780 | featWeight = {}
781 | exchange = []
782 | exchange_train = []
783 | 
784 | fi = open("../../make-ipinyou-data/"+advertiser+"/test.yzpc.txt", 'r')
785 | for line in fi:
786 |     data = line.strip().split("\t")
787 |     if advertiser in advs_filtered_adex and data[3] in advs_filtered_adex[advertiser]:
788 |         continue  # filtered ad exchange for this advertiser
789 |     y.append(int(data[0]))
790 |     mplist.append(int(data[1]))
791 |     yp.append(float(data[2]))
792 |     exchange.append(data[3].replace("\n", ""))
793 | fi.close()
794 | 
795 | fi = open("../../make-ipinyou-data/"+advertiser+"/train.yzpc.txt", 'r')
796 | for line in fi:
797 |     data = line.strip().split("\t")
798 |     if advertiser in advs_filtered_adex and data[3] in advs_filtered_adex[advertiser]:
799 |         continue  # filtered ad exchange for this advertiser
800 |     y_train.append(int(data[0]))
801 |     mplist_train.append(int(data[1]))
802 |     yp_train.append(float(data[2]))
803 |     exchange_train.append(data[3].replace("\n", ""))
804 | fi.close()
805 | 
806 | basectr = sum(yp_train) / float(len(yp_train))
807 | 
808 | # for reporting
809 | parameters = []
810 | overshoot = []
811 | settling_time = []
812 | rise_time = []
813 | rmse_ss = []
814 | sd_ss = []
815 | bid_nums = []
816 | imp_nums = []
817 | adex_clks = []
818 | adex_costs = []
819 | adex_ecpcs = []
820 | 
821 | report_path = ""
822 | 
823 | 
824 | if mode == "single": # single mode
825 |     out_path = "../exp-data/"+advertiser+"_p="+str(para_p)+"_i="+str(para_i)+"_d="+str(para_d)+"-eco.tsv"
826 |     control(para_p, para_i, para_d, out_path)
827 |     report_path = "../report/report-single-eco.tsv"
828 |     parameter = ""+advertiser+"\t"+str(cntr_rounds)+"\t"+str(basebid)+"\t" + \
829 |                 str(para_p)+"\t"+str(para_i)+"\t"+str(para_d)+"\t"+str(settle_con)+"\t"+str(rise_con)
830 |     parameters.append(parameter)
831 |     rout = open(report_path, 'w')
832 |     rout.write("campaign\ttotal-rounds\tbase-bid\tp\ti\td\tsettle-con\trise-con\trise-time\tsettling-time\tovershoot\trmse-ss\tsd-ss\texchange\n")
833 |     for val in list(set(exchange)):
834 |         for idx, value in enumerate(parameters):
835 |             rout.write(value+"\t"+str(rise_time[idx][val])+"\t"+str(settling_time[idx][val])+"\t"+str(overshoot[idx][val])+"\t" + \
836 |                    str(rmse_ss[idx][val]) + "\t" + str(sd_ss[idx][val]) + "\t" + val +"\n")
837 |     rout.close()
838 | elif mode == "batch":
839 |     count = 0
840 |     report_path = "../report/"+ advertiser + "-report-batch-eco.tsv"
841 |     rout = open(report_path, 'w')
842 |     rout.write("campaign\ttotal-rounds\tbase-bid\tp\ti\td\tsettle-con\trise-con\trise-time\tsettling-time\tovershoot\trmse-ss\tsd-ss\tbid_num\timp_num\tclk\tcost\tecpc\tbudget\texchange\n")
843 | 
844 |     for i in para_is:
845 |         if len(para_is) > 1:
846 |             para_i = min_i * pow(base_step_i, i) * div
847 |         for d in para_ds:
848 |             if len(para_ds) > 1:
849 |                 para_d = min_d * pow(base_step_d, d) * div
850 |             for p in para_ps:
851 |                 if len(para_ps) > 1:
852 |                     para_p = min_p * pow(base_step_p, p) * div
853 |                 count += 1
854 |                 directory = "../batch-exp-data/"+advertiser
855 |                 if not os.path.exists(directory):
856 |                     os.makedirs(directory)
857 |                 out_path = directory+"/"+advertiser+"_p="+str(para_p)+"_i="+str(para_i)+"_d="+str(para_d)+"-eco.tsv"
858 | 
859 | 
860 |                 parameter = ""+advertiser+"\t"+str(cntr_rounds)+"\t"+str(basebid)+"\t" + \
861 |                             str(para_p)+"\t"+str(para_i)+"\t"+str(para_d)+"\t"+str(settle_con)+"\t"+str(rise_con)
862 |                 # parameters.append(parameter)
863 | 
864 |                 control_batch(para_p, para_i, para_d, out_path, rout, parameter)
865 |                 print advertiser + "\t" + str(count) + "\t" + str(tmp_clks) + "\t" + str(tmp_cost) + "\t" + str(budget) + "\t" +str(para_p) + "\t" +str(para_i) + "\t" +str(para_d)
866 |     rout.close()
867 | else:
868 |     print "wrong mode entered"
869 | 
870 | 
871 | 
872 | 
873 | 
874 | 


--------------------------------------------------------------------------------
/python/control-ecpc-pid-example.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | import sys
  3 | import random
  4 | import math
  5 | from sklearn.metrics import roc_auc_score
  6 | from sklearn.metrics import mean_squared_error
  7 | 
# ---- experiment configuration (module-level globals read by the functions below) ----
advertiser = "1458"        # campaign id; used in report rows and output file names
mode = "test"              # selects the "test" or "batch" branch at the bottom of the script
ref = 40000                # reference eCPC the controller steers towards
advs_test_bids = 100000    # denominator of win_ratio for the test log
advs_train_bids = 100000   # denominator of win_ratio for the train log
advs_train_clicks = 79     # denominator of click_ratio for the train log
advs_test_clicks = 65      # denominator of click_ratio for the test log
basebid = 69               # base bid handed to lin()

print "Example of PID control eCPC."
print "Data sample from campaign 1458 from iPinYou dataset."
print "Reference eCPC: " + str(ref)

# parameter setting
minbid = 5                 # lower bound applied to every computed bid
cntr_rounds = 40           # number of control rounds the log is split into
para_p = 0.0005            # PID proportional gain
para_i = 0.000001          # PID integral gain
para_d = 0.0001            # PID derivative gain
div = 1e-6                 # batch mode multiplies each grid value below by this to get a gain
para_ps = range(0, 40, 5)  # batch-mode grid for the proportional gain
para_is = range(0, 25, 5)  # batch-mode grid for the integral gain
para_ds = range(0, 25, 5)  # batch-mode grid for the derivative gain
settle_con = 0.1           # relative-error band used by cal_settling_time
rise_con = 0.9             # cal_rise_time looks for relative error <= 1 - rise_con
min_phi = -2               # lower clamp for the control signal phi
max_phi = 5                # upper clamp for the control signal phi
 35 | 
 36 | 
 37 | # bidding functions
 38 | def lin(pctr, basectr, basebid):
 39 |     return int(pctr *  basebid / basectr)
 40 | 
 41 | # calculate settling time
 42 | def cal_settling_time(ecpcs, ref):
 43 |     settled = False
 44 |     settling_time = 0
 45 |     for key, value in ecpcs.iteritems():
 46 |         error = ref - value
 47 |         if abs(error) / ref <= settle_con and settled == False:
 48 |             settled = True
 49 |             settling_time = key
 50 |         elif abs(error) / ref > settle_con:
 51 |             settled = False
 52 |             settling_time = cntr_rounds
 53 |     return settling_time
 54 | 
 55 | # # calculate steady-state error
 56 | def cal_rmse_ss(ecpcs, ref):
 57 |     settled = False
 58 |     settling_time = cal_settling_time(ecpcs, ref)
 59 |     rmse = 0.0
 60 |     if settling_time >= cntr_rounds:
 61 |         settling_time = cntr_rounds - 1
 62 |     for round in range(settling_time, cntr_rounds):
 63 |         rmse += (ecpcs[round] - ref) * (ecpcs[round] - ref)
 64 |     rmse /= (cntr_rounds - settling_time)
 65 |     rmse = math.sqrt(rmse) / ref # weinan: relative rmse
 66 |     return rmse
 67 | 
 68 | # # calculate steady-state standard deviation
 69 | def cal_sd_ss(ecpcs, ref):
 70 |     settled = False
 71 |     settling_time = cal_settling_time(ecpcs, ref)
 72 |     if settling_time >= cntr_rounds:
 73 |         settling_time = cntr_rounds - 1
 74 |     sum2 = 0.0
 75 |     sum = 0.0
 76 |     for round in range(settling_time, cntr_rounds):
 77 |         sum2 += ecpcs[round] * ecpcs[round]
 78 |         sum += ecpcs[round]
 79 |     n = cntr_rounds - settling_time
 80 |     mean = sum / n
 81 |     sd = math.sqrt(sum2 / n - mean * mean) / mean # weinan: relative sd
 82 |     return sd
 83 | 
 84 | # calculate rise time
 85 | def cal_rise_time(ecpcs, ref, rise_con):
 86 |     rise_time = 0
 87 |     for key, value in ecpcs.iteritems():
 88 |         error = ref - value
 89 |         if abs(error) / ref <= (1 - rise_con):
 90 |             rise_time = key
 91 |             break
 92 |     return rise_time
 93 | 
 94 | # calculate percentage overshoot
 95 | def cal_overshoot(ecpcs, ref):
 96 |     if ecpcs[0] > ref:
 97 |         min = ecpcs[0];
 98 |         for key, value in ecpcs.iteritems():
 99 |             if value <= min:
100 |                 min = value
101 |         if min < ref:
102 |             return (ref - min) * 100.0 / ref
103 |         else:
104 |             return 0.0
105 |     elif ecpcs[0] < ref:
106 |         max = ecpcs[0]
107 |         for key, value in ecpcs.iteritems():
108 |             if value >= max:
109 |                 max = value
110 |         if max > ref:
111 |             return (max - ref) * 100.0 / ref
112 |         else:
113 |             return 0.0
114 |     else:
115 |         max = 0
116 |         for key, value in ecpcs.iteritems():
117 |             if abs(value - ref) >= max:
118 |                 max = value
119 |         return (max - ref) * 100.0 / ref
120 | 
121 | # control function
def _run_pid_phase(cntr_rounds, ref, para_p, para_i, para_d, clicks, pctrs, prices, on_round):
    """Replay one impression log under PID control of the eCPC.

    The log (parallel lists ``clicks``, ``pctrs``, ``prices``) is split
    into ``cntr_rounds`` equal slices; the last slice also takes the
    remainder. After each round ``on_round(round, ecpc, phi, total_clks,
    total_wins, total_cost)`` is called with the cumulative figures.
    Returns ``(ecpcs, cum_costs)``, both dicts keyed by round.
    """
    ecpcs = {}
    cum_costs = {}
    error_sum = 0.0
    total_cost = 0.0
    total_clks = 0
    total_wins = 0
    cntr_size = int(len(pctrs) / cntr_rounds)

    for round in range(0, cntr_rounds):
        # Round 0 has no feedback; the derivative term needs two previous
        # samples and therefore only kicks in from round 2.
        if round == 0:
            phi = 0.0
        else:
            error = ref - ecpcs[round-1]
            error_sum += error
            phi = para_p*error + para_i*error_sum
            if round > 1:
                phi += para_d*(ecpcs[round-2]-ecpcs[round-1])

        # keep phi inside its allowed band
        if phi <= min_phi:
            phi = min_phi
        elif phi >= max_phi:
            phi = max_phi

        start = round*cntr_size
        stop = (round+1)*cntr_size
        if round == cntr_rounds - 1:
            stop = len(pctrs)

        gain = math.exp(phi)
        for i in range(start, stop):
            mp = prices[i]
            if round == 0:
                # first round bids a fixed 1000.0 on everything
                bid = 1000.0
            else:
                bid = max(minbid, lin(pctrs[i], basectr, basebid) * gain)

            if bid > mp:
                total_wins += 1
                total_clks += clicks[i]
                total_cost += mp

        cum_costs[round] = total_cost
        # +1 in the denominator keeps the eCPC defined before the first click
        ecpcs[round] = total_cost / (total_clks+1)
        on_round(round, ecpcs[round], phi, total_clks, total_wins, total_cost)

    return ecpcs, cum_costs


# control function
def control(cntr_rounds, ref, para_p, para_i, para_d, outfile):
    """Run the PID controller on the test log, then on the train log, and
    write the per-round trace of both to ``outfile`` (TSV).

    For each stage the file gets one row per round with the observed
    figures, followed by one "<stage>-ref" row per round carrying the
    reference eCPC and that round's cumulative cost. The control metrics
    of the test stage are appended to the module-level lists ``overshoot``,
    ``settling_time``, ``rise_time``, ``rmse_ss`` and ``sd_ss``.

    Parameters:
      cntr_rounds            -- number of control rounds.
      ref                    -- reference eCPC.
      para_p, para_i, para_d -- PID gains.
      outfile                -- path of the TSV file to (over)write.
    """
    row_fmt = "%d\t%.4f\t%s\t%.4f\t%d\t%.4f\t%.4f\t%.4f\t%.1f\n"

    # `with` closes the file even if a stage raises part-way through
    with open(outfile, 'w') as fo:
        fo.write("round\tecpc\tstage\tphi\ttotal_click\tclick_ratio\twin_ratio\ttotal_cost\tref\n")

        def run_stage(stage, clicks, pctrs, prices, all_clicks, all_bids):
            def write_round(round, ecpc, phi, total_clks, total_wins, total_cost):
                click_ratio = total_clks * 1.0 / all_clicks
                win_ratio = total_wins * 1.0 / all_bids
                fo.write(row_fmt % (round, ecpc, stage, phi, total_clks, click_ratio, win_ratio, total_cost, ref))

            ecpcs, cum_costs = _run_pid_phase(cntr_rounds, ref, para_p, para_i, para_d,
                                              clicks, pctrs, prices, write_round)
            for round in range(0, cntr_rounds):
                fo.write(row_fmt % (round, ref, stage + "-ref", 0.0, 0, 0.0, 0.0, cum_costs[round], ref))
            return ecpcs

        ecpcs = run_stage("test", y, yp, mplist, advs_test_clicks, advs_test_bids)
        overshoot.append(cal_overshoot(ecpcs, ref))
        settling_time.append(cal_settling_time(ecpcs, ref))
        rise_time.append(cal_rise_time(ecpcs, ref, rise_con))
        rmse_ss.append(cal_rmse_ss(ecpcs, ref))
        sd_ss.append(cal_sd_ss(ecpcs, ref))

        # train
        run_stage("train", y_train, yp_train, mplist_train, advs_train_clicks, advs_train_bids)
249 | 
250 | def control_test(cntr_rounds, ref, para_p, para_i , para_d):
251 |     ecpcs = {}
252 |     error_sum = 0.0
253 |     first_round = True
254 |     sec_round = False
255 |     cntr_size = int(len(yp) / cntr_rounds)
256 |     total_cost = 0.0
257 |     total_clks = 0
258 |     total_wins = 0
259 |     print "test performance:"
260 |     print "round\tecpc\tphi\ttotal_click\tclick_ratio\twin_ratio\ttotal_cost\tref"
261 |     for round in range(0, cntr_rounds):
262 |         if first_round and (not sec_round):
263 |             phi = 0.0
264 |             first_round = False
265 |             sec_round = True
266 |         elif sec_round and (not first_round):
267 |             error = ref - ecpcs[round-1]
268 |             error_sum += error
269 |             phi = para_p*error + para_i*error_sum
270 |             sec_round = False
271 |         else:
272 |             error = ref - ecpcs[round-1]
273 |             error_sum += error
274 |             phi = para_p*error + para_i*error_sum + para_d*(ecpcs[round-2]-ecpcs[round-1])
275 |         cost = 0
276 |         clks = 0
277 | 
278 |         imp_index = ((round+1)*cntr_size)
279 | 
280 |         if round == cntr_rounds - 1:
281 |             imp_index = imp_index + (len(yp) - cntr_size*cntr_rounds)
282 | 
283 |         # phi bound
284 |         if phi <= min_phi:
285 |             phi = min_phi
286 |         elif phi >= max_phi:
287 |             phi = max_phi
288 | 
289 |         for i in range(round*cntr_size, imp_index):
290 |             clk = y[i]
291 |             pctr = yp[i]
292 |             mp = mplist[i]
293 |             bid = max(minbid,lin(pctr, basectr, basebid) * (math.exp(phi)))
294 |             if round == 0:
295 |                 bid = 1000.0
296 | 
297 |             if bid > mp:
298 |                 total_wins += 1
299 |                 clks += clk
300 |                 total_clks += clk
301 |                 cost += mp
302 |                 total_cost += mp
303 |         ecpcs[round] = total_cost / (total_clks+1)
304 |         click_ratio = total_clks * 1.0 / advs_test_clicks
305 |         win_ratio = total_wins * 1.0 / advs_test_bids
306 |         print "%d\t%.4f\t%.4f\t%d\t%.4f\t%.4f\t%.4f\t%.1f" % (round, ecpcs[round], phi, total_clks, click_ratio, win_ratio, total_cost, ref)
307 |     overshoot.append(cal_overshoot(ecpcs, ref))
308 |     settling_time.append(cal_settling_time(ecpcs, ref))
309 |     rise_time.append(cal_rise_time(ecpcs, ref, rise_con))
310 |     rmse_ss.append(cal_rmse_ss(ecpcs, ref))
311 |     sd_ss.append(cal_sd_ss(ecpcs, ref))
312 | 
313 |     # train
314 |     print "\ntrain performance:"
315 |     print "round\tecpc\tphi\ttotal_click\tclick_ratio\twin_ratio\ttotal_cost\tref"
316 |     ecpcs_train = {}
317 |     error_sum = 0.0
318 |     first_round = True
319 |     sec_round = False
320 |     cntr_size = int(len(yp_train) / cntr_rounds)
321 |     total_cost = 0.0
322 |     total_clks = 0
323 |     total_wins = 0
324 |     for round in range(0, cntr_rounds):
325 |         if first_round and (not sec_round):
326 |             phi = 0.0
327 |             first_round = False
328 |             sec_round = True
329 |         elif sec_round and (not first_round):
330 |             error = ref - ecpcs_train[round-1]
331 |             error_sum += error
332 |             phi = para_p*error + para_i*error_sum
333 |             sec_round = False
334 |         else:
335 |             error = ref - ecpcs_train[round-1]
336 |             error_sum += error
337 |             phi = para_p*error + para_i*error_sum + para_d*(ecpcs_train[round-2]-ecpcs_train[round-1])
338 |         cost = 0
339 |         clks = 0
340 | 
341 |         imp_index = ((round+1)*cntr_size)
342 | 
343 |         if round == cntr_rounds - 1:
344 |             imp_index = imp_index + (len(yp_train) - cntr_size*cntr_rounds)
345 | 
346 |         # phi bound
347 |         if phi <= min_phi:
348 |             phi = min_phi
349 |         elif phi >= max_phi:
350 |             phi = max_phi
351 | 
352 |         for i in range(round*cntr_size, imp_index):
353 |             clk = y_train[i]
354 |             pctr = yp_train[i]
355 |             mp = mplist_train[i]
356 |             bid = max(minbid,lin(pctr, basectr, basebid) * (math.exp(phi)))
357 |             if round == 0:
358 |                 bid = 1000.0
359 | 
360 |             if bid > mp:
361 |                 total_wins += 1
362 |                 clks += clk
363 |                 total_clks += clk
364 |                 cost += mp
365 |                 total_cost += mp
366 |         ecpcs_train[round] = total_cost / (total_clks+1)
367 |         click_ratio = total_clks * 1.0 / advs_train_clicks
368 |         win_ratio = total_wins * 1.0 / advs_train_bids
369 |         print "%d\t%.4f\t%.4f\t%d\t%.4f\t%.4f\t%.4f\t%.1f" % (round, ecpcs_train[round], phi, total_clks, click_ratio, win_ratio, total_cost, ref)
370 | 
random.seed(10)

# if len(sys.argv) != 3:
#     print 'campaignId mode'
#     exit(-1)

# Index-aligned columns of the test log (y, mplist, yp) and of the train log
# (*_train): click label, price charged on a win, predicted CTR.
mplist = []
y = []
yp = []
mplist_train = []
y_train = []
yp_train = []


def _load_scored_log(path, clicks, prices, pctrs):
    """Append the rows of a whitespace-separated log file to three lists.

    Each non-blank line must start with: <int click> <int price> <float pctr>.
    Blank lines are skipped instead of raising IndexError, and the file is
    closed even when a malformed line raises.
    """
    with open(path, 'r') as fi:
        for line in fi:
            s = line.strip().split()
            if not s:
                continue
            clicks.append(int(s[0]))
            prices.append(int(s[1]))
            pctrs.append(float(s[2]))


# load the already-scored logs (no LR weights are read in this script)
_load_scored_log("../exp-data/train.txt", y_train, mplist_train, yp_train)
_load_scored_log("../exp-data/test.txt", y, mplist, yp)

# mean predicted CTR over the train log; used as the reference CTR by lin()
basectr = sum(yp_train) / float(len(yp_train))
403 | 
# for reporting
# One entry per evaluated run; the metric lists are read back by index below.
# NOTE(review): the metric lists are presumably filled by control() /
# control_test() defined earlier in this file -- confirm.
parameters = []
overshoot = []
settling_time = []
rise_time = []
rmse_ss = []
sd_ss = []
report_path = ""

# Shared header for every report. The original batch-mode header contained a
# stray backslash ("\t\overshoot"), which produced a column named "\overshoot".
_REPORT_HEADER = "campaign\ttotal-rounds\tbase-bid\tref\tp\ti\td\tsettle-con\trise-con\trise-time\tsettling-time\tovershoot\trmse-ss\tsd-ss\n"


def _describe_run(p, i, d):
    """Return the tab-joined parameter columns of one run (no metrics)."""
    return ""+advertiser+"\t"+str(cntr_rounds)+"\t"+str(basebid)+"\t"+str(ref)+"\t" + \
           str(p)+"\t"+str(i)+"\t"+str(d)+"\t"+str(settle_con)+"\t"+str(rise_con)


def _write_report(path):
    """Write the header plus one line per entry of `parameters` to `path`.

    Every row is terminated with a newline; the original wrote rows without
    one, so batch-mode rows ran together on a single line.
    """
    with open(path, 'w') as rout:
        rout.write(_REPORT_HEADER)
        for idx, val in enumerate(parameters):
            rout.write("\t".join([val, str(rise_time[idx]), str(settling_time[idx]),
                                  str(overshoot[idx]), str(rmse_ss[idx]),
                                  str(sd_ss[idx])]) + "\n")


if mode == "test": # test mode
    report_path = "../report/report-test.tsv"
    parameters.append(_describe_run(para_p, para_i, para_d))
    control_test(cntr_rounds, ref, para_p, para_i, para_d)
    _write_report(report_path)
elif mode == "batch":# batch mode
    report_path = "../report/report-batch.tsv"
    # grid search: every (p, i, d) combination, scaled by div
    for temp_p in para_ps:
        for temp_i in para_is:
            for temp_d in para_ds:
                para_p = temp_p * 1.0 * div
                para_i = temp_i * 1.0 * div
                para_d = temp_d * 1.0 * div
                out_path = "../exp-data/"+advertiser+"_ref="+str(ref)+"_p=" + \
                           str(para_p)+"_i="+str(para_i)+"_d="+str(para_d)+".tsv"
                control(cntr_rounds, ref, para_p, para_i, para_d, out_path)
                parameters.append(_describe_run(para_p, para_i, para_d))
    _write_report(report_path)
elif mode == "single": # single mode
    out_path = "../exp-data/"+advertiser+"_ref="+str(ref)+"_p="+str(para_p)+"_i="+str(para_i)+"_d="+str(para_d)+".tsv"
    control(cntr_rounds, ref, para_p, para_i, para_d, out_path)
    report_path = "../exp-data/report-single.tsv"
    parameters.append(_describe_run(para_p, para_i, para_d))
    _write_report(report_path)
else:
    print("wrong mode entered")
461 | 
462 | 
463 | 
464 | 
465 | 
466 | 


--------------------------------------------------------------------------------
/python/control-ecpc-pid.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | import sys
  3 | import random
  4 | import math
  5 | from sklearn.metrics import roc_auc_score
  6 | from sklearn.metrics import mean_squared_error
  7 | 
  8 | advs_train_bids = {"1458": 3083056, "2259": 835556, "2261": 687617, "2821": 1322561, "2997": 312437, "3358": 1742104, "3386": 2847802, "3427": 2593765, "3476": 1970360}
  9 | advs_test_bids = {"1458": 614638, "2259": 417197, "2261": 343862, "2821": 661964, "2997": 156063, "3358": 300928, "3386": 545421, "3427": 536795, "3476": 523848}
 10 | advs_train_clicks = {"1458": 2454, "2259": 280, "2261": 207, "2821": 843, "2997": 1386, "3358": 1358, "3386": 2076, "3427": 1926, "3476": 1027}
 11 | advs_test_clicks = {"1458": 543, "2259": 131, "2261": 97, "2821": 394, "2997": 533, "3358": 339, "3386": 496, "3427": 395, "3476": 302}
 12 | 
 13 | advertiser = "2821"
 14 | mode = "single"
 15 | basebid = 90
 16 | ref = 120000
 17 | print "%s\t%s\t%d\t%f" % (advertiser, mode, basebid, ref)
 18 | 
 19 | # parameter setting
 20 | minbid = 5
 21 | cntr_rounds = 40
 22 | para_p = 0.002
 23 | para_i = 0.000001
 24 | para_d = 0.000001
 25 | div = 1e-6
 26 | para_ps = range(0, 40, 5)
 27 | para_is = range(0, 25, 5)
 28 | para_ds = range(0, 25, 5)
 29 | settle_con = 0.1
 30 | rise_con = 0.9
 31 | min_phi = -100
 32 | max_phi = 100
 33 | 
 34 | def ints(s):
 35 |     res = []
 36 |     for ss in s:
 37 |         res.append(int(ss))
 38 |     return res
 39 | 
 40 | def sigmoid(p):
 41 |     return 1.0 / (1.0 + math.exp(-p))
 42 | 
 43 | def estimator_lr(feats):
 44 |     pred = 0.0
 45 |     for feat in feats:
 46 |         if feat in featWeight:
 47 |             pred += featWeight[feat]
 48 |     pred = sigmoid(pred)
 49 |     return pred
 50 | 
 51 | # bidding functions
 52 | def lin(pctr, basectr, basebid):
 53 |     return int(pctr *  basebid / basectr)
 54 | 
 55 | # calculate settling time
 56 | def cal_settling_time(ecpcs, ref):
 57 |     settled = False
 58 |     settling_time = 0
 59 |     for key, value in ecpcs.iteritems():
 60 |         error = ref - value
 61 |         if abs(error) / ref <= settle_con and settled == False:
 62 |             settled = True
 63 |             settling_time = key
 64 |         elif abs(error) / ref > settle_con:
 65 |             settled = False
 66 |             settling_time = cntr_rounds
 67 |     return settling_time
 68 | 
 69 | # # calculate steady-state error
 70 | def cal_rmse_ss(ecpcs, ref):
 71 |     settled = False
 72 |     settling_time = cal_settling_time(ecpcs, ref)
 73 |     rmse = 0.0
 74 |     if settling_time >= cntr_rounds:
 75 |         settling_time = cntr_rounds - 1
 76 |     for round in range(settling_time, cntr_rounds):
 77 |         rmse += (ecpcs[round] - ref) * (ecpcs[round] - ref)
 78 |     rmse /= (cntr_rounds - settling_time)
 79 |     rmse = math.sqrt(rmse) / ref # weinan: relative rmse
 80 |     return rmse
 81 | 
 82 | # # calculate steady-state standard deviation
 83 | def cal_sd_ss(ecpcs, ref):
 84 |     settled = False
 85 |     settling_time = cal_settling_time(ecpcs, ref)
 86 |     if settling_time >= cntr_rounds:
 87 |         settling_time = cntr_rounds - 1
 88 |     sum2 = 0.0
 89 |     sum = 0.0
 90 |     for round in range(settling_time, cntr_rounds):
 91 |         sum2 += ecpcs[round] * ecpcs[round]
 92 |         sum += ecpcs[round]
 93 |     n = cntr_rounds - settling_time
 94 |     mean = sum / n
 95 |     sd = math.sqrt(sum2 / n - mean * mean) / mean # weinan: relative sd
 96 |     return sd
 97 | 
 98 | # calculate rise time
 99 | def cal_rise_time(ecpcs, ref, rise_con):
100 |     rise_time = 0
101 |     for key, value in ecpcs.iteritems():
102 |         error = ref - value
103 |         if abs(error) / ref <= (1 - rise_con):
104 |             rise_time = key
105 |             break
106 |     return rise_time
107 | 
108 | # calculate percentage overshoot
def cal_overshoot(ecpcs, ref):
    """Return the percentage overshoot of the eCPC trajectory past `ref`.

    ecpcs maps round index -> eCPC and must contain round 0.
      * Start above ref: how far the lowest eCPC dips below ref (0.0 if never).
      * Start below ref: how far the highest eCPC rises above ref (0.0 if never).
      * Start exactly at ref: the largest absolute deviation from ref.
    All results are expressed as a percentage of ref.

    The original equal-start branch compared |value - ref| against a variable
    that it then overwrote with the raw value, so it mixed deviations with
    values and reported meaningless numbers; it now tracks the largest
    deviation, returned as a non-negative percentage like the other branches.
    """
    start = ecpcs[0]
    values = list(ecpcs.values())
    if start > ref:
        lowest = min(values)
        if lowest < ref:
            return (ref - lowest) * 100.0 / ref
        return 0.0
    elif start < ref:
        highest = max(values)
        if highest > ref:
            return (highest - ref) * 100.0 / ref
        return 0.0
    else:
        largest_deviation = max(abs(value - ref) for value in values)
        return largest_deviation * 100.0 / ref
134 | 
135 | # control function
def control(cntr_rounds, ref, para_p, para_i, para_d, outfile):
    """Replay the PID eCPC controller on the test and train logs, logging to a TSV.

    Parameters:
        cntr_rounds: number of control rounds each log is split into; the
            last round also takes the remainder impressions.
        ref: eCPC reference value the controller steers towards.
        para_p, para_i, para_d: proportional / integral / derivative gains.
        outfile: path of the TSV file to (over)write.

    For each log, round 0 bids a fixed 1000.0 with phi = 0; later rounds
    derive phi from the previous rounds' cumulative eCPC (P+I in round 1,
    P+I+D afterwards), clamp it to [min_phi, max_phi], and bid
    max(minbid, lin(pctr, basectr, basebid) * exp(phi)). An impression is won
    when bid > price and then costs that price. The eCPC of a round is
    cumulative cost / (cumulative clicks + 1).

    Side effects: writes per-round "test"/"train" rows followed by matching
    "-ref" rows to outfile, and appends the test-log metrics to the
    module-level overshoot, settling_time, rise_time, rmse_ss and sd_ss lists.
    Reads the module-level logs (y, yp, mplist and *_train), basectr, basebid,
    minbid, min_phi, max_phi, advertiser and the advs_* tables.

    Changes from the original: the output file is closed even if the replay
    raises, the duplicated test/train loops share one helper, unused locals
    are gone, and exp(phi) is computed once per round instead of once per
    impression (same value).
    """
    row_fmt = "%d\t%.4f\t%s\t%.4f\t%d\t%.4f\t%.4f\t%.4f\t%.1f\n"

    def replay(fo, stage, clicks, pctrs, prices, click_totals, bid_totals):
        # Run every control round over one log; returns {round: eCPC}.
        ecpcs = {}
        spent = {}
        error_sum = 0.0
        total_cost = 0.0
        total_clks = 0
        total_wins = 0
        cntr_size = len(pctrs) // cntr_rounds
        for rnd in range(0, cntr_rounds):
            if rnd == 0:
                phi = 0.0
            else:
                error = ref - ecpcs[rnd-1]
                error_sum += error
                phi = para_p*error + para_i*error_sum
                if rnd > 1:
                    # derivative term needs two completed rounds
                    phi += para_d*(ecpcs[rnd-2]-ecpcs[rnd-1])

            # phi bound
            if phi <= min_phi:
                phi = min_phi
            elif phi >= max_phi:
                phi = max_phi

            # the final round absorbs the impressions left over by the split
            stop = (rnd+1)*cntr_size
            if rnd == cntr_rounds - 1:
                stop = len(pctrs)

            scale = math.exp(phi)
            for i in range(rnd*cntr_size, stop):
                clk = clicks[i]
                mp = prices[i]
                bid = max(minbid, lin(pctrs[i], basectr, basebid) * scale)
                if rnd == 0:
                    bid = 1000.0

                if bid > mp:
                    total_wins += 1
                    total_clks += clk
                    total_cost += mp
            spent[rnd] = total_cost
            ecpcs[rnd] = total_cost / (total_clks+1)
            click_ratio = total_clks * 1.0 / click_totals[advertiser]
            win_ratio = total_wins * 1.0 / bid_totals[advertiser]
            fo.write(row_fmt % (rnd, ecpcs[rnd], stage, phi, total_clks, click_ratio, win_ratio, total_cost, ref))
        # reference rows: constant ref as the "ecpc" column, real cumulative cost
        for rnd in range(0, cntr_rounds):
            fo.write(row_fmt % (rnd, ref, stage + "-ref", 0.0, 0, 0.0, 0.0, spent[rnd], ref))
        return ecpcs

    with open(outfile, 'w') as fo:
        fo.write("round\tecpc\tstage\tphi\ttotal_click\tclick_ratio\twin_ratio\ttotal_cost\tref\n")

        ecpcs = replay(fo, "test", y, yp, mplist, advs_test_clicks, advs_test_bids)
        # only the test-log trajectory feeds the report metrics
        overshoot.append(cal_overshoot(ecpcs, ref))
        settling_time.append(cal_settling_time(ecpcs, ref))
        rise_time.append(cal_rise_time(ecpcs, ref, rise_con))
        rmse_ss.append(cal_rmse_ss(ecpcs, ref))
        sd_ss.append(cal_sd_ss(ecpcs, ref))

        # train
        replay(fo, "train", y_train, yp_train, mplist_train, advs_train_clicks, advs_train_bids)
263 | 
def control_test(cntr_rounds, ref, para_p, para_i , para_d):
    """Replay the PID eCPC controller on the test and train logs, printing each round.

    Same simulation as control() but the per-round rows go to stdout (without
    a stage column and without reference rows). The metrics of the test-log
    trajectory are appended to the module-level overshoot, settling_time,
    rise_time, rmse_ss and sd_ss lists; the train-log pass is printed only.
    """
    def replay(clicks, pctrs, prices, click_totals, bid_totals):
        # Run every control round over one log; returns {round: eCPC}.
        history = {}
        error_sum = 0.0
        total_cost = 0.0
        total_clks = 0
        total_wins = 0
        cntr_size = len(pctrs) // cntr_rounds
        for rnd in range(0, cntr_rounds):
            if rnd == 0:
                # first round: no feedback available yet
                phi = 0.0
            else:
                error = ref - history[rnd-1]
                error_sum += error
                phi = para_p*error + para_i*error_sum
                if rnd > 1:
                    # derivative term needs two completed rounds
                    phi += para_d*(history[rnd-2]-history[rnd-1])

            # the final round absorbs the impressions left over by the split
            stop = (rnd+1)*cntr_size
            if rnd == cntr_rounds - 1:
                stop = stop + (len(pctrs) - cntr_size*cntr_rounds)

            # phi bound
            if phi <= min_phi:
                phi = min_phi
            elif phi >= max_phi:
                phi = max_phi

            scale = math.exp(phi)
            for i in range(rnd*cntr_size, stop):
                clk = clicks[i]
                mp = prices[i]
                bid = max(minbid, lin(pctrs[i], basectr, basebid) * scale)
                if rnd == 0:
                    bid = 1000.0

                if bid > mp:
                    total_wins += 1
                    total_clks += clk
                    total_cost += mp
            history[rnd] = total_cost / (total_clks+1)
            click_ratio = total_clks * 1.0 / click_totals[advertiser]
            win_ratio = total_wins * 1.0 / bid_totals[advertiser]
            print("%d\t%.4f\t%.4f\t%d\t%.4f\t%.4f\t%.4f\t%.1f" % (rnd, history[rnd], phi, total_clks, click_ratio, win_ratio, total_cost, ref))
        return history

    ecpcs = replay(y, yp, mplist, advs_test_clicks, advs_test_bids)
    overshoot.append(cal_overshoot(ecpcs, ref))
    settling_time.append(cal_settling_time(ecpcs, ref))
    rise_time.append(cal_rise_time(ecpcs, ref, rise_con))
    rmse_ss.append(cal_rmse_ss(ecpcs, ref))
    sd_ss.append(cal_sd_ss(ecpcs, ref))

    # train
    replay(y_train, yp_train, mplist_train, advs_train_clicks, advs_train_bids)
380 | 
random.seed(10)

# if len(sys.argv) != 3:
#     print 'campaignId mode'
#     exit(-1)

# Index-aligned columns of the test log (y, yp, mplist) and of the train log
# (*_train): click label, predicted CTR, price charged on a win.
mplist = []
y = []
yp = []
mplist_train = []
y_train = []
yp_train = []
featWeight = {}   # feature id -> logistic-regression weight


def _load_yzx(path, clicks, pctrs, prices):
    """Score every row of a yzx log and append it to the three lists.

    Each non-blank line is "<click> <price> <feat>:1 <feat>:1 ..."; the ":1"
    suffixes are stripped and the feature ids are scored with estimator_lr().
    Blank lines are skipped instead of raising IndexError, and the file is
    closed even when a malformed line raises.
    """
    with open(path, 'r') as fi:
        for line in fi:
            # NOTE(review): replace(":1", "") would also mangle a token such as
            # "7:15"; assumes every feature value is exactly 1 -- confirm.
            data = ints(line.strip().replace(":1", "").split())
            if not data:
                continue
            clicks.append(data[0])
            prices.append(data[1])
            pctrs.append(estimator_lr(data[2:]))  # features start at index 2


#initialize the lr
with open("../../make-ipinyou-data/"+advertiser+"/train.yzx.txt.lr.weight", 'r') as fi:
    for line in fi:
        s = line.strip().split()
        if not s:
            continue
        featWeight[int(s[0])] = float(s[1])

# the weights must be loaded before any row is scored
_load_yzx("../../make-ipinyou-data/"+advertiser+"/test.yzx.txt", y, yp, mplist)
_load_yzx("../../make-ipinyou-data/"+advertiser+"/train.yzx.txt", y_train, yp_train, mplist_train)

# mean predicted CTR over the train log; used as the reference CTR by lin()
basectr = sum(yp_train) / float(len(yp_train))
432 | 
# for reporting
# One entry per evaluated run; control() / control_test() append one value to
# each metric list per call, and the report writer reads them back by index.
parameters = []
overshoot = []
settling_time = []
rise_time = []
rmse_ss = []
sd_ss = []
report_path = ""

# Shared header for every report. The original batch-mode header contained a
# stray backslash ("\t\overshoot"), which produced a column named "\overshoot".
_REPORT_HEADER = "campaign\ttotal-rounds\tbase-bid\tref\tp\ti\td\tsettle-con\trise-con\trise-time\tsettling-time\tovershoot\trmse-ss\tsd-ss\n"


def _describe_run(p, i, d):
    """Return the tab-joined parameter columns of one run (no metrics)."""
    return ""+advertiser+"\t"+str(cntr_rounds)+"\t"+str(basebid)+"\t"+str(ref)+"\t" + \
           str(p)+"\t"+str(i)+"\t"+str(d)+"\t"+str(settle_con)+"\t"+str(rise_con)


def _write_report(path):
    """Write the header plus one line per entry of `parameters` to `path`.

    Every row is terminated with a newline; the original wrote rows without
    one, so batch-mode rows ran together on a single line.
    """
    with open(path, 'w') as rout:
        rout.write(_REPORT_HEADER)
        for idx, val in enumerate(parameters):
            rout.write("\t".join([val, str(rise_time[idx]), str(settling_time[idx]),
                                  str(overshoot[idx]), str(rmse_ss[idx]),
                                  str(sd_ss[idx])]) + "\n")


if mode == "test": # test mode
    report_path = "../report/report-test.tsv"
    parameters.append(_describe_run(para_p, para_i, para_d))
    control_test(cntr_rounds, ref, para_p, para_i, para_d)
    _write_report(report_path)
elif mode == "batch":# batch mode
    report_path = "../report/report-batch.tsv"
    # grid search: every (p, i, d) combination, scaled by div
    for temp_p in para_ps:
        for temp_i in para_is:
            for temp_d in para_ds:
                para_p = temp_p * 1.0 * div
                para_i = temp_i * 1.0 * div
                para_d = temp_d * 1.0 * div
                out_path = "../exp-data/"+advertiser+"_ref="+str(ref)+"_p=" + \
                           str(para_p)+"_i="+str(para_i)+"_d="+str(para_d)+".tsv"
                control(cntr_rounds, ref, para_p, para_i, para_d, out_path)
                parameters.append(_describe_run(para_p, para_i, para_d))
    _write_report(report_path)
elif mode == "single": # single mode
    out_path = "../exp-data/"+advertiser+"_ref="+str(ref)+"_p="+str(para_p)+"_i="+str(para_i)+"_d="+str(para_d)+".tsv"
    control(cntr_rounds, ref, para_p, para_i, para_d, out_path)
    report_path = "../report/report-single.tsv"
    parameters.append(_describe_run(para_p, para_i, para_d))
    _write_report(report_path)
else:
    print("wrong mode entered")
490 | 
491 | 
492 | 
493 | 
494 | 


--------------------------------------------------------------------------------
/python/control-ecpc-waterlevel.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | import sys
  3 | import random
  4 | import math
  5 | 
  6 | advs_train_bids = {"1458": 3083056, "2259": 835556, "2261": 687617, "2821": 1322561, "2997": 312437, "3358": 1742104, "3386": 2847802, "3427": 2593765, "3476": 1970360}
  7 | advs_test_bids = {"1458": 614638, "2259": 417197, "2261": 343862, "2821": 661964, "2997": 156063, "3358": 300928, "3386": 545421, "3427": 536795, "3476": 523848}
  8 | advs_train_clicks = {"1458": 2454, "2259": 280, "2261": 207, "2821": 843, "2997": 1386, "3358": 1358, "3386": 2076, "3427": 1926, "3476": 1027}
  9 | advs_test_clicks = {"1458": 543, "2259": 131, "2261": 97, "2821": 394, "2997": 533, "3358": 339, "3386": 496, "3427": 395, "3476": 302}
 10 | advs_train_cost = {"1458": 212400000, "2259": 77754000, "2261": 61610000, "2821": 118082000, "2997": 19689000, "3358": 160943000, "3386": 219066000, "3427": 210239000, "3476": 156088000}
 11 | advs_test_cost = {"1458": 45216000, "2259": 43497000, "2261": 28795000, "2821": 68257000, "2997": 8617000, "3358": 34159000, "3386": 45715000, "3427": 46356000, "3476": 43627000}
 12 | 
 13 | advertiser = "1458"
 14 | mode = "single"
 15 | basebid = 69
 16 | ref = 20000
 17 | temp = 20000
 18 | print "%s\t%s\t%d\t%f" % (advertiser, mode, basebid, ref)
 19 | 
 20 | # parameter setting
 21 | minbid = 5
 22 | cntr_rounds = 40
 23 | para_gamma = 200
 24 | div = 1
 25 | para_gammas = range(60, 120, 5)
 26 | settle_con = 0.1
 27 | rise_con = 0.9
 28 | min_phi = 0.7
 29 | max_phi = 100
 30 | damping = 0.25
 31 | budget_train = damping * advs_train_cost[advertiser]
 32 | budget_test = damping * advs_test_cost[advertiser]
 33 | print "test-budget: " + str(budget_test)
 34 | print "train-budget: " + str(budget_train)
 35 | max_ref = ref * 1.05
 36 | min_ref = ref * 0.95
 37 | 
 38 | def ints(s):
 39 |     res = []
 40 |     for ss in s:
 41 |         res.append(int(ss))
 42 |     return res
 43 | 
 44 | def sigmoid(p):
 45 |     return 1.0 / (1.0 + math.exp(-p))
 46 | 
 47 | def estimator_lr(feats):
 48 |     pred = 0.0
 49 |     for feat in feats:
 50 |         if feat in featWeight:
 51 |             pred += featWeight[feat]
 52 |     pred = sigmoid(pred)
 53 |     return pred
 54 | 
 55 | # bidding functions
 56 | def lin(pctr, basectr, basebid):
 57 |     return int(pctr *  basebid / basectr)
 58 | 
 59 | # calculate settling time
 60 | def cal_settling_time(ecpcs, ref):
 61 |     settled = False
 62 |     settling_time = 0
 63 |     for key, value in ecpcs.iteritems():
 64 |         error = ref - value
 65 |         if abs(error) / ref <= settle_con and settled == False:
 66 |             settled = True
 67 |             settling_time = key
 68 |         elif abs(error) / ref > settle_con:
 69 |             settled = False
 70 |             settling_time = cntr_rounds
 71 |     return settling_time
 72 | 
 73 | # # calculate steady-state error
 74 | def cal_rmse_ss(ecpcs, ref):
 75 |     settled = False
 76 |     settling_time = cal_settling_time(ecpcs, ref)
 77 |     rmse = 0.0
 78 |     if settling_time >= cntr_rounds:
 79 |         settling_time = cntr_rounds - 1
 80 |     for round in range(settling_time, cntr_rounds):
 81 |         rmse += (ecpcs[round] - ref) * (ecpcs[round] - ref)
 82 |     rmse /= (cntr_rounds - settling_time)
 83 |     rmse = math.sqrt(rmse) / ref # weinan: relative rmse
 84 |     return rmse
 85 | 
 86 | # # calculate steady-state standard deviation
 87 | def cal_sd_ss(ecpcs, ref):
 88 |     settled = False
 89 |     settling_time = cal_settling_time(ecpcs, ref)
 90 |     if settling_time >= cntr_rounds:
 91 |         settling_time = cntr_rounds - 1
 92 |     sum2 = 0.0
 93 |     sum = 0.0
 94 |     for round in range(settling_time, cntr_rounds):
 95 |         sum2 += ecpcs[round] * ecpcs[round]
 96 |         sum += ecpcs[round]
 97 |     n = cntr_rounds - settling_time
 98 |     mean = sum / n
 99 |     sd = math.sqrt(sum2 / n - mean * mean) / mean # weinan: relative sd
100 |     return sd
101 | 
102 | # calculate rise time
def cal_rise_time(ecpcs, ref, rise_con):
    """Return the first round whose eCPC is within (1 - rise_con) of `ref`.

    ecpcs maps round index -> eCPC; the relative error is |ref - eCPC| / ref.
    Returns 0 when no round qualifies (the same value as "rose in round 0",
    kept for compatibility with existing reports).

    Changes from the original: rounds are visited in ascending key order
    instead of relying on dict iteration order, and float division is used so
    integer inputs are not truncated under Python 2.
    """
    tolerance = 1 - rise_con
    for key in sorted(ecpcs):
        if abs(ref - ecpcs[key]) / float(ref) <= tolerance:
            return key
    return 0
111 | 
112 | # calculate percentage overshoot
def cal_overshoot(ecpcs, ref):
    """Return the percentage overshoot of the eCPC trajectory past `ref`.

    ecpcs maps round index -> eCPC and must contain round 0.
      * Start above ref: how far the lowest eCPC dips below ref (0.0 if never).
      * Start below ref: how far the highest eCPC rises above ref (0.0 if never).
      * Start exactly at ref: the largest absolute deviation from ref.
    All results are expressed as a percentage of ref.

    The original equal-start branch compared |value - ref| against a variable
    that it then overwrote with the raw value, so it mixed deviations with
    values and reported meaningless numbers; it now tracks the largest
    deviation, returned as a non-negative percentage like the other branches.
    """
    start = ecpcs[0]
    values = list(ecpcs.values())
    if start > ref:
        lowest = min(values)
        if lowest < ref:
            return (ref - lowest) * 100.0 / ref
        return 0.0
    elif start < ref:
        highest = max(values)
        if highest > ref:
            return (highest - ref) * 100.0 / ref
        return 0.0
    else:
        largest_deviation = max(abs(value - ref) for value in values)
        return largest_deviation * 100.0 / ref
138 | 
139 | # control function
140 | def control(cntr_rounds, ref, para_gamma, outfile):
141 |     fo = open(outfile, 'w')
142 |     fo.write("round\tecpc\tstage\tphi\ttotal_click\tclick_ratio\twin_ratio\ttotal_cost\tref\n")
143 |     ecpcs = {}
144 |     click_nums = {}
145 |     first_round = True
146 |     cntr_size = int(len(yp) / cntr_rounds)
147 |     total_cost = 0.0
148 |     total_clks = 0
149 |     total_wins = 0
150 |     temp_ref = {}
151 |     temp_ref[0] = temp
152 |     tc = {}
153 |     for round in range(0, cntr_rounds):
154 |         if first_round:
155 |             phi = 0.0
156 |             first_round = False
157 |         else:
158 |             temp_ref[round] = (budget_test - total_cost) / ((budget_test / ref) - ((total_cost / ecpcs[round-1])))
159 |             if temp_ref[round] <= min_ref:
160 |                 temp_ref[round] = min_ref
161 |             elif temp_ref[round] >= max_ref:
162 |                 temp_ref[round] = max_ref
163 |             # error = click_nums[round-1] * 1.0 / (budget_test / temp_ref[round]) - 1.0 / cntr_rounds
164 |             error = click_nums[round-1] * 1.0 / (budget_test / ref) - 1.0 / cntr_rounds
165 |             phi = para_gamma * (-1) * error
166 |         cost = 0
167 |         clks = 0
168 | 
169 |         imp_index = ((round+1)*cntr_size)
170 | 
171 |         if round == cntr_rounds - 1:
172 |             imp_index = imp_index + (len(yp) - cntr_size*cntr_rounds)
173 | 
174 |         # fang piao
175 |         if phi <= min_phi:
176 |             phi = min_phi
177 |         elif phi >= max_phi:
178 |             phi = max_phi
179 | 
180 |         for i in range(round*cntr_size, imp_index):
181 |             clk = y[i]
182 |             pctr = yp[i]
183 |             mp = mplist[i]
184 |             bid = max(minbid,lin(pctr, basectr, basebid) * (math.exp(phi)))
185 |             if round == 0:
186 |                 bid = 1000.0
187 | 
188 |             if bid > mp:
189 |                 total_wins += 1
190 |                 clks += clk
191 |                 total_clks += clk
192 |                 cost += mp
193 |                 total_cost += mp
194 |         tc[round] = total_cost
195 |         ecpcs[round] = total_cost / (total_clks+1)
196 |         click_nums[round] = clks
197 |         click_ratio = total_clks * 1.0 / advs_test_clicks[advertiser]
198 |         win_ratio = total_wins * 1.0 / advs_test_bids[advertiser]
199 |         fo.write("%d\t%.4f\t%s\t%.4f\t%d\t%.4f\t%.4f\t%.4f\t%.1f\n" % (round, ecpcs[round], "test", phi, total_clks,  click_ratio, win_ratio, total_cost, ref))
200 |     for round in range(0, cntr_rounds):
201 |         fo.write("%d\t%.4f\t%s\t%.4f\t%d\t%.4f\t%.4f\t%.4f\t%.1f\n" % (round, ref, "test-ref", 0.0, 0,  0.0, 0.0, tc[round], ref))
202 |     overshoot.append(cal_overshoot(ecpcs, ref))
203 |     settling_time.append(cal_settling_time(ecpcs, ref))
204 |     rise_time.append(cal_rise_time(ecpcs, ref, rise_con))
205 |     rmse_ss.append(cal_rmse_ss(ecpcs, ref))
206 |     sd_ss.append(cal_sd_ss(ecpcs, ref))
207 | 
208 |     # train
209 |     ecpcs_train = {}
210 |     click_nums_train = {}
211 |     first_round = True
212 |     cntr_size = int(len(yp_train) / cntr_rounds)
213 |     total_cost = 0.0
214 |     total_clks = 0
215 |     total_wins = 0
216 |     temp_ref = {}
217 |     temp_ref[0] = temp
218 |     tc = {}
219 |     for round in range(0, cntr_rounds):
220 |         if first_round:
221 |             phi = 0.0
222 |             first_round = False
223 |         else:
224 |             temp_ref[round] = (budget_train - total_cost) / ((budget_train / ref)  - ((total_cost / ecpcs_train[round-1])))
225 |             if temp_ref[round] <= min_ref:
226 |                 temp_ref[round] = min_ref
227 |             elif temp_ref[round] >= max_ref:
228 |                 temp_ref[round] = max_ref
229 |             # error = click_nums_train[round-1] * 1.0 / (budget_train / temp_ref[round]) - 1.0 / cntr_rounds
230 |             error = click_nums_train[round-1] * 1.0 / (budget_train / ref) - 1.0 / cntr_rounds
231 |             phi = para_gamma * (-1) * error
232 |         cost = 0
233 |         clks = 0
234 | 
235 |         imp_index = ((round+1)*cntr_size)
236 | 
237 |         if round == cntr_rounds - 1:
238 |             imp_index = imp_index + (len(yp_train) - cntr_size*cntr_rounds)
239 | 
240 |         # fang piao
241 |         if phi <= min_phi:
242 |             phi = min_phi
243 |         elif phi >= max_phi:
244 |             phi = max_phi
245 | 
246 |         for i in range(round*cntr_size, imp_index):
247 |             clk = y_train[i]
248 |             pctr = yp_train[i]
249 |             mp = mplist_train[i]
250 |             bid = max(minbid,lin(pctr, basectr, basebid) * (math.exp(phi)))
251 |             if round == 0:
252 |                 bid = 1000.0
253 | 
254 |             if bid > mp:
255 |                 total_wins += 1
256 |                 clks += clk
257 |                 total_clks += clk
258 |                 cost += mp
259 |                 total_cost += mp
260 |         tc[round] = total_cost
261 |         ecpcs_train[round] = total_cost / (total_clks+1)
262 |         click_nums_train[round] = clks
263 |         click_ratio = total_clks * 1.0 / advs_train_clicks[advertiser]
264 |         win_ratio = total_wins * 1.0 / advs_train_bids[advertiser]
265 |         fo.write("%d\t%.4f\t%s\t%.4f\t%d\t%.4f\t%.4f\t%.4f\t%.1f\n" % (round, ecpcs_train[round], "train", phi, total_clks,  click_ratio, win_ratio, total_cost, ref))
266 |     for round in range(0, cntr_rounds):
267 |         fo.write("%d\t%.4f\t%s\t%.4f\t%d\t%.4f\t%.4f\t%.4f\t%.1f\n" % (round, ref, "train-ref", 0.0, 0,  0.0, 0.0, total_cost, ref))
268 |     fo.close()
269 | 
270 | def control_test(cntr_rounds, ref, para_gamma):
271 |     ecpcs = {}
272 |     click_nums = {}
273 |     first_round = True
274 |     cntr_size = int(len(yp) / cntr_rounds)
275 |     total_cost = 0.0
276 |     total_clks = 0
277 |     total_wins = 0
278 |     temp_ref = {}
279 |     temp_ref[0] = temp
280 |     for round in range(0, cntr_rounds):
281 |         if first_round:
282 |             phi = 0.0
283 |             first_round = False
284 |         else:
285 |             temp_ref[round] = (budget_test - total_cost) / ((budget_test / ref) - ((total_cost / ecpcs[round-1])))
286 |             if temp_ref[round] <= min_ref:
287 |                 temp_ref[round] = min_ref
288 |             elif temp_ref[round] >= max_ref:
289 |                 temp_ref[round] = max_ref
290 |             # error = click_nums[round-1] * 1.0 / (budget_test / temp_ref[round]) - 1.0 / cntr_rounds
291 |             error = click_nums[round-1] * 1.0 / (budget_test / ref) - 1.0 / cntr_rounds
292 |             phi = para_gamma * (-1) * error
293 |         cost = 0
294 |         clks = 0
295 | 
296 |         imp_index = ((round+1)*cntr_size)
297 | 
298 |         if round == cntr_rounds - 1:
299 |             imp_index = imp_index + (len(yp) - cntr_size*cntr_rounds)
300 | 
301 |         # fang piao
302 |         if phi <= min_phi:
303 |             phi = min_phi
304 |         elif phi >= max_phi:
305 |             phi = max_phi
306 | 
307 |         for i in range(round*cntr_size, imp_index):
308 |             clk = y[i]
309 |             pctr = yp[i]
310 |             mp = mplist[i]
311 |             bid = max(minbid,lin(pctr, basectr, basebid) * (math.exp(phi)))
312 |             if round == 0:
313 |                 bid = 1000.0
314 | 
315 |             if bid > mp:
316 |                 total_wins += 1
317 |                 clks += clk
318 |                 total_clks += clk
319 |                 cost += mp
320 |                 total_cost += mp
321 |         ecpcs[round] = total_cost / (total_clks+1)
322 |         click_nums[round] = clks
323 |         click_ratio = total_clks * 1.0 / advs_test_clicks[advertiser]
324 |         win_ratio = total_wins * 1.0 / advs_test_bids[advertiser]
325 |         print "%d\t%.4f\t%.4f\t%d\t%.4f\t%.4f\t%.4f\t%.1f" % (round, ecpcs[round], phi, total_clks, click_ratio, win_ratio, total_cost, ref)
326 |     overshoot.append(cal_overshoot(ecpcs, ref))
327 |     settling_time.append(cal_settling_time(ecpcs, ref))
328 |     rise_time.append(cal_rise_time(ecpcs, ref, rise_con))
329 |     rmse_ss.append(cal_rmse_ss(ecpcs, ref))
330 |     sd_ss.append(cal_sd_ss(ecpcs, ref))
331 | 
332 |     # train
333 |     ecpcs_train = {}
334 |     click_nums_train = {}
335 |     first_round = True
336 |     cntr_size = int(len(yp_train) / cntr_rounds)
337 |     total_cost = 0.0
338 |     total_clks = 0
339 |     total_wins = 0
340 |     temp_ref = {}
341 |     temp_ref[0] = temp
342 |     for round in range(0, cntr_rounds):
343 |         if first_round:
344 |             phi = 0.0
345 |             first_round = False
346 |         else:
347 |             temp_ref[round] = (budget_train - total_cost) / ((budget_train / ref)  - ((total_cost / ecpcs_train[round-1])))
348 |             if temp_ref[round] <= min_ref:
349 |                 temp_ref[round] = min_ref
350 |             elif temp_ref[round] >= max_ref:
351 |                 temp_ref[round] = max_ref
352 |             # error = click_nums_train[round-1] * 1.0 / (budget_train / temp_ref[round]) - 1.0 / cntr_rounds
353 |             error = click_nums_train[round-1] * 1.0 / (budget_train / ref) - 1.0 / cntr_rounds
354 |             phi = para_gamma * (-1) * error
355 |         cost = 0
356 |         clks = 0
357 | 
358 |         imp_index = ((round+1)*cntr_size)
359 | 
360 |         if round == cntr_rounds - 1:
361 |             imp_index = imp_index + (len(yp_train) - cntr_size*cntr_rounds)
362 | 
363 |         # fang piao
364 |         if phi <= min_phi:
365 |             phi = min_phi
366 |         elif phi >= max_phi:
367 |             phi = max_phi
368 | 
369 |         for i in range(round*cntr_size, imp_index):
370 |             clk = y_train[i]
371 |             pctr = yp_train[i]
372 |             mp = mplist_train[i]
373 |             bid = max(minbid,lin(pctr, basectr, basebid) * (math.exp(phi)))
374 |             if round == 0:
375 |                 bid = 1000.0
376 | 
377 |             if bid > mp:
378 |                 total_wins += 1
379 |                 clks += clk
380 |                 total_clks += clk
381 |                 cost += mp
382 |                 total_cost += mp
383 |         ecpcs_train[round] = total_cost / (total_clks+1)
384 |         click_nums_train[round] = clks
385 |         click_ratio = total_clks * 1.0 / advs_train_clicks[advertiser]
386 |         win_ratio = total_wins * 1.0 / advs_train_bids[advertiser]
387 |         print "%d\t%.4f\t%.4f\t%d\t%.4f\t%.4f\t%.4f\t%.1f" % (round, ecpcs_train[round], phi, total_clks, click_ratio, win_ratio, total_cost, ref)
388 | 
389 | random.seed(10)
390 | 
391 | # if len(sys.argv) != 3:
392 | #     print 'campaignId mode'
393 | #     exit(-1)
394 | 
395 | mplist = []
396 | y = []
397 | yp = []
398 | mplist_train = []
399 | y_train = []
400 | yp_train = []
401 | featWeight = {}
402 | 
403 | 
404 | #initialize the lr
405 | fi = open("../../make-ipinyou-data/"+advertiser+"/train.yzx.txt.lr.weight", 'r')
406 | for line in fi:
407 |     s = line.strip().split()
408 |     feat = int(s[0])
409 |     weight = float(s[1])
410 |     featWeight[feat] = weight
411 | fi.close()
412 | 
413 | fi = open("../../make-ipinyou-data/"+advertiser+"/test.yzx.txt", 'r')
414 | for line in fi:
415 |     data = ints(line.strip().replace(":1", "").split())
416 |     clk = data[0]
417 |     mp = data[1]
418 |     fsid = 2 # feature start id
419 |     feats = data[fsid:]
420 |     pred = estimator_lr(feats)
421 |     y.append(clk)
422 |     yp.append(pred)
423 |     mplist.append(mp)
424 | fi.close()
425 | 
426 | fi = open("../../make-ipinyou-data/"+advertiser+"/train.yzx.txt", 'r')
427 | for line in fi:
428 |     data = ints(line.strip().replace(":1", "").split())
429 |     clk = data[0]
430 |     mp = data[1]
431 |     fsid = 2 # feature start id
432 |     feats = data[fsid:]
433 |     pred = estimator_lr(feats)
434 |     y_train.append(clk)
435 |     yp_train.append(pred)
436 |     mplist_train.append(mp)
437 | fi.close()
438 | 
439 | basectr = sum(yp_train) / float(len(yp_train))
440 | 
441 | # for reporting
442 | parameters = []
443 | overshoot = []
444 | settling_time = []
445 | rise_time = []
446 | rmse_ss = []
447 | sd_ss = []
448 | report_path = ""
449 | 
450 | 
451 | if mode == "test": # test mode
452 |     report_path = "../report/report-ecpc-waterlevel-test.tsv"
453 |     parameter = ""+advertiser+"\t"+str(cntr_rounds)+"\t"+str(basebid)+"\t"+str(ref)+"\t" + \
454 |                 str(para_gamma)+"\t"+str(settle_con)+"\t"+str(rise_con)
455 |     parameters.append(parameter)
456 |     control_test(cntr_rounds, ref, para_gamma)
457 |     rout = open(report_path, 'w')
458 |     rout.write("campaign\ttotal-rounds\tbase-bid\tref\tgamma\tsettle-con\trise-con\trise-time\tsettling-time\tovershoot\trmse-ss\tsd-ss\n")
459 |     for idx, val in enumerate(parameters):
460 |         rout.write(val+"\t"+str(rise_time[idx])+"\t"+str(settling_time[idx])+"\t"+str(overshoot[idx])+"\t" + \
461 |                    str(rmse_ss[idx]) + "\t" + str(sd_ss[idx]))
462 |     rout.close()
463 | elif mode == "batch":# batch mode
464 |     report_path = "../report/report-ecpc-waterlevel-batch.tsv"
465 |     for temp_gamma in para_gammas:
466 |         para_gamma = temp_gamma * 1.0 * div
467 |         out_path = "../exp-data/win_waterlevel_"+advertiser+"_ref="+str(ref)+"_gamma=" + \
468 |                     str(para_gamma)+".tsv"
469 |         control(cntr_rounds, ref, para_gamma, out_path)
470 |         parameter = ""+advertiser+"\t"+str(cntr_rounds)+"\t"+str(basebid)+"\t"+str(ref)+"\t" + \
471 |                      str(para_gamma)+"\t"+str(settle_con)+"\t"+str(rise_con)
472 |         parameters.append(parameter)
473 |     rout = open(report_path, 'w')
474 |     rout.write("campaign\ttotal-rounds\tbase-bid\tref\tgamma\tsettle-con\trise-con\trise-time\tsettling-time\t\overshoot\trmse-ss\tsd-ss\n")
475 |     for idx, val in enumerate(parameters):
476 |         rout.write(val+"\t"+str(rise_time[idx])+"\t"+str(settling_time[idx])+"\t"+str(overshoot[idx])+"\t" + \
477 |                    str(rmse_ss[idx]) + "\t" + str(sd_ss[idx]))
478 |     rout.close()
479 | elif mode == "single": # single mode
480 |     out_path = "../exp-data/win_waterlevel_"+advertiser+"_ref="+str(ref)+"_p="+str(para_gamma)+".tsv"
481 |     control(cntr_rounds, ref, para_gamma, out_path)
482 |     report_path = "../report/report-ecpc-waterlevel-single.tsv"
483 |     parameter = ""+advertiser+"\t"+str(cntr_rounds)+"\t"+str(basebid)+"\t"+str(ref)+"\t" + \
484 |                 str(para_gamma)+"\t"+str(settle_con)+"\t"+str(rise_con)
485 |     parameters.append(parameter)
486 |     rout = open(report_path, 'w')
487 |     rout.write("campaign\ttotal-rounds\tbase-bid\tref\tgamma\tsettle-con\trise-con\trise-time\tsettling-time\tovershoot\trmse-ss\tsd-ss\n")
488 |     for idx, val in enumerate(parameters):
489 |         rout.write(val+"\t"+str(rise_time[idx])+"\t"+str(settling_time[idx])+"\t"+str(overshoot[idx])+"\t" + \
490 |                    str(rmse_ss[idx]) + "\t" + str(sd_ss[idx]))
491 |     rout.close()
492 | else:
493 |     print "wrong mode entered"


--------------------------------------------------------------------------------
/python/lryzx.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | import sys
  3 | import random
  4 | import math
  5 | import operator
  6 | from sklearn.metrics import roc_auc_score
  7 | from sklearn.metrics import mean_squared_error
  8 | 
  9 | 
 10 | bufferCaseNum = 1000000  # training lines buffered before one batch of SGD updates is run
 11 | eta = 0.01  # step size applied to (label - prediction) in the weight update
 12 | lamb = 1E-6  # each touched weight is multiplied by (1 - lamb) on every update
 13 | featWeight = {}  # feature id -> weight; filled lazily during training
 14 | trainRounds = 10  # number of full passes over the training file
 15 | random.seed(10)  # fixed seed so the random weight initialisation is repeatable
 16 | initWeight = 0.05  # width of the interval new weights are drawn from (see nextInitWeight)
 17 | 
 18 | def nextInitWeight():
 19 |     return (random.random() - 0.5) * initWeight
 20 | 
 21 | def ints(s):
 22 |     res = []
 23 |     for ss in s:
 24 |         res.append(int(ss))
 25 |     return res
 26 | 
 27 | def sigmoid(p):
 28 |     return 1.0 / (1.0 + math.exp(-p))
 29 | 
 30 | 
 31 | if len(sys.argv) < 3:
 32 |     print 'Usage: train.yzx.txt test.yzx.txt'
 33 |     exit(-1)
 34 | 
 35 | 
 36 | for round in range(0, trainRounds):
 37 |     # train for this round
 38 |     fi = open(sys.argv[1], 'r')
 39 |     lineNum = 0
 40 |     trainData = []
 41 |     for line in fi:
 42 |         lineNum = (lineNum + 1) % bufferCaseNum
 43 |         trainData.append(ints(line.replace(":1", "").split()))
 44 |         if lineNum == 0:
 45 |             for data in trainData:
 46 |                 clk = data[0]
 47 |                 mp = data[1]
 48 |                 fsid = 2 # feature start id
 49 |                 # predict
 50 |                 pred = 0.0
 51 |                 for i in range(fsid, len(data)):
 52 |                     feat = data[i]
 53 |                     if feat not in featWeight:
 54 |                         featWeight[feat] = nextInitWeight()
 55 |                     pred += featWeight[feat]
 56 |                 pred = sigmoid(pred)
 57 |                 # start to update weight
 58 |                 # w_i = w_i + learning_rate * [ (y - p) * x_i - lamb * w_i ] 
 59 |                 for i in range(fsid, len(data)):
 60 |                     feat = data[i]
 61 |                     featWeight[feat] = featWeight[feat] * (1 - lamb) + eta * (clk - pred)
 62 |             trainData = []
 63 | 
 64 |     if len(trainData) > 0:
 65 |         for data in trainData:
 66 |             clk = data[0]
 67 |             mp = data[1]
 68 |             fsid = 2 # feature start id
 69 |             # predict
 70 |             pred = 0.0
 71 |             for i in range(fsid, len(data)):
 72 |                 feat = data[i]
 73 |                 if feat not in featWeight:
 74 |                     featWeight[feat] = nextInitWeight()
 75 |                 pred += featWeight[feat]
 76 |             pred = sigmoid(pred)
 77 |             # start to update weight
 78 |             # w_i = w_i + learning_rate * [ (y - p) * x_i - lamb * w_i ]
 79 |             for i in range(fsid, len(data)):
 80 |                 feat = data[i]
 81 |                 featWeight[feat] = featWeight[feat] * (1 - lamb) + eta * (clk - pred)
 82 |     fi.close()
 83 | 
 84 |     # test for this round
 85 |     y = []
 86 |     yp = []
 87 |     fi = open(sys.argv[2], 'r')
 88 |     for line in fi:
 89 |         data = ints(line.replace(":1", "").split())
 90 |         clk = data[0]
 91 |         mp = data[1]
 92 |         fsid = 2 # feature start id
 93 |         pred = 0.0
 94 |         for i in range(fsid, len(data)):
 95 |             feat = data[i]
 96 |             if feat in featWeight:
 97 |                 pred += featWeight[feat]
 98 |         pred = sigmoid(pred)
 99 |         y.append(clk)
100 |         yp.append(pred)
101 |     fi.close()
102 |     auc = roc_auc_score(y, yp)
103 |     rmse = math.sqrt(mean_squared_error(y, yp))
104 |     print str(round) + '\t' + str(auc) + '\t' + str(rmse)
105 | 
106 | # output the weights
107 | fo = open(sys.argv[1] + '.lr.weight', 'w')
108 | featvalue = sorted(featWeight.iteritems(), key=operator.itemgetter(0))
109 | for fv in featvalue:
110 |     fo.write(str(fv[0]) + '\t' + str(fv[1]) + '\n')
111 | fo.close()
112 | 
113 | 
114 | # output the prediction
115 | fi = open(sys.argv[2], 'r')
116 | fo = open(sys.argv[2] + '.lr.pred', 'w')
117 | 
118 | for line in fi:
119 |     data = ints(line.replace(":1", "").split())
120 |     pred = 0.0
121 |     for i in range(1, len(data)):
122 |         feat = data[i]
123 |         if feat in featWeight:
124 |             pred += featWeight[feat]
125 |     pred = sigmoid(pred)
126 |     fo.write(str(pred) + '\n')    
127 | fo.close()
128 | fi.close()
129 | 
130 | 
131 | 
132 | 


--------------------------------------------------------------------------------
/python/make-yzpc.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import random
  3 | import math
  4 | from sklearn.metrics import roc_auc_score
  5 | from sklearn.metrics import mean_squared_error
  6 | 
  7 | ADVERTISERS = ["1458", "2259", "2261", "2821", "2997", "3358", "3386", "3427", "3476"]  # campaign directory names processed by the loop below
  8 | chanel_index = 11  # column of the tab-split log line copied into the yzpc output; NOTE(review): presumably the ad-exchange id -- confirm against the log schema
  9 | 
 10 | def ints(s):
 11 |     res = []
 12 |     for ss in s:
 13 |         res.append(int(ss))
 14 |     return res
 15 | 
 16 | def sigmoid(p):
 17 |     return 1.0 / (1.0 + math.exp(-p))
 18 | 
 19 | def estimator_lr(feats):
 20 |     pred = 0.0
 21 |     for feat in feats:
 22 |         if feat in featWeight:
 23 |             pred += featWeight[feat]
 24 |     pred = sigmoid(pred)
 25 |     return pred
 26 | 
 27 | for adv in ADVERTISERS:
 28 |     mplist = []
 29 |     y = []
 30 |     yp = []
 31 |     mplist_train = []
 32 |     y_train = []
 33 |     yp_train = []
 34 |     featWeight = {}
 35 |     exchange_train = []
 36 |     exchange = []
 37 | 
 38 | 
 39 |     #initialize the lr
 40 |     fi = open("../../make-ipinyou-data/"+adv+"/train.yzx.txt.lr.weight", 'r')
 41 |     for line in fi:
 42 |         s = line.strip().split()
 43 |         feat = int(s[0])
 44 |         weight = float(s[1])
 45 |         featWeight[feat] = weight
 46 |     fi.close()
 47 | 
 48 |     fi = open("../../make-ipinyou-data/"+adv+"/test.yzx.txt", 'r')
 49 |     print "read " + adv + " test yzx"
 50 |     for line in fi:
 51 |         data = ints(line.strip().replace(":1", "").split())
 52 |         clk = data[0]
 53 |         mp = data[1]
 54 |         fsid = 2 # feature start id
 55 |         feats = data[fsid:]
 56 |         pred = estimator_lr(feats)
 57 |         y.append(clk)
 58 |         yp.append(pred)
 59 |         mplist.append(mp)
 60 |     fi.close()
 61 | 
 62 |     fi = open("../../make-ipinyou-data/"+adv+"/train.yzx.txt", 'r')
 63 |     print "read " + adv + " train yzx"
 64 |     for line in fi:
 65 |         data = ints(line.strip().replace(":1", "").split())
 66 |         clk = data[0]
 67 |         mp = data[1]
 68 |         fsid = 2 # feature start id
 69 |         feats = data[fsid:]
 70 |         pred = estimator_lr(feats)
 71 |         y_train.append(clk)
 72 |         yp_train.append(pred)
 73 |         mplist_train.append(mp)
 74 |     fi.close()
 75 | 
 76 |     fi = open("../../make-ipinyou-data/"+adv+"/train.log.txt", 'r')
 77 |     print "read " + adv + " train log"
 78 |     for i, line in enumerate(fi):
 79 |         if i > 0:
 80 |             data = line.split("\t")
 81 |             exchange_train.append(data[chanel_index])
 82 |     fi.close
 83 | 
 84 |     fi = open("../../make-ipinyou-data/"+adv+"/test.log.txt", 'r')
 85 |     print "read " + adv + " test log"
 86 |     for i, line in enumerate(fi):
 87 |         if i > 0:
 88 |             data = line.split("\t")
 89 |             exchange.append(data[chanel_index])
 90 |     fi.close
 91 | 
 92 |     fo = open("../../make-ipinyou-data/"+adv+"/train.yzpc.txt", 'w')
 93 |     print "write " + adv + " train yzpc"
 94 |     for i, val in enumerate(exchange_train):
 95 |         if i == len(exchange_train) - 1:
 96 |             fo.write("%d\t%d\t%f\t%s" % (y_train[i], mplist_train[i], yp_train[i], val))
 97 |         else:
 98 |             fo.write("%d\t%d\t%f\t%s\n" % (y_train[i], mplist_train[i], yp_train[i], val))
 99 |     fo.close()
100 | 
101 |     fo = open("../../make-ipinyou-data/"+adv+"/test.yzpc.txt", 'w')
102 |     print "write " + adv + " test yzpc"
103 |     for i, val in enumerate(exchange):
104 |         if i == len(exchange_train) - 1:
105 |             fo.write("%d\t%d\t%f\t%s" % (y[i], mplist[i], yp[i], val))
106 |         else:
107 |             fo.write("%d\t%d\t%f\t%s\n" % (y[i], mplist[i], yp[i], val))
108 |     fo.close()


--------------------------------------------------------------------------------
/report/example-report-single.tsv:
--------------------------------------------------------------------------------
1 | campaign	total-rounds	base-bid	ref	p	i	d	settle-con	rise-con	rise-time	settling-time	overshoot	rmse-ss	sd-ss
2 | 2821	40	90	120000	0.002	1e-06	1e-06	0.1	0.9	4	40	8.53885964912	0.184023765432	0.0


--------------------------------------------------------------------------------
/report/example-report-test.tsv:
--------------------------------------------------------------------------------
1 | campaign	total-rounds	base-bid	ref	p	i	d	settle-con	rise-con	rise-time	settling-time	overshoot	rmse-ss	sd-ss
2 | 1458	40	69	40000	0.003	1e-06	0.0001	0.1	0.9	2	31	16.5125	0.059622299085	0.0550374573616


--------------------------------------------------------------------------------
/scripts/pid-bid-optimisation.sh:
--------------------------------------------------------------------------------
#!/bin/sh
# Run the multi-exchange PID bid-optimisation experiment once per campaign id.
# The ../python path below is relative to this script's directory, so switch
# there first; the script can then be started from anywhere.
cd "$(dirname "$0")" || exit 1

advs="1458" # "2259 2261 2821 2997 3358 3386 3427 3476"
status=0
# $advs is left unquoted on purpose: it is a space-separated list of ids.
for adv in $advs; do
    # keep going after a failure, but remember it for the exit status
    python ../python/control-ecpc-multiex-pid-bid-optimisation.py "$adv" || status=$?
done
exit "$status"
5 | 


--------------------------------------------------------------------------------
/scripts/run_demo_example.sh:
--------------------------------------------------------------------------------
#!/bin/sh
# Run the PID eCPC control demo.
# The ../python path is relative to this script's directory, so switch there
# first; the script can then be started from anywhere.
cd "$(dirname "$0")" || exit 1
python ../python/control-ecpc-pid-example.py
2 | 
3 | 


--------------------------------------------------------------------------------
/scripts/run_ipinyou_campaign.sh:
--------------------------------------------------------------------------------
#!/bin/sh
# Run the PID eCPC controller script.
# The ../python path is relative to this script's directory, so switch there
# first; the script can then be started from anywhere.
cd "$(dirname "$0")" || exit 1
python ../python/control-ecpc-pid.py
2 | 
3 | 


--------------------------------------------------------------------------------
/scripts/run_lr.sh:
--------------------------------------------------------------------------------
#!/bin/sh
# Train and evaluate the logistic-regression CTR model (lryzx.py) for every
# campaign listed in advs.
# All paths below are relative to this script's directory, so switch there
# first; the script can then be started from anywhere.
cd "$(dirname "$0")" || exit 1

advs="1458 2259 2261 2821 2997 3358 3386 3427 3476"
folder=../../make-ipinyou-data
status=0
# $advs is left unquoted on purpose: it is a space-separated list of ids.
for adv in $advs; do
    echo "$adv"
    # keep going after a failure, but remember it for the exit status
    python ../python/lryzx.py "$folder/$adv/train.yzx.txt" "$folder/$adv/test.yzx.txt" || status=$?
done
exit "$status"
8 | 


--------------------------------------------------------------------------------