├── .gitignore
├── Horses.ipynb
├── NYSE.ipynb
├── Provably Fair Machine Learning.ipynb
├── README.md
├── RidgeFair.ipynb
├── Yahoo Testing.ipynb
├── Yahoo.ipynb
├── evaluation_T.py
├── evaluation_d.py
├── evaluation_k.py
├── fairml.py
├── figures_T_50x
│   ├── avg_regret_diff.png
│   ├── avg_regret_ic.png
│   ├── avg_regret_ti.png
│   ├── cum_regret_diff.png
│   ├── cum_regret_ic.png
│   ├── cum_regret_ti.png
│   ├── final_regret_diff.png
│   ├── final_regret_ic.png
│   └── final_regret_ti.png
├── figures_d_50x
│   ├── avg_regret_diff.png
│   ├── avg_regret_ic.png
│   ├── avg_regret_ti.png
│   ├── cum_regret_diff.png
│   ├── cum_regret_ic.png
│   ├── cum_regret_ti.png
│   ├── final_regret_diff.png
│   ├── final_regret_ic.png
│   └── final_regret_ti.png
├── figures_k_50x
│   ├── avg_regret_diff.png
│   ├── avg_regret_ic.png
│   ├── avg_regret_ti.png
│   ├── cum_regret_diff.png
│   ├── cum_regret_ic.png
│   ├── cum_regret_ti.png
│   ├── final_regret_diff.png
│   ├── final_regret_ic.png
│   └── final_regret_ti.png
├── paper
│   ├── .gitignore
│   ├── Makefile
│   ├── acl.bst
│   ├── acl2015.sty
│   ├── figures
│   │   ├── T_50x_avg_regret_diff.png
│   │   ├── T_50x_avg_regret_ic.png
│   │   ├── T_50x_avg_regret_ti.png
│   │   ├── T_50x_cum_regret_diff.png
│   │   ├── T_50x_cum_regret_ic.png
│   │   ├── T_50x_cum_regret_ti.png
│   │   ├── T_50x_final_regret_diff.png
│   │   ├── T_50x_final_regret_ic.png
│   │   ├── T_50x_final_regret_ti.png
│   │   ├── d_50x_avg_regret_diff.png
│   │   ├── d_50x_avg_regret_ic.png
│   │   ├── d_50x_avg_regret_ti.png
│   │   ├── d_50x_cum_regret_diff.png
│   │   ├── d_50x_cum_regret_ic.png
│   │   ├── d_50x_cum_regret_ti.png
│   │   ├── d_50x_final_regret_diff.png
│   │   ├── d_50x_final_regret_ic.png
│   │   ├── d_50x_final_regret_ti.png
│   │   ├── k_50x_avg_regret_diff.png
│   │   ├── k_50x_avg_regret_ic.png
│   │   ├── k_50x_avg_regret_ti.png
│   │   ├── k_50x_cum_regret_diff.png
│   │   ├── k_50x_cum_regret_ic.png
│   │   ├── k_50x_cum_regret_ti.png
│   │   ├── k_50x_final_regret_diff.png
│   │   ├── k_50x_final_regret_ic.png
│   │   ├── k_50x_final_regret_ti.png
│   │   ├── yahoo-interval-chaining.png
│   │   └── yahoo-top-interval.png
│   ├── paper.bib
│   ├── paper.pdf
│   └── paper.tex
├── references
│   └── rawlsian_fairness.pdf
└── requirements.txt
/.gitignore:
--------------------------------------------------------------------------------
1 | fairml
2 |
--------------------------------------------------------------------------------
/Horses.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {
7 | "collapsed": true
8 | },
9 | "outputs": [],
10 | "source": [
11 | "import pandas as pd\n",
12 | "import numpy as np"
13 | ]
14 | },
15 | {
16 | "cell_type": "code",
17 | "execution_count": 53,
18 | "metadata": {
19 | "collapsed": false
20 | },
21 | "outputs": [],
22 | "source": [
23 | "# data_dir = './horses/'\n",
24 | "# horses_df = pd.read_csv(data_dir + 'horses.csv',\n",
25 | "# usecols=['age', 'sex_id', 'prize_money'])\n",
26 | "# horse_sexes_df = pd.read_csv(data_dir + 'horse_sexes.csv').set_index('id')\n",
27 | "# riders_df = pd.read_csv(data_dir + 'riders.csv').set_index('id')"
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": 182,
33 | "metadata": {
34 | "collapsed": false
35 | },
36 | "outputs": [],
37 | "source": [
38 | "# Dataset Source: https://www.kaggle.com/gunner38/horseracing\n",
39 | "tips_df = pd.read_csv('horse_tips.csv', encoding='latin1')\n",
40 | "tips_df['Bet Type'] = tips_df['Bet Type'].apply(lambda x : 1 if x == 'Win' else 0)\n",
41 | "tips_df['Result'] = tips_df['Result'].apply(lambda x : 1 if x == 'Win' else 0)\n",
42 | "\n",
43 | "horses = np.sort(tips_df['Horse'].unique())\n",
44 | "tracks = np.sort(tips_df['Track'].unique())\n",
45 | "\n",
46 | "tips_df['Horse'] = tips_df['Horse'].apply(lambda x : np.where(horses == x)[0][0])\n",
47 | "tips_df['Track'] = tips_df['Track'].apply(lambda x : np.where(tracks == x)[0][0])"
48 | ]
49 | },
50 | {
51 | "cell_type": "code",
52 | "execution_count": 97,
53 | "metadata": {
54 | "collapsed": false
55 | },
56 | "outputs": [],
57 | "source": [
58 | "tipsters = tips_df['Tipster'].unique()"
59 | ]
60 | },
61 | {
62 | "cell_type": "code",
63 | "execution_count": 151,
64 | "metadata": {
65 | "collapsed": false
66 | },
67 | "outputs": [
68 | {
69 | "data": {
70 | "text/html": [
71 | "
\n",
72 | "
\n",
73 | " \n",
74 | " \n",
75 | " | \n",
76 | " Success Rate | \n",
77 | "
\n",
78 | " \n",
79 | " \n",
80 | " \n",
81 | " Tipster G | \n",
82 | " 0.672414 | \n",
83 | "
\n",
84 | " \n",
85 | " Tipster C | \n",
86 | " 0.616226 | \n",
87 | "
\n",
88 | " \n",
89 | " Tipster F | \n",
90 | " 0.520548 | \n",
91 | "
\n",
92 | " \n",
93 | " Tipster M | \n",
94 | " 0.520376 | \n",
95 | "
\n",
96 | " \n",
97 | " Tipster N | \n",
98 | " 0.496386 | \n",
99 | "
\n",
100 | " \n",
101 | "
\n",
102 | "
"
103 | ],
104 | "text/plain": [
105 | " Success Rate\n",
106 | "Tipster G 0.672414\n",
107 | "Tipster C 0.616226\n",
108 | "Tipster F 0.520548\n",
109 | "Tipster M 0.520376\n",
110 | "Tipster N 0.496386"
111 | ]
112 | },
113 | "execution_count": 151,
114 | "metadata": {},
115 | "output_type": "execute_result"
116 | }
117 | ],
118 | "source": [
119 | "success_rates = dict()\n",
120 | "for tipster in tipsters:\n",
121 | " successes = tips_df[(tips_df['Tipster'] == tipster) & (tips_df['Bet Type'] == tips_df['Result'])].shape[0]\n",
122 | " total = tips_df[(tips_df['Tipster'] == tipster)].shape[0]\n",
123 | " success_rates[tipster] = successes/total\n",
124 | "successes_df = pd.DataFrame(pd.Series(success_rates), columns=['Success Rate']).sort_values(by='Success Rate', ascending=False)\n",
125 | "successes_df.head(5)"
126 | ]
127 | },
128 | {
129 | "cell_type": "code",
130 | "execution_count": 149,
131 | "metadata": {
132 | "collapsed": false
133 | },
134 | "outputs": [],
135 | "source": [
136 | "X_G = tips_df[tips_df.Tipster=='Tipster G']\n",
137 | "X_C = tips_df[tips_df.Tipster=='Tipster C']\n",
138 | "X_F = tips_df[tips_df.Tipster=='Tipster F']\n",
139 | "X_M = tips_df[tips_df.Tipster=='Tipster M']\n",
140 | "X_N = tips_df[tips_df.Tipster=='Tipster N']"
141 | ]
142 | },
143 | {
144 | "cell_type": "markdown",
145 | "metadata": {},
146 | "source": [
147 | "We model the online learning algorithm as follows.\n",
148 | "\n",
149 | "In each round, we are given a betting scheme from each tipster, drawn uniformly at random from the pool of possible schemes. We model the quality function (the return) using OLS on the features. The true return is computed as if ten dollars were bet in any scheme from the odds and the true result (i.e. 10 * the odds)."
150 | ]
151 | },
152 | {
153 | "cell_type": "code",
154 | "execution_count": 183,
155 | "metadata": {
156 | "collapsed": false
157 | },
158 | "outputs": [
159 | {
160 | "data": {
161 | "text/html": [
162 | "\n",
163 | "
\n",
164 | " \n",
165 | " \n",
166 | " | \n",
167 | " UID | \n",
168 | " ID | \n",
169 | " Tipster | \n",
170 | " Date | \n",
171 | " Track | \n",
172 | " Horse | \n",
173 | " Bet Type | \n",
174 | " Odds | \n",
175 | " Result | \n",
176 | " TipsterActive | \n",
177 | "
\n",
178 | " \n",
179 | " \n",
180 | " \n",
181 | " 0 | \n",
182 | " 1 | \n",
183 | " 1 | \n",
184 | " Tipster A | \n",
185 | " 24/07/2015 | \n",
186 | " 2 | \n",
187 | " 5158 | \n",
188 | " 1 | \n",
189 | " 8.00 | \n",
190 | " 0 | \n",
191 | " True | \n",
192 | "
\n",
193 | " \n",
194 | " 1 | \n",
195 | " 2 | \n",
196 | " 2 | \n",
197 | " Tipster A | \n",
198 | " 24/07/2015 | \n",
199 | " 96 | \n",
200 | " 13108 | \n",
201 | " 1 | \n",
202 | " 4.50 | \n",
203 | " 0 | \n",
204 | " True | \n",
205 | "
\n",
206 | " \n",
207 | " 2 | \n",
208 | " 3 | \n",
209 | " 3 | \n",
210 | " Tipster A | \n",
211 | " 24/07/2015 | \n",
212 | " 114 | \n",
213 | " 13411 | \n",
214 | " 1 | \n",
215 | " 7.00 | \n",
216 | " 0 | \n",
217 | " True | \n",
218 | "
\n",
219 | " \n",
220 | " 3 | \n",
221 | " 4 | \n",
222 | " 4 | \n",
223 | " Tipster A | \n",
224 | " 24/07/2015 | \n",
225 | " 74 | \n",
226 | " 8976 | \n",
227 | " 1 | \n",
228 | " 5.00 | \n",
229 | " 0 | \n",
230 | " True | \n",
231 | "
\n",
232 | " \n",
233 | " 4 | \n",
234 | " 5 | \n",
235 | " 5 | \n",
236 | " Tipster A | \n",
237 | " 25/07/2015 | \n",
238 | " 2 | \n",
239 | " 10554 | \n",
240 | " 1 | \n",
241 | " 4.33 | \n",
242 | " 1 | \n",
243 | " True | \n",
244 | "
\n",
245 | " \n",
246 | " 5 | \n",
247 | " 6 | \n",
248 | " 6 | \n",
249 | " Tipster A | \n",
250 | " 25/07/2015 | \n",
251 | " 114 | \n",
252 | " 327 | \n",
253 | " 1 | \n",
254 | " 6.00 | \n",
255 | " 0 | \n",
256 | " True | \n",
257 | "
\n",
258 | " \n",
259 | " 6 | \n",
260 | " 7 | \n",
261 | " 7 | \n",
262 | " Tipster A | \n",
263 | " 25/07/2015 | \n",
264 | " 73 | \n",
265 | " 9791 | \n",
266 | " 1 | \n",
267 | " 6.00 | \n",
268 | " 0 | \n",
269 | " True | \n",
270 | "
\n",
271 | " \n",
272 | " 7 | \n",
273 | " 8 | \n",
274 | " 8 | \n",
275 | " Tipster A | \n",
276 | " 25/07/2015 | \n",
277 | " 62 | \n",
278 | " 2019 | \n",
279 | " 1 | \n",
280 | " 6.00 | \n",
281 | " 0 | \n",
282 | " True | \n",
283 | "
\n",
284 | " \n",
285 | " 8 | \n",
286 | " 9 | \n",
287 | " 9 | \n",
288 | " Tipster A | \n",
289 | " 26/07/2015 | \n",
290 | " 12 | \n",
291 | " 12101 | \n",
292 | " 1 | \n",
293 | " 5.50 | \n",
294 | " 0 | \n",
295 | " True | \n",
296 | "
\n",
297 | " \n",
298 | " 9 | \n",
299 | " 10 | \n",
300 | " 10 | \n",
301 | " Tipster A | \n",
302 | " 26/07/2015 | \n",
303 | " 80 | \n",
304 | " 115 | \n",
305 | " 1 | \n",
306 | " 2.00 | \n",
307 | " 0 | \n",
308 | " True | \n",
309 | "
\n",
310 | " \n",
311 | " 10 | \n",
312 | " 11 | \n",
313 | " 11 | \n",
314 | " Tipster A | \n",
315 | " 26/07/2015 | \n",
316 | " 104 | \n",
317 | " 8878 | \n",
318 | " 0 | \n",
319 | " 10.00 | \n",
320 | " 0 | \n",
321 | " True | \n",
322 | "
\n",
323 | " \n",
324 | " 11 | \n",
325 | " 12 | \n",
326 | " 12 | \n",
327 | " Tipster A | \n",
328 | " 01/08/2015 | \n",
329 | " 96 | \n",
330 | " 5508 | \n",
331 | " 1 | \n",
332 | " 8.00 | \n",
333 | " 0 | \n",
334 | " True | \n",
335 | "
\n",
336 | " \n",
337 | " 12 | \n",
338 | " 13 | \n",
339 | " 13 | \n",
340 | " Tipster A | \n",
341 | " 01/08/2015 | \n",
342 | " 47 | \n",
343 | " 4484 | \n",
344 | " 1 | \n",
345 | " 6.00 | \n",
346 | " 0 | \n",
347 | " True | \n",
348 | "
\n",
349 | " \n",
350 | " 13 | \n",
351 | " 14 | \n",
352 | " 14 | \n",
353 | " Tipster A | \n",
354 | " 02/08/2015 | \n",
355 | " 18 | \n",
356 | " 6602 | \n",
357 | " 1 | \n",
358 | " 6.00 | \n",
359 | " 0 | \n",
360 | " True | \n",
361 | "
\n",
362 | " \n",
363 | " 14 | \n",
364 | " 15 | \n",
365 | " 15 | \n",
366 | " Tipster A | \n",
367 | " 04/08/2015 | \n",
368 | " 85 | \n",
369 | " 15390 | \n",
370 | " 1 | \n",
371 | " 3.25 | \n",
372 | " 1 | \n",
373 | " True | \n",
374 | "
\n",
375 | " \n",
376 | " 15 | \n",
377 | " 16 | \n",
378 | " 16 | \n",
379 | " Tipster A | \n",
380 | " 04/08/2015 | \n",
381 | " 14 | \n",
382 | " 7851 | \n",
383 | " 1 | \n",
384 | " 2.75 | \n",
385 | " 0 | \n",
386 | " True | \n",
387 | "
\n",
388 | " \n",
389 | " 16 | \n",
390 | " 17 | \n",
391 | " 17 | \n",
392 | " Tipster A | \n",
393 | " 04/08/2015 | \n",
394 | " 16 | \n",
395 | " 1554 | \n",
396 | " 1 | \n",
397 | " 3.50 | \n",
398 | " 0 | \n",
399 | " True | \n",
400 | "
\n",
401 | " \n",
402 | " 17 | \n",
403 | " 18 | \n",
404 | " 18 | \n",
405 | " Tipster A | \n",
406 | " 05/08/2015 | \n",
407 | " 11 | \n",
408 | " 12537 | \n",
409 | " 1 | \n",
410 | " 3.25 | \n",
411 | " 0 | \n",
412 | " True | \n",
413 | "
\n",
414 | " \n",
415 | " 18 | \n",
416 | " 19 | \n",
417 | " 19 | \n",
418 | " Tipster A | \n",
419 | " 05/08/2015 | \n",
420 | " 73 | \n",
421 | " 10307 | \n",
422 | " 1 | \n",
423 | " 3.25 | \n",
424 | " 0 | \n",
425 | " True | \n",
426 | "
\n",
427 | " \n",
428 | " 19 | \n",
429 | " 20 | \n",
430 | " 20 | \n",
431 | " Tipster A | \n",
432 | " 06/08/2015 | \n",
433 | " 111 | \n",
434 | " 6326 | \n",
435 | " 0 | \n",
436 | " 11.00 | \n",
437 | " 0 | \n",
438 | " True | \n",
439 | "
\n",
440 | " \n",
441 | " 20 | \n",
442 | " 21 | \n",
443 | " 21 | \n",
444 | " Tipster A | \n",
445 | " 14/08/2015 | \n",
446 | " 74 | \n",
447 | " 3358 | \n",
448 | " 1 | \n",
449 | " 1.80 | \n",
450 | " 0 | \n",
451 | " True | \n",
452 | "
\n",
453 | " \n",
454 | " 21 | \n",
455 | " 22 | \n",
456 | " 22 | \n",
457 | " Tipster A | \n",
458 | " 15/08/2015 | \n",
459 | " 72 | \n",
460 | " 15487 | \n",
461 | " 1 | \n",
462 | " 4.00 | \n",
463 | " 0 | \n",
464 | " True | \n",
465 | "
\n",
466 | " \n",
467 | " 22 | \n",
468 | " 23 | \n",
469 | " 23 | \n",
470 | " Tipster A | \n",
471 | " 15/08/2015 | \n",
472 | " 72 | \n",
473 | " 2269 | \n",
474 | " 0 | \n",
475 | " 7.50 | \n",
476 | " 0 | \n",
477 | " True | \n",
478 | "
\n",
479 | " \n",
480 | " 23 | \n",
481 | " 24 | \n",
482 | " 24 | \n",
483 | " Tipster A | \n",
484 | " 15/08/2015 | \n",
485 | " 72 | \n",
486 | " 304 | \n",
487 | " 1 | \n",
488 | " 4.50 | \n",
489 | " 1 | \n",
490 | " True | \n",
491 | "
\n",
492 | " \n",
493 | " 24 | \n",
494 | " 25 | \n",
495 | " 25 | \n",
496 | " Tipster A | \n",
497 | " 15/08/2015 | \n",
498 | " 66 | \n",
499 | " 14466 | \n",
500 | " 1 | \n",
501 | " 3.25 | \n",
502 | " 0 | \n",
503 | " True | \n",
504 | "
\n",
505 | " \n",
506 | " 25 | \n",
507 | " 26 | \n",
508 | " 26 | \n",
509 | " Tipster A | \n",
510 | " 16/08/2015 | \n",
511 | " 102 | \n",
512 | " 10427 | \n",
513 | " 0 | \n",
514 | " 8.00 | \n",
515 | " 0 | \n",
516 | " True | \n",
517 | "
\n",
518 | " \n",
519 | " 26 | \n",
520 | " 27 | \n",
521 | " 27 | \n",
522 | " Tipster A | \n",
523 | " 16/08/2015 | \n",
524 | " 102 | \n",
525 | " 6517 | \n",
526 | " 1 | \n",
527 | " 2.50 | \n",
528 | " 0 | \n",
529 | " True | \n",
530 | "
\n",
531 | " \n",
532 | " 27 | \n",
533 | " 28 | \n",
534 | " 28 | \n",
535 | " Tipster A | \n",
536 | " 18/08/2015 | \n",
537 | " 59 | \n",
538 | " 4285 | \n",
539 | " 1 | \n",
540 | " 11.00 | \n",
541 | " 1 | \n",
542 | " True | \n",
543 | "
\n",
544 | " \n",
545 | " 28 | \n",
546 | " 29 | \n",
547 | " 29 | \n",
548 | " Tipster A | \n",
549 | " 18/08/2015 | \n",
550 | " 16 | \n",
551 | " 5607 | \n",
552 | " 0 | \n",
553 | " 11.00 | \n",
554 | " 1 | \n",
555 | " True | \n",
556 | "
\n",
557 | " \n",
558 | " 29 | \n",
559 | " 30 | \n",
560 | " 30 | \n",
561 | " Tipster A | \n",
562 | " 18/08/2015 | \n",
563 | " 16 | \n",
564 | " 14522 | \n",
565 | " 1 | \n",
566 | " 3.25 | \n",
567 | " 0 | \n",
568 | " True | \n",
569 | "
\n",
570 | " \n",
571 | " ... | \n",
572 | " ... | \n",
573 | " ... | \n",
574 | " ... | \n",
575 | " ... | \n",
576 | " ... | \n",
577 | " ... | \n",
578 | " ... | \n",
579 | " ... | \n",
580 | " ... | \n",
581 | " ... | \n",
582 | "
\n",
583 | " \n",
584 | " 38218 | \n",
585 | " 38219 | \n",
586 | " 580 | \n",
587 | " Tipster E1 | \n",
588 | " 30/01/2016 | \n",
589 | " 26 | \n",
590 | " 7209 | \n",
591 | " 1 | \n",
592 | " 3.00 | \n",
593 | " 0 | \n",
594 | " False | \n",
595 | "
\n",
596 | " \n",
597 | " 38219 | \n",
598 | " 38220 | \n",
599 | " 581 | \n",
600 | " Tipster E1 | \n",
601 | " 03/02/2016 | \n",
602 | " 73 | \n",
603 | " 14716 | \n",
604 | " 0 | \n",
605 | " 34.00 | \n",
606 | " 0 | \n",
607 | " False | \n",
608 | "
\n",
609 | " \n",
610 | " 38220 | \n",
611 | " 38221 | \n",
612 | " 582 | \n",
613 | " Tipster E1 | \n",
614 | " 10/02/2016 | \n",
615 | " 12 | \n",
616 | " 14716 | \n",
617 | " 0 | \n",
618 | " 15.00 | \n",
619 | " 0 | \n",
620 | " False | \n",
621 | "
\n",
622 | " \n",
623 | " 38221 | \n",
624 | " 38222 | \n",
625 | " 583 | \n",
626 | " Tipster E1 | \n",
627 | " 13/02/2016 | \n",
628 | " 111 | \n",
629 | " 1384 | \n",
630 | " 0 | \n",
631 | " 13.00 | \n",
632 | " 1 | \n",
633 | " False | \n",
634 | "
\n",
635 | " \n",
636 | " 38222 | \n",
637 | " 38223 | \n",
638 | " 584 | \n",
639 | " Tipster E1 | \n",
640 | " 14/02/2016 | \n",
641 | " 92 | \n",
642 | " 4982 | \n",
643 | " 1 | \n",
644 | " 4.50 | \n",
645 | " 0 | \n",
646 | " False | \n",
647 | "
\n",
648 | " \n",
649 | " 38223 | \n",
650 | " 38224 | \n",
651 | " 585 | \n",
652 | " Tipster E1 | \n",
653 | " 25/02/2016 | \n",
654 | " 47 | \n",
655 | " 13367 | \n",
656 | " 0 | \n",
657 | " 11.00 | \n",
658 | " 0 | \n",
659 | " False | \n",
660 | "
\n",
661 | " \n",
662 | " 38224 | \n",
663 | " 38225 | \n",
664 | " 586 | \n",
665 | " Tipster E1 | \n",
666 | " 13/03/2016 | \n",
667 | " 106 | \n",
668 | " 2723 | \n",
669 | " 1 | \n",
670 | " 5.50 | \n",
671 | " 0 | \n",
672 | " False | \n",
673 | "
\n",
674 | " \n",
675 | " 38225 | \n",
676 | " 38226 | \n",
677 | " 587 | \n",
678 | " Tipster E1 | \n",
679 | " 13/03/2016 | \n",
680 | " 53 | \n",
681 | " 8120 | \n",
682 | " 0 | \n",
683 | " 21.00 | \n",
684 | " 0 | \n",
685 | " False | \n",
686 | "
\n",
687 | " \n",
688 | " 38226 | \n",
689 | " 38227 | \n",
690 | " 588 | \n",
691 | " Tipster E1 | \n",
692 | " 15/03/2016 | \n",
693 | " 17 | \n",
694 | " 480 | \n",
695 | " 1 | \n",
696 | " 5.00 | \n",
697 | " 1 | \n",
698 | " False | \n",
699 | "
\n",
700 | " \n",
701 | " 38227 | \n",
702 | " 38228 | \n",
703 | " 589 | \n",
704 | " Tipster E1 | \n",
705 | " 15/03/2016 | \n",
706 | " 17 | \n",
707 | " 2365 | \n",
708 | " 0 | \n",
709 | " 26.00 | \n",
710 | " 0 | \n",
711 | " False | \n",
712 | "
\n",
713 | " \n",
714 | " 38228 | \n",
715 | " 38229 | \n",
716 | " 590 | \n",
717 | " Tipster E1 | \n",
718 | " 15/03/2016 | \n",
719 | " 17 | \n",
720 | " 9238 | \n",
721 | " 0 | \n",
722 | " 67.00 | \n",
723 | " 0 | \n",
724 | " False | \n",
725 | "
\n",
726 | " \n",
727 | " 38229 | \n",
728 | " 38230 | \n",
729 | " 591 | \n",
730 | " Tipster E1 | \n",
731 | " 17/03/2016 | \n",
732 | " 17 | \n",
733 | " 1722 | \n",
734 | " 1 | \n",
735 | " 6.50 | \n",
736 | " 1 | \n",
737 | " False | \n",
738 | "
\n",
739 | " \n",
740 | " 38230 | \n",
741 | " 38231 | \n",
742 | " 592 | \n",
743 | " Tipster E1 | \n",
744 | " 17/03/2016 | \n",
745 | " 16 | \n",
746 | " 2133 | \n",
747 | " 1 | \n",
748 | " 6.50 | \n",
749 | " 0 | \n",
750 | " False | \n",
751 | "
\n",
752 | " \n",
753 | " 38231 | \n",
754 | " 38232 | \n",
755 | " 593 | \n",
756 | " Tipster E1 | \n",
757 | " 18/03/2016 | \n",
758 | " 17 | \n",
759 | " 3129 | \n",
760 | " 0 | \n",
761 | " 17.00 | \n",
762 | " 0 | \n",
763 | " False | \n",
764 | "
\n",
765 | " \n",
766 | " 38232 | \n",
767 | " 38233 | \n",
768 | " 594 | \n",
769 | " Tipster E1 | \n",
770 | " 18/03/2016 | \n",
771 | " 17 | \n",
772 | " 8066 | \n",
773 | " 1 | \n",
774 | " 8.00 | \n",
775 | " 0 | \n",
776 | " False | \n",
777 | "
\n",
778 | " \n",
779 | " 38233 | \n",
780 | " 38234 | \n",
781 | " 595 | \n",
782 | " Tipster E1 | \n",
783 | " 18/03/2016 | \n",
784 | " 17 | \n",
785 | " 11674 | \n",
786 | " 0 | \n",
787 | " 11.00 | \n",
788 | " 1 | \n",
789 | " False | \n",
790 | "
\n",
791 | " \n",
792 | " 38234 | \n",
793 | " 38235 | \n",
794 | " 596 | \n",
795 | " Tipster E1 | \n",
796 | " 19/03/2016 | \n",
797 | " 54 | \n",
798 | " 8847 | \n",
799 | " 0 | \n",
800 | " 11.00 | \n",
801 | " 1 | \n",
802 | " False | \n",
803 | "
\n",
804 | " \n",
805 | " 38235 | \n",
806 | " 38236 | \n",
807 | " 597 | \n",
808 | " Tipster E1 | \n",
809 | " 20/03/2016 | \n",
810 | " 12 | \n",
811 | " 9366 | \n",
812 | " 1 | \n",
813 | " 5.00 | \n",
814 | " 0 | \n",
815 | " False | \n",
816 | "
\n",
817 | " \n",
818 | " 38236 | \n",
819 | " 38237 | \n",
820 | " 598 | \n",
821 | " Tipster E1 | \n",
822 | " 26/03/2016 | \n",
823 | " 67 | \n",
824 | " 1628 | \n",
825 | " 1 | \n",
826 | " 7.50 | \n",
827 | " 0 | \n",
828 | " False | \n",
829 | "
\n",
830 | " \n",
831 | " 38237 | \n",
832 | " 38238 | \n",
833 | " 599 | \n",
834 | " Tipster E1 | \n",
835 | " 28/03/2016 | \n",
836 | " 35 | \n",
837 | " 11945 | \n",
838 | " 0 | \n",
839 | " 29.00 | \n",
840 | " 0 | \n",
841 | " False | \n",
842 | "
\n",
843 | " \n",
844 | " 38238 | \n",
845 | " 38239 | \n",
846 | " 600 | \n",
847 | " Tipster E1 | \n",
848 | " 30/03/2016 | \n",
849 | " 92 | \n",
850 | " 1436 | \n",
851 | " 1 | \n",
852 | " 5.00 | \n",
853 | " 0 | \n",
854 | " False | \n",
855 | "
\n",
856 | " \n",
857 | " 38239 | \n",
858 | " 38240 | \n",
859 | " 601 | \n",
860 | " Tipster E1 | \n",
861 | " 30/03/2016 | \n",
862 | " 92 | \n",
863 | " 6295 | \n",
864 | " 1 | \n",
865 | " 6.00 | \n",
866 | " 0 | \n",
867 | " False | \n",
868 | "
\n",
869 | " \n",
870 | " 38240 | \n",
871 | " 38241 | \n",
872 | " 602 | \n",
873 | " Tipster E1 | \n",
874 | " 30/03/2016 | \n",
875 | " 34 | \n",
876 | " 407 | \n",
877 | " 1 | \n",
878 | " 4.33 | \n",
879 | " 0 | \n",
880 | " False | \n",
881 | "
\n",
882 | " \n",
883 | " 38241 | \n",
884 | " 38242 | \n",
885 | " 603 | \n",
886 | " Tipster E1 | \n",
887 | " 01/04/2016 | \n",
888 | " 107 | \n",
889 | " 14141 | \n",
890 | " 1 | \n",
891 | " 4.50 | \n",
892 | " 0 | \n",
893 | " False | \n",
894 | "
\n",
895 | " \n",
896 | " 38242 | \n",
897 | " 38243 | \n",
898 | " 604 | \n",
899 | " Tipster E1 | \n",
900 | " 01/04/2016 | \n",
901 | " 111 | \n",
902 | " 5905 | \n",
903 | " 1 | \n",
904 | " 10.00 | \n",
905 | " 0 | \n",
906 | " False | \n",
907 | "
\n",
908 | " \n",
909 | " 38243 | \n",
910 | " 38244 | \n",
911 | " 605 | \n",
912 | " Tipster E1 | \n",
913 | " 02/04/2016 | \n",
914 | " 54 | \n",
915 | " 12946 | \n",
916 | " 1 | \n",
917 | " 7.00 | \n",
918 | " 0 | \n",
919 | " False | \n",
920 | "
\n",
921 | " \n",
922 | " 38244 | \n",
923 | " 38245 | \n",
924 | " 606 | \n",
925 | " Tipster E1 | \n",
926 | " 02/04/2016 | \n",
927 | " 26 | \n",
928 | " 4591 | \n",
929 | " 0 | \n",
930 | " 12.00 | \n",
931 | " 0 | \n",
932 | " False | \n",
933 | "
\n",
934 | " \n",
935 | " 38245 | \n",
936 | " 38246 | \n",
937 | " 607 | \n",
938 | " Tipster E1 | \n",
939 | " 02/04/2016 | \n",
940 | " 26 | \n",
941 | " 6824 | \n",
942 | " 1 | \n",
943 | " 7.00 | \n",
944 | " 0 | \n",
945 | " False | \n",
946 | "
\n",
947 | " \n",
948 | " 38246 | \n",
949 | " 38247 | \n",
950 | " 608 | \n",
951 | " Tipster E1 | \n",
952 | " 02/04/2016 | \n",
953 | " 53 | \n",
954 | " 7068 | \n",
955 | " 1 | \n",
956 | " 4.33 | \n",
957 | " 0 | \n",
958 | " False | \n",
959 | "
\n",
960 | " \n",
961 | " 38247 | \n",
962 | " 38248 | \n",
963 | " 609 | \n",
964 | " Tipster E1 | \n",
965 | " 31/05/2016 | \n",
966 | " 82 | \n",
967 | " 4015 | \n",
968 | " 1 | \n",
969 | " 5.00 | \n",
970 | " 0 | \n",
971 | " False | \n",
972 | "
\n",
973 | " \n",
974 | "
\n",
975 | "
38248 rows × 10 columns
\n",
976 | "
"
977 | ],
978 | "text/plain": [
979 | " UID ID Tipster Date Track Horse Bet Type Odds \\\n",
980 | "0 1 1 Tipster A 24/07/2015 2 5158 1 8.00 \n",
981 | "1 2 2 Tipster A 24/07/2015 96 13108 1 4.50 \n",
982 | "2 3 3 Tipster A 24/07/2015 114 13411 1 7.00 \n",
983 | "3 4 4 Tipster A 24/07/2015 74 8976 1 5.00 \n",
984 | "4 5 5 Tipster A 25/07/2015 2 10554 1 4.33 \n",
985 | "5 6 6 Tipster A 25/07/2015 114 327 1 6.00 \n",
986 | "6 7 7 Tipster A 25/07/2015 73 9791 1 6.00 \n",
987 | "7 8 8 Tipster A 25/07/2015 62 2019 1 6.00 \n",
988 | "8 9 9 Tipster A 26/07/2015 12 12101 1 5.50 \n",
989 | "9 10 10 Tipster A 26/07/2015 80 115 1 2.00 \n",
990 | "10 11 11 Tipster A 26/07/2015 104 8878 0 10.00 \n",
991 | "11 12 12 Tipster A 01/08/2015 96 5508 1 8.00 \n",
992 | "12 13 13 Tipster A 01/08/2015 47 4484 1 6.00 \n",
993 | "13 14 14 Tipster A 02/08/2015 18 6602 1 6.00 \n",
994 | "14 15 15 Tipster A 04/08/2015 85 15390 1 3.25 \n",
995 | "15 16 16 Tipster A 04/08/2015 14 7851 1 2.75 \n",
996 | "16 17 17 Tipster A 04/08/2015 16 1554 1 3.50 \n",
997 | "17 18 18 Tipster A 05/08/2015 11 12537 1 3.25 \n",
998 | "18 19 19 Tipster A 05/08/2015 73 10307 1 3.25 \n",
999 | "19 20 20 Tipster A 06/08/2015 111 6326 0 11.00 \n",
1000 | "20 21 21 Tipster A 14/08/2015 74 3358 1 1.80 \n",
1001 | "21 22 22 Tipster A 15/08/2015 72 15487 1 4.00 \n",
1002 | "22 23 23 Tipster A 15/08/2015 72 2269 0 7.50 \n",
1003 | "23 24 24 Tipster A 15/08/2015 72 304 1 4.50 \n",
1004 | "24 25 25 Tipster A 15/08/2015 66 14466 1 3.25 \n",
1005 | "25 26 26 Tipster A 16/08/2015 102 10427 0 8.00 \n",
1006 | "26 27 27 Tipster A 16/08/2015 102 6517 1 2.50 \n",
1007 | "27 28 28 Tipster A 18/08/2015 59 4285 1 11.00 \n",
1008 | "28 29 29 Tipster A 18/08/2015 16 5607 0 11.00 \n",
1009 | "29 30 30 Tipster A 18/08/2015 16 14522 1 3.25 \n",
1010 | "... ... ... ... ... ... ... ... ... \n",
1011 | "38218 38219 580 Tipster E1 30/01/2016 26 7209 1 3.00 \n",
1012 | "38219 38220 581 Tipster E1 03/02/2016 73 14716 0 34.00 \n",
1013 | "38220 38221 582 Tipster E1 10/02/2016 12 14716 0 15.00 \n",
1014 | "38221 38222 583 Tipster E1 13/02/2016 111 1384 0 13.00 \n",
1015 | "38222 38223 584 Tipster E1 14/02/2016 92 4982 1 4.50 \n",
1016 | "38223 38224 585 Tipster E1 25/02/2016 47 13367 0 11.00 \n",
1017 | "38224 38225 586 Tipster E1 13/03/2016 106 2723 1 5.50 \n",
1018 | "38225 38226 587 Tipster E1 13/03/2016 53 8120 0 21.00 \n",
1019 | "38226 38227 588 Tipster E1 15/03/2016 17 480 1 5.00 \n",
1020 | "38227 38228 589 Tipster E1 15/03/2016 17 2365 0 26.00 \n",
1021 | "38228 38229 590 Tipster E1 15/03/2016 17 9238 0 67.00 \n",
1022 | "38229 38230 591 Tipster E1 17/03/2016 17 1722 1 6.50 \n",
1023 | "38230 38231 592 Tipster E1 17/03/2016 16 2133 1 6.50 \n",
1024 | "38231 38232 593 Tipster E1 18/03/2016 17 3129 0 17.00 \n",
1025 | "38232 38233 594 Tipster E1 18/03/2016 17 8066 1 8.00 \n",
1026 | "38233 38234 595 Tipster E1 18/03/2016 17 11674 0 11.00 \n",
1027 | "38234 38235 596 Tipster E1 19/03/2016 54 8847 0 11.00 \n",
1028 | "38235 38236 597 Tipster E1 20/03/2016 12 9366 1 5.00 \n",
1029 | "38236 38237 598 Tipster E1 26/03/2016 67 1628 1 7.50 \n",
1030 | "38237 38238 599 Tipster E1 28/03/2016 35 11945 0 29.00 \n",
1031 | "38238 38239 600 Tipster E1 30/03/2016 92 1436 1 5.00 \n",
1032 | "38239 38240 601 Tipster E1 30/03/2016 92 6295 1 6.00 \n",
1033 | "38240 38241 602 Tipster E1 30/03/2016 34 407 1 4.33 \n",
1034 | "38241 38242 603 Tipster E1 01/04/2016 107 14141 1 4.50 \n",
1035 | "38242 38243 604 Tipster E1 01/04/2016 111 5905 1 10.00 \n",
1036 | "38243 38244 605 Tipster E1 02/04/2016 54 12946 1 7.00 \n",
1037 | "38244 38245 606 Tipster E1 02/04/2016 26 4591 0 12.00 \n",
1038 | "38245 38246 607 Tipster E1 02/04/2016 26 6824 1 7.00 \n",
1039 | "38246 38247 608 Tipster E1 02/04/2016 53 7068 1 4.33 \n",
1040 | "38247 38248 609 Tipster E1 31/05/2016 82 4015 1 5.00 \n",
1041 | "\n",
1042 | " Result TipsterActive \n",
1043 | "0 0 True \n",
1044 | "1 0 True \n",
1045 | "2 0 True \n",
1046 | "3 0 True \n",
1047 | "4 1 True \n",
1048 | "5 0 True \n",
1049 | "6 0 True \n",
1050 | "7 0 True \n",
1051 | "8 0 True \n",
1052 | "9 0 True \n",
1053 | "10 0 True \n",
1054 | "11 0 True \n",
1055 | "12 0 True \n",
1056 | "13 0 True \n",
1057 | "14 1 True \n",
1058 | "15 0 True \n",
1059 | "16 0 True \n",
1060 | "17 0 True \n",
1061 | "18 0 True \n",
1062 | "19 0 True \n",
1063 | "20 0 True \n",
1064 | "21 0 True \n",
1065 | "22 0 True \n",
1066 | "23 1 True \n",
1067 | "24 0 True \n",
1068 | "25 0 True \n",
1069 | "26 0 True \n",
1070 | "27 1 True \n",
1071 | "28 1 True \n",
1072 | "29 0 True \n",
1073 | "... ... ... \n",
1074 | "38218 0 False \n",
1075 | "38219 0 False \n",
1076 | "38220 0 False \n",
1077 | "38221 1 False \n",
1078 | "38222 0 False \n",
1079 | "38223 0 False \n",
1080 | "38224 0 False \n",
1081 | "38225 0 False \n",
1082 | "38226 1 False \n",
1083 | "38227 0 False \n",
1084 | "38228 0 False \n",
1085 | "38229 1 False \n",
1086 | "38230 0 False \n",
1087 | "38231 0 False \n",
1088 | "38232 0 False \n",
1089 | "38233 1 False \n",
1090 | "38234 1 False \n",
1091 | "38235 0 False \n",
1092 | "38236 0 False \n",
1093 | "38237 0 False \n",
1094 | "38238 0 False \n",
1095 | "38239 0 False \n",
1096 | "38240 0 False \n",
1097 | "38241 0 False \n",
1098 | "38242 0 False \n",
1099 | "38243 0 False \n",
1100 | "38244 0 False \n",
1101 | "38245 0 False \n",
1102 | "38246 0 False \n",
1103 | "38247 0 False \n",
1104 | "\n",
1105 | "[38248 rows x 10 columns]"
1106 | ]
1107 | },
1108 | "execution_count": 183,
1109 | "metadata": {},
1110 | "output_type": "execute_result"
1111 | }
1112 | ],
1113 | "source": [
1114 | "tips_df"
1115 | ]
1116 | },
1117 | {
1118 | "cell_type": "code",
1119 | "execution_count": null,
1120 | "metadata": {
1121 | "collapsed": true
1122 | },
1123 | "outputs": [],
1124 | "source": []
1125 | }
1126 | ],
1127 | "metadata": {
1128 | "kernelspec": {
1129 | "display_name": "Python 3",
1130 | "language": "python",
1131 | "name": "python3"
1132 | },
1133 | "language_info": {
1134 | "codemirror_mode": {
1135 | "name": "ipython",
1136 | "version": 3
1137 | },
1138 | "file_extension": ".py",
1139 | "mimetype": "text/x-python",
1140 | "name": "python",
1141 | "nbconvert_exporter": "python",
1142 | "pygments_lexer": "ipython3",
1143 | "version": "3.5.2"
1144 | }
1145 | },
1146 | "nbformat": 4,
1147 | "nbformat_minor": 2
1148 | }
1149 |
--------------------------------------------------------------------------------
/NYSE.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {
7 | "collapsed": true
8 | },
9 | "outputs": [],
10 | "source": [
11 | "import pandas as pd\n",
12 | "import numpy as np\n",
13 | "\n",
14 | "data_dir = './nyse/'"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 8,
20 | "metadata": {
21 | "collapsed": true
22 | },
23 | "outputs": [],
24 | "source": [
25 | "fundamentals_df = pd.read_csv(data_dir + 'fundamentals.csv')\n",
26 | "prices_df = pd.read_csv(data_dir + 'prices-split-adjusted.csv')"
27 | ]
28 | },
29 | {
30 | "cell_type": "code",
31 | "execution_count": 13,
32 | "metadata": {
33 | "collapsed": false
34 | },
35 | "outputs": [
36 | {
37 | "data": {
38 | "text/html": [
39 | "\n",
40 | "
\n",
41 | " \n",
42 | " \n",
43 | " | \n",
44 | " Unnamed: 0 | \n",
45 | " Ticker Symbol | \n",
46 | " Period Ending | \n",
47 | " Accounts Payable | \n",
48 | " Accounts Receivable | \n",
49 | " Add'l income/expense items | \n",
50 | " After Tax ROE | \n",
51 | " Capital Expenditures | \n",
52 | " Capital Surplus | \n",
53 | " Cash Ratio | \n",
54 | " ... | \n",
55 | " Total Current Assets | \n",
56 | " Total Current Liabilities | \n",
57 | " Total Equity | \n",
58 | " Total Liabilities | \n",
59 | " Total Liabilities & Equity | \n",
60 | " Total Revenue | \n",
61 | " Treasury Stock | \n",
62 | " For Year | \n",
63 | " Earnings Per Share | \n",
64 | " Estimated Shares Outstanding | \n",
65 | "
\n",
66 | " \n",
67 | " \n",
68 | " \n",
69 | " 0 | \n",
70 | " 0 | \n",
71 | " AAL | \n",
72 | " 2012-12-31 | \n",
73 | " 3.068000e+09 | \n",
74 | " -222000000.0 | \n",
75 | " -1.961000e+09 | \n",
76 | " 23.0 | \n",
77 | " -1.888000e+09 | \n",
78 | " 4.695000e+09 | \n",
79 | " 53.0 | \n",
80 | " ... | \n",
81 | " 7.072000e+09 | \n",
82 | " 9.011000e+09 | \n",
83 | " -7.987000e+09 | \n",
84 | " 2.489100e+10 | \n",
85 | " 1.690400e+10 | \n",
86 | " 2.485500e+10 | \n",
87 | " -367000000.0 | \n",
88 | " 2012.0 | \n",
89 | " -5.60 | \n",
90 | " 3.350000e+08 | \n",
91 | "
\n",
92 | " \n",
93 | " 1 | \n",
94 | " 1 | \n",
95 | " AAL | \n",
96 | " 2013-12-31 | \n",
97 | " 4.975000e+09 | \n",
98 | " -93000000.0 | \n",
99 | " -2.723000e+09 | \n",
100 | " 67.0 | \n",
101 | " -3.114000e+09 | \n",
102 | " 1.059200e+10 | \n",
103 | " 75.0 | \n",
104 | " ... | \n",
105 | " 1.432300e+10 | \n",
106 | " 1.380600e+10 | \n",
107 | " -2.731000e+09 | \n",
108 | " 4.500900e+10 | \n",
109 | " 4.227800e+10 | \n",
110 | " 2.674300e+10 | \n",
111 | " 0.0 | \n",
112 | " 2013.0 | \n",
113 | " -11.25 | \n",
114 | " 1.630222e+08 | \n",
115 | "
\n",
116 | " \n",
117 | " 2 | \n",
118 | " 2 | \n",
119 | " AAL | \n",
120 | " 2014-12-31 | \n",
121 | " 4.668000e+09 | \n",
122 | " -160000000.0 | \n",
123 | " -1.500000e+08 | \n",
124 | " 143.0 | \n",
125 | " -5.311000e+09 | \n",
126 | " 1.513500e+10 | \n",
127 | " 60.0 | \n",
128 | " ... | \n",
129 | " 1.175000e+10 | \n",
130 | " 1.340400e+10 | \n",
131 | " 2.021000e+09 | \n",
132 | " 4.120400e+10 | \n",
133 | " 4.322500e+10 | \n",
134 | " 4.265000e+10 | \n",
135 | " 0.0 | \n",
136 | " 2014.0 | \n",
137 | " 4.02 | \n",
138 | " 7.169154e+08 | \n",
139 | "
\n",
140 | " \n",
141 | " 3 | \n",
142 | " 3 | \n",
143 | " AAL | \n",
144 | " 2015-12-31 | \n",
145 | " 5.102000e+09 | \n",
146 | " 352000000.0 | \n",
147 | " -7.080000e+08 | \n",
148 | " 135.0 | \n",
149 | " -6.151000e+09 | \n",
150 | " 1.159100e+10 | \n",
151 | " 51.0 | \n",
152 | " ... | \n",
153 | " 9.985000e+09 | \n",
154 | " 1.360500e+10 | \n",
155 | " 5.635000e+09 | \n",
156 | " 4.278000e+10 | \n",
157 | " 4.841500e+10 | \n",
158 | " 4.099000e+10 | \n",
159 | " 0.0 | \n",
160 | " 2015.0 | \n",
161 | " 11.39 | \n",
162 | " 6.681299e+08 | \n",
163 | "
\n",
164 | " \n",
165 | " 4 | \n",
166 | " 4 | \n",
167 | " AAP | \n",
168 | " 2012-12-29 | \n",
169 | " 2.409453e+09 | \n",
170 | " -89482000.0 | \n",
171 | " 6.000000e+05 | \n",
172 | " 32.0 | \n",
173 | " -2.711820e+08 | \n",
174 | " 5.202150e+08 | \n",
175 | " 23.0 | \n",
176 | " ... | \n",
177 | " 3.184200e+09 | \n",
178 | " 2.559638e+09 | \n",
179 | " 1.210694e+09 | \n",
180 | " 3.403120e+09 | \n",
181 | " 4.613814e+09 | \n",
182 | " 6.205003e+09 | \n",
183 | " -27095000.0 | \n",
184 | " 2012.0 | \n",
185 | " 5.29 | \n",
186 | " 7.328355e+07 | \n",
187 | "
\n",
188 | " \n",
189 | "
\n",
190 | "
5 rows × 79 columns
\n",
191 | "
"
192 | ],
193 | "text/plain": [
194 | " Unnamed: 0 Ticker Symbol Period Ending Accounts Payable \\\n",
195 | "0 0 AAL 2012-12-31 3.068000e+09 \n",
196 | "1 1 AAL 2013-12-31 4.975000e+09 \n",
197 | "2 2 AAL 2014-12-31 4.668000e+09 \n",
198 | "3 3 AAL 2015-12-31 5.102000e+09 \n",
199 | "4 4 AAP 2012-12-29 2.409453e+09 \n",
200 | "\n",
201 | " Accounts Receivable Add'l income/expense items After Tax ROE \\\n",
202 | "0 -222000000.0 -1.961000e+09 23.0 \n",
203 | "1 -93000000.0 -2.723000e+09 67.0 \n",
204 | "2 -160000000.0 -1.500000e+08 143.0 \n",
205 | "3 352000000.0 -7.080000e+08 135.0 \n",
206 | "4 -89482000.0 6.000000e+05 32.0 \n",
207 | "\n",
208 | " Capital Expenditures Capital Surplus Cash Ratio \\\n",
209 | "0 -1.888000e+09 4.695000e+09 53.0 \n",
210 | "1 -3.114000e+09 1.059200e+10 75.0 \n",
211 | "2 -5.311000e+09 1.513500e+10 60.0 \n",
212 | "3 -6.151000e+09 1.159100e+10 51.0 \n",
213 | "4 -2.711820e+08 5.202150e+08 23.0 \n",
214 | "\n",
215 | " ... Total Current Assets \\\n",
216 | "0 ... 7.072000e+09 \n",
217 | "1 ... 1.432300e+10 \n",
218 | "2 ... 1.175000e+10 \n",
219 | "3 ... 9.985000e+09 \n",
220 | "4 ... 3.184200e+09 \n",
221 | "\n",
222 | " Total Current Liabilities Total Equity Total Liabilities \\\n",
223 | "0 9.011000e+09 -7.987000e+09 2.489100e+10 \n",
224 | "1 1.380600e+10 -2.731000e+09 4.500900e+10 \n",
225 | "2 1.340400e+10 2.021000e+09 4.120400e+10 \n",
226 | "3 1.360500e+10 5.635000e+09 4.278000e+10 \n",
227 | "4 2.559638e+09 1.210694e+09 3.403120e+09 \n",
228 | "\n",
229 | " Total Liabilities & Equity Total Revenue Treasury Stock For Year \\\n",
230 | "0 1.690400e+10 2.485500e+10 -367000000.0 2012.0 \n",
231 | "1 4.227800e+10 2.674300e+10 0.0 2013.0 \n",
232 | "2 4.322500e+10 4.265000e+10 0.0 2014.0 \n",
233 | "3 4.841500e+10 4.099000e+10 0.0 2015.0 \n",
234 | "4 4.613814e+09 6.205003e+09 -27095000.0 2012.0 \n",
235 | "\n",
236 | " Earnings Per Share Estimated Shares Outstanding \n",
237 | "0 -5.60 3.350000e+08 \n",
238 | "1 -11.25 1.630222e+08 \n",
239 | "2 4.02 7.169154e+08 \n",
240 | "3 11.39 6.681299e+08 \n",
241 | "4 5.29 7.328355e+07 \n",
242 | "\n",
243 | "[5 rows x 79 columns]"
244 | ]
245 | },
246 | "execution_count": 13,
247 | "metadata": {},
248 | "output_type": "execute_result"
249 | }
250 | ],
251 | "source": [
252 | "fundamentals_df.head()"
253 | ]
254 | },
255 | {
256 | "cell_type": "code",
257 | "execution_count": 16,
258 | "metadata": {
259 | "collapsed": false,
260 | "scrolled": true
261 | },
262 | "outputs": [
263 | {
264 | "data": {
265 | "text/plain": [
266 | "Index(['Unnamed: 0', 'Ticker Symbol', 'Period Ending', 'Accounts Payable',\n",
267 | " 'Accounts Receivable', 'Add'l income/expense items', 'After Tax ROE',\n",
268 | " 'Capital Expenditures', 'Capital Surplus', 'Cash Ratio',\n",
269 | " 'Cash and Cash Equivalents', 'Changes in Inventories', 'Common Stocks',\n",
270 | " 'Cost of Revenue', 'Current Ratio', 'Deferred Asset Charges',\n",
271 | " 'Deferred Liability Charges', 'Depreciation',\n",
272 | " 'Earnings Before Interest and Tax', 'Earnings Before Tax',\n",
273 | " 'Effect of Exchange Rate',\n",
274 | " 'Equity Earnings/Loss Unconsolidated Subsidiary', 'Fixed Assets',\n",
275 | " 'Goodwill', 'Gross Margin', 'Gross Profit', 'Income Tax',\n",
276 | " 'Intangible Assets', 'Interest Expense', 'Inventory', 'Investments',\n",
277 | " 'Liabilities', 'Long-Term Debt', 'Long-Term Investments',\n",
278 | " 'Minority Interest', 'Misc. Stocks', 'Net Borrowings', 'Net Cash Flow',\n",
279 | " 'Net Cash Flow-Operating', 'Net Cash Flows-Financing',\n",
280 | " 'Net Cash Flows-Investing', 'Net Income', 'Net Income Adjustments',\n",
281 | " 'Net Income Applicable to Common Shareholders',\n",
282 | " 'Net Income-Cont. Operations', 'Net Receivables', 'Non-Recurring Items',\n",
283 | " 'Operating Income', 'Operating Margin', 'Other Assets',\n",
284 | " 'Other Current Assets', 'Other Current Liabilities', 'Other Equity',\n",
285 | " 'Other Financing Activities', 'Other Investing Activities',\n",
286 | " 'Other Liabilities', 'Other Operating Activities',\n",
287 | " 'Other Operating Items', 'Pre-Tax Margin', 'Pre-Tax ROE',\n",
288 | " 'Profit Margin', 'Quick Ratio', 'Research and Development',\n",
289 | " 'Retained Earnings', 'Sale and Purchase of Stock',\n",
290 | " 'Sales, General and Admin.',\n",
291 | " 'Short-Term Debt / Current Portion of Long-Term Debt',\n",
292 | " 'Short-Term Investments', 'Total Assets', 'Total Current Assets',\n",
293 | " 'Total Current Liabilities', 'Total Equity', 'Total Liabilities',\n",
294 | " 'Total Liabilities & Equity', 'Total Revenue', 'Treasury Stock',\n",
295 | " 'For Year', 'Earnings Per Share', 'Estimated Shares Outstanding'],\n",
296 | " dtype='object')"
297 | ]
298 | },
299 | "execution_count": 16,
300 | "metadata": {},
301 | "output_type": "execute_result"
302 | }
303 | ],
304 | "source": [
305 | "fundamentals_df.columns"
306 | ]
307 | },
308 | {
309 | "cell_type": "code",
310 | "execution_count": 9,
311 | "metadata": {
312 | "collapsed": false
313 | },
314 | "outputs": [
315 | {
316 | "data": {
317 | "text/html": [
318 | "\n",
319 | "
\n",
320 | " \n",
321 | " \n",
322 | " | \n",
323 | " date | \n",
324 | " symbol | \n",
325 | " open | \n",
326 | " close | \n",
327 | " low | \n",
328 | " high | \n",
329 | " volume | \n",
330 | "
\n",
331 | " \n",
332 | " \n",
333 | " \n",
334 | " 0 | \n",
335 | " 2016-01-05 | \n",
336 | " WLTW | \n",
337 | " 123.430000 | \n",
338 | " 125.839996 | \n",
339 | " 122.309998 | \n",
340 | " 126.250000 | \n",
341 | " 2163600.0 | \n",
342 | "
\n",
343 | " \n",
344 | " 1 | \n",
345 | " 2016-01-06 | \n",
346 | " WLTW | \n",
347 | " 125.239998 | \n",
348 | " 119.980003 | \n",
349 | " 119.940002 | \n",
350 | " 125.540001 | \n",
351 | " 2386400.0 | \n",
352 | "
\n",
353 | " \n",
354 | " 2 | \n",
355 | " 2016-01-07 | \n",
356 | " WLTW | \n",
357 | " 116.379997 | \n",
358 | " 114.949997 | \n",
359 | " 114.930000 | \n",
360 | " 119.739998 | \n",
361 | " 2489500.0 | \n",
362 | "
\n",
363 | " \n",
364 | " 3 | \n",
365 | " 2016-01-08 | \n",
366 | " WLTW | \n",
367 | " 115.480003 | \n",
368 | " 116.620003 | \n",
369 | " 113.500000 | \n",
370 | " 117.440002 | \n",
371 | " 2006300.0 | \n",
372 | "
\n",
373 | " \n",
374 | " 4 | \n",
375 | " 2016-01-11 | \n",
376 | " WLTW | \n",
377 | " 117.010002 | \n",
378 | " 114.970001 | \n",
379 | " 114.089996 | \n",
380 | " 117.330002 | \n",
381 | " 1408600.0 | \n",
382 | "
\n",
383 | " \n",
384 | "
\n",
385 | "
"
386 | ],
387 | "text/plain": [
388 | " date symbol open close low high \\\n",
389 | "0 2016-01-05 WLTW 123.430000 125.839996 122.309998 126.250000 \n",
390 | "1 2016-01-06 WLTW 125.239998 119.980003 119.940002 125.540001 \n",
391 | "2 2016-01-07 WLTW 116.379997 114.949997 114.930000 119.739998 \n",
392 | "3 2016-01-08 WLTW 115.480003 116.620003 113.500000 117.440002 \n",
393 | "4 2016-01-11 WLTW 117.010002 114.970001 114.089996 117.330002 \n",
394 | "\n",
395 | " volume \n",
396 | "0 2163600.0 \n",
397 | "1 2386400.0 \n",
398 | "2 2489500.0 \n",
399 | "3 2006300.0 \n",
400 | "4 1408600.0 "
401 | ]
402 | },
403 | "execution_count": 9,
404 | "metadata": {},
405 | "output_type": "execute_result"
406 | }
407 | ],
408 | "source": [
409 | "prices_df.head()"
410 | ]
411 | },
412 | {
413 | "cell_type": "code",
414 | "execution_count": 10,
415 | "metadata": {
416 | "collapsed": false
417 | },
418 | "outputs": [
419 | {
420 | "data": {
421 | "text/plain": [
422 | "(851264, 7)"
423 | ]
424 | },
425 | "execution_count": 10,
426 | "metadata": {},
427 | "output_type": "execute_result"
428 | }
429 | ],
430 | "source": [
431 | "prices_df.shape"
432 | ]
433 | },
434 | {
435 | "cell_type": "code",
436 | "execution_count": null,
437 | "metadata": {
438 | "collapsed": true
439 | },
440 | "outputs": [],
441 | "source": []
442 | }
443 | ],
444 | "metadata": {
445 | "kernelspec": {
446 | "display_name": "Python 3",
447 | "language": "python",
448 | "name": "python3"
449 | },
450 | "language_info": {
451 | "codemirror_mode": {
452 | "name": "ipython",
453 | "version": 3
454 | },
455 | "file_extension": ".py",
456 | "mimetype": "text/x-python",
457 | "name": "python",
458 | "nbconvert_exporter": "python",
459 | "pygments_lexer": "ipython3",
460 | "version": "3.5.2"
461 | }
462 | },
463 | "nbformat": 4,
464 | "nbformat_minor": 2
465 | }
466 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Rawlsian Fair Machine Learning for Contextual Bandits
2 |
3 | Implementation and evaluation of provably Rawlsian fair ML algorithms for contextual bandits.
4 |
5 | Related Work/Citations:
6 |
7 | * Rawlsian Fairness for Machine Learning (https://arxiv.org/abs/1610.09559)
8 | * Unbiased Offline Evaluation of Contextual-bandit-based News Article Recommendation Algorithms (https://arxiv.org/abs/1003.5956)
9 |
10 | ## Installation Instructions
11 |
12 | ### (Option 1) Setting Up virtualenv
13 |
14 | #### OSX
15 |
16 | Install Python 3 from the official [installer package](https://www.python.org/downloads/). This lets you run `python3` and `pip3`. The software is installed into `/Library/Frameworks/Python.framework/Versions/3.x/bin/`.
17 |
18 | Install virtualenv for Python 3 for the user only (which is placed into `~/Library/Python/3.x/bin`):
19 |
20 | ```
21 | $ pip3 install --user virtualenv
22 | ```
23 |
24 | Create the following alias in your `~/.bash_profile`:
25 |
26 | ```
27 | $ echo "alias virtualenv3='~/Library/Python/3.x/bin/virtualenv'" >> ~/.bash_profile
28 | ```
29 |
30 | Create a local virtualenv and activate it:
31 |
32 | ```
33 | $ virtualenv3 fairml
34 | $ source fairml/bin/activate
35 | ```
36 |
37 | With the virtualenv active, install the project requirements into your virtualenv:
38 |
39 | ```
40 | $ pip install -r requirements.txt
41 | ```
42 |
43 | Create a Python kernel for Jupyter that uses your virtualenv:
44 |
45 | ```
46 | $ python -m ipykernel install --user --name=fairml
47 | ```
48 |
49 | You can then launch Jupyter using `jupyter notebook` from inside the project directory and change the kernel to `fairml`.
50 |
51 | ### (Option 2) Using Docker
52 |
53 | You can install [Docker](https://www.docker.com) and use a standard configuration such as `all-spark-notebook` to run the project files.
54 |
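55 | For example, a minimal invocation assuming the standard `jupyter/all-spark-notebook` image from the Jupyter Docker Stacks (adjust the port and volume mount as needed):
56 |
57 | ```
58 | $ docker run -it --rm -p 8888:8888 -v "$PWD":/home/jovyan/work jupyter/all-spark-notebook
59 | ```
60 |
61 | This mounts the project directory into the container's default notebook workspace and serves Jupyter on port 8888.
62 |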
--------------------------------------------------------------------------------
/RidgeFair.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {
7 | "collapsed": true
8 | },
9 | "outputs": [],
10 | "source": [
11 | "import numpy as np\n",
12 | "from numpy import transpose\n",
13 | "from numpy.linalg import inv, det\n",
14 | "from scipy.stats import norm\n",
15 | "from math import sqrt\n",
16 | "from numpy import log"
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": 31,
22 | "metadata": {
23 | "collapsed": true
24 | },
25 | "outputs": [],
26 | "source": [
27 | "def ridgeFair(X, Y, k, d, _delta, T, _lambda):\n",
28 | " \"\"\"\n",
29 | " Simulates T rounds of ridgeFair.\n",
30 | " \n",
31 | " :param X: a 3-axis (T, k, d) ndarray of d-dimensional context vectors for each\n",
32 | " time-step and arm\n",
33 | " :param Y: a T x k ndarray of reward function output for each context vector\n",
34 | " :param k: the number of arms\n",
35 | " :param d: the number of features\n",
36 | " :param _delta: confidence parameter\n",
37 | " :param T: the number of iterations\n",
38 | " :param _lambda: \n",
39 | " \"\"\"\n",
40 | " picks = []\n",
41 | " for t in range (T): # for t >= 1\n",
42 | " for i in range(k): # for 1 <= i <= k\n",
43 | " R = 1\n",
44 | " intervals = []\n",
45 | " try:\n",
46 | " X_i = X[:t,i] # design matrix\n",
47 | " Y_i = Y[:t,i] # same with Y\n",
48 | " x_ti = X[t,i] # feature vector for arm i in round t\n",
49 | "\n",
50 | " X_iT = transpose(X_i)\n",
51 | " _idenD = np.identity(d)\n",
52 | " V_it = X_iT.dot(X_i) + (_lambda*_idenD) # computing V_it as line 5\n",
53 | "\n",
54 | " B_it = inv(V_it).dot(X_iT).dot(Y_i) # computing line 6\n",
55 | " \n",
56 | " y_ti = transpose(x_ti).dot(B_it) #computing line 7\n",
57 | " \n",
58 | " V_itI = inv(V_it) # inverse of V_it\n",
59 | " _wti1 = sqrt(transpose(x_ti).dot(V_itI).dot(x_ti))\n",
60 | " _wti2 = R * sqrt(d*log((1+(t/_lambda))/_delta)) + sqrt(_lambda)\n",
61 | " w_ti = _wti1 * _wti2 # computing W_ti as line 8\n",
62 | "\n",
63 | " intervals.append([y_ti - w_ti, y_ti + w_ti]) # line 9\n",
64 | " except:\n",
65 | " print('Error in assigning interval value.')\n",
66 | " intervals = None\n",
67 | " break\n",
68 | " if not intervals:\n",
69 | " picks.append(np.random.randint(0,k))\n",
70 | " else:\n",
71 | " i_st = np.argmax(np.array(intervals)[:,1]) # line 10\n",
72 | " chain = compute_chain(i_st, np.array(intervals), k) # line 11\n",
73 | " picks.append(np.random.choice(chain)) # play uniformly random from chain\n",
74 | " \n",
75 | " best = [Y[i].max() for i in range(2, T)]\n",
76 | " performance = [Y[t][picks[t-2]] for t in range(2, T)]\n",
77 | " print('Cumulative Regret: {0}'.format(sum(best) - sum(performance)))\n",
78 | " print('Final Regret: {0}'.format(best[-1] - performance[-1]))\n",
79 | " "
80 | ]
81 | },
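82 | {
83 | "cell_type": "markdown",
84 | "metadata": {},
85 | "source": [
86 | "As a reading aid, the width `w_ti = _wti1 * _wti2` computed in the cell above is the standard ridge-regression confidence bound\n",
87 | "\n",
88 | "$$w_{t,i} = \\sqrt{x_{t,i}^\\top V_{t,i}^{-1} x_{t,i}}\\left(R\\sqrt{d\\log\\tfrac{1 + t/\\lambda}{\\delta}} + \\sqrt{\\lambda}\\right),$$\n",
89 | "\n",
90 | "where $V_{t,i} = X_i^\\top X_i + \\lambda I$ and $R$ is the noise scale set in the loop."
91 | ]
92 | },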
82 | {
83 | "cell_type": "code",
84 | "execution_count": 32,
85 | "metadata": {
86 | "collapsed": true
87 | },
88 | "outputs": [],
89 | "source": [
90 | "def compute_chain(i_st, intervals, k):\n",
91 | " # Sort intervals by decreasing order.\n",
92 | " chain = [i_st]\n",
93 | " ordering = np.argsort(intervals[:,1])[::-1]\n",
94 | " intervals = intervals[ordering,:]\n",
95 | " \n",
96 | " lowest_in_chain = intervals[0][0]\n",
97 | " for i in range(len(intervals)):\n",
98 | " if intervals[i][1] >= lowest_in_chain:\n",
99 | " chain.append(i)\n",
100 | " lowest_in_chain = min(lowest_in_chain, intervals[i][0])\n",
101 | " else:\n",
102 | " return chain\n",
103 | " return chain"
104 | ]
105 | },
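106 | {
107 | "cell_type": "code",
108 | "execution_count": null,
109 | "metadata": {},
110 | "outputs": [],
111 | "source": [
112 | "# Hypothetical sanity check of compute_chain: arms 1, 2 and 0 have overlapping\n",
113 | "# intervals and chain together; arm 3 lies strictly below the chain and is excluded.\n",
114 | "toy = np.array([[0.0, 1.0], [0.8, 1.5], [0.9, 1.2], [-1.0, -0.5]])\n",
115 | "compute_chain(np.argmax(toy[:,1]), toy, 4) # -> [1, 2, 0]"
116 | ]
117 | },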
106 | {
107 | "cell_type": "code",
108 | "execution_count": 33,
109 | "metadata": {
110 | "collapsed": true
111 | },
112 | "outputs": [],
113 | "source": [
114 | "def beta(k, d, c):\n",
115 | " \"\"\" \n",
116 | " Generates the scaled down feature weights for a true model from the distribution\n",
117 | " β ∼ U[0, c]^d.\n",
118 | " \n",
119 | " :param k: the number of arms \n",
120 | " :param d: the number of features\n",
121 | " :param c: the scale of the feature weights\n",
122 | " \"\"\"\n",
123 | " return np.random.uniform(0, c+1, size=(k, d))"
124 | ]
125 | },
126 | {
127 | "cell_type": "code",
128 | "execution_count": 34,
129 | "metadata": {
130 | "scrolled": true
131 | },
132 | "outputs": [
133 | {
134 | "name": "stdout",
135 | "output_type": "stream",
136 | "text": [
137 | "Cumulative Regret: 4636.449117347242\n",
138 | "Final Regret: 0.9403759272439949\n"
139 | ]
140 | }
141 | ],
142 | "source": [
143 | "k = 2\n",
144 | "c = 10\n",
145 | "d = 10\n",
146 | "T = 1000\n",
147 | "X = np.random.uniform(0, 1, size=(T, k, d)) # 3-axis ndarray\n",
148 | "B = beta(k, d, c) # true parameters. B[i]: params for arm i\n",
149 | "Y = np.array([np.diag(X[t].dot(transpose(B))) for t in range(T)])\n",
150 | "ridgeFair(X, Y, k, d, 0.05, T, 1)"
151 | ]
152 | },
153 | {
154 | "cell_type": "code",
155 | "execution_count": null,
156 | "metadata": {
157 | "collapsed": true
158 | },
159 | "outputs": [],
160 | "source": []
161 | },
162 | {
163 | "cell_type": "code",
164 | "execution_count": null,
165 | "metadata": {
166 | "collapsed": true
167 | },
168 | "outputs": [],
169 | "source": []
170 | },
171 | {
172 | "cell_type": "code",
173 | "execution_count": null,
174 | "metadata": {
175 | "collapsed": true
176 | },
177 | "outputs": [],
178 | "source": []
179 | }
180 | ],
181 | "metadata": {
182 | "kernelspec": {
183 | "display_name": "Python 3",
184 | "language": "python",
185 | "name": "python3"
186 | },
187 | "language_info": {
188 | "codemirror_mode": {
189 | "name": "ipython",
190 | "version": 3
191 | },
192 | "file_extension": ".py",
193 | "mimetype": "text/x-python",
194 | "name": "python",
195 | "nbconvert_exporter": "python",
196 | "pygments_lexer": "ipython3",
197 | "version": "3.5.2"
198 | }
199 | },
200 | "nbformat": 4,
201 | "nbformat_minor": 2
202 | }
203 |
--------------------------------------------------------------------------------
/Yahoo.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {
7 | "collapsed": true
8 | },
9 | "outputs": [],
10 | "source": [
11 | "\"\"\"\n",
12 | "Data wrangling for the Yahoo! Front Page Today Module User Click Log Dataset, version 1.0.\n",
13 | "\n",
14 | "Inspired by:\n",
15 | "Unbiased Offline Evaluation of Contextual-bandit-based News Article Recommendation Algorithms \n",
16 | "[https://arxiv.org/pdf/1003.5956.pdf]\n",
17 | "\n",
18 | "Documentation is per reST format used in Sphinx.\n",
19 | "\n",
20 | "Dataset: https://webscope.sandbox.yahoo.com/catalog.php?datatype=r&did=49\n",
21 | "Author: jtcho (jonathan.t.cho@gmail.com)\n",
22 | "\n",
23 | "Many thanks to Yahoo! Research for allowing me to use their dataset.\n",
24 | "\"\"\"\n",
25 | "\n",
26 | "import pandas as pd\n",
27 | "import numpy as np\n",
28 | "import sqlite3\n",
29 | "import time\n",
30 | "import os\n",
31 | "\n",
32 | "dump_dir = 'R6/'\n",
33 | "data_dirs = ['clicks_1/']\n",
34 | "engine = sqlite3.connect('yahoo')"
35 | ]
36 | },
37 | {
38 | "cell_type": "code",
39 | "execution_count": 2,
40 | "metadata": {
41 | "collapsed": true
42 | },
43 | "outputs": [],
44 | "source": [
45 | "# Database cleanup.\n",
46 | "\n",
47 | "c = engine.cursor()\n",
48 | "c.execute('DROP TABLE articles')\n",
49 | "engine.commit()"
50 | ]
51 | },
52 | {
53 | "cell_type": "code",
54 | "execution_count": 3,
55 | "metadata": {
56 | "collapsed": true
57 | },
58 | "outputs": [],
59 | "source": [
60 | "def extract_article_info(path, item_limit=sys.maxsize):\n",
61 | " \"\"\" \n",
62 | " Given an R6A dataset file, extracts all of the common article vectors\n",
63 | " and compiles them in a single dataframe.\n",
64 | " Note that each article has a constant vector associated with it.\n",
65 | " \n",
66 | " :param path: the file path for the dataset\n",
67 | " :param item_limit: limits the number of items to parse\n",
68 | " :returns: Pandas dataframe containing article vectors indexed by id\n",
69 | " \"\"\"\n",
70 | " t0 = time.time()\n",
71 | " num_iters = 0\n",
72 | " _articles_df = pd.DataFrame(columns=['2', '3', '4', '5', '6', '1'])\n",
73 | " with open(path) as f:\n",
74 | " for line in f:\n",
75 | " num_iters += 1 \n",
76 | " if num_iters > item_limit:\n",
77 | " break\n",
78 | " parts = line.strip().split('|')\n",
79 | " for i in range(2, len(parts)):\n",
80 | " # Extract article vector information.\n",
81 | " article_info = parts[i].split()\n",
82 | " article_id = article_info[0]\n",
83 | " if article_id in _articles_df.index:\n",
84 | " continue\n",
85 | " article_info_parts = list(map(lambda x : x.split(':')[1], article_info[1:]))\n",
86 | " article_info = dict(zip(_articles_df.columns, article_info_parts))\n",
87 | " # I append to an existing DF for quick de-duplication. Also\n",
88 | " # empirically, I observed that there is a small number of unique\n",
89 | " # articles for any dataset, so the overhead of doing this is minimized.\n",
90 | " _articles_df.loc[article_id] = pd.Series(article_info)\n",
91 | "\n",
92 | " t1 = time.time()\n",
93 | " print('Finished processing {0} items in {1} seconds.'.format(num_iters-1, t1 - t0))\n",
94 | " return _articles_df"
95 | ]
96 | },
97 | {
98 | "cell_type": "code",
99 | "execution_count": 7,
100 | "metadata": {},
101 | "outputs": [],
102 | "source": [
103 | "def process_click_file(path, item_limit=sys.maxsize):\n",
104 | " \"\"\"\n",
105 | " Given an R6A dataset file, parses all of the view event logs and \n",
106 | " compiles them in a single dataframe.\n",
107 | " \n",
108 | " A single view event consists of a unix timestamp, a 6-dimensional vector of\n",
109 | " features describing the user, a set of 20 articles in the article pool\n",
110 | " (the 20 arms of the multi-arm bandit), the id of the article displayed, and\n",
111 | " a boolean marking whether the article was clicked.\n",
112 | " \"\"\"\n",
113 | " t0 = time.time()\n",
114 | " num_iters = 0\n",
115 | " views_cols = ['time', 'user_1', 'user_2', 'user_3', 'user_4', 'user_5', 'user_6', \n",
116 | " 'article_pool', 'displayed', 'clicked']\n",
117 | " views = []\n",
118 | " with open(path) as f:\n",
119 | " for line in f:\n",
120 | " num_iters += 1\n",
121 | " if num_iters > item_limit:\n",
122 | " break\n",
123 | " parts = line.strip().split('|')\n",
124 | " unix_timestamp, disp_article_id, clicked = parts[0].split()\n",
125 | " user_info = list(map(lambda x : x.split(':')[1], parts[1].split()[1:]))\n",
126 | " user_info = dict(zip(views_cols[1:7], user_info))\n",
127 | " user_info['time'] = unix_timestamp\n",
128 | " user_info['displayed'] = disp_article_id\n",
129 | " user_info['clicked'] = clicked\n",
130 | " \n",
131 | " # Extract article vector information.\n",
132 | " article_ids = [parts[i].split()[0] for i in range(2, len(parts))]\n",
133 | " user_info['article_pool'] = article_ids\n",
134 | " # In this case, we construct the DF at the end because we're creating a new row\n",
135 | " # for *every* item... over ~4 million items that becomes very expensive!\n",
136 | " views.append(user_info)\n",
137 | "\n",
138 | " t1 = time.time()\n",
139 | " print('{0}: Finished processing {1} items in {2} seconds.'.format(path, num_iters-1, t1 - t0))\n",
140 | " return pd.DataFrame(views, columns=views_cols)"
141 | ]
142 | },
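143 | {
144 | "cell_type": "markdown",
145 | "metadata": {},
146 | "source": [
147 | "For reference, a hypothetical (abbreviated) log line in the shape the parser above assumes: `parts[0]` carries the timestamp, displayed article id, and click flag; `parts[1]` the user features; each remaining part one article in the pool.\n",
148 | "\n",
149 | "```\n",
150 | "1241160900 109513 0 |user 2:0.000012 3:0.000048 ... 1:1.000000 |109498 2:0.306008 ... |109509 ...\n",
151 | "```"
152 | ]
153 | },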
143 | {
144 | "cell_type": "code",
145 | "execution_count": 4,
146 | "metadata": {},
147 | "outputs": [
148 | {
149 | "name": "stdout",
150 | "output_type": "stream",
151 | "text": [
152 | "Finished processing 4681991 items in 150.5566005706787 seconds.\n"
153 | ]
154 | }
155 | ],
156 | "source": [
157 | "# Run to populate the articles table.\n",
158 | "articles_df = extract_article_info(dump_dir + 'clicks_1.txt', sys.maxsize).apply(pd.to_numeric)\n",
159 | "articles_df.to_sql('articles', engine, if_exists='replace')"
160 | ]
161 | },
162 | {
163 | "cell_type": "code",
164 | "execution_count": 9,
165 | "metadata": {},
166 | "outputs": [
167 | {
168 | "name": "stdout",
169 | "output_type": "stream",
170 | "text": [
171 | "clicks_1/xaa: Finished processing 99999 items in 3.1617259979248047 seconds.\n",
172 | "clicks_1/xab: Finished processing 99999 items in 3.2025344371795654 seconds.\n",
173 | "clicks_1/xac: Finished processing 99999 items in 3.3164455890655518 seconds.\n",
174 | "clicks_1/xad: Finished processing 99999 items in 3.380336046218872 seconds.\n",
175 | "clicks_1/xae: Finished processing 99999 items in 3.0821828842163086 seconds.\n",
176 | "clicks_1/xaf: Finished processing 99999 items in 3.1906492710113525 seconds.\n",
177 | "clicks_1/xag: Finished processing 99999 items in 3.3087258338928223 seconds.\n",
178 | "clicks_1/xah: Finished processing 99999 items in 3.2571945190429688 seconds.\n",
179 | "clicks_1/xai: Finished processing 99999 items in 3.278446674346924 seconds.\n",
180 | "clicks_1/xaj: Finished processing 99999 items in 3.2920501232147217 seconds.\n",
181 | "clicks_1/xak: Finished processing 99999 items in 3.431187629699707 seconds.\n",
182 | "clicks_1/xal: Finished processing 99999 items in 3.40493106842041 seconds.\n",
183 | "clicks_1/xam: Finished processing 99999 items in 3.1150004863739014 seconds.\n",
184 | "clicks_1/xan: Finished processing 99999 items in 3.1503725051879883 seconds.\n",
185 | "clicks_1/xao: Finished processing 99999 items in 3.3162639141082764 seconds.\n",
186 | "clicks_1/xap: Finished processing 99999 items in 3.09061598777771 seconds.\n",
187 | "clicks_1/xaq: Finished processing 99999 items in 3.4392073154449463 seconds.\n",
188 | "clicks_1/xar: Finished processing 99999 items in 3.443249464035034 seconds.\n",
189 | "clicks_1/xas: Finished processing 99999 items in 3.5337443351745605 seconds.\n",
190 | "clicks_1/xat: Finished processing 99999 items in 3.4647445678710938 seconds.\n",
191 | "clicks_1/xau: Finished processing 99999 items in 3.6430513858795166 seconds.\n",
192 | "clicks_1/xav: Finished processing 99999 items in 3.6271255016326904 seconds.\n",
193 | "clicks_1/xaw: Finished processing 99999 items in 3.309832811355591 seconds.\n",
194 | "clicks_1/xax: Finished processing 99999 items in 3.460949420928955 seconds.\n",
195 | "clicks_1/xay: Finished processing 99999 items in 3.426335573196411 seconds.\n",
196 | "clicks_1/xaz: Finished processing 99999 items in 3.510620594024658 seconds.\n",
197 | "clicks_1/xba: Finished processing 99999 items in 3.6194756031036377 seconds.\n",
198 | "clicks_1/xbb: Finished processing 99999 items in 3.7689321041107178 seconds.\n",
199 | "clicks_1/xbc: Finished processing 99999 items in 3.7527005672454834 seconds.\n",
200 | "clicks_1/xbd: Finished processing 99999 items in 3.559547185897827 seconds.\n",
201 | "clicks_1/xbe: Finished processing 99999 items in 3.664827585220337 seconds.\n",
202 | "clicks_1/xbf: Finished processing 99999 items in 3.7467215061187744 seconds.\n",
203 | "clicks_1/xbg: Finished processing 99999 items in 3.2975916862487793 seconds.\n",
204 | "clicks_1/xbh: Finished processing 99999 items in 3.1932389736175537 seconds.\n",
205 | "clicks_1/xbi: Finished processing 99999 items in 3.480050802230835 seconds.\n",
206 | "clicks_1/xbj: Finished processing 99999 items in 3.307481050491333 seconds.\n",
207 | "clicks_1/xbk: Finished processing 99999 items in 3.3213932514190674 seconds.\n",
208 | "clicks_1/xbl: Finished processing 99999 items in 3.602836847305298 seconds.\n",
209 | "clicks_1/xbm: Finished processing 99999 items in 3.3665266036987305 seconds.\n",
210 | "clicks_1/xbn: Finished processing 99999 items in 3.5517754554748535 seconds.\n",
211 | "clicks_1/xbo: Finished processing 99999 items in 3.5413339138031006 seconds.\n",
212 | "clicks_1/xbp: Finished processing 99999 items in 3.082970380783081 seconds.\n",
213 | "clicks_1/xbq: Finished processing 99999 items in 3.1382272243499756 seconds.\n",
214 | "clicks_1/xbr: Finished processing 99999 items in 3.2157583236694336 seconds.\n",
215 | "clicks_1/xbs: Finished processing 99999 items in 3.396573543548584 seconds.\n",
216 | "clicks_1/xbt: Finished processing 99999 items in 3.4965860843658447 seconds.\n",
217 | "clicks_1/xbu: Finished processing 81991 items in 2.8793578147888184 seconds.\n"
218 | ]
219 | }
220 | ],
221 | "source": [
222 | "for fname in os.listdir('clicks_1'):\n",
223 | " if fname != '.DS_Store':\n",
224 | " result = process_click_file('clicks_1/'+fname)\n",
225 | " result['article_pool'] = result['article_pool'].astype(str)\n",
226 | " result.to_sql('clicks', engine, if_exists='append')"
227 | ]
228 | },
229 | {
230 | "cell_type": "code",
231 | "execution_count": 18,
232 | "metadata": {},
233 | "outputs": [],
234 | "source": [
235 | "#pd.read_sql_query('select * from articles',con=engine).set_index('index')"
236 | ]
237 | },
238 | {
239 | "cell_type": "code",
240 | "execution_count": 11,
241 | "metadata": {},
242 | "outputs": [
243 | {
244 | "data": {
245 | "text/html": [
246 | "\n",
247 | "
\n",
248 | " \n",
249 | " \n",
250 | " | \n",
251 | " count(*) | \n",
252 | "
\n",
253 | " \n",
254 | " \n",
255 | " \n",
256 | " 0 | \n",
257 | " 4681992 | \n",
258 | "
\n",
259 | " \n",
260 | "
\n",
261 | "
"
262 | ],
263 | "text/plain": [
264 | " count(*)\n",
265 | "0 4681992"
266 | ]
267 | },
268 | "execution_count": 11,
269 | "metadata": {},
270 | "output_type": "execute_result"
271 | }
272 | ],
273 | "source": [
274 | "pd.read_sql_query('select count(*) from clicks', con=engine)"
275 | ]
276 | },
277 | {
278 | "cell_type": "code",
279 | "execution_count": null,
280 | "metadata": {
281 | "collapsed": true
282 | },
283 | "outputs": [],
284 | "source": []
285 | }
286 | ],
287 | "metadata": {
288 | "kernelspec": {
289 | "display_name": "Python 3",
290 | "language": "python",
291 | "name": "python3"
292 | },
293 | "language_info": {
294 | "codemirror_mode": {
295 | "name": "ipython",
296 | "version": 3
297 | },
298 | "file_extension": ".py",
299 | "mimetype": "text/x-python",
300 | "name": "python",
301 | "nbconvert_exporter": "python",
302 | "pygments_lexer": "ipython3",
303 | "version": "3.6.0"
304 | }
305 | },
306 | "nbformat": 4,
307 | "nbformat_minor": 2
308 | }
309 |
--------------------------------------------------------------------------------
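
A minimal sketch of the per-line parsing done by process_click_file in the notebook above. The sample line is illustrative only: it mimics the '|'-separated layout the code assumes (a "timestamp displayed_article clicked" header, a user segment of id:value pairs, then one segment per pooled article); it is not taken from the actual Yahoo! log.

# Hypothetical sample line in the layout assumed by process_click_file.
sample = '1241160900 109513 0 |user 1:0.3 2:0.1 3:0.2 4:0.1 5:0.2 6:0.1 |109498 |109509 |109513'

parts = sample.strip().split('|')
# Header segment: unix timestamp, displayed article id, click indicator.
unix_timestamp, disp_article_id, clicked = parts[0].split()
# User segment: drop the leading 'user' token, keep each id:value pair's value.
user_features = [tok.split(':')[1] for tok in parts[1].split()[1:]]
# Remaining segments: one per candidate article; only the ids are kept.
article_pool = [parts[i].split()[0] for i in range(2, len(parts))]

print(unix_timestamp, disp_article_id, clicked)  # 1241160900 109513 0
print(user_features)  # ['0.3', '0.1', '0.2', '0.1', '0.2', '0.1']
print(article_pool)   # ['109498', '109509', '109513']
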
/evaluation_T.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import numpy as np
3 |
4 | from fairml import beta, interval_chaining, top_interval
5 |
6 |
7 | def main():
8 | c_vals = [1.0, 2.0, 5.0, 10.0]
9 |
10 | # Plot: Varying T (# of rounds)
11 | d = 2
12 | k = 2
13 | T_vals = range(3, 1000, 10)
14 |
15 | results = {
16 | '0': {
17 | 'ylabel': 'Average regret - TI',
18 | 'name': 'avg_regret_ti'
19 | },
20 | '1': {
21 | 'ylabel': 'Average regret - IC',
22 | 'name': 'avg_regret_ic'
23 | },
24 | '2': {
25 | 'ylabel': 'Average regret difference (TI - IC)',
26 | 'name': 'avg_regret_diff'
27 | },
28 | '3': {
29 | 'ylabel': 'Cumulative regret - TI',
30 | 'name': 'cum_regret_ti'
31 | },
32 | '4': {
33 | 'ylabel': 'Cumulative regret - IC',
34 | 'name': 'cum_regret_ic'
35 | },
36 | '5': {
37 | 'ylabel': 'Cumulative regret difference (TI - IC)',
38 | 'name': 'cum_regret_diff'
39 | },
40 | '6': {
41 | 'ylabel': 'Final regret - TI',
42 | 'name': 'final_regret_ti'
43 | },
44 | '7': {
45 | 'ylabel': 'Final regret - IC',
46 | 'name': 'final_regret_ic'
47 | },
48 | '8': {
49 | 'ylabel': 'Final regret difference (TI - IC)',
50 | 'name': 'final_regret_diff'
51 | }
52 | }
53 | for _, v in results.items(): # 9 sets of results.
54 | for j in c_vals:
55 | v[str(j)] = []
56 |
57 | for c in c_vals:
58 | for T in T_vals:
59 | cum_regret_tis = []
60 | avg_regret_tis = []
61 | final_regret_tis = []
62 | cum_regret_ics = []
63 | avg_regret_ics = []
64 | final_regret_ics = []
65 | for i in range(0, 50): # 50 trials.
66 | X = np.random.uniform(0, 1, size=(T, k, d))
67 | B = beta(k, d, c)
68 | Y = np.array([np.diag(X[t].dot(np.transpose(B))) for t in range(T)])
69 |
70 | cum_regret_ti, avg_regret_ti, final_regret_ti = top_interval(
71 | X, Y, k, d, 0.05, T, _print_progress=False)
72 | cum_regret_ic, avg_regret_ic, final_regret_ic = interval_chaining(
73 | X, Y, c, k, d, 0.05, T, _print_progress=False)
74 | cum_regret_tis.append(cum_regret_ti)
75 | avg_regret_tis.append(avg_regret_ti)
76 | final_regret_tis.append(final_regret_ti)
77 | cum_regret_ics.append(cum_regret_ic)
78 | avg_regret_ics.append(avg_regret_ic)
79 | final_regret_ics.append(final_regret_ic)
80 |             cum_regret_ti = mean(cum_regret_tis)
81 |             avg_regret_ti = mean(avg_regret_tis)
82 |             final_regret_ti = mean(final_regret_tis)
83 |             cum_regret_ic = mean(cum_regret_ics)
84 |             avg_regret_ic = mean(avg_regret_ics)
85 |             final_regret_ic = mean(final_regret_ics)
86 |
87 | results['0'][str(c)].append(avg_regret_ti)
88 | results['1'][str(c)].append(avg_regret_ic)
89 | results['2'][str(c)].append(abs(avg_regret_ti - avg_regret_ic))
90 | results['3'][str(c)].append(cum_regret_ti)
91 | results['4'][str(c)].append(cum_regret_ic)
92 | results['5'][str(c)].append(abs(cum_regret_ti - cum_regret_ic))
93 | results['6'][str(c)].append(final_regret_ti)
94 | results['7'][str(c)].append(final_regret_ic)
95 | results['8'][str(c)].append(abs(final_regret_ti - final_regret_ic))
96 |
97 | for k, v in results.items():
98 | plt.clf()
99 | c1, = plt.plot(T_vals, results[k]['1.0'], label='c=1')
100 | c2, = plt.plot(T_vals, results[k]['2.0'], label='c=2')
101 | c5, = plt.plot(T_vals, results[k]['5.0'], label='c=5')
102 | c10, = plt.plot(T_vals, results[k]['10.0'], label='c=10')
103 | plt.xticks(np.arange(min(T_vals), max(T_vals) + 1, 200))
104 | plt.legend(handles=[c1, c2, c5, c10])
105 | plt.xlabel('T (# of rounds)', fontsize=18)
106 | plt.ylabel(v['ylabel'], fontsize=15)
107 | plt.savefig('figures_T_50x/T_50x_' + v['name'])
108 |
109 |
110 | def mean(numbers):
111 | return float(sum(numbers)) / max(len(numbers), 1)
112 |
113 |
114 | if __name__ == '__main__':
115 | main()
116 |
--------------------------------------------------------------------------------
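
A note on the synthetic data built inside the trial loop above: Y is assembled by taking the diagonal of a k x k matrix product each round, which evaluates the reward x_{t,i} . beta_i for every arm i. A small self-contained check (dimensions here are illustrative) showing that a direct contraction over the feature axis with np.einsum gives the same result without the k x k intermediate:

import numpy as np

T, k, d, c = 100, 2, 2, 1.0
X = np.random.uniform(0, 1, size=(T, k, d))
B = np.random.uniform(0, c, size=(k, d))  # stands in for fairml.beta(k, d, c)

# As in the script: diagonal of the (k x k) product, round by round.
Y_loop = np.array([np.diag(X[t].dot(np.transpose(B))) for t in range(T)])
# Equivalent contraction over the feature axis only.
Y_einsum = np.einsum('tkd,kd->tk', X, B)

assert np.allclose(Y_loop, Y_einsum)
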
/evaluation_d.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import numpy as np
3 |
4 | from fairml import beta, interval_chaining, top_interval
5 |
6 |
7 | def main():
8 | c_vals = [1.0, 2.0, 5.0, 10.0]
9 |
10 |     # Plot: Varying d (# of features)
11 | d_vals = range(1, 50)
12 | k = 2
13 | T = 1000
14 |
15 | results = {
16 | '0': {
17 | 'ylabel': 'Average regret - TI',
18 | 'name': 'avg_regret_ti'
19 | },
20 | '1': {
21 | 'ylabel': 'Average regret - IC',
22 | 'name': 'avg_regret_ic'
23 | },
24 | '2': {
25 | 'ylabel': 'Average regret difference (TI - IC)',
26 | 'name': 'avg_regret_diff'
27 | },
28 | '3': {
29 | 'ylabel': 'Cumulative regret - TI',
30 | 'name': 'cum_regret_ti'
31 | },
32 | '4': {
33 | 'ylabel': 'Cumulative regret - IC',
34 | 'name': 'cum_regret_ic'
35 | },
36 | '5': {
37 | 'ylabel': 'Cumulative regret difference (TI - IC)',
38 | 'name': 'cum_regret_diff'
39 | },
40 | '6': {
41 | 'ylabel': 'Final regret - TI',
42 | 'name': 'final_regret_ti'
43 | },
44 | '7': {
45 | 'ylabel': 'Final regret - IC',
46 | 'name': 'final_regret_ic'
47 | },
48 | '8': {
49 | 'ylabel': 'Final regret difference (TI - IC)',
50 | 'name': 'final_regret_diff'
51 | }
52 | }
53 | for _, v in results.items(): # 9 sets of results.
54 | for j in c_vals:
55 | v[str(j)] = []
56 |
57 | for c in c_vals:
58 | for d in d_vals:
59 | cum_regret_tis = []
60 | avg_regret_tis = []
61 | final_regret_tis = []
62 | cum_regret_ics = []
63 | avg_regret_ics = []
64 | final_regret_ics = []
65 |             for i in range(0, 50): # 50 trials.
66 | X = np.random.uniform(0, 1, size=(T, k, d))
67 | B = beta(k, d, c)
68 | Y = np.array([np.diag(X[t].dot(np.transpose(B))) for t in range(T)])
69 |
70 | cum_regret_ti, avg_regret_ti, final_regret_ti = top_interval(
71 | X, Y, k, d, 0.05, T, _print_progress=False)
72 | cum_regret_ic, avg_regret_ic, final_regret_ic = interval_chaining(
73 | X, Y, c, k, d, 0.05, T, _print_progress=False)
74 | cum_regret_tis.append(cum_regret_ti)
75 | avg_regret_tis.append(avg_regret_ti)
76 | final_regret_tis.append(final_regret_ti)
77 | cum_regret_ics.append(cum_regret_ic)
78 | avg_regret_ics.append(avg_regret_ic)
79 | final_regret_ics.append(final_regret_ic)
80 |             cum_regret_ti = mean(cum_regret_tis)
81 |             avg_regret_ti = mean(avg_regret_tis)
82 |             final_regret_ti = mean(final_regret_tis)
83 |             cum_regret_ic = mean(cum_regret_ics)
84 |             avg_regret_ic = mean(avg_regret_ics)
85 |             final_regret_ic = mean(final_regret_ics)
86 |
87 | results['0'][str(c)].append(avg_regret_ti)
88 | results['1'][str(c)].append(avg_regret_ic)
89 | results['2'][str(c)].append(abs(avg_regret_ti - avg_regret_ic))
90 | results['3'][str(c)].append(cum_regret_ti)
91 | results['4'][str(c)].append(cum_regret_ic)
92 | results['5'][str(c)].append(abs(cum_regret_ti - cum_regret_ic))
93 | results['6'][str(c)].append(final_regret_ti)
94 | results['7'][str(c)].append(final_regret_ic)
95 | results['8'][str(c)].append(abs(final_regret_ti - final_regret_ic))
96 |
97 | for k, v in results.items():
98 | plt.clf()
99 | c1, = plt.plot(d_vals, results[k]['1.0'], label='c=1')
100 | c2, = plt.plot(d_vals, results[k]['2.0'], label='c=2')
101 | c5, = plt.plot(d_vals, results[k]['5.0'], label='c=5')
102 | c10, = plt.plot(d_vals, results[k]['10.0'], label='c=10')
103 | plt.xticks(np.arange(min(d_vals), max(d_vals) + 1, 10))
104 | plt.legend(handles=[c1, c2, c5, c10])
105 | plt.xlabel('d (# of features)', fontsize=18)
106 | plt.ylabel(v['ylabel'], fontsize=15)
107 | plt.savefig('figures_d_50x/d_50x_' + v['name'])
108 |
109 |
110 | def mean(numbers):
111 | return float(sum(numbers)) / max(len(numbers), 1)
112 |
113 |
114 | if __name__ == '__main__':
115 | main()
116 |
--------------------------------------------------------------------------------
/evaluation_k.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import numpy as np
3 |
4 | from fairml import beta, interval_chaining, top_interval
5 |
6 |
7 | def main():
8 | c_vals = [1.0, 2.0, 5.0, 10.0]
9 |
10 | # Plot: Varying k (# groups)
11 | d = 2
12 | k_vals = range(1, 50, 5)
13 | T = 1000
14 |
15 | results = {
16 | '0': {
17 | 'ylabel': 'Average regret - TI',
18 | 'name': 'avg_regret_ti'
19 | },
20 | '1': {
21 | 'ylabel': 'Average regret - IC',
22 | 'name': 'avg_regret_ic'
23 | },
24 | '2': {
25 | 'ylabel': 'Average regret difference (TI - IC)',
26 | 'name': 'avg_regret_diff'
27 | },
28 | '3': {
29 | 'ylabel': 'Cumulative regret - TI',
30 | 'name': 'cum_regret_ti'
31 | },
32 | '4': {
33 | 'ylabel': 'Cumulative regret - IC',
34 | 'name': 'cum_regret_ic'
35 | },
36 | '5': {
37 | 'ylabel': 'Cumulative regret difference (TI - IC)',
38 | 'name': 'cum_regret_diff'
39 | },
40 | '6': {
41 | 'ylabel': 'Final regret - TI',
42 | 'name': 'final_regret_ti'
43 | },
44 | '7': {
45 | 'ylabel': 'Final regret - IC',
46 | 'name': 'final_regret_ic'
47 | },
48 | '8': {
49 | 'ylabel': 'Final regret difference (TI - IC)',
50 | 'name': 'final_regret_diff'
51 | }
52 | }
53 | for _, v in results.items(): # 9 sets of results.
54 | for j in c_vals:
55 | v[str(j)] = []
56 |
57 | for c in c_vals:
58 | for k in k_vals:
59 | cum_regret_tis = []
60 | avg_regret_tis = []
61 | final_regret_tis = []
62 | cum_regret_ics = []
63 | avg_regret_ics = []
64 | final_regret_ics = []
65 | for i in range(0, 50): # 50 trials.
66 | X = np.random.uniform(0, 1, size=(T, k, d))
67 | B = beta(k, d, c)
68 | Y = np.array([np.diag(X[t].dot(np.transpose(B))) for t in range(T)])
69 |
70 | cum_regret_ti, avg_regret_ti, final_regret_ti = top_interval(
71 | X, Y, k, d, 0.05, T, _print_progress=False)
72 | cum_regret_ic, avg_regret_ic, final_regret_ic = interval_chaining(
73 | X, Y, c, k, d, 0.05, T, _print_progress=False)
74 | cum_regret_tis.append(cum_regret_ti)
75 | avg_regret_tis.append(avg_regret_ti)
76 | final_regret_tis.append(final_regret_ti)
77 | cum_regret_ics.append(cum_regret_ic)
78 | avg_regret_ics.append(avg_regret_ic)
79 | final_regret_ics.append(final_regret_ic)
80 |             cum_regret_ti = mean(cum_regret_tis)
81 |             avg_regret_ti = mean(avg_regret_tis)
82 |             final_regret_ti = mean(final_regret_tis)
83 |             cum_regret_ic = mean(cum_regret_ics)
84 |             avg_regret_ic = mean(avg_regret_ics)
85 |             final_regret_ic = mean(final_regret_ics)
86 |
87 | results['0'][str(c)].append(avg_regret_ti)
88 | results['1'][str(c)].append(avg_regret_ic)
89 | results['2'][str(c)].append(abs(avg_regret_ti - avg_regret_ic))
90 | results['3'][str(c)].append(cum_regret_ti)
91 | results['4'][str(c)].append(cum_regret_ic)
92 | results['5'][str(c)].append(abs(cum_regret_ti - cum_regret_ic))
93 | results['6'][str(c)].append(final_regret_ti)
94 | results['7'][str(c)].append(final_regret_ic)
95 | results['8'][str(c)].append(abs(final_regret_ti - final_regret_ic))
96 |
97 | for k, v in results.items():
98 | plt.clf()
99 | c1, = plt.plot(k_vals, results[k]['1.0'], label='c=1')
100 | c2, = plt.plot(k_vals, results[k]['2.0'], label='c=2')
101 | c5, = plt.plot(k_vals, results[k]['5.0'], label='c=5')
102 | c10, = plt.plot(k_vals, results[k]['10.0'], label='c=10')
103 | plt.xticks(np.arange(min(k_vals), max(k_vals) + 1, 10))
104 | plt.legend(handles=[c1, c2, c5, c10])
105 | plt.xlabel('k (# of groups)', fontsize=18)
106 | plt.ylabel(v['ylabel'], fontsize=15)
107 | plt.savefig('figures_k_50x/k_50x_' + v['name'])
108 |
109 |
110 | def mean(numbers):
111 | return float(sum(numbers)) / max(len(numbers), 1)
112 |
113 |
114 | if __name__ == '__main__':
115 | main()
116 |
--------------------------------------------------------------------------------
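
evaluation_T.py, evaluation_d.py, and evaluation_k.py differ only in which of T, d, or k is swept; the trial loop is otherwise duplicated verbatim. A sketch of how the shared inner loop could be factored out (run_trials is hypothetical and does not exist in fairml.py; top_interval and interval_chaining are used with the signatures defined there):

import numpy as np

from fairml import beta, interval_chaining, top_interval


def run_trials(T, k, d, c, n_trials=50, delta=0.05):
    """Averages the (cum, avg, final) regrets of TI and IC over random instances."""
    ti, ic = [], []
    for _ in range(n_trials):
        X = np.random.uniform(0, 1, size=(T, k, d))
        B = beta(k, d, c)
        Y = np.array([np.diag(X[t].dot(np.transpose(B))) for t in range(T)])
        ti.append(top_interval(X, Y, k, d, delta, T, _print_progress=False))
        ic.append(interval_chaining(X, Y, c, k, d, delta, T, _print_progress=False))
    # Each list entry is a (cum_regret, avg_regret, final_regret) tuple.
    return np.mean(ti, axis=0), np.mean(ic, axis=0)
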
/fairml.py:
--------------------------------------------------------------------------------
1 | from math import sqrt
2 | import numpy as np
3 | from numpy import log, transpose
4 | from numpy.linalg import inv
5 | from scipy.stats import norm
6 |
7 |
8 | def eta(T):
9 | """
10 |     Generates the cutoff probabilities for exploration rounds in
11 |     TopInterval and IntervalChaining.
12 |
13 | :param T: the total number of iterations
14 | """
15 | return np.array([pow(t, -1/3) for t in range(1, T+1)])
16 |
17 |
18 | def beta(k, d, c):
19 | """
20 |     Generates the feature weights for a true model, with each arm's weights
21 |     drawn from the distribution β ∼ U[0, c]^d.
22 |
23 | :param k: the number of arms
24 | :param d: the number of features
25 | :param c: the scale of the feature weights
26 | """
27 |     return np.random.uniform(0, c, size=(k, d))  # U[0, c], as documented.
28 |
29 |
30 | def print_progress(s, should_print):
31 | """
32 | Helper function to print the progress of an algorithm as it's running.
33 |
34 | :param s: the string to print
35 |     :param should_print: whether or not the string should be printed
36 | """
37 | if should_print:
38 | print(s)
39 |
40 |
41 | def top_interval(X, Y, k, d, _delta, T, _print_progress=True):
42 | """
43 |     Simulates T rounds of TopInterval for k arms.
44 |
45 | :param X: a 3-axis (T, k, d) ndarray of d-dimensional context vectors for
46 | each time-step and arm
47 | :param Y: a T x k ndarray of reward function output for each context vector
48 | :param k: the number of arms
49 | :param d: the number of features
50 | :param _delta: confidence parameter
51 | :param T: the number of iterations
52 | :param _print_progress: True if progress should be printed; False otherwise
53 |     :returns: cum_regret (the total regret over the evaluated rounds),
54 |         avg_regret (cum_regret divided by T),
55 |         final_regret (the regret in the final round)
56 | """
57 | pp = _print_progress
58 | _eta = eta(T) # exploration cutoff probabilities
59 | picks = []
60 | for t in range(T):
61 | print_progress('Iteration [{0} / {1}]'.format(t, T), pp)
62 | if t <= d or np.random.rand() <= _eta[t]:
63 | # Play uniformly at random from [1, k].
64 | picks.append(np.random.randint(0, k))
65 | print_progress('Exploration round.', pp)
66 | else:
67 |             intervals, arms = [], []  # per-arm intervals and their arm ids
68 | for i in range(k):
69 | # Compute beta hat.
70 | _Xti = X[:t+1, i]
71 | _XtiT = transpose(_Xti)
72 | try:
73 | _XTX = inv(_XtiT.dot(_Xti))
74 |                 except np.linalg.LinAlgError:
75 | print_progress('Encountered singular matrix. Ignoring.', pp)
76 | continue
77 | _Yti = Y[:t+1, i]
78 | Bh_t_i = _XTX.dot(_XtiT).dot(_Yti) # Compute OLS estimators.
79 | yh_t_i = Bh_t_i.dot(X[t, i])
80 | _s2 = np.var(Y[:t+1, i])
81 | # Compute the confidence interval width using the inverse CDF.
82 | w_t_i = norm.ppf(1 - _delta/(2*T*k), loc=0,
83 | scale=np.sqrt(_s2 * X[t, i].dot(_XTX).dot(transpose(X[t, i]))))
84 |                 intervals.append([yh_t_i - w_t_i, yh_t_i + w_t_i])
85 |                 arms.append(i)  # Remember which arm this interval belongs to.
86 |             picks.append(arms[np.argmax(np.array(intervals)[:, 1])] if intervals else np.random.randint(0, k))
87 | print_progress('Intervals: {0}'.format(intervals), pp)
88 | # Compute sum of best picks over each iteration.
89 |     best = [Y[t].max() for t in range(2, T)]
90 |     performance = [Y[t][picks[t]] for t in range(2, T)]
91 | cum_regret = sum(best) - sum(performance)
92 | avg_regret = cum_regret / float(T)
93 | final_regret = best[-1] - performance[-1]
94 | print_progress('Cumulative Regret: {0}'.format(cum_regret), pp)
95 | print_progress('Average Regret: {0}'.format(avg_regret), pp)
96 | print_progress('Final Regret: {0}'.format(final_regret), pp)
97 | return cum_regret, avg_regret, final_regret
98 |
99 |
100 | def compute_chain(i_st, intervals, k, _print_progress=True):
101 |     # Chain i_st with every interval that overlaps the chain built so far,
102 |     # scanning arms in decreasing order of upper confidence bound.
103 |     pp = _print_progress
104 |     chain = [i_st]
105 |     print_progress(intervals[:, 1], pp)
106 |     ordering = np.argsort(intervals[:, 1])[::-1]
107 |     intervals = intervals[ordering, :]
108 |     lowest_in_chain = intervals[0][0]
109 |     for i in range(1, len(intervals)):
110 |         if intervals[i][1] >= lowest_in_chain:
111 |             chain.append(ordering[i])  # Original index, not the sorted one.
112 |             lowest_in_chain = min(lowest_in_chain, intervals[i][0])
113 |         else:
114 |             return chain
115 |     return chain
116 |
117 |
118 | def interval_chaining(X, Y, c, k, d, _delta, T, _print_progress=True):
119 | """
120 |     Simulates T rounds of IntervalChaining for k arms.
121 |
122 | :param X: a 3-axis (T, k, d) ndarray of d-dimensional context vectors for
123 | each time-step and arm
124 | :param Y: a T x k ndarray of reward function output for each context vector
125 | :param k: the number of arms
126 | :param d: the number of features
127 | :param _delta: confidence parameter
128 | :param T: the number of iterations
129 | :param _print_progress: True if progress should be printed; False otherwise
130 |     :returns: cum_regret (the total regret over the evaluated rounds),
131 |         avg_regret (cum_regret divided by T),
132 |         final_regret (the regret in the final round)
133 | """
134 | pp = _print_progress
135 | _eta = eta(T) # exploration cutoff probabilities
136 | picks = []
137 | for t in range(T):
138 | print_progress('Iteration [{0} / {1}]'.format(t, T), pp)
139 | if t <= d or np.random.rand() <= _eta[t]:
140 | # Play uniformly at random from [1, k].
141 | picks.append(np.random.randint(0, k))
142 | print_progress('Exploration round.', pp)
143 | else:
144 |             intervals, arms = [], []  # per-arm intervals and their arm ids
145 | for i in range(k):
146 | # Compute beta hat.
147 | _Xti = X[:t+1, i]
148 | _XtiT = transpose(_Xti)
149 | try:
150 | _XTX = inv(_XtiT.dot(_Xti))
151 |                 except np.linalg.LinAlgError:
152 | print_progress('Encountered singular matrix. Ignoring.', pp)
153 | continue
154 | _Yti = Y[:t+1, i]
155 | Bh_t_i = _XTX.dot(_XtiT).dot(_Yti) # Compute OLS estimators.
156 | yh_t_i = Bh_t_i.dot(X[t, i])
157 | _s2 = np.var(Y[:t+1, i])
158 | # Compute the confidence interval width using the inverse CDF.
159 | w_t_i = norm.ppf(1 - _delta/(2*T*k), loc=0,
160 | scale=np.sqrt(_s2 * X[t, i].dot(_XTX).dot(transpose(X[t, i]))))
161 |                 intervals.append([yh_t_i - w_t_i, yh_t_i + w_t_i])
162 |                 arms.append(i)  # Remember which arm this interval belongs to.
163 | if not intervals:
164 | picks.append(np.random.randint(0, k))
165 | else:
166 | i_st = np.argmax(np.array(intervals)[:, 1])
167 |
168 | # Chaining
169 | chain = compute_chain(i_st, np.array(intervals), k, pp)
170 | print_progress('Computed chain: {0}'.format(chain), pp)
171 |                 picks.append(arms[np.random.choice(chain)])
172 | print_progress('Intervals: {0}'.format(intervals), pp)
173 | # Compute sum of best picks over each iteration.
174 |     best = [Y[t].max() for t in range(2, T)]
175 |     performance = [Y[t][picks[t]] for t in range(2, T)]
176 | cum_regret = sum(best) - sum(performance)
177 | avg_regret = cum_regret / float(T)
178 | final_regret = best[-1] - performance[-1]
179 | print_progress('Cumulative Regret: {0}'.format(cum_regret), pp)
180 | print_progress('Average Regret: {0}'.format(avg_regret), pp)
181 | print_progress('Final Regret: {0}'.format(final_regret), pp)
182 | return cum_regret, avg_regret, final_regret
183 |
184 |
185 | def ridge_fair(X, Y, k, d, _delta, T, _lambda, _print_progress=True):
186 | """
187 | Simulates T rounds of ridge_fair.
188 |
189 | :param X: a 3-axis (T, k, d) ndarray of d-dimensional context vectors for
190 | each time-step and arm
191 | :param Y: a T x k ndarray of reward function output for each context vector
192 | :param k: the number of arms
193 | :param d: the number of features
194 | :param _delta: confidence parameter
195 | :param T: the number of iterations
196 |     :param _lambda: regularization parameter
197 | """
198 | picks = []
199 | for t in range(T):
200 |         R = 1  # Noise scale assumed by the confidence width.
201 |         intervals = []
202 |         for i in range(k):
203 | try:
204 | X_i = X[:t, i] # design matrix
205 | Y_i = Y[:t, i] # same with Y
206 | x_ti = X[t, i] # feature vector for arm i in round t
207 |
208 | X_iT = transpose(X_i)
209 | _idenD = np.identity(d)
210 | V_it = X_iT.dot(X_i) + (_lambda * _idenD)
211 |
212 | B_it = inv(V_it).dot(X_iT).dot(Y_i)
213 |
214 | y_ti = transpose(x_ti).dot(B_it)
215 |
216 | V_itI = inv(V_it) # inverse of V_it
217 | _wti1 = sqrt(transpose(x_ti).dot(V_itI).dot(x_ti))
218 | _wti2 = R * sqrt(d * log((1 + (t / _lambda)) / _delta)) + sqrt(_lambda)
219 | w_ti = _wti1 * _wti2
220 |
221 | intervals.append([y_ti - w_ti, y_ti + w_ti])
222 |             except Exception:
223 | print_progress('Error in assigning interval value.', _print_progress)
224 | intervals = None
225 | break
226 | if not intervals:
227 | picks.append(np.random.randint(0, k))
228 | else:
229 | i_st = np.argmax(np.array(intervals)[:, 1])
230 | chain = compute_chain(i_st, np.array(intervals), k)
231 | # play uniformly random from chain
232 | picks.append(np.random.choice(chain))
233 |
234 |     best = [Y[t].max() for t in range(2, T)]
235 |     performance = [Y[t][picks[t]] for t in range(2, T)]
236 |     cum_regret = sum(best) - sum(performance)
237 |     print_progress('Cumulative Regret: {0}'.format(cum_regret), _print_progress)
238 |     print_progress('Final Regret: {0}'.format(best[-1] - performance[-1]), _print_progress)
239 |     return cum_regret, cum_regret / float(T), best[-1] - performance[-1]
240 | 
--------------------------------------------------------------------------------
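
For reference, the interval that top_interval and interval_chaining construct for arm i at round t is the standard OLS prediction interval, with the confidence level union-bounded over all Tk intervals; in my own notation (X_{t,i} and Y_{t,i} denote arm i's contexts and rewards observed up to round t):

\[
\hat{\beta}_{t,i} = (X_{t,i}^\top X_{t,i})^{-1} X_{t,i}^\top Y_{t,i},
\qquad
\hat{y}_{t,i} = \hat{\beta}_{t,i}^\top x_{t,i},
\]
\[
w_{t,i} = \Phi^{-1}\!\left(1 - \frac{\delta}{2Tk}\right)
\sqrt{\hat{s}^2 \, x_{t,i}^\top (X_{t,i}^\top X_{t,i})^{-1} x_{t,i}},
\qquad
[\hat{y}_{t,i} - w_{t,i},\; \hat{y}_{t,i} + w_{t,i}],
\]

where \Phi^{-1} is the standard normal inverse CDF (norm.ppf with loc=0 and the stated scale) and \hat{s}^2 is the empirical variance of arm i's rewards so far.
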
/figures_T_50x/avg_regret_diff.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_T_50x/avg_regret_diff.png
--------------------------------------------------------------------------------
/figures_T_50x/avg_regret_ic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_T_50x/avg_regret_ic.png
--------------------------------------------------------------------------------
/figures_T_50x/avg_regret_ti.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_T_50x/avg_regret_ti.png
--------------------------------------------------------------------------------
/figures_T_50x/cum_regret_diff.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_T_50x/cum_regret_diff.png
--------------------------------------------------------------------------------
/figures_T_50x/cum_regret_ic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_T_50x/cum_regret_ic.png
--------------------------------------------------------------------------------
/figures_T_50x/cum_regret_ti.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_T_50x/cum_regret_ti.png
--------------------------------------------------------------------------------
/figures_T_50x/final_regret_diff.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_T_50x/final_regret_diff.png
--------------------------------------------------------------------------------
/figures_T_50x/final_regret_ic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_T_50x/final_regret_ic.png
--------------------------------------------------------------------------------
/figures_T_50x/final_regret_ti.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_T_50x/final_regret_ti.png
--------------------------------------------------------------------------------
/figures_d_50x/avg_regret_diff.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_d_50x/avg_regret_diff.png
--------------------------------------------------------------------------------
/figures_d_50x/avg_regret_ic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_d_50x/avg_regret_ic.png
--------------------------------------------------------------------------------
/figures_d_50x/avg_regret_ti.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_d_50x/avg_regret_ti.png
--------------------------------------------------------------------------------
/figures_d_50x/cum_regret_diff.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_d_50x/cum_regret_diff.png
--------------------------------------------------------------------------------
/figures_d_50x/cum_regret_ic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_d_50x/cum_regret_ic.png
--------------------------------------------------------------------------------
/figures_d_50x/cum_regret_ti.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_d_50x/cum_regret_ti.png
--------------------------------------------------------------------------------
/figures_d_50x/final_regret_diff.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_d_50x/final_regret_diff.png
--------------------------------------------------------------------------------
/figures_d_50x/final_regret_ic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_d_50x/final_regret_ic.png
--------------------------------------------------------------------------------
/figures_d_50x/final_regret_ti.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_d_50x/final_regret_ti.png
--------------------------------------------------------------------------------
/figures_k_50x/avg_regret_diff.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_k_50x/avg_regret_diff.png
--------------------------------------------------------------------------------
/figures_k_50x/avg_regret_ic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_k_50x/avg_regret_ic.png
--------------------------------------------------------------------------------
/figures_k_50x/avg_regret_ti.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_k_50x/avg_regret_ti.png
--------------------------------------------------------------------------------
/figures_k_50x/cum_regret_diff.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_k_50x/cum_regret_diff.png
--------------------------------------------------------------------------------
/figures_k_50x/cum_regret_ic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_k_50x/cum_regret_ic.png
--------------------------------------------------------------------------------
/figures_k_50x/cum_regret_ti.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_k_50x/cum_regret_ti.png
--------------------------------------------------------------------------------
/figures_k_50x/final_regret_diff.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_k_50x/final_regret_diff.png
--------------------------------------------------------------------------------
/figures_k_50x/final_regret_ic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_k_50x/final_regret_ic.png
--------------------------------------------------------------------------------
/figures_k_50x/final_regret_ti.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_k_50x/final_regret_ti.png
--------------------------------------------------------------------------------
/paper/.gitignore:
--------------------------------------------------------------------------------
1 | ## Core latex/pdflatex auxiliary files:
2 | *.aux
3 | *.lof
4 | *.log
5 | *.lot
6 | *.fls
7 | *.out
8 | *.toc
9 | *.fmt
10 | *.fot
11 | *.cb
12 | *.cb2
13 |
14 | ## Intermediate documents:
15 | *.dvi
16 | *-converted-to.*
17 | # these rules might exclude image files for figures etc.
18 | # *.ps
19 | # *.eps
20 | # *.pdf
21 |
22 | ## Generated if empty string is given at "Please type another file name for output:"
23 | .pdf
24 |
25 | ## Bibliography auxiliary files (bibtex/biblatex/biber):
26 | *.bbl
27 | *.bcf
28 | *.blg
29 | *-blx.aux
30 | *-blx.bib
31 | *.run.xml
32 |
33 | ## Build tool auxiliary files:
34 | *.fdb_latexmk
35 | *.synctex
36 | *.synctex(busy)
37 | *.synctex.gz
38 | *.synctex.gz(busy)
39 | *.pdfsync
40 |
41 | ## Auxiliary and intermediate files from other packages:
42 | # algorithms
43 | *.alg
44 | *.loa
45 |
46 | # achemso
47 | acs-*.bib
48 |
49 | # amsthm
50 | *.thm
51 |
52 | # beamer
53 | *.nav
54 | *.pre
55 | *.snm
56 | *.vrb
57 |
58 | # changes
59 | *.soc
60 |
61 | # cprotect
62 | *.cpt
63 |
64 | # elsarticle (documentclass of Elsevier journals)
65 | *.spl
66 |
67 | # endnotes
68 | *.ent
69 |
70 | # fixme
71 | *.lox
72 |
73 | # feynmf/feynmp
74 | *.mf
75 | *.mp
76 | *.t[1-9]
77 | *.t[1-9][0-9]
78 | *.tfm
79 |
80 | #(r)(e)ledmac/(r)(e)ledpar
81 | *.end
82 | *.?end
83 | *.[1-9]
84 | *.[1-9][0-9]
85 | *.[1-9][0-9][0-9]
86 | *.[1-9]R
87 | *.[1-9][0-9]R
88 | *.[1-9][0-9][0-9]R
89 | *.eledsec[1-9]
90 | *.eledsec[1-9]R
91 | *.eledsec[1-9][0-9]
92 | *.eledsec[1-9][0-9]R
93 | *.eledsec[1-9][0-9][0-9]
94 | *.eledsec[1-9][0-9][0-9]R
95 |
96 | # glossaries
97 | *.acn
98 | *.acr
99 | *.glg
100 | *.glo
101 | *.gls
102 | *.glsdefs
103 |
104 | # gnuplottex
105 | *-gnuplottex-*
106 |
107 | # gregoriotex
108 | *.gaux
109 | *.gtex
110 |
111 | # hyperref
112 | *.brf
113 |
114 | # knitr
115 | *-concordance.tex
116 | # TODO Comment the next line if you want to keep your tikz graphics files
117 | *.tikz
118 | *-tikzDictionary
119 |
120 | # listings
121 | *.lol
122 |
123 | # makeidx
124 | *.idx
125 | *.ilg
126 | *.ind
127 | *.ist
128 |
129 | # minitoc
130 | *.maf
131 | *.mlf
132 | *.mlt
133 | *.mtc[0-9]*
134 | *.slf[0-9]*
135 | *.slt[0-9]*
136 | *.stc[0-9]*
137 |
138 | # minted
139 | _minted*
140 | *.pyg
141 |
142 | # morewrites
143 | *.mw
144 |
145 | # nomencl
146 | *.nlo
147 |
148 | # pax
149 | *.pax
150 |
151 | # pdfpcnotes
152 | *.pdfpc
153 |
154 | # sagetex
155 | *.sagetex.sage
156 | *.sagetex.py
157 | *.sagetex.scmd
158 |
159 | # scrwfile
160 | *.wrt
161 |
162 | # sympy
163 | *.sout
164 | *.sympy
165 | sympy-plots-for-*.tex/
166 |
167 | # pdfcomment
168 | *.upa
169 | *.upb
170 |
171 | # pythontex
172 | *.pytxcode
173 | pythontex-files-*/
174 |
175 | # thmtools
176 | *.loe
177 |
178 | # TikZ & PGF
179 | *.dpth
180 | *.md5
181 | *.auxlock
182 |
183 | # todonotes
184 | *.tdo
185 |
186 | # easy-todo
187 | *.lod
188 |
189 | # xindy
190 | *.xdy
191 |
192 | # xypic precompiled matrices
193 | *.xyc
194 |
195 | # endfloat
196 | *.ttt
197 | *.fff
198 |
199 | # Latexian
200 | TSWLatexianTemp*
201 |
202 | ## Editors:
203 | # WinEdt
204 | *.bak
205 | *.sav
206 |
207 | # Texpad
208 | .texpadtmp
209 |
210 | # Kile
211 | *.backup
212 |
213 | # KBibTeX
214 | *~[0-9]*
215 |
216 | # auto folder when using emacs and auctex
217 | /auto/*
218 |
219 | # expex forward references with \gathertags
220 | *-tags.tex
221 |
--------------------------------------------------------------------------------
/paper/Makefile:
--------------------------------------------------------------------------------
1 |
2 | all: prepare
3 |
4 | prepare:
5 | pdflatex paper.tex
6 |
7 | view: prepare
8 | open -a Skim paper.pdf
9 |
10 | develop: prepare
11 | fswatch -i 'paper.tex' -e '.*' . | xargs -t -n1 -I % bash -c "pdflatex % || osascript -e 'display notification \"Latex compilation failed\" with title \"ERROR\"'"
12 |
13 | clean:
14 | rm -rf *.aux *.listing *.pdf *.out *.log
15 |
--------------------------------------------------------------------------------
/paper/acl.bst:
--------------------------------------------------------------------------------
1 |
2 | % BibTeX `acl' style file for BibTeX version 0.99c, LaTeX version 2.09
3 | % This version was made by modifying `aaai-named' format based on the master
4 | % file by Oren Patashnik (PATASHNIK@SCORE.STANFORD.EDU)
5 |
6 | % Copyright (C) 1985, all rights reserved.
7 | % Modifications Copyright 1988, Peter F. Patel-Schneider
8 | % Further modifications by Stuart Shieber, 1991, and Fernando Pereira, 1992.
9 | % Copying of this file is authorized only if either
10 | % (1) you make absolutely no changes to your copy, including name, or
11 | % (2) if you do make changes, you name it something other than
12 | % btxbst.doc, plain.bst, unsrt.bst, alpha.bst, and abbrv.bst.
13 | % This restriction helps ensure that all standard styles are identical.
14 |
15 | % There are undoubtedly bugs in this style. If you make bug fixes,
16 | % improvements, etc. please let me know. My e-mail address is:
17 | % pfps@spar.slb.com
18 |
19 | % Citation format: [author-last-name, year]
20 | % [author-last-name and author-last-name, year]
21 | % [author-last-name {\em et al.}, year]
22 | %
23 | % Reference list ordering: alphabetical by author or whatever passes
24 | % for author in the absence of one.
25 | %
26 | % This BibTeX style has support for short (year only) citations. This
27 | % is done by having the citations actually look like
28 | % \citename{name-info, }year
29 | % The LaTeX style has to have the following
30 | % \let\@internalcite\cite
31 | % \def\cite{\def\citename##1{##1}\@internalcite}
32 | % \def\shortcite{\def\citename##1{}\@internalcite}
33 | % \def\@biblabel#1{\def\citename##1{##1}[#1]\hfill}
34 | % which makes \shortcite the macro for short citations.
35 |
36 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
37 | % Changes made by SMS for thesis style
38 | % no emphasis on "et al."
39 | % "Ph.D." includes periods (not "PhD")
40 | % moved year to immediately after author's name
41 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
42 | ENTRY
43 | { address
44 | author
45 | booktitle
46 | chapter
47 | edition
48 | editor
49 | howpublished
50 | institution
51 | journal
52 | key
53 | month
54 | note
55 | number
56 | organization
57 | pages
58 | publisher
59 | school
60 | series
61 | title
62 | type
63 | volume
64 | year
65 | }
66 | {}
67 | { label extra.label sort.label }
68 |
69 | INTEGERS { output.state before.all mid.sentence after.sentence after.block }
70 |
71 | FUNCTION {init.state.consts}
72 | { #0 'before.all :=
73 | #1 'mid.sentence :=
74 | #2 'after.sentence :=
75 | #3 'after.block :=
76 | }
77 |
78 | STRINGS { s t }
79 |
80 | FUNCTION {output.nonnull}
81 | { 's :=
82 | output.state mid.sentence =
83 | { ", " * write$ }
84 | { output.state after.block =
85 | { add.period$ write$
86 | newline$
87 | "\newblock " write$
88 | }
89 | { output.state before.all =
90 | 'write$
91 | { add.period$ " " * write$ }
92 | if$
93 | }
94 | if$
95 | mid.sentence 'output.state :=
96 | }
97 | if$
98 | s
99 | }
100 |
101 | FUNCTION {output}
102 | { duplicate$ empty$
103 | 'pop$
104 | 'output.nonnull
105 | if$
106 | }
107 |
108 | FUNCTION {output.check}
109 | { 't :=
110 | duplicate$ empty$
111 | { pop$ "empty " t * " in " * cite$ * warning$ }
112 | 'output.nonnull
113 | if$
114 | }
115 |
116 | FUNCTION {output.bibitem}
117 | { newline$
118 |
119 | "\bibitem[" write$
120 | label write$
121 | "]{" write$
122 |
123 | cite$ write$
124 | "}" write$
125 | newline$
126 | ""
127 | before.all 'output.state :=
128 | }
129 |
130 | FUNCTION {fin.entry}
131 | { add.period$
132 | write$
133 | newline$
134 | }
135 |
136 | FUNCTION {new.block}
137 | { output.state before.all =
138 | 'skip$
139 | { after.block 'output.state := }
140 | if$
141 | }
142 |
143 | FUNCTION {new.sentence}
144 | { output.state after.block =
145 | 'skip$
146 | { output.state before.all =
147 | 'skip$
148 | { after.sentence 'output.state := }
149 | if$
150 | }
151 | if$
152 | }
153 |
154 | FUNCTION {not}
155 | { { #0 }
156 | { #1 }
157 | if$
158 | }
159 |
160 | FUNCTION {and}
161 | { 'skip$
162 | { pop$ #0 }
163 | if$
164 | }
165 |
166 | FUNCTION {or}
167 | { { pop$ #1 }
168 | 'skip$
169 | if$
170 | }
171 |
172 | FUNCTION {new.block.checka}
173 | { empty$
174 | 'skip$
175 | 'new.block
176 | if$
177 | }
178 |
179 | FUNCTION {new.block.checkb}
180 | { empty$
181 | swap$ empty$
182 | and
183 | 'skip$
184 | 'new.block
185 | if$
186 | }
187 |
188 | FUNCTION {new.sentence.checka}
189 | { empty$
190 | 'skip$
191 | 'new.sentence
192 | if$
193 | }
194 |
195 | FUNCTION {new.sentence.checkb}
196 | { empty$
197 | swap$ empty$
198 | and
199 | 'skip$
200 | 'new.sentence
201 | if$
202 | }
203 |
204 | FUNCTION {field.or.null}
205 | { duplicate$ empty$
206 | { pop$ "" }
207 | 'skip$
208 | if$
209 | }
210 |
211 | FUNCTION {emphasize}
212 | { duplicate$ empty$
213 | { pop$ "" }
214 | { "{\em " swap$ * "}" * }
215 | if$
216 | }
217 |
218 | INTEGERS { nameptr namesleft numnames }
219 |
220 | FUNCTION {format.names}
221 | { 's :=
222 | #1 'nameptr :=
223 | s num.names$ 'numnames :=
224 | numnames 'namesleft :=
225 | { namesleft #0 > }
226 |
227 | { s nameptr "{ff~}{vv~}{ll}{, jj}" format.name$ 't :=
228 |
229 | nameptr #1 >
230 | { namesleft #1 >
231 | { ", " * t * }
232 | { numnames #2 >
233 | { "," * }
234 | 'skip$
235 | if$
236 | t "others" =
237 | { " et~al." * }
238 | { " and " * t * }
239 | if$
240 | }
241 | if$
242 | }
243 | 't
244 | if$
245 | nameptr #1 + 'nameptr :=
246 | namesleft #1 - 'namesleft :=
247 | }
248 | while$
249 | }
250 |
251 | FUNCTION {format.authors}
252 | { author empty$
253 | { "" }
254 | { author format.names }
255 | if$
256 | }
257 |
258 | FUNCTION {format.editors}
259 | { editor empty$
260 | { "" }
261 | { editor format.names
262 | editor num.names$ #1 >
263 | { ", editors" * }
264 | { ", editor" * }
265 | if$
266 | }
267 | if$
268 | }
269 |
270 | FUNCTION {format.title}
271 | { title empty$
272 | { "" }
273 |
274 | { title "t" change.case$ }
275 |
276 | if$
277 | }
278 |
279 | FUNCTION {n.dashify}
280 | { 't :=
281 | ""
282 | { t empty$ not }
283 | { t #1 #1 substring$ "-" =
284 | { t #1 #2 substring$ "--" = not
285 | { "--" *
286 | t #2 global.max$ substring$ 't :=
287 | }
288 | { { t #1 #1 substring$ "-" = }
289 | { "-" *
290 | t #2 global.max$ substring$ 't :=
291 | }
292 | while$
293 | }
294 | if$
295 | }
296 | { t #1 #1 substring$ *
297 | t #2 global.max$ substring$ 't :=
298 | }
299 | if$
300 | }
301 | while$
302 | }
303 |
304 | FUNCTION {format.date}
305 | { year empty$
306 | { month empty$
307 | { "" }
308 | { "there's a month but no year in " cite$ * warning$
309 | month
310 | }
311 | if$
312 | }
313 | { month empty$
314 | { "" }
315 | { month }
316 | if$
317 | }
318 | if$
319 | }
320 |
321 | FUNCTION {format.btitle}
322 | { title emphasize
323 | }
324 |
325 | FUNCTION {tie.or.space.connect}
326 | { duplicate$ text.length$ #3 <
327 | { "~" }
328 | { " " }
329 | if$
330 | swap$ * *
331 | }
332 |
333 | FUNCTION {either.or.check}
334 | { empty$
335 | 'pop$
336 | { "can't use both " swap$ * " fields in " * cite$ * warning$ }
337 | if$
338 | }
339 |
340 | FUNCTION {format.bvolume}
341 | { volume empty$
342 | { "" }
343 | { "volume" volume tie.or.space.connect
344 | series empty$
345 | 'skip$
346 | { " of " * series emphasize * }
347 | if$
348 | "volume and number" number either.or.check
349 | }
350 | if$
351 | }
352 |
353 | FUNCTION {format.number.series}
354 | { volume empty$
355 | { number empty$
356 | { series field.or.null }
357 | { output.state mid.sentence =
358 | { "number" }
359 | { "Number" }
360 | if$
361 | number tie.or.space.connect
362 | series empty$
363 | { "there's a number but no series in " cite$ * warning$ }
364 | { " in " * series * }
365 | if$
366 | }
367 | if$
368 | }
369 | { "" }
370 | if$
371 | }
372 |
373 | FUNCTION {format.edition}
374 | { edition empty$
375 | { "" }
376 | { output.state mid.sentence =
377 | { edition "l" change.case$ " edition" * }
378 | { edition "t" change.case$ " edition" * }
379 | if$
380 | }
381 | if$
382 | }
383 |
384 | INTEGERS { multiresult }
385 |
386 | FUNCTION {multi.page.check}
387 | { 't :=
388 | #0 'multiresult :=
389 | { multiresult not
390 | t empty$ not
391 | and
392 | }
393 | { t #1 #1 substring$
394 | duplicate$ "-" =
395 | swap$ duplicate$ "," =
396 | swap$ "+" =
397 | or or
398 | { #1 'multiresult := }
399 | { t #2 global.max$ substring$ 't := }
400 | if$
401 | }
402 | while$
403 | multiresult
404 | }
405 |
406 | FUNCTION {format.pages}
407 | { pages empty$
408 | { "" }
409 | { pages multi.page.check
410 | { "pages" pages n.dashify tie.or.space.connect }
411 | { "page" pages tie.or.space.connect }
412 | if$
413 | }
414 | if$
415 | }
416 |
417 | FUNCTION {format.year.label}
418 | { year extra.label *
419 | }
420 |
421 | FUNCTION {format.vol.num.pages}
422 | { volume field.or.null
423 | number empty$
424 | 'skip$
425 | { "(" number * ")" * *
426 | volume empty$
427 | { "there's a number but no volume in " cite$ * warning$ }
428 | 'skip$
429 | if$
430 | }
431 | if$
432 | pages empty$
433 | 'skip$
434 | { duplicate$ empty$
435 | { pop$ format.pages }
436 | { ":" * pages n.dashify * }
437 | if$
438 | }
439 | if$
440 | }
441 |
442 | FUNCTION {format.chapter.pages}
443 | { chapter empty$
444 | 'format.pages
445 | { type empty$
446 | { "chapter" }
447 | { type "l" change.case$ }
448 | if$
449 | chapter tie.or.space.connect
450 | pages empty$
451 | 'skip$
452 | { ", " * format.pages * }
453 | if$
454 | }
455 | if$
456 | }
457 |
458 | FUNCTION {format.in.ed.booktitle}
459 | { booktitle empty$
460 | { "" }
461 | { editor empty$
462 | { "In " booktitle emphasize * }
463 | { "In " format.editors * ", " * booktitle emphasize * }
464 | if$
465 | }
466 | if$
467 | }
468 |
469 | FUNCTION {empty.misc.check}
470 | { author empty$ title empty$ howpublished empty$
471 | month empty$ year empty$ note empty$
472 | and and and and and
473 |
474 | key empty$ not and
475 |
476 | { "all relevant fields are empty in " cite$ * warning$ }
477 | 'skip$
478 | if$
479 | }
480 |
481 | FUNCTION {format.thesis.type}
482 | { type empty$
483 | 'skip$
484 | { pop$
485 | type "t" change.case$
486 | }
487 | if$
488 | }
489 |
490 | FUNCTION {format.tr.number}
491 | { type empty$
492 | { "Technical Report" }
493 | 'type
494 | if$
495 | number empty$
496 | { "t" change.case$ }
497 | { number tie.or.space.connect }
498 | if$
499 | }
500 |
501 | FUNCTION {format.article.crossref}
502 | { key empty$
503 | { journal empty$
504 | { "need key or journal for " cite$ * " to crossref " * crossref *
505 | warning$
506 | ""
507 | }
508 | { "In {\em " journal * "\/}" * }
509 | if$
510 | }
511 | { "In " key * }
512 | if$
513 | " \cite{" * crossref * "}" *
514 | }
515 |
516 | FUNCTION {format.crossref.editor}
517 | { editor #1 "{vv~}{ll}" format.name$
518 | editor num.names$ duplicate$
519 | #2 >
520 | { pop$ " et~al." * }
521 | { #2 <
522 | 'skip$
523 | { editor #2 "{ff }{vv }{ll}{ jj}" format.name$ "others" =
524 | { " et~al." * }
525 | { " and " * editor #2 "{vv~}{ll}" format.name$ * }
526 | if$
527 | }
528 | if$
529 | }
530 | if$
531 | }
532 |
533 | FUNCTION {format.book.crossref}
534 | { volume empty$
535 | { "empty volume in " cite$ * "'s crossref of " * crossref * warning$
536 | "In "
537 | }
538 | { "Volume" volume tie.or.space.connect
539 | " of " *
540 | }
541 | if$
542 | editor empty$
543 | editor field.or.null author field.or.null =
544 | or
545 | { key empty$
546 | { series empty$
547 | { "need editor, key, or series for " cite$ * " to crossref " *
548 | crossref * warning$
549 | "" *
550 | }
551 | { "{\em " * series * "\/}" * }
552 | if$
553 | }
554 | { key * }
555 | if$
556 | }
557 | { format.crossref.editor * }
558 | if$
559 | " \cite{" * crossref * "}" *
560 | }
561 |
562 | FUNCTION {format.incoll.inproc.crossref}
563 | { editor empty$
564 | editor field.or.null author field.or.null =
565 | or
566 | { key empty$
567 | { booktitle empty$
568 | { "need editor, key, or booktitle for " cite$ * " to crossref " *
569 | crossref * warning$
570 | ""
571 | }
572 | { "In {\em " booktitle * "\/}" * }
573 | if$
574 | }
575 | { "In " key * }
576 | if$
577 | }
578 | { "In " format.crossref.editor * }
579 | if$
580 | " \cite{" * crossref * "}" *
581 | }
582 |
583 | FUNCTION {article}
584 | { output.bibitem
585 | format.authors "author" output.check
586 | new.block
587 | format.year.label "year" output.check
588 | new.block
589 | format.title "title" output.check
590 | new.block
591 | crossref missing$
592 | { journal emphasize "journal" output.check
593 | format.vol.num.pages output
594 | format.date output
595 | }
596 | { format.article.crossref output.nonnull
597 | format.pages output
598 | }
599 | if$
600 | new.block
601 | note output
602 | fin.entry
603 | }
604 |
605 | FUNCTION {book}
606 | { output.bibitem
607 | author empty$
608 | { format.editors "author and editor" output.check }
609 | { format.authors output.nonnull
610 | crossref missing$
611 | { "author and editor" editor either.or.check }
612 | 'skip$
613 | if$
614 | }
615 | if$
616 | new.block
617 | format.year.label "year" output.check
618 | new.block
619 | format.btitle "title" output.check
620 | crossref missing$
621 | { format.bvolume output
622 | new.block
623 | format.number.series output
624 | new.sentence
625 | publisher "publisher" output.check
626 | address output
627 | }
628 | { new.block
629 | format.book.crossref output.nonnull
630 | }
631 | if$
632 | format.edition output
633 | format.date output
634 | new.block
635 | note output
636 | fin.entry
637 | }
638 |
639 | FUNCTION {booklet}
640 | { output.bibitem
641 | format.authors output
642 | new.block
643 | format.year.label "year" output.check
644 | new.block
645 | format.title "title" output.check
646 | howpublished address new.block.checkb
647 | howpublished output
648 | address output
649 | format.date output
650 | new.block
651 | note output
652 | fin.entry
653 | }
654 |
655 | FUNCTION {inbook}
656 | { output.bibitem
657 | author empty$
658 | { format.editors "author and editor" output.check }
659 | { format.authors output.nonnull
660 | crossref missing$
661 | { "author and editor" editor either.or.check }
662 | 'skip$
663 | if$
664 | }
665 | if$
666 | format.year.label "year" output.check
667 | new.block
668 | new.block
669 | format.btitle "title" output.check
670 | crossref missing$
671 | { format.bvolume output
672 | format.chapter.pages "chapter and pages" output.check
673 | new.block
674 | format.number.series output
675 | new.sentence
676 | publisher "publisher" output.check
677 | address output
678 | }
679 | { format.chapter.pages "chapter and pages" output.check
680 | new.block
681 | format.book.crossref output.nonnull
682 | }
683 | if$
684 | format.edition output
685 | format.date output
686 | new.block
687 | note output
688 | fin.entry
689 | }
690 |
691 | FUNCTION {incollection}
692 | { output.bibitem
693 | format.authors "author" output.check
694 | new.block
695 | format.year.label "year" output.check
696 | new.block
697 | format.title "title" output.check
698 | new.block
699 | crossref missing$
700 | { format.in.ed.booktitle "booktitle" output.check
701 | format.bvolume output
702 | format.number.series output
703 | format.chapter.pages output
704 | new.sentence
705 | publisher "publisher" output.check
706 | address output
707 | format.edition output
708 | format.date output
709 | }
710 | { format.incoll.inproc.crossref output.nonnull
711 | format.chapter.pages output
712 | }
713 | if$
714 | new.block
715 | note output
716 | fin.entry
717 | }
718 |
719 | FUNCTION {inproceedings}
720 | { output.bibitem
721 | format.authors "author" output.check
722 | new.block
723 | format.year.label "year" output.check
724 | new.block
725 | format.title "title" output.check
726 | new.block
727 | crossref missing$
728 | { format.in.ed.booktitle "booktitle" output.check
729 | format.bvolume output
730 | format.number.series output
731 | format.pages output
732 | address empty$
733 | { organization publisher new.sentence.checkb
734 | organization output
735 | publisher output
736 | format.date output
737 | }
738 | { address output.nonnull
739 | format.date output
740 | new.sentence
741 | organization output
742 | publisher output
743 | }
744 | if$
745 | }
746 | { format.incoll.inproc.crossref output.nonnull
747 | format.pages output
748 | }
749 | if$
750 | new.block
751 | note output
752 | fin.entry
753 | }
754 |
755 | FUNCTION {conference} { inproceedings }
756 |
757 | FUNCTION {manual}
758 | { output.bibitem
759 | author empty$
760 | { organization empty$
761 | 'skip$
762 | { organization output.nonnull
763 | address output
764 | }
765 | if$
766 | }
767 | { format.authors output.nonnull }
768 | if$
769 | format.year.label "year" output.check
770 | new.block
771 | new.block
772 | format.btitle "title" output.check
773 | author empty$
774 | { organization empty$
775 | { address new.block.checka
776 | address output
777 | }
778 | 'skip$
779 | if$
780 | }
781 | { organization address new.block.checkb
782 | organization output
783 | address output
784 | }
785 | if$
786 | format.edition output
787 | format.date output
788 | new.block
789 | note output
790 | fin.entry
791 | }
792 |
793 | FUNCTION {mastersthesis}
794 | { output.bibitem
795 | format.authors "author" output.check
796 | new.block
797 | format.year.label "year" output.check
798 | new.block
799 | format.title "title" output.check
800 | new.block
801 | "Master's thesis" format.thesis.type output.nonnull
802 | school "school" output.check
803 | address output
804 | format.date output
805 | new.block
806 | note output
807 | fin.entry
808 | }
809 |
810 | FUNCTION {misc}
811 | { output.bibitem
812 | format.authors output
813 | new.block
814 | format.year.label output
815 | new.block
816 | title howpublished new.block.checkb
817 | format.title output
818 | howpublished new.block.checka
819 | howpublished output
820 | format.date output
821 | new.block
822 | note output
823 | fin.entry
824 | empty.misc.check
825 | }
826 |
827 | FUNCTION {phdthesis}
828 | { output.bibitem
829 | format.authors "author" output.check
830 | new.block
831 | format.year.label "year" output.check
832 | new.block
833 | format.btitle "title" output.check
834 | new.block
835 | "{Ph.D.} thesis" format.thesis.type output.nonnull
836 | school "school" output.check
837 | address output
838 | format.date output
839 | new.block
840 | note output
841 | fin.entry
842 | }
843 |
844 | FUNCTION {proceedings}
845 | { output.bibitem
846 | editor empty$
847 | { organization output }
848 | { format.editors output.nonnull }
849 | if$
850 | new.block
851 | format.year.label "year" output.check
852 | new.block
853 | format.btitle "title" output.check
854 | format.bvolume output
855 | format.number.series output
856 | address empty$
857 | { editor empty$
858 | { publisher new.sentence.checka }
859 | { organization publisher new.sentence.checkb
860 | organization output
861 | }
862 | if$
863 | publisher output
864 | format.date output
865 | }
866 | { address output.nonnull
867 | format.date output
868 | new.sentence
869 | editor empty$
870 | 'skip$
871 | { organization output }
872 | if$
873 | publisher output
874 | }
875 | if$
876 | new.block
877 | note output
878 | fin.entry
879 | }
880 |
881 | FUNCTION {techreport}
882 | { output.bibitem
883 | format.authors "author" output.check
884 | new.block
885 | format.year.label "year" output.check
886 | new.block
887 | format.title "title" output.check
888 | new.block
889 | format.tr.number output.nonnull
890 | institution "institution" output.check
891 | address output
892 | format.date output
893 | new.block
894 | note output
895 | fin.entry
896 | }
897 |
898 | FUNCTION {unpublished}
899 | { output.bibitem
900 | format.authors "author" output.check
901 | new.block
902 | format.year.label "year" output.check
903 | new.block
904 | format.title "title" output.check
905 | new.block
906 | note "note" output.check
907 | format.date output
908 | fin.entry
909 | }
910 |
911 | FUNCTION {default.type} { misc }
912 |
913 | MACRO {jan} {"January"}
914 |
915 | MACRO {feb} {"February"}
916 |
917 | MACRO {mar} {"March"}
918 |
919 | MACRO {apr} {"April"}
920 |
921 | MACRO {may} {"May"}
922 |
923 | MACRO {jun} {"June"}
924 |
925 | MACRO {jul} {"July"}
926 |
927 | MACRO {aug} {"August"}
928 |
929 | MACRO {sep} {"September"}
930 |
931 | MACRO {oct} {"October"}
932 |
933 | MACRO {nov} {"November"}
934 |
935 | MACRO {dec} {"December"}
936 |
937 | MACRO {acmcs} {"ACM Computing Surveys"}
938 |
939 | MACRO {acta} {"Acta Informatica"}
940 |
941 | MACRO {cacm} {"Communications of the ACM"}
942 |
943 | MACRO {ibmjrd} {"IBM Journal of Research and Development"}
944 |
945 | MACRO {ibmsj} {"IBM Systems Journal"}
946 |
947 | MACRO {ieeese} {"IEEE Transactions on Software Engineering"}
948 |
949 | MACRO {ieeetc} {"IEEE Transactions on Computers"}
950 |
951 | MACRO {ieeetcad}
952 | {"IEEE Transactions on Computer-Aided Design of Integrated Circuits"}
953 |
954 | MACRO {ipl} {"Information Processing Letters"}
955 |
956 | MACRO {jacm} {"Journal of the ACM"}
957 |
958 | MACRO {jcss} {"Journal of Computer and System Sciences"}
959 |
960 | MACRO {scp} {"Science of Computer Programming"}
961 |
962 | MACRO {sicomp} {"SIAM Journal on Computing"}
963 |
964 | MACRO {tocs} {"ACM Transactions on Computer Systems"}
965 |
966 | MACRO {tods} {"ACM Transactions on Database Systems"}
967 |
968 | MACRO {tog} {"ACM Transactions on Graphics"}
969 |
970 | MACRO {toms} {"ACM Transactions on Mathematical Software"}
971 |
972 | MACRO {toois} {"ACM Transactions on Office Information Systems"}
973 |
974 | MACRO {toplas} {"ACM Transactions on Programming Languages and Systems"}
975 |
976 | MACRO {tcs} {"Theoretical Computer Science"}
977 |
978 | READ
979 |
980 | FUNCTION {sortify}
981 | { purify$
982 | "l" change.case$
983 | }
984 |
985 | INTEGERS { len }
986 |
987 | FUNCTION {chop.word}
988 | { 's :=
989 | 'len :=
990 | s #1 len substring$ =
991 | { s len #1 + global.max$ substring$ }
992 | 's
993 | if$
994 | }
995 |
996 | INTEGERS { et.al.char.used }
997 |
998 | FUNCTION {initialize.et.al.char.used}
999 | { #0 'et.al.char.used :=
1000 | }
1001 |
1002 | EXECUTE {initialize.et.al.char.used}
1003 |
1004 | FUNCTION {format.lab.names}
1005 | { 's :=
1006 | s num.names$ 'numnames :=
1007 |
1008 | numnames #1 =
1009 | { s #1 "{vv }{ll}" format.name$ }
1010 | { numnames #2 =
1011 | { s #1 "{vv }{ll }and " format.name$ s #2 "{vv }{ll}" format.name$ *
1012 | }
1013 | { s #1 "{vv }{ll }\bgroup et al.\egroup " format.name$ }
1014 | if$
1015 | }
1016 | if$
1017 |
1018 | }
1019 |
1020 | FUNCTION {author.key.label}
1021 | { author empty$
1022 | { key empty$
1023 |
1024 | { cite$ #1 #3 substring$ }
1025 |
1026 | { key #3 text.prefix$ }
1027 | if$
1028 | }
1029 | { author format.lab.names }
1030 | if$
1031 | }
1032 |
1033 | FUNCTION {author.editor.key.label}
1034 | { author empty$
1035 | { editor empty$
1036 | { key empty$
1037 |
1038 | { cite$ #1 #3 substring$ }
1039 |
1040 | { key #3 text.prefix$ }
1041 | if$
1042 | }
1043 | { editor format.lab.names }
1044 | if$
1045 | }
1046 | { author format.lab.names }
1047 | if$
1048 | }
1049 |
1050 | FUNCTION {author.key.organization.label}
1051 | { author empty$
1052 | { key empty$
1053 | { organization empty$
1054 |
1055 | { cite$ #1 #3 substring$ }
1056 |
1057 | { "The " #4 organization chop.word #3 text.prefix$ }
1058 | if$
1059 | }
1060 | { key #3 text.prefix$ }
1061 | if$
1062 | }
1063 | { author format.lab.names }
1064 | if$
1065 | }
1066 |
1067 | FUNCTION {editor.key.organization.label}
1068 | { editor empty$
1069 | { key empty$
1070 | { organization empty$
1071 |
1072 | { cite$ #1 #3 substring$ }
1073 |
1074 | { "The " #4 organization chop.word #3 text.prefix$ }
1075 | if$
1076 | }
1077 | { key #3 text.prefix$ }
1078 | if$
1079 | }
1080 | { editor format.lab.names }
1081 | if$
1082 | }
1083 |
1084 | FUNCTION {calc.label}
1085 | { type$ "book" =
1086 | type$ "inbook" =
1087 | or
1088 | 'author.editor.key.label
1089 | { type$ "proceedings" =
1090 | 'editor.key.organization.label
1091 | { type$ "manual" =
1092 | 'author.key.organization.label
1093 | 'author.key.label
1094 | if$
1095 | }
1096 | if$
1097 | }
1098 | if$
1099 | duplicate$
1100 |
1101 | "\protect\citename{" swap$ * "}" *
1102 | year field.or.null purify$ *
1103 | 'label :=
1104 | year field.or.null purify$ *
1105 |
1106 | sortify 'sort.label :=
1107 | }
1108 |
1109 | FUNCTION {sort.format.names}
1110 | { 's :=
1111 | #1 'nameptr :=
1112 | ""
1113 | s num.names$ 'numnames :=
1114 | numnames 'namesleft :=
1115 | { namesleft #0 > }
1116 | { nameptr #1 >
1117 | { " " * }
1118 | 'skip$
1119 | if$
1120 |
1121 | s nameptr "{vv{ } }{ll{ }}{ ff{ }}{ jj{ }}" format.name$ 't :=
1122 |
1123 | nameptr numnames = t "others" = and
1124 | { "et al" * }
1125 | { t sortify * }
1126 | if$
1127 | nameptr #1 + 'nameptr :=
1128 | namesleft #1 - 'namesleft :=
1129 | }
1130 | while$
1131 | }
1132 |
1133 | FUNCTION {sort.format.title}
1134 | { 't :=
1135 | "A " #2
1136 | "An " #3
1137 | "The " #4 t chop.word
1138 | chop.word
1139 | chop.word
1140 | sortify
1141 | #1 global.max$ substring$
1142 | }
1143 |
1144 | FUNCTION {author.sort}
1145 | { author empty$
1146 | { key empty$
1147 | { "to sort, need author or key in " cite$ * warning$
1148 | ""
1149 | }
1150 | { key sortify }
1151 | if$
1152 | }
1153 | { author sort.format.names }
1154 | if$
1155 | }
1156 |
1157 | FUNCTION {author.editor.sort}
1158 | { author empty$
1159 | { editor empty$
1160 | { key empty$
1161 | { "to sort, need author, editor, or key in " cite$ * warning$
1162 | ""
1163 | }
1164 | { key sortify }
1165 | if$
1166 | }
1167 | { editor sort.format.names }
1168 | if$
1169 | }
1170 | { author sort.format.names }
1171 | if$
1172 | }
1173 |
1174 | FUNCTION {author.organization.sort}
1175 | { author empty$
1176 | { organization empty$
1177 | { key empty$
1178 | { "to sort, need author, organization, or key in " cite$ * warning$
1179 | ""
1180 | }
1181 | { key sortify }
1182 | if$
1183 | }
1184 | { "The " #4 organization chop.word sortify }
1185 | if$
1186 | }
1187 | { author sort.format.names }
1188 | if$
1189 | }
1190 |
1191 | FUNCTION {editor.organization.sort}
1192 | { editor empty$
1193 | { organization empty$
1194 | { key empty$
1195 | { "to sort, need editor, organization, or key in " cite$ * warning$
1196 | ""
1197 | }
1198 | { key sortify }
1199 | if$
1200 | }
1201 | { "The " #4 organization chop.word sortify }
1202 | if$
1203 | }
1204 | { editor sort.format.names }
1205 | if$
1206 | }
1207 |
1208 | FUNCTION {presort}
1209 |
1210 | { calc.label
1211 | sort.label
1212 | " "
1213 | *
1214 | type$ "book" =
1215 |
1216 | type$ "inbook" =
1217 | or
1218 | 'author.editor.sort
1219 | { type$ "proceedings" =
1220 | 'editor.organization.sort
1221 | { type$ "manual" =
1222 | 'author.organization.sort
1223 | 'author.sort
1224 | if$
1225 | }
1226 | if$
1227 | }
1228 | if$
1229 |
1230 | *
1231 |
1232 | " "
1233 | *
1234 | year field.or.null sortify
1235 | *
1236 | " "
1237 | *
1238 | title field.or.null
1239 | sort.format.title
1240 | *
1241 | #1 entry.max$ substring$
1242 | 'sort.key$ :=
1243 | }
1244 |
1245 | ITERATE {presort}
1246 |
1247 | SORT
1248 |
1249 | STRINGS { longest.label last.sort.label next.extra }
1250 |
1251 | INTEGERS { longest.label.width last.extra.num }
1252 |
1253 | FUNCTION {initialize.longest.label}
1254 | { "" 'longest.label :=
1255 | #0 int.to.chr$ 'last.sort.label :=
1256 | "" 'next.extra :=
1257 | #0 'longest.label.width :=
1258 | #0 'last.extra.num :=
1259 | }
1260 |
1261 | FUNCTION {forward.pass}
1262 | { last.sort.label sort.label =
1263 | { last.extra.num #1 + 'last.extra.num :=
1264 | last.extra.num int.to.chr$ 'extra.label :=
1265 | }
1266 | { "a" chr.to.int$ 'last.extra.num :=
1267 | "" 'extra.label :=
1268 | sort.label 'last.sort.label :=
1269 | }
1270 | if$
1271 | }
1272 |
1273 | FUNCTION {reverse.pass}
1274 | { next.extra "b" =
1275 | { "a" 'extra.label := }
1276 | 'skip$
1277 | if$
1278 | label extra.label * 'label :=
1279 | label width$ longest.label.width >
1280 | { label 'longest.label :=
1281 | label width$ 'longest.label.width :=
1282 | }
1283 | 'skip$
1284 | if$
1285 | extra.label 'next.extra :=
1286 | }
1287 |
1288 | EXECUTE {initialize.longest.label}
1289 |
1290 | ITERATE {forward.pass}
1291 |
1292 | REVERSE {reverse.pass}
1293 |
1294 | FUNCTION {begin.bib}
1295 |
1296 | { et.al.char.used
1297 | { "\newcommand{\etalchar}[1]{$^{#1}$}" write$ newline$ }
1298 | 'skip$
1299 | if$
1300 | preamble$ empty$
1301 |
1302 | 'skip$
1303 | { preamble$ write$ newline$ }
1304 | if$
1305 |
1306 | "\begin{thebibliography}{" "}" * write$ newline$
1307 |
1308 | }
1309 |
1310 | EXECUTE {begin.bib}
1311 |
1312 | EXECUTE {init.state.consts}
1313 |
1314 | ITERATE {call.type$}
1315 |
1316 | FUNCTION {end.bib}
1317 | { newline$
1318 | "\end{thebibliography}" write$ newline$
1319 | }
1320 |
1321 | EXECUTE {end.bib}
1322 |
1323 |
--------------------------------------------------------------------------------
/paper/acl2015.sty:
--------------------------------------------------------------------------------
1 | % File acl2015.sty
2 | % December 2014
3 |
4 | % This is the LaTeX style file for ACL 2015. It is nearly identical to
5 | % the style files for ACL 2014, EACL 2006, ACL2005, ACL 2002, ACL
6 | % 2001, ACL 2000, EACL 95 and EACL 99.
7 | %
8 | % Changes made include: adapt layout to A4 and centimeters, widen abstract
9 |
10 | % This is the LaTeX style file for ACL 2000. It is nearly identical to the
11 | % style files for EACL 95 and EACL 99. Minor changes include editing the
12 | % instructions to reflect use of \documentclass rather than \documentstyle
13 | % and removing the white space before the title on the first page
14 | % -- John Chen, June 29, 2000
15 |
16 | % To convert from submissions prepared using the style file aclsub.sty
17 | % prepared for the ACL 2000 conference, proceed as follows:
18 | % 1) Remove submission-specific information: \whichsession, \id,
19 | % \wordcount, \otherconferences, \area, \keywords
20 | % 2) \summary should be removed. The summary material should come
21 | % after \maketitle and should be in the ``abstract'' environment
22 | % 3) Check all citations. This style should handle citations correctly
23 | % and also allows multiple citations separated by semicolons.
24 | % 4) Check figures and examples. Because the final format is double-
25 | % column, some adjustments may have to be made to fit text in the column
26 | % or to choose full-width (figure*) figures.
27 | % 5) Change the style reference from aclsub to acl2000, and be sure
28 | % this style file is in your TeX search path
29 |
30 |
31 | % This is the LaTeX style file for EACL-95. It is identical to the
32 | % style file for ANLP '94 except that the margins are adjusted for A4
33 | % paper. -- abney 13 Dec 94
34 |
35 | % The ANLP '94 style file is a slightly modified
36 | % version of the style used for AAAI and IJCAI, using some changes
37 | % prepared by Fernando Pereira and others and some minor changes
38 | % by Paul Jacobs.
39 |
40 | % Papers prepared using the aclsub.sty file and acl.bst bibtex style
41 | % should be easily converted to final format using this style.
42 | % (1) Submission information (\wordcount, \subject, and \makeidpage)
43 | % should be removed.
44 | % (2) \summary should be removed. The summary material should come
45 | % after \maketitle and should be in the ``abstract'' environment
46 | % (between \begin{abstract} and \end{abstract}).
47 | % (3) Check all citations. This style should handle citations correctly
48 | % and also allows multiple citations separated by semicolons.
49 | % (4) Check figures and examples. Because the final format is double-
50 | % column, some adjustments may have to be made to fit text in the column
51 | % or to choose full-width (figure*) figures.
52 |
53 | % Place this in a file called aclap.sty in the TeX search path.
54 | % (Placing it in the same directory as the paper should also work.)
55 |
56 | % Prepared by Peter F. Patel-Schneider, liberally using the ideas of
57 | % other style hackers, including Barbara Beeton.
58 | % This style is NOT guaranteed to work. It is provided in the hope
59 | % that it will make the preparation of papers easier.
60 | %
61 | % There are undoubtedly bugs in this style. If you make bug fixes,
62 | % improvements, etc. please let me know. My e-mail address is:
63 | % pfps@research.att.com
64 |
65 | % Papers are to be prepared using the ``acl'' bibliography style,
66 | % as follows:
67 | % \documentclass[11pt]{article}
68 | % \usepackage{acl2000}
69 | % \title{Title}
70 | % \author{Author 1 \and Author 2 \\ Address line \\ Address line \And
71 | % Author 3 \\ Address line \\ Address line}
72 | % \begin{document}
73 | % ...
74 | % \bibliography{bibliography-file}
75 | % \bibliographystyle{acl}
76 | % \end{document}
77 |
78 | % Author information can be set in various styles:
79 | % For several authors from the same institution:
80 | % \author{Author 1 \and ... \and Author n \\
81 | % Address line \\ ... \\ Address line}
82 | % if the names do not fit well on one line use
83 | % Author 1 \\ {\bf Author 2} \\ ... \\ {\bf Author n} \\
84 | % For authors from different institutions:
85 | % \author{Author 1 \\ Address line \\ ... \\ Address line
86 | % \And ... \And
87 | % Author n \\ Address line \\ ... \\ Address line}
88 | % To start a separate ``row'' of authors use \AND, as in
89 | % \author{Author 1 \\ Address line \\ ... \\ Address line
90 | % \AND
91 | % Author 2 \\ Address line \\ ... \\ Address line \And
92 | % Author 3 \\ Address line \\ ... \\ Address line}
93 |
94 | % If the title and author information does not fit in the area allocated,
95 | % place \setlength\titlebox{<dim>} right after
96 | % \usepackage{acl2015}
97 | % where <dim> can be something larger than 5cm
98 |
99 | \typeout{Conference Style for ACL 2015 -- released December 7, 2014}
100 |
101 | % NOTE: Some laser printers have a serious problem printing TeX output.
102 | % These printing devices, commonly known as ``write-white'' laser
103 | % printers, tend to make characters too light. To get around this
104 | % problem, a darker set of fonts must be created for these devices.
105 | %
106 |
107 |
108 |
109 | % A4 modified by Eneko; again modified by Alexander for 5cm titlebox
110 | \setlength{\paperwidth}{21cm} % A4
111 | \setlength{\paperheight}{29.7cm}% A4
112 | \setlength\topmargin{-0.5cm}
113 | \setlength\oddsidemargin{0cm}
114 | \setlength\textheight{24.7cm}
115 | \setlength\textwidth{16.0cm}
116 | \setlength\columnsep{0.6cm}
117 | \newlength\titlebox
118 | \setlength\titlebox{5cm}
119 | \setlength\headheight{5pt}
120 | \setlength\headsep{0pt}
121 | \thispagestyle{empty}
122 | \pagestyle{empty}
123 |
124 |
125 | \flushbottom \twocolumn \sloppy
126 |
127 | % We're never going to need a table of contents, so just flush it to
128 | % save space --- suggested by drstrip@sandia-2
129 | \def\addcontentsline#1#2#3{}
130 |
131 | % Title stuff, taken from deproc.
132 | \def\maketitle{\par
133 | \begingroup
134 | \def\thefootnote{\fnsymbol{footnote}}
135 | \def\@makefnmark{\hbox to 0pt{$^{\@thefnmark}$\hss}}
136 | \twocolumn[\@maketitle] \@thanks
137 | \endgroup
138 | \setcounter{footnote}{0}
139 | \let\maketitle\relax \let\@maketitle\relax
140 | \gdef\@thanks{}\gdef\@author{}\gdef\@title{}\let\thanks\relax}
141 | \def\@maketitle{\vbox to \titlebox{\hsize\textwidth
142 | \linewidth\hsize \vskip 0.125in minus 0.125in \centering
143 | {\Large\bf \@title \par} \vskip 0.2in plus 1fil minus 0.1in
144 | {\def\and{\unskip\enspace{\rm and}\enspace}%
145 | \def\And{\end{tabular}\hss \egroup \hskip 1in plus 2fil
146 | \hbox to 0pt\bgroup\hss \begin{tabular}[t]{c}\bf}%
147 | \def\AND{\end{tabular}\hss\egroup \hfil\hfil\egroup
148 | \vskip 0.25in plus 1fil minus 0.125in
149 | \hbox to \linewidth\bgroup\large \hfil\hfil
150 | \hbox to 0pt\bgroup\hss \begin{tabular}[t]{c}\bf}
151 | \hbox to \linewidth\bgroup\large \hfil\hfil
152 | \hbox to 0pt\bgroup\hss \begin{tabular}[t]{c}\bf\@author
153 | \end{tabular}\hss\egroup
154 | \hfil\hfil\egroup}
155 | \vskip 0.3in plus 2fil minus 0.1in
156 | }}
157 |
158 | % margins for abstract
159 | \renewenvironment{abstract}%
160 | {\centerline{\large\bf Abstract}%
161 | \begin{list}{}%
162 | {\setlength{\rightmargin}{0.6cm}%
163 | \setlength{\leftmargin}{0.6cm}}%
164 | \item[]\ignorespaces}%
165 | {\unskip\end{list}}
166 |
167 | %\renewenvironment{abstract}{\centerline{\large\bf
168 | % Abstract}\vspace{0.5ex}\begin{quote}}{\par\end{quote}\vskip 1ex}
169 |
170 |
171 | % bibliography
172 |
173 | \def\thebibliography#1{\section*{References}
174 | \global\def\@listi{\leftmargin\leftmargini
175 | \labelwidth\leftmargini \advance\labelwidth-\labelsep
176 | \topsep 1pt plus 2pt minus 1pt
177 | \parsep 0.25ex plus 1pt \itemsep 0.25ex plus 1pt}
178 | \list {[\arabic{enumi}]}{\settowidth\labelwidth{[#1]}\leftmargin\labelwidth
179 | \advance\leftmargin\labelsep\usecounter{enumi}}
180 | \def\newblock{\hskip .11em plus .33em minus -.07em}
181 | \sloppy
182 | \sfcode`\.=1000\relax}
183 |
184 | \def\@up#1{\raise.2ex\hbox{#1}}
185 |
186 | % most of cite format is from aclsub.sty by SMS
187 |
188 | % don't box citations, separate with ; and a space
189 | % also, make the penalty between citations negative: a good place to break
190 | % changed comma back to semicolon pj 2/1/90
191 | % \def\@citex[#1]#2{\if@filesw\immediate\write\@auxout{\string\citation{#2}}\fi
192 | % \def\@citea{}\@cite{\@for\@citeb:=#2\do
193 | % {\@citea\def\@citea{;\penalty\@citeseppen\ }\@ifundefined
194 | % {b@\@citeb}{{\bf ?}\@warning
195 | % {Citation `\@citeb' on page \thepage \space undefined}}%
196 | % {\csname b@\@citeb\endcsname}}}{#1}}
197 |
198 | % don't box citations, separate with ; and a space
199 | % Replaced for multiple citations (pj)
200 | % don't box citations and also add space, semicolon between multiple citations
201 | \def\@citex[#1]#2{\if@filesw\immediate\write\@auxout{\string\citation{#2}}\fi
202 | \def\@citea{}\@cite{\@for\@citeb:=#2\do
203 | {\@citea\def\@citea{; }\@ifundefined
204 | {b@\@citeb}{{\bf ?}\@warning
205 | {Citation `\@citeb' on page \thepage \space undefined}}%
206 | {\csname b@\@citeb\endcsname}}}{#1}}
207 |
208 | % Allow short (name-less) citations, when used in
209 | % conjunction with a bibliography style that creates labels like
210 | % \citename{, }
211 | %
212 | \let\@internalcite\cite
213 | \def\cite{\def\citename##1{##1, }\@internalcite}
214 | \def\shortcite{\def\citename##1{}\@internalcite}
215 | \def\newcite{\def\citename##1{{\frenchspacing##1} (}\@internalciteb}
216 |
217 | % Macros for \newcite, which leaves name in running text, and is
218 | % otherwise like \shortcite.
219 | \def\@citexb[#1]#2{\if@filesw\immediate\write\@auxout{\string\citation{#2}}\fi
220 | \def\@citea{}\@newcite{\@for\@citeb:=#2\do
221 | {\@citea\def\@citea{;\penalty\@m\ }\@ifundefined
222 | {b@\@citeb}{{\bf ?}\@warning
223 | {Citation `\@citeb' on page \thepage \space undefined}}%
224 | {\csname b@\@citeb\endcsname}}}{#1}}
225 | \def\@internalciteb{\@ifnextchar [{\@tempswatrue\@citexb}{\@tempswafalse\@citexb[]}}
226 |
227 | \def\@newcite#1#2{{#1\if@tempswa, #2\fi)}}
228 |
229 | \def\@biblabel#1{\def\citename##1{##1}[#1]\hfill}
230 |
231 | %%% More changes made by SMS (originals in latex.tex)
232 | % Use parentheses instead of square brackets in the text.
233 | \def\@cite#1#2{({#1\if@tempswa , #2\fi})}
234 |
235 | % Don't put a label in the bibliography at all. Just use the unlabeled format
236 | % instead.
237 | \def\thebibliography#1{\vskip\parskip%
238 | \vskip\baselineskip%
239 | \def\baselinestretch{1}%
240 | \ifx\@currsize\normalsize\@normalsize\else\@currsize\fi%
241 | \vskip-\parskip%
242 | \vskip-\baselineskip%
243 | \section*{References\@mkboth
244 | {References}{References}}\list
245 | {}{\setlength{\labelwidth}{0pt}\setlength{\leftmargin}{\parindent}
246 | \setlength{\itemindent}{-\parindent}}
247 | \def\newblock{\hskip .11em plus .33em minus -.07em}
248 | \sloppy\clubpenalty4000\widowpenalty4000
249 | \sfcode`\.=1000\relax}
250 | \let\endthebibliography=\endlist
251 |
252 | % Allow for a bibliography of sources of attested examples
253 | \def\thesourcebibliography#1{\vskip\parskip%
254 | \vskip\baselineskip%
255 | \def\baselinestretch{1}%
256 | \ifx\@currsize\normalsize\@normalsize\else\@currsize\fi%
257 | \vskip-\parskip%
258 | \vskip-\baselineskip%
259 | \section*{Sources of Attested Examples\@mkboth
260 | {Sources of Attested Examples}{Sources of Attested Examples}}\list
261 | {}{\setlength{\labelwidth}{0pt}\setlength{\leftmargin}{\parindent}
262 | \setlength{\itemindent}{-\parindent}}
263 | \def\newblock{\hskip .11em plus .33em minus -.07em}
264 | \sloppy\clubpenalty4000\widowpenalty4000
265 | \sfcode`\.=1000\relax}
266 | \let\endthesourcebibliography=\endlist
267 |
268 | \def\@lbibitem[#1]#2{\item[]\if@filesw
269 | { \def\protect##1{\string ##1\space}\immediate
270 | \write\@auxout{\string\bibcite{#2}{#1}}\fi\ignorespaces}}
271 |
272 | \def\@bibitem#1{\item\if@filesw \immediate\write\@auxout
273 | {\string\bibcite{#1}{\the\c@enumi}}\fi\ignorespaces}
274 |
275 | % sections with less space
276 | \def\section{\@startsection {section}{1}{\z@}{-2.0ex plus
277 | -0.5ex minus -.2ex}{1.5ex plus 0.3ex minus .2ex}{\large\bf\raggedright}}
278 | \def\subsection{\@startsection{subsection}{2}{\z@}{-1.8ex plus
279 | -0.5ex minus -.2ex}{0.8ex plus .2ex}{\normalsize\bf\raggedright}}
280 | %% changed by KO to negative values to get the initial parindent right
281 | \def\subsubsection{\@startsection{subsubsection}{3}{\z@}{-1.5ex plus
282 | -0.5ex minus -.2ex}{0.5ex plus .2ex}{\normalsize\bf\raggedright}}
283 | \def\paragraph{\@startsection{paragraph}{4}{\z@}{1.5ex plus
284 | 0.5ex minus .2ex}{-1em}{\normalsize\bf}}
285 | \def\subparagraph{\@startsection{subparagraph}{5}{\parindent}{1.5ex plus
286 | 0.5ex minus .2ex}{-1em}{\normalsize\bf}}
287 |
288 | % Footnotes
289 | \footnotesep 6.65pt %
290 | \skip\footins 9pt plus 4pt minus 2pt
291 | \def\footnoterule{\kern-3pt \hrule width 5pc \kern 2.6pt }
292 | \setcounter{footnote}{0}
293 |
294 | % Lists and paragraphs
295 | \parindent 1em
296 | \topsep 4pt plus 1pt minus 2pt
297 | \partopsep 1pt plus 0.5pt minus 0.5pt
298 | \itemsep 2pt plus 1pt minus 0.5pt
299 | \parsep 2pt plus 1pt minus 0.5pt
300 |
301 | \leftmargin 2em \leftmargini\leftmargin \leftmarginii 2em
302 | \leftmarginiii 1.5em \leftmarginiv 1.0em \leftmarginv .5em \leftmarginvi .5em
303 | \labelwidth\leftmargini\advance\labelwidth-\labelsep \labelsep 5pt
304 |
305 | \def\@listi{\leftmargin\leftmargini}
306 | \def\@listii{\leftmargin\leftmarginii
307 | \labelwidth\leftmarginii\advance\labelwidth-\labelsep
308 | \topsep 2pt plus 1pt minus 0.5pt
309 | \parsep 1pt plus 0.5pt minus 0.5pt
310 | \itemsep \parsep}
311 | \def\@listiii{\leftmargin\leftmarginiii
312 | \labelwidth\leftmarginiii\advance\labelwidth-\labelsep
313 | \topsep 1pt plus 0.5pt minus 0.5pt
314 | \parsep \z@ \partopsep 0.5pt plus 0pt minus 0.5pt
315 | \itemsep \topsep}
316 | \def\@listiv{\leftmargin\leftmarginiv
317 | \labelwidth\leftmarginiv\advance\labelwidth-\labelsep}
318 | \def\@listv{\leftmargin\leftmarginv
319 | \labelwidth\leftmarginv\advance\labelwidth-\labelsep}
320 | \def\@listvi{\leftmargin\leftmarginvi
321 | \labelwidth\leftmarginvi\advance\labelwidth-\labelsep}
322 |
323 | \abovedisplayskip 7pt plus2pt minus5pt%
324 | \belowdisplayskip \abovedisplayskip
325 | \abovedisplayshortskip 0pt plus3pt%
326 | \belowdisplayshortskip 4pt plus3pt minus3pt%
327 |
328 | % Less leading in most fonts (due to the narrow columns)
329 | % The choices were between 1-pt and 1.5-pt leading
330 | \def\@normalsize{\@setsize\normalsize{11pt}\xpt\@xpt}
331 | \def\small{\@setsize\small{10pt}\ixpt\@ixpt}
332 | \def\footnotesize{\@setsize\footnotesize{10pt}\ixpt\@ixpt}
333 | \def\scriptsize{\@setsize\scriptsize{8pt}\viipt\@viipt}
334 | \def\tiny{\@setsize\tiny{7pt}\vipt\@vipt}
335 | \def\large{\@setsize\large{14pt}\xiipt\@xiipt}
336 | \def\Large{\@setsize\Large{16pt}\xivpt\@xivpt}
337 | \def\LARGE{\@setsize\LARGE{20pt}\xviipt\@xviipt}
338 | \def\huge{\@setsize\huge{23pt}\xxpt\@xxpt}
339 | \def\Huge{\@setsize\Huge{28pt}\xxvpt\@xxvpt}
340 |
--------------------------------------------------------------------------------
/paper/figures/T_50x_avg_regret_diff.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/T_50x_avg_regret_diff.png
--------------------------------------------------------------------------------
/paper/figures/T_50x_avg_regret_ic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/T_50x_avg_regret_ic.png
--------------------------------------------------------------------------------
/paper/figures/T_50x_avg_regret_ti.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/T_50x_avg_regret_ti.png
--------------------------------------------------------------------------------
/paper/figures/T_50x_cum_regret_diff.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/T_50x_cum_regret_diff.png
--------------------------------------------------------------------------------
/paper/figures/T_50x_cum_regret_ic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/T_50x_cum_regret_ic.png
--------------------------------------------------------------------------------
/paper/figures/T_50x_cum_regret_ti.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/T_50x_cum_regret_ti.png
--------------------------------------------------------------------------------
/paper/figures/T_50x_final_regret_diff.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/T_50x_final_regret_diff.png
--------------------------------------------------------------------------------
/paper/figures/T_50x_final_regret_ic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/T_50x_final_regret_ic.png
--------------------------------------------------------------------------------
/paper/figures/T_50x_final_regret_ti.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/T_50x_final_regret_ti.png
--------------------------------------------------------------------------------
/paper/figures/d_50x_avg_regret_diff.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/d_50x_avg_regret_diff.png
--------------------------------------------------------------------------------
/paper/figures/d_50x_avg_regret_ic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/d_50x_avg_regret_ic.png
--------------------------------------------------------------------------------
/paper/figures/d_50x_avg_regret_ti.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/d_50x_avg_regret_ti.png
--------------------------------------------------------------------------------
/paper/figures/d_50x_cum_regret_diff.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/d_50x_cum_regret_diff.png
--------------------------------------------------------------------------------
/paper/figures/d_50x_cum_regret_ic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/d_50x_cum_regret_ic.png
--------------------------------------------------------------------------------
/paper/figures/d_50x_cum_regret_ti.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/d_50x_cum_regret_ti.png
--------------------------------------------------------------------------------
/paper/figures/d_50x_final_regret_diff.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/d_50x_final_regret_diff.png
--------------------------------------------------------------------------------
/paper/figures/d_50x_final_regret_ic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/d_50x_final_regret_ic.png
--------------------------------------------------------------------------------
/paper/figures/d_50x_final_regret_ti.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/d_50x_final_regret_ti.png
--------------------------------------------------------------------------------
/paper/figures/k_50x_avg_regret_diff.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/k_50x_avg_regret_diff.png
--------------------------------------------------------------------------------
/paper/figures/k_50x_avg_regret_ic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/k_50x_avg_regret_ic.png
--------------------------------------------------------------------------------
/paper/figures/k_50x_avg_regret_ti.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/k_50x_avg_regret_ti.png
--------------------------------------------------------------------------------
/paper/figures/k_50x_cum_regret_diff.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/k_50x_cum_regret_diff.png
--------------------------------------------------------------------------------
/paper/figures/k_50x_cum_regret_ic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/k_50x_cum_regret_ic.png
--------------------------------------------------------------------------------
/paper/figures/k_50x_cum_regret_ti.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/k_50x_cum_regret_ti.png
--------------------------------------------------------------------------------
/paper/figures/k_50x_final_regret_diff.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/k_50x_final_regret_diff.png
--------------------------------------------------------------------------------
/paper/figures/k_50x_final_regret_ic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/k_50x_final_regret_ic.png
--------------------------------------------------------------------------------
/paper/figures/k_50x_final_regret_ti.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/k_50x_final_regret_ti.png
--------------------------------------------------------------------------------
/paper/figures/yahoo-interval-chaining.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/yahoo-interval-chaining.png
--------------------------------------------------------------------------------
/paper/figures/yahoo-top-interval.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/yahoo-top-interval.png
--------------------------------------------------------------------------------
/paper/paper.bib:
--------------------------------------------------------------------------------
1 | @article{DBLP:journals/corr/JosephKMNR16,
2 | author = {Matthew Joseph and
3 | Michael Kearns and
4 | Jamie Morgenstern and
5 | Seth Neel and
6 | Aaron Roth},
7 | title = {Rawlsian Fairness for Machine Learning},
8 | journal = {CoRR},
9 | volume = {abs/1610.09559},
10 | year = {2016},
11 | url = {http://arxiv.org/abs/1610.09559},
12 | timestamp = {Wed, 02 Nov 2016 09:51:26 +0100},
13 | biburl = {http://dblp.uni-trier.de/rec/bib/journals/corr/JosephKMNR16},
14 | bibsource = {dblp computer science bibliography, http://dblp.org}
15 | }
16 |
17 | @article{DBLP:journals/corr/abs-1003-5956,
18 | author = {Lihong Li and
19 | Wei Chu and
20 | John Langford},
21 | title = {An Unbiased, Data-Driven, Offline Evaluation Method of Contextual
22 | Bandit Algorithms},
23 | journal = {CoRR},
24 | volume = {abs/1003.5956},
25 | year = {2010},
26 | url = {http://arxiv.org/abs/1003.5956},
27 | timestamp = {Mon, 05 Dec 2011 18:04:18 +0100},
28 | biburl = {http://dblp.uni-trier.de/rec/bib/journals/corr/abs-1003-5956},
29 | bibsource = {dblp computer science bibliography, http://dblp.org}
30 | }
31 |
32 | @misc{yahoo,
33 | title = {Yahoo! Front Page Today Module User Click Log Dataset},
34 | author = {Yahoo!},
35 | howpublished = {\url{https://webscope.sandbox.yahoo.com/catalog.php?datatype=r}},
36 | note = {Accessed: 2017-04-03},
37 | year = {2009}
38 | }
39 |
--------------------------------------------------------------------------------
/paper/paper.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/paper.pdf
--------------------------------------------------------------------------------
/paper/paper.tex:
--------------------------------------------------------------------------------
1 | %
2 | % File acl2015.tex
3 | %
4 | % Contact: car@ir.hit.edu.cn, gdzhou@suda.edu.cn
5 | %%
6 | %% Based on the style files for ACL-2014, which were, in turn,
7 | %% Based on the style files for ACL-2013, which were, in turn,
8 | %% Based on the style files for ACL-2012, which were, in turn,
9 | %% based on the style files for ACL-2011, which were, in turn,
10 | %% based on the style files for ACL-2010, which were, in turn,
11 | %% based on the style files for ACL-IJCNLP-2009, which were, in turn,
12 | %% based on the style files for EACL-2009 and IJCNLP-2008...
13 |
14 | %% Based on the style files for EACL 2006 by
15 | %%e.agirre@ehu.es or Sergi.Balari@uab.es
16 | %% and that of ACL 08 by Joakim Nivre and Noah Smith
17 |
18 | \documentclass[11pt]{article}
19 | \usepackage{acl2015}
20 | \usepackage{times}
21 | \usepackage{url}
22 | \usepackage{latexsym}
23 | \usepackage{hyperref}
24 | \usepackage{tikz}
25 | \usepackage{amsmath}
26 | \usepackage{tabulary}
27 |
28 | \usepackage[labelsep=quad,indention=10pt]{subfig}
29 | \captionsetup*[subfigure]{position=bottom}
30 |
31 | \newcommand{\specialcell}[2][c]{%
32 | \begin{tabular}[#1]{@{}c@{}}#2\end{tabular}}
33 |
34 | \usepackage{graphicx}
35 | \graphicspath{{figures/}}
36 | \DeclareGraphicsExtensions{.eps,.pdf,.jpg,.png}
37 |
38 | \DeclareMathOperator{\wsim}{sim}
39 |
40 | %\setlength\titlebox{5cm}
41 |
42 | % You can expand the titlebox if you need extra space
43 | % to show all the authors. Please do not make the titlebox
44 | % smaller than 5cm (the original size); we will check this
45 | % in the camera-ready version and ask you to change it back.
46 |
47 | \title{Further Empirical Analyses of Rawlsian Fairness for Machine Learning}
48 |
49 | \author{JT Cho \\
50 | {\tt joncho@} \\
51 | {\tt seas.upenn.edu} \\\And
52 | Karinna Loo \\
53 | {\tt kloo@} \\
54 | {\tt seas.upenn.edu} \\\And
55 | Veronica Wharton \\
56 | {\tt whartonv@} \\
57 | {\tt seas.upenn.edu} }
58 | \date{}
59 |
60 | \begin{document}
61 | \maketitle
62 |
63 | %\begin{abstract}
64 |
65 | %\noindent TODO: Abstract
66 |
67 | %\end{abstract}
68 |
69 | \section{Introduction}
70 |
71 | For our CIS 625 final project, our team --- JT Cho, Karinna Loo, and Veronica Wharton --- took a closer look at the topic of fairness in machine learning. The paper that piqued our interest was \textit{Rawlsian Fairness for Machine Learning} \cite{DBLP:journals/corr/JosephKMNR16}, which describes two online algorithms in the linear contextual bandit framework that learn at a rate comparable to (but necessarily worse than) the best algorithms without a fairness constraint while also satisfying a specified fairness constraint. The authors present both theoretical and empirical results. Our team sought to re-implement the algorithms presented by \newcite{DBLP:journals/corr/JosephKMNR16} and then expand upon their empirical analyses. We were also interested in exploring further fairness analyses using real-world data.
72 |
73 | \section{Project overview}
74 |
75 | Our project consisted of the following steps:
76 |
77 | \begin{enumerate}
78 | \item We read the paper \textit{Rawlsian Fairness for Machine Learning} \cite{DBLP:journals/corr/JosephKMNR16}.
79 | \item We implemented the \textsc{TopInterval}, \textsc{IntervalChaining}, and \textsc{RidgeFair} algorithms from the paper in Python.
80 | \item We ran our implementations on a Yahoo! dataset containing a fraction of the user click log for news articles displayed in the Featured Tab of the Today Module on the Yahoo! Front Page during the first ten days in May 2009 \cite{yahoo}, to see how well they performed on real data.
81 | \item To empirically evaluate our implementations, we ran experiments similar to those in \cite{DBLP:journals/corr/JosephKMNR16} with randomly drawn contexts.
82 | \item We compiled our findings into a written report.
83 | \end{enumerate}
84 |
85 | \section{Algorithm implementations}
86 |
87 | The code for our implementations can be found here: \url{https://github.com/jtcho/FairMachineLearning/blob/master/fairml.py}
88 |
89 | All algorithms and code were written in Python 3, using NumPy\footnote{\url{http://www.numpy.org}}, SciPy\footnote{\url{https://www.scipy.org}}, and various other Python libraries.
90 |
91 | \section{Implementation: TopInterval}
92 |
93 | The \textsc{TopInterval} learning algorithm was implemented true to form as presented in \newcite{DBLP:journals/corr/JosephKMNR16}. Two details are of note. First, to ensure that all matrices used in computation are nonsingular, the first $d$ rounds are always chosen to be exploration rounds, where $d$ is the number of features. Second, we found it necessary to pick each arm once in order to observe data for each arm.
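
As a concrete illustration, the following is a minimal sketch of the exploitation-round selection rule under these conventions. It assumes unit-variance Gaussian noise and omits the exploration rounds; the helper names are ours rather than those in \texttt{fairml.py}.

\begin{verbatim}
import numpy as np
from scipy import stats

def ols_interval(X, y, x, delta, k, T):
    # OLS estimate of beta from this arm's history (X, y), plus a
    # confidence interval for the predicted reward x . beta at
    # level 1 - delta/(2kT); unit-variance Gaussian noise assumed.
    beta_hat = np.linalg.solve(X.T @ X, X.T @ y)
    y_hat = x @ beta_hat
    q = stats.norm.ppf(1 - delta / (2 * k * T))
    w = q * np.sqrt(x @ np.linalg.solve(X.T @ X, x))
    return y_hat - w, y_hat + w

def top_interval_pick(histories, contexts, delta, k, T):
    # Exploitation round: play the arm whose confidence interval
    # has the largest upper endpoint.
    uppers = [ols_interval(X, y, x, delta, k, T)[1]
              for (X, y), x in zip(histories, contexts)]
    return int(np.argmax(uppers))
\end{verbatim}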
94 |
95 | \section{Implementation: IntervalChaining}
96 |
97 | Given \textsc{TopInterval}, the implementation of \textsc{IntervalChaining} was simple: it sufficed to change the strategy for picking an arm in each round to picking uniformly at random from the chain containing the top interval.
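
A sketch of the chaining step follows: sweeping the intervals in order of lower endpoint groups transitively overlapping intervals, and the arm is then drawn uniformly from the group containing the top interval. The function below is ours and is not taken verbatim from \texttt{fairml.py}.

\begin{verbatim}
import numpy as np

def interval_chaining_pick(intervals):
    # intervals: list of (lower, upper) pairs, one per arm.
    order = sorted(range(len(intervals)),
                   key=lambda i: intervals[i][0])
    groups, current, reach = [], [], -np.inf
    for i in order:
        lo, hi = intervals[i]
        if current and lo > reach:  # gap: previous chain closed
            groups.append(current)
            current = []
        current.append(i)
        reach = max(reach, hi)
    groups.append(current)
    top = max(range(len(intervals)),
              key=lambda i: intervals[i][1])
    chain = next(g for g in groups if top in g)
    return int(np.random.choice(chain))  # uniform within chain
\end{verbatim}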
98 |
99 | \section{Implementation: RidgeFair}
100 |
101 | The \textsc{RidgeFair} algorithm was also implemented as presented in \newcite{DBLP:journals/corr/JosephKMNR16}. Its implementation is very similar to that of \textsc{IntervalChaining}, save that its narrower confidence intervals allow for the derivation of tighter regret bounds. Two small details to note: first, we assume for simplicity (and without loss of generality) that the noise is $R$-sub-Gaussian with parameter $R = 1$; second, we play uniformly at random among all arms in the set of actions chained to the maximum.
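
As a sketch, the ridge-based interval construction looks as follows; we do not reproduce the paper's exact confidence-radius term here, so \texttt{radius} is a stand-in parameter.

\begin{verbatim}
import numpy as np

def ridge_interval(X, y, x, radius, reg=1.0):
    # Ridge estimate theta = (X'X + reg*I)^{-1} X'y, with an
    # interval whose half-width is the V-norm of the context
    # scaled by `radius` (a stand-in for the sub-Gaussian
    # confidence term with R = 1).
    V = X.T @ X + reg * np.eye(X.shape[1])
    theta = np.linalg.solve(V, X.T @ y)
    w = radius * np.sqrt(x @ np.linalg.solve(V, x))
    return x @ theta - w, x @ theta + w
\end{verbatim}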
102 |
103 | \section{Yahoo! Dataset}
104 |
105 | To expand upon the initial work done by \newcite{DBLP:journals/corr/JosephKMNR16}, we endeavored to test the presented algorithms on a real dataset. A Yahoo! dataset containing logs of user visits to the front page was procured to evaluate our contextual bandit algorithms \cite{yahoo}. Each log entry details the following:
106 |
107 | \begin{center}
108 | \begin{table}[h]
109 | \fontsize{6}{10}\selectfont
110 | \begin{tabulary}{0.8\textwidth}{|l|l|l|l|l|}
111 | \hline \textbf{unix\_timestamp} & \textbf{displayed\_id} & \textbf{user\_clicked} & \textbf{user\_features} & \textbf{article\_pool}\\\hline
112 | 1241162400&109513&0&$\dots$&[$\dots$]\\\hline
113 | \end{tabulary}
114 | \end{table}
115 | \end{center}
116 |
117 | In each event, a user specified by $6$ features is presented an article from a pool of around $20$ distinct articles, each of which has its own $6$-dimensional feature vector. The event also records whether or not the user clicked the featured article.
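
In code, we can represent each logged event with a simple record type mirroring the fields in the table above (a sketch; the type and field names are ours):

\begin{verbatim}
from collections import namedtuple

# One logged impression: the timestamp, the displayed article's
# id, whether it was clicked, the user's feature vector, and the
# pool of candidate articles.
Event = namedtuple(
    'Event',
    ['timestamp', 'displayed_id', 'clicked',
     'user_features', 'article_pool'])
\end{verbatim}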
118 |
119 | In a fashion similar to that presented in \newcite{DBLP:journals/corr/abs-1003-5956}, we devised an evaluation scheme for the various learning algorithms. In our procedure, a random sample is drawn from the set of logged events. The learning algorithm scans through the sampled events linearly, evaluating its prediction for each one. If the algorithm's picked arm matches the article displayed in the event, the logged event is added to the history.
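
A sketch of this replay loop, assuming the learner exposes \texttt{pick} and \texttt{update} methods (the interface names are ours):

\begin{verbatim}
def replay_evaluate(algorithm, events):
    # events: (user_features, shown_article, clicked, pool)
    history, clicks = [], 0
    for user, shown, clicked, pool in events:
        arm = algorithm.pick(user, pool)
        if pool[arm] == shown:  # match: the event is usable
            history.append((user, shown, clicked))
            clicks += clicked
            algorithm.update(user, shown, clicked)
    return clicks, len(history)
\end{verbatim}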
120 |
121 | Initial attempts to use this approach failed for a couple of reasons. First, the Yahoo! dataset contains a highly disproportionate number of negative samples relative to positive ones, so the learning algorithm would not retain useful information over many iterations, having been trained almost exclusively on negative samples. Second, a direct application of the \textsc{TopInterval} and \textsc{IntervalChaining} algorithms relies on the assumption that the articles in the pool are drawn from $20$ distinct underlying groups, each with its own quality function. This assumption proved unreasonable: an article's index in the article pool had no bearing on its actual likelihood of being clicked by the user when picked. The initial context also does not lend itself to a fairness analysis. As a consequence, direct applications of the learning algorithms performed very poorly.
122 |
123 | To mitigate the first issue, we altered our sampling procedure to sample positive and negative events separately and then shuffle them together. A brief argument for the validity of this approach follows. While the underlying distribution of observed user visits consists mostly of negative results, the algorithm's performance should be independent of that underlying distribution, depending only on the user's features and the articles in the pool it chooses from. Hence, curating the input to the learning algorithm so that it learns equally from positive and negative events suffices.
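
A minimal sketch of this curation step (the function and argument names are ours):

\begin{verbatim}
import numpy as np

def balanced_sample(events, clicked, n_per_class, seed=0):
    # Draw equal numbers of positive and negative events, then
    # shuffle, so the learner sees both outcomes throughout
    # training.
    rng = np.random.RandomState(seed)
    pos = np.flatnonzero(clicked == 1)
    neg = np.flatnonzero(clicked == 0)
    idx = np.concatenate([
        rng.choice(pos, n_per_class, replace=False),
        rng.choice(neg, n_per_class, replace=False)])
    rng.shuffle(idx)
    return [events[i] for i in idx]
\end{verbatim}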
124 |
125 | To resolve the second issue, we simplified the problem context by clustering the articles. Across the roughly one and a half million logged events, there are approximately $20$ distinct articles in the article pools. By choosing a smaller number of clusters, we altered the scenario so that an event counts as a success if the user clicked an article from the same pool chosen by the algorithm. In grouping the articles together, we reduced the number of available arms and also recovered the notion of ``groups'' implicit in \newcite{DBLP:journals/corr/JosephKMNR16}'s contextual bandits framework. The emergent notion of fairness then concerns discrimination against any particular cluster of articles.
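
The clustering itself can be sketched with the scikit-learn version pinned in \texttt{requirements.txt}; the feature matrix below is a placeholder for the actual article vectors.

\begin{verbatim}
import numpy as np
from sklearn.cluster import KMeans

article_features = np.random.rand(20, 6)  # placeholder
kmeans = KMeans(n_clusters=3, random_state=0)
pool_of_article = kmeans.fit_predict(article_features)
# pool_of_article[i] is the cluster ("arm") of article i.
\end{verbatim}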
126 |
127 | These modifications resulted in significant improvements in the performance of our implementations on the Yahoo! dataset, as shown in Figure~\ref{fig:yahoo}.
128 |
129 | Another novel modification we made was the use of a logit model instead of the simple linear regression used in \newcite{DBLP:journals/corr/JosephKMNR16}. We preserve the original fairness argument of the \textsc{IntervalChaining} algorithm by simply rescaling the output of the OLS estimator and the confidence intervals to $[0, 1]$ via the inverse logit. That is,
130 | $$w_{t,i} = \mathcal{Q}_{\mathcal{F}_{t,i}}\left(\frac{\delta}{2kT}\right)$$
131 | $$[\ell_{i}^{t}, u_{i}^{t}] = [\Phi(\hat{y}_{t,i} - w_{t,i}), \Phi(\hat{y}_{t,i} + w_{t,i})]$$
132 | where $\Phi(x) = \frac{e^{x}}{1 + e^{x}} = \operatorname{logistic}(x)$. It suffices to note that both OLS and logistic regression are instances of the generalized linear model (GLM).
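
A sketch of the rescaling; the numerically stable form $1/(1+e^{-x})$ used below equals $\frac{e^{x}}{1+e^{x}}$.

\begin{verbatim}
import numpy as np

def logistic(x):
    # Inverse logit: Phi(x) = e^x / (1 + e^x).
    return 1.0 / (1.0 + np.exp(-x))

def rescaled_interval(y_hat, w):
    # Map [y_hat - w, y_hat + w] into [0, 1]; Phi is monotone,
    # so interval order (and hence the chaining) is preserved.
    return logistic(y_hat - w), logistic(y_hat + w)
\end{verbatim}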
133 |
134 | \begin{figure*}
135 | \includegraphics[width=\textwidth]{yahoo-interval-chaining.png}
136 | \caption{Performance metrics of the logistic-regression-based interval-chaining algorithm with $3$ clusters over 10,000 iterations. Shown on the left is a graph depicting the performance of the learning algorithm vs. that of the ``best'' player, whose picked article is clicked by the user in every round. The regret is simply the difference in the cumulative number of successes between the two. In practice, this is an unfair comparison to make, as it is unreasonable to expect that the user would click the featured article on every visit, so our results stand even stronger in comparison. On the right is a graph denoting the cumulative fraction of successful picks by the algorithm vs. the baseline (randomly selecting one of the three pools at each step). The learning algorithm appears to converge to approximately $50\%$ accuracy, which is considerably higher than the baseline. \label{fig:yahoo}}
137 | \end{figure*}
138 |
139 | \section{Experimental results}
140 |
141 | We ran experiments that compared the regret of \textsc{IntervalChaining} (IC) with the regret of \textsc{TopInterval} (TI). As in \newcite{DBLP:journals/corr/JosephKMNR16}, we present three sets of empirical results:
142 | \begin{itemize}
143 | \item Varying $T$ (the number of rounds): we measured the average regret of \textsc{IntervalChaining} and \textsc{TopInterval} as a function of increasing $T$. (See Figure \ref{fig:free_T}.)
144 | \item Varying $k$ (the number of arms/groups): we measured the average regret of \textsc{IntervalChaining} and \textsc{TopInterval} as a function of increasing $k$. (See Figure \ref{fig:free_k}.)
145 | \item Varying $d$ (the number of features): we measured the average regret of \textsc{IntervalChaining} and \textsc{TopInterval} as a function of increasing $d$. (See Figure \ref{fig:free_d}.)
146 | \end{itemize}
147 |
148 | For each varied parameter ($T$, $k$, or $d$), we present nine metrics as a function of that parameter, each averaged over 50 trials. Contexts are drawn uniformly at random from $[0,1]^d$, and the reward noise is standard Gaussian. \newcite{DBLP:journals/corr/JosephKMNR16} present only the average regret difference (metric \#3). A sketch of how these metrics are computed follows the list.
149 | \begin{enumerate}
150 | \item Average regret (TI): the average regret of \textsc{TopInterval} across all rounds.
151 | \item Average regret (IC): the average regret of \textsc{IntervalChaining} across all rounds.
152 | \item Average regret difference (TI vs. IC): the difference between the average regrets of \textsc{TopInterval} and \textsc{IntervalChaining} across all rounds.
153 | \item Cumulative regret (TI): the cumulative regret of \textsc{TopInterval} across all rounds.
154 | \item Cumulative regret (IC): the cumulative regret of \textsc{IntervalChaining} across all rounds.
155 | \item Cumulative regret difference (TI vs. IC): the difference between the cumulative regrets of \textsc{TopInterval} and \textsc{IntervalChaining} across all rounds.
156 | \item Final regret (TI): the regret of \textsc{TopInterval} in the final round.
157 | \item Final regret (IC): the regret of \textsc{IntervalChaining} in the final round.
158 | \item Final regret difference (TI vs. IC): the difference between the final regrets of \textsc{TopInterval} and \textsc{IntervalChaining}.
159 | \end{enumerate}
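
As referenced above, each triple of metrics is a simple reduction of a per-round regret sequence (for TI, for IC, or for their difference); a minimal sketch:

\begin{verbatim}
import numpy as np

def regret_summaries(regret):
    # regret: per-round regret values of one algorithm, shape (T,).
    return {'average': float(np.mean(regret)),
            'cumulative': float(np.sum(regret)),
            'final': float(regret[-1])}
\end{verbatim}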
160 |
161 | We present our results in Figures \ref{fig:free_T}, \ref{fig:free_k}, and \ref{fig:free_d}.
162 |
163 | \begin{figure*}[ht!]
164 | \centering
165 | \subfloat{ %
166 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{T_50x_avg_regret_ti}
167 | }
168 | \subfloat{ %
169 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{T_50x_avg_regret_ic}
170 | }
171 | \subfloat{ %
172 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{T_50x_avg_regret_diff}
173 | }
174 | \\
175 | \subfloat{ %
176 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{T_50x_cum_regret_ti}
177 | }
178 | \subfloat{ %
179 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{T_50x_cum_regret_ic}
180 | }
181 | \subfloat{ %
182 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{T_50x_cum_regret_diff}
183 | }
184 | \\
185 | \subfloat{ %
186 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{T_50x_final_regret_ti}
187 | }
188 | \subfloat{ %
189 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{T_50x_final_regret_ic}
190 | }
191 | \subfloat{ %
192 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{T_50x_final_regret_diff}
193 | }
194 | \caption{$d=2$, $k=2$, free $T$}
195 | \label{fig:free_T}
196 | \end{figure*}
197 |
198 | \begin{figure*}[ht!]
199 | \centering
200 | \subfloat{ %
201 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{k_50x_avg_regret_ti}
202 | }
203 | \subfloat{ %
204 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{k_50x_avg_regret_ic}
205 | }
206 | \subfloat{ %
207 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{k_50x_avg_regret_diff}
208 | }
209 | \\
210 | \subfloat{ %
211 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{k_50x_cum_regret_ti}
212 | }
213 | \subfloat{ %
214 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{k_50x_cum_regret_ic}
215 | }
216 | \subfloat{ %
217 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{k_50x_cum_regret_diff}
218 | }
219 | \\
220 | \subfloat{ %
221 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{k_50x_final_regret_ti}
222 | }
223 | \subfloat{ %
224 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{k_50x_final_regret_ic}
225 | }
226 | \subfloat{ %
227 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{k_50x_final_regret_diff}
228 | }
229 | \caption{$d=2$, $T=1000$, free $k$}
230 | \label{fig:free_k}
231 | \end{figure*}
232 |
233 | \begin{figure*}[ht!]
234 | \centering
235 | \subfloat{ %
236 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{d_50x_avg_regret_ti}
237 | }
238 | \subfloat{ %
239 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{d_50x_avg_regret_ic}
240 | }
241 | \subfloat{ %
242 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{d_50x_avg_regret_diff}
243 | }
244 | \\
245 | \subfloat{ %
246 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{d_50x_cum_regret_ti}
247 | }
248 | \subfloat{ %
249 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{d_50x_cum_regret_ic}
250 | }
251 | \subfloat{ %
252 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{d_50x_cum_regret_diff}
253 | }
254 | \\
255 | \subfloat{ %
256 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{d_50x_final_regret_ti}
257 | }
258 | \subfloat{ %
259 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{d_50x_final_regret_ic}
260 | }
261 | \subfloat{ %
262 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{d_50x_final_regret_diff}
263 | }
264 | \caption{$k=2$, $T=1000$, free $d$}
265 | \label{fig:free_d}
266 | \end{figure*}
267 |
268 | \section{Conclusion}
269 |
270 | In this work, we present an empirical extension of the work done by \newcite{DBLP:journals/corr/JosephKMNR16} in their paper \textit{Rawlsian Fairness for Machine Learning}. Specifically, we present implementations of their algorithms \textsc{TopInterval}, \textsc{IntervalChaining}, and \textsc{RidgeFair}; a case study in which we apply the aforementioned algorithms to a Yahoo! clicks dataset; and an extension of one of \newcite{DBLP:journals/corr/JosephKMNR16}'s empirical analyses on randomly generated data. We believe that our results may be useful should these algorithms be used in future real-world settings.
271 |
272 | \bibliography{paper}
273 | \bibliographystyle{acl}
274 |
275 | \end{document}
276 |
--------------------------------------------------------------------------------
/references/rawlsian_fairness.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/references/rawlsian_fairness.pdf
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | appdirs==1.4.3
2 | appnope==0.1.0
3 | bleach==2.0.0
4 | cycler==0.10.0
5 | decorator==4.0.11
6 | entrypoints==0.2.2
7 | html5lib==0.999999999
8 | ipykernel==4.6.1
9 | ipython==6.0.0
10 | ipython-genutils==0.2.0
11 | ipywidgets==6.0.0
12 | jedi==0.10.2
13 | Jinja2==2.9.6
14 | jsonschema==2.6.0
15 | jupyter==1.0.0
16 | jupyter-client==5.0.1
17 | jupyter-console==5.1.0
18 | jupyter-core==4.3.0
19 | MarkupSafe==1.0
20 | matplotlib==2.0.1
21 | mistune==0.7.4
22 | nbconvert==5.1.1
23 | nbformat==4.3.0
24 | notebook==5.0.0
25 | numpy==1.12.1
26 | packaging==16.8
27 | pandas==0.19.2
28 | pandocfilters==1.4.1
29 | pexpect==4.2.1
30 | pickleshare==0.7.4
31 | prompt-toolkit==1.0.14
32 | ptyprocess==0.5.1
33 | Pygments==2.2.0
34 | pyparsing==2.2.0
35 | python-dateutil==2.6.0
36 | pytz==2017.2
37 | pyzmq==16.0.2
38 | qtconsole==4.3.0
39 | scikit-learn==0.18.1
40 | scipy==0.19.0
41 | simplegeneric==0.8.1
42 | six==1.10.0
43 | terminado==0.6
44 | testpath==0.3
45 | tornado==4.5.1
46 | traitlets==4.3.2
47 | virtualenv==15.1.0
48 | wcwidth==0.1.7
49 | webencodings==0.5.1
50 | widgetsnbextension==2.0.0
51 |
--------------------------------------------------------------------------------