├── .gitignore
├── Horses.ipynb
├── NYSE.ipynb
├── Provably Fair Machine Learning.ipynb
├── README.md
├── RidgeFair.ipynb
├── Yahoo Testing.ipynb
├── Yahoo.ipynb
├── evaluation_T.py
├── evaluation_d.py
├── evaluation_k.py
├── fairml.py
├── figures_T_50x
│   ├── avg_regret_diff.png
│   ├── avg_regret_ic.png
│   ├── avg_regret_ti.png
│   ├── cum_regret_diff.png
│   ├── cum_regret_ic.png
│   ├── cum_regret_ti.png
│   ├── final_regret_diff.png
│   ├── final_regret_ic.png
│   └── final_regret_ti.png
├── figures_d_50x
│   ├── avg_regret_diff.png
│   ├── avg_regret_ic.png
│   ├── avg_regret_ti.png
│   ├── cum_regret_diff.png
│   ├── cum_regret_ic.png
│   ├── cum_regret_ti.png
│   ├── final_regret_diff.png
│   ├── final_regret_ic.png
│   └── final_regret_ti.png
├── figures_k_50x
│   ├── avg_regret_diff.png
│   ├── avg_regret_ic.png
│   ├── avg_regret_ti.png
│   ├── cum_regret_diff.png
│   ├── cum_regret_ic.png
│   ├── cum_regret_ti.png
│   ├── final_regret_diff.png
│   ├── final_regret_ic.png
│   └── final_regret_ti.png
├── paper
│   ├── .gitignore
│   ├── Makefile
│   ├── acl.bst
│   ├── acl2015.sty
│   ├── figures
│   │   ├── T_50x_avg_regret_diff.png
│   │   ├── T_50x_avg_regret_ic.png
│   │   ├── T_50x_avg_regret_ti.png
│   │   ├── T_50x_cum_regret_diff.png
│   │   ├── T_50x_cum_regret_ic.png
│   │   ├── T_50x_cum_regret_ti.png
│   │   ├── T_50x_final_regret_diff.png
│   │   ├── T_50x_final_regret_ic.png
│   │   ├── T_50x_final_regret_ti.png
│   │   ├── d_50x_avg_regret_diff.png
│   │   ├── d_50x_avg_regret_ic.png
│   │   ├── d_50x_avg_regret_ti.png
│   │   ├── d_50x_cum_regret_diff.png
│   │   ├── d_50x_cum_regret_ic.png
│   │   ├── d_50x_cum_regret_ti.png
│   │   ├── d_50x_final_regret_diff.png
│   │   ├── d_50x_final_regret_ic.png
│   │   ├── d_50x_final_regret_ti.png
│   │   ├── k_50x_avg_regret_diff.png
│   │   ├── k_50x_avg_regret_ic.png
│   │   ├── k_50x_avg_regret_ti.png
│   │   ├── k_50x_cum_regret_diff.png
│   │   ├── k_50x_cum_regret_ic.png
│   │   ├── k_50x_cum_regret_ti.png
│   │   ├── k_50x_final_regret_diff.png
│   │   ├── k_50x_final_regret_ic.png
│   │   ├── k_50x_final_regret_ti.png
│   │   ├── yahoo-interval-chaining.png
│   │   └── yahoo-top-interval.png
│   ├── paper.bib
│   ├── paper.pdf
│   └── paper.tex
├── references
│   └── rawlsian_fairness.pdf
└── requirements.txt
/.gitignore:
--------------------------------------------------------------------------------
1 | fairml
2 | 
--------------------------------------------------------------------------------
/Horses.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "code",
5 |    "execution_count": 1,
6 |    "metadata": {
7 |     "collapsed": true
8 |    },
9 |    "outputs": [],
10 |    "source": [
11 |     "import pandas as pd\n",
12 |     "import numpy as np"
13 |    ]
14 |   },
15 |   {
16 |    "cell_type": "code",
17 |    "execution_count": 53,
18 |    "metadata": {
19 |     "collapsed": false
20 |    },
21 |    "outputs": [],
22 |    "source": [
23 |     "# data_dir = './horses/'\n",
24 |     "# horses_df = pd.read_csv(data_dir + 'horses.csv',\n",
25 |     "#                         usecols=['age', 'sex_id', 'prize_money'])\n",
26 |     "# horse_sexes_df = pd.read_csv(data_dir + 'horse_sexes.csv').set_index('id')\n",
27 |     "# riders_df = pd.read_csv(data_dir + 'riders.csv').set_index('id')"
28 |    ]
29 |   },
30 |   {
31 |    "cell_type": "code",
32 |    "execution_count": 182,
33 |    "metadata": {
34 |     "collapsed": false
35 |    },
36 |    "outputs": [],
37 |    "source": [
38 |     "# Dataset Source: https://www.kaggle.com/gunner38/horseracing\n",
39 |     "tips_df = pd.read_csv('horse_tips.csv', encoding='latin1')\n",
40 |     "tips_df['Bet Type'] = tips_df['Bet Type'].apply(lambda x : 1 if x == 'Win' else 0)\n",
41 |     "tips_df['Result'] = tips_df['Result'].apply(lambda x : 1 if x == 'Win' else 0)\n",
42 |     "\n",
43 |     "horses = np.sort(tips_df['Horse'].unique())\n",
44 |     "tracks = np.sort(tips_df['Track'].unique())\n",
45 |     "\n",
46 |     "tips_df['Horse'] = tips_df['Horse'].apply(lambda x : np.where(horses == x)[0][0])\n",
47 |     "tips_df['Track'] = tips_df['Track'].apply(lambda x : np.where(tracks == x)[0][0])"
48 |    ]
49 |   },
50 |   {
51 |    "cell_type": "code",
52 |    "execution_count": 97,
53 |    "metadata": {
54 |     "collapsed": false
55 |    },
56 |    "outputs": [],
57 |    "source": [
58 |     "tipsters = tips_df['Tipster'].unique()"
59 |    ]
60 |   },
61 |   {
62 |    "cell_type": "code",
63 |    "execution_count": 151,
64 |    "metadata": {
65 |     "collapsed": false
66 |    },
67 |    "outputs": [
68 |     {
69 |      "data": {
70 |       "text/html": [
\n", 72 | "\n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | "
Success Rate
Tipster G0.672414
Tipster C0.616226
Tipster F0.520548
Tipster M0.520376
Tipster N0.496386
\n", 102 | "
" 103 | ], 104 | "text/plain": [ 105 | " Success Rate\n", 106 | "Tipster G 0.672414\n", 107 | "Tipster C 0.616226\n", 108 | "Tipster F 0.520548\n", 109 | "Tipster M 0.520376\n", 110 | "Tipster N 0.496386" 111 | ] 112 | }, 113 | "execution_count": 151, 114 | "metadata": {}, 115 | "output_type": "execute_result" 116 | } 117 | ], 118 | "source": [ 119 | "success_rates = dict()\n", 120 | "for tipster in tipsters:\n", 121 | " successes = tips_df[(tips_df['Tipster'] == tipster) & (tips_df['Bet Type'] == tips_df['Result'])].shape[0]\n", 122 | " total = tips_df[(tips_df['Tipster'] == tipster)].shape[0]\n", 123 | " success_rates[tipster] = successes/total\n", 124 | "successes_df = pd.DataFrame(pd.Series(success_rates), columns=['Success Rate']).sort_values(by='Success Rate', ascending=False)\n", 125 | "successes_df.head(5)" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": 149, 131 | "metadata": { 132 | "collapsed": false 133 | }, 134 | "outputs": [], 135 | "source": [ 136 | "X_G = tips_df[tips_df.Tipster=='Tipster G']\n", 137 | "X_C = tips_df[tips_df.Tipster=='Tipster C']\n", 138 | "X_F = tips_df[tips_df.Tipster=='Tipster F']\n", 139 | "X_M = tips_df[tips_df.Tipster=='Tipster M']\n", 140 | "X_N = tips_df[tips_df.Tipster=='Tipster N']" 141 | ] 142 | }, 143 | { 144 | "cell_type": "markdown", 145 | "metadata": {}, 146 | "source": [ 147 | "We model the online learning algorithm as follows.\n", 148 | "\n", 149 | "In each round, we are given a betting scheme from each tipster, drawn uniformly at random from the pool of possible schemes. We model the quality function (the return) using OLS on the features. The true return is computed as if ten dollars were bet in any scheme from the odds and the true result (i.e. 10 * the odds)." 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": 183, 155 | "metadata": { 156 | "collapsed": false 157 | }, 158 | "outputs": [ 159 | { 160 | "data": { 161 | "text/html": [ 162 | "
\n", 163 | "\n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 
436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | 
" \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | " \n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | " \n", 725 | " \n", 726 | " \n", 727 | " \n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | " \n", 809 | " \n", 810 | " \n", 811 | " \n", 812 | " \n", 813 | " \n", 814 | " \n", 815 | " \n", 816 | " \n", 817 | " \n", 818 | " \n", 819 | " \n", 820 | " \n", 821 | " \n", 822 | " \n", 823 | " \n", 824 | " \n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | " \n", 896 | " \n", 897 | " \n", 898 | " \n", 899 | " \n", 900 | " \n", 901 | " \n", 902 | " \n", 903 | " \n", 904 | " \n", 905 | " \n", 906 | " \n", 907 | " \n", 908 | " \n", 909 | " \n", 910 | " \n", 911 | " \n", 912 | " \n", 913 | " \n", 914 | " \n", 915 | " \n", 916 | " \n", 917 | " \n", 918 | " \n", 919 | " \n", 920 | " \n", 921 | " \n", 922 | " \n", 923 | " \n", 924 | " \n", 925 | " \n", 926 | " \n", 927 | " \n", 928 | " \n", 929 | " \n", 930 | " \n", 931 | " \n", 932 | " \n", 933 | " \n", 934 | " \n", 935 | " \n", 936 | " \n", 937 | " \n", 938 | " \n", 939 | " \n", 940 | " \n", 941 | " \n", 942 | " \n", 943 | " \n", 944 | " \n", 945 | " \n", 946 | " \n", 947 | " \n", 948 | " \n", 949 | " \n", 950 | " \n", 951 | " \n", 952 | " \n", 953 | " \n", 954 | " \n", 955 | " \n", 956 | " \n", 957 | " \n", 958 | " \n", 959 | " \n", 960 | " \n", 961 | " \n", 962 | " \n", 963 | " \n", 964 | " \n", 965 | " \n", 966 | " \n", 967 | " \n", 968 | " \n", 969 | " \n", 970 | " \n", 971 | " \n", 972 | " \n", 973 | " \n", 974 | "
UIDIDTipsterDateTrackHorseBet TypeOddsResultTipsterActive
011Tipster A24/07/20152515818.000True
122Tipster A24/07/2015961310814.500True
233Tipster A24/07/20151141341117.000True
344Tipster A24/07/201574897615.000True
455Tipster A25/07/201521055414.331True
566Tipster A25/07/201511432716.000True
677Tipster A25/07/201573979116.000True
788Tipster A25/07/201562201916.000True
899Tipster A26/07/2015121210115.500True
91010Tipster A26/07/20158011512.000True
101111Tipster A26/07/20151048878010.000True
111212Tipster A01/08/201596550818.000True
121313Tipster A01/08/201547448416.000True
131414Tipster A02/08/201518660216.000True
141515Tipster A04/08/2015851539013.251True
151616Tipster A04/08/201514785112.750True
161717Tipster A04/08/201516155413.500True
171818Tipster A05/08/2015111253713.250True
181919Tipster A05/08/2015731030713.250True
192020Tipster A06/08/20151116326011.000True
202121Tipster A14/08/201574335811.800True
212222Tipster A15/08/2015721548714.000True
222323Tipster A15/08/201572226907.500True
232424Tipster A15/08/20157230414.501True
242525Tipster A15/08/2015661446613.250True
252626Tipster A16/08/20151021042708.000True
262727Tipster A16/08/2015102651712.500True
272828Tipster A18/08/2015594285111.001True
282929Tipster A18/08/2015165607011.001True
293030Tipster A18/08/2015161452213.250True
.................................
3821838219580Tipster E130/01/201626720913.000False
3821938220581Tipster E103/02/20167314716034.000False
3822038221582Tipster E110/02/20161214716015.000False
3822138222583Tipster E113/02/20161111384013.001False
3822238223584Tipster E114/02/201692498214.500False
3822338224585Tipster E125/02/20164713367011.000False
3822438225586Tipster E113/03/2016106272315.500False
3822538226587Tipster E113/03/2016538120021.000False
3822638227588Tipster E115/03/20161748015.001False
3822738228589Tipster E115/03/2016172365026.000False
3822838229590Tipster E115/03/2016179238067.000False
3822938230591Tipster E117/03/201617172216.501False
3823038231592Tipster E117/03/201616213316.500False
3823138232593Tipster E118/03/2016173129017.000False
3823238233594Tipster E118/03/201617806618.000False
3823338234595Tipster E118/03/20161711674011.001False
3823438235596Tipster E119/03/2016548847011.001False
3823538236597Tipster E120/03/201612936615.000False
3823638237598Tipster E126/03/201667162817.500False
3823738238599Tipster E128/03/20163511945029.000False
3823838239600Tipster E130/03/201692143615.000False
3823938240601Tipster E130/03/201692629516.000False
3824038241602Tipster E130/03/20163440714.330False
3824138242603Tipster E101/04/20161071414114.500False
3824238243604Tipster E101/04/20161115905110.000False
3824338244605Tipster E102/04/2016541294617.000False
3824438245606Tipster E102/04/2016264591012.000False
3824538246607Tipster E102/04/201626682417.000False
3824638247608Tipster E102/04/201653706814.330False
3824738248609Tipster E131/05/201682401515.000False
\n", 975 | "

38248 rows × 10 columns

\n", 976 | "
" 977 | ], 978 | "text/plain": [ 979 | " UID ID Tipster Date Track Horse Bet Type Odds \\\n", 980 | "0 1 1 Tipster A 24/07/2015 2 5158 1 8.00 \n", 981 | "1 2 2 Tipster A 24/07/2015 96 13108 1 4.50 \n", 982 | "2 3 3 Tipster A 24/07/2015 114 13411 1 7.00 \n", 983 | "3 4 4 Tipster A 24/07/2015 74 8976 1 5.00 \n", 984 | "4 5 5 Tipster A 25/07/2015 2 10554 1 4.33 \n", 985 | "5 6 6 Tipster A 25/07/2015 114 327 1 6.00 \n", 986 | "6 7 7 Tipster A 25/07/2015 73 9791 1 6.00 \n", 987 | "7 8 8 Tipster A 25/07/2015 62 2019 1 6.00 \n", 988 | "8 9 9 Tipster A 26/07/2015 12 12101 1 5.50 \n", 989 | "9 10 10 Tipster A 26/07/2015 80 115 1 2.00 \n", 990 | "10 11 11 Tipster A 26/07/2015 104 8878 0 10.00 \n", 991 | "11 12 12 Tipster A 01/08/2015 96 5508 1 8.00 \n", 992 | "12 13 13 Tipster A 01/08/2015 47 4484 1 6.00 \n", 993 | "13 14 14 Tipster A 02/08/2015 18 6602 1 6.00 \n", 994 | "14 15 15 Tipster A 04/08/2015 85 15390 1 3.25 \n", 995 | "15 16 16 Tipster A 04/08/2015 14 7851 1 2.75 \n", 996 | "16 17 17 Tipster A 04/08/2015 16 1554 1 3.50 \n", 997 | "17 18 18 Tipster A 05/08/2015 11 12537 1 3.25 \n", 998 | "18 19 19 Tipster A 05/08/2015 73 10307 1 3.25 \n", 999 | "19 20 20 Tipster A 06/08/2015 111 6326 0 11.00 \n", 1000 | "20 21 21 Tipster A 14/08/2015 74 3358 1 1.80 \n", 1001 | "21 22 22 Tipster A 15/08/2015 72 15487 1 4.00 \n", 1002 | "22 23 23 Tipster A 15/08/2015 72 2269 0 7.50 \n", 1003 | "23 24 24 Tipster A 15/08/2015 72 304 1 4.50 \n", 1004 | "24 25 25 Tipster A 15/08/2015 66 14466 1 3.25 \n", 1005 | "25 26 26 Tipster A 16/08/2015 102 10427 0 8.00 \n", 1006 | "26 27 27 Tipster A 16/08/2015 102 6517 1 2.50 \n", 1007 | "27 28 28 Tipster A 18/08/2015 59 4285 1 11.00 \n", 1008 | "28 29 29 Tipster A 18/08/2015 16 5607 0 11.00 \n", 1009 | "29 30 30 Tipster A 18/08/2015 16 14522 1 3.25 \n", 1010 | "... ... ... ... ... ... ... ... ... 
\n", 1011 | "38218 38219 580 Tipster E1 30/01/2016 26 7209 1 3.00 \n", 1012 | "38219 38220 581 Tipster E1 03/02/2016 73 14716 0 34.00 \n", 1013 | "38220 38221 582 Tipster E1 10/02/2016 12 14716 0 15.00 \n", 1014 | "38221 38222 583 Tipster E1 13/02/2016 111 1384 0 13.00 \n", 1015 | "38222 38223 584 Tipster E1 14/02/2016 92 4982 1 4.50 \n", 1016 | "38223 38224 585 Tipster E1 25/02/2016 47 13367 0 11.00 \n", 1017 | "38224 38225 586 Tipster E1 13/03/2016 106 2723 1 5.50 \n", 1018 | "38225 38226 587 Tipster E1 13/03/2016 53 8120 0 21.00 \n", 1019 | "38226 38227 588 Tipster E1 15/03/2016 17 480 1 5.00 \n", 1020 | "38227 38228 589 Tipster E1 15/03/2016 17 2365 0 26.00 \n", 1021 | "38228 38229 590 Tipster E1 15/03/2016 17 9238 0 67.00 \n", 1022 | "38229 38230 591 Tipster E1 17/03/2016 17 1722 1 6.50 \n", 1023 | "38230 38231 592 Tipster E1 17/03/2016 16 2133 1 6.50 \n", 1024 | "38231 38232 593 Tipster E1 18/03/2016 17 3129 0 17.00 \n", 1025 | "38232 38233 594 Tipster E1 18/03/2016 17 8066 1 8.00 \n", 1026 | "38233 38234 595 Tipster E1 18/03/2016 17 11674 0 11.00 \n", 1027 | "38234 38235 596 Tipster E1 19/03/2016 54 8847 0 11.00 \n", 1028 | "38235 38236 597 Tipster E1 20/03/2016 12 9366 1 5.00 \n", 1029 | "38236 38237 598 Tipster E1 26/03/2016 67 1628 1 7.50 \n", 1030 | "38237 38238 599 Tipster E1 28/03/2016 35 11945 0 29.00 \n", 1031 | "38238 38239 600 Tipster E1 30/03/2016 92 1436 1 5.00 \n", 1032 | "38239 38240 601 Tipster E1 30/03/2016 92 6295 1 6.00 \n", 1033 | "38240 38241 602 Tipster E1 30/03/2016 34 407 1 4.33 \n", 1034 | "38241 38242 603 Tipster E1 01/04/2016 107 14141 1 4.50 \n", 1035 | "38242 38243 604 Tipster E1 01/04/2016 111 5905 1 10.00 \n", 1036 | "38243 38244 605 Tipster E1 02/04/2016 54 12946 1 7.00 \n", 1037 | "38244 38245 606 Tipster E1 02/04/2016 26 4591 0 12.00 \n", 1038 | "38245 38246 607 Tipster E1 02/04/2016 26 6824 1 7.00 \n", 1039 | "38246 38247 608 Tipster E1 02/04/2016 53 7068 1 4.33 \n", 1040 | "38247 38248 609 Tipster E1 31/05/2016 82 4015 1 5.00 \n", 1041 | "\n", 1042 | " Result TipsterActive \n", 1043 | "0 0 True \n", 1044 | "1 0 True \n", 1045 | "2 0 True \n", 1046 | "3 0 True \n", 1047 | "4 1 True \n", 1048 | "5 0 True \n", 1049 | "6 0 True \n", 1050 | "7 0 True \n", 1051 | "8 0 True \n", 1052 | "9 0 True \n", 1053 | "10 0 True \n", 1054 | "11 0 True \n", 1055 | "12 0 True \n", 1056 | "13 0 True \n", 1057 | "14 1 True \n", 1058 | "15 0 True \n", 1059 | "16 0 True \n", 1060 | "17 0 True \n", 1061 | "18 0 True \n", 1062 | "19 0 True \n", 1063 | "20 0 True \n", 1064 | "21 0 True \n", 1065 | "22 0 True \n", 1066 | "23 1 True \n", 1067 | "24 0 True \n", 1068 | "25 0 True \n", 1069 | "26 0 True \n", 1070 | "27 1 True \n", 1071 | "28 1 True \n", 1072 | "29 0 True \n", 1073 | "... ... ... 
\n", 1074 | "38218 0 False \n", 1075 | "38219 0 False \n", 1076 | "38220 0 False \n", 1077 | "38221 1 False \n", 1078 | "38222 0 False \n", 1079 | "38223 0 False \n", 1080 | "38224 0 False \n", 1081 | "38225 0 False \n", 1082 | "38226 1 False \n", 1083 | "38227 0 False \n", 1084 | "38228 0 False \n", 1085 | "38229 1 False \n", 1086 | "38230 0 False \n", 1087 | "38231 0 False \n", 1088 | "38232 0 False \n", 1089 | "38233 1 False \n", 1090 | "38234 1 False \n", 1091 | "38235 0 False \n", 1092 | "38236 0 False \n", 1093 | "38237 0 False \n", 1094 | "38238 0 False \n", 1095 | "38239 0 False \n", 1096 | "38240 0 False \n", 1097 | "38241 0 False \n", 1098 | "38242 0 False \n", 1099 | "38243 0 False \n", 1100 | "38244 0 False \n", 1101 | "38245 0 False \n", 1102 | "38246 0 False \n", 1103 | "38247 0 False \n", 1104 | "\n", 1105 | "[38248 rows x 10 columns]" 1106 | ] 1107 | }, 1108 | "execution_count": 183, 1109 | "metadata": {}, 1110 | "output_type": "execute_result" 1111 | } 1112 | ], 1113 | "source": [ 1114 | "tips_df" 1115 | ] 1116 | }, 1117 | { 1118 | "cell_type": "code", 1119 | "execution_count": null, 1120 | "metadata": { 1121 | "collapsed": true 1122 | }, 1123 | "outputs": [], 1124 | "source": [] 1125 | } 1126 | ], 1127 | "metadata": { 1128 | "kernelspec": { 1129 | "display_name": "Python 3", 1130 | "language": "python", 1131 | "name": "python3" 1132 | }, 1133 | "language_info": { 1134 | "codemirror_mode": { 1135 | "name": "ipython", 1136 | "version": 3 1137 | }, 1138 | "file_extension": ".py", 1139 | "mimetype": "text/x-python", 1140 | "name": "python", 1141 | "nbconvert_exporter": "python", 1142 | "pygments_lexer": "ipython3", 1143 | "version": "3.5.2" 1144 | } 1145 | }, 1146 | "nbformat": 4, 1147 | "nbformat_minor": 2 1148 | } 1149 | -------------------------------------------------------------------------------- /NYSE.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import pandas as pd\n", 12 | "import numpy as np\n", 13 | "\n", 14 | "data_dir = './nyse/'" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 8, 20 | "metadata": { 21 | "collapsed": true 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "fundamentals_df = pd.read_csv(data_dir + 'fundamentals.csv')\n", 26 | "prices_df = pd.read_csv(data_dir + 'prices-split-adjusted.csv')" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 13, 32 | "metadata": { 33 | "collapsed": false 34 | }, 35 | "outputs": [ 36 | { 37 | "data": { 38 | "text/html": [ 39 | "
\n", 40 | "\n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | "
Unnamed: 0Ticker SymbolPeriod EndingAccounts PayableAccounts ReceivableAdd'l income/expense itemsAfter Tax ROECapital ExpendituresCapital SurplusCash Ratio...Total Current AssetsTotal Current LiabilitiesTotal EquityTotal LiabilitiesTotal Liabilities & EquityTotal RevenueTreasury StockFor YearEarnings Per ShareEstimated Shares Outstanding
00AAL2012-12-313.068000e+09-222000000.0-1.961000e+0923.0-1.888000e+094.695000e+0953.0...7.072000e+099.011000e+09-7.987000e+092.489100e+101.690400e+102.485500e+10-367000000.02012.0-5.603.350000e+08
11AAL2013-12-314.975000e+09-93000000.0-2.723000e+0967.0-3.114000e+091.059200e+1075.0...1.432300e+101.380600e+10-2.731000e+094.500900e+104.227800e+102.674300e+100.02013.0-11.251.630222e+08
22AAL2014-12-314.668000e+09-160000000.0-1.500000e+08143.0-5.311000e+091.513500e+1060.0...1.175000e+101.340400e+102.021000e+094.120400e+104.322500e+104.265000e+100.02014.04.027.169154e+08
33AAL2015-12-315.102000e+09352000000.0-7.080000e+08135.0-6.151000e+091.159100e+1051.0...9.985000e+091.360500e+105.635000e+094.278000e+104.841500e+104.099000e+100.02015.011.396.681299e+08
44AAP2012-12-292.409453e+09-89482000.06.000000e+0532.0-2.711820e+085.202150e+0823.0...3.184200e+092.559638e+091.210694e+093.403120e+094.613814e+096.205003e+09-27095000.02012.05.297.328355e+07
\n", 190 | "

5 rows × 79 columns

\n", 191 | "
" 192 | ], 193 | "text/plain": [ 194 | " Unnamed: 0 Ticker Symbol Period Ending Accounts Payable \\\n", 195 | "0 0 AAL 2012-12-31 3.068000e+09 \n", 196 | "1 1 AAL 2013-12-31 4.975000e+09 \n", 197 | "2 2 AAL 2014-12-31 4.668000e+09 \n", 198 | "3 3 AAL 2015-12-31 5.102000e+09 \n", 199 | "4 4 AAP 2012-12-29 2.409453e+09 \n", 200 | "\n", 201 | " Accounts Receivable Add'l income/expense items After Tax ROE \\\n", 202 | "0 -222000000.0 -1.961000e+09 23.0 \n", 203 | "1 -93000000.0 -2.723000e+09 67.0 \n", 204 | "2 -160000000.0 -1.500000e+08 143.0 \n", 205 | "3 352000000.0 -7.080000e+08 135.0 \n", 206 | "4 -89482000.0 6.000000e+05 32.0 \n", 207 | "\n", 208 | " Capital Expenditures Capital Surplus Cash Ratio \\\n", 209 | "0 -1.888000e+09 4.695000e+09 53.0 \n", 210 | "1 -3.114000e+09 1.059200e+10 75.0 \n", 211 | "2 -5.311000e+09 1.513500e+10 60.0 \n", 212 | "3 -6.151000e+09 1.159100e+10 51.0 \n", 213 | "4 -2.711820e+08 5.202150e+08 23.0 \n", 214 | "\n", 215 | " ... Total Current Assets \\\n", 216 | "0 ... 7.072000e+09 \n", 217 | "1 ... 1.432300e+10 \n", 218 | "2 ... 1.175000e+10 \n", 219 | "3 ... 9.985000e+09 \n", 220 | "4 ... 3.184200e+09 \n", 221 | "\n", 222 | " Total Current Liabilities Total Equity Total Liabilities \\\n", 223 | "0 9.011000e+09 -7.987000e+09 2.489100e+10 \n", 224 | "1 1.380600e+10 -2.731000e+09 4.500900e+10 \n", 225 | "2 1.340400e+10 2.021000e+09 4.120400e+10 \n", 226 | "3 1.360500e+10 5.635000e+09 4.278000e+10 \n", 227 | "4 2.559638e+09 1.210694e+09 3.403120e+09 \n", 228 | "\n", 229 | " Total Liabilities & Equity Total Revenue Treasury Stock For Year \\\n", 230 | "0 1.690400e+10 2.485500e+10 -367000000.0 2012.0 \n", 231 | "1 4.227800e+10 2.674300e+10 0.0 2013.0 \n", 232 | "2 4.322500e+10 4.265000e+10 0.0 2014.0 \n", 233 | "3 4.841500e+10 4.099000e+10 0.0 2015.0 \n", 234 | "4 4.613814e+09 6.205003e+09 -27095000.0 2012.0 \n", 235 | "\n", 236 | " Earnings Per Share Estimated Shares Outstanding \n", 237 | "0 -5.60 3.350000e+08 \n", 238 | "1 -11.25 1.630222e+08 \n", 239 | "2 4.02 7.169154e+08 \n", 240 | "3 11.39 6.681299e+08 \n", 241 | "4 5.29 7.328355e+07 \n", 242 | "\n", 243 | "[5 rows x 79 columns]" 244 | ] 245 | }, 246 | "execution_count": 13, 247 | "metadata": {}, 248 | "output_type": "execute_result" 249 | } 250 | ], 251 | "source": [ 252 | "fundamentals_df.head()" 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": 16, 258 | "metadata": { 259 | "collapsed": false, 260 | "scrolled": true 261 | }, 262 | "outputs": [ 263 | { 264 | "data": { 265 | "text/plain": [ 266 | "Index(['Unnamed: 0', 'Ticker Symbol', 'Period Ending', 'Accounts Payable',\n", 267 | " 'Accounts Receivable', 'Add'l income/expense items', 'After Tax ROE',\n", 268 | " 'Capital Expenditures', 'Capital Surplus', 'Cash Ratio',\n", 269 | " 'Cash and Cash Equivalents', 'Changes in Inventories', 'Common Stocks',\n", 270 | " 'Cost of Revenue', 'Current Ratio', 'Deferred Asset Charges',\n", 271 | " 'Deferred Liability Charges', 'Depreciation',\n", 272 | " 'Earnings Before Interest and Tax', 'Earnings Before Tax',\n", 273 | " 'Effect of Exchange Rate',\n", 274 | " 'Equity Earnings/Loss Unconsolidated Subsidiary', 'Fixed Assets',\n", 275 | " 'Goodwill', 'Gross Margin', 'Gross Profit', 'Income Tax',\n", 276 | " 'Intangible Assets', 'Interest Expense', 'Inventory', 'Investments',\n", 277 | " 'Liabilities', 'Long-Term Debt', 'Long-Term Investments',\n", 278 | " 'Minority Interest', 'Misc. 
Stocks', 'Net Borrowings', 'Net Cash Flow',\n", 279 | " 'Net Cash Flow-Operating', 'Net Cash Flows-Financing',\n", 280 | " 'Net Cash Flows-Investing', 'Net Income', 'Net Income Adjustments',\n", 281 | " 'Net Income Applicable to Common Shareholders',\n", 282 | " 'Net Income-Cont. Operations', 'Net Receivables', 'Non-Recurring Items',\n", 283 | " 'Operating Income', 'Operating Margin', 'Other Assets',\n", 284 | " 'Other Current Assets', 'Other Current Liabilities', 'Other Equity',\n", 285 | " 'Other Financing Activities', 'Other Investing Activities',\n", 286 | " 'Other Liabilities', 'Other Operating Activities',\n", 287 | " 'Other Operating Items', 'Pre-Tax Margin', 'Pre-Tax ROE',\n", 288 | " 'Profit Margin', 'Quick Ratio', 'Research and Development',\n", 289 | " 'Retained Earnings', 'Sale and Purchase of Stock',\n", 290 | " 'Sales, General and Admin.',\n", 291 | " 'Short-Term Debt / Current Portion of Long-Term Debt',\n", 292 | " 'Short-Term Investments', 'Total Assets', 'Total Current Assets',\n", 293 | " 'Total Current Liabilities', 'Total Equity', 'Total Liabilities',\n", 294 | " 'Total Liabilities & Equity', 'Total Revenue', 'Treasury Stock',\n", 295 | " 'For Year', 'Earnings Per Share', 'Estimated Shares Outstanding'],\n", 296 | " dtype='object')" 297 | ] 298 | }, 299 | "execution_count": 16, 300 | "metadata": {}, 301 | "output_type": "execute_result" 302 | } 303 | ], 304 | "source": [ 305 | "fundamentals_df.columns" 306 | ] 307 | }, 308 | { 309 | "cell_type": "code", 310 | "execution_count": 9, 311 | "metadata": { 312 | "collapsed": false 313 | }, 314 | "outputs": [ 315 | { 316 | "data": { 317 | "text/html": [ 318 | "
\n", 319 | "\n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | "
datesymbolopencloselowhighvolume
02016-01-05WLTW123.430000125.839996122.309998126.2500002163600.0
12016-01-06WLTW125.239998119.980003119.940002125.5400012386400.0
22016-01-07WLTW116.379997114.949997114.930000119.7399982489500.0
32016-01-08WLTW115.480003116.620003113.500000117.4400022006300.0
42016-01-11WLTW117.010002114.970001114.089996117.3300021408600.0
\n", 385 | "
" 386 | ], 387 | "text/plain": [ 388 | " date symbol open close low high \\\n", 389 | "0 2016-01-05 WLTW 123.430000 125.839996 122.309998 126.250000 \n", 390 | "1 2016-01-06 WLTW 125.239998 119.980003 119.940002 125.540001 \n", 391 | "2 2016-01-07 WLTW 116.379997 114.949997 114.930000 119.739998 \n", 392 | "3 2016-01-08 WLTW 115.480003 116.620003 113.500000 117.440002 \n", 393 | "4 2016-01-11 WLTW 117.010002 114.970001 114.089996 117.330002 \n", 394 | "\n", 395 | " volume \n", 396 | "0 2163600.0 \n", 397 | "1 2386400.0 \n", 398 | "2 2489500.0 \n", 399 | "3 2006300.0 \n", 400 | "4 1408600.0 " 401 | ] 402 | }, 403 | "execution_count": 9, 404 | "metadata": {}, 405 | "output_type": "execute_result" 406 | } 407 | ], 408 | "source": [ 409 | "prices_df.head()" 410 | ] 411 | }, 412 | { 413 | "cell_type": "code", 414 | "execution_count": 10, 415 | "metadata": { 416 | "collapsed": false 417 | }, 418 | "outputs": [ 419 | { 420 | "data": { 421 | "text/plain": [ 422 | "(851264, 7)" 423 | ] 424 | }, 425 | "execution_count": 10, 426 | "metadata": {}, 427 | "output_type": "execute_result" 428 | } 429 | ], 430 | "source": [ 431 | "prices_df.shape" 432 | ] 433 | }, 434 | { 435 | "cell_type": "code", 436 | "execution_count": null, 437 | "metadata": { 438 | "collapsed": true 439 | }, 440 | "outputs": [], 441 | "source": [] 442 | } 443 | ], 444 | "metadata": { 445 | "kernelspec": { 446 | "display_name": "Python 3", 447 | "language": "python", 448 | "name": "python3" 449 | }, 450 | "language_info": { 451 | "codemirror_mode": { 452 | "name": "ipython", 453 | "version": 3 454 | }, 455 | "file_extension": ".py", 456 | "mimetype": "text/x-python", 457 | "name": "python", 458 | "nbconvert_exporter": "python", 459 | "pygments_lexer": "ipython3", 460 | "version": "3.5.2" 461 | } 462 | }, 463 | "nbformat": 4, 464 | "nbformat_minor": 2 465 | } 466 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Rawlsian Fair Machine Learning for Contextual Bandits 2 | 3 | Implementation and evaluation of provably Rawlsian fair ML algorithms for contextual bandits. 4 | 5 | Related Work/Citations: 6 | 7 | * Rawlsian Fairness for Machine Learning (https://arxiv.org/abs/1610.09559) 8 | * Unbiased Offline Evaluation of Contextual-bandit-based News Article Recommendation Algorithms (https://arxiv.org/abs/1003.5956) 9 | 10 | ## Installation Instructions 11 | 12 | ### (Option 1) Setting Up virtualenv 13 | 14 | #### OSX 15 | 16 | Install Python 3 from [package](https://www.python.org/downloads/). This allows you to run `python3` and `pip3`. Software is installed into `/Library/Frameworks/Python.framework/Versions/3.x/bin/`. 
17 | 
18 | Install virtualenv for Python 3 for the user only (it is installed into `~/Library/Python/3.x/bin`):
19 | 
20 | ```
21 | $ pip3 install --user virtualenv
22 | ```
23 | 
24 | Create the following alias in your `~/.bash_profile`:
25 | 
26 | ```
27 | $ echo "alias virtualenv3='~/Library/Python/3.x/bin/virtualenv'" >> ~/.bash_profile
28 | ```
29 | 
30 | Create a local virtualenv and activate it:
31 | 
32 | ```
33 | $ virtualenv3 fairml
34 | $ source fairml/bin/activate
35 | ```
36 | 
37 | With the virtualenv active, install the project requirements:
38 | 
39 | ```
40 | $ pip install -r requirements.txt
41 | ```
42 | 
43 | Create a Python kernel for Jupyter that uses your virtualenv:
44 | 
45 | ```
46 | $ python -m ipykernel install --user --name=fairml
47 | ```
48 | 
49 | You can then launch Jupyter with `jupyter notebook` from inside the project directory and switch the kernel to `fairml`.
50 | 
51 | ### (Option 2) Using Docker
52 | 
53 | You can install [Docker](https://www.docker.com) and use a standard configuration such as `all-spark-notebook` to run the project files.
54 | 
--------------------------------------------------------------------------------
/RidgeFair.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "code",
5 |    "execution_count": 1,
6 |    "metadata": {
7 |     "collapsed": true
8 |    },
9 |    "outputs": [],
10 |    "source": [
11 |     "import numpy as np\n",
12 |     "from numpy import transpose\n",
13 |     "from numpy.linalg import inv, det\n",
14 |     "from scipy.stats import norm\n",
15 |     "from math import sqrt\n",
16 |     "from numpy import log"
17 |    ]
18 |   },
19 |   {
20 |    "cell_type": "code",
21 |    "execution_count": 31,
22 |    "metadata": {
23 |     "collapsed": true
24 |    },
25 |    "outputs": [],
26 |    "source": [
27 |     "def ridgeFair(X, Y, k, d, _delta, T, _lambda):\n",
28 |     "    \"\"\"\n",
29 |     "    Simulates T rounds of ridgeFair.\n",
30 |     "    \n",
31 |     "    :param X: a 3-axis (T, k, d) ndarray of d-dimensional context vectors for each\n",
32 |     "              time-step and arm\n",
33 |     "    :param Y: a T x k ndarray of reward function output for each context vector\n",
34 |     "    :param k: the number of arms\n",
35 |     "    :param d: the number of features\n",
36 |     "    :param _delta: confidence parameter\n",
37 |     "    :param T: the number of iterations\n",
38 |     "    :param _lambda: regularization strength of the ridge estimator\n",
39 |     "    \"\"\"\n",
40 |     "    picks = []\n",
41 |     "    R = 1\n",
42 |     "    for t in range(T): # for t >= 1\n",
43 |     "        intervals = [] # one confidence interval per arm, rebuilt every round\n",
44 |     "        for i in range(k): # for 1 <= i <= k\n",
45 |     "            try:\n",
46 |     "                X_i = X[:t,i] # design matrix\n",
47 |     "                Y_i = Y[:t,i] # same with Y\n",
48 |     "                x_ti = X[t,i] # feature vector for arm i in round t\n",
49 |     "\n",
50 |     "                X_iT = transpose(X_i)\n",
51 |     "                _idenD = np.identity(d)\n",
52 |     "                V_it = X_iT.dot(X_i) + (_lambda*_idenD) # computing V_it as line 5\n",
53 |     "\n",
54 |     "                B_it = inv(V_it).dot(X_iT).dot(Y_i) # computing line 6\n",
55 |     "                \n",
56 |     "                y_ti = transpose(x_ti).dot(B_it) # computing line 7\n",
57 |     "                \n",
58 |     "                V_itI = inv(V_it) # inverse of V_it\n",
59 |     "                _wti1 = sqrt(transpose(x_ti).dot(V_itI).dot(x_ti))\n",
60 |     "                _wti2 = R * sqrt(d*log((1+(t/_lambda))/_delta)) + sqrt(_lambda)\n",
61 |     "                w_ti = _wti1 * _wti2 # computing w_ti as line 8\n",
62 |     "\n",
63 |     "                intervals.append([y_ti - w_ti, y_ti + w_ti]) # line 9\n",
64 |     "            except:\n",
65 |     "                print('Error in assigning interval value.')\n",
66 |     "                intervals = None\n",
67 |     "                break\n",
68 |     "        if not intervals:\n",
69 |     "            picks.append(np.random.randint(0,k))\n",
70 |     "        else:\n",
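    "            # Lines 10-11 of the pseudocode: the arm with the highest upper\n",
    "            # confidence bound anchors a chain of every arm whose interval\n",
    "            # overlaps it; no arm in the chain can be statistically ruled out\n",
    "            # as the best, so playing one uniformly at random is fair.\n",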
71 | " i_st = np.argmax(np.array(intervals)[:,1]) # line 10\n", 72 | " chain = compute_chain(i_st, np.array(intervals), k) # line 11\n", 73 | " picks.append(np.random.choice(chain)) # play uniformly random from chain\n", 74 | " \n", 75 | " best = [Y[i].max() for i in range(2, T)]\n", 76 | " performance = [Y[t][picks[t-2]] for t in range(2, T)]\n", 77 | " print('Cumulative Regret: {0}'.format(sum(best) - sum(performance)))\n", 78 | " print('Final Regret: {0}'.format(best[-1] - performance[-1]))\n", 79 | " " 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 32, 85 | "metadata": { 86 | "collapsed": true 87 | }, 88 | "outputs": [], 89 | "source": [ 90 | "def compute_chain(i_st, intervals, k):\n", 91 | " # Sort intervals by decreasing order.\n", 92 | " chain = [i_st]\n", 93 | " ordering = np.argsort(intervals[:,1])[::-1]\n", 94 | " intervals = intervals[ordering,:]\n", 95 | " \n", 96 | " lowest_in_chain = intervals[0][0]\n", 97 | " for i in range(len(intervals)):\n", 98 | " if intervals[i][1] >= lowest_in_chain:\n", 99 | " chain.append(i)\n", 100 | " lowest_in_chain = min(lowest_in_chain, intervals[i][0])\n", 101 | " else:\n", 102 | " return chain\n", 103 | " return chain" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 33, 109 | "metadata": { 110 | "collapsed": true 111 | }, 112 | "outputs": [], 113 | "source": [ 114 | "def beta(k, d, c):\n", 115 | " \"\"\" \n", 116 | " Generates the scaled down feature weights for a true model from the distribution\n", 117 | " β ∼ U[0, c]^d.\n", 118 | " \n", 119 | " :param k: the number of arms \n", 120 | " :param d: the number of features\n", 121 | " :param c: the scale of the feature weights\n", 122 | " \"\"\"\n", 123 | " return np.random.uniform(0, c+1, size=(k, d))" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 34, 129 | "metadata": { 130 | "scrolled": true 131 | }, 132 | "outputs": [ 133 | { 134 | "name": "stdout", 135 | "output_type": "stream", 136 | "text": [ 137 | "Cumulative Regret: 4636.449117347242\n", 138 | "Final Regret: 0.9403759272439949\n" 139 | ] 140 | } 141 | ], 142 | "source": [ 143 | "k = 2\n", 144 | "c = 10\n", 145 | "d = 10\n", 146 | "T = 1000\n", 147 | "X = np.random.uniform(0, 1, size=(T, k, d)) # 3-axis ndarray\n", 148 | "B = beta(k, d, c) # true parameters. 
B[i]: params for arm i\n", 149 | "Y = np.array([np.diag(X[t].dot(transpose(B))) for t in range(T)])\n", 150 | "ridgeFair(X, Y, k, d, 0.05, T, 1)" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": null, 156 | "metadata": { 157 | "collapsed": true 158 | }, 159 | "outputs": [], 160 | "source": [] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": null, 165 | "metadata": { 166 | "collapsed": true 167 | }, 168 | "outputs": [], 169 | "source": [] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": null, 174 | "metadata": { 175 | "collapsed": true 176 | }, 177 | "outputs": [], 178 | "source": [] 179 | } 180 | ], 181 | "metadata": { 182 | "kernelspec": { 183 | "display_name": "Python 3", 184 | "language": "python", 185 | "name": "python3" 186 | }, 187 | "language_info": { 188 | "codemirror_mode": { 189 | "name": "ipython", 190 | "version": 3 191 | }, 192 | "file_extension": ".py", 193 | "mimetype": "text/x-python", 194 | "name": "python", 195 | "nbconvert_exporter": "python", 196 | "pygments_lexer": "ipython3", 197 | "version": "3.5.2" 198 | } 199 | }, 200 | "nbformat": 4, 201 | "nbformat_minor": 2 202 | } 203 | -------------------------------------------------------------------------------- /Yahoo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "\"\"\"\n", 12 | "Data wrangling for the Yahoo! Front Page Today Module User Click Log Dataset, version 1.0.\n", 13 | "\n", 14 | "Inspired by:\n", 15 | "Unbiased Offline Evaluation of Contextual-bandit-based News Article Recommendation Algorithms \n", 16 | "[https://arxiv.org/pdf/1003.5956.pdf]\n", 17 | "\n", 18 | "Documentation is per reST format used in Sphinx.\n", 19 | "\n", 20 | "Dataset: https://webscope.sandbox.yahoo.com/catalog.php?datatype=r&did=49\n", 21 | "Author: jtcho (jonathan.t.cho@gmail.com)\n", 22 | "\n", 23 | "Many thanks to Yahoo! 
Research for allowing me to use their dataset.\n",
24 |     "\"\"\"\n",
25 |     "\n",
26 |     "import pandas as pd\n",
27 |     "import numpy as np\n",
28 |     "import sqlite3\n",
29 |     "import sys\n",
30 |     "import time\n",
31 |     "import os\n",
32 |     "dump_dir = 'R6/'\n",
33 |     "data_dirs = ['clicks_1/']\n",
34 |     "engine = sqlite3.connect('yahoo')"
35 |    ]
36 |   },
37 |   {
38 |    "cell_type": "code",
39 |    "execution_count": 2,
40 |    "metadata": {
41 |     "collapsed": true
42 |    },
43 |    "outputs": [],
44 |    "source": [
45 |     "# Database cleanup.\n",
46 |     "\n",
47 |     "c = engine.cursor()\n",
48 |     "c.execute('DROP TABLE IF EXISTS articles')\n",
49 |     "engine.commit()"
50 |    ]
51 |   },
52 |   {
53 |    "cell_type": "code",
54 |    "execution_count": 3,
55 |    "metadata": {
56 |     "collapsed": true
57 |    },
58 |    "outputs": [],
59 |    "source": [
60 |     "def extract_article_info(path, item_limit=sys.maxsize):\n",
61 |     "    \"\"\" \n",
62 |     "    Given an R6A dataset file, extracts all of the common article vectors\n",
63 |     "    and compiles them in a single dataframe.\n",
64 |     "    Note that each article has a constant vector associated with it.\n",
65 |     "    \n",
66 |     "    :param path: the file path for the dataset\n",
67 |     "    :param item_limit: limits the number of items to parse\n",
68 |     "    :returns: Pandas dataframe containing article vectors indexed by id\n",
69 |     "    \"\"\"\n",
70 |     "    t0 = time.time()\n",
71 |     "    num_iters = 0\n",
72 |     "    _articles_df = pd.DataFrame(columns=['2', '3', '4', '5', '6', '1'])\n",
73 |     "    with open(path) as f:\n",
74 |     "        for line in f:\n",
75 |     "            num_iters += 1 \n",
76 |     "            if num_iters > item_limit:\n",
77 |     "                break\n",
78 |     "            parts = line.strip().split('|')\n",
79 |     "            for i in range(2, len(parts)):\n",
80 |     "                # Extract article vector information.\n",
81 |     "                article_info = parts[i].split()\n",
82 |     "                article_id = article_info[0]\n",
83 |     "                if article_id in _articles_df.index:\n",
84 |     "                    continue\n",
85 |     "                article_info_parts = list(map(lambda x : x.split(':')[1], article_info[1:]))\n",
86 |     "                article_info = dict(zip(_articles_df.columns, article_info_parts))\n",
87 |     "                # I append to an existing DF for quick de-duplication. 
Also\n", 88 | " # empirically, I observed that there is a small number of unique\n", 89 | " # articles for any dataset, so the overhead of doing this is minimized.\n", 90 | " _articles_df.loc[article_id] = pd.Series(article_info)\n", 91 | "\n", 92 | " t1 = time.time()\n", 93 | " print('Finished processing {0} items in {1} seconds.'.format(num_iters-1, t1 - t0))\n", 94 | " return _articles_df" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 7, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "def process_click_file(path, item_limit=sys.maxsize):\n", 104 | " \"\"\"\n", 105 | " Given an R6A dataset file, parses all of the view event logs and \n", 106 | " compiles them in a single dataframe.\n", 107 | " \n", 108 | " A single view event consists of a unix timestamp, a 6-dimensional vector of\n", 109 | " features describing the user, a set of 20 articles in the article pool\n", 110 | " (the 20 arms of the multi-arm bandit), the id of the article displayed, and\n", 111 | " a boolean marking whether the article was clicked.\n", 112 | " \"\"\"\n", 113 | " t0 = time.time()\n", 114 | " num_iters = 0\n", 115 | " views_cols = ['time', 'user_1', 'user_2', 'user_3', 'user_4', 'user_5', 'user_6', \n", 116 | " 'article_pool', 'displayed', 'clicked']\n", 117 | " views = []\n", 118 | " with open(path) as f:\n", 119 | " for line in f:\n", 120 | " num_iters += 1\n", 121 | " if num_iters > item_limit:\n", 122 | " break\n", 123 | " parts = line.strip().split('|')\n", 124 | " unix_timestamp, disp_article_id, clicked = parts[0].split()\n", 125 | " user_info = list(map(lambda x : x.split(':')[1], parts[1].split()[1:]))\n", 126 | " user_info = dict(zip(views_cols[1:7], user_info))\n", 127 | " user_info['time'] = unix_timestamp\n", 128 | " user_info['displayed'] = disp_article_id\n", 129 | " user_info['clicked'] = clicked\n", 130 | " \n", 131 | " # Extract article vector information.\n", 132 | " article_ids = [parts[i].split()[0] for i in range(2, len(parts))]\n", 133 | " user_info['article_pool'] = article_ids\n", 134 | " # In this case, we construct the DF at the end because we're creating a new row\n", 135 | " # for *every* item... 
over ~4 million items that becomes very expensive!\n", 136 | " views.append(user_info)\n", 137 | "\n", 138 | " t1 = time.time()\n", 139 | " print('{0}: Finished processing {1} items in {2} seconds.'.format(path, num_iters-1, t1 - t0))\n", 140 | " return pd.DataFrame(views, columns=views_cols)" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": 4, 146 | "metadata": {}, 147 | "outputs": [ 148 | { 149 | "name": "stdout", 150 | "output_type": "stream", 151 | "text": [ 152 | "Finished processing 4681991 items in 150.5566005706787 seconds.\n" 153 | ] 154 | } 155 | ], 156 | "source": [ 157 | "# Run to populate the articles table.\n", 158 | "articles_df = extract_article_info(dump_dir + 'clicks_1.txt', sys.maxsize).apply(pd.to_numeric)\n", 159 | "articles_df.to_sql('articles', engine, if_exists='replace')" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": 9, 165 | "metadata": {}, 166 | "outputs": [ 167 | { 168 | "name": "stdout", 169 | "output_type": "stream", 170 | "text": [ 171 | "clicks_1/xaa: Finished processing 99999 items in 3.1617259979248047 seconds.\n", 172 | "clicks_1/xab: Finished processing 99999 items in 3.2025344371795654 seconds.\n", 173 | "clicks_1/xac: Finished processing 99999 items in 3.3164455890655518 seconds.\n", 174 | "clicks_1/xad: Finished processing 99999 items in 3.380336046218872 seconds.\n", 175 | "clicks_1/xae: Finished processing 99999 items in 3.0821828842163086 seconds.\n", 176 | "clicks_1/xaf: Finished processing 99999 items in 3.1906492710113525 seconds.\n", 177 | "clicks_1/xag: Finished processing 99999 items in 3.3087258338928223 seconds.\n", 178 | "clicks_1/xah: Finished processing 99999 items in 3.2571945190429688 seconds.\n", 179 | "clicks_1/xai: Finished processing 99999 items in 3.278446674346924 seconds.\n", 180 | "clicks_1/xaj: Finished processing 99999 items in 3.2920501232147217 seconds.\n", 181 | "clicks_1/xak: Finished processing 99999 items in 3.431187629699707 seconds.\n", 182 | "clicks_1/xal: Finished processing 99999 items in 3.40493106842041 seconds.\n", 183 | "clicks_1/xam: Finished processing 99999 items in 3.1150004863739014 seconds.\n", 184 | "clicks_1/xan: Finished processing 99999 items in 3.1503725051879883 seconds.\n", 185 | "clicks_1/xao: Finished processing 99999 items in 3.3162639141082764 seconds.\n", 186 | "clicks_1/xap: Finished processing 99999 items in 3.09061598777771 seconds.\n", 187 | "clicks_1/xaq: Finished processing 99999 items in 3.4392073154449463 seconds.\n", 188 | "clicks_1/xar: Finished processing 99999 items in 3.443249464035034 seconds.\n", 189 | "clicks_1/xas: Finished processing 99999 items in 3.5337443351745605 seconds.\n", 190 | "clicks_1/xat: Finished processing 99999 items in 3.4647445678710938 seconds.\n", 191 | "clicks_1/xau: Finished processing 99999 items in 3.6430513858795166 seconds.\n", 192 | "clicks_1/xav: Finished processing 99999 items in 3.6271255016326904 seconds.\n", 193 | "clicks_1/xaw: Finished processing 99999 items in 3.309832811355591 seconds.\n", 194 | "clicks_1/xax: Finished processing 99999 items in 3.460949420928955 seconds.\n", 195 | "clicks_1/xay: Finished processing 99999 items in 3.426335573196411 seconds.\n", 196 | "clicks_1/xaz: Finished processing 99999 items in 3.510620594024658 seconds.\n", 197 | "clicks_1/xba: Finished processing 99999 items in 3.6194756031036377 seconds.\n", 198 | "clicks_1/xbb: Finished processing 99999 items in 3.7689321041107178 seconds.\n", 199 | "clicks_1/xbc: Finished processing 99999 items in 
3.7527005672454834 seconds.\n", 200 | "clicks_1/xbd: Finished processing 99999 items in 3.559547185897827 seconds.\n", 201 | "clicks_1/xbe: Finished processing 99999 items in 3.664827585220337 seconds.\n", 202 | "clicks_1/xbf: Finished processing 99999 items in 3.7467215061187744 seconds.\n", 203 | "clicks_1/xbg: Finished processing 99999 items in 3.2975916862487793 seconds.\n", 204 | "clicks_1/xbh: Finished processing 99999 items in 3.1932389736175537 seconds.\n", 205 | "clicks_1/xbi: Finished processing 99999 items in 3.480050802230835 seconds.\n", 206 | "clicks_1/xbj: Finished processing 99999 items in 3.307481050491333 seconds.\n", 207 | "clicks_1/xbk: Finished processing 99999 items in 3.3213932514190674 seconds.\n", 208 | "clicks_1/xbl: Finished processing 99999 items in 3.602836847305298 seconds.\n", 209 | "clicks_1/xbm: Finished processing 99999 items in 3.3665266036987305 seconds.\n", 210 | "clicks_1/xbn: Finished processing 99999 items in 3.5517754554748535 seconds.\n", 211 | "clicks_1/xbo: Finished processing 99999 items in 3.5413339138031006 seconds.\n", 212 | "clicks_1/xbp: Finished processing 99999 items in 3.082970380783081 seconds.\n", 213 | "clicks_1/xbq: Finished processing 99999 items in 3.1382272243499756 seconds.\n", 214 | "clicks_1/xbr: Finished processing 99999 items in 3.2157583236694336 seconds.\n", 215 | "clicks_1/xbs: Finished processing 99999 items in 3.396573543548584 seconds.\n", 216 | "clicks_1/xbt: Finished processing 99999 items in 3.4965860843658447 seconds.\n", 217 | "clicks_1/xbu: Finished processing 81991 items in 2.8793578147888184 seconds.\n" 218 | ] 219 | } 220 | ], 221 | "source": [ 222 | "for fname in os.listdir('clicks_1'):\n", 223 | " if fname != '.DS_Store':\n", 224 | " result = process_click_file('clicks_1/'+fname)\n", 225 | " result['article_pool'] = result['article_pool'].astype(str)\n", 226 | " result.to_sql('clicks', engine, if_exists='append')" 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": 18, 232 | "metadata": {}, 233 | "outputs": [], 234 | "source": [ 235 | "#pd.read_sql_query('select * from articles',con=engine).set_index('index')" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": 11, 241 | "metadata": {}, 242 | "outputs": [ 243 | { 244 | "data": { 245 | "text/html": [ 246 | "
\n", 247 | "\n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | "
count(*)
04681992
\n", 261 | "
" 262 | ], 263 | "text/plain": [ 264 | " count(*)\n", 265 | "0 4681992" 266 | ] 267 | }, 268 | "execution_count": 11, 269 | "metadata": {}, 270 | "output_type": "execute_result" 271 | } 272 | ], 273 | "source": [ 274 | "pd.read_sql_query('select count(*) from clicks', con=engine)" 275 | ] 276 | }, 277 | { 278 | "cell_type": "code", 279 | "execution_count": null, 280 | "metadata": { 281 | "collapsed": true 282 | }, 283 | "outputs": [], 284 | "source": [] 285 | } 286 | ], 287 | "metadata": { 288 | "kernelspec": { 289 | "display_name": "Python 3", 290 | "language": "python", 291 | "name": "python3" 292 | }, 293 | "language_info": { 294 | "codemirror_mode": { 295 | "name": "ipython", 296 | "version": 3 297 | }, 298 | "file_extension": ".py", 299 | "mimetype": "text/x-python", 300 | "name": "python", 301 | "nbconvert_exporter": "python", 302 | "pygments_lexer": "ipython3", 303 | "version": "3.6.0" 304 | } 305 | }, 306 | "nbformat": 4, 307 | "nbformat_minor": 2 308 | } 309 | -------------------------------------------------------------------------------- /evaluation_T.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | from fairml import beta, compute_chain, eta, interval_chaining, top_interval 5 | 6 | 7 | def main(): 8 | c_vals = [1.0, 2.0, 5.0, 10.0] 9 | 10 | # Plot: Varying T (# of rounds) 11 | d = 2 12 | k = 2 13 | T_vals = range(3, 1000, 10) 14 | 15 | results = { 16 | '0': { 17 | 'ylabel': 'Average regret - TI', 18 | 'name': 'avg_regret_ti' 19 | }, 20 | '1': { 21 | 'ylabel': 'Average regret - IC', 22 | 'name': 'avg_regret_ic' 23 | }, 24 | '2': { 25 | 'ylabel': 'Average regret difference (TI - IC)', 26 | 'name': 'avg_regret_diff' 27 | }, 28 | '3': { 29 | 'ylabel': 'Cumulative regret - TI', 30 | 'name': 'cum_regret_ti' 31 | }, 32 | '4': { 33 | 'ylabel': 'Cumulative regret - IC', 34 | 'name': 'cum_regret_ic' 35 | }, 36 | '5': { 37 | 'ylabel': 'Cumulative regret difference (TI - IC)', 38 | 'name': 'cum_regret_diff' 39 | }, 40 | '6': { 41 | 'ylabel': 'Final regret - TI', 42 | 'name': 'final_regret_ti' 43 | }, 44 | '7': { 45 | 'ylabel': 'Final regret - IC', 46 | 'name': 'final_regret_ic' 47 | }, 48 | '8': { 49 | 'ylabel': 'Final regret difference (TI - IC)', 50 | 'name': 'final_regret_diff' 51 | } 52 | } 53 | for _, v in results.items(): # 9 sets of results. 54 | for j in c_vals: 55 | v[str(j)] = [] 56 | 57 | for c in c_vals: 58 | for T in T_vals: 59 | cum_regret_tis = [] 60 | avg_regret_tis = [] 61 | final_regret_tis = [] 62 | cum_regret_ics = [] 63 | avg_regret_ics = [] 64 | final_regret_ics = [] 65 | for i in range(0, 50): # 50 trials. 
66 |                 X = np.random.uniform(0, 1, size=(T, k, d))
67 |                 B = beta(k, d, c)
68 |                 Y = np.array([np.diag(X[t].dot(np.transpose(B))) for t in range(T)])
69 | 
70 |                 cum_regret_ti, avg_regret_ti, final_regret_ti = top_interval(
71 |                     X, Y, k, d, 0.05, T, _print_progress=False)
72 |                 cum_regret_ic, avg_regret_ic, final_regret_ic = interval_chaining(
73 |                     X, Y, c, k, d, 0.05, T, _print_progress=False)
74 |                 cum_regret_tis.append(cum_regret_ti)
75 |                 avg_regret_tis.append(avg_regret_ti)
76 |                 final_regret_tis.append(final_regret_ti)
77 |                 cum_regret_ics.append(cum_regret_ic)
78 |                 avg_regret_ics.append(avg_regret_ic)
79 |                 final_regret_ics.append(final_regret_ic)
80 |             cum_regret_ti = mean(cum_regret_tis)
81 |             avg_regret_ti = mean(avg_regret_tis)
82 |             final_regret_ti = mean(final_regret_tis)
83 |             cum_regret_ic = mean(cum_regret_ics)
84 |             avg_regret_ic = mean(avg_regret_ics)
85 |             final_regret_ic = mean(final_regret_ics)
86 | 
87 |             results['0'][str(c)].append(avg_regret_ti)
88 |             results['1'][str(c)].append(avg_regret_ic)
89 |             results['2'][str(c)].append(abs(avg_regret_ti - avg_regret_ic))
90 |             results['3'][str(c)].append(cum_regret_ti)
91 |             results['4'][str(c)].append(cum_regret_ic)
92 |             results['5'][str(c)].append(abs(cum_regret_ti - cum_regret_ic))
93 |             results['6'][str(c)].append(final_regret_ti)
94 |             results['7'][str(c)].append(final_regret_ic)
95 |             results['8'][str(c)].append(abs(final_regret_ti - final_regret_ic))
96 | 
97 |     for k, v in results.items():
98 |         plt.clf()
99 |         c1, = plt.plot(T_vals, results[k]['1.0'], label='c=1')
100 |         c2, = plt.plot(T_vals, results[k]['2.0'], label='c=2')
101 |         c5, = plt.plot(T_vals, results[k]['5.0'], label='c=5')
102 |         c10, = plt.plot(T_vals, results[k]['10.0'], label='c=10')
103 |         plt.xticks(np.arange(min(T_vals), max(T_vals) + 1, 200))
104 |         plt.legend(handles=[c1, c2, c5, c10])
105 |         plt.xlabel('T (# of rounds)', fontsize=18)
106 |         plt.ylabel(v['ylabel'], fontsize=15)
107 |         plt.savefig('figures_T_50x/T_50x_' + v['name'])
108 | 
109 | 
110 | def mean(numbers):
111 |     return float(sum(numbers)) / max(len(numbers), 1)
112 | 
113 | 
114 | if __name__ == '__main__':
115 |     main()
116 | 
--------------------------------------------------------------------------------
/evaluation_d.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import numpy as np
3 | 
4 | from fairml import beta, compute_chain, eta, interval_chaining, top_interval
5 | 
6 | 
7 | def main():
8 |     c_vals = [1.0, 2.0, 5.0, 10.0]
9 | 
10 |     # Plot: Varying d (# of features)
11 |     d_vals = range(1, 50)
12 |     k = 2
13 |     T = 1000
14 | 
15 |     results = {
16 |         '0': {
17 |             'ylabel': 'Average regret - TI',
18 |             'name': 'avg_regret_ti'
19 |         },
20 |         '1': {
21 |             'ylabel': 'Average regret - IC',
22 |             'name': 'avg_regret_ic'
23 |         },
24 |         '2': {
25 |             'ylabel': 'Average regret difference (TI - IC)',
26 |             'name': 'avg_regret_diff'
27 |         },
28 |         '3': {
29 |             'ylabel': 'Cumulative regret - TI',
30 |             'name': 'cum_regret_ti'
31 |         },
32 |         '4': {
33 |             'ylabel': 'Cumulative regret - IC',
34 |             'name': 'cum_regret_ic'
35 |         },
36 |         '5': {
37 |             'ylabel': 'Cumulative regret difference (TI - IC)',
38 |             'name': 'cum_regret_diff'
39 |         },
40 |         '6': {
41 |             'ylabel': 'Final regret - TI',
42 |             'name': 'final_regret_ti'
43 |         },
44 |         '7': {
45 |             'ylabel': 'Final regret - IC',
46 |             'name': 'final_regret_ic'
47 |         },
48 |         '8': {
49 |             'ylabel': 'Final regret difference (TI - IC)',
50 |             'name': 'final_regret_diff'
51 |         }
52 |     }
53 |     for _, v in results.items():  # 9 sets of results.
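        # Give every metric one empty series per value of c; each series
        # gains one point per setting of d in the sweep below.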
54 |         for j in c_vals:
55 |             v[str(j)] = []
56 | 
57 |     for c in c_vals:
58 |         for d in d_vals:
59 |             cum_regret_tis = []
60 |             avg_regret_tis = []
61 |             final_regret_tis = []
62 |             cum_regret_ics = []
63 |             avg_regret_ics = []
64 |             final_regret_ics = []
65 |             for i in range(0, 50):  # 50 trials.
66 |                 X = np.random.uniform(0, 1, size=(T, k, d))
67 |                 B = beta(k, d, c)
68 |                 Y = np.array([np.diag(X[t].dot(np.transpose(B))) for t in range(T)])
69 | 
70 |                 cum_regret_ti, avg_regret_ti, final_regret_ti = top_interval(
71 |                     X, Y, k, d, 0.05, T, _print_progress=False)
72 |                 cum_regret_ic, avg_regret_ic, final_regret_ic = interval_chaining(
73 |                     X, Y, c, k, d, 0.05, T, _print_progress=False)
74 |                 cum_regret_tis.append(cum_regret_ti)
75 |                 avg_regret_tis.append(avg_regret_ti)
76 |                 final_regret_tis.append(final_regret_ti)
77 |                 cum_regret_ics.append(cum_regret_ic)
78 |                 avg_regret_ics.append(avg_regret_ic)
79 |                 final_regret_ics.append(final_regret_ic)
80 |             cum_regret_ti = mean(cum_regret_tis)
81 |             avg_regret_ti = mean(avg_regret_tis)
82 |             final_regret_ti = mean(final_regret_tis)
83 |             cum_regret_ic = mean(cum_regret_ics)
84 |             avg_regret_ic = mean(avg_regret_ics)
85 |             final_regret_ic = mean(final_regret_ics)
86 | 
87 |             results['0'][str(c)].append(avg_regret_ti)
88 |             results['1'][str(c)].append(avg_regret_ic)
89 |             results['2'][str(c)].append(abs(avg_regret_ti - avg_regret_ic))
90 |             results['3'][str(c)].append(cum_regret_ti)
91 |             results['4'][str(c)].append(cum_regret_ic)
92 |             results['5'][str(c)].append(abs(cum_regret_ti - cum_regret_ic))
93 |             results['6'][str(c)].append(final_regret_ti)
94 |             results['7'][str(c)].append(final_regret_ic)
95 |             results['8'][str(c)].append(abs(final_regret_ti - final_regret_ic))
96 | 
97 |     for k, v in results.items():
98 |         plt.clf()
99 |         c1, = plt.plot(d_vals, results[k]['1.0'], label='c=1')
100 |         c2, = plt.plot(d_vals, results[k]['2.0'], label='c=2')
101 |         c5, = plt.plot(d_vals, results[k]['5.0'], label='c=5')
102 |         c10, = plt.plot(d_vals, results[k]['10.0'], label='c=10')
103 |         plt.xticks(np.arange(min(d_vals), max(d_vals) + 1, 10))
104 |         plt.legend(handles=[c1, c2, c5, c10])
105 |         plt.xlabel('d (# of features)', fontsize=18)
106 |         plt.ylabel(v['ylabel'], fontsize=15)
107 |         plt.savefig('figures_d_50x/d_50x_' + v['name'])
108 | 
109 | 
110 | def mean(numbers):
111 |     return float(sum(numbers)) / max(len(numbers), 1)
112 | 
113 | 
114 | if __name__ == '__main__':
115 |     main()
116 | 
--------------------------------------------------------------------------------
/evaluation_k.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import numpy as np
3 | 
4 | from fairml import beta, compute_chain, eta, interval_chaining, top_interval
5 | 
6 | 
7 | def main():
8 |     c_vals = [1.0, 2.0, 5.0, 10.0]
9 | 
10 |     # Plot: Varying k (# of groups)
11 |     d = 2
12 |     k_vals = range(1, 50, 5)
13 |     T = 1000
14 | 
15 |     results = {
16 |         '0': {
17 |             'ylabel': 'Average regret - TI',
18 |             'name': 'avg_regret_ti'
19 |         },
20 |         '1': {
21 |             'ylabel': 'Average regret - IC',
22 |             'name': 'avg_regret_ic'
23 |         },
24 |         '2': {
25 |             'ylabel': 'Average regret difference (TI - IC)',
26 |             'name': 'avg_regret_diff'
27 |         },
28 |         '3': {
29 |             'ylabel': 'Cumulative regret - TI',
30 |             'name': 'cum_regret_ti'
31 |         },
32 |         '4': {
33 |             'ylabel': 'Cumulative regret - IC',
34 |             'name': 'cum_regret_ic'
35 |         },
36 |         '5': {
37 |             'ylabel': 'Cumulative regret difference (TI - IC)',
38 |             'name': 'cum_regret_diff'
39 |         },
40 |         '6': {
41 |             'ylabel': 'Final regret - TI',
42 |             'name': 'final_regret_ti'
43 |         },
44 |         '7': {
45 |             'ylabel': 'Final regret - IC',
46 |             'name': 'final_regret_ic'
47 |         },
48 |         '8': {
49 |             'ylabel': 'Final regret difference (TI - IC)',
50 |             'name': 'final_regret_diff'
51 |         }
52 |     }
53 |     for _, v in results.items():  # 9 sets of results.
54 |         for j in c_vals:
55 |             v[str(j)] = []
56 | 
57 |     for c in c_vals:
58 |         for k in k_vals:
59 |             cum_regret_tis = []
60 |             avg_regret_tis = []
61 |             final_regret_tis = []
62 |             cum_regret_ics = []
63 |             avg_regret_ics = []
64 |             final_regret_ics = []
65 |             for i in range(0, 50):  # 50 trials.
66 |                 X = np.random.uniform(0, 1, size=(T, k, d))
67 |                 B = beta(k, d, c)
68 |                 Y = np.array([np.diag(X[t].dot(np.transpose(B))) for t in range(T)])
69 | 
70 |                 cum_regret_ti, avg_regret_ti, final_regret_ti = top_interval(
71 |                     X, Y, k, d, 0.05, T, _print_progress=False)
72 |                 cum_regret_ic, avg_regret_ic, final_regret_ic = interval_chaining(
73 |                     X, Y, c, k, d, 0.05, T, _print_progress=False)
74 |                 cum_regret_tis.append(cum_regret_ti)
75 |                 avg_regret_tis.append(avg_regret_ti)
76 |                 final_regret_tis.append(final_regret_ti)
77 |                 cum_regret_ics.append(cum_regret_ic)
78 |                 avg_regret_ics.append(avg_regret_ic)
79 |                 final_regret_ics.append(final_regret_ic)
80 |             cum_regret_ti = mean(cum_regret_tis)
81 |             avg_regret_ti = mean(avg_regret_tis)
82 |             final_regret_ti = mean(final_regret_tis)
83 |             cum_regret_ic = mean(cum_regret_ics)
84 |             avg_regret_ic = mean(avg_regret_ics)
85 |             final_regret_ic = mean(final_regret_ics)
86 | 
87 |             results['0'][str(c)].append(avg_regret_ti)
88 |             results['1'][str(c)].append(avg_regret_ic)
89 |             results['2'][str(c)].append(abs(avg_regret_ti - avg_regret_ic))
90 |             results['3'][str(c)].append(cum_regret_ti)
91 |             results['4'][str(c)].append(cum_regret_ic)
92 |             results['5'][str(c)].append(abs(cum_regret_ti - cum_regret_ic))
93 |             results['6'][str(c)].append(final_regret_ti)
94 |             results['7'][str(c)].append(final_regret_ic)
95 |             results['8'][str(c)].append(abs(final_regret_ti - final_regret_ic))
96 | 
97 |     for k, v in results.items():
98 |         plt.clf()
99 |         c1, = plt.plot(k_vals, results[k]['1.0'], label='c=1')
100 |         c2, = plt.plot(k_vals, results[k]['2.0'], label='c=2')
101 |         c5, = plt.plot(k_vals, results[k]['5.0'], label='c=5')
102 |         c10, = plt.plot(k_vals, results[k]['10.0'], label='c=10')
103 |         plt.xticks(np.arange(min(k_vals), max(k_vals) + 1, 10))
104 |         plt.legend(handles=[c1, c2, c5, c10])
105 |         plt.xlabel('k (# of groups)', fontsize=18)
106 |         plt.ylabel(v['ylabel'], fontsize=15)
107 |         plt.savefig('figures_k_50x/k_50x_' + v['name'])
108 | 
109 | 
110 | def mean(numbers):
111 |     return float(sum(numbers)) / max(len(numbers), 1)
112 | 
113 | 
114 | if __name__ == '__main__':
115 |     main()
116 | 
--------------------------------------------------------------------------------
/fairml.py:
--------------------------------------------------------------------------------
1 | from math import sqrt
2 | import numpy as np
3 | from numpy import log, transpose
4 | from numpy.linalg import inv
5 | from scipy.stats import norm
6 | 
7 | 
8 | def eta(T):
9 |     """
10 |     Generates the cutoff probabilities for exploration rounds in interval
11 |     chaining.
12 | 
13 |     :param T: the total number of iterations
14 |     """
15 |     return np.array([pow(t, -1/3) for t in range(1, T+1)])
16 | 
17 | 
18 | def beta(k, d, c):
19 |     """
20 |     Generates the feature weights for a true model, drawn from the
21 |     distribution β ∼ U[0, c]^d.
22 | 
23 |     :param k: the number of arms
24 |     :param d: the number of features
25 |     :param c: the scale of the feature weights
26 |     """
27 |     return np.random.uniform(0, c, size=(k, d))
28 | 
29 | 
30 | def print_progress(s, should_print):
31 |     """
32 |     Helper function to print the progress of an algorithm as it's running.
33 | 
34 |     :param s: the string to print
35 |     :param should_print: whether or not the string should be printed
36 |     """
37 |     if should_print:
38 |         print(s)
39 | 
40 | 
41 | def top_interval(X, Y, k, d, _delta, T, _print_progress=True):
42 |     """
43 |     Simulates T rounds of TopInterval for k arms.
44 | 
45 |     :param X: a 3-axis (T, k, d) ndarray of d-dimensional context vectors for
46 |         each time-step and arm
47 |     :param Y: a T x k ndarray of reward function output for each context vector
48 |     :param k: the number of arms
49 |     :param d: the number of features
50 |     :param _delta: confidence parameter
51 |     :param T: the number of iterations
52 |     :param _print_progress: True if progress should be printed; False otherwise
53 |     :returns: cum_regret (the total regret across all T rounds of the algorithm),
54 |         avg_regret (the regret averaged across all T rounds of the algorithm),
55 |         final_regret (the regret in the last round of the algorithm)
56 |     """
57 |     pp = _print_progress
58 |     _eta = eta(T)  # exploration cutoff probabilities
59 |     picks = []
60 |     for t in range(T):
61 |         print_progress('Iteration [{0} / {1}]'.format(t, T), pp)
62 |         if t <= d or np.random.rand() <= _eta[t]:
63 |             # Exploration round: play an arm uniformly at random.
64 |             picks.append(np.random.randint(0, k))
65 |             print_progress('Exploration round.', pp)
66 |         else:
67 |             intervals = []
68 |             for i in range(k):
69 |                 # Compute beta hat.
70 |                 _Xti = X[:t+1, i]
71 |                 _XtiT = transpose(_Xti)
72 |                 try:
73 |                     _XTX = inv(_XtiT.dot(_Xti))
74 |                 except np.linalg.LinAlgError:
75 |                     print_progress('Encountered singular matrix. Ignoring.', pp)
76 |                     continue
77 |                 _Yti = Y[:t+1, i]
78 |                 Bh_t_i = _XTX.dot(_XtiT).dot(_Yti)  # Compute OLS estimators.
79 |                 yh_t_i = Bh_t_i.dot(X[t, i])
80 |                 _s2 = np.var(Y[:t+1, i])
81 |                 # Compute the confidence interval width using the inverse CDF.
82 |                 w_t_i = norm.ppf(1 - _delta/(2*T*k), loc=0,
83 |                                  scale=np.sqrt(_s2 * X[t, i].dot(_XTX).dot(transpose(X[t, i]))))
84 |                 intervals.append([yh_t_i - w_t_i, yh_t_i + w_t_i])
85 |             # Pick the arm with the largest upper bound.
86 |             picks.append(np.argmax(np.array(intervals)[:, 1]) if intervals else np.random.randint(0, k))
87 |             print_progress('Intervals: {0}'.format(intervals), pp)
88 |     # Compute the sum of the best possible picks over each iteration.
89 |     best = [Y[i].max() for i in range(2, T)]
90 |     performance = [Y[t][picks[t]] for t in range(2, T)]
91 |     cum_regret = sum(best) - sum(performance)
92 |     avg_regret = cum_regret / float(T)
93 |     final_regret = best[-1] - performance[-1]
94 |     print_progress('Cumulative Regret: {0}'.format(cum_regret), pp)
95 |     print_progress('Average Regret: {0}'.format(avg_regret), pp)
96 |     print_progress('Final Regret: {0}'.format(final_regret), pp)
97 |     return cum_regret, avg_regret, final_regret
98 | 
99 | 
100 | def compute_chain(i_st, intervals, k, _print_progress=True):
101 |     # Sort the intervals in order of decreasing upper bound.
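    # Illustrative walk-through (an assumed example, not from the source):
    # with (lower, upper) intervals A=(3, 5), B=(1, 4), C=(0, 2), the
    # descending sort by upper bound gives A, B, C. B joins the chain since
    # 4 >= 3 and lowers the chain's floor to 1; C then joins since 2 >= 1,
    # so the chain is [A, B, C]. Had C been (0, 0.5), the walk would stop
    # and the chain would be just [A, B].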
102 |     pp = _print_progress
103 |     chain = [i_st]
104 |     print_progress(intervals[:, 1], pp)
105 |     ordering = np.argsort(intervals[:, 1])[::-1]
106 |     intervals = intervals[ordering, :]
107 | 
108 |     lowest_in_chain = intervals[0][0]
109 |     for i in range(1, k):
110 |         if intervals[i][1] >= lowest_in_chain:
111 |             chain.append(ordering[i])  # Chain the original arm index.
112 |             lowest_in_chain = min(lowest_in_chain, intervals[i][0])
113 |         else:
114 |             return chain
115 |     return chain
116 | 
117 | 
118 | def interval_chaining(X, Y, c, k, d, _delta, T, _print_progress=True):
119 |     """
120 |     Simulates T rounds of IntervalChaining for k arms.
121 | 
122 |     :param X: a 3-axis (T, k, d) ndarray of d-dimensional context vectors for
123 |         each time-step and arm
124 |     :param Y: a T x k ndarray of reward function output for each context vector
125 |     :param k: the number of arms
126 |     :param d: the number of features
127 |     :param _delta: confidence parameter
128 |     :param T: the number of iterations
129 |     :param _print_progress: True if progress should be printed; False otherwise
130 |     :returns: cum_regret (the total regret across all T rounds of the algorithm),
131 |         avg_regret (the regret averaged across all T rounds of the algorithm),
132 |         final_regret (the regret in the last round of the algorithm)
133 |     """
134 |     pp = _print_progress
135 |     _eta = eta(T)  # exploration cutoff probabilities
136 |     picks = []
137 |     for t in range(T):
138 |         print_progress('Iteration [{0} / {1}]'.format(t, T), pp)
139 |         if t <= d or np.random.rand() <= _eta[t]:
140 |             # Exploration round: play an arm uniformly at random.
141 |             picks.append(np.random.randint(0, k))
142 |             print_progress('Exploration round.', pp)
143 |         else:
144 |             intervals = []
145 |             for i in range(k):
146 |                 # Compute beta hat.
147 |                 _Xti = X[:t+1, i]
148 |                 _XtiT = transpose(_Xti)
149 |                 try:
150 |                     _XTX = inv(_XtiT.dot(_Xti))
151 |                 except np.linalg.LinAlgError:
152 |                     print_progress('Encountered singular matrix. Ignoring.', pp)
153 |                     continue
154 |                 _Yti = Y[:t+1, i]
155 |                 Bh_t_i = _XTX.dot(_XtiT).dot(_Yti)  # Compute OLS estimators.
156 |                 yh_t_i = Bh_t_i.dot(X[t, i])
157 |                 _s2 = np.var(Y[:t+1, i])
158 |                 # Compute the confidence interval width using the inverse CDF.
159 |                 w_t_i = norm.ppf(1 - _delta/(2*T*k), loc=0,
160 |                                  scale=np.sqrt(_s2 * X[t, i].dot(_XTX).dot(transpose(X[t, i]))))
161 |                 intervals.append([yh_t_i - w_t_i, yh_t_i + w_t_i])
162 |             # Pick the arm with the largest upper bound.
163 |             if not intervals:
164 |                 picks.append(np.random.randint(0, k))
165 |             else:
166 |                 i_st = np.argmax(np.array(intervals)[:, 1])
167 | 
168 |                 # Chaining
169 |                 chain = compute_chain(i_st, np.array(intervals), k, pp)
170 |                 print_progress('Computed chain: {0}'.format(chain), pp)
171 |                 picks.append(np.random.choice(chain))
172 |             print_progress('Intervals: {0}'.format(intervals), pp)
173 |     # Compute the sum of the best possible picks over each iteration.
174 |     best = [Y[i].max() for i in range(2, T)]
175 |     performance = [Y[t][picks[t]] for t in range(2, T)]
176 |     cum_regret = sum(best) - sum(performance)
177 |     avg_regret = cum_regret / float(T)
178 |     final_regret = best[-1] - performance[-1]
179 |     print_progress('Cumulative Regret: {0}'.format(cum_regret), pp)
180 |     print_progress('Average Regret: {0}'.format(avg_regret), pp)
181 |     print_progress('Final Regret: {0}'.format(final_regret), pp)
182 |     return cum_regret, avg_regret, final_regret
183 | 
184 | 
185 | def ridge_fair(X, Y, k, d, _delta, T, _lambda, _print_progress=True):
186 |     """
187 |     Simulates T rounds of RidgeFair.
188 | 
189 |     :param X: a 3-axis (T, k, d) ndarray of d-dimensional context vectors for
190 |         each time-step and arm
191 |     :param Y: a T x k ndarray of reward function output for each context vector
192 |     :param k: the number of arms
193 |     :param d: the number of features
194 |     :param _delta: confidence parameter
195 |     :param T: the number of iterations
196 |     :param _lambda: regularization parameter
197 |     """
198 |     picks = []
199 |     for t in range(T):
200 |         R = 1
201 |         intervals = []  # One confidence interval per arm, built below.
202 |         for i in range(k):
203 |             try:
204 |                 X_i = X[:t, i]  # design matrix
205 |                 Y_i = Y[:t, i]  # same with Y
206 |                 x_ti = X[t, i]  # feature vector for arm i in round t
207 | 
208 |                 X_iT = transpose(X_i)
209 |                 _idenD = np.identity(d)
210 |                 V_it = X_iT.dot(X_i) + (_lambda * _idenD)
211 | 
212 |                 B_it = inv(V_it).dot(X_iT).dot(Y_i)  # Ridge regression estimator.
213 | 
214 |                 y_ti = transpose(x_ti).dot(B_it)
215 | 
216 |                 V_itI = inv(V_it)  # inverse of V_it
217 |                 _wti1 = sqrt(transpose(x_ti).dot(V_itI).dot(x_ti))
218 |                 _wti2 = R * sqrt(d * log((1 + (t / _lambda)) / _delta)) + sqrt(_lambda)
219 |                 w_ti = _wti1 * _wti2
220 | 
221 |                 intervals.append([y_ti - w_ti, y_ti + w_ti])
222 |             except Exception:
223 |                 print_progress('Error in assigning interval value.', _print_progress)
224 |                 intervals = None
225 |                 break
226 |         if not intervals:
227 |             picks.append(np.random.randint(0, k))
228 |         else:
229 |             i_st = np.argmax(np.array(intervals)[:, 1])
230 |             chain = compute_chain(i_st, np.array(intervals), k, _print_progress)
231 |             # Play uniformly at random from the chain.
232 |             picks.append(np.random.choice(chain))
233 | 
234 |     best = [Y[i].max() for i in range(2, T)]
235 |     performance = [Y[t][picks[t]] for t in range(2, T)]
236 |     cum_regret = sum(best) - sum(performance)
237 |     final_regret = best[-1] - performance[-1]
238 |     print_progress('Cumulative Regret: {0}'.format(cum_regret), _print_progress)
239 |     print_progress('Final Regret: {0}'.format(final_regret), _print_progress)
240 |     return cum_regret, final_regret
--------------------------------------------------------------------------------
/figures_T_50x/avg_regret_diff.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_T_50x/avg_regret_diff.png
--------------------------------------------------------------------------------
/figures_T_50x/avg_regret_ic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_T_50x/avg_regret_ic.png
--------------------------------------------------------------------------------
/figures_T_50x/avg_regret_ti.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_T_50x/avg_regret_ti.png
--------------------------------------------------------------------------------
/figures_T_50x/cum_regret_diff.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_T_50x/cum_regret_diff.png
--------------------------------------------------------------------------------
/figures_T_50x/cum_regret_ic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_T_50x/cum_regret_ic.png
-------------------------------------------------------------------------------- /figures_T_50x/cum_regret_ti.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_T_50x/cum_regret_ti.png -------------------------------------------------------------------------------- /figures_T_50x/final_regret_diff.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_T_50x/final_regret_diff.png -------------------------------------------------------------------------------- /figures_T_50x/final_regret_ic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_T_50x/final_regret_ic.png -------------------------------------------------------------------------------- /figures_T_50x/final_regret_ti.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_T_50x/final_regret_ti.png -------------------------------------------------------------------------------- /figures_d_50x/avg_regret_diff.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_d_50x/avg_regret_diff.png -------------------------------------------------------------------------------- /figures_d_50x/avg_regret_ic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_d_50x/avg_regret_ic.png -------------------------------------------------------------------------------- /figures_d_50x/avg_regret_ti.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_d_50x/avg_regret_ti.png -------------------------------------------------------------------------------- /figures_d_50x/cum_regret_diff.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_d_50x/cum_regret_diff.png -------------------------------------------------------------------------------- /figures_d_50x/cum_regret_ic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_d_50x/cum_regret_ic.png -------------------------------------------------------------------------------- /figures_d_50x/cum_regret_ti.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_d_50x/cum_regret_ti.png -------------------------------------------------------------------------------- /figures_d_50x/final_regret_diff.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_d_50x/final_regret_diff.png -------------------------------------------------------------------------------- /figures_d_50x/final_regret_ic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_d_50x/final_regret_ic.png -------------------------------------------------------------------------------- /figures_d_50x/final_regret_ti.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_d_50x/final_regret_ti.png -------------------------------------------------------------------------------- /figures_k_50x/avg_regret_diff.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_k_50x/avg_regret_diff.png -------------------------------------------------------------------------------- /figures_k_50x/avg_regret_ic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_k_50x/avg_regret_ic.png -------------------------------------------------------------------------------- /figures_k_50x/avg_regret_ti.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_k_50x/avg_regret_ti.png -------------------------------------------------------------------------------- /figures_k_50x/cum_regret_diff.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_k_50x/cum_regret_diff.png -------------------------------------------------------------------------------- /figures_k_50x/cum_regret_ic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_k_50x/cum_regret_ic.png -------------------------------------------------------------------------------- /figures_k_50x/cum_regret_ti.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_k_50x/cum_regret_ti.png -------------------------------------------------------------------------------- /figures_k_50x/final_regret_diff.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_k_50x/final_regret_diff.png -------------------------------------------------------------------------------- /figures_k_50x/final_regret_ic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_k_50x/final_regret_ic.png -------------------------------------------------------------------------------- 
/figures_k_50x/final_regret_ti.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_k_50x/final_regret_ti.png -------------------------------------------------------------------------------- /paper/.gitignore: -------------------------------------------------------------------------------- 1 | ## Core latex/pdflatex auxiliary files: 2 | *.aux 3 | *.lof 4 | *.log 5 | *.lot 6 | *.fls 7 | *.out 8 | *.toc 9 | *.fmt 10 | *.fot 11 | *.cb 12 | *.cb2 13 | 14 | ## Intermediate documents: 15 | *.dvi 16 | *-converted-to.* 17 | # these rules might exclude image files for figures etc. 18 | # *.ps 19 | # *.eps 20 | # *.pdf 21 | 22 | ## Generated if empty string is given at "Please type another file name for output:" 23 | .pdf 24 | 25 | ## Bibliography auxiliary files (bibtex/biblatex/biber): 26 | *.bbl 27 | *.bcf 28 | *.blg 29 | *-blx.aux 30 | *-blx.bib 31 | *.run.xml 32 | 33 | ## Build tool auxiliary files: 34 | *.fdb_latexmk 35 | *.synctex 36 | *.synctex(busy) 37 | *.synctex.gz 38 | *.synctex.gz(busy) 39 | *.pdfsync 40 | 41 | ## Auxiliary and intermediate files from other packages: 42 | # algorithms 43 | *.alg 44 | *.loa 45 | 46 | # achemso 47 | acs-*.bib 48 | 49 | # amsthm 50 | *.thm 51 | 52 | # beamer 53 | *.nav 54 | *.pre 55 | *.snm 56 | *.vrb 57 | 58 | # changes 59 | *.soc 60 | 61 | # cprotect 62 | *.cpt 63 | 64 | # elsarticle (documentclass of Elsevier journals) 65 | *.spl 66 | 67 | # endnotes 68 | *.ent 69 | 70 | # fixme 71 | *.lox 72 | 73 | # feynmf/feynmp 74 | *.mf 75 | *.mp 76 | *.t[1-9] 77 | *.t[1-9][0-9] 78 | *.tfm 79 | 80 | #(r)(e)ledmac/(r)(e)ledpar 81 | *.end 82 | *.?end 83 | *.[1-9] 84 | *.[1-9][0-9] 85 | *.[1-9][0-9][0-9] 86 | *.[1-9]R 87 | *.[1-9][0-9]R 88 | *.[1-9][0-9][0-9]R 89 | *.eledsec[1-9] 90 | *.eledsec[1-9]R 91 | *.eledsec[1-9][0-9] 92 | *.eledsec[1-9][0-9]R 93 | *.eledsec[1-9][0-9][0-9] 94 | *.eledsec[1-9][0-9][0-9]R 95 | 96 | # glossaries 97 | *.acn 98 | *.acr 99 | *.glg 100 | *.glo 101 | *.gls 102 | *.glsdefs 103 | 104 | # gnuplottex 105 | *-gnuplottex-* 106 | 107 | # gregoriotex 108 | *.gaux 109 | *.gtex 110 | 111 | # hyperref 112 | *.brf 113 | 114 | # knitr 115 | *-concordance.tex 116 | # TODO Comment the next line if you want to keep your tikz graphics files 117 | *.tikz 118 | *-tikzDictionary 119 | 120 | # listings 121 | *.lol 122 | 123 | # makeidx 124 | *.idx 125 | *.ilg 126 | *.ind 127 | *.ist 128 | 129 | # minitoc 130 | *.maf 131 | *.mlf 132 | *.mlt 133 | *.mtc[0-9]* 134 | *.slf[0-9]* 135 | *.slt[0-9]* 136 | *.stc[0-9]* 137 | 138 | # minted 139 | _minted* 140 | *.pyg 141 | 142 | # morewrites 143 | *.mw 144 | 145 | # nomencl 146 | *.nlo 147 | 148 | # pax 149 | *.pax 150 | 151 | # pdfpcnotes 152 | *.pdfpc 153 | 154 | # sagetex 155 | *.sagetex.sage 156 | *.sagetex.py 157 | *.sagetex.scmd 158 | 159 | # scrwfile 160 | *.wrt 161 | 162 | # sympy 163 | *.sout 164 | *.sympy 165 | sympy-plots-for-*.tex/ 166 | 167 | # pdfcomment 168 | *.upa 169 | *.upb 170 | 171 | # pythontex 172 | *.pytxcode 173 | pythontex-files-*/ 174 | 175 | # thmtools 176 | *.loe 177 | 178 | # TikZ & PGF 179 | *.dpth 180 | *.md5 181 | *.auxlock 182 | 183 | # todonotes 184 | *.tdo 185 | 186 | # easy-todo 187 | *.lod 188 | 189 | # xindy 190 | *.xdy 191 | 192 | # xypic precompiled matrices 193 | *.xyc 194 | 195 | # endfloat 196 | *.ttt 197 | *.fff 198 | 199 | # Latexian 200 | TSWLatexianTemp* 201 | 202 | ## Editors: 203 | # WinEdt 204 | *.bak 205 | *.sav 206 | 207 | # 
Texpad 208 | .texpadtmp 209 | 210 | # Kile 211 | *.backup 212 | 213 | # KBibTeX 214 | *~[0-9]* 215 | 216 | # auto folder when using emacs and auctex 217 | /auto/* 218 | 219 | # expex forward references with \gathertags 220 | *-tags.tex 221 | -------------------------------------------------------------------------------- /paper/Makefile: -------------------------------------------------------------------------------- 1 | 2 | all: prepare 3 | 4 | prepare: 5 | pdflatex paper.tex 6 | 7 | view: prepare 8 | open -a Skim paper.pdf 9 | 10 | develop: prepare 11 | fswatch -i 'paper.tex' -e '.*' . | xargs -t -n1 -I % bash -c "pdflatex % || osascript -e 'display notification \"Latex compilation failed\" with title \"ERROR\"'" 12 | 13 | clean: 14 | rm -rf *.aux *.listing *.pdf *.out *.log 15 | -------------------------------------------------------------------------------- /paper/acl.bst: -------------------------------------------------------------------------------- 1 | 2 | % BibTeX `acl' style file for BibTeX version 0.99c, LaTeX version 2.09 3 | % This version was made by modifying `aaai-named' format based on the master 4 | % file by Oren Patashnik (PATASHNIK@SCORE.STANFORD.EDU) 5 | 6 | % Copyright (C) 1985, all rights reserved. 7 | % Modifications Copyright 1988, Peter F. Patel-Schneider 8 | % Further modifictions by Stuart Shieber, 1991, and Fernando Pereira, 1992. 9 | % Copying of this file is authorized only if either 10 | % (1) you make absolutely no changes to your copy, including name, or 11 | % (2) if you do make changes, you name it something other than 12 | % btxbst.doc, plain.bst, unsrt.bst, alpha.bst, and abbrv.bst. 13 | % This restriction helps ensure that all standard styles are identical. 14 | 15 | % There are undoubtably bugs in this style. If you make bug fixes, 16 | % improvements, etc. please let me know. My e-mail address is: 17 | % pfps@spar.slb.com 18 | 19 | % Citation format: [author-last-name, year] 20 | % [author-last-name and author-last-name, year] 21 | % [author-last-name {\em et al.}, year] 22 | % 23 | % Reference list ordering: alphabetical by author or whatever passes 24 | % for author in the absence of one. 25 | % 26 | % This BibTeX style has support for short (year only) citations. This 27 | % is done by having the citations actually look like 28 | % \citename{name-info, }year 29 | % The LaTeX style has to have the following 30 | % \let\@internalcite\cite 31 | % \def\cite{\def\citename##1{##1}\@internalcite} 32 | % \def\shortcite{\def\citename##1{}\@internalcite} 33 | % \def\@biblabel#1{\def\citename##1{##1}[#1]\hfill} 34 | % which makes \shortcite the macro for short citations. 35 | 36 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 37 | % Changes made by SMS for thesis style 38 | % no emphasis on "et al." 39 | % "Ph.D." 
includes periods (not "PhD") 40 | % moved year to immediately after author's name 41 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 42 | ENTRY 43 | { address 44 | author 45 | booktitle 46 | chapter 47 | edition 48 | editor 49 | howpublished 50 | institution 51 | journal 52 | key 53 | month 54 | note 55 | number 56 | organization 57 | pages 58 | publisher 59 | school 60 | series 61 | title 62 | type 63 | volume 64 | year 65 | } 66 | {} 67 | { label extra.label sort.label } 68 | 69 | INTEGERS { output.state before.all mid.sentence after.sentence after.block } 70 | 71 | FUNCTION {init.state.consts} 72 | { #0 'before.all := 73 | #1 'mid.sentence := 74 | #2 'after.sentence := 75 | #3 'after.block := 76 | } 77 | 78 | STRINGS { s t } 79 | 80 | FUNCTION {output.nonnull} 81 | { 's := 82 | output.state mid.sentence = 83 | { ", " * write$ } 84 | { output.state after.block = 85 | { add.period$ write$ 86 | newline$ 87 | "\newblock " write$ 88 | } 89 | { output.state before.all = 90 | 'write$ 91 | { add.period$ " " * write$ } 92 | if$ 93 | } 94 | if$ 95 | mid.sentence 'output.state := 96 | } 97 | if$ 98 | s 99 | } 100 | 101 | FUNCTION {output} 102 | { duplicate$ empty$ 103 | 'pop$ 104 | 'output.nonnull 105 | if$ 106 | } 107 | 108 | FUNCTION {output.check} 109 | { 't := 110 | duplicate$ empty$ 111 | { pop$ "empty " t * " in " * cite$ * warning$ } 112 | 'output.nonnull 113 | if$ 114 | } 115 | 116 | FUNCTION {output.bibitem} 117 | { newline$ 118 | 119 | "\bibitem[" write$ 120 | label write$ 121 | "]{" write$ 122 | 123 | cite$ write$ 124 | "}" write$ 125 | newline$ 126 | "" 127 | before.all 'output.state := 128 | } 129 | 130 | FUNCTION {fin.entry} 131 | { add.period$ 132 | write$ 133 | newline$ 134 | } 135 | 136 | FUNCTION {new.block} 137 | { output.state before.all = 138 | 'skip$ 139 | { after.block 'output.state := } 140 | if$ 141 | } 142 | 143 | FUNCTION {new.sentence} 144 | { output.state after.block = 145 | 'skip$ 146 | { output.state before.all = 147 | 'skip$ 148 | { after.sentence 'output.state := } 149 | if$ 150 | } 151 | if$ 152 | } 153 | 154 | FUNCTION {not} 155 | { { #0 } 156 | { #1 } 157 | if$ 158 | } 159 | 160 | FUNCTION {and} 161 | { 'skip$ 162 | { pop$ #0 } 163 | if$ 164 | } 165 | 166 | FUNCTION {or} 167 | { { pop$ #1 } 168 | 'skip$ 169 | if$ 170 | } 171 | 172 | FUNCTION {new.block.checka} 173 | { empty$ 174 | 'skip$ 175 | 'new.block 176 | if$ 177 | } 178 | 179 | FUNCTION {new.block.checkb} 180 | { empty$ 181 | swap$ empty$ 182 | and 183 | 'skip$ 184 | 'new.block 185 | if$ 186 | } 187 | 188 | FUNCTION {new.sentence.checka} 189 | { empty$ 190 | 'skip$ 191 | 'new.sentence 192 | if$ 193 | } 194 | 195 | FUNCTION {new.sentence.checkb} 196 | { empty$ 197 | swap$ empty$ 198 | and 199 | 'skip$ 200 | 'new.sentence 201 | if$ 202 | } 203 | 204 | FUNCTION {field.or.null} 205 | { duplicate$ empty$ 206 | { pop$ "" } 207 | 'skip$ 208 | if$ 209 | } 210 | 211 | FUNCTION {emphasize} 212 | { duplicate$ empty$ 213 | { pop$ "" } 214 | { "{\em " swap$ * "}" * } 215 | if$ 216 | } 217 | 218 | INTEGERS { nameptr namesleft numnames } 219 | 220 | FUNCTION {format.names} 221 | { 's := 222 | #1 'nameptr := 223 | s num.names$ 'numnames := 224 | numnames 'namesleft := 225 | { namesleft #0 > } 226 | 227 | { s nameptr "{ff~}{vv~}{ll}{, jj}" format.name$ 't := 228 | 229 | nameptr #1 > 230 | { namesleft #1 > 231 | { ", " * t * } 232 | { numnames #2 > 233 | { "," * } 234 | 'skip$ 235 | if$ 236 | t "others" = 237 | { " et~al." 
* } 238 | { " and " * t * } 239 | if$ 240 | } 241 | if$ 242 | } 243 | 't 244 | if$ 245 | nameptr #1 + 'nameptr := 246 | namesleft #1 - 'namesleft := 247 | } 248 | while$ 249 | } 250 | 251 | FUNCTION {format.authors} 252 | { author empty$ 253 | { "" } 254 | { author format.names } 255 | if$ 256 | } 257 | 258 | FUNCTION {format.editors} 259 | { editor empty$ 260 | { "" } 261 | { editor format.names 262 | editor num.names$ #1 > 263 | { ", editors" * } 264 | { ", editor" * } 265 | if$ 266 | } 267 | if$ 268 | } 269 | 270 | FUNCTION {format.title} 271 | { title empty$ 272 | { "" } 273 | 274 | { title "t" change.case$ } 275 | 276 | if$ 277 | } 278 | 279 | FUNCTION {n.dashify} 280 | { 't := 281 | "" 282 | { t empty$ not } 283 | { t #1 #1 substring$ "-" = 284 | { t #1 #2 substring$ "--" = not 285 | { "--" * 286 | t #2 global.max$ substring$ 't := 287 | } 288 | { { t #1 #1 substring$ "-" = } 289 | { "-" * 290 | t #2 global.max$ substring$ 't := 291 | } 292 | while$ 293 | } 294 | if$ 295 | } 296 | { t #1 #1 substring$ * 297 | t #2 global.max$ substring$ 't := 298 | } 299 | if$ 300 | } 301 | while$ 302 | } 303 | 304 | FUNCTION {format.date} 305 | { year empty$ 306 | { month empty$ 307 | { "" } 308 | { "there's a month but no year in " cite$ * warning$ 309 | month 310 | } 311 | if$ 312 | } 313 | { month empty$ 314 | { "" } 315 | { month } 316 | if$ 317 | } 318 | if$ 319 | } 320 | 321 | FUNCTION {format.btitle} 322 | { title emphasize 323 | } 324 | 325 | FUNCTION {tie.or.space.connect} 326 | { duplicate$ text.length$ #3 < 327 | { "~" } 328 | { " " } 329 | if$ 330 | swap$ * * 331 | } 332 | 333 | FUNCTION {either.or.check} 334 | { empty$ 335 | 'pop$ 336 | { "can't use both " swap$ * " fields in " * cite$ * warning$ } 337 | if$ 338 | } 339 | 340 | FUNCTION {format.bvolume} 341 | { volume empty$ 342 | { "" } 343 | { "volume" volume tie.or.space.connect 344 | series empty$ 345 | 'skip$ 346 | { " of " * series emphasize * } 347 | if$ 348 | "volume and number" number either.or.check 349 | } 350 | if$ 351 | } 352 | 353 | FUNCTION {format.number.series} 354 | { volume empty$ 355 | { number empty$ 356 | { series field.or.null } 357 | { output.state mid.sentence = 358 | { "number" } 359 | { "Number" } 360 | if$ 361 | number tie.or.space.connect 362 | series empty$ 363 | { "there's a number but no series in " cite$ * warning$ } 364 | { " in " * series * } 365 | if$ 366 | } 367 | if$ 368 | } 369 | { "" } 370 | if$ 371 | } 372 | 373 | FUNCTION {format.edition} 374 | { edition empty$ 375 | { "" } 376 | { output.state mid.sentence = 377 | { edition "l" change.case$ " edition" * } 378 | { edition "t" change.case$ " edition" * } 379 | if$ 380 | } 381 | if$ 382 | } 383 | 384 | INTEGERS { multiresult } 385 | 386 | FUNCTION {multi.page.check} 387 | { 't := 388 | #0 'multiresult := 389 | { multiresult not 390 | t empty$ not 391 | and 392 | } 393 | { t #1 #1 substring$ 394 | duplicate$ "-" = 395 | swap$ duplicate$ "," = 396 | swap$ "+" = 397 | or or 398 | { #1 'multiresult := } 399 | { t #2 global.max$ substring$ 't := } 400 | if$ 401 | } 402 | while$ 403 | multiresult 404 | } 405 | 406 | FUNCTION {format.pages} 407 | { pages empty$ 408 | { "" } 409 | { pages multi.page.check 410 | { "pages" pages n.dashify tie.or.space.connect } 411 | { "page" pages tie.or.space.connect } 412 | if$ 413 | } 414 | if$ 415 | } 416 | 417 | FUNCTION {format.year.label} 418 | { year extra.label * 419 | } 420 | 421 | FUNCTION {format.vol.num.pages} 422 | { volume field.or.null 423 | number empty$ 424 | 'skip$ 425 | { "(" number * ")" * * 426 | 
volume empty$ 427 | { "there's a number but no volume in " cite$ * warning$ } 428 | 'skip$ 429 | if$ 430 | } 431 | if$ 432 | pages empty$ 433 | 'skip$ 434 | { duplicate$ empty$ 435 | { pop$ format.pages } 436 | { ":" * pages n.dashify * } 437 | if$ 438 | } 439 | if$ 440 | } 441 | 442 | FUNCTION {format.chapter.pages} 443 | { chapter empty$ 444 | 'format.pages 445 | { type empty$ 446 | { "chapter" } 447 | { type "l" change.case$ } 448 | if$ 449 | chapter tie.or.space.connect 450 | pages empty$ 451 | 'skip$ 452 | { ", " * format.pages * } 453 | if$ 454 | } 455 | if$ 456 | } 457 | 458 | FUNCTION {format.in.ed.booktitle} 459 | { booktitle empty$ 460 | { "" } 461 | { editor empty$ 462 | { "In " booktitle emphasize * } 463 | { "In " format.editors * ", " * booktitle emphasize * } 464 | if$ 465 | } 466 | if$ 467 | } 468 | 469 | FUNCTION {empty.misc.check} 470 | { author empty$ title empty$ howpublished empty$ 471 | month empty$ year empty$ note empty$ 472 | and and and and and 473 | 474 | key empty$ not and 475 | 476 | { "all relevant fields are empty in " cite$ * warning$ } 477 | 'skip$ 478 | if$ 479 | } 480 | 481 | FUNCTION {format.thesis.type} 482 | { type empty$ 483 | 'skip$ 484 | { pop$ 485 | type "t" change.case$ 486 | } 487 | if$ 488 | } 489 | 490 | FUNCTION {format.tr.number} 491 | { type empty$ 492 | { "Technical Report" } 493 | 'type 494 | if$ 495 | number empty$ 496 | { "t" change.case$ } 497 | { number tie.or.space.connect } 498 | if$ 499 | } 500 | 501 | FUNCTION {format.article.crossref} 502 | { key empty$ 503 | { journal empty$ 504 | { "need key or journal for " cite$ * " to crossref " * crossref * 505 | warning$ 506 | "" 507 | } 508 | { "In {\em " journal * "\/}" * } 509 | if$ 510 | } 511 | { "In " key * } 512 | if$ 513 | " \cite{" * crossref * "}" * 514 | } 515 | 516 | FUNCTION {format.crossref.editor} 517 | { editor #1 "{vv~}{ll}" format.name$ 518 | editor num.names$ duplicate$ 519 | #2 > 520 | { pop$ " et~al." * } 521 | { #2 < 522 | 'skip$ 523 | { editor #2 "{ff }{vv }{ll}{ jj}" format.name$ "others" = 524 | { " et~al." 
* } 525 | { " and " * editor #2 "{vv~}{ll}" format.name$ * } 526 | if$ 527 | } 528 | if$ 529 | } 530 | if$ 531 | } 532 | 533 | FUNCTION {format.book.crossref} 534 | { volume empty$ 535 | { "empty volume in " cite$ * "'s crossref of " * crossref * warning$ 536 | "In " 537 | } 538 | { "Volume" volume tie.or.space.connect 539 | " of " * 540 | } 541 | if$ 542 | editor empty$ 543 | editor field.or.null author field.or.null = 544 | or 545 | { key empty$ 546 | { series empty$ 547 | { "need editor, key, or series for " cite$ * " to crossref " * 548 | crossref * warning$ 549 | "" * 550 | } 551 | { "{\em " * series * "\/}" * } 552 | if$ 553 | } 554 | { key * } 555 | if$ 556 | } 557 | { format.crossref.editor * } 558 | if$ 559 | " \cite{" * crossref * "}" * 560 | } 561 | 562 | FUNCTION {format.incoll.inproc.crossref} 563 | { editor empty$ 564 | editor field.or.null author field.or.null = 565 | or 566 | { key empty$ 567 | { booktitle empty$ 568 | { "need editor, key, or booktitle for " cite$ * " to crossref " * 569 | crossref * warning$ 570 | "" 571 | } 572 | { "In {\em " booktitle * "\/}" * } 573 | if$ 574 | } 575 | { "In " key * } 576 | if$ 577 | } 578 | { "In " format.crossref.editor * } 579 | if$ 580 | " \cite{" * crossref * "}" * 581 | } 582 | 583 | FUNCTION {article} 584 | { output.bibitem 585 | format.authors "author" output.check 586 | new.block 587 | format.year.label "year" output.check 588 | new.block 589 | format.title "title" output.check 590 | new.block 591 | crossref missing$ 592 | { journal emphasize "journal" output.check 593 | format.vol.num.pages output 594 | format.date output 595 | } 596 | { format.article.crossref output.nonnull 597 | format.pages output 598 | } 599 | if$ 600 | new.block 601 | note output 602 | fin.entry 603 | } 604 | 605 | FUNCTION {book} 606 | { output.bibitem 607 | author empty$ 608 | { format.editors "author and editor" output.check } 609 | { format.authors output.nonnull 610 | crossref missing$ 611 | { "author and editor" editor either.or.check } 612 | 'skip$ 613 | if$ 614 | } 615 | if$ 616 | new.block 617 | format.year.label "year" output.check 618 | new.block 619 | format.btitle "title" output.check 620 | crossref missing$ 621 | { format.bvolume output 622 | new.block 623 | format.number.series output 624 | new.sentence 625 | publisher "publisher" output.check 626 | address output 627 | } 628 | { new.block 629 | format.book.crossref output.nonnull 630 | } 631 | if$ 632 | format.edition output 633 | format.date output 634 | new.block 635 | note output 636 | fin.entry 637 | } 638 | 639 | FUNCTION {booklet} 640 | { output.bibitem 641 | format.authors output 642 | new.block 643 | format.year.label "year" output.check 644 | new.block 645 | format.title "title" output.check 646 | howpublished address new.block.checkb 647 | howpublished output 648 | address output 649 | format.date output 650 | new.block 651 | note output 652 | fin.entry 653 | } 654 | 655 | FUNCTION {inbook} 656 | { output.bibitem 657 | author empty$ 658 | { format.editors "author and editor" output.check } 659 | { format.authors output.nonnull 660 | crossref missing$ 661 | { "author and editor" editor either.or.check } 662 | 'skip$ 663 | if$ 664 | } 665 | if$ 666 | format.year.label "year" output.check 667 | new.block 668 | new.block 669 | format.btitle "title" output.check 670 | crossref missing$ 671 | { format.bvolume output 672 | format.chapter.pages "chapter and pages" output.check 673 | new.block 674 | format.number.series output 675 | new.sentence 676 | publisher "publisher" output.check 
677 | address output 678 | } 679 | { format.chapter.pages "chapter and pages" output.check 680 | new.block 681 | format.book.crossref output.nonnull 682 | } 683 | if$ 684 | format.edition output 685 | format.date output 686 | new.block 687 | note output 688 | fin.entry 689 | } 690 | 691 | FUNCTION {incollection} 692 | { output.bibitem 693 | format.authors "author" output.check 694 | new.block 695 | format.year.label "year" output.check 696 | new.block 697 | format.title "title" output.check 698 | new.block 699 | crossref missing$ 700 | { format.in.ed.booktitle "booktitle" output.check 701 | format.bvolume output 702 | format.number.series output 703 | format.chapter.pages output 704 | new.sentence 705 | publisher "publisher" output.check 706 | address output 707 | format.edition output 708 | format.date output 709 | } 710 | { format.incoll.inproc.crossref output.nonnull 711 | format.chapter.pages output 712 | } 713 | if$ 714 | new.block 715 | note output 716 | fin.entry 717 | } 718 | 719 | FUNCTION {inproceedings} 720 | { output.bibitem 721 | format.authors "author" output.check 722 | new.block 723 | format.year.label "year" output.check 724 | new.block 725 | format.title "title" output.check 726 | new.block 727 | crossref missing$ 728 | { format.in.ed.booktitle "booktitle" output.check 729 | format.bvolume output 730 | format.number.series output 731 | format.pages output 732 | address empty$ 733 | { organization publisher new.sentence.checkb 734 | organization output 735 | publisher output 736 | format.date output 737 | } 738 | { address output.nonnull 739 | format.date output 740 | new.sentence 741 | organization output 742 | publisher output 743 | } 744 | if$ 745 | } 746 | { format.incoll.inproc.crossref output.nonnull 747 | format.pages output 748 | } 749 | if$ 750 | new.block 751 | note output 752 | fin.entry 753 | } 754 | 755 | FUNCTION {conference} { inproceedings } 756 | 757 | FUNCTION {manual} 758 | { output.bibitem 759 | author empty$ 760 | { organization empty$ 761 | 'skip$ 762 | { organization output.nonnull 763 | address output 764 | } 765 | if$ 766 | } 767 | { format.authors output.nonnull } 768 | if$ 769 | format.year.label "year" output.check 770 | new.block 771 | new.block 772 | format.btitle "title" output.check 773 | author empty$ 774 | { organization empty$ 775 | { address new.block.checka 776 | address output 777 | } 778 | 'skip$ 779 | if$ 780 | } 781 | { organization address new.block.checkb 782 | organization output 783 | address output 784 | } 785 | if$ 786 | format.edition output 787 | format.date output 788 | new.block 789 | note output 790 | fin.entry 791 | } 792 | 793 | FUNCTION {mastersthesis} 794 | { output.bibitem 795 | format.authors "author" output.check 796 | new.block 797 | format.year.label "year" output.check 798 | new.block 799 | format.title "title" output.check 800 | new.block 801 | "Master's thesis" format.thesis.type output.nonnull 802 | school "school" output.check 803 | address output 804 | format.date output 805 | new.block 806 | note output 807 | fin.entry 808 | } 809 | 810 | FUNCTION {misc} 811 | { output.bibitem 812 | format.authors output 813 | new.block 814 | format.year.label output 815 | new.block 816 | title howpublished new.block.checkb 817 | format.title output 818 | howpublished new.block.checka 819 | howpublished output 820 | format.date output 821 | new.block 822 | note output 823 | fin.entry 824 | empty.misc.check 825 | } 826 | 827 | FUNCTION {phdthesis} 828 | { output.bibitem 829 | format.authors "author" output.check 830 | 
new.block 831 | format.year.label "year" output.check 832 | new.block 833 | format.btitle "title" output.check 834 | new.block 835 | "{Ph.D.} thesis" format.thesis.type output.nonnull 836 | school "school" output.check 837 | address output 838 | format.date output 839 | new.block 840 | note output 841 | fin.entry 842 | } 843 | 844 | FUNCTION {proceedings} 845 | { output.bibitem 846 | editor empty$ 847 | { organization output } 848 | { format.editors output.nonnull } 849 | if$ 850 | new.block 851 | format.year.label "year" output.check 852 | new.block 853 | format.btitle "title" output.check 854 | format.bvolume output 855 | format.number.series output 856 | address empty$ 857 | { editor empty$ 858 | { publisher new.sentence.checka } 859 | { organization publisher new.sentence.checkb 860 | organization output 861 | } 862 | if$ 863 | publisher output 864 | format.date output 865 | } 866 | { address output.nonnull 867 | format.date output 868 | new.sentence 869 | editor empty$ 870 | 'skip$ 871 | { organization output } 872 | if$ 873 | publisher output 874 | } 875 | if$ 876 | new.block 877 | note output 878 | fin.entry 879 | } 880 | 881 | FUNCTION {techreport} 882 | { output.bibitem 883 | format.authors "author" output.check 884 | new.block 885 | format.year.label "year" output.check 886 | new.block 887 | format.title "title" output.check 888 | new.block 889 | format.tr.number output.nonnull 890 | institution "institution" output.check 891 | address output 892 | format.date output 893 | new.block 894 | note output 895 | fin.entry 896 | } 897 | 898 | FUNCTION {unpublished} 899 | { output.bibitem 900 | format.authors "author" output.check 901 | new.block 902 | format.year.label "year" output.check 903 | new.block 904 | format.title "title" output.check 905 | new.block 906 | note "note" output.check 907 | format.date output 908 | fin.entry 909 | } 910 | 911 | FUNCTION {default.type} { misc } 912 | 913 | MACRO {jan} {"January"} 914 | 915 | MACRO {feb} {"February"} 916 | 917 | MACRO {mar} {"March"} 918 | 919 | MACRO {apr} {"April"} 920 | 921 | MACRO {may} {"May"} 922 | 923 | MACRO {jun} {"June"} 924 | 925 | MACRO {jul} {"July"} 926 | 927 | MACRO {aug} {"August"} 928 | 929 | MACRO {sep} {"September"} 930 | 931 | MACRO {oct} {"October"} 932 | 933 | MACRO {nov} {"November"} 934 | 935 | MACRO {dec} {"December"} 936 | 937 | MACRO {acmcs} {"ACM Computing Surveys"} 938 | 939 | MACRO {acta} {"Acta Informatica"} 940 | 941 | MACRO {cacm} {"Communications of the ACM"} 942 | 943 | MACRO {ibmjrd} {"IBM Journal of Research and Development"} 944 | 945 | MACRO {ibmsj} {"IBM Systems Journal"} 946 | 947 | MACRO {ieeese} {"IEEE Transactions on Software Engineering"} 948 | 949 | MACRO {ieeetc} {"IEEE Transactions on Computers"} 950 | 951 | MACRO {ieeetcad} 952 | {"IEEE Transactions on Computer-Aided Design of Integrated Circuits"} 953 | 954 | MACRO {ipl} {"Information Processing Letters"} 955 | 956 | MACRO {jacm} {"Journal of the ACM"} 957 | 958 | MACRO {jcss} {"Journal of Computer and System Sciences"} 959 | 960 | MACRO {scp} {"Science of Computer Programming"} 961 | 962 | MACRO {sicomp} {"SIAM Journal on Computing"} 963 | 964 | MACRO {tocs} {"ACM Transactions on Computer Systems"} 965 | 966 | MACRO {tods} {"ACM Transactions on Database Systems"} 967 | 968 | MACRO {tog} {"ACM Transactions on Graphics"} 969 | 970 | MACRO {toms} {"ACM Transactions on Mathematical Software"} 971 | 972 | MACRO {toois} {"ACM Transactions on Office Information Systems"} 973 | 974 | MACRO {toplas} {"ACM Transactions on Programming Languages 
and Systems"} 975 | 976 | MACRO {tcs} {"Theoretical Computer Science"} 977 | 978 | READ 979 | 980 | FUNCTION {sortify} 981 | { purify$ 982 | "l" change.case$ 983 | } 984 | 985 | INTEGERS { len } 986 | 987 | FUNCTION {chop.word} 988 | { 's := 989 | 'len := 990 | s #1 len substring$ = 991 | { s len #1 + global.max$ substring$ } 992 | 's 993 | if$ 994 | } 995 | 996 | INTEGERS { et.al.char.used } 997 | 998 | FUNCTION {initialize.et.al.char.used} 999 | { #0 'et.al.char.used := 1000 | } 1001 | 1002 | EXECUTE {initialize.et.al.char.used} 1003 | 1004 | FUNCTION {format.lab.names} 1005 | { 's := 1006 | s num.names$ 'numnames := 1007 | 1008 | numnames #1 = 1009 | { s #1 "{vv }{ll}" format.name$ } 1010 | { numnames #2 = 1011 | { s #1 "{vv }{ll }and " format.name$ s #2 "{vv }{ll}" format.name$ * 1012 | } 1013 | { s #1 "{vv }{ll }\bgroup et al.\egroup " format.name$ } 1014 | if$ 1015 | } 1016 | if$ 1017 | 1018 | } 1019 | 1020 | FUNCTION {author.key.label} 1021 | { author empty$ 1022 | { key empty$ 1023 | 1024 | { cite$ #1 #3 substring$ } 1025 | 1026 | { key #3 text.prefix$ } 1027 | if$ 1028 | } 1029 | { author format.lab.names } 1030 | if$ 1031 | } 1032 | 1033 | FUNCTION {author.editor.key.label} 1034 | { author empty$ 1035 | { editor empty$ 1036 | { key empty$ 1037 | 1038 | { cite$ #1 #3 substring$ } 1039 | 1040 | { key #3 text.prefix$ } 1041 | if$ 1042 | } 1043 | { editor format.lab.names } 1044 | if$ 1045 | } 1046 | { author format.lab.names } 1047 | if$ 1048 | } 1049 | 1050 | FUNCTION {author.key.organization.label} 1051 | { author empty$ 1052 | { key empty$ 1053 | { organization empty$ 1054 | 1055 | { cite$ #1 #3 substring$ } 1056 | 1057 | { "The " #4 organization chop.word #3 text.prefix$ } 1058 | if$ 1059 | } 1060 | { key #3 text.prefix$ } 1061 | if$ 1062 | } 1063 | { author format.lab.names } 1064 | if$ 1065 | } 1066 | 1067 | FUNCTION {editor.key.organization.label} 1068 | { editor empty$ 1069 | { key empty$ 1070 | { organization empty$ 1071 | 1072 | { cite$ #1 #3 substring$ } 1073 | 1074 | { "The " #4 organization chop.word #3 text.prefix$ } 1075 | if$ 1076 | } 1077 | { key #3 text.prefix$ } 1078 | if$ 1079 | } 1080 | { editor format.lab.names } 1081 | if$ 1082 | } 1083 | 1084 | FUNCTION {calc.label} 1085 | { type$ "book" = 1086 | type$ "inbook" = 1087 | or 1088 | 'author.editor.key.label 1089 | { type$ "proceedings" = 1090 | 'editor.key.organization.label 1091 | { type$ "manual" = 1092 | 'author.key.organization.label 1093 | 'author.key.label 1094 | if$ 1095 | } 1096 | if$ 1097 | } 1098 | if$ 1099 | duplicate$ 1100 | 1101 | "\protect\citename{" swap$ * "}" * 1102 | year field.or.null purify$ * 1103 | 'label := 1104 | year field.or.null purify$ * 1105 | 1106 | sortify 'sort.label := 1107 | } 1108 | 1109 | FUNCTION {sort.format.names} 1110 | { 's := 1111 | #1 'nameptr := 1112 | "" 1113 | s num.names$ 'numnames := 1114 | numnames 'namesleft := 1115 | { namesleft #0 > } 1116 | { nameptr #1 > 1117 | { " " * } 1118 | 'skip$ 1119 | if$ 1120 | 1121 | s nameptr "{vv{ } }{ll{ }}{ ff{ }}{ jj{ }}" format.name$ 't := 1122 | 1123 | nameptr numnames = t "others" = and 1124 | { "et al" * } 1125 | { t sortify * } 1126 | if$ 1127 | nameptr #1 + 'nameptr := 1128 | namesleft #1 - 'namesleft := 1129 | } 1130 | while$ 1131 | } 1132 | 1133 | FUNCTION {sort.format.title} 1134 | { 't := 1135 | "A " #2 1136 | "An " #3 1137 | "The " #4 t chop.word 1138 | chop.word 1139 | chop.word 1140 | sortify 1141 | #1 global.max$ substring$ 1142 | } 1143 | 1144 | FUNCTION {author.sort} 1145 | { author empty$ 1146 | { key empty$ 
1147 | { "to sort, need author or key in " cite$ * warning$ 1148 | "" 1149 | } 1150 | { key sortify } 1151 | if$ 1152 | } 1153 | { author sort.format.names } 1154 | if$ 1155 | } 1156 | 1157 | FUNCTION {author.editor.sort} 1158 | { author empty$ 1159 | { editor empty$ 1160 | { key empty$ 1161 | { "to sort, need author, editor, or key in " cite$ * warning$ 1162 | "" 1163 | } 1164 | { key sortify } 1165 | if$ 1166 | } 1167 | { editor sort.format.names } 1168 | if$ 1169 | } 1170 | { author sort.format.names } 1171 | if$ 1172 | } 1173 | 1174 | FUNCTION {author.organization.sort} 1175 | { author empty$ 1176 | { organization empty$ 1177 | { key empty$ 1178 | { "to sort, need author, organization, or key in " cite$ * warning$ 1179 | "" 1180 | } 1181 | { key sortify } 1182 | if$ 1183 | } 1184 | { "The " #4 organization chop.word sortify } 1185 | if$ 1186 | } 1187 | { author sort.format.names } 1188 | if$ 1189 | } 1190 | 1191 | FUNCTION {editor.organization.sort} 1192 | { editor empty$ 1193 | { organization empty$ 1194 | { key empty$ 1195 | { "to sort, need editor, organization, or key in " cite$ * warning$ 1196 | "" 1197 | } 1198 | { key sortify } 1199 | if$ 1200 | } 1201 | { "The " #4 organization chop.word sortify } 1202 | if$ 1203 | } 1204 | { editor sort.format.names } 1205 | if$ 1206 | } 1207 | 1208 | FUNCTION {presort} 1209 | 1210 | { calc.label 1211 | sort.label 1212 | " " 1213 | * 1214 | type$ "book" = 1215 | 1216 | type$ "inbook" = 1217 | or 1218 | 'author.editor.sort 1219 | { type$ "proceedings" = 1220 | 'editor.organization.sort 1221 | { type$ "manual" = 1222 | 'author.organization.sort 1223 | 'author.sort 1224 | if$ 1225 | } 1226 | if$ 1227 | } 1228 | if$ 1229 | 1230 | * 1231 | 1232 | " " 1233 | * 1234 | year field.or.null sortify 1235 | * 1236 | " " 1237 | * 1238 | title field.or.null 1239 | sort.format.title 1240 | * 1241 | #1 entry.max$ substring$ 1242 | 'sort.key$ := 1243 | } 1244 | 1245 | ITERATE {presort} 1246 | 1247 | SORT 1248 | 1249 | STRINGS { longest.label last.sort.label next.extra } 1250 | 1251 | INTEGERS { longest.label.width last.extra.num } 1252 | 1253 | FUNCTION {initialize.longest.label} 1254 | { "" 'longest.label := 1255 | #0 int.to.chr$ 'last.sort.label := 1256 | "" 'next.extra := 1257 | #0 'longest.label.width := 1258 | #0 'last.extra.num := 1259 | } 1260 | 1261 | FUNCTION {forward.pass} 1262 | { last.sort.label sort.label = 1263 | { last.extra.num #1 + 'last.extra.num := 1264 | last.extra.num int.to.chr$ 'extra.label := 1265 | } 1266 | { "a" chr.to.int$ 'last.extra.num := 1267 | "" 'extra.label := 1268 | sort.label 'last.sort.label := 1269 | } 1270 | if$ 1271 | } 1272 | 1273 | FUNCTION {reverse.pass} 1274 | { next.extra "b" = 1275 | { "a" 'extra.label := } 1276 | 'skip$ 1277 | if$ 1278 | label extra.label * 'label := 1279 | label width$ longest.label.width > 1280 | { label 'longest.label := 1281 | label width$ 'longest.label.width := 1282 | } 1283 | 'skip$ 1284 | if$ 1285 | extra.label 'next.extra := 1286 | } 1287 | 1288 | EXECUTE {initialize.longest.label} 1289 | 1290 | ITERATE {forward.pass} 1291 | 1292 | REVERSE {reverse.pass} 1293 | 1294 | FUNCTION {begin.bib} 1295 | 1296 | { et.al.char.used 1297 | { "\newcommand{\etalchar}[1]{$^{#1}$}" write$ newline$ } 1298 | 'skip$ 1299 | if$ 1300 | preamble$ empty$ 1301 | 1302 | 'skip$ 1303 | { preamble$ write$ newline$ } 1304 | if$ 1305 | 1306 | "\begin{thebibliography}{" "}" * write$ newline$ 1307 | 1308 | } 1309 | 1310 | EXECUTE {begin.bib} 1311 | 1312 | EXECUTE {init.state.consts} 1313 | 1314 | ITERATE {call.type$} 1315 
| 1316 | FUNCTION {end.bib} 1317 | { newline$ 1318 | "\end{thebibliography}" write$ newline$ 1319 | } 1320 | 1321 | EXECUTE {end.bib} 1322 | 1323 | -------------------------------------------------------------------------------- /paper/acl2015.sty: -------------------------------------------------------------------------------- 1 | % File acl2015.sty 2 | % December 2014 3 | 4 | % This is the LaTeX style file for ACL 2015. It is nearly identical to 5 | % the style files for ACL 2014, EACL 2006, ACL2005, ACL 2002, ACL 6 | % 2001, ACL 2000, EACL 95 and EACL 99. 7 | % 8 | % Changes made include: adapt layout to A4 and centimeters, widen abstract 9 | 10 | % This is the LaTeX style file for ACL 2000. It is nearly identical to the 11 | % style files for EACL 95 and EACL 99. Minor changes include editing the 12 | % instructions to reflect use of \documentclass rather than \documentstyle 13 | % and removing the white space before the title on the first page 14 | % -- John Chen, June 29, 2000 15 | 16 | % To convert from submissions prepared using the style file aclsub.sty 17 | % prepared for the ACL 2000 conference, proceed as follows: 18 | % 1) Remove submission-specific information: \whichsession, \id, 19 | % \wordcount, \otherconferences, \area, \keywords 20 | % 2) \summary should be removed. The summary material should come 21 | % after \maketitle and should be in the ``abstract'' environment 22 | % 3) Check all citations. This style should handle citations correctly 23 | % and also allows multiple citations separated by semicolons. 24 | % 4) Check figures and examples. Because the final format is double- 25 | % column, some adjustments may have to be made to fit text in the column 26 | % or to choose full-width (figure*) figures. 27 | % 5) Change the style reference from aclsub to acl2000, and be sure 28 | % this style file is in your TeX search path 29 | 30 | 31 | % This is the LaTeX style file for EACL-95. It is identical to the 32 | % style file for ANLP '94 except that the margins are adjusted for A4 33 | % paper. -- abney 13 Dec 94 34 | 35 | % The ANLP '94 style file is a slightly modified 36 | % version of the style used for AAAI and IJCAI, using some changes 37 | % prepared by Fernando Pereira and others and some minor changes 38 | % by Paul Jacobs. 39 | 40 | % Papers prepared using the aclsub.sty file and acl.bst bibtex style 41 | % should be easily converted to final format using this style. 42 | % (1) Submission information (\wordcount, \subject, and \makeidpage) 43 | % should be removed. 44 | % (2) \summary should be removed. The summary material should come 45 | % after \maketitle and should be in the ``abstract'' environment 46 | % (between \begin{abstract} and \end{abstract}). 47 | % (3) Check all citations. This style should handle citations correctly 48 | % and also allows multiple citations separated by semicolons. 49 | % (4) Check figures and examples. Because the final format is double- 50 | % column, some adjustments may have to be made to fit text in the column 51 | % or to choose full-width (figure*) figures. 52 | 53 | % Place this in a file called aclap.sty in the TeX search path. 54 | % (Placing it in the same directory as the paper should also work.) 55 | 56 | % Prepared by Peter F. Patel-Schneider, liberally using the ideas of 57 | % other style hackers, including Barbara Beeton. 58 | % This style is NOT guaranteed to work. It is provided in the hope 59 | % that it will make the preparation of papers easier. 60 | % 61 | % There are undoubtedly bugs in this style.
If you make bug fixes, 62 | % improvements, etc. please let me know. My e-mail address is: 63 | % pfps@research.att.com 64 | 65 | % Papers are to be prepared using the ``acl'' bibliography style, 66 | % as follows: 67 | % \documentclass[11pt]{article} 68 | % \usepackage{acl2000} 69 | % \title{Title} 70 | % \author{Author 1 \and Author 2 \\ Address line \\ Address line \And 71 | % Author 3 \\ Address line \\ Address line} 72 | % \begin{document} 73 | % ... 74 | % \bibliography{bibliography-file} 75 | % \bibliographystyle{acl} 76 | % \end{document} 77 | 78 | % Author information can be set in various styles: 79 | % For several authors from the same institution: 80 | % \author{Author 1 \and ... \and Author n \\ 81 | % Address line \\ ... \\ Address line} 82 | % if the names do not fit well on one line use 83 | % Author 1 \\ {\bf Author 2} \\ ... \\ {\bf Author n} \\ 84 | % For authors from different institutions: 85 | % \author{Author 1 \\ Address line \\ ... \\ Address line 86 | % \And ... \And 87 | % Author n \\ Address line \\ ... \\ Address line} 88 | % To start a separate ``row'' of authors use \AND, as in 89 | % \author{Author 1 \\ Address line \\ ... \\ Address line 90 | % \AND 91 | % Author 2 \\ Address line \\ ... \\ Address line \And 92 | % Author 3 \\ Address line \\ ... \\ Address line} 93 | 94 | % If the title and author information does not fit in the area allocated, 95 | % place \setlength\titlebox{<dim>} right after 96 | % \usepackage{acl2015} 97 | % where <dim> can be something larger than 5cm 98 | 99 | \typeout{Conference Style for ACL 2015 -- released December 7, 2014} 100 | 101 | % NOTE: Some laser printers have a serious problem printing TeX output. 102 | % These printing devices, commonly known as ``write-white'' laser 103 | % printers, tend to make characters too light. To get around this 104 | % problem, a darker set of fonts must be created for these devices. 105 | % 106 | 107 | 108 | 109 | % A4 modified by Eneko; again modified by Alexander for 5cm titlebox 110 | \setlength{\paperwidth}{21cm} % A4 111 | \setlength{\paperheight}{29.7cm}% A4 112 | \setlength\topmargin{-0.5cm} 113 | \setlength\oddsidemargin{0cm} 114 | \setlength\textheight{24.7cm} 115 | \setlength\textwidth{16.0cm} 116 | \setlength\columnsep{0.6cm} 117 | \newlength\titlebox 118 | \setlength\titlebox{5cm} 119 | \setlength\headheight{5pt} 120 | \setlength\headsep{0pt} 121 | \thispagestyle{empty} 122 | \pagestyle{empty} 123 | 124 | 125 | \flushbottom \twocolumn \sloppy 126 | 127 | % We're never going to need a table of contents, so just flush it to 128 | % save space --- suggested by drstrip@sandia-2 129 | \def\addcontentsline#1#2#3{} 130 | 131 | % Title stuff, taken from deproc.
132 | \def\maketitle{\par 133 | \begingroup 134 | \def\thefootnote{\fnsymbol{footnote}} 135 | \def\@makefnmark{\hbox to 0pt{$^{\@thefnmark}$\hss}} 136 | \twocolumn[\@maketitle] \@thanks 137 | \endgroup 138 | \setcounter{footnote}{0} 139 | \let\maketitle\relax \let\@maketitle\relax 140 | \gdef\@thanks{}\gdef\@author{}\gdef\@title{}\let\thanks\relax} 141 | \def\@maketitle{\vbox to \titlebox{\hsize\textwidth 142 | \linewidth\hsize \vskip 0.125in minus 0.125in \centering 143 | {\Large\bf \@title \par} \vskip 0.2in plus 1fil minus 0.1in 144 | {\def\and{\unskip\enspace{\rm and}\enspace}% 145 | \def\And{\end{tabular}\hss \egroup \hskip 1in plus 2fil 146 | \hbox to 0pt\bgroup\hss \begin{tabular}[t]{c}\bf}% 147 | \def\AND{\end{tabular}\hss\egroup \hfil\hfil\egroup 148 | \vskip 0.25in plus 1fil minus 0.125in 149 | \hbox to \linewidth\bgroup\large \hfil\hfil 150 | \hbox to 0pt\bgroup\hss \begin{tabular}[t]{c}\bf} 151 | \hbox to \linewidth\bgroup\large \hfil\hfil 152 | \hbox to 0pt\bgroup\hss \begin{tabular}[t]{c}\bf\@author 153 | \end{tabular}\hss\egroup 154 | \hfil\hfil\egroup} 155 | \vskip 0.3in plus 2fil minus 0.1in 156 | }} 157 | 158 | % margins for abstract 159 | \renewenvironment{abstract}% 160 | {\centerline{\large\bf Abstract}% 161 | \begin{list}{}% 162 | {\setlength{\rightmargin}{0.6cm}% 163 | \setlength{\leftmargin}{0.6cm}}% 164 | \item[]\ignorespaces}% 165 | {\unskip\end{list}} 166 | 167 | %\renewenvironment{abstract}{\centerline{\large\bf 168 | % Abstract}\vspace{0.5ex}\begin{quote}}{\par\end{quote}\vskip 1ex} 169 | 170 | 171 | % bibliography 172 | 173 | \def\thebibliography#1{\section*{References} 174 | \global\def\@listi{\leftmargin\leftmargini 175 | \labelwidth\leftmargini \advance\labelwidth-\labelsep 176 | \topsep 1pt plus 2pt minus 1pt 177 | \parsep 0.25ex plus 1pt \itemsep 0.25ex plus 1pt} 178 | \list {[\arabic{enumi}]}{\settowidth\labelwidth{[#1]}\leftmargin\labelwidth 179 | \advance\leftmargin\labelsep\usecounter{enumi}} 180 | \def\newblock{\hskip .11em plus .33em minus -.07em} 181 | \sloppy 182 | \sfcode`\.=1000\relax} 183 | 184 | \def\@up#1{\raise.2ex\hbox{#1}} 185 | 186 | % most of cite format is from aclsub.sty by SMS 187 | 188 | % don't box citations, separate with ; and a space 189 | % also, make the penalty between citations negative: a good place to break 190 | % changed comma back to semicolon pj 2/1/90 191 | % \def\@citex[#1]#2{\if@filesw\immediate\write\@auxout{\string\citation{#2}}\fi 192 | % \def\@citea{}\@cite{\@for\@citeb:=#2\do 193 | % {\@citea\def\@citea{;\penalty\@citeseppen\ }\@ifundefined 194 | % {b@\@citeb}{{\bf ?}\@warning 195 | % {Citation `\@citeb' on page \thepage \space undefined}}% 196 | % {\csname b@\@citeb\endcsname}}}{#1}} 197 | 198 | % don't box citations, separate with ; and a space 199 | % Replaced for multiple citations (pj) 200 | % don't box citations and also add space, semicolon between multiple citations 201 | \def\@citex[#1]#2{\if@filesw\immediate\write\@auxout{\string\citation{#2}}\fi 202 | \def\@citea{}\@cite{\@for\@citeb:=#2\do 203 | {\@citea\def\@citea{; }\@ifundefined 204 | {b@\@citeb}{{\bf ?}\@warning 205 | {Citation `\@citeb' on page \thepage \space undefined}}% 206 | {\csname b@\@citeb\endcsname}}}{#1}} 207 | 208 | % Allow short (name-less) citations, when used in 209 | % conjunction with a bibliography style that creates labels like 210 | % \citename{<names>, }<year> 211 | % 212 | \let\@internalcite\cite 213 | \def\cite{\def\citename##1{##1, }\@internalcite} 214 | \def\shortcite{\def\citename##1{}\@internalcite} 215 |
\def\newcite{\def\citename##1{{\frenchspacing##1} (}\@internalciteb} 216 | 217 | % Macros for \newcite, which leaves name in running text, and is 218 | % otherwise like \shortcite. 219 | \def\@citexb[#1]#2{\if@filesw\immediate\write\@auxout{\string\citation{#2}}\fi 220 | \def\@citea{}\@newcite{\@for\@citeb:=#2\do 221 | {\@citea\def\@citea{;\penalty\@m\ }\@ifundefined 222 | {b@\@citeb}{{\bf ?}\@warning 223 | {Citation `\@citeb' on page \thepage \space undefined}}% 224 | {\csname b@\@citeb\endcsname}}}{#1}} 225 | \def\@internalciteb{\@ifnextchar [{\@tempswatrue\@citexb}{\@tempswafalse\@citexb[]}} 226 | 227 | \def\@newcite#1#2{{#1\if@tempswa, #2\fi)}} 228 | 229 | \def\@biblabel#1{\def\citename##1{##1}[#1]\hfill} 230 | 231 | %%% More changes made by SMS (originals in latex.tex) 232 | % Use parentheses instead of square brackets in the text. 233 | \def\@cite#1#2{({#1\if@tempswa , #2\fi})} 234 | 235 | % Don't put a label in the bibliography at all. Just use the unlabeled format 236 | % instead. 237 | \def\thebibliography#1{\vskip\parskip% 238 | \vskip\baselineskip% 239 | \def\baselinestretch{1}% 240 | \ifx\@currsize\normalsize\@normalsize\else\@currsize\fi% 241 | \vskip-\parskip% 242 | \vskip-\baselineskip% 243 | \section*{References\@mkboth 244 | {References}{References}}\list 245 | {}{\setlength{\labelwidth}{0pt}\setlength{\leftmargin}{\parindent} 246 | \setlength{\itemindent}{-\parindent}} 247 | \def\newblock{\hskip .11em plus .33em minus -.07em} 248 | \sloppy\clubpenalty4000\widowpenalty4000 249 | \sfcode`\.=1000\relax} 250 | \let\endthebibliography=\endlist 251 | 252 | % Allow for a bibliography of sources of attested examples 253 | \def\thesourcebibliography#1{\vskip\parskip% 254 | \vskip\baselineskip% 255 | \def\baselinestretch{1}% 256 | \ifx\@currsize\normalsize\@normalsize\else\@currsize\fi% 257 | \vskip-\parskip% 258 | \vskip-\baselineskip% 259 | \section*{Sources of Attested Examples\@mkboth 260 | {Sources of Attested Examples}{Sources of Attested Examples}}\list 261 | {}{\setlength{\labelwidth}{0pt}\setlength{\leftmargin}{\parindent} 262 | \setlength{\itemindent}{-\parindent}} 263 | \def\newblock{\hskip .11em plus .33em minus -.07em} 264 | \sloppy\clubpenalty4000\widowpenalty4000 265 | \sfcode`\.=1000\relax} 266 | \let\endthesourcebibliography=\endlist 267 | 268 | \def\@lbibitem[#1]#2{\item[]\if@filesw 269 | { \def\protect##1{\string ##1\space}\immediate 270 | \write\@auxout{\string\bibcite{#2}{#1}}\fi\ignorespaces}} 271 | 272 | \def\@bibitem#1{\item\if@filesw \immediate\write\@auxout 273 | {\string\bibcite{#1}{\the\c@enumi}}\fi\ignorespaces} 274 | 275 | % sections with less space 276 | \def\section{\@startsection {section}{1}{\z@}{-2.0ex plus 277 | -0.5ex minus -.2ex}{1.5ex plus 0.3ex minus .2ex}{\large\bf\raggedright}} 278 | \def\subsection{\@startsection{subsection}{2}{\z@}{-1.8ex plus 279 | -0.5ex minus -.2ex}{0.8ex plus .2ex}{\normalsize\bf\raggedright}} 280 | %% changed by KO to - values to get the initial parindent right 281 | \def\subsubsection{\@startsection{subsubsection}{3}{\z@}{-1.5ex plus 282 | -0.5ex minus -.2ex}{0.5ex plus .2ex}{\normalsize\bf\raggedright}} 283 | \def\paragraph{\@startsection{paragraph}{4}{\z@}{1.5ex plus 284 | 0.5ex minus .2ex}{-1em}{\normalsize\bf}} 285 | \def\subparagraph{\@startsection{subparagraph}{5}{\parindent}{1.5ex plus 286 | 0.5ex minus .2ex}{-1em}{\normalsize\bf}} 287 | 288 | % Footnotes 289 | \footnotesep 6.65pt % 290 | \skip\footins 9pt plus 4pt minus 2pt 291 | \def\footnoterule{\kern-3pt \hrule width 5pc \kern 2.6pt } 292 |
\setcounter{footnote}{0} 293 | 294 | % Lists and paragraphs 295 | \parindent 1em 296 | \topsep 4pt plus 1pt minus 2pt 297 | \partopsep 1pt plus 0.5pt minus 0.5pt 298 | \itemsep 2pt plus 1pt minus 0.5pt 299 | \parsep 2pt plus 1pt minus 0.5pt 300 | 301 | \leftmargin 2em \leftmargini\leftmargin \leftmarginii 2em 302 | \leftmarginiii 1.5em \leftmarginiv 1.0em \leftmarginv .5em \leftmarginvi .5em 303 | \labelwidth\leftmargini\advance\labelwidth-\labelsep \labelsep 5pt 304 | 305 | \def\@listi{\leftmargin\leftmargini} 306 | \def\@listii{\leftmargin\leftmarginii 307 | \labelwidth\leftmarginii\advance\labelwidth-\labelsep 308 | \topsep 2pt plus 1pt minus 0.5pt 309 | \parsep 1pt plus 0.5pt minus 0.5pt 310 | \itemsep \parsep} 311 | \def\@listiii{\leftmargin\leftmarginiii 312 | \labelwidth\leftmarginiii\advance\labelwidth-\labelsep 313 | \topsep 1pt plus 0.5pt minus 0.5pt 314 | \parsep \z@ \partopsep 0.5pt plus 0pt minus 0.5pt 315 | \itemsep \topsep} 316 | \def\@listiv{\leftmargin\leftmarginiv 317 | \labelwidth\leftmarginiv\advance\labelwidth-\labelsep} 318 | \def\@listv{\leftmargin\leftmarginv 319 | \labelwidth\leftmarginv\advance\labelwidth-\labelsep} 320 | \def\@listvi{\leftmargin\leftmarginvi 321 | \labelwidth\leftmarginvi\advance\labelwidth-\labelsep} 322 | 323 | \abovedisplayskip 7pt plus2pt minus5pt% 324 | \belowdisplayskip \abovedisplayskip 325 | \abovedisplayshortskip 0pt plus3pt% 326 | \belowdisplayshortskip 4pt plus3pt minus3pt% 327 | 328 | % Less leading in most fonts (due to the narrow columns) 329 | % The choices were between 1-pt and 1.5-pt leading 330 | \def\@normalsize{\@setsize\normalsize{11pt}\xpt\@xpt} 331 | \def\small{\@setsize\small{10pt}\ixpt\@ixpt} 332 | \def\footnotesize{\@setsize\footnotesize{10pt}\ixpt\@ixpt} 333 | \def\scriptsize{\@setsize\scriptsize{8pt}\viipt\@viipt} 334 | \def\tiny{\@setsize\tiny{7pt}\vipt\@vipt} 335 | \def\large{\@setsize\large{14pt}\xiipt\@xiipt} 336 | \def\Large{\@setsize\Large{16pt}\xivpt\@xivpt} 337 | \def\LARGE{\@setsize\LARGE{20pt}\xviipt\@xviipt} 338 | \def\huge{\@setsize\huge{23pt}\xxpt\@xxpt} 339 | \def\Huge{\@setsize\Huge{28pt}\xxvpt\@xxvpt} 340 | -------------------------------------------------------------------------------- /paper/figures/T_50x_avg_regret_diff.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/T_50x_avg_regret_diff.png -------------------------------------------------------------------------------- /paper/figures/T_50x_avg_regret_ic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/T_50x_avg_regret_ic.png -------------------------------------------------------------------------------- /paper/figures/T_50x_avg_regret_ti.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/T_50x_avg_regret_ti.png -------------------------------------------------------------------------------- /paper/figures/T_50x_cum_regret_diff.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/T_50x_cum_regret_diff.png 
-------------------------------------------------------------------------------- /paper/figures/T_50x_cum_regret_ic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/T_50x_cum_regret_ic.png -------------------------------------------------------------------------------- /paper/figures/T_50x_cum_regret_ti.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/T_50x_cum_regret_ti.png -------------------------------------------------------------------------------- /paper/figures/T_50x_final_regret_diff.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/T_50x_final_regret_diff.png -------------------------------------------------------------------------------- /paper/figures/T_50x_final_regret_ic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/T_50x_final_regret_ic.png -------------------------------------------------------------------------------- /paper/figures/T_50x_final_regret_ti.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/T_50x_final_regret_ti.png -------------------------------------------------------------------------------- /paper/figures/d_50x_avg_regret_diff.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/d_50x_avg_regret_diff.png -------------------------------------------------------------------------------- /paper/figures/d_50x_avg_regret_ic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/d_50x_avg_regret_ic.png -------------------------------------------------------------------------------- /paper/figures/d_50x_avg_regret_ti.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/d_50x_avg_regret_ti.png -------------------------------------------------------------------------------- /paper/figures/d_50x_cum_regret_diff.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/d_50x_cum_regret_diff.png -------------------------------------------------------------------------------- /paper/figures/d_50x_cum_regret_ic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/d_50x_cum_regret_ic.png -------------------------------------------------------------------------------- /paper/figures/d_50x_cum_regret_ti.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/d_50x_cum_regret_ti.png -------------------------------------------------------------------------------- /paper/figures/d_50x_final_regret_diff.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/d_50x_final_regret_diff.png -------------------------------------------------------------------------------- /paper/figures/d_50x_final_regret_ic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/d_50x_final_regret_ic.png -------------------------------------------------------------------------------- /paper/figures/d_50x_final_regret_ti.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/d_50x_final_regret_ti.png -------------------------------------------------------------------------------- /paper/figures/k_50x_avg_regret_diff.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/k_50x_avg_regret_diff.png -------------------------------------------------------------------------------- /paper/figures/k_50x_avg_regret_ic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/k_50x_avg_regret_ic.png -------------------------------------------------------------------------------- /paper/figures/k_50x_avg_regret_ti.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/k_50x_avg_regret_ti.png -------------------------------------------------------------------------------- /paper/figures/k_50x_cum_regret_diff.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/k_50x_cum_regret_diff.png -------------------------------------------------------------------------------- /paper/figures/k_50x_cum_regret_ic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/k_50x_cum_regret_ic.png -------------------------------------------------------------------------------- /paper/figures/k_50x_cum_regret_ti.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/k_50x_cum_regret_ti.png -------------------------------------------------------------------------------- /paper/figures/k_50x_final_regret_diff.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/k_50x_final_regret_diff.png -------------------------------------------------------------------------------- /paper/figures/k_50x_final_regret_ic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/k_50x_final_regret_ic.png -------------------------------------------------------------------------------- /paper/figures/k_50x_final_regret_ti.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/k_50x_final_regret_ti.png -------------------------------------------------------------------------------- /paper/figures/yahoo-interval-chaining.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/yahoo-interval-chaining.png -------------------------------------------------------------------------------- /paper/figures/yahoo-top-interval.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/yahoo-top-interval.png -------------------------------------------------------------------------------- /paper/paper.bib: -------------------------------------------------------------------------------- 1 | @article{DBLP:journals/corr/JosephKMNR16, 2 | author = {Matthew Joseph and 3 | Michael Kearns and 4 | Jamie Morgenstern and 5 | Seth Neel and 6 | Aaron Roth}, 7 | title = {Rawlsian Fairness for Machine Learning}, 8 | journal = {CoRR}, 9 | volume = {abs/1610.09559}, 10 | year = {2016}, 11 | url = {http://arxiv.org/abs/1610.09559}, 12 | timestamp = {Wed, 02 Nov 2016 09:51:26 +0100}, 13 | biburl = {http://dblp.uni-trier.de/rec/bib/journals/corr/JosephKMNR16}, 14 | bibsource = {dblp computer science bibliography, http://dblp.org} 15 | } 16 | 17 | @article{DBLP:journals/corr/abs-1003-5956, 18 | author = {Lihong Li and 19 | Wei Chu and 20 | John Langford}, 21 | title = {An Unbiased, Data-Driven, Offline Evaluation Method of Contextual 22 | Bandit Algorithms}, 23 | journal = {CoRR}, 24 | volume = {abs/1003.5956}, 25 | year = {2010}, 26 | url = {http://arxiv.org/abs/1003.5956}, 27 | timestamp = {Mon, 05 Dec 2011 18:04:18 +0100}, 28 | biburl = {http://dblp.uni-trier.de/rec/bib/journals/corr/abs-1003-5956}, 29 | bibsource = {dblp computer science bibliography, http://dblp.org} 30 | } 31 | 32 | @misc{yahoo, 33 | title = {Yahoo! 
Front Page Today Module User Click Log Dataset}, 34 | author = {Yahoo!}, 35 | howpublished = {\url{https://webscope.sandbox.yahoo.com/catalog.php?datatype=r}}, 36 | note = {Accessed: 2017-04-03}, 37 | year = {2009} 38 | } 39 | -------------------------------------------------------------------------------- /paper/paper.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/paper.pdf -------------------------------------------------------------------------------- /paper/paper.tex: -------------------------------------------------------------------------------- 1 | % 2 | % File acl2015.tex 3 | % 4 | % Contact: car@ir.hit.edu.cn, gdzhou@suda.edu.cn 5 | %% 6 | %% Based on the style files for ACL-2014, which were, in turn, 7 | %% Based on the style files for ACL-2013, which were, in turn, 8 | %% Based on the style files for ACL-2012, which were, in turn, 9 | %% based on the style files for ACL-2011, which were, in turn, 10 | %% based on the style files for ACL-2010, which were, in turn, 11 | %% based on the style files for ACL-IJCNLP-2009, which were, in turn, 12 | %% based on the style files for EACL-2009 and IJCNLP-2008... 13 | 14 | %% Based on the style files for EACL 2006 by 15 | %%e.agirre@ehu.es or Sergi.Balari@uab.es 16 | %% and that of ACL 08 by Joakim Nivre and Noah Smith 17 | 18 | \documentclass[11pt]{article} 19 | \usepackage{acl2015} 20 | \usepackage{times} 21 | \usepackage{url} 22 | \usepackage{latexsym} 23 | \usepackage{hyperref} 24 | \usepackage{tikz} 25 | \usepackage{amsmath} 26 | \usepackage{tabulary} 27 | 28 | \usepackage[labelsep=quad,indention=10pt]{subfig} 29 | \captionsetup*[subfigure]{position=bottom} 30 | 31 | \newcommand{\specialcell}[2][c]{% 32 | \begin{tabular}[#1]{@{}c@{}}#2\end{tabular}} 33 | 34 | \usepackage{graphicx} 35 | \graphicspath{{figures/}} 36 | \DeclareGraphicsExtensions{.eps,.pdf,.jpg,.png} 37 | 38 | \DeclareMathOperator{\wsim}{sim} 39 | 40 | %\setlength\titlebox{5cm} 41 | 42 | % You can expand the titlebox if you need extra space 43 | % to show all the authors. Please do not make the titlebox 44 | % smaller than 5cm (the original size); we will check this 45 | % in the camera-ready version and ask you to change it back. 46 | 47 | \title{Further Empirical Analyses of Rawlsian Fairness for Machine Learning} 48 | 49 | \author{JT Cho \\ 50 | {\tt joncho@} \\ 51 | {\tt seas.upenn.edu} \\\And 52 | Karinna Loo \\ 53 | {\tt kloo@} \\ 54 | {\tt seas.upenn.edu} \\\And 55 | Veronica Wharton \\ 56 | {\tt whartonv@} \\ 57 | {\tt seas.upenn.edu} } 58 | \date{} 59 | 60 | \begin{document} 61 | \maketitle 62 | 63 | %\begin{abstract} 64 | 65 | %\noindent TODO: Abstract 66 | 67 | %\end{abstract} 68 | 69 | \section{Introduction} 70 | 71 | For our CIS 625 final project, our team --- JT Cho, Karinna Loo, and Veronica Wharton --- took a closer look at the topic of fairness in machine learning. The paper that piqued our interest was \textit{Rawlsian Fairness for Machine Learning} \cite{DBLP:journals/corr/JosephKMNR16}, which describes two online algorithms in the linear contextual bandit framework that satisfy a specified fairness constraint while learning at a rate comparable to (though necessarily worse than) that of the best algorithms absent any fairness constraint. The authors present theoretical and empirical results.
Our team sought to re-implement the algorithms presented by \newcite{DBLP:journals/corr/JosephKMNR16} and then expand upon their empirical analyses. We were also interested in exploring further fairness analyses using real-world data. 72 | 73 | \section{Project overview} 74 | 75 | Our project consisted of the following steps: 76 | 77 | \begin{enumerate} 78 | \item We read the paper \textit{Rawlsian Fairness for Machine Learning} \cite{DBLP:journals/corr/JosephKMNR16}. 79 | \item We implemented the \textsc{TopInterval}, \textsc{IntervalChaining}, and \textsc{RidgeFair} algorithms from the paper in Python. 80 | \item We ran our implementations on a Yahoo! dataset containing a fraction of the user click log for news articles displayed in the Featured Tab of the Today Module on the Yahoo! Front Page during the first ten days in May 2009 \cite{yahoo}, to see how well they performed on real data. 81 | \item To empirically evaluate our implementations, we ran experiments similar to those in \cite{DBLP:journals/corr/JosephKMNR16} with randomly drawn contexts. 82 | \item We compiled our findings into a written report. 83 | \end{enumerate} 84 | 85 | \section{Algorithm implementations} 86 | 87 | The code for our implementations can be found here: \url{https://github.com/jtcho/FairMachineLearning/blob/master/fairml.py} 88 | 89 | All algorithms and code were written using Python 3, along with NumPy\footnote{http://www.numpy.org}, SciPy\footnote{https://www.scipy.org}, and various other Python libraries. 90 | 91 | \section{Implementation: TopInterval} 92 | 93 | The \textsc{TopInterval} learning algorithm was implemented true to form as presented in \newcite{DBLP:journals/corr/JosephKMNR16}. Two details are worth noting. First, to ensure that all matrices used in computation were nonsingular, the first $d$ rounds are always chosen to be exploration rounds, where $d$ is the number of features. Second, we found it necessary to pick each arm once in order to observe data for each. 94 | 95 | \section{Implementation: IntervalChaining} 96 | 97 | The implementation for \textsc{IntervalChaining} was simple given \textsc{TopInterval}, as it sufficed to alter the strategy for picking arms in each round to that of picking uniformly at random from the chain containing the top interval. 98 | 99 | \section{Implementation: RidgeFair} 100 | 101 | The \textsc{RidgeFair} algorithm was also implemented as presented in \newcite{DBLP:journals/corr/JosephKMNR16}. This algorithm is very similar in implementation to \textsc{IntervalChaining}, save that its narrower confidence intervals allow tighter regret bounds to be derived. Two small details to note: first, we assume for simplicity (and without loss of generality) that the noise is $R$ sub-Gaussian with parameter $R = 1$; second, we play uniformly at random among all arms in the set of actions chained to the max. 102 |
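To make the preceding descriptions concrete, a minimal sketch of the interval-based arm selection shared by these implementations is given below. It is illustrative only: the width used here is a crude normal-approximation stand-in for the exact quantile width $w_{t,i}$ of \newcite{DBLP:journals/corr/JosephKMNR16}, and the function names are ours for exposition, not the exact interface of \texttt{fairml.py}.

\begin{verbatim}
import numpy as np

def ols_interval(X, y, x, z=1.96):
    # OLS point estimate for context x, with a normal-approximation
    # width (a crude stand-in for the exact quantile width w_{t,i}).
    # Assumes X has full column rank, hence the d exploration rounds.
    beta, *_ = np.linalg.lstsq(X, y, rcond=None)
    resid = y - X @ beta
    sigma2 = resid @ resid / max(len(y) - X.shape[1], 1)
    y_hat = x @ beta
    w = z * np.sqrt(sigma2 * (x @ np.linalg.inv(X.T @ X) @ x))
    return y_hat - w, y_hat + w

def pick_arm(lower, upper, rng, chain_intervals=True):
    # TopInterval plays the arm with the highest upper endpoint;
    # IntervalChaining instead plays uniformly at random from the
    # chain of overlapping intervals containing the top interval.
    lower, upper = np.asarray(lower), np.asarray(upper)
    order = np.argsort(-upper)            # arms by decreasing u_i
    if not chain_intervals:
        return order[0]                   # TopInterval
    chain, lo = [order[0]], lower[order[0]]
    for i in order[1:]:
        if upper[i] >= lo:                # overlaps the chain so far
            chain.append(i)
            lo = min(lo, lower[i])
        else:
            break
    return rng.choice(chain)              # IntervalChaining
\end{verbatim}

\textsc{RidgeFair} follows the same selection pattern, but with ridge-regression estimates and its own narrower widths.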
103 | \section{Yahoo! Dataset} 104 | 105 | To expand upon the initial work done by \newcite{DBLP:journals/corr/JosephKMNR16}, we endeavored to test the presented algorithms on a real dataset. A Yahoo! dataset containing logs of user visits to the front page was procured to evaluate our contextual bandit algorithms \cite{yahoo}. Each log entry details the following: 106 | 107 | \begin{center} 108 | \begin{table}[h] 109 | \fontsize{6}{10}\selectfont 110 | \begin{tabulary}{0.8\textwidth}{|l|l|l|l|l|} 111 | \hline \textbf{unix\_timestamp} & \textbf{displayed\_id} & \textbf{user\_clicked} & \textbf{user\_features} & \textbf{article\_pool}\\\hline 112 | 1241162400&109513&0&$\dots$&[$\dots$]\\\hline 113 | \end{tabulary} 114 | \end{table} 115 | \end{center} 116 | 117 | In each event, a user specified by $6$ features is presented an article from a pool of around $20$ distinct articles, each of which has its own $6$-dimensional feature vector. The event also tracks whether or not the user clicked the featured article. 118 | 119 | In a fashion similar to that presented in \newcite{DBLP:journals/corr/abs-1003-5956}, we devised an evaluation scheme for the various learning algorithms. In our procedure, a random sample is drawn from the set of logged events. The learning algorithm scans through the sampled events linearly, evaluating its predictions for each one. If the algorithm's picked arm matches the article displayed in the event, the logged event is added to the history. 120 | 121 | Initial attempts to use this approach failed for a couple of reasons. First, the Yahoo! dataset contains a highly disproportionate number of negative samples with respect to positive ones. As a result, our learning algorithm retained little useful information across iterations, since it was trained almost exclusively on negative samples. Second, a direct application of the \textsc{TopInterval} and \textsc{IntervalChaining} algorithms relies on the assumption of $20$ distinct underlying groups from which the articles in the article pool were chosen, each with its own distinct quality function. This assumption proved unreasonable: an article's index in the article pool had no bearing on its actual likelihood of being clicked by the user when picked. The initial context also does not lend itself well to a fairness analysis. As a consequence, direct applications of the learning algorithms performed very poorly. 122 | 123 | To mitigate the first issue, we altered our sampling procedure to sample positive and negative examples separately and then shuffle them together. A brief argument for the validity of this approach follows. While the underlying distribution of observed user visits consists mostly of negative results, the algorithm's performance should be independent of that underlying distribution, since it takes into account only the user's features and the articles it is choosing from. Hence, it suffices to curate the input to the learning algorithm so that it learns equally from the positive and negative events. 124 | 125 | To resolve the second issue, we simplified the problem context by clustering the articles. Across the million and a half logged events, there are approximately $20$ distinct articles in the article pools. In choosing a smaller number of clusters, we altered the scenario such that a successful event is one in which the user clicked an article from the same cluster chosen by the algorithm. In grouping the articles together, we reduced the number of available arms and also developed the notion of ``groups'' implicit in \newcite{DBLP:journals/corr/JosephKMNR16}'s contextual bandits framework. The emergent notion of fairness then concerns discrimination against any particular cluster of articles.
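A minimal sketch of this replay procedure, incorporating both the class-balanced sampling and the article clustering described above, follows. The event triples and the \texttt{choose}/\texttt{learn} callables are hypothetical simplifications, not our exact interfaces.

\begin{verbatim}
import random

def balanced_replay(events, choose, learn, n_per_class, seed=0):
    # events: (user_features, shown_cluster, clicked) triples, where
    # shown_cluster is the cluster id of the displayed article.
    # choose(user_features) -> cluster id picked by the bandit.
    # learn(user, cluster, clicked)  -> updates the learner.
    rng = random.Random(seed)
    pos = [e for e in events if e[2] == 1]
    neg = [e for e in events if e[2] == 0]
    sample = rng.sample(pos, n_per_class) + rng.sample(neg, n_per_class)
    rng.shuffle(sample)
    history, clicks = [], 0
    for user, shown, clicked in sample:
        if choose(user) == shown:     # replay only on agreement,
            history.append((user, shown, clicked))  # as in Li et al.
            learn(user, shown, clicked)
            clicks += clicked
    return clicks, len(history)
\end{verbatim}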
126 | 127 | These modifications resulted in significant improvements in the performance of our implementations on the Yahoo! dataset, as shown in Figure~\ref{fig:yahoo} below. 128 | 129 | Another novel modification we made was the use of a logit model instead of the simple linear regression used in \newcite{DBLP:journals/corr/JosephKMNR16}. We preserve the original fairness argument of the \textsc{IntervalChaining} algorithm by simply rescaling the output of the OLS estimator and the confidence intervals to $[0, 1]$ via the inverse logit. That is, 130 | $$w_{t,i} = \mathcal{Q}_{\mathcal{F}_{t,i}}\left(\frac{\delta}{2kT}\right)$$ 131 | $$[\ell_{i}^{t}, u_{i}^{t}] = [\Phi(\hat{y}_{t,i} - w_{t,i}), \Phi(\hat{y}_{t,i} + w_{t,i})]$$ 132 | where $\Phi(x) = \frac{e^{x}}{1 + e^{x}} = \text{logistic}(x)$. It suffices to note that both OLS and logistic regression are instances of the generalized linear model (GLM). 133 |
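Since the inverse logit is monotone, interval endpoints keep their relative order under the rescaling, so the overlap structure used for chaining is unchanged. A minimal sketch of the rescaling follows; the function names are ours for illustration, not the exact interface of our implementation.

\begin{verbatim}
import numpy as np

def logistic(x):
    # Phi(x) = e^x / (1 + e^x) = 1 / (1 + e^{-x}).
    return 1.0 / (1.0 + np.exp(-x))

def rescaled_interval(y_hat, w):
    # Map the OLS estimate +/- width into [0, 1]. Monotonicity of
    # logistic() preserves interval ordering, so chaining is unchanged.
    return logistic(y_hat - w), logistic(y_hat + w)
\end{verbatim}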
134 | \begin{figure*} 135 | \includegraphics[width=\textwidth]{yahoo-interval-chaining.png} 136 | \caption{Performance metrics of the logistic-regression-based interval-chaining algorithm with $3$ clusters over 10,000 iterations. Shown on the left is a graph depicting the performance of the learning algorithm vs. that of the ``best'' player, whose picked article is clicked by the user in every round. The regret is simply the difference in the cumulative number of successes between the two. In practice, this is an unfair comparison to make, as it is unreasonable to expect that the user would click the featured article on every visit; our results thus stand even stronger in comparison. On the right is a graph showing the cumulative fraction of successful picks by the algorithm vs. the baseline (randomly selecting one of the three clusters at each step). The learning algorithm appears to converge to approximately $50\%$ accuracy, which is considerably higher than the baseline. \label{fig:yahoo}} 137 | \end{figure*} 138 | 139 | \section{Experimental results} 140 | 141 | We ran experiments that compared the regret of \textsc{IntervalChaining} (IC) with the regret of \textsc{TopInterval} (TI). As in \newcite{DBLP:journals/corr/JosephKMNR16}, we present three sets of empirical results: 142 | \begin{itemize} 143 | \item Varying $T$ (the number of rounds): we measured the average regret of \textsc{IntervalChaining} and \textsc{TopInterval} as a function of increasing $T$. (See Figure \ref{fig:free_T}.) 144 | \item Varying $k$ (the number of arms/groups): we measured the average regret of \textsc{IntervalChaining} and \textsc{TopInterval} as a function of increasing $k$. (See Figure \ref{fig:free_k}.) 145 | \item Varying $d$ (the number of features): we measured the average regret of \textsc{IntervalChaining} and \textsc{TopInterval} as a function of increasing $d$. (See Figure \ref{fig:free_d}.) 146 | \end{itemize} 147 | 148 | For each increasing variable ($T$, $k$, or $d$), we present nine metrics as a function of the variable, each averaged over 50 trials. Contexts are drawn uniformly at random from $[0,1]^d$, and payoffs are perturbed with standard Gaussian noise. \newcite{DBLP:journals/corr/JosephKMNR16} present only the average regret difference (metric \#3). 149 | \begin{enumerate} 150 | \item Average regret (TI): the average regret of \textsc{TopInterval} across all rounds. 151 | \item Average regret (IC): the average regret of \textsc{IntervalChaining} across all rounds. 152 | \item Average regret difference (TI vs. IC): the difference between the average regrets of \textsc{TopInterval} and \textsc{IntervalChaining} across all rounds. 153 | \item Cumulative regret (TI): the cumulative regret of \textsc{TopInterval} across all rounds. 154 | \item Cumulative regret (IC): the cumulative regret of \textsc{IntervalChaining} across all rounds. 155 | \item Cumulative regret difference (TI vs. IC): the difference between the cumulative regrets of \textsc{TopInterval} and \textsc{IntervalChaining} across all rounds. 156 | \item Final regret (TI): the regret of \textsc{TopInterval} in the final round. 157 | \item Final regret (IC): the regret of \textsc{IntervalChaining} in the final round. 158 | \item Final regret difference (TI vs. IC): the difference between the final regrets of \textsc{TopInterval} and \textsc{IntervalChaining}. 159 | \end{enumerate} 160 | 161 | We present our results in Figures \ref{fig:free_T}, \ref{fig:free_k}, and \ref{fig:free_d}. 162 | 163 | \begin{figure*}[ht!] 164 | \centering 165 | \subfloat{ % 166 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{T_50x_avg_regret_ti} 167 | } 168 | \subfloat{ % 169 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{T_50x_avg_regret_ic} 170 | } 171 | \subfloat{ % 172 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{T_50x_avg_regret_diff} 173 | } 174 | \\ 175 | \subfloat{ % 176 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{T_50x_cum_regret_ti} 177 | } 178 | \subfloat{ % 179 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{T_50x_cum_regret_ic} 180 | } 181 | \subfloat{ % 182 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{T_50x_cum_regret_diff} 183 | } 184 | \\ 185 | \subfloat{ % 186 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{T_50x_final_regret_ti} 187 | } 188 | \subfloat{ % 189 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{T_50x_final_regret_ic} 190 | } 191 | \subfloat{ % 192 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{T_50x_final_regret_diff} 193 | } 194 | \caption{$d=2$, $k=2$, free $T$} 195 | \label{fig:free_T} 196 | \end{figure*} 197 |
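For reference, the synthetic experiments follow the driver pattern sketched below. This is an illustrative simplification: the \texttt{learner} interface (\texttt{pick}/\texttt{observe}) and \texttt{make\_learner} are hypothetical shorthand for our implementations, not their exact API.

\begin{verbatim}
import numpy as np

def run_trial(learner, T, k, d, seed=0):
    # One synthetic trial: contexts drawn uniformly from [0,1]^d,
    # linear per-arm payoffs, standard Gaussian noise.
    rng = np.random.default_rng(seed)
    beta = rng.uniform(0, 1, size=(k, d))   # unknown per-arm weights
    regret = np.zeros(T)
    for t in range(T):
        X = rng.uniform(0, 1, size=(k, d))  # one context per arm
        means = np.einsum('ij,ij->i', beta, X)
        i = learner.pick(X)                 # e.g. TI or IC selection
        learner.observe(i, means[i] + rng.standard_normal())
        regret[t] = means.max() - means[i]
    return regret

# Final regret for a given T, averaged over 50 independent trials:
# np.mean([run_trial(make_learner(), T, k=2, d=2, seed=s)[-1]
#          for s in range(50)])
\end{verbatim}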
198 | \begin{figure*}[ht!] 199 | \centering 200 | \subfloat{ % 201 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{k_50x_avg_regret_ti} 202 | } 203 | \subfloat{ % 204 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{k_50x_avg_regret_ic} 205 | } 206 | \subfloat{ % 207 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{k_50x_avg_regret_diff} 208 | } 209 | \\ 210 | \subfloat{ % 211 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{k_50x_cum_regret_ti} 212 | } 213 | \subfloat{ % 214 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{k_50x_cum_regret_ic} 215 | } 216 | \subfloat{ % 217 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{k_50x_cum_regret_diff} 218 | } 219 | \\ 220 | \subfloat{ % 221 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{k_50x_final_regret_ti} 222 | } 223 | \subfloat{ % 224 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{k_50x_final_regret_ic} 225 | } 226 | \subfloat{ % 227 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{k_50x_final_regret_diff} 228 | } 229 | \caption{$d=2$, $T=1000$, free $k$} 230 | \label{fig:free_k} 231 | \end{figure*} 232 | 233 | \begin{figure*}[ht!] 234 | \centering 235 | \subfloat{ % 236 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{d_50x_avg_regret_ti} 237 | } 238 | \subfloat{ % 239 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{d_50x_avg_regret_ic} 240 | } 241 | \subfloat{ % 242 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{d_50x_avg_regret_diff} 243 | } 244 | \\ 245 | \subfloat{ % 246 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{d_50x_cum_regret_ti} 247 | } 248 | \subfloat{ % 249 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{d_50x_cum_regret_ic} 250 | } 251 | \subfloat{ % 252 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{d_50x_cum_regret_diff} 253 | } 254 | \\ 255 | \subfloat{ % 256 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{d_50x_final_regret_ti} 257 | } 258 | \subfloat{ % 259 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{d_50x_final_regret_ic} 260 | } 261 | \subfloat{ % 262 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{d_50x_final_regret_diff} 263 | } 264 | \caption{$k=2$, $T=1000$, free $d$} 265 | \label{fig:free_d} 266 | \end{figure*} 267 | 268 | \section{Conclusion} 269 | 270 | In this work, we present an empirical extension of the work done by \newcite{DBLP:journals/corr/JosephKMNR16} in their paper \textit{Rawlsian Fairness for Machine Learning}. Specifically, we present implementations of their algorithms \textsc{TopInterval}, \textsc{IntervalChaining}, and \textsc{RidgeFair}; a case study in which we apply the aforementioned algorithms to a Yahoo! clicks dataset; and an extension of one of \newcite{DBLP:journals/corr/JosephKMNR16}'s empirical analyses on randomly generated data. We believe that our results may be useful should these algorithms be used in future real-world settings.
271 | 272 | \bibliography{paper} 273 | \bibliographystyle{acl} 274 | 275 | \end{document} 276 | -------------------------------------------------------------------------------- /references/rawlsian_fairness.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/references/rawlsian_fairness.pdf -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | appdirs==1.4.3 2 | appnope==0.1.0 3 | bleach==2.0.0 4 | cycler==0.10.0 5 | decorator==4.0.11 6 | entrypoints==0.2.2 7 | html5lib==0.999999999 8 | ipykernel==4.6.1 9 | ipython==6.0.0 10 | ipython-genutils==0.2.0 11 | ipywidgets==6.0.0 12 | jedi==0.10.2 13 | Jinja2==2.9.6 14 | jsonschema==2.6.0 15 | jupyter==1.0.0 16 | jupyter-client==5.0.1 17 | jupyter-console==5.1.0 18 | jupyter-core==4.3.0 19 | MarkupSafe==1.0 20 | matplotlib==2.0.1 21 | mistune==0.7.4 22 | nbconvert==5.1.1 23 | nbformat==4.3.0 24 | notebook==5.0.0 25 | numpy==1.12.1 26 | packaging==16.8 27 | pandas==0.19.2 28 | pandocfilters==1.4.1 29 | pexpect==4.2.1 30 | pickleshare==0.7.4 31 | prompt-toolkit==1.0.14 32 | ptyprocess==0.5.1 33 | Pygments==2.2.0 34 | pyparsing==2.2.0 35 | python-dateutil==2.6.0 36 | pytz==2017.2 37 | pyzmq==16.0.2 38 | qtconsole==4.3.0 39 | scikit-learn==0.18.1 40 | scipy==0.19.0 41 | simplegeneric==0.8.1 42 | six==1.10.0 43 | terminado==0.6 44 | testpath==0.3 45 | tornado==4.5.1 46 | traitlets==4.3.2 47 | virtualenv==15.1.0 48 | wcwidth==0.1.7 49 | webencodings==0.5.1 50 | widgetsnbextension==2.0.0 51 | --------------------------------------------------------------------------------