├── .gitignore
├── Horses.ipynb
├── NYSE.ipynb
├── Provably Fair Machine Learning.ipynb
├── README.md
├── RidgeFair.ipynb
├── Yahoo Testing.ipynb
├── Yahoo.ipynb
├── evaluation_T.py
├── evaluation_d.py
├── evaluation_k.py
├── fairml.py
├── figures_T_50x
│   ├── avg_regret_diff.png
│   ├── avg_regret_ic.png
│   ├── avg_regret_ti.png
│   ├── cum_regret_diff.png
│   ├── cum_regret_ic.png
│   ├── cum_regret_ti.png
│   ├── final_regret_diff.png
│   ├── final_regret_ic.png
│   └── final_regret_ti.png
├── figures_d_50x
│   ├── avg_regret_diff.png
│   ├── avg_regret_ic.png
│   ├── avg_regret_ti.png
│   ├── cum_regret_diff.png
│   ├── cum_regret_ic.png
│   ├── cum_regret_ti.png
│   ├── final_regret_diff.png
│   ├── final_regret_ic.png
│   └── final_regret_ti.png
├── figures_k_50x
│   ├── avg_regret_diff.png
│   ├── avg_regret_ic.png
│   ├── avg_regret_ti.png
│   ├── cum_regret_diff.png
│   ├── cum_regret_ic.png
│   ├── cum_regret_ti.png
│   ├── final_regret_diff.png
│   ├── final_regret_ic.png
│   └── final_regret_ti.png
├── paper
│   ├── .gitignore
│   ├── Makefile
│   ├── acl.bst
│   ├── acl2015.sty
│   ├── figures
│   │   ├── T_50x_avg_regret_diff.png
│   │   ├── T_50x_avg_regret_ic.png
│   │   ├── T_50x_avg_regret_ti.png
│   │   ├── T_50x_cum_regret_diff.png
│   │   ├── T_50x_cum_regret_ic.png
│   │   ├── T_50x_cum_regret_ti.png
│   │   ├── T_50x_final_regret_diff.png
│   │   ├── T_50x_final_regret_ic.png
│   │   ├── T_50x_final_regret_ti.png
│   │   ├── d_50x_avg_regret_diff.png
│   │   ├── d_50x_avg_regret_ic.png
│   │   ├── d_50x_avg_regret_ti.png
│   │   ├── d_50x_cum_regret_diff.png
│   │   ├── d_50x_cum_regret_ic.png
│   │   ├── d_50x_cum_regret_ti.png
│   │   ├── d_50x_final_regret_diff.png
│   │   ├── d_50x_final_regret_ic.png
│   │   ├── d_50x_final_regret_ti.png
│   │   ├── k_50x_avg_regret_diff.png
│   │   ├── k_50x_avg_regret_ic.png
│   │   ├── k_50x_avg_regret_ti.png
│   │   ├── k_50x_cum_regret_diff.png
│   │   ├── k_50x_cum_regret_ic.png
│   │   ├── k_50x_cum_regret_ti.png
│   │   ├── k_50x_final_regret_diff.png
│   │   ├── k_50x_final_regret_ic.png
│   │   ├── k_50x_final_regret_ti.png
│   │   ├── yahoo-interval-chaining.png
│   │   └── yahoo-top-interval.png
│   ├── paper.bib
│   ├── paper.pdf
│   └── paper.tex
├── references
│   └── rawlsian_fairness.pdf
└── requirements.txt
/.gitignore:
--------------------------------------------------------------------------------
1 | fairml
2 |
--------------------------------------------------------------------------------
/Horses.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {
7 | "collapsed": true
8 | },
9 | "outputs": [],
10 | "source": [
11 | "import pandas as pd\n",
12 | "import numpy as np"
13 | ]
14 | },
15 | {
16 | "cell_type": "code",
17 | "execution_count": 53,
18 | "metadata": {
19 | "collapsed": false
20 | },
21 | "outputs": [],
22 | "source": [
23 | "# data_dir = './horses/'\n",
24 | "# horses_df = pd.read_csv(data_dir + 'horses.csv',\n",
25 | "# usecols=['age', 'sex_id', 'prize_money'])\n",
26 | "# horse_sexes_df = pd.read_csv(data_dir + 'horse_sexes.csv').set_index('id')\n",
27 | "# riders_df = pd.read_csv(data_dir + 'riders.csv').set_index('id')"
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": 182,
33 | "metadata": {
34 | "collapsed": false
35 | },
36 | "outputs": [],
37 | "source": [
38 | "# Dataset Source: https://www.kaggle.com/gunner38/horseracing\n",
39 | "tips_df = pd.read_csv('horse_tips.csv', encoding='latin1')\n",
40 | "tips_df['Bet Type'] = tips_df['Bet Type'].apply(lambda x : 1 if x == 'Win' else 0)\n",
41 | "tips_df['Result'] = tips_df['Result'].apply(lambda x : 1 if x == 'Win' else 0)\n",
42 | "\n",
43 | "horses = np.sort(tips_df['Horse'].unique())\n",
44 | "tracks = np.sort(tips_df['Track'].unique())\n",
45 | "\n",
46 | "tips_df['Horse'] = tips_df['Horse'].apply(lambda x : np.where(horses == x)[0][0])\n",
47 | "tips_df['Track'] = tips_df['Track'].apply(lambda x : np.where(tracks == x)[0][0])"
48 | ]
49 | },
50 | {
51 | "cell_type": "code",
52 | "execution_count": 97,
53 | "metadata": {
54 | "collapsed": false
55 | },
56 | "outputs": [],
57 | "source": [
58 | "tipsters = tips_df['Tipster'].unique()"
59 | ]
60 | },
61 | {
62 | "cell_type": "code",
63 | "execution_count": 151,
64 | "metadata": {
65 | "collapsed": false
66 | },
67 | "outputs": [
68 | {
69 | "data": {
70 | "text/html": [
71 | "
\n",
72 | "
\n",
73 | " \n",
74 | " \n",
75 | " | \n",
76 | " Success Rate | \n",
77 | "
\n",
78 | " \n",
79 | " \n",
80 | " \n",
81 | " Tipster G | \n",
82 | " 0.672414 | \n",
83 | "
\n",
84 | " \n",
85 | " Tipster C | \n",
86 | " 0.616226 | \n",
87 | "
\n",
88 | " \n",
89 | " Tipster F | \n",
90 | " 0.520548 | \n",
91 | "
\n",
92 | " \n",
93 | " Tipster M | \n",
94 | " 0.520376 | \n",
95 | "
\n",
96 | " \n",
97 | " Tipster N | \n",
98 | " 0.496386 | \n",
99 | "
\n",
100 | " \n",
101 | "
\n",
102 | "
"
103 | ],
104 | "text/plain": [
105 | " Success Rate\n",
106 | "Tipster G 0.672414\n",
107 | "Tipster C 0.616226\n",
108 | "Tipster F 0.520548\n",
109 | "Tipster M 0.520376\n",
110 | "Tipster N 0.496386"
111 | ]
112 | },
113 | "execution_count": 151,
114 | "metadata": {},
115 | "output_type": "execute_result"
116 | }
117 | ],
118 | "source": [
119 | "success_rates = dict()\n",
120 | "for tipster in tipsters:\n",
121 | " successes = tips_df[(tips_df['Tipster'] == tipster) & (tips_df['Bet Type'] == tips_df['Result'])].shape[0]\n",
122 | " total = tips_df[(tips_df['Tipster'] == tipster)].shape[0]\n",
123 | " success_rates[tipster] = successes/total\n",
124 | "successes_df = pd.DataFrame(pd.Series(success_rates), columns=['Success Rate']).sort_values(by='Success Rate', ascending=False)\n",
125 | "successes_df.head(5)"
126 | ]
127 | },
128 | {
129 | "cell_type": "code",
130 | "execution_count": 149,
131 | "metadata": {
132 | "collapsed": false
133 | },
134 | "outputs": [],
135 | "source": [
136 | "X_G = tips_df[tips_df.Tipster=='Tipster G']\n",
137 | "X_C = tips_df[tips_df.Tipster=='Tipster C']\n",
138 | "X_F = tips_df[tips_df.Tipster=='Tipster F']\n",
139 | "X_M = tips_df[tips_df.Tipster=='Tipster M']\n",
140 | "X_N = tips_df[tips_df.Tipster=='Tipster N']"
141 | ]
142 | },
143 | {
144 | "cell_type": "markdown",
145 | "metadata": {},
146 | "source": [
147 | "We model the online learning algorithm as follows.\n",
148 | "\n",
149 | "In each round, we are given a betting scheme from each tipster, drawn uniformly at random from the pool of possible schemes. We model the quality function (the return) using OLS on the features. The true return is computed as if ten dollars were bet in any scheme from the odds and the true result (i.e. 10 * the odds)."
150 | ]
151 | },
152 | {
153 | "cell_type": "code",
154 | "execution_count": 183,
155 | "metadata": {
156 | "collapsed": false
157 | },
158 | "outputs": [
159 | {
160 | "data": {
161 | "text/html": [
162 | "\n",
163 | "
\n",
164 | " \n",
165 | " \n",
166 | " | \n",
167 | " UID | \n",
168 | " ID | \n",
169 | " Tipster | \n",
170 | " Date | \n",
171 | " Track | \n",
172 | " Horse | \n",
173 | " Bet Type | \n",
174 | " Odds | \n",
175 | " Result | \n",
176 | " TipsterActive | \n",
177 | "
\n",
178 | " \n",
179 | " \n",
180 | " \n",
181 | " 0 | \n",
182 | " 1 | \n",
183 | " 1 | \n",
184 | " Tipster A | \n",
185 | " 24/07/2015 | \n",
186 | " 2 | \n",
187 | " 5158 | \n",
188 | " 1 | \n",
189 | " 8.00 | \n",
190 | " 0 | \n",
191 | " True | \n",
192 | "
\n",
193 | " \n",
194 | " 1 | \n",
195 | " 2 | \n",
196 | " 2 | \n",
197 | " Tipster A | \n",
198 | " 24/07/2015 | \n",
199 | " 96 | \n",
200 | " 13108 | \n",
201 | " 1 | \n",
202 | " 4.50 | \n",
203 | " 0 | \n",
204 | " True | \n",
205 | "
\n",
206 | " \n",
207 | " 2 | \n",
208 | " 3 | \n",
209 | " 3 | \n",
210 | " Tipster A | \n",
211 | " 24/07/2015 | \n",
212 | " 114 | \n",
213 | " 13411 | \n",
214 | " 1 | \n",
215 | " 7.00 | \n",
216 | " 0 | \n",
217 | " True | \n",
218 | "
\n",
219 | " \n",
220 | " 3 | \n",
221 | " 4 | \n",
222 | " 4 | \n",
223 | " Tipster A | \n",
224 | " 24/07/2015 | \n",
225 | " 74 | \n",
226 | " 8976 | \n",
227 | " 1 | \n",
228 | " 5.00 | \n",
229 | " 0 | \n",
230 | " True | \n",
231 | "
\n",
232 | " \n",
233 | " 4 | \n",
234 | " 5 | \n",
235 | " 5 | \n",
236 | " Tipster A | \n",
237 | " 25/07/2015 | \n",
238 | " 2 | \n",
239 | " 10554 | \n",
240 | " 1 | \n",
241 | " 4.33 | \n",
242 | " 1 | \n",
243 | " True | \n",
244 | "
\n",
245 | " \n",
246 | " 5 | \n",
247 | " 6 | \n",
248 | " 6 | \n",
249 | " Tipster A | \n",
250 | " 25/07/2015 | \n",
251 | " 114 | \n",
252 | " 327 | \n",
253 | " 1 | \n",
254 | " 6.00 | \n",
255 | " 0 | \n",
256 | " True | \n",
257 | "
\n",
258 | " \n",
259 | " 6 | \n",
260 | " 7 | \n",
261 | " 7 | \n",
262 | " Tipster A | \n",
263 | " 25/07/2015 | \n",
264 | " 73 | \n",
265 | " 9791 | \n",
266 | " 1 | \n",
267 | " 6.00 | \n",
268 | " 0 | \n",
269 | " True | \n",
270 | "
\n",
271 | " \n",
272 | " 7 | \n",
273 | " 8 | \n",
274 | " 8 | \n",
275 | " Tipster A | \n",
276 | " 25/07/2015 | \n",
277 | " 62 | \n",
278 | " 2019 | \n",
279 | " 1 | \n",
280 | " 6.00 | \n",
281 | " 0 | \n",
282 | " True | \n",
283 | "
\n",
284 | " \n",
285 | " 8 | \n",
286 | " 9 | \n",
287 | " 9 | \n",
288 | " Tipster A | \n",
289 | " 26/07/2015 | \n",
290 | " 12 | \n",
291 | " 12101 | \n",
292 | " 1 | \n",
293 | " 5.50 | \n",
294 | " 0 | \n",
295 | " True | \n",
296 | "
\n",
297 | " \n",
298 | " 9 | \n",
299 | " 10 | \n",
300 | " 10 | \n",
301 | " Tipster A | \n",
302 | " 26/07/2015 | \n",
303 | " 80 | \n",
304 | " 115 | \n",
305 | " 1 | \n",
306 | " 2.00 | \n",
307 | " 0 | \n",
308 | " True | \n",
309 | "
\n",
310 | " \n",
311 | " 10 | \n",
312 | " 11 | \n",
313 | " 11 | \n",
314 | " Tipster A | \n",
315 | " 26/07/2015 | \n",
316 | " 104 | \n",
317 | " 8878 | \n",
318 | " 0 | \n",
319 | " 10.00 | \n",
320 | " 0 | \n",
321 | " True | \n",
322 | "
\n",
323 | " \n",
324 | " 11 | \n",
325 | " 12 | \n",
326 | " 12 | \n",
327 | " Tipster A | \n",
328 | " 01/08/2015 | \n",
329 | " 96 | \n",
330 | " 5508 | \n",
331 | " 1 | \n",
332 | " 8.00 | \n",
333 | " 0 | \n",
334 | " True | \n",
335 | "
\n",
336 | " \n",
337 | " 12 | \n",
338 | " 13 | \n",
339 | " 13 | \n",
340 | " Tipster A | \n",
341 | " 01/08/2015 | \n",
342 | " 47 | \n",
343 | " 4484 | \n",
344 | " 1 | \n",
345 | " 6.00 | \n",
346 | " 0 | \n",
347 | " True | \n",
348 | "
\n",
349 | " \n",
350 | " 13 | \n",
351 | " 14 | \n",
352 | " 14 | \n",
353 | " Tipster A | \n",
354 | " 02/08/2015 | \n",
355 | " 18 | \n",
356 | " 6602 | \n",
357 | " 1 | \n",
358 | " 6.00 | \n",
359 | " 0 | \n",
360 | " True | \n",
361 | "
\n",
362 | " \n",
363 | " 14 | \n",
364 | " 15 | \n",
365 | " 15 | \n",
366 | " Tipster A | \n",
367 | " 04/08/2015 | \n",
368 | " 85 | \n",
369 | " 15390 | \n",
370 | " 1 | \n",
371 | " 3.25 | \n",
372 | " 1 | \n",
373 | " True | \n",
374 | "
\n",
375 | " \n",
376 | " 15 | \n",
377 | " 16 | \n",
378 | " 16 | \n",
379 | " Tipster A | \n",
380 | " 04/08/2015 | \n",
381 | " 14 | \n",
382 | " 7851 | \n",
383 | " 1 | \n",
384 | " 2.75 | \n",
385 | " 0 | \n",
386 | " True | \n",
387 | "
\n",
388 | " \n",
389 | " 16 | \n",
390 | " 17 | \n",
391 | " 17 | \n",
392 | " Tipster A | \n",
393 | " 04/08/2015 | \n",
394 | " 16 | \n",
395 | " 1554 | \n",
396 | " 1 | \n",
397 | " 3.50 | \n",
398 | " 0 | \n",
399 | " True | \n",
400 | "
\n",
401 | " \n",
402 | " 17 | \n",
403 | " 18 | \n",
404 | " 18 | \n",
405 | " Tipster A | \n",
406 | " 05/08/2015 | \n",
407 | " 11 | \n",
408 | " 12537 | \n",
409 | " 1 | \n",
410 | " 3.25 | \n",
411 | " 0 | \n",
412 | " True | \n",
413 | "
\n",
414 | " \n",
415 | " 18 | \n",
416 | " 19 | \n",
417 | " 19 | \n",
418 | " Tipster A | \n",
419 | " 05/08/2015 | \n",
420 | " 73 | \n",
421 | " 10307 | \n",
422 | " 1 | \n",
423 | " 3.25 | \n",
424 | " 0 | \n",
425 | " True | \n",
426 | "
\n",
427 | " \n",
428 | " 19 | \n",
429 | " 20 | \n",
430 | " 20 | \n",
431 | " Tipster A | \n",
432 | " 06/08/2015 | \n",
433 | " 111 | \n",
434 | " 6326 | \n",
435 | " 0 | \n",
436 | " 11.00 | \n",
437 | " 0 | \n",
438 | " True | \n",
439 | "
\n",
440 | " \n",
441 | " 20 | \n",
442 | " 21 | \n",
443 | " 21 | \n",
444 | " Tipster A | \n",
445 | " 14/08/2015 | \n",
446 | " 74 | \n",
447 | " 3358 | \n",
448 | " 1 | \n",
449 | " 1.80 | \n",
450 | " 0 | \n",
451 | " True | \n",
452 | "
\n",
453 | " \n",
454 | " 21 | \n",
455 | " 22 | \n",
456 | " 22 | \n",
457 | " Tipster A | \n",
458 | " 15/08/2015 | \n",
459 | " 72 | \n",
460 | " 15487 | \n",
461 | " 1 | \n",
462 | " 4.00 | \n",
463 | " 0 | \n",
464 | " True | \n",
465 | "
\n",
466 | " \n",
467 | " 22 | \n",
468 | " 23 | \n",
469 | " 23 | \n",
470 | " Tipster A | \n",
471 | " 15/08/2015 | \n",
472 | " 72 | \n",
473 | " 2269 | \n",
474 | " 0 | \n",
475 | " 7.50 | \n",
476 | " 0 | \n",
477 | " True | \n",
478 | "
\n",
479 | " \n",
480 | " 23 | \n",
481 | " 24 | \n",
482 | " 24 | \n",
483 | " Tipster A | \n",
484 | " 15/08/2015 | \n",
485 | " 72 | \n",
486 | " 304 | \n",
487 | " 1 | \n",
488 | " 4.50 | \n",
489 | " 1 | \n",
490 | " True | \n",
491 | "
\n",
492 | " \n",
493 | " 24 | \n",
494 | " 25 | \n",
495 | " 25 | \n",
496 | " Tipster A | \n",
497 | " 15/08/2015 | \n",
498 | " 66 | \n",
499 | " 14466 | \n",
500 | " 1 | \n",
501 | " 3.25 | \n",
502 | " 0 | \n",
503 | " True | \n",
504 | "
\n",
505 | " \n",
506 | " 25 | \n",
507 | " 26 | \n",
508 | " 26 | \n",
509 | " Tipster A | \n",
510 | " 16/08/2015 | \n",
511 | " 102 | \n",
512 | " 10427 | \n",
513 | " 0 | \n",
514 | " 8.00 | \n",
515 | " 0 | \n",
516 | " True | \n",
517 | "
\n",
518 | " \n",
519 | " 26 | \n",
520 | " 27 | \n",
521 | " 27 | \n",
522 | " Tipster A | \n",
523 | " 16/08/2015 | \n",
524 | " 102 | \n",
525 | " 6517 | \n",
526 | " 1 | \n",
527 | " 2.50 | \n",
528 | " 0 | \n",
529 | " True | \n",
530 | "
\n",
531 | " \n",
532 | " 27 | \n",
533 | " 28 | \n",
534 | " 28 | \n",
535 | " Tipster A | \n",
536 | " 18/08/2015 | \n",
537 | " 59 | \n",
538 | " 4285 | \n",
539 | " 1 | \n",
540 | " 11.00 | \n",
541 | " 1 | \n",
542 | " True | \n",
543 | "
\n",
544 | " \n",
545 | " 28 | \n",
546 | " 29 | \n",
547 | " 29 | \n",
548 | " Tipster A | \n",
549 | " 18/08/2015 | \n",
550 | " 16 | \n",
551 | " 5607 | \n",
552 | " 0 | \n",
553 | " 11.00 | \n",
554 | " 1 | \n",
555 | " True | \n",
556 | "
\n",
557 | " \n",
558 | " 29 | \n",
559 | " 30 | \n",
560 | " 30 | \n",
561 | " Tipster A | \n",
562 | " 18/08/2015 | \n",
563 | " 16 | \n",
564 | " 14522 | \n",
565 | " 1 | \n",
566 | " 3.25 | \n",
567 | " 0 | \n",
568 | " True | \n",
569 | "
\n",
570 | " \n",
571 | " ... | \n",
572 | " ... | \n",
573 | " ... | \n",
574 | " ... | \n",
575 | " ... | \n",
576 | " ... | \n",
577 | " ... | \n",
578 | " ... | \n",
579 | " ... | \n",
580 | " ... | \n",
581 | " ... | \n",
582 | "
\n",
583 | " \n",
584 | " 38218 | \n",
585 | " 38219 | \n",
586 | " 580 | \n",
587 | " Tipster E1 | \n",
588 | " 30/01/2016 | \n",
589 | " 26 | \n",
590 | " 7209 | \n",
591 | " 1 | \n",
592 | " 3.00 | \n",
593 | " 0 | \n",
594 | " False | \n",
595 | "
\n",
596 | " \n",
597 | " 38219 | \n",
598 | " 38220 | \n",
599 | " 581 | \n",
600 | " Tipster E1 | \n",
601 | " 03/02/2016 | \n",
602 | " 73 | \n",
603 | " 14716 | \n",
604 | " 0 | \n",
605 | " 34.00 | \n",
606 | " 0 | \n",
607 | " False | \n",
608 | "
\n",
609 | " \n",
610 | " 38220 | \n",
611 | " 38221 | \n",
612 | " 582 | \n",
613 | " Tipster E1 | \n",
614 | " 10/02/2016 | \n",
615 | " 12 | \n",
616 | " 14716 | \n",
617 | " 0 | \n",
618 | " 15.00 | \n",
619 | " 0 | \n",
620 | " False | \n",
621 | "
\n",
622 | " \n",
623 | " 38221 | \n",
624 | " 38222 | \n",
625 | " 583 | \n",
626 | " Tipster E1 | \n",
627 | " 13/02/2016 | \n",
628 | " 111 | \n",
629 | " 1384 | \n",
630 | " 0 | \n",
631 | " 13.00 | \n",
632 | " 1 | \n",
633 | " False | \n",
634 | "
\n",
635 | " \n",
636 | " 38222 | \n",
637 | " 38223 | \n",
638 | " 584 | \n",
639 | " Tipster E1 | \n",
640 | " 14/02/2016 | \n",
641 | " 92 | \n",
642 | " 4982 | \n",
643 | " 1 | \n",
644 | " 4.50 | \n",
645 | " 0 | \n",
646 | " False | \n",
647 | "
\n",
648 | " \n",
649 | " 38223 | \n",
650 | " 38224 | \n",
651 | " 585 | \n",
652 | " Tipster E1 | \n",
653 | " 25/02/2016 | \n",
654 | " 47 | \n",
655 | " 13367 | \n",
656 | " 0 | \n",
657 | " 11.00 | \n",
658 | " 0 | \n",
659 | " False | \n",
660 | "
\n",
661 | " \n",
662 | " 38224 | \n",
663 | " 38225 | \n",
664 | " 586 | \n",
665 | " Tipster E1 | \n",
666 | " 13/03/2016 | \n",
667 | " 106 | \n",
668 | " 2723 | \n",
669 | " 1 | \n",
670 | " 5.50 | \n",
671 | " 0 | \n",
672 | " False | \n",
673 | "
\n",
674 | " \n",
675 | " 38225 | \n",
676 | " 38226 | \n",
677 | " 587 | \n",
678 | " Tipster E1 | \n",
679 | " 13/03/2016 | \n",
680 | " 53 | \n",
681 | " 8120 | \n",
682 | " 0 | \n",
683 | " 21.00 | \n",
684 | " 0 | \n",
685 | " False | \n",
686 | "
\n",
687 | " \n",
688 | " 38226 | \n",
689 | " 38227 | \n",
690 | " 588 | \n",
691 | " Tipster E1 | \n",
692 | " 15/03/2016 | \n",
693 | " 17 | \n",
694 | " 480 | \n",
695 | " 1 | \n",
696 | " 5.00 | \n",
697 | " 1 | \n",
698 | " False | \n",
699 | "
\n",
700 | " \n",
701 | " 38227 | \n",
702 | " 38228 | \n",
703 | " 589 | \n",
704 | " Tipster E1 | \n",
705 | " 15/03/2016 | \n",
706 | " 17 | \n",
707 | " 2365 | \n",
708 | " 0 | \n",
709 | " 26.00 | \n",
710 | " 0 | \n",
711 | " False | \n",
712 | "
\n",
713 | " \n",
714 | " 38228 | \n",
715 | " 38229 | \n",
716 | " 590 | \n",
717 | " Tipster E1 | \n",
718 | " 15/03/2016 | \n",
719 | " 17 | \n",
720 | " 9238 | \n",
721 | " 0 | \n",
722 | " 67.00 | \n",
723 | " 0 | \n",
724 | " False | \n",
725 | "
\n",
726 | " \n",
727 | " 38229 | \n",
728 | " 38230 | \n",
729 | " 591 | \n",
730 | " Tipster E1 | \n",
731 | " 17/03/2016 | \n",
732 | " 17 | \n",
733 | " 1722 | \n",
734 | " 1 | \n",
735 | " 6.50 | \n",
736 | " 1 | \n",
737 | " False | \n",
738 | "
\n",
739 | " \n",
740 | " 38230 | \n",
741 | " 38231 | \n",
742 | " 592 | \n",
743 | " Tipster E1 | \n",
744 | " 17/03/2016 | \n",
745 | " 16 | \n",
746 | " 2133 | \n",
747 | " 1 | \n",
748 | " 6.50 | \n",
749 | " 0 | \n",
750 | " False | \n",
751 | "
\n",
752 | " \n",
753 | " 38231 | \n",
754 | " 38232 | \n",
755 | " 593 | \n",
756 | " Tipster E1 | \n",
757 | " 18/03/2016 | \n",
758 | " 17 | \n",
759 | " 3129 | \n",
760 | " 0 | \n",
761 | " 17.00 | \n",
762 | " 0 | \n",
763 | " False | \n",
764 | "
\n",
765 | " \n",
766 | " 38232 | \n",
767 | " 38233 | \n",
768 | " 594 | \n",
769 | " Tipster E1 | \n",
770 | " 18/03/2016 | \n",
771 | " 17 | \n",
772 | " 8066 | \n",
773 | " 1 | \n",
774 | " 8.00 | \n",
775 | " 0 | \n",
776 | " False | \n",
777 | "
\n",
778 | " \n",
779 | " 38233 | \n",
780 | " 38234 | \n",
781 | " 595 | \n",
782 | " Tipster E1 | \n",
783 | " 18/03/2016 | \n",
784 | " 17 | \n",
785 | " 11674 | \n",
786 | " 0 | \n",
787 | " 11.00 | \n",
788 | " 1 | \n",
789 | " False | \n",
790 | "
\n",
791 | " \n",
792 | " 38234 | \n",
793 | " 38235 | \n",
794 | " 596 | \n",
795 | " Tipster E1 | \n",
796 | " 19/03/2016 | \n",
797 | " 54 | \n",
798 | " 8847 | \n",
799 | " 0 | \n",
800 | " 11.00 | \n",
801 | " 1 | \n",
802 | " False | \n",
803 | "
\n",
804 | " \n",
805 | " 38235 | \n",
806 | " 38236 | \n",
807 | " 597 | \n",
808 | " Tipster E1 | \n",
809 | " 20/03/2016 | \n",
810 | " 12 | \n",
811 | " 9366 | \n",
812 | " 1 | \n",
813 | " 5.00 | \n",
814 | " 0 | \n",
815 | " False | \n",
816 | "
\n",
817 | " \n",
818 | " 38236 | \n",
819 | " 38237 | \n",
820 | " 598 | \n",
821 | " Tipster E1 | \n",
822 | " 26/03/2016 | \n",
823 | " 67 | \n",
824 | " 1628 | \n",
825 | " 1 | \n",
826 | " 7.50 | \n",
827 | " 0 | \n",
828 | " False | \n",
829 | "
\n",
830 | " \n",
831 | " 38237 | \n",
832 | " 38238 | \n",
833 | " 599 | \n",
834 | " Tipster E1 | \n",
835 | " 28/03/2016 | \n",
836 | " 35 | \n",
837 | " 11945 | \n",
838 | " 0 | \n",
839 | " 29.00 | \n",
840 | " 0 | \n",
841 | " False | \n",
842 | "
\n",
843 | " \n",
844 | " 38238 | \n",
845 | " 38239 | \n",
846 | " 600 | \n",
847 | " Tipster E1 | \n",
848 | " 30/03/2016 | \n",
849 | " 92 | \n",
850 | " 1436 | \n",
851 | " 1 | \n",
852 | " 5.00 | \n",
853 | " 0 | \n",
854 | " False | \n",
855 | "
\n",
856 | " \n",
857 | " 38239 | \n",
858 | " 38240 | \n",
859 | " 601 | \n",
860 | " Tipster E1 | \n",
861 | " 30/03/2016 | \n",
862 | " 92 | \n",
863 | " 6295 | \n",
864 | " 1 | \n",
865 | " 6.00 | \n",
866 | " 0 | \n",
867 | " False | \n",
868 | "
\n",
869 | " \n",
870 | " 38240 | \n",
871 | " 38241 | \n",
872 | " 602 | \n",
873 | " Tipster E1 | \n",
874 | " 30/03/2016 | \n",
875 | " 34 | \n",
876 | " 407 | \n",
877 | " 1 | \n",
878 | " 4.33 | \n",
879 | " 0 | \n",
880 | " False | \n",
881 | "
\n",
882 | " \n",
883 | " 38241 | \n",
884 | " 38242 | \n",
885 | " 603 | \n",
886 | " Tipster E1 | \n",
887 | " 01/04/2016 | \n",
888 | " 107 | \n",
889 | " 14141 | \n",
890 | " 1 | \n",
891 | " 4.50 | \n",
892 | " 0 | \n",
893 | " False | \n",
894 | "
\n",
895 | " \n",
896 | " 38242 | \n",
897 | " 38243 | \n",
898 | " 604 | \n",
899 | " Tipster E1 | \n",
900 | " 01/04/2016 | \n",
901 | " 111 | \n",
902 | " 5905 | \n",
903 | " 1 | \n",
904 | " 10.00 | \n",
905 | " 0 | \n",
906 | " False | \n",
907 | "
\n",
908 | " \n",
909 | " 38243 | \n",
910 | " 38244 | \n",
911 | " 605 | \n",
912 | " Tipster E1 | \n",
913 | " 02/04/2016 | \n",
914 | " 54 | \n",
915 | " 12946 | \n",
916 | " 1 | \n",
917 | " 7.00 | \n",
918 | " 0 | \n",
919 | " False | \n",
920 | "
\n",
921 | " \n",
922 | " 38244 | \n",
923 | " 38245 | \n",
924 | " 606 | \n",
925 | " Tipster E1 | \n",
926 | " 02/04/2016 | \n",
927 | " 26 | \n",
928 | " 4591 | \n",
929 | " 0 | \n",
930 | " 12.00 | \n",
931 | " 0 | \n",
932 | " False | \n",
933 | "
\n",
934 | " \n",
935 | " 38245 | \n",
936 | " 38246 | \n",
937 | " 607 | \n",
938 | " Tipster E1 | \n",
939 | " 02/04/2016 | \n",
940 | " 26 | \n",
941 | " 6824 | \n",
942 | " 1 | \n",
943 | " 7.00 | \n",
944 | " 0 | \n",
945 | " False | \n",
946 | "
\n",
947 | " \n",
948 | " 38246 | \n",
949 | " 38247 | \n",
950 | " 608 | \n",
951 | " Tipster E1 | \n",
952 | " 02/04/2016 | \n",
953 | " 53 | \n",
954 | " 7068 | \n",
955 | " 1 | \n",
956 | " 4.33 | \n",
957 | " 0 | \n",
958 | " False | \n",
959 | "
\n",
960 | " \n",
961 | " 38247 | \n",
962 | " 38248 | \n",
963 | " 609 | \n",
964 | " Tipster E1 | \n",
965 | " 31/05/2016 | \n",
966 | " 82 | \n",
967 | " 4015 | \n",
968 | " 1 | \n",
969 | " 5.00 | \n",
970 | " 0 | \n",
971 | " False | \n",
972 | "
\n",
973 | " \n",
974 | "
\n",
975 | "
38248 rows × 10 columns
\n",
976 | "
"
977 | ],
978 | "text/plain": [
979 | " UID ID Tipster Date Track Horse Bet Type Odds \\\n",
980 | "0 1 1 Tipster A 24/07/2015 2 5158 1 8.00 \n",
981 | "1 2 2 Tipster A 24/07/2015 96 13108 1 4.50 \n",
982 | "2 3 3 Tipster A 24/07/2015 114 13411 1 7.00 \n",
983 | "3 4 4 Tipster A 24/07/2015 74 8976 1 5.00 \n",
984 | "4 5 5 Tipster A 25/07/2015 2 10554 1 4.33 \n",
985 | "5 6 6 Tipster A 25/07/2015 114 327 1 6.00 \n",
986 | "6 7 7 Tipster A 25/07/2015 73 9791 1 6.00 \n",
987 | "7 8 8 Tipster A 25/07/2015 62 2019 1 6.00 \n",
988 | "8 9 9 Tipster A 26/07/2015 12 12101 1 5.50 \n",
989 | "9 10 10 Tipster A 26/07/2015 80 115 1 2.00 \n",
990 | "10 11 11 Tipster A 26/07/2015 104 8878 0 10.00 \n",
991 | "11 12 12 Tipster A 01/08/2015 96 5508 1 8.00 \n",
992 | "12 13 13 Tipster A 01/08/2015 47 4484 1 6.00 \n",
993 | "13 14 14 Tipster A 02/08/2015 18 6602 1 6.00 \n",
994 | "14 15 15 Tipster A 04/08/2015 85 15390 1 3.25 \n",
995 | "15 16 16 Tipster A 04/08/2015 14 7851 1 2.75 \n",
996 | "16 17 17 Tipster A 04/08/2015 16 1554 1 3.50 \n",
997 | "17 18 18 Tipster A 05/08/2015 11 12537 1 3.25 \n",
998 | "18 19 19 Tipster A 05/08/2015 73 10307 1 3.25 \n",
999 | "19 20 20 Tipster A 06/08/2015 111 6326 0 11.00 \n",
1000 | "20 21 21 Tipster A 14/08/2015 74 3358 1 1.80 \n",
1001 | "21 22 22 Tipster A 15/08/2015 72 15487 1 4.00 \n",
1002 | "22 23 23 Tipster A 15/08/2015 72 2269 0 7.50 \n",
1003 | "23 24 24 Tipster A 15/08/2015 72 304 1 4.50 \n",
1004 | "24 25 25 Tipster A 15/08/2015 66 14466 1 3.25 \n",
1005 | "25 26 26 Tipster A 16/08/2015 102 10427 0 8.00 \n",
1006 | "26 27 27 Tipster A 16/08/2015 102 6517 1 2.50 \n",
1007 | "27 28 28 Tipster A 18/08/2015 59 4285 1 11.00 \n",
1008 | "28 29 29 Tipster A 18/08/2015 16 5607 0 11.00 \n",
1009 | "29 30 30 Tipster A 18/08/2015 16 14522 1 3.25 \n",
1010 | "... ... ... ... ... ... ... ... ... \n",
1011 | "38218 38219 580 Tipster E1 30/01/2016 26 7209 1 3.00 \n",
1012 | "38219 38220 581 Tipster E1 03/02/2016 73 14716 0 34.00 \n",
1013 | "38220 38221 582 Tipster E1 10/02/2016 12 14716 0 15.00 \n",
1014 | "38221 38222 583 Tipster E1 13/02/2016 111 1384 0 13.00 \n",
1015 | "38222 38223 584 Tipster E1 14/02/2016 92 4982 1 4.50 \n",
1016 | "38223 38224 585 Tipster E1 25/02/2016 47 13367 0 11.00 \n",
1017 | "38224 38225 586 Tipster E1 13/03/2016 106 2723 1 5.50 \n",
1018 | "38225 38226 587 Tipster E1 13/03/2016 53 8120 0 21.00 \n",
1019 | "38226 38227 588 Tipster E1 15/03/2016 17 480 1 5.00 \n",
1020 | "38227 38228 589 Tipster E1 15/03/2016 17 2365 0 26.00 \n",
1021 | "38228 38229 590 Tipster E1 15/03/2016 17 9238 0 67.00 \n",
1022 | "38229 38230 591 Tipster E1 17/03/2016 17 1722 1 6.50 \n",
1023 | "38230 38231 592 Tipster E1 17/03/2016 16 2133 1 6.50 \n",
1024 | "38231 38232 593 Tipster E1 18/03/2016 17 3129 0 17.00 \n",
1025 | "38232 38233 594 Tipster E1 18/03/2016 17 8066 1 8.00 \n",
1026 | "38233 38234 595 Tipster E1 18/03/2016 17 11674 0 11.00 \n",
1027 | "38234 38235 596 Tipster E1 19/03/2016 54 8847 0 11.00 \n",
1028 | "38235 38236 597 Tipster E1 20/03/2016 12 9366 1 5.00 \n",
1029 | "38236 38237 598 Tipster E1 26/03/2016 67 1628 1 7.50 \n",
1030 | "38237 38238 599 Tipster E1 28/03/2016 35 11945 0 29.00 \n",
1031 | "38238 38239 600 Tipster E1 30/03/2016 92 1436 1 5.00 \n",
1032 | "38239 38240 601 Tipster E1 30/03/2016 92 6295 1 6.00 \n",
1033 | "38240 38241 602 Tipster E1 30/03/2016 34 407 1 4.33 \n",
1034 | "38241 38242 603 Tipster E1 01/04/2016 107 14141 1 4.50 \n",
1035 | "38242 38243 604 Tipster E1 01/04/2016 111 5905 1 10.00 \n",
1036 | "38243 38244 605 Tipster E1 02/04/2016 54 12946 1 7.00 \n",
1037 | "38244 38245 606 Tipster E1 02/04/2016 26 4591 0 12.00 \n",
1038 | "38245 38246 607 Tipster E1 02/04/2016 26 6824 1 7.00 \n",
1039 | "38246 38247 608 Tipster E1 02/04/2016 53 7068 1 4.33 \n",
1040 | "38247 38248 609 Tipster E1 31/05/2016 82 4015 1 5.00 \n",
1041 | "\n",
1042 | " Result TipsterActive \n",
1043 | "0 0 True \n",
1044 | "1 0 True \n",
1045 | "2 0 True \n",
1046 | "3 0 True \n",
1047 | "4 1 True \n",
1048 | "5 0 True \n",
1049 | "6 0 True \n",
1050 | "7 0 True \n",
1051 | "8 0 True \n",
1052 | "9 0 True \n",
1053 | "10 0 True \n",
1054 | "11 0 True \n",
1055 | "12 0 True \n",
1056 | "13 0 True \n",
1057 | "14 1 True \n",
1058 | "15 0 True \n",
1059 | "16 0 True \n",
1060 | "17 0 True \n",
1061 | "18 0 True \n",
1062 | "19 0 True \n",
1063 | "20 0 True \n",
1064 | "21 0 True \n",
1065 | "22 0 True \n",
1066 | "23 1 True \n",
1067 | "24 0 True \n",
1068 | "25 0 True \n",
1069 | "26 0 True \n",
1070 | "27 1 True \n",
1071 | "28 1 True \n",
1072 | "29 0 True \n",
1073 | "... ... ... \n",
1074 | "38218 0 False \n",
1075 | "38219 0 False \n",
1076 | "38220 0 False \n",
1077 | "38221 1 False \n",
1078 | "38222 0 False \n",
1079 | "38223 0 False \n",
1080 | "38224 0 False \n",
1081 | "38225 0 False \n",
1082 | "38226 1 False \n",
1083 | "38227 0 False \n",
1084 | "38228 0 False \n",
1085 | "38229 1 False \n",
1086 | "38230 0 False \n",
1087 | "38231 0 False \n",
1088 | "38232 0 False \n",
1089 | "38233 1 False \n",
1090 | "38234 1 False \n",
1091 | "38235 0 False \n",
1092 | "38236 0 False \n",
1093 | "38237 0 False \n",
1094 | "38238 0 False \n",
1095 | "38239 0 False \n",
1096 | "38240 0 False \n",
1097 | "38241 0 False \n",
1098 | "38242 0 False \n",
1099 | "38243 0 False \n",
1100 | "38244 0 False \n",
1101 | "38245 0 False \n",
1102 | "38246 0 False \n",
1103 | "38247 0 False \n",
1104 | "\n",
1105 | "[38248 rows x 10 columns]"
1106 | ]
1107 | },
1108 | "execution_count": 183,
1109 | "metadata": {},
1110 | "output_type": "execute_result"
1111 | }
1112 | ],
1113 | "source": [
1114 | "tips_df"
1115 | ]
1116 | },
1117 | {
1118 | "cell_type": "code",
1119 | "execution_count": null,
1120 | "metadata": {
1121 | "collapsed": true
1122 | },
1123 | "outputs": [],
1124 | "source": []
1125 | }
1126 | ],
1127 | "metadata": {
1128 | "kernelspec": {
1129 | "display_name": "Python 3",
1130 | "language": "python",
1131 | "name": "python3"
1132 | },
1133 | "language_info": {
1134 | "codemirror_mode": {
1135 | "name": "ipython",
1136 | "version": 3
1137 | },
1138 | "file_extension": ".py",
1139 | "mimetype": "text/x-python",
1140 | "name": "python",
1141 | "nbconvert_exporter": "python",
1142 | "pygments_lexer": "ipython3",
1143 | "version": "3.5.2"
1144 | }
1145 | },
1146 | "nbformat": 4,
1147 | "nbformat_minor": 2
1148 | }
1149 |
--------------------------------------------------------------------------------
/NYSE.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {
7 | "collapsed": true
8 | },
9 | "outputs": [],
10 | "source": [
11 | "import pandas as pd\n",
12 | "import numpy as np\n",
13 | "\n",
14 | "data_dir = './nyse/'"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 8,
20 | "metadata": {
21 | "collapsed": true
22 | },
23 | "outputs": [],
24 | "source": [
25 | "fundamentals_df = pd.read_csv(data_dir + 'fundamentals.csv')\n",
26 | "prices_df = pd.read_csv(data_dir + 'prices-split-adjusted.csv')"
27 | ]
28 | },
29 | {
30 | "cell_type": "code",
31 | "execution_count": 13,
32 | "metadata": {
33 | "collapsed": false
34 | },
35 | "outputs": [
36 | {
37 | "data": {
38 | "text/html": [
39 | "\n",
40 | "
\n",
41 | " \n",
42 | " \n",
43 | " | \n",
44 | " Unnamed: 0 | \n",
45 | " Ticker Symbol | \n",
46 | " Period Ending | \n",
47 | " Accounts Payable | \n",
48 | " Accounts Receivable | \n",
49 | " Add'l income/expense items | \n",
50 | " After Tax ROE | \n",
51 | " Capital Expenditures | \n",
52 | " Capital Surplus | \n",
53 | " Cash Ratio | \n",
54 | " ... | \n",
55 | " Total Current Assets | \n",
56 | " Total Current Liabilities | \n",
57 | " Total Equity | \n",
58 | " Total Liabilities | \n",
59 | " Total Liabilities & Equity | \n",
60 | " Total Revenue | \n",
61 | " Treasury Stock | \n",
62 | " For Year | \n",
63 | " Earnings Per Share | \n",
64 | " Estimated Shares Outstanding | \n",
65 | "
\n",
66 | " \n",
67 | " \n",
68 | " \n",
69 | " 0 | \n",
70 | " 0 | \n",
71 | " AAL | \n",
72 | " 2012-12-31 | \n",
73 | " 3.068000e+09 | \n",
74 | " -222000000.0 | \n",
75 | " -1.961000e+09 | \n",
76 | " 23.0 | \n",
77 | " -1.888000e+09 | \n",
78 | " 4.695000e+09 | \n",
79 | " 53.0 | \n",
80 | " ... | \n",
81 | " 7.072000e+09 | \n",
82 | " 9.011000e+09 | \n",
83 | " -7.987000e+09 | \n",
84 | " 2.489100e+10 | \n",
85 | " 1.690400e+10 | \n",
86 | " 2.485500e+10 | \n",
87 | " -367000000.0 | \n",
88 | " 2012.0 | \n",
89 | " -5.60 | \n",
90 | " 3.350000e+08 | \n",
91 | "
\n",
92 | " \n",
93 | " 1 | \n",
94 | " 1 | \n",
95 | " AAL | \n",
96 | " 2013-12-31 | \n",
97 | " 4.975000e+09 | \n",
98 | " -93000000.0 | \n",
99 | " -2.723000e+09 | \n",
100 | " 67.0 | \n",
101 | " -3.114000e+09 | \n",
102 | " 1.059200e+10 | \n",
103 | " 75.0 | \n",
104 | " ... | \n",
105 | " 1.432300e+10 | \n",
106 | " 1.380600e+10 | \n",
107 | " -2.731000e+09 | \n",
108 | " 4.500900e+10 | \n",
109 | " 4.227800e+10 | \n",
110 | " 2.674300e+10 | \n",
111 | " 0.0 | \n",
112 | " 2013.0 | \n",
113 | " -11.25 | \n",
114 | " 1.630222e+08 | \n",
115 | "
\n",
116 | " \n",
117 | " 2 | \n",
118 | " 2 | \n",
119 | " AAL | \n",
120 | " 2014-12-31 | \n",
121 | " 4.668000e+09 | \n",
122 | " -160000000.0 | \n",
123 | " -1.500000e+08 | \n",
124 | " 143.0 | \n",
125 | " -5.311000e+09 | \n",
126 | " 1.513500e+10 | \n",
127 | " 60.0 | \n",
128 | " ... | \n",
129 | " 1.175000e+10 | \n",
130 | " 1.340400e+10 | \n",
131 | " 2.021000e+09 | \n",
132 | " 4.120400e+10 | \n",
133 | " 4.322500e+10 | \n",
134 | " 4.265000e+10 | \n",
135 | " 0.0 | \n",
136 | " 2014.0 | \n",
137 | " 4.02 | \n",
138 | " 7.169154e+08 | \n",
139 | "
\n",
140 | " \n",
141 | " 3 | \n",
142 | " 3 | \n",
143 | " AAL | \n",
144 | " 2015-12-31 | \n",
145 | " 5.102000e+09 | \n",
146 | " 352000000.0 | \n",
147 | " -7.080000e+08 | \n",
148 | " 135.0 | \n",
149 | " -6.151000e+09 | \n",
150 | " 1.159100e+10 | \n",
151 | " 51.0 | \n",
152 | " ... | \n",
153 | " 9.985000e+09 | \n",
154 | " 1.360500e+10 | \n",
155 | " 5.635000e+09 | \n",
156 | " 4.278000e+10 | \n",
157 | " 4.841500e+10 | \n",
158 | " 4.099000e+10 | \n",
159 | " 0.0 | \n",
160 | " 2015.0 | \n",
161 | " 11.39 | \n",
162 | " 6.681299e+08 | \n",
163 | "
\n",
164 | " \n",
165 | " 4 | \n",
166 | " 4 | \n",
167 | " AAP | \n",
168 | " 2012-12-29 | \n",
169 | " 2.409453e+09 | \n",
170 | " -89482000.0 | \n",
171 | " 6.000000e+05 | \n",
172 | " 32.0 | \n",
173 | " -2.711820e+08 | \n",
174 | " 5.202150e+08 | \n",
175 | " 23.0 | \n",
176 | " ... | \n",
177 | " 3.184200e+09 | \n",
178 | " 2.559638e+09 | \n",
179 | " 1.210694e+09 | \n",
180 | " 3.403120e+09 | \n",
181 | " 4.613814e+09 | \n",
182 | " 6.205003e+09 | \n",
183 | " -27095000.0 | \n",
184 | " 2012.0 | \n",
185 | " 5.29 | \n",
186 | " 7.328355e+07 | \n",
187 | "
\n",
188 | " \n",
189 | "
\n",
190 | "
5 rows × 79 columns
\n",
191 | "
"
192 | ],
193 | "text/plain": [
194 | " Unnamed: 0 Ticker Symbol Period Ending Accounts Payable \\\n",
195 | "0 0 AAL 2012-12-31 3.068000e+09 \n",
196 | "1 1 AAL 2013-12-31 4.975000e+09 \n",
197 | "2 2 AAL 2014-12-31 4.668000e+09 \n",
198 | "3 3 AAL 2015-12-31 5.102000e+09 \n",
199 | "4 4 AAP 2012-12-29 2.409453e+09 \n",
200 | "\n",
201 | " Accounts Receivable Add'l income/expense items After Tax ROE \\\n",
202 | "0 -222000000.0 -1.961000e+09 23.0 \n",
203 | "1 -93000000.0 -2.723000e+09 67.0 \n",
204 | "2 -160000000.0 -1.500000e+08 143.0 \n",
205 | "3 352000000.0 -7.080000e+08 135.0 \n",
206 | "4 -89482000.0 6.000000e+05 32.0 \n",
207 | "\n",
208 | " Capital Expenditures Capital Surplus Cash Ratio \\\n",
209 | "0 -1.888000e+09 4.695000e+09 53.0 \n",
210 | "1 -3.114000e+09 1.059200e+10 75.0 \n",
211 | "2 -5.311000e+09 1.513500e+10 60.0 \n",
212 | "3 -6.151000e+09 1.159100e+10 51.0 \n",
213 | "4 -2.711820e+08 5.202150e+08 23.0 \n",
214 | "\n",
215 | " ... Total Current Assets \\\n",
216 | "0 ... 7.072000e+09 \n",
217 | "1 ... 1.432300e+10 \n",
218 | "2 ... 1.175000e+10 \n",
219 | "3 ... 9.985000e+09 \n",
220 | "4 ... 3.184200e+09 \n",
221 | "\n",
222 | " Total Current Liabilities Total Equity Total Liabilities \\\n",
223 | "0 9.011000e+09 -7.987000e+09 2.489100e+10 \n",
224 | "1 1.380600e+10 -2.731000e+09 4.500900e+10 \n",
225 | "2 1.340400e+10 2.021000e+09 4.120400e+10 \n",
226 | "3 1.360500e+10 5.635000e+09 4.278000e+10 \n",
227 | "4 2.559638e+09 1.210694e+09 3.403120e+09 \n",
228 | "\n",
229 | " Total Liabilities & Equity Total Revenue Treasury Stock For Year \\\n",
230 | "0 1.690400e+10 2.485500e+10 -367000000.0 2012.0 \n",
231 | "1 4.227800e+10 2.674300e+10 0.0 2013.0 \n",
232 | "2 4.322500e+10 4.265000e+10 0.0 2014.0 \n",
233 | "3 4.841500e+10 4.099000e+10 0.0 2015.0 \n",
234 | "4 4.613814e+09 6.205003e+09 -27095000.0 2012.0 \n",
235 | "\n",
236 | " Earnings Per Share Estimated Shares Outstanding \n",
237 | "0 -5.60 3.350000e+08 \n",
238 | "1 -11.25 1.630222e+08 \n",
239 | "2 4.02 7.169154e+08 \n",
240 | "3 11.39 6.681299e+08 \n",
241 | "4 5.29 7.328355e+07 \n",
242 | "\n",
243 | "[5 rows x 79 columns]"
244 | ]
245 | },
246 | "execution_count": 13,
247 | "metadata": {},
248 | "output_type": "execute_result"
249 | }
250 | ],
251 | "source": [
252 | "fundamentals_df.head()"
253 | ]
254 | },
255 | {
256 | "cell_type": "code",
257 | "execution_count": 16,
258 | "metadata": {
259 | "collapsed": false,
260 | "scrolled": true
261 | },
262 | "outputs": [
263 | {
264 | "data": {
265 | "text/plain": [
266 | "Index(['Unnamed: 0', 'Ticker Symbol', 'Period Ending', 'Accounts Payable',\n",
267 | " 'Accounts Receivable', 'Add'l income/expense items', 'After Tax ROE',\n",
268 | " 'Capital Expenditures', 'Capital Surplus', 'Cash Ratio',\n",
269 | " 'Cash and Cash Equivalents', 'Changes in Inventories', 'Common Stocks',\n",
270 | " 'Cost of Revenue', 'Current Ratio', 'Deferred Asset Charges',\n",
271 | " 'Deferred Liability Charges', 'Depreciation',\n",
272 | " 'Earnings Before Interest and Tax', 'Earnings Before Tax',\n",
273 | " 'Effect of Exchange Rate',\n",
274 | " 'Equity Earnings/Loss Unconsolidated Subsidiary', 'Fixed Assets',\n",
275 | " 'Goodwill', 'Gross Margin', 'Gross Profit', 'Income Tax',\n",
276 | " 'Intangible Assets', 'Interest Expense', 'Inventory', 'Investments',\n",
277 | " 'Liabilities', 'Long-Term Debt', 'Long-Term Investments',\n",
278 | " 'Minority Interest', 'Misc. Stocks', 'Net Borrowings', 'Net Cash Flow',\n",
279 | " 'Net Cash Flow-Operating', 'Net Cash Flows-Financing',\n",
280 | " 'Net Cash Flows-Investing', 'Net Income', 'Net Income Adjustments',\n",
281 | " 'Net Income Applicable to Common Shareholders',\n",
282 | " 'Net Income-Cont. Operations', 'Net Receivables', 'Non-Recurring Items',\n",
283 | " 'Operating Income', 'Operating Margin', 'Other Assets',\n",
284 | " 'Other Current Assets', 'Other Current Liabilities', 'Other Equity',\n",
285 | " 'Other Financing Activities', 'Other Investing Activities',\n",
286 | " 'Other Liabilities', 'Other Operating Activities',\n",
287 | " 'Other Operating Items', 'Pre-Tax Margin', 'Pre-Tax ROE',\n",
288 | " 'Profit Margin', 'Quick Ratio', 'Research and Development',\n",
289 | " 'Retained Earnings', 'Sale and Purchase of Stock',\n",
290 | " 'Sales, General and Admin.',\n",
291 | " 'Short-Term Debt / Current Portion of Long-Term Debt',\n",
292 | " 'Short-Term Investments', 'Total Assets', 'Total Current Assets',\n",
293 | " 'Total Current Liabilities', 'Total Equity', 'Total Liabilities',\n",
294 | " 'Total Liabilities & Equity', 'Total Revenue', 'Treasury Stock',\n",
295 | " 'For Year', 'Earnings Per Share', 'Estimated Shares Outstanding'],\n",
296 | " dtype='object')"
297 | ]
298 | },
299 | "execution_count": 16,
300 | "metadata": {},
301 | "output_type": "execute_result"
302 | }
303 | ],
304 | "source": [
305 | "fundamentals_df.columns"
306 | ]
307 | },
308 | {
309 | "cell_type": "code",
310 | "execution_count": 9,
311 | "metadata": {
312 | "collapsed": false
313 | },
314 | "outputs": [
315 | {
316 | "data": {
317 | "text/html": [
318 | "\n",
319 | "
\n",
320 | " \n",
321 | " \n",
322 | " | \n",
323 | " date | \n",
324 | " symbol | \n",
325 | " open | \n",
326 | " close | \n",
327 | " low | \n",
328 | " high | \n",
329 | " volume | \n",
330 | "
\n",
331 | " \n",
332 | " \n",
333 | " \n",
334 | " 0 | \n",
335 | " 2016-01-05 | \n",
336 | " WLTW | \n",
337 | " 123.430000 | \n",
338 | " 125.839996 | \n",
339 | " 122.309998 | \n",
340 | " 126.250000 | \n",
341 | " 2163600.0 | \n",
342 | "
\n",
343 | " \n",
344 | " 1 | \n",
345 | " 2016-01-06 | \n",
346 | " WLTW | \n",
347 | " 125.239998 | \n",
348 | " 119.980003 | \n",
349 | " 119.940002 | \n",
350 | " 125.540001 | \n",
351 | " 2386400.0 | \n",
352 | "
\n",
353 | " \n",
354 | " 2 | \n",
355 | " 2016-01-07 | \n",
356 | " WLTW | \n",
357 | " 116.379997 | \n",
358 | " 114.949997 | \n",
359 | " 114.930000 | \n",
360 | " 119.739998 | \n",
361 | " 2489500.0 | \n",
362 | "
\n",
363 | " \n",
364 | " 3 | \n",
365 | " 2016-01-08 | \n",
366 | " WLTW | \n",
367 | " 115.480003 | \n",
368 | " 116.620003 | \n",
369 | " 113.500000 | \n",
370 | " 117.440002 | \n",
371 | " 2006300.0 | \n",
372 | "
\n",
373 | " \n",
374 | " 4 | \n",
375 | " 2016-01-11 | \n",
376 | " WLTW | \n",
377 | " 117.010002 | \n",
378 | " 114.970001 | \n",
379 | " 114.089996 | \n",
380 | " 117.330002 | \n",
381 | " 1408600.0 | \n",
382 | "
\n",
383 | " \n",
384 | "
\n",
385 | "
"
386 | ],
387 | "text/plain": [
388 | " date symbol open close low high \\\n",
389 | "0 2016-01-05 WLTW 123.430000 125.839996 122.309998 126.250000 \n",
390 | "1 2016-01-06 WLTW 125.239998 119.980003 119.940002 125.540001 \n",
391 | "2 2016-01-07 WLTW 116.379997 114.949997 114.930000 119.739998 \n",
392 | "3 2016-01-08 WLTW 115.480003 116.620003 113.500000 117.440002 \n",
393 | "4 2016-01-11 WLTW 117.010002 114.970001 114.089996 117.330002 \n",
394 | "\n",
395 | " volume \n",
396 | "0 2163600.0 \n",
397 | "1 2386400.0 \n",
398 | "2 2489500.0 \n",
399 | "3 2006300.0 \n",
400 | "4 1408600.0 "
401 | ]
402 | },
403 | "execution_count": 9,
404 | "metadata": {},
405 | "output_type": "execute_result"
406 | }
407 | ],
408 | "source": [
409 | "prices_df.head()"
410 | ]
411 | },
412 | {
413 | "cell_type": "code",
414 | "execution_count": 10,
415 | "metadata": {
416 | "collapsed": false
417 | },
418 | "outputs": [
419 | {
420 | "data": {
421 | "text/plain": [
422 | "(851264, 7)"
423 | ]
424 | },
425 | "execution_count": 10,
426 | "metadata": {},
427 | "output_type": "execute_result"
428 | }
429 | ],
430 | "source": [
431 | "prices_df.shape"
432 | ]
433 | },
434 | {
435 | "cell_type": "code",
436 | "execution_count": null,
437 | "metadata": {
438 | "collapsed": true
439 | },
440 | "outputs": [],
441 | "source": []
442 | }
443 | ],
444 | "metadata": {
445 | "kernelspec": {
446 | "display_name": "Python 3",
447 | "language": "python",
448 | "name": "python3"
449 | },
450 | "language_info": {
451 | "codemirror_mode": {
452 | "name": "ipython",
453 | "version": 3
454 | },
455 | "file_extension": ".py",
456 | "mimetype": "text/x-python",
457 | "name": "python",
458 | "nbconvert_exporter": "python",
459 | "pygments_lexer": "ipython3",
460 | "version": "3.5.2"
461 | }
462 | },
463 | "nbformat": 4,
464 | "nbformat_minor": 2
465 | }
466 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Rawlsian Fair Machine Learning for Contextual Bandits
2 |
3 | Implementation and evaluation of provably Rawlsian fair ML algorithms for contextual bandits.
4 |
5 | Related Work/Citations:
6 |
7 | * Rawlsian Fairness for Machine Learning (https://arxiv.org/abs/1610.09559)
8 | * Unbiased Offline Evaluation of Contextual-bandit-based News Article Recommendation Algorithms (https://arxiv.org/abs/1003.5956)
9 |
10 | ## Installation Instructions
11 |
12 | ### (Option 1) Setting Up virtualenv
13 |
14 | #### OSX
15 |
16 | Install Python 3 from the official [installer package](https://www.python.org/downloads/). This lets you run `python3` and `pip3`. The software is installed into `/Library/Frameworks/Python.framework/Versions/3.x/bin/`.
17 |
18 | Install virtualenv for Python 3 for the user only (which is placed into `~/Library/Python/3.x/bin`):
19 |
20 | ```
21 | $ pip3 install --user virtualenv
22 | ```
23 |
24 | Create the following alias in your `~/.bash_profile`:
25 |
26 | ```
27 | $ echo "alias virtualenv3='~/Library/Python/3.x/bin/virtualenv'" >> ~/.bash_profile
28 | ```
29 |
30 | Create a local virtualenv and activate it:
31 |
32 | ```
33 | $ virtualenv3 fairml
34 | $ source fairml/bin/activate
35 | ```
36 |
37 | With the virtualenv active, install the project requirements into your virtualenv:
38 |
39 | ```
40 | $ pip install -r requirements.txt
41 | ```
42 |
43 | Create a Python kernel for Jupyter that uses your virtualenv:
44 |
45 | ```
46 | $ python -m ipykernel install --user --name=fairml
47 | ```
48 |
49 | You can then launch Jupyter using `jupyter notebook` from inside the project directory and change the kernel to `fairml`.
50 |
51 | ### (Option 2) Using Docker
52 |
53 | You can install [Docker](https://www.docker.com) and use a standard configuration such as `all-spark-notebook` to run the project files.
54 |
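55 | For example, a minimal invocation assuming the standard `jupyter/all-spark-notebook` image from the Jupyter Docker Stacks (adjust the port and volume mount as needed):
56 |
57 | ```
58 | $ docker run -it --rm -p 8888:8888 -v "$PWD":/home/jovyan/work jupyter/all-spark-notebook
59 | ```
60 |
61 | This mounts the project directory into the container's default notebook workspace and serves Jupyter on port 8888.
62 |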
--------------------------------------------------------------------------------
/RidgeFair.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {
7 | "collapsed": true
8 | },
9 | "outputs": [],
10 | "source": [
11 | "import numpy as np\n",
12 | "from numpy import transpose\n",
13 | "from numpy.linalg import inv, det\n",
14 | "from scipy.stats import norm\n",
15 | "from math import sqrt\n",
16 | "from numpy import log"
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": 31,
22 | "metadata": {
23 | "collapsed": true
24 | },
25 | "outputs": [],
26 | "source": [
27 | "def ridgeFair(X, Y, k, d, _delta, T, _lambda):\n",
28 | " \"\"\"\n",
29 | " Simulates T rounds of ridgeFair.\n",
30 | " \n",
31 | " :param X: a 3-axis (T, k, d) ndarray of d-dimensional context vectors for each\n",
32 | " time-step and arm\n",
33 | " :param Y: a T x k ndarray of reward function output for each context vector\n",
34 | " :param k: the number of arms\n",
35 | " :param d: the number of features\n",
36 | " :param _delta: confidence parameter\n",
37 | " :param T: the number of iterations\n",
38 | " :param _lambda: \n",
39 | " \"\"\"\n",
40 | " picks = []\n",
41 | " for t in range (T): # for t >= 1\n",
42 | " for i in range(k): # for 1 <= i <= k\n",
43 | " R = 1\n",
44 | " intervals = []\n",
45 | " try:\n",
46 | " X_i = X[:t,i] # design matrix\n",
47 | " Y_i = Y[:t,i] # same with Y\n",
48 | " x_ti = X[t,i] # feature vector for arm i in round t\n",
49 | "\n",
50 | " X_iT = transpose(X_i)\n",
51 | " _idenD = np.identity(d)\n",
52 | " V_it = X_iT.dot(X_i) + (_lambda*_idenD) # computing V_it as line 5\n",
53 | "\n",
54 | " B_it = inv(V_it).dot(X_iT).dot(Y_i) # computing line 6\n",
55 | " \n",
56 | " y_ti = transpose(x_ti).dot(B_it) #computing line 7\n",
57 | " \n",
58 | " V_itI = inv(V_it) # inverse of V_it\n",
59 | " _wti1 = sqrt(transpose(x_ti).dot(V_itI).dot(x_ti))\n",
60 | " _wti2 = R * sqrt(d*log((1+(t/_lambda))/_delta)) + sqrt(_lambda)\n",
61 | " w_ti = _wti1 * _wti2 # computing W_ti as line 8\n",
62 | "\n",
63 | " intervals.append([y_ti - w_ti, y_ti + w_ti]) # line 9\n",
64 | " except:\n",
65 | " print('Error in assigning interval value.')\n",
66 | " intervals = None\n",
67 | " break\n",
68 | " if not intervals:\n",
69 | " picks.append(np.random.randint(0,k))\n",
70 | " else:\n",
71 | " i_st = np.argmax(np.array(intervals)[:,1]) # line 10\n",
72 | " chain = compute_chain(i_st, np.array(intervals), k) # line 11\n",
73 | " picks.append(np.random.choice(chain)) # play uniformly random from chain\n",
74 | " \n",
75 | " best = [Y[i].max() for i in range(2, T)]\n",
76 | " performance = [Y[t][picks[t-2]] for t in range(2, T)]\n",
77 | " print('Cumulative Regret: {0}'.format(sum(best) - sum(performance)))\n",
78 | " print('Final Regret: {0}'.format(best[-1] - performance[-1]))\n",
79 | " "
80 | ]
81 | },
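82 | {
83 | "cell_type": "markdown",
84 | "metadata": {},
85 | "source": [
86 | "As a reading aid, the width `w_ti = _wti1 * _wti2` computed in the cell above is the standard ridge-regression confidence bound\n",
87 | "\n",
88 | "$$w_{t,i} = \\sqrt{x_{t,i}^\\top V_{t,i}^{-1} x_{t,i}}\\left(R\\sqrt{d\\log\\tfrac{1 + t/\\lambda}{\\delta}} + \\sqrt{\\lambda}\\right),$$\n",
89 | "\n",
90 | "where $V_{t,i} = X_i^\\top X_i + \\lambda I$ and $R$ is the noise scale set in the loop."
91 | ]
92 | },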
82 | {
83 | "cell_type": "code",
84 | "execution_count": 32,
85 | "metadata": {
86 | "collapsed": true
87 | },
88 | "outputs": [],
89 | "source": [
90 | "def compute_chain(i_st, intervals, k):\n",
91 | " # Sort intervals by decreasing order.\n",
92 | " chain = [i_st]\n",
93 | " ordering = np.argsort(intervals[:,1])[::-1]\n",
94 | " intervals = intervals[ordering,:]\n",
95 | " \n",
96 | " lowest_in_chain = intervals[0][0]\n",
97 | " for i in range(len(intervals)):\n",
98 | " if intervals[i][1] >= lowest_in_chain:\n",
99 | " chain.append(i)\n",
100 | " lowest_in_chain = min(lowest_in_chain, intervals[i][0])\n",
101 | " else:\n",
102 | " return chain\n",
103 | " return chain"
104 | ]
105 | },
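106 | {
107 | "cell_type": "code",
108 | "execution_count": null,
109 | "metadata": {},
110 | "outputs": [],
111 | "source": [
112 | "# Hypothetical sanity check of compute_chain: arms 1, 2 and 0 have overlapping\n",
113 | "# intervals and chain together; arm 3 lies strictly below the chain and is excluded.\n",
114 | "toy = np.array([[0.0, 1.0], [0.8, 1.5], [0.9, 1.2], [-1.0, -0.5]])\n",
115 | "compute_chain(np.argmax(toy[:,1]), toy, 4) # -> [1, 2, 0]"
116 | ]
117 | },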
106 | {
107 | "cell_type": "code",
108 | "execution_count": 33,
109 | "metadata": {
110 | "collapsed": true
111 | },
112 | "outputs": [],
113 | "source": [
114 | "def beta(k, d, c):\n",
115 | " \"\"\" \n",
116 | " Generates the scaled down feature weights for a true model from the distribution\n",
117 | " β ∼ U[0, c]^d.\n",
118 | " \n",
119 | " :param k: the number of arms \n",
120 | " :param d: the number of features\n",
121 | " :param c: the scale of the feature weights\n",
122 | " \"\"\"\n",
123 | " return np.random.uniform(0, c+1, size=(k, d))"
124 | ]
125 | },
126 | {
127 | "cell_type": "code",
128 | "execution_count": 34,
129 | "metadata": {
130 | "scrolled": true
131 | },
132 | "outputs": [
133 | {
134 | "name": "stdout",
135 | "output_type": "stream",
136 | "text": [
137 | "Cumulative Regret: 4636.449117347242\n",
138 | "Final Regret: 0.9403759272439949\n"
139 | ]
140 | }
141 | ],
142 | "source": [
143 | "k = 2\n",
144 | "c = 10\n",
145 | "d = 10\n",
146 | "T = 1000\n",
147 | "X = np.random.uniform(0, 1, size=(T, k, d)) # 3-axis ndarray\n",
148 | "B = beta(k, d, c) # true parameters. B[i]: params for arm i\n",
149 | "Y = np.array([np.diag(X[t].dot(transpose(B))) for t in range(T)])\n",
150 | "ridgeFair(X, Y, k, d, 0.05, T, 1)"
151 | ]
152 | },
153 | {
154 | "cell_type": "code",
155 | "execution_count": null,
156 | "metadata": {
157 | "collapsed": true
158 | },
159 | "outputs": [],
160 | "source": []
161 | },
162 | {
163 | "cell_type": "code",
164 | "execution_count": null,
165 | "metadata": {
166 | "collapsed": true
167 | },
168 | "outputs": [],
169 | "source": []
170 | },
171 | {
172 | "cell_type": "code",
173 | "execution_count": null,
174 | "metadata": {
175 | "collapsed": true
176 | },
177 | "outputs": [],
178 | "source": []
179 | }
180 | ],
181 | "metadata": {
182 | "kernelspec": {
183 | "display_name": "Python 3",
184 | "language": "python",
185 | "name": "python3"
186 | },
187 | "language_info": {
188 | "codemirror_mode": {
189 | "name": "ipython",
190 | "version": 3
191 | },
192 | "file_extension": ".py",
193 | "mimetype": "text/x-python",
194 | "name": "python",
195 | "nbconvert_exporter": "python",
196 | "pygments_lexer": "ipython3",
197 | "version": "3.5.2"
198 | }
199 | },
200 | "nbformat": 4,
201 | "nbformat_minor": 2
202 | }
203 |
--------------------------------------------------------------------------------
/Yahoo.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {
7 | "collapsed": true
8 | },
9 | "outputs": [],
10 | "source": [
11 | "\"\"\"\n",
12 | "Data wrangling for the Yahoo! Front Page Today Module User Click Log Dataset, version 1.0.\n",
13 | "\n",
14 | "Inspired by:\n",
15 | "Unbiased Offline Evaluation of Contextual-bandit-based News Article Recommendation Algorithms \n",
16 | "[https://arxiv.org/pdf/1003.5956.pdf]\n",
17 | "\n",
18 | "Documentation is per reST format used in Sphinx.\n",
19 | "\n",
20 | "Dataset: https://webscope.sandbox.yahoo.com/catalog.php?datatype=r&did=49\n",
21 | "Author: jtcho (jonathan.t.cho@gmail.com)\n",
22 | "\n",
23 | "Many thanks to Yahoo! Research for allowing me to use their dataset.\n",
24 | "\"\"\"\n",
25 | "\n",
26 | "import pandas as pd\n",
27 | "import numpy as np\n",
28 | "import sqlite3\n",
29 | "import time\n",
30 | "import os\n",
31 | "\n",
32 | "dump_dir = 'R6/'\n",
33 | "data_dirs = ['clicks_1/']\n",
34 | "engine = sqlite3.connect('yahoo')"
35 | ]
36 | },
37 | {
38 | "cell_type": "code",
39 | "execution_count": 2,
40 | "metadata": {
41 | "collapsed": true
42 | },
43 | "outputs": [],
44 | "source": [
45 | "# Database cleanup.\n",
46 | "\n",
47 | "c = engine.cursor()\n",
48 | "c.execute('DROP TABLE articles')\n",
49 | "engine.commit()"
50 | ]
51 | },
52 | {
53 | "cell_type": "code",
54 | "execution_count": 3,
55 | "metadata": {
56 | "collapsed": true
57 | },
58 | "outputs": [],
59 | "source": [
60 | "def extract_article_info(path, item_limit=sys.maxsize):\n",
61 | " \"\"\" \n",
62 | " Given an R6A dataset file, extracts all of the common article vectors\n",
63 | " and compiles them in a single dataframe.\n",
64 | " Note that each article has a constant vector associated with it.\n",
65 | " \n",
66 | " :param path: the file path for the dataset\n",
67 | " :param item_limit: limits the number of items to parse\n",
68 | " :returns: Pandas dataframe containing article vectors indexed by id\n",
69 | " \"\"\"\n",
70 | " t0 = time.time()\n",
71 | " num_iters = 0\n",
72 | " _articles_df = pd.DataFrame(columns=['2', '3', '4', '5', '6', '1'])\n",
73 | " with open(path) as f:\n",
74 | " for line in f:\n",
75 | " num_iters += 1 \n",
76 | " if num_iters > item_limit:\n",
77 | " break\n",
78 | " parts = line.strip().split('|')\n",
79 | " for i in range(2, len(parts)):\n",
80 | " # Extract article vector information.\n",
81 | " article_info = parts[i].split()\n",
82 | " article_id = article_info[0]\n",
83 | " if article_id in _articles_df.index:\n",
84 | " continue\n",
85 | " article_info_parts = list(map(lambda x : x.split(':')[1], article_info[1:]))\n",
86 | " article_info = dict(zip(_articles_df.columns, article_info_parts))\n",
87 | " # I append to an existing DF for quick de-duplication. Also\n",
88 | " # empirically, I observed that there is a small number of unique\n",
89 | " # articles for any dataset, so the overhead of doing this is minimized.\n",
90 | " _articles_df.loc[article_id] = pd.Series(article_info)\n",
91 | "\n",
92 | " t1 = time.time()\n",
93 | " print('Finished processing {0} items in {1} seconds.'.format(num_iters-1, t1 - t0))\n",
94 | " return _articles_df"
95 | ]
96 | },
97 | {
98 | "cell_type": "code",
99 | "execution_count": 7,
100 | "metadata": {},
101 | "outputs": [],
102 | "source": [
103 | "def process_click_file(path, item_limit=sys.maxsize):\n",
104 | " \"\"\"\n",
105 | " Given an R6A dataset file, parses all of the view event logs and \n",
106 | " compiles them in a single dataframe.\n",
107 | " \n",
108 | " A single view event consists of a unix timestamp, a 6-dimensional vector of\n",
109 | " features describing the user, a set of 20 articles in the article pool\n",
110 | " (the 20 arms of the multi-arm bandit), the id of the article displayed, and\n",
111 | " a boolean marking whether the article was clicked.\n",
112 | " \"\"\"\n",
113 | " t0 = time.time()\n",
114 | " num_iters = 0\n",
115 | " views_cols = ['time', 'user_1', 'user_2', 'user_3', 'user_4', 'user_5', 'user_6', \n",
116 | " 'article_pool', 'displayed', 'clicked']\n",
117 | " views = []\n",
118 | " with open(path) as f:\n",
119 | " for line in f:\n",
120 | " num_iters += 1\n",
121 | " if num_iters > item_limit:\n",
122 | " break\n",
123 | " parts = line.strip().split('|')\n",
124 | " unix_timestamp, disp_article_id, clicked = parts[0].split()\n",
125 | " user_info = list(map(lambda x : x.split(':')[1], parts[1].split()[1:]))\n",
126 | " user_info = dict(zip(views_cols[1:7], user_info))\n",
127 | " user_info['time'] = unix_timestamp\n",
128 | " user_info['displayed'] = disp_article_id\n",
129 | " user_info['clicked'] = clicked\n",
130 | " \n",
131 | " # Extract article vector information.\n",
132 | " article_ids = [parts[i].split()[0] for i in range(2, len(parts))]\n",
133 | " user_info['article_pool'] = article_ids\n",
134 | " # In this case, we construct the DF at the end because we're creating a new row\n",
135 | " # for *every* item... over ~4 million items that becomes very expensive!\n",
136 | " views.append(user_info)\n",
137 | "\n",
138 | " t1 = time.time()\n",
139 | " print('{0}: Finished processing {1} items in {2} seconds.'.format(path, num_iters-1, t1 - t0))\n",
140 | " return pd.DataFrame(views, columns=views_cols)"
141 | ]
142 | },
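143 | {
144 | "cell_type": "markdown",
145 | "metadata": {},
146 | "source": [
147 | "For reference, a hypothetical (abbreviated) log line in the shape the parser above assumes: `parts[0]` carries the timestamp, displayed article id, and click flag; `parts[1]` the user features; each remaining part one article in the pool.\n",
148 | "\n",
149 | "```\n",
150 | "1241160900 109513 0 |user 2:0.000012 3:0.000048 ... 1:1.000000 |109498 2:0.306008 ... |109509 ...\n",
151 | "```"
152 | ]
153 | },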
143 | {
144 | "cell_type": "code",
145 | "execution_count": 4,
146 | "metadata": {},
147 | "outputs": [
148 | {
149 | "name": "stdout",
150 | "output_type": "stream",
151 | "text": [
152 | "Finished processing 4681991 items in 150.5566005706787 seconds.\n"
153 | ]
154 | }
155 | ],
156 | "source": [
157 | "# Run to populate the articles table.\n",
158 | "articles_df = extract_article_info(dump_dir + 'clicks_1.txt', sys.maxsize).apply(pd.to_numeric)\n",
159 | "articles_df.to_sql('articles', engine, if_exists='replace')"
160 | ]
161 | },
162 | {
163 | "cell_type": "code",
164 | "execution_count": 9,
165 | "metadata": {},
166 | "outputs": [
167 | {
168 | "name": "stdout",
169 | "output_type": "stream",
170 | "text": [
171 | "clicks_1/xaa: Finished processing 99999 items in 3.1617259979248047 seconds.\n",
172 | "clicks_1/xab: Finished processing 99999 items in 3.2025344371795654 seconds.\n",
173 | "clicks_1/xac: Finished processing 99999 items in 3.3164455890655518 seconds.\n",
174 | "clicks_1/xad: Finished processing 99999 items in 3.380336046218872 seconds.\n",
175 | "clicks_1/xae: Finished processing 99999 items in 3.0821828842163086 seconds.\n",
176 | "clicks_1/xaf: Finished processing 99999 items in 3.1906492710113525 seconds.\n",
177 | "clicks_1/xag: Finished processing 99999 items in 3.3087258338928223 seconds.\n",
178 | "clicks_1/xah: Finished processing 99999 items in 3.2571945190429688 seconds.\n",
179 | "clicks_1/xai: Finished processing 99999 items in 3.278446674346924 seconds.\n",
180 | "clicks_1/xaj: Finished processing 99999 items in 3.2920501232147217 seconds.\n",
181 | "clicks_1/xak: Finished processing 99999 items in 3.431187629699707 seconds.\n",
182 | "clicks_1/xal: Finished processing 99999 items in 3.40493106842041 seconds.\n",
183 | "clicks_1/xam: Finished processing 99999 items in 3.1150004863739014 seconds.\n",
184 | "clicks_1/xan: Finished processing 99999 items in 3.1503725051879883 seconds.\n",
185 | "clicks_1/xao: Finished processing 99999 items in 3.3162639141082764 seconds.\n",
186 | "clicks_1/xap: Finished processing 99999 items in 3.09061598777771 seconds.\n",
187 | "clicks_1/xaq: Finished processing 99999 items in 3.4392073154449463 seconds.\n",
188 | "clicks_1/xar: Finished processing 99999 items in 3.443249464035034 seconds.\n",
189 | "clicks_1/xas: Finished processing 99999 items in 3.5337443351745605 seconds.\n",
190 | "clicks_1/xat: Finished processing 99999 items in 3.4647445678710938 seconds.\n",
191 | "clicks_1/xau: Finished processing 99999 items in 3.6430513858795166 seconds.\n",
192 | "clicks_1/xav: Finished processing 99999 items in 3.6271255016326904 seconds.\n",
193 | "clicks_1/xaw: Finished processing 99999 items in 3.309832811355591 seconds.\n",
194 | "clicks_1/xax: Finished processing 99999 items in 3.460949420928955 seconds.\n",
195 | "clicks_1/xay: Finished processing 99999 items in 3.426335573196411 seconds.\n",
196 | "clicks_1/xaz: Finished processing 99999 items in 3.510620594024658 seconds.\n",
197 | "clicks_1/xba: Finished processing 99999 items in 3.6194756031036377 seconds.\n",
198 | "clicks_1/xbb: Finished processing 99999 items in 3.7689321041107178 seconds.\n",
199 | "clicks_1/xbc: Finished processing 99999 items in 3.7527005672454834 seconds.\n",
200 | "clicks_1/xbd: Finished processing 99999 items in 3.559547185897827 seconds.\n",
201 | "clicks_1/xbe: Finished processing 99999 items in 3.664827585220337 seconds.\n",
202 | "clicks_1/xbf: Finished processing 99999 items in 3.7467215061187744 seconds.\n",
203 | "clicks_1/xbg: Finished processing 99999 items in 3.2975916862487793 seconds.\n",
204 | "clicks_1/xbh: Finished processing 99999 items in 3.1932389736175537 seconds.\n",
205 | "clicks_1/xbi: Finished processing 99999 items in 3.480050802230835 seconds.\n",
206 | "clicks_1/xbj: Finished processing 99999 items in 3.307481050491333 seconds.\n",
207 | "clicks_1/xbk: Finished processing 99999 items in 3.3213932514190674 seconds.\n",
208 | "clicks_1/xbl: Finished processing 99999 items in 3.602836847305298 seconds.\n",
209 | "clicks_1/xbm: Finished processing 99999 items in 3.3665266036987305 seconds.\n",
210 | "clicks_1/xbn: Finished processing 99999 items in 3.5517754554748535 seconds.\n",
211 | "clicks_1/xbo: Finished processing 99999 items in 3.5413339138031006 seconds.\n",
212 | "clicks_1/xbp: Finished processing 99999 items in 3.082970380783081 seconds.\n",
213 | "clicks_1/xbq: Finished processing 99999 items in 3.1382272243499756 seconds.\n",
214 | "clicks_1/xbr: Finished processing 99999 items in 3.2157583236694336 seconds.\n",
215 | "clicks_1/xbs: Finished processing 99999 items in 3.396573543548584 seconds.\n",
216 | "clicks_1/xbt: Finished processing 99999 items in 3.4965860843658447 seconds.\n",
217 | "clicks_1/xbu: Finished processing 81991 items in 2.8793578147888184 seconds.\n"
218 | ]
219 | }
220 | ],
221 | "source": [
222 | "for fname in os.listdir('clicks_1'):\n",
223 | " if fname != '.DS_Store':\n",
224 | " result = process_click_file('clicks_1/'+fname)\n",
225 | " result['article_pool'] = result['article_pool'].astype(str)\n",
226 | " result.to_sql('clicks', engine, if_exists='append')"
227 | ]
228 | },
229 | {
230 | "cell_type": "code",
231 | "execution_count": 18,
232 | "metadata": {},
233 | "outputs": [],
234 | "source": [
235 | "#pd.read_sql_query('select * from articles',con=engine).set_index('index')"
236 | ]
237 | },
238 | {
239 | "cell_type": "code",
240 | "execution_count": 11,
241 | "metadata": {},
242 | "outputs": [
243 | {
244 | "data": {
245 | "text/html": [
246 | "\n",
247 | "
\n",
248 | " \n",
249 | " \n",
250 | " | \n",
251 | " count(*) | \n",
252 | "
\n",
253 | " \n",
254 | " \n",
255 | " \n",
256 | " 0 | \n",
257 | " 4681992 | \n",
258 | "
\n",
259 | " \n",
260 | "
\n",
261 | "
"
262 | ],
263 | "text/plain": [
264 | " count(*)\n",
265 | "0 4681992"
266 | ]
267 | },
268 | "execution_count": 11,
269 | "metadata": {},
270 | "output_type": "execute_result"
271 | }
272 | ],
273 | "source": [
274 | "pd.read_sql_query('select count(*) from clicks', con=engine)"
275 | ]
276 | },
277 | {
278 | "cell_type": "code",
279 | "execution_count": null,
280 | "metadata": {
281 | "collapsed": true
282 | },
283 | "outputs": [],
284 | "source": []
285 | }
286 | ],
287 | "metadata": {
288 | "kernelspec": {
289 | "display_name": "Python 3",
290 | "language": "python",
291 | "name": "python3"
292 | },
293 | "language_info": {
294 | "codemirror_mode": {
295 | "name": "ipython",
296 | "version": 3
297 | },
298 | "file_extension": ".py",
299 | "mimetype": "text/x-python",
300 | "name": "python",
301 | "nbconvert_exporter": "python",
302 | "pygments_lexer": "ipython3",
303 | "version": "3.6.0"
304 | }
305 | },
306 | "nbformat": 4,
307 | "nbformat_minor": 2
308 | }
309 |
--------------------------------------------------------------------------------
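
A minimal sketch of the per-line parsing done by process_click_file in the notebook above. The sample line is illustrative only: it mimics the '|'-separated layout the code assumes (a "timestamp displayed_article clicked" header, a user segment of id:value pairs, then one segment per pooled article); it is not taken from the actual Yahoo! log.

# Hypothetical sample line in the layout assumed by process_click_file.
sample = '1241160900 109513 0 |user 1:0.3 2:0.1 3:0.2 4:0.1 5:0.2 6:0.1 |109498 |109509 |109513'

parts = sample.strip().split('|')
# Header segment: unix timestamp, displayed article id, click indicator.
unix_timestamp, disp_article_id, clicked = parts[0].split()
# User segment: drop the leading 'user' token, keep each id:value pair's value.
user_features = [tok.split(':')[1] for tok in parts[1].split()[1:]]
# Remaining segments: one per candidate article; only the ids are kept.
article_pool = [parts[i].split()[0] for i in range(2, len(parts))]

print(unix_timestamp, disp_article_id, clicked)  # 1241160900 109513 0
print(user_features)  # ['0.3', '0.1', '0.2', '0.1', '0.2', '0.1']
print(article_pool)   # ['109498', '109509', '109513']
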
/evaluation_T.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import numpy as np
3 |
4 | from fairml import beta, interval_chaining, top_interval
5 |
6 |
7 | def main():
8 | c_vals = [1.0, 2.0, 5.0, 10.0]
9 |
10 | # Plot: Varying T (# of rounds)
11 | d = 2
12 | k = 2
13 | T_vals = range(3, 1000, 10)
14 |
15 | results = {
16 | '0': {
17 | 'ylabel': 'Average regret - TI',
18 | 'name': 'avg_regret_ti'
19 | },
20 | '1': {
21 | 'ylabel': 'Average regret - IC',
22 | 'name': 'avg_regret_ic'
23 | },
24 | '2': {
25 | 'ylabel': 'Average regret difference (TI - IC)',
26 | 'name': 'avg_regret_diff'
27 | },
28 | '3': {
29 | 'ylabel': 'Cumulative regret - TI',
30 | 'name': 'cum_regret_ti'
31 | },
32 | '4': {
33 | 'ylabel': 'Cumulative regret - IC',
34 | 'name': 'cum_regret_ic'
35 | },
36 | '5': {
37 | 'ylabel': 'Cumulative regret difference (TI - IC)',
38 | 'name': 'cum_regret_diff'
39 | },
40 | '6': {
41 | 'ylabel': 'Final regret - TI',
42 | 'name': 'final_regret_ti'
43 | },
44 | '7': {
45 | 'ylabel': 'Final regret - IC',
46 | 'name': 'final_regret_ic'
47 | },
48 | '8': {
49 | 'ylabel': 'Final regret difference (TI - IC)',
50 | 'name': 'final_regret_diff'
51 | }
52 | }
53 | for _, v in results.items(): # 9 sets of results.
54 | for j in c_vals:
55 | v[str(j)] = []
56 |
57 | for c in c_vals:
58 | for T in T_vals:
59 | cum_regret_tis = []
60 | avg_regret_tis = []
61 | final_regret_tis = []
62 | cum_regret_ics = []
63 | avg_regret_ics = []
64 | final_regret_ics = []
65 | for i in range(0, 50): # 50 trials.
66 | X = np.random.uniform(0, 1, size=(T, k, d))
67 | B = beta(k, d, c)
68 | Y = np.array([np.diag(X[t].dot(np.transpose(B))) for t in range(T)])
69 |
70 | cum_regret_ti, avg_regret_ti, final_regret_ti = top_interval(
71 | X, Y, k, d, 0.05, T, _print_progress=False)
72 | cum_regret_ic, avg_regret_ic, final_regret_ic = interval_chaining(
73 | X, Y, c, k, d, 0.05, T, _print_progress=False)
74 | cum_regret_tis.append(cum_regret_ti)
75 | avg_regret_tis.append(avg_regret_ti)
76 | final_regret_tis.append(final_regret_ti)
77 | cum_regret_ics.append(cum_regret_ic)
78 | avg_regret_ics.append(avg_regret_ic)
79 | final_regret_ics.append(final_regret_ic)
80 |             cum_regret_ti = mean(cum_regret_tis)
81 |             avg_regret_ti = mean(avg_regret_tis)
82 |             final_regret_ti = mean(final_regret_tis)
83 |             cum_regret_ic = mean(cum_regret_ics)
84 |             avg_regret_ic = mean(avg_regret_ics)
85 |             final_regret_ic = mean(final_regret_ics)
86 |
87 | results['0'][str(c)].append(avg_regret_ti)
88 | results['1'][str(c)].append(avg_regret_ic)
89 | results['2'][str(c)].append(abs(avg_regret_ti - avg_regret_ic))
90 | results['3'][str(c)].append(cum_regret_ti)
91 | results['4'][str(c)].append(cum_regret_ic)
92 | results['5'][str(c)].append(abs(cum_regret_ti - cum_regret_ic))
93 | results['6'][str(c)].append(final_regret_ti)
94 | results['7'][str(c)].append(final_regret_ic)
95 | results['8'][str(c)].append(abs(final_regret_ti - final_regret_ic))
96 |
97 | for k, v in results.items():
98 | plt.clf()
99 | c1, = plt.plot(T_vals, results[k]['1.0'], label='c=1')
100 | c2, = plt.plot(T_vals, results[k]['2.0'], label='c=2')
101 | c5, = plt.plot(T_vals, results[k]['5.0'], label='c=5')
102 | c10, = plt.plot(T_vals, results[k]['10.0'], label='c=10')
103 | plt.xticks(np.arange(min(T_vals), max(T_vals) + 1, 200))
104 | plt.legend(handles=[c1, c2, c5, c10])
105 | plt.xlabel('T (# of rounds)', fontsize=18)
106 | plt.ylabel(v['ylabel'], fontsize=15)
107 | plt.savefig('figures_T_50x/T_50x_' + v['name'])
108 |
109 |
110 | def mean(numbers):
111 | return float(sum(numbers)) / max(len(numbers), 1)
112 |
113 |
114 | if __name__ == '__main__':
115 | main()
116 |
--------------------------------------------------------------------------------
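
A note on the synthetic data built inside the trial loop above: Y is assembled by taking the diagonal of a k x k matrix product each round, which evaluates the reward x_{t,i} . beta_i for every arm i. A small self-contained check (dimensions here are illustrative) showing that a direct contraction over the feature axis with np.einsum gives the same result without the k x k intermediate:

import numpy as np

T, k, d, c = 100, 2, 2, 1.0
X = np.random.uniform(0, 1, size=(T, k, d))
B = np.random.uniform(0, c, size=(k, d))  # stands in for fairml.beta(k, d, c)

# As in the script: diagonal of the (k x k) product, round by round.
Y_loop = np.array([np.diag(X[t].dot(np.transpose(B))) for t in range(T)])
# Equivalent contraction over the feature axis only.
Y_einsum = np.einsum('tkd,kd->tk', X, B)

assert np.allclose(Y_loop, Y_einsum)
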
/evaluation_d.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import numpy as np
3 |
4 | from fairml import beta, interval_chaining, top_interval
5 |
6 |
7 | def main():
8 | c_vals = [1.0, 2.0, 5.0, 10.0]
9 |
10 |     # Plot: Varying d (# of features)
11 | d_vals = range(1, 50)
12 | k = 2
13 | T = 1000
14 |
15 | results = {
16 | '0': {
17 | 'ylabel': 'Average regret - TI',
18 | 'name': 'avg_regret_ti'
19 | },
20 | '1': {
21 | 'ylabel': 'Average regret - IC',
22 | 'name': 'avg_regret_ic'
23 | },
24 | '2': {
25 | 'ylabel': 'Average regret difference (TI - IC)',
26 | 'name': 'avg_regret_diff'
27 | },
28 | '3': {
29 | 'ylabel': 'Cumulative regret - TI',
30 | 'name': 'cum_regret_ti'
31 | },
32 | '4': {
33 | 'ylabel': 'Cumulative regret - IC',
34 | 'name': 'cum_regret_ic'
35 | },
36 | '5': {
37 | 'ylabel': 'Cumulative regret difference (TI - IC)',
38 | 'name': 'cum_regret_diff'
39 | },
40 | '6': {
41 | 'ylabel': 'Final regret - TI',
42 | 'name': 'final_regret_ti'
43 | },
44 | '7': {
45 | 'ylabel': 'Final regret - IC',
46 | 'name': 'final_regret_ic'
47 | },
48 | '8': {
49 | 'ylabel': 'Final regret difference (TI - IC)',
50 | 'name': 'final_regret_diff'
51 | }
52 | }
53 | for _, v in results.items(): # 9 sets of results.
54 | for j in c_vals:
55 | v[str(j)] = []
56 |
57 | for c in c_vals:
58 | for d in d_vals:
59 | cum_regret_tis = []
60 | avg_regret_tis = []
61 | final_regret_tis = []
62 | cum_regret_ics = []
63 | avg_regret_ics = []
64 | final_regret_ics = []
65 |             for i in range(0, 50): # 50 trials.
66 | X = np.random.uniform(0, 1, size=(T, k, d))
67 | B = beta(k, d, c)
68 | Y = np.array([np.diag(X[t].dot(np.transpose(B))) for t in range(T)])
69 |
70 | cum_regret_ti, avg_regret_ti, final_regret_ti = top_interval(
71 | X, Y, k, d, 0.05, T, _print_progress=False)
72 | cum_regret_ic, avg_regret_ic, final_regret_ic = interval_chaining(
73 | X, Y, c, k, d, 0.05, T, _print_progress=False)
74 | cum_regret_tis.append(cum_regret_ti)
75 | avg_regret_tis.append(avg_regret_ti)
76 | final_regret_tis.append(final_regret_ti)
77 | cum_regret_ics.append(cum_regret_ic)
78 | avg_regret_ics.append(avg_regret_ic)
79 | final_regret_ics.append(final_regret_ic)
80 |             cum_regret_ti = mean(cum_regret_tis)
81 |             avg_regret_ti = mean(avg_regret_tis)
82 |             final_regret_ti = mean(final_regret_tis)
83 |             cum_regret_ic = mean(cum_regret_ics)
84 |             avg_regret_ic = mean(avg_regret_ics)
85 |             final_regret_ic = mean(final_regret_ics)
86 |
87 | results['0'][str(c)].append(avg_regret_ti)
88 | results['1'][str(c)].append(avg_regret_ic)
89 | results['2'][str(c)].append(abs(avg_regret_ti - avg_regret_ic))
90 | results['3'][str(c)].append(cum_regret_ti)
91 | results['4'][str(c)].append(cum_regret_ic)
92 | results['5'][str(c)].append(abs(cum_regret_ti - cum_regret_ic))
93 | results['6'][str(c)].append(final_regret_ti)
94 | results['7'][str(c)].append(final_regret_ic)
95 | results['8'][str(c)].append(abs(final_regret_ti - final_regret_ic))
96 |
97 | for k, v in results.items():
98 | plt.clf()
99 | c1, = plt.plot(d_vals, results[k]['1.0'], label='c=1')
100 | c2, = plt.plot(d_vals, results[k]['2.0'], label='c=2')
101 | c5, = plt.plot(d_vals, results[k]['5.0'], label='c=5')
102 | c10, = plt.plot(d_vals, results[k]['10.0'], label='c=10')
103 | plt.xticks(np.arange(min(d_vals), max(d_vals) + 1, 10))
104 | plt.legend(handles=[c1, c2, c5, c10])
105 | plt.xlabel('d (# of features)', fontsize=18)
106 | plt.ylabel(v['ylabel'], fontsize=15)
107 | plt.savefig('figures_d_50x/d_50x_' + v['name'])
108 |
109 |
110 | def mean(numbers):
111 | return float(sum(numbers)) / max(len(numbers), 1)
112 |
113 |
114 | if __name__ == '__main__':
115 | main()
116 |
--------------------------------------------------------------------------------
/evaluation_k.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import numpy as np
3 |
4 | from fairml import beta, interval_chaining, top_interval
5 |
6 |
7 | def main():
8 | c_vals = [1.0, 2.0, 5.0, 10.0]
9 |
10 | # Plot: Varying k (# groups)
11 | d = 2
12 | k_vals = range(1, 50, 5)
13 | T = 1000
14 |
15 | results = {
16 | '0': {
17 | 'ylabel': 'Average regret - TI',
18 | 'name': 'avg_regret_ti'
19 | },
20 | '1': {
21 | 'ylabel': 'Average regret - IC',
22 | 'name': 'avg_regret_ic'
23 | },
24 | '2': {
25 | 'ylabel': 'Average regret difference (TI - IC)',
26 | 'name': 'avg_regret_diff'
27 | },
28 | '3': {
29 | 'ylabel': 'Cumulative regret - TI',
30 | 'name': 'cum_regret_ti'
31 | },
32 | '4': {
33 | 'ylabel': 'Cumulative regret - IC',
34 | 'name': 'cum_regret_ic'
35 | },
36 | '5': {
37 | 'ylabel': 'Cumulative regret difference (TI - IC)',
38 | 'name': 'cum_regret_diff'
39 | },
40 | '6': {
41 | 'ylabel': 'Final regret - TI',
42 | 'name': 'final_regret_ti'
43 | },
44 | '7': {
45 | 'ylabel': 'Final regret - IC',
46 | 'name': 'final_regret_ic'
47 | },
48 | '8': {
49 | 'ylabel': 'Final regret difference (TI - IC)',
50 | 'name': 'final_regret_diff'
51 | }
52 | }
53 | for _, v in results.items(): # 9 sets of results.
54 | for j in c_vals:
55 | v[str(j)] = []
56 |
57 | for c in c_vals:
58 | for k in k_vals:
59 | cum_regret_tis = []
60 | avg_regret_tis = []
61 | final_regret_tis = []
62 | cum_regret_ics = []
63 | avg_regret_ics = []
64 | final_regret_ics = []
65 | for i in range(0, 50): # 50 trials.
66 | X = np.random.uniform(0, 1, size=(T, k, d))
67 | B = beta(k, d, c)
68 | Y = np.array([np.diag(X[t].dot(np.transpose(B))) for t in range(T)])
69 |
70 | cum_regret_ti, avg_regret_ti, final_regret_ti = top_interval(
71 | X, Y, k, d, 0.05, T, _print_progress=False)
72 | cum_regret_ic, avg_regret_ic, final_regret_ic = interval_chaining(
73 | X, Y, c, k, d, 0.05, T, _print_progress=False)
74 | cum_regret_tis.append(cum_regret_ti)
75 | avg_regret_tis.append(avg_regret_ti)
76 | final_regret_tis.append(final_regret_ti)
77 | cum_regret_ics.append(cum_regret_ic)
78 | avg_regret_ics.append(avg_regret_ic)
79 | final_regret_ics.append(final_regret_ic)
80 |             cum_regret_ti = mean(cum_regret_tis)
81 |             avg_regret_ti = mean(avg_regret_tis)
82 |             final_regret_ti = mean(final_regret_tis)
83 |             cum_regret_ic = mean(cum_regret_ics)
84 |             avg_regret_ic = mean(avg_regret_ics)
85 |             final_regret_ic = mean(final_regret_ics)
86 |
87 | results['0'][str(c)].append(avg_regret_ti)
88 | results['1'][str(c)].append(avg_regret_ic)
89 | results['2'][str(c)].append(abs(avg_regret_ti - avg_regret_ic))
90 | results['3'][str(c)].append(cum_regret_ti)
91 | results['4'][str(c)].append(cum_regret_ic)
92 | results['5'][str(c)].append(abs(cum_regret_ti - cum_regret_ic))
93 | results['6'][str(c)].append(final_regret_ti)
94 | results['7'][str(c)].append(final_regret_ic)
95 | results['8'][str(c)].append(abs(final_regret_ti - final_regret_ic))
96 |
97 | for k, v in results.items():
98 | plt.clf()
99 | c1, = plt.plot(k_vals, results[k]['1.0'], label='c=1')
100 | c2, = plt.plot(k_vals, results[k]['2.0'], label='c=2')
101 | c5, = plt.plot(k_vals, results[k]['5.0'], label='c=5')
102 | c10, = plt.plot(k_vals, results[k]['10.0'], label='c=10')
103 | plt.xticks(np.arange(min(k_vals), max(k_vals) + 1, 10))
104 | plt.legend(handles=[c1, c2, c5, c10])
105 | plt.xlabel('k (# of groups)', fontsize=18)
106 | plt.ylabel(v['ylabel'], fontsize=15)
107 | plt.savefig('figures_k_50x/k_50x_' + v['name'])
108 |
109 |
110 | def mean(numbers):
111 | return float(sum(numbers)) / max(len(numbers), 1)
112 |
113 |
114 | if __name__ == '__main__':
115 | main()
116 |
--------------------------------------------------------------------------------
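
evaluation_T.py, evaluation_d.py, and evaluation_k.py differ only in which of T, d, or k is swept; the trial loop is otherwise duplicated verbatim. A sketch of how the shared inner loop could be factored out (run_trials is hypothetical and does not exist in fairml.py; top_interval and interval_chaining are used with the signatures defined there):

import numpy as np

from fairml import beta, interval_chaining, top_interval


def run_trials(T, k, d, c, n_trials=50, delta=0.05):
    """Averages the (cum, avg, final) regrets of TI and IC over random instances."""
    ti, ic = [], []
    for _ in range(n_trials):
        X = np.random.uniform(0, 1, size=(T, k, d))
        B = beta(k, d, c)
        Y = np.array([np.diag(X[t].dot(np.transpose(B))) for t in range(T)])
        ti.append(top_interval(X, Y, k, d, delta, T, _print_progress=False))
        ic.append(interval_chaining(X, Y, c, k, d, delta, T, _print_progress=False))
    # Each list entry is a (cum_regret, avg_regret, final_regret) tuple.
    return np.mean(ti, axis=0), np.mean(ic, axis=0)
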
/fairml.py:
--------------------------------------------------------------------------------
1 | from math import sqrt
2 | import numpy as np
3 | from numpy import log, transpose
4 | from numpy.linalg import inv
5 | from scipy.stats import norm
6 |
7 |
8 | def eta(T):
9 | """
10 |     Generates the cutoff probabilities for exploration rounds in
11 |     TopInterval and IntervalChaining.
12 |
13 | :param T: the total number of iterations
14 | """
15 | return np.array([pow(t, -1/3) for t in range(1, T+1)])
16 |
17 |
18 | def beta(k, d, c):
19 | """
20 |     Generates the feature weights for a true model, with each arm's weights
21 |     drawn from the distribution β ∼ U[0, c]^d.
22 |
23 | :param k: the number of arms
24 | :param d: the number of features
25 | :param c: the scale of the feature weights
26 | """
27 |     return np.random.uniform(0, c, size=(k, d))  # U[0, c], as documented.
28 |
29 |
30 | def print_progress(s, should_print):
31 | """
32 | Helper function to print the progress of an algorithm as it's running.
33 |
34 | :param s: the string to print
35 |     :param should_print: whether or not the string should be printed
36 | """
37 | if should_print:
38 | print(s)
39 |
40 |
41 | def top_interval(X, Y, k, d, _delta, T, _print_progress=True):
42 | """
43 |     Simulates T rounds of TopInterval for k arms.
44 |
45 | :param X: a 3-axis (T, k, d) ndarray of d-dimensional context vectors for
46 | each time-step and arm
47 | :param Y: a T x k ndarray of reward function output for each context vector
48 | :param k: the number of arms
49 | :param d: the number of features
50 | :param _delta: confidence parameter
51 | :param T: the number of iterations
52 | :param _print_progress: True if progress should be printed; False otherwise
53 |     :returns: cum_regret (the total regret over the evaluated rounds),
54 |         avg_regret (cum_regret divided by T),
55 |         final_regret (the regret in the final round)
56 | """
57 | pp = _print_progress
58 | _eta = eta(T) # exploration cutoff probabilities
59 | picks = []
60 | for t in range(T):
61 | print_progress('Iteration [{0} / {1}]'.format(t, T), pp)
62 | if t <= d or np.random.rand() <= _eta[t]:
63 | # Play uniformly at random from [1, k].
64 | picks.append(np.random.randint(0, k))
65 | print_progress('Exploration round.', pp)
66 | else:
67 |             intervals, arms = [], []  # per-arm intervals and their arm ids
68 | for i in range(k):
69 | # Compute beta hat.
70 | _Xti = X[:t+1, i]
71 | _XtiT = transpose(_Xti)
72 | try:
73 | _XTX = inv(_XtiT.dot(_Xti))
74 |                 except np.linalg.LinAlgError:
75 | print_progress('Encountered singular matrix. Ignoring.', pp)
76 | continue
77 | _Yti = Y[:t+1, i]
78 | Bh_t_i = _XTX.dot(_XtiT).dot(_Yti) # Compute OLS estimators.
79 | yh_t_i = Bh_t_i.dot(X[t, i])
80 | _s2 = np.var(Y[:t+1, i])
81 | # Compute the confidence interval width using the inverse CDF.
82 | w_t_i = norm.ppf(1 - _delta/(2*T*k), loc=0,
83 | scale=np.sqrt(_s2 * X[t, i].dot(_XTX).dot(transpose(X[t, i]))))
84 |                 intervals.append([yh_t_i - w_t_i, yh_t_i + w_t_i])
85 |                 arms.append(i)  # Remember which arm this interval belongs to.
86 |             picks.append(arms[np.argmax(np.array(intervals)[:, 1])] if intervals else np.random.randint(0, k))
87 | print_progress('Intervals: {0}'.format(intervals), pp)
88 | # Compute sum of best picks over each iteration.
89 |     best = [Y[t].max() for t in range(2, T)]
90 |     performance = [Y[t][picks[t]] for t in range(2, T)]
91 | cum_regret = sum(best) - sum(performance)
92 | avg_regret = cum_regret / float(T)
93 | final_regret = best[-1] - performance[-1]
94 | print_progress('Cumulative Regret: {0}'.format(cum_regret), pp)
95 | print_progress('Average Regret: {0}'.format(avg_regret), pp)
96 | print_progress('Final Regret: {0}'.format(final_regret), pp)
97 | return cum_regret, avg_regret, final_regret
98 |
99 |
100 | def compute_chain(i_st, intervals, k, _print_progress=True):
101 |     # Chain i_st with every interval that overlaps the chain built so far,
102 |     # scanning arms in decreasing order of upper confidence bound.
103 |     pp = _print_progress
104 |     chain = [i_st]
105 |     print_progress(intervals[:, 1], pp)
106 |     ordering = np.argsort(intervals[:, 1])[::-1]
107 |     intervals = intervals[ordering, :]
108 |     lowest_in_chain = intervals[0][0]
109 |     for i in range(1, len(intervals)):
110 |         if intervals[i][1] >= lowest_in_chain:
111 |             chain.append(ordering[i])  # Original index, not the sorted one.
112 |             lowest_in_chain = min(lowest_in_chain, intervals[i][0])
113 |         else:
114 |             return chain
115 |     return chain
116 |
117 |
118 | def interval_chaining(X, Y, c, k, d, _delta, T, _print_progress=True):
119 | """
120 |     Simulates T rounds of IntervalChaining for k arms.
121 |
122 | :param X: a 3-axis (T, k, d) ndarray of d-dimensional context vectors for
123 | each time-step and arm
124 | :param Y: a T x k ndarray of reward function output for each context vector
125 | :param k: the number of arms
126 | :param d: the number of features
127 | :param _delta: confidence parameter
128 | :param T: the number of iterations
129 | :param _print_progress: True if progress should be printed; False otherwise
130 |     :returns: cum_regret (the total regret over the evaluated rounds),
131 |         avg_regret (cum_regret divided by T),
132 |         final_regret (the regret in the final round)
133 | """
134 | pp = _print_progress
135 | _eta = eta(T) # exploration cutoff probabilities
136 | picks = []
137 | for t in range(T):
138 | print_progress('Iteration [{0} / {1}]'.format(t, T), pp)
139 | if t <= d or np.random.rand() <= _eta[t]:
140 | # Play uniformly at random from [1, k].
141 | picks.append(np.random.randint(0, k))
142 | print_progress('Exploration round.', pp)
143 | else:
144 |             intervals, arms = [], []  # per-arm intervals and their arm ids
145 | for i in range(k):
146 | # Compute beta hat.
147 | _Xti = X[:t+1, i]
148 | _XtiT = transpose(_Xti)
149 | try:
150 | _XTX = inv(_XtiT.dot(_Xti))
151 |                 except np.linalg.LinAlgError:
152 | print_progress('Encountered singular matrix. Ignoring.', pp)
153 | continue
154 | _Yti = Y[:t+1, i]
155 | Bh_t_i = _XTX.dot(_XtiT).dot(_Yti) # Compute OLS estimators.
156 | yh_t_i = Bh_t_i.dot(X[t, i])
157 | _s2 = np.var(Y[:t+1, i])
158 | # Compute the confidence interval width using the inverse CDF.
159 | w_t_i = norm.ppf(1 - _delta/(2*T*k), loc=0,
160 | scale=np.sqrt(_s2 * X[t, i].dot(_XTX).dot(transpose(X[t, i]))))
161 |                 intervals.append([yh_t_i - w_t_i, yh_t_i + w_t_i])
162 |                 arms.append(i)  # Remember which arm this interval belongs to.
163 | if not intervals:
164 | picks.append(np.random.randint(0, k))
165 | else:
166 | i_st = np.argmax(np.array(intervals)[:, 1])
167 |
168 | # Chaining
169 | chain = compute_chain(i_st, np.array(intervals), k, pp)
170 | print_progress('Computed chain: {0}'.format(chain), pp)
171 |                 picks.append(arms[np.random.choice(chain)])
172 | print_progress('Intervals: {0}'.format(intervals), pp)
173 | # Compute sum of best picks over each iteration.
174 |     best = [Y[t].max() for t in range(2, T)]
175 |     performance = [Y[t][picks[t]] for t in range(2, T)]
176 | cum_regret = sum(best) - sum(performance)
177 | avg_regret = cum_regret / float(T)
178 | final_regret = best[-1] - performance[-1]
179 | print_progress('Cumulative Regret: {0}'.format(cum_regret), pp)
180 | print_progress('Average Regret: {0}'.format(avg_regret), pp)
181 | print_progress('Final Regret: {0}'.format(final_regret), pp)
182 | return cum_regret, avg_regret, final_regret
183 |
184 |
185 | def ridge_fair(X, Y, k, d, _delta, T, _lambda, _print_progress=True):
186 | """
187 | Simulates T rounds of ridge_fair.
188 |
189 | :param X: a 3-axis (T, k, d) ndarray of d-dimensional context vectors for
190 | each time-step and arm
191 | :param Y: a T x k ndarray of reward function output for each context vector
192 | :param k: the number of arms
193 | :param d: the number of features
194 | :param _delta: confidence parameter
195 | :param T: the number of iterations
196 |     :param _lambda: regularization parameter
197 | """
198 | picks = []
199 | for t in range(T):
200 |         R = 1  # Noise scale assumed by the confidence width.
201 |         intervals = []
202 |         for i in range(k):
203 | try:
204 | X_i = X[:t, i] # design matrix
205 | Y_i = Y[:t, i] # same with Y
206 | x_ti = X[t, i] # feature vector for arm i in round t
207 |
208 | X_iT = transpose(X_i)
209 | _idenD = np.identity(d)
210 | V_it = X_iT.dot(X_i) + (_lambda * _idenD)
211 |
212 | B_it = inv(V_it).dot(X_iT).dot(Y_i)
213 |
214 | y_ti = transpose(x_ti).dot(B_it)
215 |
216 | V_itI = inv(V_it) # inverse of V_it
217 | _wti1 = sqrt(transpose(x_ti).dot(V_itI).dot(x_ti))
218 | _wti2 = R * sqrt(d * log((1 + (t / _lambda)) / _delta)) + sqrt(_lambda)
219 | w_ti = _wti1 * _wti2
220 |
221 | intervals.append([y_ti - w_ti, y_ti + w_ti])
222 |             except Exception:
223 | print_progress('Error in assigning interval value.', _print_progress)
224 | intervals = None
225 | break
226 | if not intervals:
227 | picks.append(np.random.randint(0, k))
228 | else:
229 | i_st = np.argmax(np.array(intervals)[:, 1])
230 | chain = compute_chain(i_st, np.array(intervals), k)
231 | # play uniformly random from chain
232 | picks.append(np.random.choice(chain))
233 |
234 |     best = [Y[t].max() for t in range(2, T)]
235 |     performance = [Y[t][picks[t]] for t in range(2, T)]
236 |     cum_regret = sum(best) - sum(performance)
237 |     print_progress('Cumulative Regret: {0}'.format(cum_regret), _print_progress)
238 |     print_progress('Final Regret: {0}'.format(best[-1] - performance[-1]), _print_progress)
239 |     return cum_regret, cum_regret / float(T), best[-1] - performance[-1]
240 | 
--------------------------------------------------------------------------------
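
For reference, the interval that top_interval and interval_chaining construct for arm i at round t is the standard OLS prediction interval, with the confidence level union-bounded over all Tk intervals; in my own notation (X_{t,i} and Y_{t,i} denote arm i's contexts and rewards observed up to round t):

\[
\hat{\beta}_{t,i} = (X_{t,i}^\top X_{t,i})^{-1} X_{t,i}^\top Y_{t,i},
\qquad
\hat{y}_{t,i} = \hat{\beta}_{t,i}^\top x_{t,i},
\]
\[
w_{t,i} = \Phi^{-1}\!\left(1 - \frac{\delta}{2Tk}\right)
\sqrt{\hat{s}^2 \, x_{t,i}^\top (X_{t,i}^\top X_{t,i})^{-1} x_{t,i}},
\qquad
[\hat{y}_{t,i} - w_{t,i},\; \hat{y}_{t,i} + w_{t,i}],
\]

where \Phi^{-1} is the standard normal inverse CDF (norm.ppf with loc=0 and the stated scale) and \hat{s}^2 is the empirical variance of arm i's rewards so far.
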
/figures_T_50x/avg_regret_diff.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_T_50x/avg_regret_diff.png
--------------------------------------------------------------------------------
/figures_T_50x/avg_regret_ic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_T_50x/avg_regret_ic.png
--------------------------------------------------------------------------------
/figures_T_50x/avg_regret_ti.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_T_50x/avg_regret_ti.png
--------------------------------------------------------------------------------
/figures_T_50x/cum_regret_diff.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_T_50x/cum_regret_diff.png
--------------------------------------------------------------------------------
/figures_T_50x/cum_regret_ic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_T_50x/cum_regret_ic.png
--------------------------------------------------------------------------------
/figures_T_50x/cum_regret_ti.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_T_50x/cum_regret_ti.png
--------------------------------------------------------------------------------
/figures_T_50x/final_regret_diff.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_T_50x/final_regret_diff.png
--------------------------------------------------------------------------------
/figures_T_50x/final_regret_ic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_T_50x/final_regret_ic.png
--------------------------------------------------------------------------------
/figures_T_50x/final_regret_ti.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_T_50x/final_regret_ti.png
--------------------------------------------------------------------------------
/figures_d_50x/avg_regret_diff.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_d_50x/avg_regret_diff.png
--------------------------------------------------------------------------------
/figures_d_50x/avg_regret_ic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_d_50x/avg_regret_ic.png
--------------------------------------------------------------------------------
/figures_d_50x/avg_regret_ti.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_d_50x/avg_regret_ti.png
--------------------------------------------------------------------------------
/figures_d_50x/cum_regret_diff.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_d_50x/cum_regret_diff.png
--------------------------------------------------------------------------------
/figures_d_50x/cum_regret_ic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_d_50x/cum_regret_ic.png
--------------------------------------------------------------------------------
/figures_d_50x/cum_regret_ti.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_d_50x/cum_regret_ti.png
--------------------------------------------------------------------------------
/figures_d_50x/final_regret_diff.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_d_50x/final_regret_diff.png
--------------------------------------------------------------------------------
/figures_d_50x/final_regret_ic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_d_50x/final_regret_ic.png
--------------------------------------------------------------------------------
/figures_d_50x/final_regret_ti.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_d_50x/final_regret_ti.png
--------------------------------------------------------------------------------
/figures_k_50x/avg_regret_diff.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_k_50x/avg_regret_diff.png
--------------------------------------------------------------------------------
/figures_k_50x/avg_regret_ic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_k_50x/avg_regret_ic.png
--------------------------------------------------------------------------------
/figures_k_50x/avg_regret_ti.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_k_50x/avg_regret_ti.png
--------------------------------------------------------------------------------
/figures_k_50x/cum_regret_diff.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_k_50x/cum_regret_diff.png
--------------------------------------------------------------------------------
/figures_k_50x/cum_regret_ic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_k_50x/cum_regret_ic.png
--------------------------------------------------------------------------------
/figures_k_50x/cum_regret_ti.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_k_50x/cum_regret_ti.png
--------------------------------------------------------------------------------
/figures_k_50x/final_regret_diff.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_k_50x/final_regret_diff.png
--------------------------------------------------------------------------------
/figures_k_50x/final_regret_ic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_k_50x/final_regret_ic.png
--------------------------------------------------------------------------------
/figures_k_50x/final_regret_ti.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/figures_k_50x/final_regret_ti.png
--------------------------------------------------------------------------------
/paper/.gitignore:
--------------------------------------------------------------------------------
1 | ## Core latex/pdflatex auxiliary files:
2 | *.aux
3 | *.lof
4 | *.log
5 | *.lot
6 | *.fls
7 | *.out
8 | *.toc
9 | *.fmt
10 | *.fot
11 | *.cb
12 | *.cb2
13 |
14 | ## Intermediate documents:
15 | *.dvi
16 | *-converted-to.*
17 | # these rules might exclude image files for figures etc.
18 | # *.ps
19 | # *.eps
20 | # *.pdf
21 |
22 | ## Generated if empty string is given at "Please type another file name for output:"
23 | .pdf
24 |
25 | ## Bibliography auxiliary files (bibtex/biblatex/biber):
26 | *.bbl
27 | *.bcf
28 | *.blg
29 | *-blx.aux
30 | *-blx.bib
31 | *.run.xml
32 |
33 | ## Build tool auxiliary files:
34 | *.fdb_latexmk
35 | *.synctex
36 | *.synctex(busy)
37 | *.synctex.gz
38 | *.synctex.gz(busy)
39 | *.pdfsync
40 |
41 | ## Auxiliary and intermediate files from other packages:
42 | # algorithms
43 | *.alg
44 | *.loa
45 |
46 | # achemso
47 | acs-*.bib
48 |
49 | # amsthm
50 | *.thm
51 |
52 | # beamer
53 | *.nav
54 | *.pre
55 | *.snm
56 | *.vrb
57 |
58 | # changes
59 | *.soc
60 |
61 | # cprotect
62 | *.cpt
63 |
64 | # elsarticle (documentclass of Elsevier journals)
65 | *.spl
66 |
67 | # endnotes
68 | *.ent
69 |
70 | # fixme
71 | *.lox
72 |
73 | # feynmf/feynmp
74 | *.mf
75 | *.mp
76 | *.t[1-9]
77 | *.t[1-9][0-9]
78 | *.tfm
79 |
80 | #(r)(e)ledmac/(r)(e)ledpar
81 | *.end
82 | *.?end
83 | *.[1-9]
84 | *.[1-9][0-9]
85 | *.[1-9][0-9][0-9]
86 | *.[1-9]R
87 | *.[1-9][0-9]R
88 | *.[1-9][0-9][0-9]R
89 | *.eledsec[1-9]
90 | *.eledsec[1-9]R
91 | *.eledsec[1-9][0-9]
92 | *.eledsec[1-9][0-9]R
93 | *.eledsec[1-9][0-9][0-9]
94 | *.eledsec[1-9][0-9][0-9]R
95 |
96 | # glossaries
97 | *.acn
98 | *.acr
99 | *.glg
100 | *.glo
101 | *.gls
102 | *.glsdefs
103 |
104 | # gnuplottex
105 | *-gnuplottex-*
106 |
107 | # gregoriotex
108 | *.gaux
109 | *.gtex
110 |
111 | # hyperref
112 | *.brf
113 |
114 | # knitr
115 | *-concordance.tex
116 | # TODO Comment the next line if you want to keep your tikz graphics files
117 | *.tikz
118 | *-tikzDictionary
119 |
120 | # listings
121 | *.lol
122 |
123 | # makeidx
124 | *.idx
125 | *.ilg
126 | *.ind
127 | *.ist
128 |
129 | # minitoc
130 | *.maf
131 | *.mlf
132 | *.mlt
133 | *.mtc[0-9]*
134 | *.slf[0-9]*
135 | *.slt[0-9]*
136 | *.stc[0-9]*
137 |
138 | # minted
139 | _minted*
140 | *.pyg
141 |
142 | # morewrites
143 | *.mw
144 |
145 | # nomencl
146 | *.nlo
147 |
148 | # pax
149 | *.pax
150 |
151 | # pdfpcnotes
152 | *.pdfpc
153 |
154 | # sagetex
155 | *.sagetex.sage
156 | *.sagetex.py
157 | *.sagetex.scmd
158 |
159 | # scrwfile
160 | *.wrt
161 |
162 | # sympy
163 | *.sout
164 | *.sympy
165 | sympy-plots-for-*.tex/
166 |
167 | # pdfcomment
168 | *.upa
169 | *.upb
170 |
171 | # pythontex
172 | *.pytxcode
173 | pythontex-files-*/
174 |
175 | # thmtools
176 | *.loe
177 |
178 | # TikZ & PGF
179 | *.dpth
180 | *.md5
181 | *.auxlock
182 |
183 | # todonotes
184 | *.tdo
185 |
186 | # easy-todo
187 | *.lod
188 |
189 | # xindy
190 | *.xdy
191 |
192 | # xypic precompiled matrices
193 | *.xyc
194 |
195 | # endfloat
196 | *.ttt
197 | *.fff
198 |
199 | # Latexian
200 | TSWLatexianTemp*
201 |
202 | ## Editors:
203 | # WinEdt
204 | *.bak
205 | *.sav
206 |
207 | # Texpad
208 | .texpadtmp
209 |
210 | # Kile
211 | *.backup
212 |
213 | # KBibTeX
214 | *~[0-9]*
215 |
216 | # auto folder when using emacs and auctex
217 | /auto/*
218 |
219 | # expex forward references with \gathertags
220 | *-tags.tex
221 |
--------------------------------------------------------------------------------
/paper/Makefile:
--------------------------------------------------------------------------------
1 |
2 | all: prepare
3 |
4 | prepare:
5 | pdflatex paper.tex
6 |
7 | view: prepare
8 | open -a Skim paper.pdf
9 |
10 | develop: prepare
11 | fswatch -i 'paper.tex' -e '.*' . | xargs -t -n1 -I % bash -c "pdflatex % || osascript -e 'display notification \"Latex compilation failed\" with title \"ERROR\"'"
12 |
13 | clean:
14 | rm -rf *.aux *.listing *.pdf *.out *.log
15 |
--------------------------------------------------------------------------------
/paper/acl.bst:
--------------------------------------------------------------------------------
1 |
2 | % BibTeX `acl' style file for BibTeX version 0.99c, LaTeX version 2.09
3 | % This version was made by modifying `aaai-named' format based on the master
4 | % file by Oren Patashnik (PATASHNIK@SCORE.STANFORD.EDU)
5 |
6 | % Copyright (C) 1985, all rights reserved.
7 | % Modifications Copyright 1988, Peter F. Patel-Schneider
8 | % Further modifications by Stuart Shieber, 1991, and Fernando Pereira, 1992.
9 | % Copying of this file is authorized only if either
10 | % (1) you make absolutely no changes to your copy, including name, or
11 | % (2) if you do make changes, you name it something other than
12 | % btxbst.doc, plain.bst, unsrt.bst, alpha.bst, and abbrv.bst.
13 | % This restriction helps ensure that all standard styles are identical.
14 |
15 | % There are undoubtedly bugs in this style. If you make bug fixes,
16 | % improvements, etc. please let me know. My e-mail address is:
17 | % pfps@spar.slb.com
18 |
19 | % Citation format: [author-last-name, year]
20 | % [author-last-name and author-last-name, year]
21 | % [author-last-name {\em et al.}, year]
22 | %
23 | % Reference list ordering: alphabetical by author or whatever passes
24 | % for author in the absence of one.
25 | %
26 | % This BibTeX style has support for short (year only) citations. This
27 | % is done by having the citations actually look like
28 | % \citename{name-info, }year
29 | % The LaTeX style has to have the following
30 | % \let\@internalcite\cite
31 | % \def\cite{\def\citename##1{##1}\@internalcite}
32 | % \def\shortcite{\def\citename##1{}\@internalcite}
33 | % \def\@biblabel#1{\def\citename##1{##1}[#1]\hfill}
34 | % which makes \shortcite the macro for short citations.
35 |
36 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
37 | % Changes made by SMS for thesis style
38 | % no emphasis on "et al."
39 | % "Ph.D." includes periods (not "PhD")
40 | % moved year to immediately after author's name
41 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
42 | ENTRY
43 | { address
44 | author
45 | booktitle
46 | chapter
47 | edition
48 | editor
49 | howpublished
50 | institution
51 | journal
52 | key
53 | month
54 | note
55 | number
56 | organization
57 | pages
58 | publisher
59 | school
60 | series
61 | title
62 | type
63 | volume
64 | year
65 | }
66 | {}
67 | { label extra.label sort.label }
68 |
69 | INTEGERS { output.state before.all mid.sentence after.sentence after.block }
70 |
71 | FUNCTION {init.state.consts}
72 | { #0 'before.all :=
73 | #1 'mid.sentence :=
74 | #2 'after.sentence :=
75 | #3 'after.block :=
76 | }
77 |
78 | STRINGS { s t }
79 |
80 | FUNCTION {output.nonnull}
81 | { 's :=
82 | output.state mid.sentence =
83 | { ", " * write$ }
84 | { output.state after.block =
85 | { add.period$ write$
86 | newline$
87 | "\newblock " write$
88 | }
89 | { output.state before.all =
90 | 'write$
91 | { add.period$ " " * write$ }
92 | if$
93 | }
94 | if$
95 | mid.sentence 'output.state :=
96 | }
97 | if$
98 | s
99 | }
100 |
101 | FUNCTION {output}
102 | { duplicate$ empty$
103 | 'pop$
104 | 'output.nonnull
105 | if$
106 | }
107 |
108 | FUNCTION {output.check}
109 | { 't :=
110 | duplicate$ empty$
111 | { pop$ "empty " t * " in " * cite$ * warning$ }
112 | 'output.nonnull
113 | if$
114 | }
115 |
116 | FUNCTION {output.bibitem}
117 | { newline$
118 |
119 | "\bibitem[" write$
120 | label write$
121 | "]{" write$
122 |
123 | cite$ write$
124 | "}" write$
125 | newline$
126 | ""
127 | before.all 'output.state :=
128 | }
129 |
130 | FUNCTION {fin.entry}
131 | { add.period$
132 | write$
133 | newline$
134 | }
135 |
136 | FUNCTION {new.block}
137 | { output.state before.all =
138 | 'skip$
139 | { after.block 'output.state := }
140 | if$
141 | }
142 |
143 | FUNCTION {new.sentence}
144 | { output.state after.block =
145 | 'skip$
146 | { output.state before.all =
147 | 'skip$
148 | { after.sentence 'output.state := }
149 | if$
150 | }
151 | if$
152 | }
153 |
154 | FUNCTION {not}
155 | { { #0 }
156 | { #1 }
157 | if$
158 | }
159 |
160 | FUNCTION {and}
161 | { 'skip$
162 | { pop$ #0 }
163 | if$
164 | }
165 |
166 | FUNCTION {or}
167 | { { pop$ #1 }
168 | 'skip$
169 | if$
170 | }
171 |
172 | FUNCTION {new.block.checka}
173 | { empty$
174 | 'skip$
175 | 'new.block
176 | if$
177 | }
178 |
179 | FUNCTION {new.block.checkb}
180 | { empty$
181 | swap$ empty$
182 | and
183 | 'skip$
184 | 'new.block
185 | if$
186 | }
187 |
188 | FUNCTION {new.sentence.checka}
189 | { empty$
190 | 'skip$
191 | 'new.sentence
192 | if$
193 | }
194 |
195 | FUNCTION {new.sentence.checkb}
196 | { empty$
197 | swap$ empty$
198 | and
199 | 'skip$
200 | 'new.sentence
201 | if$
202 | }
203 |
204 | FUNCTION {field.or.null}
205 | { duplicate$ empty$
206 | { pop$ "" }
207 | 'skip$
208 | if$
209 | }
210 |
211 | FUNCTION {emphasize}
212 | { duplicate$ empty$
213 | { pop$ "" }
214 | { "{\em " swap$ * "}" * }
215 | if$
216 | }
217 |
218 | INTEGERS { nameptr namesleft numnames }
219 |
220 | FUNCTION {format.names}
221 | { 's :=
222 | #1 'nameptr :=
223 | s num.names$ 'numnames :=
224 | numnames 'namesleft :=
225 | { namesleft #0 > }
226 |
227 | { s nameptr "{ff~}{vv~}{ll}{, jj}" format.name$ 't :=
228 |
229 | nameptr #1 >
230 | { namesleft #1 >
231 | { ", " * t * }
232 | { numnames #2 >
233 | { "," * }
234 | 'skip$
235 | if$
236 | t "others" =
237 | { " et~al." * }
238 | { " and " * t * }
239 | if$
240 | }
241 | if$
242 | }
243 | 't
244 | if$
245 | nameptr #1 + 'nameptr :=
246 | namesleft #1 - 'namesleft :=
247 | }
248 | while$
249 | }
250 |
251 | FUNCTION {format.authors}
252 | { author empty$
253 | { "" }
254 | { author format.names }
255 | if$
256 | }
257 |
258 | FUNCTION {format.editors}
259 | { editor empty$
260 | { "" }
261 | { editor format.names
262 | editor num.names$ #1 >
263 | { ", editors" * }
264 | { ", editor" * }
265 | if$
266 | }
267 | if$
268 | }
269 |
270 | FUNCTION {format.title}
271 | { title empty$
272 | { "" }
273 |
274 | { title "t" change.case$ }
275 |
276 | if$
277 | }
278 |
279 | FUNCTION {n.dashify}
280 | { 't :=
281 | ""
282 | { t empty$ not }
283 | { t #1 #1 substring$ "-" =
284 | { t #1 #2 substring$ "--" = not
285 | { "--" *
286 | t #2 global.max$ substring$ 't :=
287 | }
288 | { { t #1 #1 substring$ "-" = }
289 | { "-" *
290 | t #2 global.max$ substring$ 't :=
291 | }
292 | while$
293 | }
294 | if$
295 | }
296 | { t #1 #1 substring$ *
297 | t #2 global.max$ substring$ 't :=
298 | }
299 | if$
300 | }
301 | while$
302 | }
303 |
304 | FUNCTION {format.date}
305 | { year empty$
306 | { month empty$
307 | { "" }
308 | { "there's a month but no year in " cite$ * warning$
309 | month
310 | }
311 | if$
312 | }
313 | { month empty$
314 | { "" }
315 | { month }
316 | if$
317 | }
318 | if$
319 | }
320 |
321 | FUNCTION {format.btitle}
322 | { title emphasize
323 | }
324 |
325 | FUNCTION {tie.or.space.connect}
326 | { duplicate$ text.length$ #3 <
327 | { "~" }
328 | { " " }
329 | if$
330 | swap$ * *
331 | }
332 |
333 | FUNCTION {either.or.check}
334 | { empty$
335 | 'pop$
336 | { "can't use both " swap$ * " fields in " * cite$ * warning$ }
337 | if$
338 | }
339 |
340 | FUNCTION {format.bvolume}
341 | { volume empty$
342 | { "" }
343 | { "volume" volume tie.or.space.connect
344 | series empty$
345 | 'skip$
346 | { " of " * series emphasize * }
347 | if$
348 | "volume and number" number either.or.check
349 | }
350 | if$
351 | }
352 |
353 | FUNCTION {format.number.series}
354 | { volume empty$
355 | { number empty$
356 | { series field.or.null }
357 | { output.state mid.sentence =
358 | { "number" }
359 | { "Number" }
360 | if$
361 | number tie.or.space.connect
362 | series empty$
363 | { "there's a number but no series in " cite$ * warning$ }
364 | { " in " * series * }
365 | if$
366 | }
367 | if$
368 | }
369 | { "" }
370 | if$
371 | }
372 |
373 | FUNCTION {format.edition}
374 | { edition empty$
375 | { "" }
376 | { output.state mid.sentence =
377 | { edition "l" change.case$ " edition" * }
378 | { edition "t" change.case$ " edition" * }
379 | if$
380 | }
381 | if$
382 | }
383 |
384 | INTEGERS { multiresult }
385 |
386 | FUNCTION {multi.page.check}
387 | { 't :=
388 | #0 'multiresult :=
389 | { multiresult not
390 | t empty$ not
391 | and
392 | }
393 | { t #1 #1 substring$
394 | duplicate$ "-" =
395 | swap$ duplicate$ "," =
396 | swap$ "+" =
397 | or or
398 | { #1 'multiresult := }
399 | { t #2 global.max$ substring$ 't := }
400 | if$
401 | }
402 | while$
403 | multiresult
404 | }
405 |
406 | FUNCTION {format.pages}
407 | { pages empty$
408 | { "" }
409 | { pages multi.page.check
410 | { "pages" pages n.dashify tie.or.space.connect }
411 | { "page" pages tie.or.space.connect }
412 | if$
413 | }
414 | if$
415 | }
416 |
417 | FUNCTION {format.year.label}
418 | { year extra.label *
419 | }
420 |
421 | FUNCTION {format.vol.num.pages}
422 | { volume field.or.null
423 | number empty$
424 | 'skip$
425 | { "(" number * ")" * *
426 | volume empty$
427 | { "there's a number but no volume in " cite$ * warning$ }
428 | 'skip$
429 | if$
430 | }
431 | if$
432 | pages empty$
433 | 'skip$
434 | { duplicate$ empty$
435 | { pop$ format.pages }
436 | { ":" * pages n.dashify * }
437 | if$
438 | }
439 | if$
440 | }
441 |
442 | FUNCTION {format.chapter.pages}
443 | { chapter empty$
444 | 'format.pages
445 | { type empty$
446 | { "chapter" }
447 | { type "l" change.case$ }
448 | if$
449 | chapter tie.or.space.connect
450 | pages empty$
451 | 'skip$
452 | { ", " * format.pages * }
453 | if$
454 | }
455 | if$
456 | }
457 |
458 | FUNCTION {format.in.ed.booktitle}
459 | { booktitle empty$
460 | { "" }
461 | { editor empty$
462 | { "In " booktitle emphasize * }
463 | { "In " format.editors * ", " * booktitle emphasize * }
464 | if$
465 | }
466 | if$
467 | }
468 |
469 | FUNCTION {empty.misc.check}
470 | { author empty$ title empty$ howpublished empty$
471 | month empty$ year empty$ note empty$
472 | and and and and and
473 |
474 | key empty$ not and
475 |
476 | { "all relevant fields are empty in " cite$ * warning$ }
477 | 'skip$
478 | if$
479 | }
480 |
481 | FUNCTION {format.thesis.type}
482 | { type empty$
483 | 'skip$
484 | { pop$
485 | type "t" change.case$
486 | }
487 | if$
488 | }
489 |
490 | FUNCTION {format.tr.number}
491 | { type empty$
492 | { "Technical Report" }
493 | 'type
494 | if$
495 | number empty$
496 | { "t" change.case$ }
497 | { number tie.or.space.connect }
498 | if$
499 | }
500 |
501 | FUNCTION {format.article.crossref}
502 | { key empty$
503 | { journal empty$
504 | { "need key or journal for " cite$ * " to crossref " * crossref *
505 | warning$
506 | ""
507 | }
508 | { "In {\em " journal * "\/}" * }
509 | if$
510 | }
511 | { "In " key * }
512 | if$
513 | " \cite{" * crossref * "}" *
514 | }
515 |
516 | FUNCTION {format.crossref.editor}
517 | { editor #1 "{vv~}{ll}" format.name$
518 | editor num.names$ duplicate$
519 | #2 >
520 | { pop$ " et~al." * }
521 | { #2 <
522 | 'skip$
523 | { editor #2 "{ff }{vv }{ll}{ jj}" format.name$ "others" =
524 | { " et~al." * }
525 | { " and " * editor #2 "{vv~}{ll}" format.name$ * }
526 | if$
527 | }
528 | if$
529 | }
530 | if$
531 | }
532 |
533 | FUNCTION {format.book.crossref}
534 | { volume empty$
535 | { "empty volume in " cite$ * "'s crossref of " * crossref * warning$
536 | "In "
537 | }
538 | { "Volume" volume tie.or.space.connect
539 | " of " *
540 | }
541 | if$
542 | editor empty$
543 | editor field.or.null author field.or.null =
544 | or
545 | { key empty$
546 | { series empty$
547 | { "need editor, key, or series for " cite$ * " to crossref " *
548 | crossref * warning$
549 | "" *
550 | }
551 | { "{\em " * series * "\/}" * }
552 | if$
553 | }
554 | { key * }
555 | if$
556 | }
557 | { format.crossref.editor * }
558 | if$
559 | " \cite{" * crossref * "}" *
560 | }
561 |
562 | FUNCTION {format.incoll.inproc.crossref}
563 | { editor empty$
564 | editor field.or.null author field.or.null =
565 | or
566 | { key empty$
567 | { booktitle empty$
568 | { "need editor, key, or booktitle for " cite$ * " to crossref " *
569 | crossref * warning$
570 | ""
571 | }
572 | { "In {\em " booktitle * "\/}" * }
573 | if$
574 | }
575 | { "In " key * }
576 | if$
577 | }
578 | { "In " format.crossref.editor * }
579 | if$
580 | " \cite{" * crossref * "}" *
581 | }
582 |
583 | FUNCTION {article}
584 | { output.bibitem
585 | format.authors "author" output.check
586 | new.block
587 | format.year.label "year" output.check
588 | new.block
589 | format.title "title" output.check
590 | new.block
591 | crossref missing$
592 | { journal emphasize "journal" output.check
593 | format.vol.num.pages output
594 | format.date output
595 | }
596 | { format.article.crossref output.nonnull
597 | format.pages output
598 | }
599 | if$
600 | new.block
601 | note output
602 | fin.entry
603 | }
604 |
605 | FUNCTION {book}
606 | { output.bibitem
607 | author empty$
608 | { format.editors "author and editor" output.check }
609 | { format.authors output.nonnull
610 | crossref missing$
611 | { "author and editor" editor either.or.check }
612 | 'skip$
613 | if$
614 | }
615 | if$
616 | new.block
617 | format.year.label "year" output.check
618 | new.block
619 | format.btitle "title" output.check
620 | crossref missing$
621 | { format.bvolume output
622 | new.block
623 | format.number.series output
624 | new.sentence
625 | publisher "publisher" output.check
626 | address output
627 | }
628 | { new.block
629 | format.book.crossref output.nonnull
630 | }
631 | if$
632 | format.edition output
633 | format.date output
634 | new.block
635 | note output
636 | fin.entry
637 | }
638 |
639 | FUNCTION {booklet}
640 | { output.bibitem
641 | format.authors output
642 | new.block
643 | format.year.label "year" output.check
644 | new.block
645 | format.title "title" output.check
646 | howpublished address new.block.checkb
647 | howpublished output
648 | address output
649 | format.date output
650 | new.block
651 | note output
652 | fin.entry
653 | }
654 |
655 | FUNCTION {inbook}
656 | { output.bibitem
657 | author empty$
658 | { format.editors "author and editor" output.check }
659 | { format.authors output.nonnull
660 | crossref missing$
661 | { "author and editor" editor either.or.check }
662 | 'skip$
663 | if$
664 | }
665 | if$
666 | format.year.label "year" output.check
667 | new.block
668 | new.block
669 | format.btitle "title" output.check
670 | crossref missing$
671 | { format.bvolume output
672 | format.chapter.pages "chapter and pages" output.check
673 | new.block
674 | format.number.series output
675 | new.sentence
676 | publisher "publisher" output.check
677 | address output
678 | }
679 | { format.chapter.pages "chapter and pages" output.check
680 | new.block
681 | format.book.crossref output.nonnull
682 | }
683 | if$
684 | format.edition output
685 | format.date output
686 | new.block
687 | note output
688 | fin.entry
689 | }
690 |
691 | FUNCTION {incollection}
692 | { output.bibitem
693 | format.authors "author" output.check
694 | new.block
695 | format.year.label "year" output.check
696 | new.block
697 | format.title "title" output.check
698 | new.block
699 | crossref missing$
700 | { format.in.ed.booktitle "booktitle" output.check
701 | format.bvolume output
702 | format.number.series output
703 | format.chapter.pages output
704 | new.sentence
705 | publisher "publisher" output.check
706 | address output
707 | format.edition output
708 | format.date output
709 | }
710 | { format.incoll.inproc.crossref output.nonnull
711 | format.chapter.pages output
712 | }
713 | if$
714 | new.block
715 | note output
716 | fin.entry
717 | }
718 |
719 | FUNCTION {inproceedings}
720 | { output.bibitem
721 | format.authors "author" output.check
722 | new.block
723 | format.year.label "year" output.check
724 | new.block
725 | format.title "title" output.check
726 | new.block
727 | crossref missing$
728 | { format.in.ed.booktitle "booktitle" output.check
729 | format.bvolume output
730 | format.number.series output
731 | format.pages output
732 | address empty$
733 | { organization publisher new.sentence.checkb
734 | organization output
735 | publisher output
736 | format.date output
737 | }
738 | { address output.nonnull
739 | format.date output
740 | new.sentence
741 | organization output
742 | publisher output
743 | }
744 | if$
745 | }
746 | { format.incoll.inproc.crossref output.nonnull
747 | format.pages output
748 | }
749 | if$
750 | new.block
751 | note output
752 | fin.entry
753 | }
754 |
755 | FUNCTION {conference} { inproceedings }
756 |
757 | FUNCTION {manual}
758 | { output.bibitem
759 | author empty$
760 | { organization empty$
761 | 'skip$
762 | { organization output.nonnull
763 | address output
764 | }
765 | if$
766 | }
767 | { format.authors output.nonnull }
768 | if$
769 | format.year.label "year" output.check
770 | new.block
771 | new.block
772 | format.btitle "title" output.check
773 | author empty$
774 | { organization empty$
775 | { address new.block.checka
776 | address output
777 | }
778 | 'skip$
779 | if$
780 | }
781 | { organization address new.block.checkb
782 | organization output
783 | address output
784 | }
785 | if$
786 | format.edition output
787 | format.date output
788 | new.block
789 | note output
790 | fin.entry
791 | }
792 |
793 | FUNCTION {mastersthesis}
794 | { output.bibitem
795 | format.authors "author" output.check
796 | new.block
797 | format.year.label "year" output.check
798 | new.block
799 | format.title "title" output.check
800 | new.block
801 | "Master's thesis" format.thesis.type output.nonnull
802 | school "school" output.check
803 | address output
804 | format.date output
805 | new.block
806 | note output
807 | fin.entry
808 | }
809 |
810 | FUNCTION {misc}
811 | { output.bibitem
812 | format.authors output
813 | new.block
814 | format.year.label output
815 | new.block
816 | title howpublished new.block.checkb
817 | format.title output
818 | howpublished new.block.checka
819 | howpublished output
820 | format.date output
821 | new.block
822 | note output
823 | fin.entry
824 | empty.misc.check
825 | }
826 |
827 | FUNCTION {phdthesis}
828 | { output.bibitem
829 | format.authors "author" output.check
830 | new.block
831 | format.year.label "year" output.check
832 | new.block
833 | format.btitle "title" output.check
834 | new.block
835 | "{Ph.D.} thesis" format.thesis.type output.nonnull
836 | school "school" output.check
837 | address output
838 | format.date output
839 | new.block
840 | note output
841 | fin.entry
842 | }
843 |
844 | FUNCTION {proceedings}
845 | { output.bibitem
846 | editor empty$
847 | { organization output }
848 | { format.editors output.nonnull }
849 | if$
850 | new.block
851 | format.year.label "year" output.check
852 | new.block
853 | format.btitle "title" output.check
854 | format.bvolume output
855 | format.number.series output
856 | address empty$
857 | { editor empty$
858 | { publisher new.sentence.checka }
859 | { organization publisher new.sentence.checkb
860 | organization output
861 | }
862 | if$
863 | publisher output
864 | format.date output
865 | }
866 | { address output.nonnull
867 | format.date output
868 | new.sentence
869 | editor empty$
870 | 'skip$
871 | { organization output }
872 | if$
873 | publisher output
874 | }
875 | if$
876 | new.block
877 | note output
878 | fin.entry
879 | }
880 |
881 | FUNCTION {techreport}
882 | { output.bibitem
883 | format.authors "author" output.check
884 | new.block
885 | format.year.label "year" output.check
886 | new.block
887 | format.title "title" output.check
888 | new.block
889 | format.tr.number output.nonnull
890 | institution "institution" output.check
891 | address output
892 | format.date output
893 | new.block
894 | note output
895 | fin.entry
896 | }
897 |
898 | FUNCTION {unpublished}
899 | { output.bibitem
900 | format.authors "author" output.check
901 | new.block
902 | format.year.label "year" output.check
903 | new.block
904 | format.title "title" output.check
905 | new.block
906 | note "note" output.check
907 | format.date output
908 | fin.entry
909 | }
910 |
911 | FUNCTION {default.type} { misc }
912 |
913 | MACRO {jan} {"January"}
914 |
915 | MACRO {feb} {"February"}
916 |
917 | MACRO {mar} {"March"}
918 |
919 | MACRO {apr} {"April"}
920 |
921 | MACRO {may} {"May"}
922 |
923 | MACRO {jun} {"June"}
924 |
925 | MACRO {jul} {"July"}
926 |
927 | MACRO {aug} {"August"}
928 |
929 | MACRO {sep} {"September"}
930 |
931 | MACRO {oct} {"October"}
932 |
933 | MACRO {nov} {"November"}
934 |
935 | MACRO {dec} {"December"}
936 |
937 | MACRO {acmcs} {"ACM Computing Surveys"}
938 |
939 | MACRO {acta} {"Acta Informatica"}
940 |
941 | MACRO {cacm} {"Communications of the ACM"}
942 |
943 | MACRO {ibmjrd} {"IBM Journal of Research and Development"}
944 |
945 | MACRO {ibmsj} {"IBM Systems Journal"}
946 |
947 | MACRO {ieeese} {"IEEE Transactions on Software Engineering"}
948 |
949 | MACRO {ieeetc} {"IEEE Transactions on Computers"}
950 |
951 | MACRO {ieeetcad}
952 | {"IEEE Transactions on Computer-Aided Design of Integrated Circuits"}
953 |
954 | MACRO {ipl} {"Information Processing Letters"}
955 |
956 | MACRO {jacm} {"Journal of the ACM"}
957 |
958 | MACRO {jcss} {"Journal of Computer and System Sciences"}
959 |
960 | MACRO {scp} {"Science of Computer Programming"}
961 |
962 | MACRO {sicomp} {"SIAM Journal on Computing"}
963 |
964 | MACRO {tocs} {"ACM Transactions on Computer Systems"}
965 |
966 | MACRO {tods} {"ACM Transactions on Database Systems"}
967 |
968 | MACRO {tog} {"ACM Transactions on Graphics"}
969 |
970 | MACRO {toms} {"ACM Transactions on Mathematical Software"}
971 |
972 | MACRO {toois} {"ACM Transactions on Office Information Systems"}
973 |
974 | MACRO {toplas} {"ACM Transactions on Programming Languages and Systems"}
975 |
976 | MACRO {tcs} {"Theoretical Computer Science"}
977 |
978 | READ
979 |
980 | FUNCTION {sortify}
981 | { purify$
982 | "l" change.case$
983 | }
984 |
985 | INTEGERS { len }
986 |
987 | FUNCTION {chop.word}
988 | { 's :=
989 | 'len :=
990 | s #1 len substring$ =
991 | { s len #1 + global.max$ substring$ }
992 | 's
993 | if$
994 | }
995 |
996 | INTEGERS { et.al.char.used }
997 |
998 | FUNCTION {initialize.et.al.char.used}
999 | { #0 'et.al.char.used :=
1000 | }
1001 |
1002 | EXECUTE {initialize.et.al.char.used}
1003 |
1004 | FUNCTION {format.lab.names}
1005 | { 's :=
1006 | s num.names$ 'numnames :=
1007 |
1008 | numnames #1 =
1009 | { s #1 "{vv }{ll}" format.name$ }
1010 | { numnames #2 =
1011 | { s #1 "{vv }{ll }and " format.name$ s #2 "{vv }{ll}" format.name$ *
1012 | }
1013 | { s #1 "{vv }{ll }\bgroup et al.\egroup " format.name$ }
1014 | if$
1015 | }
1016 | if$
1017 |
1018 | }
1019 |
1020 | FUNCTION {author.key.label}
1021 | { author empty$
1022 | { key empty$
1023 |
1024 | { cite$ #1 #3 substring$ }
1025 |
1026 | { key #3 text.prefix$ }
1027 | if$
1028 | }
1029 | { author format.lab.names }
1030 | if$
1031 | }
1032 |
1033 | FUNCTION {author.editor.key.label}
1034 | { author empty$
1035 | { editor empty$
1036 | { key empty$
1037 |
1038 | { cite$ #1 #3 substring$ }
1039 |
1040 | { key #3 text.prefix$ }
1041 | if$
1042 | }
1043 | { editor format.lab.names }
1044 | if$
1045 | }
1046 | { author format.lab.names }
1047 | if$
1048 | }
1049 |
1050 | FUNCTION {author.key.organization.label}
1051 | { author empty$
1052 | { key empty$
1053 | { organization empty$
1054 |
1055 | { cite$ #1 #3 substring$ }
1056 |
1057 | { "The " #4 organization chop.word #3 text.prefix$ }
1058 | if$
1059 | }
1060 | { key #3 text.prefix$ }
1061 | if$
1062 | }
1063 | { author format.lab.names }
1064 | if$
1065 | }
1066 |
1067 | FUNCTION {editor.key.organization.label}
1068 | { editor empty$
1069 | { key empty$
1070 | { organization empty$
1071 |
1072 | { cite$ #1 #3 substring$ }
1073 |
1074 | { "The " #4 organization chop.word #3 text.prefix$ }
1075 | if$
1076 | }
1077 | { key #3 text.prefix$ }
1078 | if$
1079 | }
1080 | { editor format.lab.names }
1081 | if$
1082 | }
1083 |
1084 | FUNCTION {calc.label}
1085 | { type$ "book" =
1086 | type$ "inbook" =
1087 | or
1088 | 'author.editor.key.label
1089 | { type$ "proceedings" =
1090 | 'editor.key.organization.label
1091 | { type$ "manual" =
1092 | 'author.key.organization.label
1093 | 'author.key.label
1094 | if$
1095 | }
1096 | if$
1097 | }
1098 | if$
1099 | duplicate$
1100 |
1101 | "\protect\citename{" swap$ * "}" *
1102 | year field.or.null purify$ *
1103 | 'label :=
1104 | year field.or.null purify$ *
1105 |
1106 | sortify 'sort.label :=
1107 | }
1108 |
1109 | FUNCTION {sort.format.names}
1110 | { 's :=
1111 | #1 'nameptr :=
1112 | ""
1113 | s num.names$ 'numnames :=
1114 | numnames 'namesleft :=
1115 | { namesleft #0 > }
1116 | { nameptr #1 >
1117 | { " " * }
1118 | 'skip$
1119 | if$
1120 |
1121 | s nameptr "{vv{ } }{ll{ }}{ ff{ }}{ jj{ }}" format.name$ 't :=
1122 |
1123 | nameptr numnames = t "others" = and
1124 | { "et al" * }
1125 | { t sortify * }
1126 | if$
1127 | nameptr #1 + 'nameptr :=
1128 | namesleft #1 - 'namesleft :=
1129 | }
1130 | while$
1131 | }
1132 |
1133 | FUNCTION {sort.format.title}
1134 | { 't :=
1135 | "A " #2
1136 | "An " #3
1137 | "The " #4 t chop.word
1138 | chop.word
1139 | chop.word
1140 | sortify
1141 | #1 global.max$ substring$
1142 | }
1143 |
1144 | FUNCTION {author.sort}
1145 | { author empty$
1146 | { key empty$
1147 | { "to sort, need author or key in " cite$ * warning$
1148 | ""
1149 | }
1150 | { key sortify }
1151 | if$
1152 | }
1153 | { author sort.format.names }
1154 | if$
1155 | }
1156 |
1157 | FUNCTION {author.editor.sort}
1158 | { author empty$
1159 | { editor empty$
1160 | { key empty$
1161 | { "to sort, need author, editor, or key in " cite$ * warning$
1162 | ""
1163 | }
1164 | { key sortify }
1165 | if$
1166 | }
1167 | { editor sort.format.names }
1168 | if$
1169 | }
1170 | { author sort.format.names }
1171 | if$
1172 | }
1173 |
1174 | FUNCTION {author.organization.sort}
1175 | { author empty$
1176 | { organization empty$
1177 | { key empty$
1178 | { "to sort, need author, organization, or key in " cite$ * warning$
1179 | ""
1180 | }
1181 | { key sortify }
1182 | if$
1183 | }
1184 | { "The " #4 organization chop.word sortify }
1185 | if$
1186 | }
1187 | { author sort.format.names }
1188 | if$
1189 | }
1190 |
1191 | FUNCTION {editor.organization.sort}
1192 | { editor empty$
1193 | { organization empty$
1194 | { key empty$
1195 | { "to sort, need editor, organization, or key in " cite$ * warning$
1196 | ""
1197 | }
1198 | { key sortify }
1199 | if$
1200 | }
1201 | { "The " #4 organization chop.word sortify }
1202 | if$
1203 | }
1204 | { editor sort.format.names }
1205 | if$
1206 | }
1207 |
1208 | FUNCTION {presort}
1209 |
1210 | { calc.label
1211 | sort.label
1212 | " "
1213 | *
1214 | type$ "book" =
1215 |
1216 | type$ "inbook" =
1217 | or
1218 | 'author.editor.sort
1219 | { type$ "proceedings" =
1220 | 'editor.organization.sort
1221 | { type$ "manual" =
1222 | 'author.organization.sort
1223 | 'author.sort
1224 | if$
1225 | }
1226 | if$
1227 | }
1228 | if$
1229 |
1230 | *
1231 |
1232 | " "
1233 | *
1234 | year field.or.null sortify
1235 | *
1236 | " "
1237 | *
1238 | title field.or.null
1239 | sort.format.title
1240 | *
1241 | #1 entry.max$ substring$
1242 | 'sort.key$ :=
1243 | }
1244 |
1245 | ITERATE {presort}
1246 |
1247 | SORT
1248 |
1249 | STRINGS { longest.label last.sort.label next.extra }
1250 |
1251 | INTEGERS { longest.label.width last.extra.num }
1252 |
1253 | FUNCTION {initialize.longest.label}
1254 | { "" 'longest.label :=
1255 | #0 int.to.chr$ 'last.sort.label :=
1256 | "" 'next.extra :=
1257 | #0 'longest.label.width :=
1258 | #0 'last.extra.num :=
1259 | }
1260 |
1261 | FUNCTION {forward.pass}
1262 | { last.sort.label sort.label =
1263 | { last.extra.num #1 + 'last.extra.num :=
1264 | last.extra.num int.to.chr$ 'extra.label :=
1265 | }
1266 | { "a" chr.to.int$ 'last.extra.num :=
1267 | "" 'extra.label :=
1268 | sort.label 'last.sort.label :=
1269 | }
1270 | if$
1271 | }
1272 |
1273 | FUNCTION {reverse.pass}
1274 | { next.extra "b" =
1275 | { "a" 'extra.label := }
1276 | 'skip$
1277 | if$
1278 | label extra.label * 'label :=
1279 | label width$ longest.label.width >
1280 | { label 'longest.label :=
1281 | label width$ 'longest.label.width :=
1282 | }
1283 | 'skip$
1284 | if$
1285 | extra.label 'next.extra :=
1286 | }
1287 |
1288 | EXECUTE {initialize.longest.label}
1289 |
1290 | ITERATE {forward.pass}
1291 |
1292 | REVERSE {reverse.pass}
1293 |
1294 | FUNCTION {begin.bib}
1295 |
1296 | { et.al.char.used
1297 | { "\newcommand{\etalchar}[1]{$^{#1}$}" write$ newline$ }
1298 | 'skip$
1299 | if$
1300 | preamble$ empty$
1301 |
1302 | 'skip$
1303 | { preamble$ write$ newline$ }
1304 | if$
1305 |
1306 | "\begin{thebibliography}{" "}" * write$ newline$
1307 |
1308 | }
1309 |
1310 | EXECUTE {begin.bib}
1311 |
1312 | EXECUTE {init.state.consts}
1313 |
1314 | ITERATE {call.type$}
1315 |
1316 | FUNCTION {end.bib}
1317 | { newline$
1318 | "\end{thebibliography}" write$ newline$
1319 | }
1320 |
1321 | EXECUTE {end.bib}
1322 |
1323 |
--------------------------------------------------------------------------------
/paper/acl2015.sty:
--------------------------------------------------------------------------------
1 | % File acl2015.sty
2 | % December 2014
3 |
4 | % This is the LaTeX style file for ACL 2015. It is nearly identical to
5 | % the style files for ACL 2014, EACL 2006, ACL2005, ACL 2002, ACL
6 | % 2001, ACL 2000, EACL 95 and EACL 99.
7 | %
8 | % Changes made include: adapt layout to A4 and centimeters, widen abstract
9 |
10 | % This is the LaTeX style file for ACL 2000. It is nearly identical to the
11 | % style files for EACL 95 and EACL 99. Minor changes include editing the
12 | % instructions to reflect use of \documentclass rather than \documentstyle
13 | % and removing the white space before the title on the first page
14 | % -- John Chen, June 29, 2000
15 |
16 | % To convert from submissions prepared using the style file aclsub.sty
17 | % prepared for the ACL 2000 conference, proceed as follows:
18 | % 1) Remove submission-specific information: \whichsession, \id,
19 | % \wordcount, \otherconferences, \area, \keywords
20 | % 2) \summary should be removed. The summary material should come
21 | % after \maketitle and should be in the ``abstract'' environment
22 | % 3) Check all citations. This style should handle citations correctly
23 | % and also allows multiple citations separated by semicolons.
24 | % 4) Check figures and examples. Because the final format is double-
25 | % column, some adjustments may have to be made to fit text in the column
26 | % or to choose full-width (figure*) figures.
27 | % 5) Change the style reference from aclsub to acl2000, and be sure
28 | % this style file is in your TeX search path
29 |
30 |
31 | % This is the LaTeX style file for EACL-95. It is identical to the
32 | % style file for ANLP '94 except that the margins are adjusted for A4
33 | % paper. -- abney 13 Dec 94
34 |
35 | % The ANLP '94 style file is a slightly modified
36 | % version of the style used for AAAI and IJCAI, using some changes
37 | % prepared by Fernando Pereira and others and some minor changes
38 | % by Paul Jacobs.
39 |
40 | % Papers prepared using the aclsub.sty file and acl.bst bibtex style
41 | % should be easily converted to final format using this style.
42 | % (1) Submission information (\wordcount, \subject, and \makeidpage)
43 | % should be removed.
44 | % (2) \summary should be removed. The summary material should come
45 | % after \maketitle and should be in the ``abstract'' environment
46 | % (between \begin{abstract} and \end{abstract}).
47 | % (3) Check all citations. This style should handle citations correctly
48 | % and also allows multiple citations separated by semicolons.
49 | % (4) Check figures and examples. Because the final format is double-
50 | % column, some adjustments may have to be made to fit text in the column
51 | % or to choose full-width (figure*) figures.
52 |
53 | % Place this in a file called aclap.sty in the TeX search path.
54 | % (Placing it in the same directory as the paper should also work.)
55 |
56 | % Prepared by Peter F. Patel-Schneider, liberally using the ideas of
57 | % other style hackers, including Barbara Beeton.
58 | % This style is NOT guaranteed to work. It is provided in the hope
59 | % that it will make the preparation of papers easier.
60 | %
61 | % There are undoubtedly bugs in this style. If you make bug fixes,
62 | % improvements, etc. please let me know. My e-mail address is:
63 | % pfps@research.att.com
64 |
65 | % Papers are to be prepared using the ``acl'' bibliography style,
66 | % as follows:
67 | % \documentclass[11pt]{article}
68 | % \usepackage{acl2000}
69 | % \title{Title}
70 | % \author{Author 1 \and Author 2 \\ Address line \\ Address line \And
71 | % Author 3 \\ Address line \\ Address line}
72 | % \begin{document}
73 | % ...
74 | % \bibliography{bibliography-file}
75 | % \bibliographystyle{acl}
76 | % \end{document}
77 |
78 | % Author information can be set in various styles:
79 | % For several authors from the same institution:
80 | % \author{Author 1 \and ... \and Author n \\
81 | % Address line \\ ... \\ Address line}
82 | % if the names do not fit well on one line use
83 | % Author 1 \\ {\bf Author 2} \\ ... \\ {\bf Author n} \\
84 | % For authors from different institutions:
85 | % \author{Author 1 \\ Address line \\ ... \\ Address line
86 | % \And ... \And
87 | % Author n \\ Address line \\ ... \\ Address line}
88 | % To start a separate ``row'' of authors use \AND, as in
89 | % \author{Author 1 \\ Address line \\ ... \\ Address line
90 | % \AND
91 | % Author 2 \\ Address line \\ ... \\ Address line \And
92 | % Author 3 \\ Address line \\ ... \\ Address line}
93 |
94 | % If the title and author information does not fit in the area allocated,
95 | % place \setlength\titlebox{<dim>} right after
96 | % \usepackage{acl2015}
97 | % where <dim> can be something larger than 5cm
98 |
99 | \typeout{Conference Style for ACL 2015 -- released December 7, 2014}
100 |
101 | % NOTE: Some laser printers have a serious problem printing TeX output.
102 | % These printing devices, commonly known as ``write-white'' laser
103 | % printers, tend to make characters too light. To get around this
104 | % problem, a darker set of fonts must be created for these devices.
105 | %
106 |
107 |
108 |
109 | % A4 modified by Eneko; again modified by Alexander for 5cm titlebox
110 | \setlength{\paperwidth}{21cm} % A4
111 | \setlength{\paperheight}{29.7cm}% A4
112 | \setlength\topmargin{-0.5cm}
113 | \setlength\oddsidemargin{0cm}
114 | \setlength\textheight{24.7cm}
115 | \setlength\textwidth{16.0cm}
116 | \setlength\columnsep{0.6cm}
117 | \newlength\titlebox
118 | \setlength\titlebox{5cm}
119 | \setlength\headheight{5pt}
120 | \setlength\headsep{0pt}
121 | \thispagestyle{empty}
122 | \pagestyle{empty}
123 |
124 |
125 | \flushbottom \twocolumn \sloppy
126 |
127 | % We're never going to need a table of contents, so just flush it to
128 | % save space --- suggested by drstrip@sandia-2
129 | \def\addcontentsline#1#2#3{}
130 |
131 | % Title stuff, taken from deproc.
132 | \def\maketitle{\par
133 | \begingroup
134 | \def\thefootnote{\fnsymbol{footnote}}
135 | \def\@makefnmark{\hbox to 0pt{$^{\@thefnmark}$\hss}}
136 | \twocolumn[\@maketitle] \@thanks
137 | \endgroup
138 | \setcounter{footnote}{0}
139 | \let\maketitle\relax \let\@maketitle\relax
140 | \gdef\@thanks{}\gdef\@author{}\gdef\@title{}\let\thanks\relax}
141 | \def\@maketitle{\vbox to \titlebox{\hsize\textwidth
142 | \linewidth\hsize \vskip 0.125in minus 0.125in \centering
143 | {\Large\bf \@title \par} \vskip 0.2in plus 1fil minus 0.1in
144 | {\def\and{\unskip\enspace{\rm and}\enspace}%
145 | \def\And{\end{tabular}\hss \egroup \hskip 1in plus 2fil
146 | \hbox to 0pt\bgroup\hss \begin{tabular}[t]{c}\bf}%
147 | \def\AND{\end{tabular}\hss\egroup \hfil\hfil\egroup
148 | \vskip 0.25in plus 1fil minus 0.125in
149 | \hbox to \linewidth\bgroup\large \hfil\hfil
150 | \hbox to 0pt\bgroup\hss \begin{tabular}[t]{c}\bf}
151 | \hbox to \linewidth\bgroup\large \hfil\hfil
152 | \hbox to 0pt\bgroup\hss \begin{tabular}[t]{c}\bf\@author
153 | \end{tabular}\hss\egroup
154 | \hfil\hfil\egroup}
155 | \vskip 0.3in plus 2fil minus 0.1in
156 | }}
157 |
158 | % margins for abstract
159 | \renewenvironment{abstract}%
160 | {\centerline{\large\bf Abstract}%
161 | \begin{list}{}%
162 | {\setlength{\rightmargin}{0.6cm}%
163 | \setlength{\leftmargin}{0.6cm}}%
164 | \item[]\ignorespaces}%
165 | {\unskip\end{list}}
166 |
167 | %\renewenvironment{abstract}{\centerline{\large\bf
168 | % Abstract}\vspace{0.5ex}\begin{quote}}{\par\end{quote}\vskip 1ex}
169 |
170 |
171 | % bibliography
172 |
173 | \def\thebibliography#1{\section*{References}
174 | \global\def\@listi{\leftmargin\leftmargini
175 | \labelwidth\leftmargini \advance\labelwidth-\labelsep
176 | \topsep 1pt plus 2pt minus 1pt
177 | \parsep 0.25ex plus 1pt \itemsep 0.25ex plus 1pt}
178 | \list {[\arabic{enumi}]}{\settowidth\labelwidth{[#1]}\leftmargin\labelwidth
179 | \advance\leftmargin\labelsep\usecounter{enumi}}
180 | \def\newblock{\hskip .11em plus .33em minus -.07em}
181 | \sloppy
182 | \sfcode`\.=1000\relax}
183 |
184 | \def\@up#1{\raise.2ex\hbox{#1}}
185 |
186 | % most of cite format is from aclsub.sty by SMS
187 |
188 | % don't box citations, separate with ; and a space
189 | % also, make the penalty between citations negative: a good place to break
190 | % changed comma back to semicolon pj 2/1/90
191 | % \def\@citex[#1]#2{\if@filesw\immediate\write\@auxout{\string\citation{#2}}\fi
192 | % \def\@citea{}\@cite{\@for\@citeb:=#2\do
193 | % {\@citea\def\@citea{;\penalty\@citeseppen\ }\@ifundefined
194 | % {b@\@citeb}{{\bf ?}\@warning
195 | % {Citation `\@citeb' on page \thepage \space undefined}}%
196 | % {\csname b@\@citeb\endcsname}}}{#1}}
197 |
198 | % don't box citations, separate with ; and a space
199 | % Replaced for multiple citations (pj)
200 | % don't box citations and also add space, semicolon between multiple citations
201 | \def\@citex[#1]#2{\if@filesw\immediate\write\@auxout{\string\citation{#2}}\fi
202 | \def\@citea{}\@cite{\@for\@citeb:=#2\do
203 | {\@citea\def\@citea{; }\@ifundefined
204 | {b@\@citeb}{{\bf ?}\@warning
205 | {Citation `\@citeb' on page \thepage \space undefined}}%
206 | {\csname b@\@citeb\endcsname}}}{#1}}
207 |
208 | % Allow short (name-less) citations, when used in
209 | % conjunction with a bibliography style that creates labels like
210 | % \citename{, }
211 | %
212 | \let\@internalcite\cite
213 | \def\cite{\def\citename##1{##1, }\@internalcite}
214 | \def\shortcite{\def\citename##1{}\@internalcite}
215 | \def\newcite{\def\citename##1{{\frenchspacing##1} (}\@internalciteb}
216 |
217 | % Macros for \newcite, which leaves name in running text, and is
218 | % otherwise like \shortcite.
219 | \def\@citexb[#1]#2{\if@filesw\immediate\write\@auxout{\string\citation{#2}}\fi
220 | \def\@citea{}\@newcite{\@for\@citeb:=#2\do
221 | {\@citea\def\@citea{;\penalty\@m\ }\@ifundefined
222 | {b@\@citeb}{{\bf ?}\@warning
223 | {Citation `\@citeb' on page \thepage \space undefined}}%
224 | {\csname b@\@citeb\endcsname}}}{#1}}
225 | \def\@internalciteb{\@ifnextchar [{\@tempswatrue\@citexb}{\@tempswafalse\@citexb[]}}
226 |
227 | \def\@newcite#1#2{{#1\if@tempswa, #2\fi)}}
228 |
229 | \def\@biblabel#1{\def\citename##1{##1}[#1]\hfill}
230 |
231 | %%% More changes made by SMS (originals in latex.tex)
232 | % Use parentheses instead of square brackets in the text.
233 | \def\@cite#1#2{({#1\if@tempswa , #2\fi})}
234 |
235 | % Don't put a label in the bibliography at all. Just use the unlabeled format
236 | % instead.
237 | \def\thebibliography#1{\vskip\parskip%
238 | \vskip\baselineskip%
239 | \def\baselinestretch{1}%
240 | \ifx\@currsize\normalsize\@normalsize\else\@currsize\fi%
241 | \vskip-\parskip%
242 | \vskip-\baselineskip%
243 | \section*{References\@mkboth
244 | {References}{References}}\list
245 | {}{\setlength{\labelwidth}{0pt}\setlength{\leftmargin}{\parindent}
246 | \setlength{\itemindent}{-\parindent}}
247 | \def\newblock{\hskip .11em plus .33em minus -.07em}
248 | \sloppy\clubpenalty4000\widowpenalty4000
249 | \sfcode`\.=1000\relax}
250 | \let\endthebibliography=\endlist
251 |
252 | % Allow for a bibliography of sources of attested examples
253 | \def\thesourcebibliography#1{\vskip\parskip%
254 | \vskip\baselineskip%
255 | \def\baselinestretch{1}%
256 | \ifx\@currsize\normalsize\@normalsize\else\@currsize\fi%
257 | \vskip-\parskip%
258 | \vskip-\baselineskip%
259 | \section*{Sources of Attested Examples\@mkboth
260 | {Sources of Attested Examples}{Sources of Attested Examples}}\list
261 | {}{\setlength{\labelwidth}{0pt}\setlength{\leftmargin}{\parindent}
262 | \setlength{\itemindent}{-\parindent}}
263 | \def\newblock{\hskip .11em plus .33em minus -.07em}
264 | \sloppy\clubpenalty4000\widowpenalty4000
265 | \sfcode`\.=1000\relax}
266 | \let\endthesourcebibliography=\endlist
267 |
268 | \def\@lbibitem[#1]#2{\item[]\if@filesw
269 | { \def\protect##1{\string ##1\space}\immediate
270 | \write\@auxout{\string\bibcite{#2}{#1}}\fi\ignorespaces}}
271 |
272 | \def\@bibitem#1{\item\if@filesw \immediate\write\@auxout
273 | {\string\bibcite{#1}{\the\c@enumi}}\fi\ignorespaces}
274 |
275 | % sections with less space
276 | \def\section{\@startsection {section}{1}{\z@}{-2.0ex plus
277 | -0.5ex minus -.2ex}{1.5ex plus 0.3ex minus .2ex}{\large\bf\raggedright}}
278 | \def\subsection{\@startsection{subsection}{2}{\z@}{-1.8ex plus
279 | -0.5ex minus -.2ex}{0.8ex plus .2ex}{\normalsize\bf\raggedright}}
280 | %% changed by KO to negative values to get the initial parindent right
281 | \def\subsubsection{\@startsection{subsubsection}{3}{\z@}{-1.5ex plus
282 | -0.5ex minus -.2ex}{0.5ex plus .2ex}{\normalsize\bf\raggedright}}
283 | \def\paragraph{\@startsection{paragraph}{4}{\z@}{1.5ex plus
284 | 0.5ex minus .2ex}{-1em}{\normalsize\bf}}
285 | \def\subparagraph{\@startsection{subparagraph}{5}{\parindent}{1.5ex plus
286 | 0.5ex minus .2ex}{-1em}{\normalsize\bf}}
287 |
288 | % Footnotes
289 | \footnotesep 6.65pt %
290 | \skip\footins 9pt plus 4pt minus 2pt
291 | \def\footnoterule{\kern-3pt \hrule width 5pc \kern 2.6pt }
292 | \setcounter{footnote}{0}
293 |
294 | % Lists and paragraphs
295 | \parindent 1em
296 | \topsep 4pt plus 1pt minus 2pt
297 | \partopsep 1pt plus 0.5pt minus 0.5pt
298 | \itemsep 2pt plus 1pt minus 0.5pt
299 | \parsep 2pt plus 1pt minus 0.5pt
300 |
301 | \leftmargin 2em \leftmargini\leftmargin \leftmarginii 2em
302 | \leftmarginiii 1.5em \leftmarginiv 1.0em \leftmarginv .5em \leftmarginvi .5em
303 | \labelwidth\leftmargini\advance\labelwidth-\labelsep \labelsep 5pt
304 |
305 | \def\@listi{\leftmargin\leftmargini}
306 | \def\@listii{\leftmargin\leftmarginii
307 | \labelwidth\leftmarginii\advance\labelwidth-\labelsep
308 | \topsep 2pt plus 1pt minus 0.5pt
309 | \parsep 1pt plus 0.5pt minus 0.5pt
310 | \itemsep \parsep}
311 | \def\@listiii{\leftmargin\leftmarginiii
312 | \labelwidth\leftmarginiii\advance\labelwidth-\labelsep
313 | \topsep 1pt plus 0.5pt minus 0.5pt
314 | \parsep \z@ \partopsep 0.5pt plus 0pt minus 0.5pt
315 | \itemsep \topsep}
316 | \def\@listiv{\leftmargin\leftmarginiv
317 | \labelwidth\leftmarginiv\advance\labelwidth-\labelsep}
318 | \def\@listv{\leftmargin\leftmarginv
319 | \labelwidth\leftmarginv\advance\labelwidth-\labelsep}
320 | \def\@listvi{\leftmargin\leftmarginvi
321 | \labelwidth\leftmarginvi\advance\labelwidth-\labelsep}
322 |
323 | \abovedisplayskip 7pt plus2pt minus5pt%
324 | \belowdisplayskip \abovedisplayskip
325 | \abovedisplayshortskip 0pt plus3pt%
326 | \belowdisplayshortskip 4pt plus3pt minus3pt%
327 |
328 | % Less leading in most fonts (due to the narrow columns)
329 | % The choices were between 1-pt and 1.5-pt leading
330 | \def\@normalsize{\@setsize\normalsize{11pt}\xpt\@xpt}
331 | \def\small{\@setsize\small{10pt}\ixpt\@ixpt}
332 | \def\footnotesize{\@setsize\footnotesize{10pt}\ixpt\@ixpt}
333 | \def\scriptsize{\@setsize\scriptsize{8pt}\viipt\@viipt}
334 | \def\tiny{\@setsize\tiny{7pt}\vipt\@vipt}
335 | \def\large{\@setsize\large{14pt}\xiipt\@xiipt}
336 | \def\Large{\@setsize\Large{16pt}\xivpt\@xivpt}
337 | \def\LARGE{\@setsize\LARGE{20pt}\xviipt\@xviipt}
338 | \def\huge{\@setsize\huge{23pt}\xxpt\@xxpt}
339 | \def\Huge{\@setsize\Huge{28pt}\xxvpt\@xxvpt}
340 |
--------------------------------------------------------------------------------
/paper/figures/T_50x_avg_regret_diff.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/T_50x_avg_regret_diff.png
--------------------------------------------------------------------------------
/paper/figures/T_50x_avg_regret_ic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/T_50x_avg_regret_ic.png
--------------------------------------------------------------------------------
/paper/figures/T_50x_avg_regret_ti.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/T_50x_avg_regret_ti.png
--------------------------------------------------------------------------------
/paper/figures/T_50x_cum_regret_diff.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/T_50x_cum_regret_diff.png
--------------------------------------------------------------------------------
/paper/figures/T_50x_cum_regret_ic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/T_50x_cum_regret_ic.png
--------------------------------------------------------------------------------
/paper/figures/T_50x_cum_regret_ti.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/T_50x_cum_regret_ti.png
--------------------------------------------------------------------------------
/paper/figures/T_50x_final_regret_diff.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/T_50x_final_regret_diff.png
--------------------------------------------------------------------------------
/paper/figures/T_50x_final_regret_ic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/T_50x_final_regret_ic.png
--------------------------------------------------------------------------------
/paper/figures/T_50x_final_regret_ti.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/T_50x_final_regret_ti.png
--------------------------------------------------------------------------------
/paper/figures/d_50x_avg_regret_diff.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/d_50x_avg_regret_diff.png
--------------------------------------------------------------------------------
/paper/figures/d_50x_avg_regret_ic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/d_50x_avg_regret_ic.png
--------------------------------------------------------------------------------
/paper/figures/d_50x_avg_regret_ti.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/d_50x_avg_regret_ti.png
--------------------------------------------------------------------------------
/paper/figures/d_50x_cum_regret_diff.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/d_50x_cum_regret_diff.png
--------------------------------------------------------------------------------
/paper/figures/d_50x_cum_regret_ic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/d_50x_cum_regret_ic.png
--------------------------------------------------------------------------------
/paper/figures/d_50x_cum_regret_ti.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/d_50x_cum_regret_ti.png
--------------------------------------------------------------------------------
/paper/figures/d_50x_final_regret_diff.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/d_50x_final_regret_diff.png
--------------------------------------------------------------------------------
/paper/figures/d_50x_final_regret_ic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/d_50x_final_regret_ic.png
--------------------------------------------------------------------------------
/paper/figures/d_50x_final_regret_ti.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/d_50x_final_regret_ti.png
--------------------------------------------------------------------------------
/paper/figures/k_50x_avg_regret_diff.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/k_50x_avg_regret_diff.png
--------------------------------------------------------------------------------
/paper/figures/k_50x_avg_regret_ic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/k_50x_avg_regret_ic.png
--------------------------------------------------------------------------------
/paper/figures/k_50x_avg_regret_ti.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/k_50x_avg_regret_ti.png
--------------------------------------------------------------------------------
/paper/figures/k_50x_cum_regret_diff.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/k_50x_cum_regret_diff.png
--------------------------------------------------------------------------------
/paper/figures/k_50x_cum_regret_ic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/k_50x_cum_regret_ic.png
--------------------------------------------------------------------------------
/paper/figures/k_50x_cum_regret_ti.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/k_50x_cum_regret_ti.png
--------------------------------------------------------------------------------
/paper/figures/k_50x_final_regret_diff.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/k_50x_final_regret_diff.png
--------------------------------------------------------------------------------
/paper/figures/k_50x_final_regret_ic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/k_50x_final_regret_ic.png
--------------------------------------------------------------------------------
/paper/figures/k_50x_final_regret_ti.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/k_50x_final_regret_ti.png
--------------------------------------------------------------------------------
/paper/figures/yahoo-interval-chaining.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/yahoo-interval-chaining.png
--------------------------------------------------------------------------------
/paper/figures/yahoo-top-interval.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/figures/yahoo-top-interval.png
--------------------------------------------------------------------------------
/paper/paper.bib:
--------------------------------------------------------------------------------
1 | @article{DBLP:journals/corr/JosephKMNR16,
2 | author = {Matthew Joseph and
3 | Michael Kearns and
4 | Jamie Morgenstern and
5 | Seth Neel and
6 | Aaron Roth},
7 | title = {Rawlsian Fairness for Machine Learning},
8 | journal = {CoRR},
9 | volume = {abs/1610.09559},
10 | year = {2016},
11 | url = {http://arxiv.org/abs/1610.09559},
12 | timestamp = {Wed, 02 Nov 2016 09:51:26 +0100},
13 | biburl = {http://dblp.uni-trier.de/rec/bib/journals/corr/JosephKMNR16},
14 | bibsource = {dblp computer science bibliography, http://dblp.org}
15 | }
16 |
17 | @article{DBLP:journals/corr/abs-1003-5956,
18 | author = {Lihong Li and
19 | Wei Chu and
20 | John Langford},
21 | title = {An Unbiased, Data-Driven, Offline Evaluation Method of Contextual
22 | Bandit Algorithms},
23 | journal = {CoRR},
24 | volume = {abs/1003.5956},
25 | year = {2010},
26 | url = {http://arxiv.org/abs/1003.5956},
27 | timestamp = {Mon, 05 Dec 2011 18:04:18 +0100},
28 | biburl = {http://dblp.uni-trier.de/rec/bib/journals/corr/abs-1003-5956},
29 | bibsource = {dblp computer science bibliography, http://dblp.org}
30 | }
31 |
32 | @misc{yahoo,
33 | title = {Yahoo! Front Page Today Module User Click Log Dataset},
34 | author = {Yahoo!},
35 | howpublished = {\url{https://webscope.sandbox.yahoo.com/catalog.php?datatype=r}},
36 | note = {Accessed: 2017-04-03},
37 | year = {2009}
38 | }
39 |
--------------------------------------------------------------------------------
/paper/paper.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/paper/paper.pdf
--------------------------------------------------------------------------------
/paper/paper.tex:
--------------------------------------------------------------------------------
1 | %
2 | % File acl2015.tex
3 | %
4 | % Contact: car@ir.hit.edu.cn, gdzhou@suda.edu.cn
5 | %%
6 | %% Based on the style files for ACL-2014, which were, in turn,
7 | %% Based on the style files for ACL-2013, which were, in turn,
8 | %% Based on the style files for ACL-2012, which were, in turn,
9 | %% based on the style files for ACL-2011, which were, in turn,
10 | %% based on the style files for ACL-2010, which were, in turn,
11 | %% based on the style files for ACL-IJCNLP-2009, which were, in turn,
12 | %% based on the style files for EACL-2009 and IJCNLP-2008...
13 |
14 | %% Based on the style files for EACL 2006 by
15 | %%e.agirre@ehu.es or Sergi.Balari@uab.es
16 | %% and that of ACL 08 by Joakim Nivre and Noah Smith
17 |
18 | \documentclass[11pt]{article}
19 | \usepackage{acl2015}
20 | \usepackage{times}
21 | \usepackage{url}
22 | \usepackage{latexsym}
23 | \usepackage{hyperref}
24 | \usepackage{tikz}
25 | \usepackage{amsmath}
26 | \usepackage{tabulary}
27 |
28 | \usepackage[labelsep=quad,indention=10pt]{subfig}
29 | \captionsetup*[subfigure]{position=bottom}
30 |
31 | \newcommand{\specialcell}[2][c]{%
32 | \begin{tabular}[#1]{@{}c@{}}#2\end{tabular}}
33 |
34 | \usepackage{graphicx}
35 | \graphicspath{{figures/}}
36 | \DeclareGraphicsExtensions{.eps,.pdf,.jpg,.png}
37 |
38 | \DeclareMathOperator{\wsim}{sim}
39 |
40 | %\setlength\titlebox{5cm}
41 |
42 | % You can expand the titlebox if you need extra space
43 | % to show all the authors. Please do not make the titlebox
44 | % smaller than 5cm (the original size); we will check this
45 | % in the camera-ready version and ask you to change it back.
46 |
47 | \title{Further Empirical Analyses of Rawlsian Fairness for Machine Learning}
48 |
49 | \author{JT Cho \\
50 | {\tt joncho@} \\
51 | {\tt seas.upenn.edu} \\\And
52 | Karinna Loo \\
53 | {\tt kloo@} \\
54 | {\tt seas.upenn.edu} \\\And
55 | Veronica Wharton \\
56 | {\tt whartonv@} \\
57 | {\tt seas.upenn.edu} }
58 | \date{}
59 |
60 | \begin{document}
61 | \maketitle
62 |
63 | %\begin{abstract}
64 |
65 | %\noindent TODO: Abstract
66 |
67 | %\end{abstract}
68 |
69 | \section{Introduction}
70 |
71 | For our CIS 625 final project, our team --- JT Cho, Karinna Loo, and Veronica Wharton --- took a closer look at the topic of fairness in machine learning. The paper that piqued our interest was \textit{Rawlsian Fairness for Machine Learning} \cite{DBLP:journals/corr/JosephKMNR16}, which describes two online algorithms in the linear contextual bandit framework that learn at a rate comparable to (but necessarily worse than) the best algorithms without a fairness constraint while also satisfying a specified fairness constraint. The authors present both theoretical and empirical results. Our team sought to re-implement the algorithms presented by \newcite{DBLP:journals/corr/JosephKMNR16} and then expand upon their empirical analyses. We were also interested in exploring further fairness analyses using real-world data.
72 |
73 | \section{Project overview}
74 |
75 | Our project consisted of the following steps:
76 |
77 | \begin{enumerate}
78 | \item We read the paper \textit{Rawlsian Fairness for Machine Learning} \cite{DBLP:journals/corr/JosephKMNR16}.
79 | \item We implemented the \textsc{TopInterval}, \textsc{IntervalChaining}, and \textsc{RidgeFair} algorithms from the paper in Python.
80 | \item We ran our implementations on a Yahoo! dataset containing a fraction of the user click log for news articles displayed in the Featured Tab of the Today Module on the Yahoo! Front Page during the first ten days in May 2009 \cite{yahoo}, to see how well they performed on real data.
81 | \item To empirically evaluate our implementations, we ran experiments similar to those in \cite{DBLP:journals/corr/JosephKMNR16} with randomly drawn contexts.
82 | \item We compiled our findings into a written report.
83 | \end{enumerate}
84 |
85 | \section{Algorithm implementations}
86 |
87 | The code for our implementations can be found here: \url{https://github.com/jtcho/FairMachineLearning/blob/master/fairml.py}
88 |
89 | All algorithms and code were written in Python 3, using NumPy\footnote{\url{http://www.numpy.org}}, SciPy\footnote{\url{https://www.scipy.org}}, and various other Python libraries.
90 |
91 | \section{Implementation: TopInterval}
92 |
93 | The \textsc{TopInterval} learning algorithm was implemented true to form as presented in \newcite{DBLP:journals/corr/JosephKMNR16}. Two details are of note. First, to ensure that all matrices used in computation are nonsingular, the first $d$ rounds are always chosen to be exploration rounds, where $d$ is the number of features. Second, we found it necessary to pick each arm once in order to observe data for each arm.
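
As a concrete illustration, the following is a minimal sketch of the exploitation-round selection rule under these conventions. It assumes unit-variance Gaussian noise and omits the exploration rounds; the helper names are ours rather than those in \texttt{fairml.py}.

\begin{verbatim}
import numpy as np
from scipy import stats

def ols_interval(X, y, x, delta, k, T):
    # OLS estimate of beta from this arm's history (X, y), plus a
    # confidence interval for the predicted reward x . beta at
    # level 1 - delta/(2kT); unit-variance Gaussian noise assumed.
    beta_hat = np.linalg.solve(X.T @ X, X.T @ y)
    y_hat = x @ beta_hat
    q = stats.norm.ppf(1 - delta / (2 * k * T))
    w = q * np.sqrt(x @ np.linalg.solve(X.T @ X, x))
    return y_hat - w, y_hat + w

def top_interval_pick(histories, contexts, delta, k, T):
    # Exploitation round: play the arm whose confidence interval
    # has the largest upper endpoint.
    uppers = [ols_interval(X, y, x, delta, k, T)[1]
              for (X, y), x in zip(histories, contexts)]
    return int(np.argmax(uppers))
\end{verbatim}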
94 |
95 | \section{Implementation: IntervalChaining}
96 |
97 | Given \textsc{TopInterval}, the implementation of \textsc{IntervalChaining} was simple: it sufficed to change the strategy for picking an arm in each round to picking uniformly at random from the chain containing the top interval.
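
A sketch of the chaining step follows: sweeping the intervals in order of lower endpoint groups transitively overlapping intervals, and the arm is then drawn uniformly from the group containing the top interval. The function below is ours and is not taken verbatim from \texttt{fairml.py}.

\begin{verbatim}
import numpy as np

def interval_chaining_pick(intervals):
    # intervals: list of (lower, upper) pairs, one per arm.
    order = sorted(range(len(intervals)),
                   key=lambda i: intervals[i][0])
    groups, current, reach = [], [], -np.inf
    for i in order:
        lo, hi = intervals[i]
        if current and lo > reach:  # gap: previous chain closed
            groups.append(current)
            current = []
        current.append(i)
        reach = max(reach, hi)
    groups.append(current)
    top = max(range(len(intervals)),
              key=lambda i: intervals[i][1])
    chain = next(g for g in groups if top in g)
    return int(np.random.choice(chain))  # uniform within chain
\end{verbatim}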
98 |
99 | \section{Implementation: RidgeFair}
100 |
101 | The \textsc{RidgeFair} algorithm was also implemented as presented in \newcite{DBLP:journals/corr/JosephKMNR16}. Its implementation is very similar to that of \textsc{IntervalChaining}, save that its narrower confidence intervals allow for the derivation of tighter regret bounds. Two small details to note: first, we assume for simplicity (and without loss of generality) that the noise is $R$-sub-Gaussian with parameter $R = 1$; second, we play uniformly at random among all arms in the set of actions chained to the maximum.
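
As a sketch, the ridge-based interval construction looks as follows; we do not reproduce the paper's exact confidence-radius term here, so \texttt{radius} is a stand-in parameter.

\begin{verbatim}
import numpy as np

def ridge_interval(X, y, x, radius, reg=1.0):
    # Ridge estimate theta = (X'X + reg*I)^{-1} X'y, with an
    # interval whose half-width is the V-norm of the context
    # scaled by `radius` (a stand-in for the sub-Gaussian
    # confidence term with R = 1).
    V = X.T @ X + reg * np.eye(X.shape[1])
    theta = np.linalg.solve(V, X.T @ y)
    w = radius * np.sqrt(x @ np.linalg.solve(V, x))
    return x @ theta - w, x @ theta + w
\end{verbatim}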
102 |
103 | \section{Yahoo! Dataset}
104 |
105 | To expand upon the initial work done by \newcite{DBLP:journals/corr/JosephKMNR16}, we endeavored to test the presented algorithms on a real dataset. A Yahoo! dataset containing logs of user visits to the front page was procured to evaluate our contextual bandit algorithms \cite{yahoo}. Each log entry details the following:
106 |
107 | \begin{center}
108 | \begin{table}[h]
109 | \fontsize{6}{10}\selectfont
110 | \begin{tabulary}{0.8\textwidth}{|l|l|l|l|l|}
111 | \hline \textbf{unix\_timestamp} & \textbf{displayed\_id} & \textbf{user\_clicked} & \textbf{user\_features} & \textbf{article\_pool}\\\hline
112 | 1241162400&109513&0&$\dots$&[$\dots$]\\\hline
113 | \end{tabulary}
114 | \end{table}
115 | \end{center}
116 |
117 | In each event, a user specified by $6$ features is presented an article from a pool of around $20$ distinct articles, each of which has its own $6$-dimensional feature vector. The event also records whether or not the user clicked the featured article.
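
In code, we can represent each logged event with a simple record type mirroring the fields in the table above (a sketch; the type and field names are ours):

\begin{verbatim}
from collections import namedtuple

# One logged impression: the timestamp, the displayed article's
# id, whether it was clicked, the user's feature vector, and the
# pool of candidate articles.
Event = namedtuple(
    'Event',
    ['timestamp', 'displayed_id', 'clicked',
     'user_features', 'article_pool'])
\end{verbatim}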
118 |
119 | In a fashion similar to that presented in \newcite{DBLP:journals/corr/abs-1003-5956}, we devised an evaluation scheme for the various learning algorithms. In our procedure, a random sample is drawn from the set of logged events. The learning algorithm scans through the sampled events linearly, evaluating its prediction for each one. If the algorithm's picked arm matches the article displayed in the event, the logged event is added to the history.
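
A sketch of this replay loop, assuming the learner exposes \texttt{pick} and \texttt{update} methods (the interface names are ours):

\begin{verbatim}
def replay_evaluate(algorithm, events):
    # events: (user_features, shown_article, clicked, pool)
    history, clicks = [], 0
    for user, shown, clicked, pool in events:
        arm = algorithm.pick(user, pool)
        if pool[arm] == shown:  # match: the event is usable
            history.append((user, shown, clicked))
            clicks += clicked
            algorithm.update(user, shown, clicked)
    return clicks, len(history)
\end{verbatim}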
120 |
121 | Initial attempts to use this approach failed for a couple of reasons. First, the Yahoo! dataset contains a highly disproportionate number of negative samples relative to positive ones, so the learning algorithm would not retain useful information over many iterations, having been trained almost exclusively on negative samples. Second, a direct application of the \textsc{TopInterval} and \textsc{IntervalChaining} algorithms relies on the assumption that the articles in the pool are drawn from $20$ distinct underlying groups, each with its own quality function. This assumption proved unreasonable: an article's index in the article pool had no bearing on its actual likelihood of being clicked by the user when picked. The initial context also does not lend itself to a fairness analysis. As a consequence, direct applications of the learning algorithms performed very poorly.
122 |
123 | To mitigate the first issue, we altered our sampling procedure to sample positive and negative events separately and then shuffle them together. A brief argument for the validity of this approach follows. While the underlying distribution of observed user visits consists mostly of negative results, the algorithm's performance should be independent of that underlying distribution, depending only on the user's features and the articles in the pool it chooses from. Hence, curating the input to the learning algorithm so that it learns equally from positive and negative events suffices.
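
A minimal sketch of this curation step (the function and argument names are ours):

\begin{verbatim}
import numpy as np

def balanced_sample(events, clicked, n_per_class, seed=0):
    # Draw equal numbers of positive and negative events, then
    # shuffle, so the learner sees both outcomes throughout
    # training.
    rng = np.random.RandomState(seed)
    pos = np.flatnonzero(clicked == 1)
    neg = np.flatnonzero(clicked == 0)
    idx = np.concatenate([
        rng.choice(pos, n_per_class, replace=False),
        rng.choice(neg, n_per_class, replace=False)])
    rng.shuffle(idx)
    return [events[i] for i in idx]
\end{verbatim}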
124 |
125 | To resolve the second issue, we simplified the problem context by clustering the articles. Across the roughly one and a half million logged events, there are approximately $20$ distinct articles in the article pools. By choosing a smaller number of clusters, we altered the scenario so that an event counts as a success if the user clicked an article from the same pool chosen by the algorithm. In grouping the articles together, we reduced the number of available arms and also recovered the notion of ``groups'' implicit in \newcite{DBLP:journals/corr/JosephKMNR16}'s contextual bandits framework. The emergent notion of fairness then concerns discrimination against any particular cluster of articles.
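
The clustering itself can be sketched with the scikit-learn version pinned in \texttt{requirements.txt}; the feature matrix below is a placeholder for the actual article vectors.

\begin{verbatim}
import numpy as np
from sklearn.cluster import KMeans

article_features = np.random.rand(20, 6)  # placeholder
kmeans = KMeans(n_clusters=3, random_state=0)
pool_of_article = kmeans.fit_predict(article_features)
# pool_of_article[i] is the cluster ("arm") of article i.
\end{verbatim}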
126 |
127 | These modifications resulted in significant improvements in the performance of our implementations on the Yahoo! dataset, as shown in Figure~\ref{fig:yahoo}.
128 |
129 | Another novel modification we made was the use of a logit model instead of the simple linear regression used in \newcite{DBLP:journals/corr/JosephKMNR16}. We preserve the original fairness argument of the \textsc{IntervalChaining} algorithm by simply rescaling the output of the OLS estimator and the confidence intervals to $[0, 1]$ via the inverse logit. That is,
130 | $$w_{t,i} = \mathcal{Q}_{\mathcal{F}_{t,i}}\left(\frac{\delta}{2kT}\right)$$
131 | $$[\ell_{i}^{t}, u_{i}^{t}] = [\Phi(\hat{y}_{t,i} - w_{t,i}), \Phi(\hat{y}_{t,i} + w_{t,i})]$$
132 | where $\Phi(x) = \frac{e^{x}}{1 + e^{x}} = \operatorname{logistic}(x)$. It suffices to note that both OLS and logistic regression are instances of the generalized linear model (GLM).
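
A sketch of the rescaling; the numerically stable form $1/(1+e^{-x})$ used below equals $\frac{e^{x}}{1+e^{x}}$.

\begin{verbatim}
import numpy as np

def logistic(x):
    # Inverse logit: Phi(x) = e^x / (1 + e^x).
    return 1.0 / (1.0 + np.exp(-x))

def rescaled_interval(y_hat, w):
    # Map [y_hat - w, y_hat + w] into [0, 1]; Phi is monotone,
    # so interval order (and hence the chaining) is preserved.
    return logistic(y_hat - w), logistic(y_hat + w)
\end{verbatim}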
133 |
134 | \begin{figure*}
135 | \includegraphics[width=\textwidth]{yahoo-interval-chaining.png}
136 | \caption{Performance metrics of the logistic-regression-based interval-chaining algorithm with $3$ clusters over 10,000 iterations. Shown on the left is a graph depicting the performance of the learning algorithm vs. that of the ``best'' player, whose picked article is clicked by the user in every round. The regret is simply the difference in the cumulative number of successes between the two. In practice, this is an unfair comparison to make, as it is unreasonable to expect that the user would click the featured article on every visit, so our results stand even stronger in comparison. On the right is a graph denoting the cumulative fraction of successful picks by the algorithm vs. the baseline (randomly selecting one of the three pools at each step). The learning algorithm appears to converge to approximately $50\%$ accuracy, which is considerably higher than the baseline. \label{fig:yahoo}}
137 | \end{figure*}
138 |
139 | \section{Experimental results}
140 |
141 | We ran experiments that compared the regret of \textsc{IntervalChaining} (IC) with the regret of \textsc{TopInterval} (TI). As in \newcite{DBLP:journals/corr/JosephKMNR16}, we present three sets of empirical results:
142 | \begin{itemize}
143 | \item Varying $T$ (the number of rounds): we measured the average regret of \textsc{IntervalChaining} and \textsc{TopInterval} as a function of increasing $T$. (See Figure \ref{fig:free_T}.)
144 | \item Varying $k$ (the number of arms/groups): we measured the average regret of \textsc{IntervalChaining} and \textsc{TopInterval} as a function of increasing $k$. (See Figure \ref{fig:free_k}.)
145 | \item Varying $d$ (the number of features): we measured the average regret of \textsc{IntervalChaining} and \textsc{TopInterval} as a function of increasing $d$. (See Figure \ref{fig:free_d}.)
146 | \end{itemize}
147 |
148 | For each varied parameter ($T$, $k$, or $d$), we present nine metrics as a function of that parameter, each averaged over 50 trials. Contexts are drawn uniformly at random from $[0,1]^d$, and the reward noise is standard Gaussian. \newcite{DBLP:journals/corr/JosephKMNR16} present only the average regret difference (metric \#3). A sketch of how these metrics are computed follows the list.
149 | \begin{enumerate}
150 | \item Average regret (TI): the average regret of \textsc{TopInterval} across all rounds.
151 | \item Average regret (IC): the average regret of \textsc{IntervalChaining} across all rounds.
152 | \item Average regret difference (TI vs. IC): the difference between the average regrets of \textsc{TopInterval} and \textsc{IntervalChaining} across all rounds.
153 | \item Cumulative regret (TI): the cumulative regret of \textsc{TopInterval} across all rounds.
154 | \item Cumulative regret (IC): the cumulative regret of \textsc{IntervalChaining} across all rounds.
155 | \item Cumulative regret difference (TI vs. IC): the difference between the cumulative regrets of \textsc{TopInterval} and \textsc{IntervalChaining} across all rounds.
156 | \item Final regret (TI): the regret of \textsc{TopInterval} in the final round.
157 | \item Final regret (IC): the regret of \textsc{IntervalChaining} in the final round.
158 | \item Final regret difference (TI vs. IC): the difference between the final regrets of \textsc{TopInterval} and \textsc{IntervalChaining}.
159 | \end{enumerate}
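
As referenced above, each triple of metrics is a simple reduction of a per-round regret sequence (for TI, for IC, or for their difference); a minimal sketch:

\begin{verbatim}
import numpy as np

def regret_summaries(regret):
    # regret: per-round regret values of one algorithm, shape (T,).
    return {'average': float(np.mean(regret)),
            'cumulative': float(np.sum(regret)),
            'final': float(regret[-1])}
\end{verbatim}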
160 |
161 | We present our results in Figures \ref{fig:free_T}, \ref{fig:free_k}, and \ref{fig:free_d}.
162 |
163 | \begin{figure*}[ht!]
164 | \centering
165 | \subfloat{ %
166 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{T_50x_avg_regret_ti}
167 | }
168 | \subfloat{ %
169 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{T_50x_avg_regret_ic}
170 | }
171 | \subfloat{ %
172 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{T_50x_avg_regret_diff}
173 | }
174 | \\
175 | \subfloat{ %
176 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{T_50x_cum_regret_ti}
177 | }
178 | \subfloat{ %
179 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{T_50x_cum_regret_ic}
180 | }
181 | \subfloat{ %
182 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{T_50x_cum_regret_diff}
183 | }
184 | \\
185 | \subfloat{ %
186 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{T_50x_final_regret_ti}
187 | }
188 | \subfloat{ %
189 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{T_50x_final_regret_ic}
190 | }
191 | \subfloat{ %
192 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{T_50x_final_regret_diff}
193 | }
194 | \caption{$d=2$, $k=2$, free $T$}
195 | \label{fig:free_T}
196 | \end{figure*}
197 |
198 | \begin{figure*}[ht!]
199 | \centering
200 | \subfloat{ %
201 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{k_50x_avg_regret_ti}
202 | }
203 | \subfloat{ %
204 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{k_50x_avg_regret_ic}
205 | }
206 | \subfloat{ %
207 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{k_50x_avg_regret_diff}
208 | }
209 | \\
210 | \subfloat{ %
211 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{k_50x_cum_regret_ti}
212 | }
213 | \subfloat{ %
214 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{k_50x_cum_regret_ic}
215 | }
216 | \subfloat{ %
217 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{k_50x_cum_regret_diff}
218 | }
219 | \\
220 | \subfloat{ %
221 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{k_50x_final_regret_ti}
222 | }
223 | \subfloat{ %
224 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{k_50x_final_regret_ic}
225 | }
226 | \subfloat{ %
227 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{k_50x_final_regret_diff}
228 | }
229 | \caption{$d=2$, $T=1000$, free $k$}
230 | \label{fig:free_k}
231 | \end{figure*}
232 |
233 | \begin{figure*}[ht!]
234 | \centering
235 | \subfloat{ %
236 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{d_50x_avg_regret_ti}
237 | }
238 | \subfloat{ %
239 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{d_50x_avg_regret_ic}
240 | }
241 | \subfloat{ %
242 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{d_50x_avg_regret_diff}
243 | }
244 | \\
245 | \subfloat{ %
246 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{d_50x_cum_regret_ti}
247 | }
248 | \subfloat{ %
249 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{d_50x_cum_regret_ic}
250 | }
251 | \subfloat{ %
252 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{d_50x_cum_regret_diff}
253 | }
254 | \\
255 | \subfloat{ %
256 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{d_50x_final_regret_ti}
257 | }
258 | \subfloat{ %
259 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{d_50x_final_regret_ic}
260 | }
261 | \subfloat{ %
262 | \includegraphics[width=0.33\textwidth, height=0.33\textheight, keepaspectratio]{d_50x_final_regret_diff}
263 | }
264 | \caption{$k=2$, $T=1000$, free $d$}
265 | \label{fig:free_d}
266 | \end{figure*}
267 |
268 | \section{Conclusion}
269 |
270 | In this work, we present an empirical extension of the work done by \newcite{DBLP:journals/corr/JosephKMNR16} in their paper \textit{Rawlsian Fairness for Machine Learning}. Specifically, we present implementations of their algorithms \textsc{TopInterval}, \textsc{IntervalChaining}, and \textsc{RidgeFair}; a case study in which we apply the aforementioned algorithms to a Yahoo! clicks dataset; and an extension of one of \newcite{DBLP:journals/corr/JosephKMNR16}'s empirical analyses on randomly generated data. We believe that our results may be useful should these algorithms be used in future real-world settings.
271 |
272 | \bibliography{paper}
273 | \bibliographystyle{acl}
274 |
275 | \end{document}
276 |
--------------------------------------------------------------------------------
/references/rawlsian_fairness.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtcho/FairMachineLearning/b7309a3e4e9030a7c1e7139b82b1fbfe24166f2b/references/rawlsian_fairness.pdf
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | appdirs==1.4.3
2 | appnope==0.1.0
3 | bleach==2.0.0
4 | cycler==0.10.0
5 | decorator==4.0.11
6 | entrypoints==0.2.2
7 | html5lib==0.999999999
8 | ipykernel==4.6.1
9 | ipython==6.0.0
10 | ipython-genutils==0.2.0
11 | ipywidgets==6.0.0
12 | jedi==0.10.2
13 | Jinja2==2.9.6
14 | jsonschema==2.6.0
15 | jupyter==1.0.0
16 | jupyter-client==5.0.1
17 | jupyter-console==5.1.0
18 | jupyter-core==4.3.0
19 | MarkupSafe==1.0
20 | matplotlib==2.0.1
21 | mistune==0.7.4
22 | nbconvert==5.1.1
23 | nbformat==4.3.0
24 | notebook==5.0.0
25 | numpy==1.12.1
26 | packaging==16.8
27 | pandas==0.19.2
28 | pandocfilters==1.4.1
29 | pexpect==4.2.1
30 | pickleshare==0.7.4
31 | prompt-toolkit==1.0.14
32 | ptyprocess==0.5.1
33 | Pygments==2.2.0
34 | pyparsing==2.2.0
35 | python-dateutil==2.6.0
36 | pytz==2017.2
37 | pyzmq==16.0.2
38 | qtconsole==4.3.0
39 | scikit-learn==0.18.1
40 | scipy==0.19.0
41 | simplegeneric==0.8.1
42 | six==1.10.0
43 | terminado==0.6
44 | testpath==0.3
45 | tornado==4.5.1
46 | traitlets==4.3.2
47 | virtualenv==15.1.0
48 | wcwidth==0.1.7
49 | webencodings==0.5.1
50 | widgetsnbextension==2.0.0
51 |
--------------------------------------------------------------------------------