├── .gitignore ├── Experimental_UserStudy_Groups ├── .ipynb_checkpoints │ └── Get Started-checkpoint.ipynb ├── Get Started.ipynb ├── Group_1_Notebooks │ ├── .ipynb_checkpoints │ │ ├── Get Started-checkpoint.ipynb │ │ ├── Task_1-checkpoint 2.ipynb │ │ ├── Task_1-checkpoint.ipynb │ │ └── Untitled-checkpoint.ipynb │ ├── Get Started.ipynb │ ├── Task_1.ipynb │ ├── Task_2.ipynb │ ├── Task_3.ipynb │ └── fklearn │ │ ├── AIF360_Debiasing_Example.ipynb │ │ ├── AIF360_Example.ipynb │ │ ├── AIF360_PrePostProcessing_Example.ipynb │ │ ├── NOTES.txt │ │ ├── datasets.py │ │ ├── fair_model_selection.py │ │ ├── fair_selection_aif.py │ │ ├── metric_library.py │ │ ├── ml_pipeline.py │ │ ├── sample.py │ │ ├── sample_aif.py │ │ └── scikit_learn_wrapper.py ├── Group_2_Notebooks │ ├── Task_1.ipynb │ ├── Task_2.ipynb │ ├── Task_3.ipynb │ └── fklearn │ │ ├── AIF360_Debiasing_Example.ipynb │ │ ├── AIF360_Example.ipynb │ │ ├── AIF360_PrePostProcessing_Example.ipynb │ │ ├── NOTES.txt │ │ ├── __init__.py │ │ ├── datasets.py │ │ ├── fair_model_selection.py │ │ ├── fair_selection_aif.py │ │ ├── interface │ │ ├── __init__.py │ │ ├── exports │ │ │ ├── plot.json │ │ │ └── plot.png │ │ ├── main.py │ │ ├── plot.py │ │ └── static │ │ │ ├── css │ │ │ ├── bootstrap.min.css │ │ │ ├── styles-notebook.css │ │ │ └── styles.css │ │ │ └── data │ │ │ ├── explanations.json │ │ │ ├── out.csv │ │ │ └── test-file.csv │ │ ├── metric_library.py │ │ ├── ml_pipeline.py │ │ ├── sample.py │ │ ├── sample_aif.py │ │ └── scikit_learn_wrapper.py ├── Group_3_Notebooks │ ├── Task_1.ipynb │ ├── Task_2.ipynb │ └── Task_3.ipynb ├── Group_4_Notebooks │ ├── Task_1.ipynb │ ├── Task_2.ipynb │ └── Task_3.ipynb ├── Group_5_Notebooks │ ├── Task_1.ipynb │ ├── Task_2.ipynb │ └── Task_3.ipynb └── Group_6_Notebooks │ ├── Task_1.ipynb │ ├── Task_2.ipynb │ └── Task_3.ipynb ├── README.md ├── examples ├── .ipynb_checkpoints │ └── Fairkit_learn_Tutorial-checkpoint.ipynb ├── Adversarial_Debiasing_Example.ipynb ├── Fairkit_learn_Tutorial.ipynb ├── PrePostProcessing_Example.ipynb └── fklearn_Example.ipynb ├── fklearn_pkg ├── LICENSE ├── README.md ├── fairkit_learn ├── fklearn │ ├── NOTES.txt │ ├── __init__.py │ ├── datasets.py │ ├── fair_model_selection.py │ ├── fair_selection_aif.py │ ├── interface │ │ ├── __init__.py │ │ ├── main.py │ │ ├── plot.py │ │ └── static │ │ │ ├── css │ │ │ ├── bootstrap.min.css │ │ │ ├── styles-notebook.css │ │ │ └── styles.css │ │ │ └── data │ │ │ ├── explanations.json │ │ │ ├── out.csv │ │ │ └── test-file.csv │ ├── metric_library.py │ ├── ml_pipeline.py │ ├── sample.py │ ├── sample_aif.py │ └── scikit_learn_wrapper.py └── setup.py └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | fklearn_pkg/build/ 2 | fklearn_pkg/dist/ 3 | fklearn_pkg/fairkit_learn.egg-info/ 4 | ../examples/search_output.csv 5 | .DS_Store -------------------------------------------------------------------------------- /Experimental_UserStudy_Groups/.ipynb_checkpoints/Get Started-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "comet_cell_id": "cc513c063b975" 7 | }, 8 | "source": [ 9 | "Today, we will use machine learning tools to train models while being careful of model fairness. 
To begin, please click the following badge to open the notebook in Google CoLabs:\n", 10 | "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/brittjay0104/ml_fairness_exercise/blob/master/Experimental_Groups/Group_1/Task_1.ipynb)" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "metadata": { 17 | "comet_cell_id": "a907546811606" 18 | }, 19 | "outputs": [], 20 | "source": [] 21 | } 22 | ], 23 | "metadata": { 24 | "comet_paths": [ 25 | [ 26 | "db4861d2/Get Started.ipynb", 27 | 1569537646883 28 | ], 29 | [ 30 | "6f78abed/Get Started.ipynb", 31 | 1569608441749 32 | ] 33 | ], 34 | "comet_tracking": true, 35 | "kernelspec": { 36 | "display_name": "Python 3", 37 | "language": "python", 38 | "name": "python3" 39 | }, 40 | "language_info": { 41 | "codemirror_mode": { 42 | "name": "ipython", 43 | "version": 3 44 | }, 45 | "file_extension": ".py", 46 | "mimetype": "text/x-python", 47 | "name": "python", 48 | "nbconvert_exporter": "python", 49 | "pygments_lexer": "ipython3", 50 | "version": "3.7.3" 51 | } 52 | }, 53 | "nbformat": 4, 54 | "nbformat_minor": 2 55 | } 56 | -------------------------------------------------------------------------------- /Experimental_UserStudy_Groups/Get Started.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "comet_cell_id": "cc513c063b975" 7 | }, 8 | "source": [ 9 | "Today, we will use machine learning tools to train models while being careful of model fairness. To begin, please click the following badge to open the notebook in Google CoLabs:\n", 10 | "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/brittjay0104/ml_fairness_exercise/blob/master/Experimental_Groups/Group_1/Task_1.ipynb)" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "metadata": { 17 | "comet_cell_id": "a907546811606" 18 | }, 19 | "outputs": [], 20 | "source": [] 21 | } 22 | ], 23 | "metadata": { 24 | "comet_paths": [ 25 | [ 26 | "db4861d2/Get Started.ipynb", 27 | 1569537646883 28 | ], 29 | [ 30 | "6f78abed/Get Started.ipynb", 31 | 1569608441749 32 | ] 33 | ], 34 | "comet_tracking": true, 35 | "kernelspec": { 36 | "display_name": "Python 3 (ipykernel)", 37 | "language": "python", 38 | "name": "python3" 39 | }, 40 | "language_info": { 41 | "codemirror_mode": { 42 | "name": "ipython", 43 | "version": 3 44 | }, 45 | "file_extension": ".py", 46 | "mimetype": "text/x-python", 47 | "name": "python", 48 | "nbconvert_exporter": "python", 49 | "pygments_lexer": "ipython3", 50 | "version": "3.9.7" 51 | } 52 | }, 53 | "nbformat": 4, 54 | "nbformat_minor": 2 55 | } 56 | -------------------------------------------------------------------------------- /Experimental_UserStudy_Groups/Group_1_Notebooks/.ipynb_checkpoints/Get Started-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "comet_cell_id": "cc513c063b975" 7 | }, 8 | "source": [ 9 | "Today, we will use machine learning tools to train models while being careful of model fairness. 
To begin, please click the following badge to open the notebook in Google CoLabs:\n", 10 | "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/brittjay0104/ml_fairness_exercise/blob/master/Experimental_Groups/Group_1/Task_1.ipynb)" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "metadata": { 17 | "comet_cell_id": "a907546811606" 18 | }, 19 | "outputs": [], 20 | "source": [] 21 | } 22 | ], 23 | "metadata": { 24 | "comet_paths": [ 25 | [ 26 | "db4861d2/Get Started.ipynb", 27 | 1569537646883 28 | ], 29 | [ 30 | "6f78abed/Get Started.ipynb", 31 | 1569608441749 32 | ] 33 | ], 34 | "comet_tracking": true, 35 | "kernelspec": { 36 | "display_name": "Python 3", 37 | "language": "python", 38 | "name": "python3" 39 | }, 40 | "language_info": { 41 | "codemirror_mode": { 42 | "name": "ipython", 43 | "version": 3 44 | }, 45 | "file_extension": ".py", 46 | "mimetype": "text/x-python", 47 | "name": "python", 48 | "nbconvert_exporter": "python", 49 | "pygments_lexer": "ipython3", 50 | "version": "3.7.3" 51 | } 52 | }, 53 | "nbformat": 4, 54 | "nbformat_minor": 2 55 | } 56 | -------------------------------------------------------------------------------- /Experimental_UserStudy_Groups/Group_1_Notebooks/.ipynb_checkpoints/Untitled-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 4 6 | } 7 | -------------------------------------------------------------------------------- /Experimental_UserStudy_Groups/Group_1_Notebooks/Get Started.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "comet_cell_id": "cc513c063b975" 7 | }, 8 | "source": [ 9 | "Today, we will use machine learning tools to train models while being careful of model fairness. 
To begin, please click the following badge to open the notebook in Google CoLabs:\n", 10 | "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/brittjay0104/ml_fairness_exercise/blob/master/Experimental_Groups/Group_1/Task_1.ipynb)" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "metadata": { 17 | "comet_cell_id": "a907546811606" 18 | }, 19 | "outputs": [], 20 | "source": [] 21 | } 22 | ], 23 | "metadata": { 24 | "comet_paths": [ 25 | [ 26 | "db4861d2/Get Started.ipynb", 27 | 1569537646883 28 | ], 29 | [ 30 | "6f78abed/Get Started.ipynb", 31 | 1569608441749 32 | ] 33 | ], 34 | "comet_tracking": true, 35 | "kernelspec": { 36 | "display_name": "Python 3", 37 | "language": "python", 38 | "name": "python3" 39 | }, 40 | "language_info": { 41 | "codemirror_mode": { 42 | "name": "ipython", 43 | "version": 3 44 | }, 45 | "file_extension": ".py", 46 | "mimetype": "text/x-python", 47 | "name": "python", 48 | "nbconvert_exporter": "python", 49 | "pygments_lexer": "ipython3", 50 | "version": "3.7.3" 51 | } 52 | }, 53 | "nbformat": 4, 54 | "nbformat_minor": 2 55 | } 56 | -------------------------------------------------------------------------------- /Experimental_UserStudy_Groups/Group_1_Notebooks/Task_3.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "comet_cell_id": "b4bcefbe0a41f" 7 | }, 8 | "source": [ 9 | "Before beginning task 3, make sure to run the following cell to import all necessary packages. If you need any additional packages, add the import statement(s) to the cell below and re-run the cell before adding and running code that uses the additional packages.\n", 10 | "\n" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "metadata": { 17 | "comet_cell_id": "2c7cd3e562e7" 18 | }, 19 | "outputs": [], 20 | "source": [ 21 | "# Load all necessary packages\n", 22 | "import numpy as np\n", 23 | "import sklearn as skl\n", 24 | "import six\n", 25 | "\n", 26 | "# dataset\n", 27 | "from aif360.datasets import AdultDataset\n", 28 | "\n", 29 | "# models\n", 30 | "from sklearn.linear_model.logistic import LogisticRegression \n", 31 | "from sklearn.neighbors import KNeighborsClassifier\n", 32 | "from sklearn.ensemble import RandomForestClassifier \n", 33 | "from sklearn.svm import SVC \n", 34 | "\n", 35 | "# metric\n", 36 | "from sklearn.metrics import accuracy_score" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": { 42 | "comet_cell_id": "46991f420accb" 43 | }, 44 | "source": [ 45 | "# Tutorial 3: scikit-learn\n", 46 | "\n", 47 | "Now we show you how to train and evaluate models using scikit-learn. You will use the knowledge from this tutorial to complete Task 3, so please read thoroughly and execute the code cells in order.\n", 48 | "\n", 49 | "## Step 1: Import the dataset\n", 50 | "\n", 51 | "First we need to import the dataset we will use for training and testing our model.\n", 52 | "\n", 53 | "Below, we provide code that imports the Adult dataset. **Note: a warning may pop up when you run this cell. 
As long as you don't see any errors in the code, it is fine to continue.**" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": { 60 | "comet_cell_id": "936840797dfba" 61 | }, 62 | "outputs": [], 63 | "source": [ 64 | "data_orig = AdultDataset()" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": { 70 | "comet_cell_id": "1f7cb8ab1c822" 71 | }, 72 | "source": [ 73 | "## Step 2: Split the dataset into train and test data\n", 74 | "\n", 75 | "Now that the dataset has been imported, we need to split the original dataset into training and test data. \n", 76 | "\n", 77 | "The code to do so is as follows:" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": { 84 | "comet_cell_id": "8f3e98f0712d1" 85 | }, 86 | "outputs": [], 87 | "source": [ 88 | "data_orig_train, data_orig_test = data_orig.split([0.7], shuffle=False)" 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": { 94 | "comet_cell_id": "8e6392efce817" 95 | }, 96 | "source": [ 97 | "## Step 3: Initialize model \n", 98 | "\n", 99 | "Next, we need to initialize our model. We can initialize a model with the default parameters (see documentation), no parameters (which initializes with default parameter values), or we can modify parameter values.\n", 100 | "\n", 101 | "For the tutorial, we use the Logistic Regression model with default hyper-parameter values; you will be able to use any of the scikit-learn models listed above, and modify hyper-parameter values, when completing the exercise. \n", 102 | "\n", 103 | "Below we provide code for initialzing the Logistic Regression model, with default hyper-parameter values. We also provide (commented) code that reminds you of how to initialize each model available during this exercise." 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": { 110 | "comet_cell_id": "e89b66337a2a6" 111 | }, 112 | "outputs": [], 113 | "source": [ 114 | "# model is populated with default values; modifying parameters is allowed but optional\n", 115 | "model = LogisticRegression(penalty='l2', dual=False,tol=0.0001,C=1.0,\n", 116 | " fit_intercept=True,intercept_scaling=1,class_weight=None,\n", 117 | " random_state=None,solver='liblinear',max_iter=100, \n", 118 | " multi_class='warn',verbose=0,warm_start=False,\n", 119 | " n_jobs=None)\n", 120 | "\n", 121 | "#model = KNeighborsClassifier(n_neighbors=5,weights='uniform',algorithm='auto',\n", 122 | "# leaf_size=30,p=2,metric='minkowski',metric_params=None,\n", 123 | "# n_jobs=None)\n", 124 | "\n", 125 | "#model = RandomForestClassifier(n_estimators='warn',criterion='gini',max_depth=None,\n", 126 | "# min_samples_leaf=1,min_weight_fraction_leaf=0.0,\n", 127 | "# min_impurity_split=None, bootstrap=True, oob_score=False, n_jobs=None, \n", 128 | "# random_state=None, verbose=0, warm_start=False, class_weight=None)\n", 129 | "\n", 130 | "#model = SVC(C=1.0, kernel='rbf', degree=3, gamma='auto_deprecated', coef0=0.0, shrinking=True, \n", 131 | "# probability=False, tol=0.001, cache_size=200, class_weight=None, verbose=False, \n", 132 | "# max_iter=-1, decision_function_shape='ovr', random_state=None)" 133 | ] 134 | }, 135 | { 136 | "cell_type": "markdown", 137 | "metadata": { 138 | "comet_cell_id": "7bba44eb7e995" 139 | }, 140 | "source": [ 141 | "## Step 4: Train the model\n", 142 | "\n", 143 | "After initialing the model, we train it using the training dataset. 
\n", 144 | "\n", 145 | "Below we provide code that prepares our dataset to be used with scikit-learn and trains the model using our prepared data." 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": null, 151 | "metadata": { 152 | "comet_cell_id": "470c3e83a0934" 153 | }, 154 | "outputs": [], 155 | "source": [ 156 | "# prepare data for use with scikit-learn\n", 157 | "from sklearn.preprocessing import StandardScaler\n", 158 | "\n", 159 | "scaler = StandardScaler()\n", 160 | "\n", 161 | "x_train = scaler.fit_transform(data_orig_train.features)\n", 162 | "y_train = data_orig_train.labels.ravel()\n", 163 | "\n", 164 | "\n", 165 | "model.fit(x_train, y_train)" 166 | ] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "metadata": { 171 | "comet_cell_id": "483779469d731" 172 | }, 173 | "source": [ 174 | "## Step 5: Evaluate the model\n", 175 | "\n", 176 | "Now we're ready to evaluate your trained model with the test data using the performance metric provided by scikit-learn.\n", 177 | "\n", 178 | "Below we provide code snippets that show how to evaluate a model's performance using scikit-learn." 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": 1, 184 | "metadata": { 185 | "comet_cell_id": "f3c98baf23fd4" 186 | }, 187 | "outputs": [ 188 | { 189 | "ename": "NameError", 190 | "evalue": "name 'lr' is not defined", 191 | "output_type": "error", 192 | "traceback": [ 193 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 194 | "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", 195 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mpredictions\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata_orig_test\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0maccuracy\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0maccuracy_score\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata_orig_test\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlabels\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpredictions\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 196 | "\u001b[0;31mNameError\u001b[0m: name 'lr' is not defined" 197 | ] 198 | } 199 | ], 200 | "source": [ 201 | "x_test = scaler.fit_transform(data_orig_test.features)\n", 202 | "\n", 203 | "predictions = model.predict(x_test)\n", 204 | "accuracy = accuracy_score(data_orig_test.labels.ravel(), predictions)\n", 205 | "\n", 206 | "print ('Accuracy = ' + str(accuracy))\n" 207 | ] 208 | }, 209 | { 210 | "cell_type": "markdown", 211 | "metadata": { 212 | "comet_cell_id": "f42e810454232" 213 | }, 214 | "source": [ 215 | "# Task 3: Model evaluation with scikit-learn\n", 216 | "\n", 217 | "Your turn! Use what you learned in the above tutorial to train and evaluate models for performance, fairness, and overall quality. You will use functionality provided by scikit-learn to meet the following goals:\n", 218 | "\n", 219 | "1. **Describe a model you believe will perform the best (e.g., have the highest accuracy score).** \n", 220 | "\n", 221 | "2. **Describe a model you believe will be the most fair, regardless of performance.** \n", 222 | "\n", 223 | "3. 
**Describe a model you believe will best balance both performance and fairness.** \n", 224 | "\n", 225 | "Make sure you include any modifications to model hyper-parameters.\n", 226 | "\n", 227 | "**Keep in mind, training machine learning models is often a time intensive endeavor.** One way you can minimize time to finish the assignment is to minimize the times you have to, for example, train a given model to then evaluate it by putting the code that initializes and trains your model(s) in its own separate cell.\n", 228 | "\n", 229 | "\n", 230 | "## Submitting your response \n", 231 | "\n", 232 | "Once you feel you've met the above goals, go to the Evaluating ML Models Exercise Response Form to enter your responses under the section labeled 'Task 3'.\n", 233 | "\n", 234 | "If you accidentally closed your response form, check your email for the link to re-open it." 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": 1, 240 | "metadata": { 241 | "comet_cell_id": "9d75f2fa60fd2" 242 | }, 243 | "outputs": [], 244 | "source": [ 245 | "# TODO : Use this cell to write code for completing task 3\n", 246 | "\n", 247 | "\n", 248 | "\n" 249 | ] 250 | }, 251 | { 252 | "cell_type": "markdown", 253 | "metadata": { 254 | "comet_cell_id": "23c64e4f35267" 255 | }, 256 | "source": [ 257 | "Once you've completed this final task, make sure you're satisfied with your responses, complete the exercise feedback portion and submit the form." 258 | ] 259 | } 260 | ], 261 | "metadata": { 262 | "comet_paths": [ 263 | [ 264 | "4c7b42fa/ML Model Eval Assignment.ipynb", 265 | 1567249528393 266 | ], 267 | [ 268 | "008a0d50/Task_3.ipynb", 269 | 1567538778080 270 | ], 271 | [ 272 | "db4861d2/Task_3.ipynb", 273 | 1567621567465 274 | ] 275 | ], 276 | "comet_tracking": true, 277 | "kernelspec": { 278 | "display_name": "Python 3", 279 | "language": "python", 280 | "name": "python3" 281 | }, 282 | "language_info": { 283 | "codemirror_mode": { 284 | "name": "ipython", 285 | "version": 3 286 | }, 287 | "file_extension": ".py", 288 | "mimetype": "text/x-python", 289 | "name": "python", 290 | "nbconvert_exporter": "python", 291 | "pygments_lexer": "ipython3", 292 | "version": "3.7.3" 293 | } 294 | }, 295 | "nbformat": 4, 296 | "nbformat_minor": 2 297 | } 298 | -------------------------------------------------------------------------------- /Experimental_UserStudy_Groups/Group_1_Notebooks/fklearn/NOTES.txt: -------------------------------------------------------------------------------- 1 | Goal: want to be able to use fairkit learn just like you would sklearn 2 | 3 | (1) One potentially useful feature (aside from the feature of finding the best model) would be able to import fairness metrics like you can import accuracy metrics in scikit-learn. This could be especially useful since in our class project we were able to show that having the metrics themselves was useful. 4 | 5 | http://scikit-learn.org/stable/modules/generated/sklearn.metrics.accuracy_score.html 6 | 7 | Ex scikit-learn: 8 | ``` 9 | from sklearn.metrics import roc_curve 10 | from sklearn.metrics import accuracy_score 11 | ``` 12 | 13 | Ex fairkit-learn: 14 | ``` 15 | from fklearn.fair_metrics import causal_fairness 16 | from fklearn.fair_metrics import false_postive_rate_equality 17 | ``` 18 | 19 | Sorelle A. 
Friedler (Haverford)'s work: 20 | 21 | This group has already implemented many of these but for the purpose of studying "fairness-enhancing interventions in machine learning" https://arxiv.org/pdf/1802.04422.pdf 22 | 23 | They have a repository with many of the metrics already implemented: 24 | https://github.com/algofairness/fairness-comparison 25 | Problems: 26 | -Not clear documentation on how to run this software as a stand alone package 27 | -How do we not overlap with what they did/ contribute something novel 28 | -Can we collaborate with them? 29 | 30 | (2) For the full model search: 31 | Modeled after http://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html#sklearn.model_selection.GridSearchCV 32 | ``` 33 | from fklearn.fair_model_selection import FairSearch 34 | 35 | class FairSearch(model_classes, protected_attributes, metrics, hyperparameters): 36 | """ 37 | Description 38 | TODO 39 | 40 | Parameters 41 | ---------- 42 | model_classes : list of stings 43 | List of sklearn model classes that one wants to search over 44 | TODO: put list of supported packages (e.g. sklearn.linear_model.LogisticRegression) 45 | - do we want to implement/support any of the fair-aware ones ourselves? 46 | 47 | protected_attributes : list of ints 48 | List of integers corresponding to the index of the protected attributes in X 49 | TODO: or strings? 50 | 51 | metrics : list of strings 52 | e.g. sklearn supported [sklearn.metrics.accuracy_score, ...] 53 | and our fairness metrics [fklearn.fair_metrics.causal_fairness] 54 | 55 | hyperparameters : list of strings 56 | e.g ['l1', 'class_weight'] 57 | TODO: will need to put in some sort of error if they don't work with the sklearn piece 58 | 59 | Attributes 60 | ---------- 61 | TODO 62 | """ 63 | 64 | def fit(self, X, y): 65 | TODO: or do we want the protected attributes in this function instead? 66 | pass 67 | 68 | ``` 69 | What do we want to return? A model, a Parateo frontier, a visulization? 70 | 71 | 72 | (3) Other great features of sklearn that we would want to include as well: 73 | 74 | sklearn.datasets (fetch and loads popular datasets) 75 | ``` 76 | fklearn.datasets.fetch_propublica 77 | fklearn.datasets.fetch_propublica_vectorized 78 | fklearn.datasets.fetch_adult 79 | fklearn.datasets.fetch_adult_vectorized 80 | ``` 81 | 82 | TODO: 83 | -How many of the "21" definitions of fairness can we implement in this package? 84 | -will eventually want to release as a pip package correct? 85 | -One of the selling points could that it works seamlessly with sklearn 86 | 87 | THOUGHTS: 88 | -Contributions of our work: (1) very user-friendly library of fairness metrics (2) fair-aware models (2) model selection with fairness as a criteria under consideration 89 | 90 | -I think usability and examples are super important. For example, I think it's a big reason why sklearn is used so heavily. Example: http://scikit-learn.org/stable/modules/generated/sklearn.metrics.accuracy_score.html 91 | 92 | -Keep all our notation very similar to fairkit-learn (maybe eventually we can get a pull from them??) 
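
A rough end-to-end sketch of how the pieces above might fit together, mirroring sample.py and the current FairSearch/load_adult_income code. The fklearn.fair_metrics module is still only a proposal (it is not in the repo tree yet), so treat the metric import, and the exact module paths, as illustrative rather than final:
```
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

from fklearn.datasets import load_adult_income
from fklearn.fair_model_selection import FairSearch
from fklearn.fair_metrics import group_discrimination_score  # proposed module, assumption

# load_adult_income() returns (data dict with X_train/y_train/X_val/y_val/attribute_map,
# plus the unprocessed training frame)
data, _ = load_adult_income()

models = {'LogisticRegression': LogisticRegression}
fairness_metrics = {'GroupDiscrimination': group_discrimination_score}
performance_metrics = {'Accuracy': accuracy_score}
parameters = {'LogisticRegression': {'penalty': ['l1', 'l2'], 'C': [1, 10]}}
thresholds = [i / 10.0 for i in range(1, 10)]

search = FairSearch(models, fairness_metrics, performance_metrics, parameters, thresholds)
search.fit(data)

# masked arrays of hyperparameter settings and scores for the Pareto-optimal models
print(search.pareto_optimal_results)
```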
93 | 94 | 95 | 96 | 97 | 98 | -------------------------------------------------------------------------------- /Experimental_UserStudy_Groups/Group_1_Notebooks/fklearn/datasets.py: -------------------------------------------------------------------------------- 1 | """ 2 | Basic IO for loading fair datasets 3 | 4 | Ex: http://scikit-learn.org/stable/datasets/twenty_newsgroups.html#newsgroups 5 | 6 | TODO: should we have "vectorized" and "non-vectorized" versions here like sklearn does? 7 | """ 8 | from __future__ import division 9 | import warnings 10 | import numpy as np 11 | import sklearn 12 | import pandas as pd 13 | 14 | def fetch_adult_income(): 15 | #directly downloads the data 16 | #calls load to load the data 17 | #TODO 18 | pass 19 | 20 | def load_adult_income_train_val(): 21 | #for the user study so they can't access the test set 22 | return load_adult_income(train_val_split=0.5, notest=True) 23 | 24 | def load_adult_income(train_val_split=0.5, notest=False): 25 | """ 26 | Load files and data from the propublica dataset 27 | 28 | Parameters 29 | ---------- 30 | train_val_split : float 31 | Amount to split the training set to create a train and validation set 32 | 33 | Returns 34 | ------- 35 | data : dict 36 | With keys 37 | 38 | X : 2-d ndarray 39 | 40 | y : 1-d ndarray 41 | 42 | (or X_train, y_train, X_test, y_test if subset=='all') 43 | 44 | feat_names : list of strs 45 | List of the feature names corresponding to the indices of the columns 46 | of X 47 | 48 | attribute_map : dict of dicts of dicts 49 | Denotes the protected attributes of the category of protected 50 | attribute (e.g. "Race") and maps the attribute name to the column and value that correspond 51 | to that attribute 52 | e.g. one-hot encoding for a one-hot encoding denoting the columns ("col") and values ("val") 53 | 54 | {"Race": {"Purple": {"col": 0, "val": 1}, "Green": {"col": 1, "val": 1}}, 55 | "Sex" : {"Female": {"col": 0, "val": 1}, "Male": {"col": 1, "val": 1}} 56 | 57 | e.g. categorical encoding {"Purple": {"col": 0, "val: 1"}, 58 | "Green": {"col": 0, "val: 2"}} 59 | 60 | Note: these MUST be mutually exclusive categories! 
61 | 62 | is_categorical : boolean 63 | True if the y-values are categorical 64 | False otherwise (indicating a one-hot encoding) 65 | 66 | Examples 67 | -------- 68 | >>> from fklearn.datasets import load_adult_income 69 | """ 70 | data = {} 71 | data['is_categorical'] = False 72 | header_names = ["Age", "Workclass", "FNLWGT", "Education", "Education-Num", "Marital Status", "Occupation", "Relationship", "Race", "Sex", "Capital Gain", "Capital Loss", "Hours per Week", "Native Country", "Income"] 73 | 74 | train_ref = pd.read_csv("../data/adult_income/train.csv", index_col = False, delimiter=' *, *', engine='python', names = header_names) 75 | train_all = pd.get_dummies(train_ref) 76 | train_all.columns = train_all.columns.str.replace('_ ', '_') 77 | 78 | end_idx_train_val_split = int(np.floor(train_val_split*train_all.shape[0])) 79 | train = train_all[:end_idx_train_val_split] 80 | val = train_all[end_idx_train_val_split:] 81 | 82 | y_train = train["Income_<=50K"].copy() 83 | X_train = train.drop(["Income_<=50K","Income_>50K", "Native Country_Holand-Netherlands"], axis=1).copy() 84 | y_val = val["Income_<=50K"].copy() 85 | X_val = val.drop(["Income_<=50K","Income_>50K", "Native Country_Holand-Netherlands"], axis=1).copy() 86 | 87 | test_ref = pd.read_csv( "../data/adult_income/test.csv", index_col = False, delimiter=' *, *', engine='python', names = header_names) 88 | test = pd.get_dummies(test_ref) 89 | test.columns = test.columns.str.replace('_ ', '_') 90 | y_test = test["Income_<=50K."].copy() 91 | X_test = test.drop(["Income_<=50K.","Income_>50K."], axis=1).copy() 92 | 93 | data['feat_names'] = [str(col) for col in X_test.columns] 94 | 95 | assert X_train.shape[1] == X_val.shape[1] == X_test.shape[1] 96 | assert X_train.shape[0] == y_train.shape[0] 97 | assert X_val.shape[0] == y_val.shape[0] 98 | assert X_test.shape[0] == y_test.shape[0] 99 | 100 | data['X_train'] = X_train.values 101 | data['y_train'] = y_train.values 102 | data['X_val'] = X_val.values 103 | data['y_val'] = y_val.values 104 | data['X_test'] = X_test.values 105 | data['y_test'] = y_test.values 106 | 107 | attribute_map = {'Race': {}, 'Sex': {}} 108 | for ii, col in enumerate(X_train): 109 | if col.startswith('Race'): 110 | attribute_map['Race'][col] = {'col': ii, 'val': 1} 111 | elif col.startswith('Sex'): 112 | attribute_map['Sex'][col] = {'col': ii, 'val': 1} 113 | data['attribute_map'] = attribute_map 114 | 115 | if notest: 116 | del data['X_test'] 117 | del data['y_test'] 118 | 119 | unprocessed_train_data = train_ref 120 | 121 | return data, unprocessed_train_data 122 | 123 | def fetch_propublica(subset='train'): 124 | """ 125 | Load files and data from the propublica dataset 126 | 127 | Parameters 128 | ---------- 129 | subset : 'train' or 'test', 'all' 130 | Select which dataset to load 131 | 132 | Returns 133 | ------- 134 | X : 2-d ndarray 135 | 136 | y : 2-d ndarray 137 | 138 | attribute_map : dict of dicts 139 | Denotes the protected attributes of the category of protected 140 | attribute (e.g. "Race") to measure causal fairness 141 | maps the attribute name to the column and value that correspond 142 | to that attribute 143 | e.g. one-hot encoding {"Purple": {"col": 0, "val": 1}, 144 | "Green": {"col": 1, "val": 1}} 145 | 146 | e.g. categorical encoding {"Purple": {"col": 0, "val: 1"}, 147 | "Green": {"col": 0, "val: 2"}} 148 | 149 | Note: these MUST be mutually exclusive categories! 
150 | 151 | is_categorical : boolean 152 | True if the y-values are categorical 153 | False otherwise (indicating a one-hot encoding) 154 | 155 | 156 | Examples 157 | -------- 158 | >>> from fklearn.datasets import fetch_propublica 159 | 160 | """ 161 | pass 162 | 163 | if __name__ == '__main__': 164 | load_adult_income() 165 | 166 | #TODO: other dataset functions 167 | 168 | -------------------------------------------------------------------------------- /Experimental_UserStudy_Groups/Group_1_Notebooks/fklearn/fair_model_selection.py: -------------------------------------------------------------------------------- 1 | """ 2 | Used to search and return models along the Pareto frontier 3 | 4 | Inspiration: http://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html#sklearn.model_selection.GridSearchCV 5 | """ 6 | from __future__ import division 7 | import warnings 8 | import six 9 | import itertools 10 | import inspect 11 | import numpy as np 12 | import numpy.ma as ma 13 | 14 | RANDOM = 'random' 15 | GRID = 'grid' 16 | THRESHOLD_STR = 'threshold' 17 | 18 | SEARCH_STRATEGIES = [GRID, RANDOM] 19 | 20 | def filtered_arguments(func): 21 | required_args = six.viewkeys(inspect.signature(func).parameters) 22 | 23 | def inner(*args, **kwargs): 24 | kwargs = { k:v for k,v in six.iteritems(kwargs) if k in required_args } 25 | return func(*args, **kwargs) 26 | return inner 27 | 28 | class FairSearch(): 29 | """ 30 | Description 31 | TODO 32 | 33 | Parameters 34 | ---------- 35 | models : dict 36 | Dictionary of model names as keys and instantiations of model objects as values. 37 | e.g. { 38 | 'SVC': sklearn.svm.SVC(), 39 | 'LogisticRegression': sklearn.linear_model.LogisticRegression() 40 | } 41 | 42 | metrics : dict 43 | Dictionary of sklearn and fklearn fairness metrics 44 | e.g. { 45 | 'Causal': fklearn.fair_metrics.causal_discrimination_score, 46 | 'Accuracy': sklearn.metrics.accuracy_score 47 | } 48 | 49 | parameters : dict of dicts of lists 50 | Dictionary with model names as keys and hyperparameter dicts as values. 51 | Each hyperparameter dict has hyperparameters as keys and hyperparameter 52 | settings to try as values. 53 | e.g. { 54 | 'SVC': {'kernel': ['rbf'], 'C': [1, 10]}, 55 | 'LogisticRegression': {'penalty': ['l1', 'l2'], 'C': [1, 10]} 56 | } 57 | 58 | 59 | thresholds : list of floats 60 | List of classifation thresholds to be applied to all classifiers. 61 | Usage is for classifiers that output a probability, rather than a 62 | hard classification. 63 | e.g. [i * 1.0/100 for i in range(100)] 64 | 65 | Attributes 66 | ---------- 67 | pareto_optimal_results : dict of masked arrays 68 | Keys strings describing the model parameter or score metric. 69 | e.g. 
{'param_C': masked_array(data = [0, --], mask = [False, True]), 70 | 'param_L1': masked_array(data = [0, --], mask = [False, True]), 71 | 'train_causal_fairness_score' : [0.8, 0.7], 72 | 'val_causal_fairness_score' : [0.71, 0.64], 73 | 'test_causal_fairness_score' : [0.7, 0.65], 74 | 'train_accuracy_score' : [0.6, 0.8], 75 | 'val_accuracy_score' : [0.57, 0.81], 76 | 'test_accuracy_score' : [0.55, 0.78], 77 | 'fit_time' : [0.08, 1.1]} 78 | 79 | Examples 80 | -------- 81 | >>> from fklearn.fair_model_selection import FairSearch 82 | """ 83 | 84 | def __init__(self, models, fairness_metrics, performance_metrics, parameters, thresholds): 85 | self.models = models 86 | self.fairness_metrics = { k:filtered_arguments(v) for k,v in six.iteritems(fairness_metrics) } 87 | self.performance_metrics = { k:filtered_arguments(v) for k,v in six.iteritems(performance_metrics) } 88 | self.parameters = parameters 89 | self.thresholds = thresholds 90 | self.search_results = {} 91 | self.pareto_optimal_results = {} 92 | 93 | def _build_grid_param_arrays(self): 94 | self.n_experiments = 0 95 | attribute_categories = [] 96 | 97 | for key, _ in six.iteritems(self.models): 98 | model = self.models[key] 99 | keys, values = zip(*self.parameters[key].items()) 100 | keys = keys + (THRESHOLD_STR, ) 101 | values = values + (self.thresholds, ) 102 | attribute_categories.extend(keys) 103 | self.n_experiments += len([dict(zip(keys, v)) for v in itertools.product(*values)]) 104 | 105 | for attribute in list(set(attribute_categories)): 106 | self.search_results["param_" + attribute] = [np.nan] * self.n_experiments 107 | 108 | return 109 | 110 | def _build_score_arrays(self, data): 111 | scores = {} 112 | 113 | for protected_attribute, _ in six.iteritems(data["attribute_map"]): 114 | for fairness_metric, _ in six.iteritems(self.fairness_metrics): 115 | self.search_results["score_" + protected_attribute + "_" + fairness_metric] = [np.nan] * self.n_experiments 116 | for performance_metric, _ in six.iteritems(self.performance_metrics): 117 | self.search_results["score_" + performance_metric] = [np.nan] * self.n_experiments 118 | 119 | return 120 | 121 | def _fit_grid(self, data, verbose=False, n_train_samples=None, n_val_samples=None): 122 | #TODO add verbose functionality 123 | i = -1 124 | args_dict = {} 125 | 126 | if n_train_samples: 127 | train_idx = np.random.choice(data["X_train"].shape[0], n_train_samples, replace=False) 128 | X_train = data["X_train"][train_idx, :] 129 | y_train = data["y_train"][train_idx] 130 | else: 131 | X_train = data["X_train"] 132 | y_train = data["y_train"] 133 | 134 | if n_val_samples: 135 | val_idx = np.random.choice(data["X_val"].shape[0], n_val_samples, replace=0) 136 | X_val = data["X_val"][val_idx, :] 137 | y_val = data["y_val"][val_idx] 138 | else: 139 | X_val = data["X_val"] 140 | y_val = data["y_val"] 141 | 142 | args_dict["X"] = X_val 143 | args_dict["y_true"] = y_val 144 | 145 | for model_key, model_family in six.iteritems(self.models): 146 | parameter_keys, parameter_values = zip(*self.parameters[model_key].items()) 147 | experiments = [dict(zip(parameter_keys, v)) for v in itertools.product(*parameter_values)] 148 | for experiment in experiments: 149 | # Train Model 150 | model = model_family(**experiment) 151 | model = model.fit(X_train, y_train) 152 | args_dict["y_pred_proba"] = model.predict_proba(X_val)[:, 1] 153 | args_dict["trained_model"] = model 154 | 155 | for threshold in self.thresholds: 156 | 157 | args_dict["threshold"] = threshold 158 | args_dict["y_pred"] = 
args_dict["y_pred_proba"] > threshold 159 | 160 | i += 1 161 | self.search_results["param_threshold"][i] = threshold 162 | # Fill in parameter values 163 | for experiment_key, experiment_value in six.iteritems(experiment): 164 | self.search_results["param_" + experiment_key][i] = experiment_value 165 | 166 | # Evaluate Model 167 | for protected_attribute, _ in six.iteritems(data["attribute_map"]): 168 | args_dict["attribute_map"] = data["attribute_map"][protected_attribute] 169 | for fairness_metric, fairness_metric_function in six.iteritems(self.fairness_metrics): 170 | self.search_results["score_" + protected_attribute + "_" + fairness_metric][i] = fairness_metric_function(**args_dict) 171 | 172 | for performance_metric, performance_metric_function in six.iteritems(self.performance_metrics): 173 | self.search_results["score_" + performance_metric][i] = performance_metric_function(**args_dict) 174 | 175 | for key, value in six.iteritems(self.search_results): 176 | # Hacky way to check for nans, but other ways seemed to break 177 | mask = [j != j for j in self.search_results[key]] 178 | self.search_results[key] = ma.array(self.search_results[key], mask=mask) 179 | 180 | self.pareto_optimal_results = self.filter_solution_set() 181 | 182 | def fit(self, data, verbose=1, search_strategy=GRID, n_random_models=None, n_train_samples=None, n_val_samples=None): 183 | """ 184 | Based in part on http://www.codiply.com/blog/hyperparameter-grid-search-across-multiple-models-in-scikit-learn/ 185 | 186 | Parameters 187 | ---------- 188 | X : 2d array-like 189 | Training dataset where rows are instances and columns are features. 190 | 191 | y : 1d array-like 192 | Classification labels 193 | 194 | 195 | attribute_map : dict of dicts 196 | denotes the protected attributes of the category of protected 197 | attribute (e.g. "Race") to measure causal fairness 198 | maps the attribute name to the column and value that correspond 199 | to that attribute 200 | e.g. one-hot encoding {"Purple": {"col": 0, "val": 1}, 201 | "Green": {"col": 1, "val": 1}} 202 | 203 | e.g. categorical encoding {"Purple": {"col": 0, "val: 1"}, 204 | "Green": {"col": 0, "val: 0"}} 205 | 206 | Note: these MUST be mutually exclusive categories! 207 | 208 | is_categorical : bool (optional) 209 | denotes whether the attribute map represents a categorical encoding. If False 210 | we assume that the encoding is one-hot. 
211 | 212 | max_models : None or int 213 | If None, return the entire Pareto frontier of models 214 | Otherwise, return int number of models, ties will be broken randomly 215 | 216 | search_strategy : str 217 | 'random', a random search over models/hyperparameters 218 | 'grid', enumerates the space of models/hyperparameters 219 | 'genetic_algorithms', uses genetic algorithms 220 | """ 221 | 222 | assert search_strategy in SEARCH_STRATEGIES 223 | 224 | if search_strategy == RANDOM: 225 | assert n_random_models > 0 226 | #TODO 227 | 228 | 229 | if search_strategy == GRID: 230 | self._build_grid_param_arrays() 231 | self._build_score_arrays(data) 232 | self._fit_grid(data, verbose=verbose, n_train_samples=n_train_samples, n_val_samples=n_val_samples) 233 | 234 | 235 | def filter_solution_set(self, omitted_score_list=[]): 236 | # Inspired by https://stackoverflow.com/questions/32791911/fast-calculation-of-pareto-front-in-python 237 | assert(self.search_results) 238 | 239 | costs = -1 * np.array([v for k,v in six.iteritems(self.search_results) if ((k[:5] == "score") & (k[6:] not in omitted_score_list))]).T 240 | 241 | is_efficient = np.ones(costs.shape[0], dtype = bool) 242 | for i, c in enumerate(costs): 243 | if is_efficient[i]: 244 | is_efficient[is_efficient] = np.any(costs[is_efficient]<=c, axis=1) # Remove dominated points 245 | 246 | return { k:v[is_efficient] for k,v in six.iteritems(self.search_results)} 247 | 248 | -------------------------------------------------------------------------------- /Experimental_UserStudy_Groups/Group_1_Notebooks/fklearn/fair_selection_aif.py: -------------------------------------------------------------------------------- 1 | """ 2 | Used to search and return models along the Pareto frontier using AIF360 metrics 3 | """ 4 | import six 5 | import itertools 6 | import numpy as np 7 | import tensorflow as tf 8 | 9 | from fklearn.ml_pipeline import MLPipeline 10 | from fklearn.fair_model_selection import filtered_arguments 11 | from aif360.algorithms.inprocessing import AdversarialDebiasing 12 | 13 | # MANDATORY hyperparmaeters for adversarial debiasing 14 | def DEFAULT_ADB_PARAMS(privileged, unprivileged): 15 | """ 16 | Create a dictionary of mandatory hyperparameters for adversarial debiasing 17 | """ 18 | 19 | return {'unprivileged_groups': [unprivileged], 'privileged_groups': [privileged], 20 | 'scope_name': ['adb'], 'sess': [tf.Session()]} 21 | 22 | class ModelSearch(object): 23 | """ 24 | 25 | Parameters 26 | ---------- 27 | models : dict 28 | Dictionary of model names as keys and instantiations of model objects as values. 29 | e.g. { 30 | 'SVC': sklearn.svm.SVC(), 31 | 'LogisticRegression': sklearn.linear_model.LogisticRegression() 32 | } 33 | 34 | metrics : dict[str, (MetricClass, str)] 35 | Dictionary of sklearn/AIF360 fairness metrics. The keys are the display names of the metrics, and 36 | the values are 2-tuples with the first element containing the metric class object, and the second 37 | containing the name of the metric function to evaluate. 38 | e.g. { 39 | 'ClassificationMetric': (aif360.metrics.ClassificationMetric, 'num_generalized_true_positives'), 40 | 'BinaryLabelDatasetMetric': (aif360.metrics.BinaryLabelDatasetMetric, 'disparate_impact') 41 | } 42 | 43 | hyperparameters : dict of dicts of lists 44 | Dictionary with model names as keys and hyperparameter dicts as values. 45 | Each hyperparameter dict has hyperparameters as keys and hyperparameter 46 | settings to try as values. 47 | e.g. 
{ 48 | 'SVC': {'kernel': ['rbf'], 'C': [1, 10]}, 49 | 'LogisticRegression': {'penalty': ['l1', 'l2'], 'C': [1, 10]} 50 | } 51 | 52 | 53 | thresholds : list of floats 54 | List of classifation thresholds to be applied to all classifiers. 55 | Usage is for classifiers that output a probability, rather than a 56 | hard classification. 57 | e.g. [i * 1.0/100 for i in range(100)] 58 | """ 59 | 60 | def __init__(self, models, metrics, hyperparameters, thresholds): 61 | self.models = models 62 | self.metrics = metrics 63 | self.hyperparameters = hyperparameters 64 | self.thresholds = thresholds 65 | self.search_results = [] 66 | self.pareto_optimal_results = [] 67 | 68 | def grid_search(self, dataset, privileged=[], unprivileged=[], test_frac=0.3, preprocessors=[], postprocessors=[]): 69 | """ 70 | Performs a grid search over the specified model + hyperparameter pairs, calculating metric evalutations for each model. 71 | 72 | Args: 73 | dataset (aif360.datasets.StructuredDataset): An instance of a structured dataset 74 | test_frac (float): A real number between 0 and 1 denoting the % of the dataset to be used as test data 75 | privileged (list[dict]): A list of dictionaries containing privileged groups 76 | unprivileged (list[dict]): A list of dictionaries containing unprivileged groups 77 | """ 78 | 79 | # If any pre/postprocessors were supplied, add the option for None by default 80 | preprocessors += [None] 81 | postprocessors += [None] 82 | 83 | self.model_id = 0 84 | 85 | # Try each unique model 86 | for model_name, ModelClass in six.iteritems(self.models): 87 | 88 | # If no hyperparameters were specified, use the defaults. Otherwise setup a grid search 89 | if len(self.hyperparameters[model_name]) == 0: 90 | param_list = [{}] 91 | else: 92 | parameter_keys, parameter_values = zip(*self.hyperparameters[model_name].items()) 93 | param_list = [dict(zip(parameter_keys, v)) for v in itertools.product(*parameter_values)] 94 | 95 | # Grid search through hyperparameters in the current model 96 | for param_set in param_list: 97 | 98 | model = ModelClass(**param_set) 99 | 100 | # Go through each combination of pre/post processing algorithms 101 | for preprocessor, postprocessor in itertools.product(preprocessors, postprocessors): 102 | 103 | mlp = MLPipeline(model, privileged=privileged, unprivileged=unprivileged, preprocessor=preprocessor, postprocessor=postprocessor) 104 | 105 | # Create a new search result for each threshold value 106 | for threshold in self.thresholds: 107 | 108 | if model_name == 'AdversarialDebiasing': 109 | mlp.model.scope_name = str(self.model_id) 110 | self.model_id += 1 111 | 112 | mlp.model.sess.close() 113 | tf.reset_default_graph() 114 | mlp.model.sess = tf.Session() 115 | 116 | mlp.fit(dataset, test_frac=test_frac, threshold=threshold) 117 | search_result = {'model_class': model_name, 118 | 'hyperparameters': param_set, 119 | 'preprocessor': type(preprocessor).__name__ if preprocessor else 'None', 120 | 'postprocessor': type(postprocessor).__name__ if postprocessor else 'None', 121 | 'metrics': {} 122 | } 123 | 124 | # Populate metrics for this search result 125 | for metric_name, metric_args in six.iteritems(self.metrics): 126 | 127 | # The first metric argument is the Metric Class itself. 
The rest are the names of 128 | # submetric evaluation functions 129 | MetricClass = metric_args[0] 130 | 131 | for metric_fn in metric_args[1:]: 132 | metric_val = mlp.evaluate(MetricClass, metric_fn) 133 | metric_category = '{} ({})'.format(metric_name, metric_fn) 134 | search_result['metrics'][metric_category] = metric_val 135 | 136 | self.search_results.append(search_result) 137 | 138 | self.pareto_optimal_results = self.__filter_solution_set() 139 | 140 | def __filter_solution_set(self): 141 | # Inspired by https://stackoverflow.com/questions/32791911/fast-calculation-of-pareto-front-in-python 142 | assert(self.search_results) 143 | 144 | costs = -1 * np.array([[v for _, v in six.iteritems(result['metrics'])] for result in self.search_results]) 145 | 146 | is_efficient = np.ones(costs.shape[0], dtype = bool) 147 | for i, c in enumerate(costs): 148 | if is_efficient[i]: 149 | is_efficient[is_efficient] = np.any(costs[is_efficient]<=c, axis=1) # Remove dominated points 150 | 151 | return [result for i, result in enumerate(self.search_results) if is_efficient[i]] 152 | 153 | def to_csv(self, filename): 154 | """ 155 | Exports the search results as a CSV file 156 | 157 | Args: 158 | filename (str): The name of the file to save the results to 159 | Raises: 160 | AttributeError: If a grid search has not yet been performed, an AttributeError will be raised 161 | """ 162 | 163 | import csv 164 | 165 | if len(self.search_results) == 0: 166 | raise AttributeError("A grid search must be performed before exporting results to CSV") 167 | 168 | # Compute CSV headers for all metrics in the search results 169 | metric_headers = { metric for res in self.pareto_optimal_results for metric in res['metrics'] } 170 | 171 | with open(filename, mode='w') as csv_file: 172 | headers = ['model', 'hyperparameters', 'preprocessor', 'postprocessor', *list(metric_headers)] 173 | writer = csv.DictWriter(csv_file, fieldnames=headers, lineterminator='\n') 174 | writer.writeheader() 175 | 176 | for result in self.pareto_optimal_results: 177 | metric_dict = {metric_name: metric_val for metric_name, metric_val in six.iteritems(result['metrics'])} 178 | 179 | writer.writerow({'model': result['model_class'], 180 | 'preprocessor': result['preprocessor'], 181 | 'postprocessor': result['postprocessor'], 182 | 'hyperparameters': repr(result['hyperparameters'] or 'Default (see sklearn docs)'), 183 | **metric_dict}) 184 | 185 | 186 | -------------------------------------------------------------------------------- /Experimental_UserStudy_Groups/Group_1_Notebooks/fklearn/metric_library.py: -------------------------------------------------------------------------------- 1 | from aif360.metrics import ClassificationMetric 2 | from sklearn.metrics import accuracy_score as accuracy 3 | import math 4 | 5 | def classifier_quality_score(model, test_data, 6 | unprivileged_groups, 7 | privileged_groups): 8 | 9 | classified_data = model.predict(test_data) 10 | metric_library = UnifiedMetricLibrary(test_data, classified_data, unprivileged_groups, privileged_groups) 11 | 12 | # call all metrics 13 | 14 | #accuracy 15 | 16 | acc = metric_library.accuracy_score() 17 | 18 | #fairness 19 | fairness_scores = [] 20 | 21 | # equal opportunity difference 22 | eq_opp_diff = metric_library.equal_opportunity_difference() 23 | fairness_scores.append(eq_opp_diff) 24 | 25 | # average odds difference 26 | avg_odds_diff = metric_library.average_odds_difference() 27 | fairness_scores.append(avg_odds_diff) 28 | 29 | # statistical parity difference 30 | 
stat_parity_diff = metric_library.statistical_parity_difference() 31 | fairness_scores.append(stat_parity_diff) 32 | 33 | # average odds difference 34 | avg_odds_diff = metric_library.average_odds_difference() 35 | fairness_scores.append(avg_odds_diff) 36 | 37 | # calculate & return overall quality score 38 | max_fair_score = max(fairness_scores) 39 | balance_val = acc * (1-max_fair_score) 40 | 41 | return math.sqrt(balance_val) 42 | 43 | 44 | class UnifiedMetricLibrary(): 45 | 46 | def __init__(self, test_data, classified_data, unprivileged_groups, privileged_groups): 47 | 48 | self.test_data = test_data 49 | self.classified_data = classified_data 50 | 51 | self.classification_metric = ClassificationMetric(test_data, classified_data, unprivileged_groups, privileged_groups) 52 | 53 | def accuracy_score(self): 54 | return accuracy(self.test_data.labels, self.classified_data.labels) 55 | 56 | def num_true_positives(self): 57 | return self.classification_metric.num_true_positives() 58 | 59 | def num_false_positives(self): 60 | return self.classification_metric.num_false_positives() 61 | 62 | def num_false_negatives(self): 63 | return self.classification_metric.num_false_negatives() 64 | 65 | def num_generalized_true_positives(self): 66 | return self.classification_metric.num_generalized_true_positives() 67 | 68 | def num_generalized_false_positives(self): 69 | return self.classification_metric.num_generalized_false_positives() 70 | 71 | def num_generalized_false_negatives(self): 72 | return self.classification_metric.num_generalized_false_negatives() 73 | 74 | def num_generalized_true_negatives(self): 75 | return self.classification_metric.num_generalized_true_negatives() 76 | 77 | def true_positive_rate(self): 78 | return self.classification_metric.true_positive_rate() 79 | 80 | def false_positive_rate(self): 81 | return self.classification_metric.false_positive_rate() 82 | 83 | def false_negative_rate(self): 84 | return self.classification_metric.false_negative_rate() 85 | 86 | def true_negative_rate(self): 87 | return self.classification_metric.true_negative_rate() 88 | 89 | def generalized_true_positive_rate(self): 90 | return self.classification_metric.generalized_true_positive_rate() 91 | 92 | def generalized_false_positive_rate(self): 93 | return self.classification_metric.generalized_false_positive_rate() 94 | 95 | def generalized_false_negative_rate(self): 96 | return self.classification_metric.generalized_false_negative_rate() 97 | 98 | def generalized_true_negative_rate(self): 99 | return self.classification_metric.generalized_true_negative_rate() 100 | 101 | def positive_predictive_value(self): 102 | return self.classification_metric.positive_predictive_value() 103 | 104 | def false_discovery_rate(self): 105 | return self.classification_metric.false_discovery_rate() 106 | 107 | def false_omission_rate(self): 108 | return self.classification_metric.false_omission_rate() 109 | 110 | def negative_predictive_value(self): 111 | return self.classification_metric.negative_predictive_value() 112 | 113 | def error_rate(self): 114 | return self.classification_metric.error_rate() 115 | 116 | def false_positive_rate_difference(self): 117 | return self.classification_metric.false_positive_rate_difference() 118 | 119 | def false_negative_rate_difference(self): 120 | return self.classification_metric.false_negative_rate_difference() 121 | 122 | def false_omission_rate_difference(self): 123 | return self.classification_metric.false_omission_rate_difference() 124 | 125 | def 
false_discovery_rate_difference(self): 126 | return self.classification_metric.false_discovery_rate_difference() 127 | 128 | def false_positive_rate_ratio(self): 129 | return self.classification_metric.false_positive_rate_ratio() 130 | 131 | def false_negative_rate_ratio(self): 132 | return self.classification_metric.false_negative_rate_ratio() 133 | 134 | def false_omission_rate_ratio(self): 135 | return self.classification_metric.false_omission_rate_ratio() 136 | 137 | def false_discovery_rate_ratio(self): 138 | return self.classification_metric.false_discovery_rate_ratio() 139 | 140 | def average_abs_odds_difference(self): 141 | return self.classification_metric.average_abs_odds_difference() 142 | 143 | def error_rate_difference(self): 144 | return self.classification_metric.error_rate_difference() 145 | 146 | def error_rate_ratio(self): 147 | return self.classification_metric.error_rate_ratio() 148 | 149 | def num_pred_positives(self): 150 | return self.classification_metric.num_pred_positives() 151 | 152 | def num_pred_negatives(self): 153 | return self.classification_metric.num_pred_negatives() 154 | 155 | def selection_rate(self): 156 | return self.classification_metric.selection_rate() 157 | 158 | def equal_opportunity_difference(self): 159 | return abs(self.classification_metric.equal_opportunity_difference()) 160 | 161 | def average_odds_difference(self): 162 | return abs(self.classification_metric.average_odds_difference()) 163 | 164 | def disparate_impact(self): 165 | return abs(self.classification_metric.disparate_impact()) 166 | 167 | def statistical_parity_difference(self): 168 | return abs(self.classification_metric.statistical_parity_difference()) 169 | -------------------------------------------------------------------------------- /Experimental_UserStudy_Groups/Group_1_Notebooks/fklearn/ml_pipeline.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.preprocessing import StandardScaler 3 | 4 | from aif360.metrics import ClassificationMetric 5 | from aif360.algorithms import Transformer 6 | from aif360.algorithms.postprocessing import CalibratedEqOddsPostprocessing 7 | 8 | class MLPipeline(object): 9 | 10 | """ 11 | Defines a machine-learning pipeline for evaluating fairness in predictors. For usage, see example at the bottom of the file. 12 | 13 | Args: 14 | model (sklearn.model | aif360.algorithms.inprocessing): An sklearn predictor OR an AIF360 inprocessing algorithm 15 | privileged (list[dict[str, float]]): A list of dictionaries with keys representing privileged attribute + value pairs 16 | unprivileged (list[dict[str, float]]): A list of dictionaries with keys representing unprivileged attribute + value pairs 17 | preprocessor (aif360.algorithms.preprocessing): An instance of an AIF360 preprocessing algorithm 18 | postprocessor (aif360.algorithms.postprocessing): An instance of an AIF360 postprocessing algorithm 19 | """ 20 | 21 | def __init__(self, model, privileged=[], unprivileged=[], preprocessor=None, postprocessor=None): 22 | self.model = model 23 | self.privileged = privileged 24 | self.unprivileged = unprivileged 25 | self.preprocessor = preprocessor 26 | self.postprocessor = postprocessor 27 | self.dataset_train = [] 28 | self.dataset_test = [] 29 | self.test_predictions = [] 30 | 31 | 32 | def fit(self, dataset, test_frac=0.3, threshold=0.5, feature_scaling=False): 33 | """ 34 | Trains our model on the dataset. 
Uses different control flow depending on if we are using an 35 | sklearn model or an AIF360 inprocessing algorithm 36 | 37 | Args: 38 | dataset (aif360.datasets.StructuredDataset): An instance of a structured dataset 39 | test_frac (float): A real number between 0 and 1 denoting the % of the dataset to be used as test data 40 | threshold (float): A real number between 0 and 1 denoting the threshold of acceptable class imbalance 41 | """ 42 | 43 | if test_frac < 0 or test_frac > 1: 44 | raise ValueError("Parameter test_frac must be between 0 and 1") 45 | 46 | dataset_train, dataset_test = dataset.split([1-test_frac], shuffle=False) 47 | 48 | # If a preprocessing algorithm was supplied, apply that transformations first 49 | if self.preprocessor: 50 | dataset_train = self.preprocessor.fit_transform(dataset_train) 51 | dataset_test = self.preprocessor.fit_transform(dataset_test) 52 | 53 | self.dataset_train = dataset_train 54 | self.dataset_test = dataset_test 55 | 56 | self.__fit_inprocessing(threshold, feature_scaling) 57 | 58 | def __fit_inprocessing(self, threshold, feature_scaling): 59 | """ 60 | Trains an AIF360 inprocessing model on the provided dataset. 61 | 62 | Args: 63 | """ 64 | 65 | # Apply feature scaling if specified 66 | if feature_scaling: 67 | scaler = StandardScaler().fit(self.dataset_train.features) 68 | self.dataset_train.features = scaler.fit_transform(self.dataset_train.features) 69 | self.dataset_test.features = scaler.transform(self.dataset_test.features) 70 | 71 | self.model.fit(self.dataset_train) 72 | 73 | 74 | # Make our predictions, without thresholds for now 75 | dataset_test_pred = self.model.predict(self.dataset_test) 76 | 77 | # If a postprocessing algorithm was specified, transform the test results 78 | if self.postprocessor: 79 | dataset_test_pred = self.postprocessor.fit(self.dataset_test, dataset_test_pred) \ 80 | .predict(dataset_test_pred) 81 | 82 | self.classified_data = dataset_test_pred 83 | 84 | 85 | def evaluate(self, metric, submetric): 86 | """ 87 | Evaluates an AIF360 metric against the trained model. 
88 | 89 | Args: 90 | metric (aif360.metrics.Metric): An AIF360 metric class 91 | submetric (str): A string denoting the metric evaluation function that is to be called on the provided metric class 92 | Returns: 93 | float: A float denoting the performance of each method evaluation within the specified metric on the trained model 94 | Raises: 95 | AttributeError: If a model has not been trained yet, or 96 | If the provided submetric function does not exist on the metric class, or 97 | If the provided submetric function contains arguments other than "privileged" 98 | 99 | """ 100 | 101 | from inspect import signature 102 | import re 103 | 104 | if not self.dataset_train: 105 | raise AttributeError("A model must be fit before evaluating a metric") 106 | 107 | curr_metric = metric(self.dataset_test, self.classified_data, unprivileged_groups=self.unprivileged, privileged_groups=self.privileged) 108 | 109 | # Retrieve the callable evalation function 'submetric' of this metric instance 110 | submetric_fn = getattr(curr_metric, submetric) 111 | 112 | return submetric_fn() 113 | -------------------------------------------------------------------------------- /Experimental_UserStudy_Groups/Group_1_Notebooks/fklearn/sample.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sklearn as skl 3 | import six 4 | from sklearn.svm import SVC 5 | from sklearn.linear_model import LogisticRegression 6 | 7 | from sklearn.metrics import accuracy_score 8 | from fair_metrics import causal_discrimination_score, group_discrimination_score, false_positive_rate_equality, false_negative_rate_equality 9 | from fair_model_selection import FairSearch 10 | 11 | from datasets import load_adult_income 12 | 13 | import os 14 | 15 | os.chdir("fklearn/") 16 | 17 | 18 | data = load_adult_income() 19 | models = {'LogisticRegression': LogisticRegression} 20 | metrics = {'Causal': group_discrimination_score, 'Accuracy': accuracy_score} 21 | parameters = { 22 | # 'SVC': {'kernel': ['rbf'], 'C': [1, 10], 'probability': [True]}, 23 | 'LogisticRegression': {'penalty': ['l1', 'l2'], 'C': [1, 10]} 24 | } 25 | 26 | thresholds = [i * 1.0/100 for i in range(10)] 27 | Search = FairSearch(models, metrics, metrics, parameters, thresholds) 28 | Search.fit(data[0]) 29 | 30 | print(Search) 31 | 32 | -------------------------------------------------------------------------------- /Experimental_UserStudy_Groups/Group_1_Notebooks/fklearn/sample_aif.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sklearn as skl 3 | import six 4 | from sklearn.linear_model import LogisticRegression 5 | from sklearn.neighbors import KNeighborsClassifier 6 | from sklearn.svm import SVC 7 | 8 | from aif360.datasets import AdultDataset, GermanDataset 9 | from aif360.metrics import BinaryLabelDatasetMetric, ClassificationMetric 10 | from aif360.algorithms.preprocessing.optim_preproc_helpers.opt_tools import OptTools 11 | from aif360.algorithms.preprocessing.optim_preproc_helpers.distortion_functions import get_distortion_adult 12 | from aif360.algorithms.preprocessing import DisparateImpactRemover, LFR, Reweighing, OptimPreproc 13 | from aif360.algorithms.inprocessing import AdversarialDebiasing 14 | from aif360.algorithms.postprocessing import CalibratedEqOddsPostprocessing, EqOddsPostprocessing, RejectOptionClassification 15 | 16 | from fair_selection_aif import AIF360Search, DEFAULT_ADB_PARAMS 17 | 18 | import os 19 | 20 | dataset = 
GermanDataset() 21 | models = {'LogisticRegression': LogisticRegression, 'KNeighborsClassifier': KNeighborsClassifier} 22 | metrics = {'ClassificationMetric': [ClassificationMetric, 23 | 'num_generalized_true_positives', 24 | 'num_true_negatives', 25 | 'false_positive_rate', 26 | 'false_negative_rate', 27 | 'generalized_false_positive_rate' 28 | ] 29 | # 'BinaryLabelDatasetMetric': [BinaryLabelDatasetMetric, 'disparate_impact'] 30 | } 31 | unprivileged = [{'age': 0, 'sex': 0}] 32 | privileged = [{'age': 1, 'sex': 1}] 33 | preprocessor_args = {'unprivileged_groups': unprivileged, 'privileged_groups': privileged} 34 | 35 | # Hyperparameters may either be specified as a dictionary of string to lists, or by an empty dictionary to 36 | # use the default ones set by sklearn (or AIF360). The keys are the names of the hyperparameters, and the 37 | # values and lists of possible values to form a grid search over 38 | parameters = { 39 | 'LogisticRegression': {'penalty': ['l1', 'l2'], 'C': [0.1, 0.5, 1]}, 40 | 'KNeighborsClassifier': {} 41 | } 42 | thresholds = [i * 10.0/100 for i in range(5)] 43 | preprocessors=[DisparateImpactRemover(), Reweighing(**preprocessor_args)] 44 | postprocessors=[CalibratedEqOddsPostprocessing(**preprocessor_args), EqOddsPostprocessing(**preprocessor_args), RejectOptionClassification(**preprocessor_args)] 45 | 46 | Search = AIF360Search(models, metrics, parameters, thresholds) 47 | Search.grid_search(dataset, privileged=privileged, unprivileged=unprivileged, preprocessors=preprocessors, postprocessors=postprocessors) 48 | 49 | Search.to_csv("interface/static/data/test-file.csv") 50 | 51 | -------------------------------------------------------------------------------- /Experimental_UserStudy_Groups/Group_1_Notebooks/fklearn/scikit_learn_wrapper.py: -------------------------------------------------------------------------------- 1 | from sklearn.linear_model import LogisticRegression as lr 2 | from sklearn.neighbors import KNeighborsClassifier as knc 3 | from sklearn.ensemble import RandomForestClassifier as rfc 4 | from sklearn.svm import SVC as svc 5 | 6 | class ScikitLearnWrapper(): 7 | 8 | def __init__(self, model_class, **kwargs): 9 | self.model = model_class(**kwargs) 10 | 11 | 12 | def fit(self, dataset_train): 13 | self.model.fit(dataset_train.features, dataset_train.labels.ravel()) 14 | 15 | def predict(self, dataset_test): 16 | 17 | dataset_test_pred = dataset_test.copy() 18 | dataset_test_pred.labels = self.model.predict(dataset_test.features).reshape(-1,1) 19 | 20 | return dataset_test_pred 21 | 22 | 23 | LogisticRegression = lambda **kwargs : ScikitLearnWrapper(lr,**kwargs) 24 | KNeighborsClassifier = lambda **kwargs : ScikitLearnWrapper(knc,**kwargs) 25 | RandomForestClassifier = lambda **kwargs : ScikitLearnWrapper(rfc,**kwargs) 26 | SVC = lambda **kwargs : ScikitLearnWrapper(svc,**kwargs) 27 | -------------------------------------------------------------------------------- /Experimental_UserStudy_Groups/Group_2_Notebooks/Task_2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "comet_cell_id": "b4bcefbe0a41f" 7 | }, 8 | "source": [ 9 | "Before beginning task 2, make sure to run the following cell to import all necessary packages. If you need any additional packages, add the import statement(s) to the cell below and re-run the cell before adding and running code that uses the additional packages. 
\n", 10 | "\n", 11 | "**For this task you are only allowed to use functionality provided by scikit-learn to train and evaluate your models. If you have your own custom code you would like to add to evaluate your models, you may do so (without using functionality provided by the tools used in the previous tasks).**\n" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": { 18 | "comet_cell_id": "2c7cd3e562e7" 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "# Load all necessary packages\n", 23 | "import numpy as np\n", 24 | "import sklearn as skl\n", 25 | "import six\n", 26 | "\n", 27 | "# dataset\n", 28 | "from aif360.datasets import GermanDataset\n", 29 | "\n", 30 | "# models\n", 31 | "from sklearn.linear_model.logistic import LogisticRegression \n", 32 | "from sklearn.neighbors import KNeighborsClassifier\n", 33 | "from sklearn.ensemble import RandomForestClassifier \n", 34 | "from sklearn.svm import SVC \n", 35 | "\n", 36 | "# metric\n", 37 | "from sklearn.metrics import accuracy_score" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": { 43 | "comet_cell_id": "46991f420accb" 44 | }, 45 | "source": [ 46 | "# Tutorial 2: scikit-learn\n", 47 | "\n", 48 | "Now we show you how to train and evaluate models using scikit-learn. You will use the knowledge from this tutorial to complete Task 2, so please read thoroughly and execute the code cells in order.\n", 49 | "\n", 50 | "## Step 1: Import the dataset\n", 51 | "\n", 52 | "First we need to import the dataset we will use for training and testing our model.\n", 53 | "\n", 54 | "Below, we provide code that imports the German credit dataset. **Note: a warning may pop up when you run this cell. As long as you don't see any errors in the code, it is fine to continue.**\n" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": { 61 | "comet_cell_id": "936840797dfba" 62 | }, 63 | "outputs": [], 64 | "source": [ 65 | "data_orig = GermanDataset()" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": { 71 | "comet_cell_id": "1f7cb8ab1c822" 72 | }, 73 | "source": [ 74 | "## Step 2: Split the dataset into train and test data\n", 75 | "\n", 76 | "Now that the dataset has been imported, we need to split the original dataset into training and test data. \n", 77 | "\n", 78 | "The code to do so is as follows:" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": { 85 | "comet_cell_id": "8f3e98f0712d1" 86 | }, 87 | "outputs": [], 88 | "source": [ 89 | "data_orig_train, data_orig_test = data_orig.split([0.7], shuffle=False)" 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": { 95 | "comet_cell_id": "8e6392efce817" 96 | }, 97 | "source": [ 98 | "## Step 3: Initialize model \n", 99 | "\n", 100 | "Next, we need to initialize our model. We can initialize a model with the default parameters (see documentation), no parameters (which initializes with default parameter values), or we can modify parameter values.\n", 101 | "\n", 102 | "For the tutorial, we use the Logistic Regression model with default hyper-parameter values; you will be able to use any of the scikit-learn models listed above, and modify hyper-parameter values, when completing the exercise. \n", 103 | "\n", 104 | "Below we provide code for initialzing the Logistic Regression model, with default hyper-parameter values. 
We also provide (commented) code that reminds you of how to initialize each model available during this exercise." 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "metadata": { 111 | "comet_cell_id": "e89b66337a2a6" 112 | }, 113 | "outputs": [], 114 | "source": [ 115 | "# model is populated with default values; modifying parameters is allowed but optional\n", 116 | "model = LogisticRegression(penalty='l2', dual=False,tol=0.0001,C=1.0,\n", 117 | " fit_intercept=True,intercept_scaling=1,class_weight=None,\n", 118 | " random_state=None,solver='liblinear',max_iter=100, \n", 119 | " multi_class='warn',verbose=0,warm_start=False,\n", 120 | " n_jobs=None)\n", 121 | "\n", 122 | "#model = KNeighborsClassifier(n_neighbors=5,weights='uniform',algorithm='auto',\n", 123 | "# leaf_size=30,p=2,metric='minkowski',metric_params=None,\n", 124 | "# n_jobs=None)\n", 125 | "\n", 126 | "#model = RandomForestClassifier(n_estimators='warn',criterion='gini',max_depth=None,\n", 127 | "# min_samples_leaf=1,min_weight_fraction_leaf=0.0,\n", 128 | "# min_impurity_split=None, bootstrap=True, oob_score=False, n_jobs=None, \n", 129 | "# random_state=None, verbose=0, warm_start=False, class_weight=None)\n", 130 | "\n", 131 | "#model = SVC(C=1.0, kernel='rbf', degree=3, gamma='auto_deprecated', coef0=0.0, shrinking=True, \n", 132 | "# probability=False, tol=0.001, cache_size=200, class_weight=None, verbose=False, \n", 133 | "# max_iter=-1, decision_function_shape='ovr', random_state=None)" 134 | ] 135 | }, 136 | { 137 | "cell_type": "markdown", 138 | "metadata": { 139 | "comet_cell_id": "7bba44eb7e995" 140 | }, 141 | "source": [ 142 | "## Step 4: Train the model\n", 143 | "\n", 144 | "After initialing the model, we train it using the training dataset. \n", 145 | "\n", 146 | "Below we provide code that prepares our dataset to be used with scikit-learn and trains the model using our prepared data." 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": null, 152 | "metadata": { 153 | "comet_cell_id": "470c3e83a0934" 154 | }, 155 | "outputs": [], 156 | "source": [ 157 | "# prepare data for use with scikit-learn\n", 158 | "from sklearn.preprocessing import StandardScaler\n", 159 | "\n", 160 | "scaler = StandardScaler()\n", 161 | "\n", 162 | "x_train = scaler.fit_transform(data_orig_train.features)\n", 163 | "y_train = data_orig_train.labels.ravel()\n", 164 | "\n", 165 | "\n", 166 | "model.fit(x_train, y_train)" 167 | ] 168 | }, 169 | { 170 | "cell_type": "markdown", 171 | "metadata": { 172 | "comet_cell_id": "483779469d731" 173 | }, 174 | "source": [ 175 | "## Step 5: Evaluate the model\n", 176 | "\n", 177 | "Now we're ready to evaluate your trained model with the test data using the performance metric provided by scikit-learn.\n", 178 | "\n", 179 | "Below we provide code snippets that show how to evaluate a model's performance using scikit-learn." 
180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": 1, 185 | "metadata": { 186 | "comet_cell_id": "f3c98baf23fd4" 187 | }, 188 | "outputs": [ 189 | { 190 | "ename": "NameError", 191 | "evalue": "name 'lr' is not defined", 192 | "output_type": "error", 193 | "traceback": [ 194 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 195 | "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", 196 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mpredictions\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata_orig_test\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0maccuracy\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0maccuracy_score\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata_orig_test\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlabels\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpredictions\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 197 | "\u001b[0;31mNameError\u001b[0m: name 'lr' is not defined" 198 | ] 199 | } 200 | ], 201 | "source": [ 202 | "x_test = scaler.fit_transform(data_orig_test.features)\n", 203 | "\n", 204 | "predictions = model.predict(x_test)\n", 205 | "accuracy = accuracy_score(data_orig_test.labels.ravel(), predictions)\n", 206 | "\n", 207 | "print ('Accuracy = ' + str(accuracy))\n" 208 | ] 209 | }, 210 | { 211 | "cell_type": "markdown", 212 | "metadata": { 213 | "comet_cell_id": "f42e810454232" 214 | }, 215 | "source": [ 216 | "# Task 2: Model evaluation with scikit-learn\n", 217 | "\n", 218 | "Your turn! Use what you learned in the above tutorial to train and evaluate models for performance, fairness, and overall quality. You will use functionality provided by scikit-learn to meet the following goals:\n", 219 | "\n", 220 | "1. **Describe a model you believe will perform the best (e.g., have the highest accuracy score).** \n", 221 | "\n", 222 | "2. **Describe a model you believe will be the most fair, regardless of performance.** \n", 223 | "\n", 224 | "3. **Describe a model you believe will best balance both performance and fairness.** \n", 225 | "\n", 226 | "Make sure you include any modifications to model hyper-parameters. **As a reminder, there is no \"absolute best\" model for each of the above goals. You are expected to explore the space of model configurations available to find a model that best meets the above goals.**\n", 227 | "\n", 228 | "**Keep in mind, training machine learning models is often a time intensive endeavor.** One way you can minimize time to finish the assignment is to minimize the times you have to, for example, train a given model to then evaluate it. 
You can do this by putting the code that initializes and trains your model(s) in its own separate cell and only execute this cell when needed.\n", 229 | "\n", 230 | "\n", 231 | "Once you feel you've met the above goals, go to the Evaluating ML Models Exercise Response Form to enter your responses under the section labeled 'Task 2'.\n" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": null, 237 | "metadata": { 238 | "comet_cell_id": "9d75f2fa60fd2" 239 | }, 240 | "outputs": [], 241 | "source": [ 242 | "# TODO : Use this cell to write code for completing task 2\n", 243 | "\n", 244 | "\n", 245 | "\n" 246 | ] 247 | }, 248 | { 249 | "cell_type": "markdown", 250 | "metadata": { 251 | "comet_cell_id": "23c64e4f35267" 252 | }, 253 | "source": [ 254 | "When you're ready to go on to the next task, open a new tab and click here." 255 | ] 256 | } 257 | ], 258 | "metadata": { 259 | "comet_paths": [ 260 | [ 261 | "4c7b42fa/ML Model Eval Assignment.ipynb", 262 | 1567249528393 263 | ], 264 | [ 265 | "008a0d50/Task_3.ipynb", 266 | 1567538778080 267 | ], 268 | [ 269 | "7f57d529/Task_3.ipynb", 270 | 1567556795348 271 | ], 272 | [ 273 | "7f57d529/Task_2.ipynb", 274 | 1567556951248 275 | ], 276 | [ 277 | "3f045f6d/Task_2.ipynb", 278 | 1567604637349 279 | ] 280 | ], 281 | "comet_tracking": true, 282 | "kernelspec": { 283 | "display_name": "Python 3", 284 | "language": "python", 285 | "name": "python3" 286 | }, 287 | "language_info": { 288 | "codemirror_mode": { 289 | "name": "ipython", 290 | "version": 3 291 | }, 292 | "file_extension": ".py", 293 | "mimetype": "text/x-python", 294 | "name": "python", 295 | "nbconvert_exporter": "python", 296 | "pygments_lexer": "ipython3", 297 | "version": "3.7.3" 298 | } 299 | }, 300 | "nbformat": 4, 301 | "nbformat_minor": 2 302 | } 303 | -------------------------------------------------------------------------------- /Experimental_UserStudy_Groups/Group_2_Notebooks/fklearn/NOTES.txt: -------------------------------------------------------------------------------- 1 | Goal: want to be able to use fairkit learn just like you would sklearn 2 | 3 | (1) One potentially useful feature (aside from the feature of finding the best model) would be able to import fairness metrics like you can import accuracy metrics in scikit-learn. This could be especially useful since in our class project we were able to show that having the metrics themselves was useful. 4 | 5 | http://scikit-learn.org/stable/modules/generated/sklearn.metrics.accuracy_score.html 6 | 7 | Ex scikit-learn: 8 | ``` 9 | from sklearn.metrics import roc_curve 10 | from sklearn.metrics import accuracy_score 11 | ``` 12 | 13 | Ex fairkit-learn: 14 | ``` 15 | from fklearn.fair_metrics import causal_fairness 16 | from fklearn.fair_metrics import false_postive_rate_equality 17 | ``` 18 | 19 | Sorelle A. Friedler (Haverford)'s work: 20 | 21 | This group has already implemented many of these but for the purpose of studying "fairness-enhancing interventions in machine learning" https://arxiv.org/pdf/1802.04422.pdf 22 | 23 | They have a repository with many of the metrics already implemented: 24 | https://github.com/algofairness/fairness-comparison 25 | Problems: 26 | -Not clear documentation on how to run this software as a stand alone package 27 | -How do we not overlap with what they did/ contribute something novel 28 | -Can we collaborate with them? 
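
For idea (1) above, a minimal sketch of the sklearn-style call shape we are aiming for (hypothetical API: the exact metric function signature is still an assumption, and the variable names below are placeholders):
```
from fklearn.fair_metrics import group_discrimination_score

# intended to mirror sklearn.metrics.accuracy_score(y_true, y_pred),
# plus a map describing where the protected attribute lives in X
score = group_discrimination_score(y_true=y_val, y_pred=predictions,
                                   attribute_map=attribute_map['Sex'])
```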
29 | 30 | (2) For the full model search: 31 | Modeled after http://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html#sklearn.model_selection.GridSearchCV 32 | ``` 33 | from fklearn.fair_model_selection import FairSearch 34 | 35 | class FairSearch(model_classes, protected_attributes, metrics, hyperparameters): 36 | """ 37 | Description 38 | TODO 39 | 40 | Parameters 41 | ---------- 42 | model_classes : list of stings 43 | List of sklearn model classes that one wants to search over 44 | TODO: put list of supported packages (e.g. sklearn.linear_model.LogisticRegression) 45 | - do we want to implement/support any of the fair-aware ones ourselves? 46 | 47 | protected_attributes : list of ints 48 | List of integers corresponding to the index of the protected attributes in X 49 | TODO: or strings? 50 | 51 | metrics : list of strings 52 | e.g. sklearn supported [sklearn.metrics.accuracy_score, ...] 53 | and our fairness metrics [fklearn.fair_metrics.causal_fairness] 54 | 55 | hyperparameters : list of strings 56 | e.g ['l1', 'class_weight'] 57 | TODO: will need to put in some sort of error if they don't work with the sklearn piece 58 | 59 | Attributes 60 | ---------- 61 | TODO 62 | """ 63 | 64 | def fit(self, X, y): 65 | TODO: or do we want the protected attributes in this function instead? 66 | pass 67 | 68 | ``` 69 | What do we want to return? A model, a Parateo frontier, a visulization? 70 | 71 | 72 | (3) Other great features of sklearn that we would want to include as well: 73 | 74 | sklearn.datasets (fetch and loads popular datasets) 75 | ``` 76 | fklearn.datasets.fetch_propublica 77 | fklearn.datasets.fetch_propublica_vectorized 78 | fklearn.datasets.fetch_adult 79 | fklearn.datasets.fetch_adult_vectorized 80 | ``` 81 | 82 | TODO: 83 | -How many of the "21" definitions of fairness can we implement in this package? 84 | -will eventually want to release as a pip package correct? 85 | -One of the selling points could that it works seamlessly with sklearn 86 | 87 | THOUGHTS: 88 | -Contributions of our work: (1) very user-friendly library of fairness metrics (2) fair-aware models (2) model selection with fairness as a criteria under consideration 89 | 90 | -I think usability and examples are super important. For example, I think it's a big reason why sklearn is used so heavily. Example: http://scikit-learn.org/stable/modules/generated/sklearn.metrics.accuracy_score.html 91 | 92 | -Keep all our notation very similar to fairkit-learn (maybe eventually we can get a pull from them??) 93 | 94 | 95 | 96 | 97 | 98 | -------------------------------------------------------------------------------- /Experimental_UserStudy_Groups/Group_2_Notebooks/fklearn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/INSPIRED-GMU/fairkit-learn/1c84e9500f8bf11bc2948d7aea8cd8ec0d1297c8/Experimental_UserStudy_Groups/Group_2_Notebooks/fklearn/__init__.py -------------------------------------------------------------------------------- /Experimental_UserStudy_Groups/Group_2_Notebooks/fklearn/datasets.py: -------------------------------------------------------------------------------- 1 | """ 2 | Basic IO for loading fair datasets 3 | 4 | Ex: http://scikit-learn.org/stable/datasets/twenty_newsgroups.html#newsgroups 5 | 6 | TODO: should we have "vectorized" and "non-vectorized" versions here like sklearn does? 
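Example (a sketch of the intended call pattern; it assumes the adult income
CSV files are present at ../data/adult_income/ as referenced below):

    >>> from fklearn.datasets import load_adult_income
    >>> data, raw_train = load_adult_income(train_val_split=0.5)
    >>> sorted(data['attribute_map'].keys())
    ['Race', 'Sex']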
7 | """ 8 | from __future__ import division 9 | import warnings 10 | import numpy as np 11 | import sklearn 12 | import pandas as pd 13 | 14 | def fetch_adult_income(): 15 | #directly downloads the data 16 | #calls load to load the data 17 | #TODO 18 | pass 19 | 20 | def load_adult_income_train_val(): 21 | #for the user study so they can't access the test set 22 | return load_adult_income(train_val_split=0.5, notest=True) 23 | 24 | def load_adult_income(train_val_split=0.5, notest=False): 25 | """ 26 | Load files and data from the propublica dataset 27 | 28 | Parameters 29 | ---------- 30 | train_val_split : float 31 | Amount to split the training set to create a train and validation set 32 | 33 | Returns 34 | ------- 35 | data : dict 36 | With keys 37 | 38 | X : 2-d ndarray 39 | 40 | y : 1-d ndarray 41 | 42 | (or X_train, y_train, X_test, y_test if subset=='all') 43 | 44 | feat_names : list of strs 45 | List of the feature names corresponding to the indices of the columns 46 | of X 47 | 48 | attribute_map : dict of dicts of dicts 49 | Denotes the protected attributes of the category of protected 50 | attribute (e.g. "Race") and maps the attribute name to the column and value that correspond 51 | to that attribute 52 | e.g. one-hot encoding for a one-hot encoding denoting the columns ("col") and values ("val") 53 | 54 | {"Race": {"Purple": {"col": 0, "val": 1}, "Green": {"col": 1, "val": 1}}, 55 | "Sex" : {"Female": {"col": 0, "val": 1}, "Male": {"col": 1, "val": 1}} 56 | 57 | e.g. categorical encoding {"Purple": {"col": 0, "val: 1"}, 58 | "Green": {"col": 0, "val: 2"}} 59 | 60 | Note: these MUST be mutually exclusive categories! 61 | 62 | is_categorical : boolean 63 | True if the y-values are categorical 64 | False otherwise (indicating a one-hot encoding) 65 | 66 | Examples 67 | -------- 68 | >>> from fklearn.datasets import load_adult_income 69 | """ 70 | data = {} 71 | data['is_categorical'] = False 72 | header_names = ["Age", "Workclass", "FNLWGT", "Education", "Education-Num", "Marital Status", "Occupation", "Relationship", "Race", "Sex", "Capital Gain", "Capital Loss", "Hours per Week", "Native Country", "Income"] 73 | 74 | train_ref = pd.read_csv("../data/adult_income/train.csv", index_col = False, delimiter=' *, *', engine='python', names = header_names) 75 | train_all = pd.get_dummies(train_ref) 76 | train_all.columns = train_all.columns.str.replace('_ ', '_') 77 | 78 | end_idx_train_val_split = int(np.floor(train_val_split*train_all.shape[0])) 79 | train = train_all[:end_idx_train_val_split] 80 | val = train_all[end_idx_train_val_split:] 81 | 82 | y_train = train["Income_<=50K"].copy() 83 | X_train = train.drop(["Income_<=50K","Income_>50K", "Native Country_Holand-Netherlands"], axis=1).copy() 84 | y_val = val["Income_<=50K"].copy() 85 | X_val = val.drop(["Income_<=50K","Income_>50K", "Native Country_Holand-Netherlands"], axis=1).copy() 86 | 87 | test_ref = pd.read_csv( "../data/adult_income/test.csv", index_col = False, delimiter=' *, *', engine='python', names = header_names) 88 | test = pd.get_dummies(test_ref) 89 | test.columns = test.columns.str.replace('_ ', '_') 90 | y_test = test["Income_<=50K."].copy() 91 | X_test = test.drop(["Income_<=50K.","Income_>50K."], axis=1).copy() 92 | 93 | data['feat_names'] = [str(col) for col in X_test.columns] 94 | 95 | assert X_train.shape[1] == X_val.shape[1] == X_test.shape[1] 96 | assert X_train.shape[0] == y_train.shape[0] 97 | assert X_val.shape[0] == y_val.shape[0] 98 | assert X_test.shape[0] == y_test.shape[0] 99 | 100 | 
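    # Package the splits as plain numpy arrays and record which one-hot columns
    # encode the protected attributes (Race_* / Sex_*), so that fairness metrics
    # can look them up later through data['attribute_map'].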
data['X_train'] = X_train.values 101 | data['y_train'] = y_train.values 102 | data['X_val'] = X_val.values 103 | data['y_val'] = y_val.values 104 | data['X_test'] = X_test.values 105 | data['y_test'] = y_test.values 106 | 107 | attribute_map = {'Race': {}, 'Sex': {}} 108 | for ii, col in enumerate(X_train): 109 | if col.startswith('Race'): 110 | attribute_map['Race'][col] = {'col': ii, 'val': 1} 111 | elif col.startswith('Sex'): 112 | attribute_map['Sex'][col] = {'col': ii, 'val': 1} 113 | data['attribute_map'] = attribute_map 114 | 115 | if notest: 116 | del data['X_test'] 117 | del data['y_test'] 118 | 119 | unprocessed_train_data = train_ref 120 | 121 | return data, unprocessed_train_data 122 | 123 | def fetch_propublica(subset='train'): 124 | """ 125 | Load files and data from the propublica dataset 126 | 127 | Parameters 128 | ---------- 129 | subset : 'train' or 'test', 'all' 130 | Select which dataset to load 131 | 132 | Returns 133 | ------- 134 | X : 2-d ndarray 135 | 136 | y : 2-d ndarray 137 | 138 | attribute_map : dict of dicts 139 | Denotes the protected attributes of the category of protected 140 | attribute (e.g. "Race") to measure causal fairness 141 | maps the attribute name to the column and value that correspond 142 | to that attribute 143 | e.g. one-hot encoding {"Purple": {"col": 0, "val": 1}, 144 | "Green": {"col": 1, "val": 1}} 145 | 146 | e.g. categorical encoding {"Purple": {"col": 0, "val: 1"}, 147 | "Green": {"col": 0, "val: 2"}} 148 | 149 | Note: these MUST be mutually exclusive categories! 150 | 151 | is_categorical : boolean 152 | True if the y-values are categorical 153 | False otherwise (indicating a one-hot encoding) 154 | 155 | 156 | Examples 157 | -------- 158 | >>> from fklearn.datasets import fetch_propublica 159 | 160 | """ 161 | pass 162 | 163 | if __name__ == '__main__': 164 | load_adult_income() 165 | 166 | #TODO: other dataset functions 167 | 168 | -------------------------------------------------------------------------------- /Experimental_UserStudy_Groups/Group_2_Notebooks/fklearn/fair_model_selection.py: -------------------------------------------------------------------------------- 1 | """ 2 | Used to search and return models along the Pareto frontier 3 | 4 | Inspiration: http://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html#sklearn.model_selection.GridSearchCV 5 | """ 6 | from __future__ import division 7 | import warnings 8 | import six 9 | import itertools 10 | import inspect 11 | import numpy as np 12 | import numpy.ma as ma 13 | 14 | RANDOM = 'random' 15 | GRID = 'grid' 16 | THRESHOLD_STR = 'threshold' 17 | 18 | SEARCH_STRATEGIES = [GRID, RANDOM] 19 | 20 | def filtered_arguments(func): 21 | required_args = six.viewkeys(inspect.signature(func).parameters) 22 | 23 | def inner(*args, **kwargs): 24 | kwargs = { k:v for k,v in six.iteritems(kwargs) if k in required_args } 25 | return func(*args, **kwargs) 26 | return inner 27 | 28 | class FairSearch(): 29 | """ 30 | Description 31 | TODO 32 | 33 | Parameters 34 | ---------- 35 | models : dict 36 | Dictionary of model names as keys and instantiations of model objects as values. 37 | e.g. { 38 | 'SVC': sklearn.svm.SVC(), 39 | 'LogisticRegression': sklearn.linear_model.LogisticRegression() 40 | } 41 | 42 | metrics : dict 43 | Dictionary of sklearn and fklearn fairness metrics 44 | e.g. 
{ 45 | 'Causal': fklearn.fair_metrics.causal_discrimination_score, 46 | 'Accuracy': sklearn.metrics.accuracy_score 47 | } 48 | 49 | parameters : dict of dicts of lists 50 | Dictionary with model names as keys and hyperparameter dicts as values. 51 | Each hyperparameter dict has hyperparameters as keys and hyperparameter 52 | settings to try as values. 53 | e.g. { 54 | 'SVC': {'kernel': ['rbf'], 'C': [1, 10]}, 55 | 'LogisticRegression': {'penalty': ['l1', 'l2'], 'C': [1, 10]} 56 | } 57 | 58 | 59 | thresholds : list of floats 60 | List of classifation thresholds to be applied to all classifiers. 61 | Usage is for classifiers that output a probability, rather than a 62 | hard classification. 63 | e.g. [i * 1.0/100 for i in range(100)] 64 | 65 | Attributes 66 | ---------- 67 | pareto_optimal_results : dict of masked arrays 68 | Keys strings describing the model parameter or score metric. 69 | e.g. {'param_C': masked_array(data = [0, --], mask = [False, True]), 70 | 'param_L1': masked_array(data = [0, --], mask = [False, True]), 71 | 'train_causal_fairness_score' : [0.8, 0.7], 72 | 'val_causal_fairness_score' : [0.71, 0.64], 73 | 'test_causal_fairness_score' : [0.7, 0.65], 74 | 'train_accuracy_score' : [0.6, 0.8], 75 | 'val_accuracy_score' : [0.57, 0.81], 76 | 'test_accuracy_score' : [0.55, 0.78], 77 | 'fit_time' : [0.08, 1.1]} 78 | 79 | Examples 80 | -------- 81 | >>> from fklearn.fair_model_selection import FairSearch 82 | """ 83 | 84 | def __init__(self, models, fairness_metrics, performance_metrics, parameters, thresholds): 85 | self.models = models 86 | self.fairness_metrics = { k:filtered_arguments(v) for k,v in six.iteritems(fairness_metrics) } 87 | self.performance_metrics = { k:filtered_arguments(v) for k,v in six.iteritems(performance_metrics) } 88 | self.parameters = parameters 89 | self.thresholds = thresholds 90 | self.search_results = {} 91 | self.pareto_optimal_results = {} 92 | 93 | def _build_grid_param_arrays(self): 94 | self.n_experiments = 0 95 | attribute_categories = [] 96 | 97 | for key, _ in six.iteritems(self.models): 98 | model = self.models[key] 99 | keys, values = zip(*self.parameters[key].items()) 100 | keys = keys + (THRESHOLD_STR, ) 101 | values = values + (self.thresholds, ) 102 | attribute_categories.extend(keys) 103 | self.n_experiments += len([dict(zip(keys, v)) for v in itertools.product(*values)]) 104 | 105 | for attribute in list(set(attribute_categories)): 106 | self.search_results["param_" + attribute] = [np.nan] * self.n_experiments 107 | 108 | return 109 | 110 | def _build_score_arrays(self, data): 111 | scores = {} 112 | 113 | for protected_attribute, _ in six.iteritems(data["attribute_map"]): 114 | for fairness_metric, _ in six.iteritems(self.fairness_metrics): 115 | self.search_results["score_" + protected_attribute + "_" + fairness_metric] = [np.nan] * self.n_experiments 116 | for performance_metric, _ in six.iteritems(self.performance_metrics): 117 | self.search_results["score_" + performance_metric] = [np.nan] * self.n_experiments 118 | 119 | return 120 | 121 | def _fit_grid(self, data, verbose=False, n_train_samples=None, n_val_samples=None): 122 | #TODO add verbose functionality 123 | i = -1 124 | args_dict = {} 125 | 126 | if n_train_samples: 127 | train_idx = np.random.choice(data["X_train"].shape[0], n_train_samples, replace=False) 128 | X_train = data["X_train"][train_idx, :] 129 | y_train = data["y_train"][train_idx] 130 | else: 131 | X_train = data["X_train"] 132 | y_train = data["y_train"] 133 | 134 | if n_val_samples: 135 | val_idx = 
np.random.choice(data["X_val"].shape[0], n_val_samples, replace=0) 136 | X_val = data["X_val"][val_idx, :] 137 | y_val = data["y_val"][val_idx] 138 | else: 139 | X_val = data["X_val"] 140 | y_val = data["y_val"] 141 | 142 | args_dict["X"] = X_val 143 | args_dict["y_true"] = y_val 144 | 145 | for model_key, model_family in six.iteritems(self.models): 146 | parameter_keys, parameter_values = zip(*self.parameters[model_key].items()) 147 | experiments = [dict(zip(parameter_keys, v)) for v in itertools.product(*parameter_values)] 148 | for experiment in experiments: 149 | # Train Model 150 | model = model_family(**experiment) 151 | model = model.fit(X_train, y_train) 152 | args_dict["y_pred_proba"] = model.predict_proba(X_val)[:, 1] 153 | args_dict["trained_model"] = model 154 | 155 | for threshold in self.thresholds: 156 | 157 | args_dict["threshold"] = threshold 158 | args_dict["y_pred"] = args_dict["y_pred_proba"] > threshold 159 | 160 | i += 1 161 | self.search_results["param_threshold"][i] = threshold 162 | # Fill in parameter values 163 | for experiment_key, experiment_value in six.iteritems(experiment): 164 | self.search_results["param_" + experiment_key][i] = experiment_value 165 | 166 | # Evaluate Model 167 | for protected_attribute, _ in six.iteritems(data["attribute_map"]): 168 | args_dict["attribute_map"] = data["attribute_map"][protected_attribute] 169 | for fairness_metric, fairness_metric_function in six.iteritems(self.fairness_metrics): 170 | self.search_results["score_" + protected_attribute + "_" + fairness_metric][i] = fairness_metric_function(**args_dict) 171 | 172 | for performance_metric, performance_metric_function in six.iteritems(self.performance_metrics): 173 | self.search_results["score_" + performance_metric][i] = performance_metric_function(**args_dict) 174 | 175 | for key, value in six.iteritems(self.search_results): 176 | # Hacky way to check for nans, but other ways seemed to break 177 | mask = [j != j for j in self.search_results[key]] 178 | self.search_results[key] = ma.array(self.search_results[key], mask=mask) 179 | 180 | self.pareto_optimal_results = self.filter_solution_set() 181 | 182 | def fit(self, data, verbose=1, search_strategy=GRID, n_random_models=None, n_train_samples=None, n_val_samples=None): 183 | """ 184 | Based in part on http://www.codiply.com/blog/hyperparameter-grid-search-across-multiple-models-in-scikit-learn/ 185 | 186 | Parameters 187 | ---------- 188 | X : 2d array-like 189 | Training dataset where rows are instances and columns are features. 190 | 191 | y : 1d array-like 192 | Classification labels 193 | 194 | 195 | attribute_map : dict of dicts 196 | denotes the protected attributes of the category of protected 197 | attribute (e.g. "Race") to measure causal fairness 198 | maps the attribute name to the column and value that correspond 199 | to that attribute 200 | e.g. one-hot encoding {"Purple": {"col": 0, "val": 1}, 201 | "Green": {"col": 1, "val": 1}} 202 | 203 | e.g. categorical encoding {"Purple": {"col": 0, "val: 1"}, 204 | "Green": {"col": 0, "val: 0"}} 205 | 206 | Note: these MUST be mutually exclusive categories! 207 | 208 | is_categorical : bool (optional) 209 | denotes whether the attribute map represents a categorical encoding. If False 210 | we assume that the encoding is one-hot. 
211 | 212 | max_models : None or int 213 | If None, return the entire Pareto frontier of models 214 | Otherwise, return int number of models, ties will be broken randomly 215 | 216 | search_strategy : str 217 | 'random', a random search over models/hyperparameters 218 | 'grid', enumerates the space of models/hyperparameters 219 | 'genetic_algorithms', uses genetic algorithms 220 | """ 221 | 222 | assert search_strategy in SEARCH_STRATEGIES 223 | 224 | if search_strategy == RANDOM: 225 | assert n_random_models > 0 226 | #TODO 227 | 228 | 229 | if search_strategy == GRID: 230 | self._build_grid_param_arrays() 231 | self._build_score_arrays(data) 232 | self._fit_grid(data, verbose=verbose, n_train_samples=n_train_samples, n_val_samples=n_val_samples) 233 | 234 | 235 | def filter_solution_set(self, omitted_score_list=[]): 236 | # Inspired by https://stackoverflow.com/questions/32791911/fast-calculation-of-pareto-front-in-python 237 | assert(self.search_results) 238 | 239 | costs = -1 * np.array([v for k,v in six.iteritems(self.search_results) if ((k[:5] == "score") & (k[6:] not in omitted_score_list))]).T 240 | 241 | is_efficient = np.ones(costs.shape[0], dtype = bool) 242 | for i, c in enumerate(costs): 243 | if is_efficient[i]: 244 | is_efficient[is_efficient] = np.any(costs[is_efficient]<=c, axis=1) # Remove dominated points 245 | 246 | return { k:v[is_efficient] for k,v in six.iteritems(self.search_results)} 247 | 248 | -------------------------------------------------------------------------------- /Experimental_UserStudy_Groups/Group_2_Notebooks/fklearn/fair_selection_aif.py: -------------------------------------------------------------------------------- 1 | """ 2 | Used to search and return models along the Pareto frontier using AIF360 metrics 3 | """ 4 | import six 5 | import itertools 6 | import numpy as np 7 | import tensorflow as tf 8 | 9 | from fklearn.ml_pipeline import MLPipeline 10 | from fklearn.fair_model_selection import filtered_arguments 11 | from aif360.algorithms.inprocessing import AdversarialDebiasing 12 | 13 | # MANDATORY hyperparmaeters for adversarial debiasing 14 | def DEFAULT_ADB_PARAMS(privileged, unprivileged): 15 | """ 16 | Create a dictionary of mandatory hyperparameters for adversarial debiasing 17 | """ 18 | 19 | return {'unprivileged_groups': [unprivileged], 'privileged_groups': [privileged], 20 | 'scope_name': ['adb'], 'sess': [tf.Session()]} 21 | 22 | class ModelSearch(object): 23 | """ 24 | 25 | Parameters 26 | ---------- 27 | models : dict 28 | Dictionary of model names as keys and instantiations of model objects as values. 29 | e.g. { 30 | 'SVC': sklearn.svm.SVC(), 31 | 'LogisticRegression': sklearn.linear_model.LogisticRegression() 32 | } 33 | 34 | metrics : dict[str, (MetricClass, str)] 35 | Dictionary of sklearn/AIF360 fairness metrics. The keys are the display names of the metrics, and 36 | the values are 2-tuples with the first element containing the metric class object, and the second 37 | containing the name of the metric function to evaluate. 38 | e.g. { 39 | 'ClassificationMetric': (aif360.metrics.ClassificationMetric, 'num_generalized_true_positives'), 40 | 'BinaryLabelDatasetMetric': (aif360.metrics.BinaryLabelDatasetMetric, 'disparate_impact') 41 | } 42 | 43 | hyperparameters : dict of dicts of lists 44 | Dictionary with model names as keys and hyperparameter dicts as values. 45 | Each hyperparameter dict has hyperparameters as keys and hyperparameter 46 | settings to try as values. 47 | e.g. 
{ 48 | 'SVC': {'kernel': ['rbf'], 'C': [1, 10]}, 49 | 'LogisticRegression': {'penalty': ['l1', 'l2'], 'C': [1, 10]} 50 | } 51 | 52 | 53 | thresholds : list of floats 54 | List of classifation thresholds to be applied to all classifiers. 55 | Usage is for classifiers that output a probability, rather than a 56 | hard classification. 57 | e.g. [i * 1.0/100 for i in range(100)] 58 | """ 59 | 60 | def __init__(self, models, metrics, hyperparameters, thresholds): 61 | self.models = models 62 | self.metrics = metrics 63 | self.hyperparameters = hyperparameters 64 | self.thresholds = thresholds 65 | self.search_results = [] 66 | self.pareto_optimal_results = [] 67 | 68 | def grid_search(self, dataset, privileged=[], unprivileged=[], test_frac=0.3, preprocessors=[], postprocessors=[]): 69 | """ 70 | Performs a grid search over the specified model + hyperparameter pairs, calculating metric evalutations for each model. 71 | 72 | Args: 73 | dataset (aif360.datasets.StructuredDataset): An instance of a structured dataset 74 | test_frac (float): A real number between 0 and 1 denoting the % of the dataset to be used as test data 75 | privileged (list[dict]): A list of dictionaries containing privileged groups 76 | unprivileged (list[dict]): A list of dictionaries containing unprivileged groups 77 | """ 78 | 79 | # If any pre/postprocessors were supplied, add the option for None by default 80 | preprocessors += [None] 81 | postprocessors += [None] 82 | 83 | self.model_id = 0 84 | 85 | # Try each unique model 86 | for model_name, ModelClass in six.iteritems(self.models): 87 | 88 | # If no hyperparameters were specified, use the defaults. Otherwise setup a grid search 89 | if len(self.hyperparameters[model_name]) == 0: 90 | param_list = [{}] 91 | else: 92 | parameter_keys, parameter_values = zip(*self.hyperparameters[model_name].items()) 93 | param_list = [dict(zip(parameter_keys, v)) for v in itertools.product(*parameter_values)] 94 | 95 | # Grid search through hyperparameters in the current model 96 | for param_set in param_list: 97 | 98 | model = ModelClass(**param_set) 99 | 100 | # Go through each combination of pre/post processing algorithms 101 | for preprocessor, postprocessor in itertools.product(preprocessors, postprocessors): 102 | 103 | mlp = MLPipeline(model, privileged=privileged, unprivileged=unprivileged, preprocessor=preprocessor, postprocessor=postprocessor) 104 | 105 | # Create a new search result for each threshold value 106 | for threshold in self.thresholds: 107 | 108 | if model_name == 'AdversarialDebiasing': 109 | mlp.model.scope_name = str(self.model_id) 110 | self.model_id += 1 111 | 112 | mlp.model.sess.close() 113 | tf.reset_default_graph() 114 | mlp.model.sess = tf.Session() 115 | 116 | mlp.fit(dataset, test_frac=test_frac, threshold=threshold) 117 | search_result = {'model_class': model_name, 118 | 'hyperparameters': param_set, 119 | 'preprocessor': type(preprocessor).__name__ if preprocessor else 'None', 120 | 'postprocessor': type(postprocessor).__name__ if postprocessor else 'None', 121 | 'metrics': {} 122 | } 123 | 124 | # Populate metrics for this search result 125 | for metric_name, metric_args in six.iteritems(self.metrics): 126 | 127 | # The first metric argument is the Metric Class itself. 
The rest are the names of 128 | # submetric evaluation functions 129 | MetricClass = metric_args[0] 130 | 131 | for metric_fn in metric_args[1:]: 132 | metric_val = mlp.evaluate(MetricClass, metric_fn) 133 | metric_category = '{} ({})'.format(metric_name, metric_fn) 134 | search_result['metrics'][metric_category] = metric_val 135 | 136 | self.search_results.append(search_result) 137 | 138 | self.pareto_optimal_results = self.__filter_solution_set() 139 | 140 | def __filter_solution_set(self): 141 | # Inspired by https://stackoverflow.com/questions/32791911/fast-calculation-of-pareto-front-in-python 142 | assert(self.search_results) 143 | 144 | costs = -1 * np.array([[v for _, v in six.iteritems(result['metrics'])] for result in self.search_results]) 145 | 146 | is_efficient = np.ones(costs.shape[0], dtype = bool) 147 | for i, c in enumerate(costs): 148 | if is_efficient[i]: 149 | is_efficient[is_efficient] = np.any(costs[is_efficient]<=c, axis=1) # Remove dominated points 150 | 151 | return [result for i, result in enumerate(self.search_results) if is_efficient[i]] 152 | 153 | def to_csv(self, filename): 154 | """ 155 | Exports the search results as a CSV file 156 | 157 | Args: 158 | filename (str): The name of the file to save the results to 159 | Raises: 160 | AttributeError: If a grid search has not yet been performed, an AttributeError will be raised 161 | """ 162 | 163 | import csv 164 | 165 | if len(self.search_results) == 0: 166 | raise AttributeError("A grid search must be performed before exporting results to CSV") 167 | 168 | # Compute CSV headers for all metrics in the search results 169 | metric_headers = { metric for res in self.pareto_optimal_results for metric in res['metrics'] } 170 | 171 | with open(filename, mode='w') as csv_file: 172 | headers = ['model', 'hyperparameters', 'preprocessor', 'postprocessor', *list(metric_headers)] 173 | writer = csv.DictWriter(csv_file, fieldnames=headers, lineterminator='\n') 174 | writer.writeheader() 175 | 176 | for result in self.pareto_optimal_results: 177 | metric_dict = {metric_name: metric_val for metric_name, metric_val in six.iteritems(result['metrics'])} 178 | 179 | writer.writerow({'model': result['model_class'], 180 | 'preprocessor': result['preprocessor'], 181 | 'postprocessor': result['postprocessor'], 182 | 'hyperparameters': repr(result['hyperparameters'] or 'Default (see sklearn docs)'), 183 | **metric_dict}) 184 | 185 | 186 | -------------------------------------------------------------------------------- /Experimental_UserStudy_Groups/Group_2_Notebooks/fklearn/interface/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/INSPIRED-GMU/fairkit-learn/1c84e9500f8bf11bc2948d7aea8cd8ec0d1297c8/Experimental_UserStudy_Groups/Group_2_Notebooks/fklearn/interface/__init__.py -------------------------------------------------------------------------------- /Experimental_UserStudy_Groups/Group_2_Notebooks/fklearn/interface/exports/plot.json: -------------------------------------------------------------------------------- 1 | {"x_axis": "BinaryLabelDatasetMetric (disparate_impact)", "y_axis": "ClassificationMetric (error_rate_difference)", "model_visibility": {"LogisticRegression": true, "KNeighborsClassifier": true}, "pareto_checkboxes": {"BinaryLabelDatasetMetric (disparate_impact)": true, "ClassificationMetric (error_rate_difference)": true, "ClassificationMetric (error_rate_ratio)": true, "ClassificationMetric (false_negative_rate)": true, 
"ClassificationMetric (false_positive_rate)": true, "ClassificationMetric (generalized_false_positive_rate)": true, "ClassificationMetric (num_generalized_true_positives)": true, "ClassificationMetric (num_pred_positives)": true, "ClassificationMetric (num_true_negatives)": true}} -------------------------------------------------------------------------------- /Experimental_UserStudy_Groups/Group_2_Notebooks/fklearn/interface/exports/plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/INSPIRED-GMU/fairkit-learn/1c84e9500f8bf11bc2948d7aea8cd8ec0d1297c8/Experimental_UserStudy_Groups/Group_2_Notebooks/fklearn/interface/exports/plot.png -------------------------------------------------------------------------------- /Experimental_UserStudy_Groups/Group_2_Notebooks/fklearn/interface/main.py: -------------------------------------------------------------------------------- 1 | from plot import * 2 | 3 | 4 | # Load custom styles 5 | custom_css = Div(text="") 6 | add_btn = Button(label="Add Plot", button_type="success") 7 | remove_btn = Button(label="Remove Plot", button_type="danger") 8 | 9 | # Construct our viewport 10 | l = layout([ 11 | [custom_css], 12 | create_plot(), 13 | [add_btn, remove_btn] 14 | ], sizing_mode="fixed", css_classes=["layout-container"]) 15 | 16 | def add_plot(): 17 | l.children.insert(len(l.children)-1, create_plot()) 18 | 19 | def remove_plot(): 20 | if len(l.children) > 3: 21 | l.children.pop(len(l.children)-2) 22 | 23 | add_btn.on_click(add_plot) 24 | remove_btn.on_click(remove_plot) 25 | 26 | curdoc().add_root(l) 27 | curdoc().title = "FKLEARN" 28 | -------------------------------------------------------------------------------- /Experimental_UserStudy_Groups/Group_2_Notebooks/fklearn/interface/plot.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from os.path import dirname, join 3 | 4 | from bokeh.plotting import figure, output_file, show 5 | from bokeh.layouts import layout, row, column 6 | from bokeh.models import ColumnDataSource, Div 7 | from bokeh.models.widgets import Slider, Select, Toggle, CheckboxGroup, Button 8 | from bokeh.models.callbacks import CustomJS 9 | from bokeh.io import curdoc, export_png 10 | 11 | 12 | def load_csv_data(filestr='fklearn/interface/static/data/test-file.csv'): 13 | """ 14 | Loads in the csv with our data in it, and returns it as a Pandas Dataframe 15 | """ 16 | import csv 17 | 18 | datas = [] 19 | attr_map = {} 20 | 21 | with open(filestr) as csvfile: 22 | csv_rows = csv.reader(csvfile, delimiter=',') 23 | 24 | # Map each index to its attribute name 25 | header = next(csv_rows) 26 | attr_map = { i : header[i] for i in range(2, len(header))} 27 | 28 | for row in csv_rows: 29 | model = row[0] 30 | hyperparams = row[1] 31 | preprocessor = row[2] 32 | postprocessor = row[3] 33 | 34 | # Create a new data point and add it to that model value 35 | datum = {'model': model, 'hyperparameters': hyperparams, 'preprocessor': preprocessor, 'postprocessor': postprocessor, **{ attr_map[i] : float(row[i]) for i in range(4, len(row))}} 36 | datas.append(datum) 37 | 38 | return pd.DataFrame(datas) 39 | 40 | 41 | def load_explanations(filestr='fklearn/interface/static/data/explanations.json'): 42 | """ 43 | Loads in the metric explanations as a dictionary mapping strings to explanations 44 | """ 45 | 46 | import json 47 | 48 | with open(filestr) as f: 49 | return json.load(f) 50 | 51 | 52 | def 
create_plot(csvfile="fklearn/interface/static/data/test-file.csv"): 53 | """ 54 | Creates and returns a scatter plot from the given data provided by the out.csv file. Each column will appear as a 55 | checkbox to the left of the plot, allowing for hiding of non-optimal data points. Models may be toggled 56 | by clicking on the labeled buttons. As of now, three models are hard-coded (but this is to change in the 57 | future to make this more adaptable to general use cases). 58 | 59 | Args: 60 | csvfile (str): The path name of the csv file to load. By default, we assume that we are in the root directory and load "fklearn/test-file.csv" 61 | """ 62 | 63 | MODEL_COLORS = ['purple', 'orange', 'pink', 'purple', 'green', 'blue'] 64 | 65 | df = load_csv_data(csvfile) 66 | attributes = sorted(set(df.keys()) - {'model'} - {'hyperparameters'} - {'preprocessor'} - {'postprocessor'}) 67 | 68 | # Assign a color to each model, recycling if need be 69 | colors = {model: MODEL_COLORS[i % len(MODEL_COLORS)] for i, model in enumerate(df['model'].unique())} 70 | 71 | # Create a color column and set their respective values 72 | df['color'] = df['model'] 73 | df['visible'] = True 74 | df['optimal'] = True 75 | df.replace({'color': colors}, inplace=True) 76 | 77 | # Initialize the tooltips that will be displayed when hovering over a data point 78 | TOOLTIPS=[ 79 | ("x", "@x"), 80 | ("y", "@y"), 81 | ("params", "@hyperparameters"), 82 | ("preprocessor", "@preprocessor"), 83 | ("postprocessor", "@postprocessor") 84 | ] 85 | 86 | data_source = ColumnDataSource(data={'x': [], 'y': [], 'model': [], 'color': [], 'hyperparameters': [], 'preprocessor': [], 'postprocessor': []}) 87 | 88 | # Construct our scatter plot, receiving data from our data source with the given attributes 89 | p = figure(plot_height=500, plot_width=700, title="", toolbar_location=None, tooltips=TOOLTIPS, sizing_mode="scale_both") 90 | p.circle(x="x", y="y", color="color", source=data_source, size=12, line_color=None, alpha=1.0, legend="model") 91 | p.legend.location = "top_right" 92 | 93 | x_axis = Select(title="X Axis", options=attributes, value=attributes[0], css_classes=['bk-axis-select']) 94 | y_axis = Select(title="Y Axis", options=attributes, value=attributes[1], css_classes=['bk-axis-select']) 95 | 96 | def update(): 97 | """ 98 | Update the plot with specified data 99 | """ 100 | 101 | filtered_df = df[(df['visible'] == True) & (df['optimal'] == True)] 102 | x_name = x_axis.value 103 | y_name = y_axis.value 104 | 105 | p.xaxis.axis_label = x_name 106 | p.yaxis.axis_label = y_name 107 | p.title.text = "{} data selected".format(len(filtered_df)) 108 | data_source.data = { 109 | 'x': filtered_df[x_name].values.astype(float), 110 | 'y': filtered_df[y_name].values.astype(float), 111 | 'model': filtered_df['model'].values, 112 | 'color': filtered_df['color'].values, 113 | 'hyperparameters': filtered_df['hyperparameters'].values, 114 | 'preprocessor': filtered_df['preprocessor'].values, 115 | 'postprocessor': filtered_df['postprocessor'].values 116 | } 117 | 118 | def create_toggle(model): 119 | """ 120 | Creates a function that toggles the visibility of a given model on the plot 121 | """ 122 | 123 | def toggle(toggled): 124 | df.loc[df['model'] == model, 'visible'] = toggled 125 | update() 126 | 127 | return toggle 128 | 129 | def dominates(p1, p2, attributes): 130 | """ 131 | Returns true iff p1 dominates p2. 
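        Here "dominates" means p1 is strictly smaller than p2 on every attribute
        in `attributes`; a tie on any attribute means p1 does not dominate p2.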
132 | """ 133 | for attr in attributes: 134 | if p1[attr] >= p2[attr]: 135 | return False 136 | return True 137 | 138 | def filter_optimality(attrs): 139 | """ 140 | Filter by pareto optimality 141 | """ 142 | 143 | attr_values = [attributes[idx] for idx in attrs] 144 | df_list = list(df.iterrows()) 145 | df['optimal'] = True 146 | 147 | # A data point p2 is optimal only if it is not dominated by any other point p1 148 | for j, p2 in df_list: 149 | df.at[j, 'optimal'] = all([not dominates(p1, p2, attr_values) for _, p1 in df_list]) 150 | 151 | update() 152 | 153 | def save_screenshot(visible_attrs, filename='plot'): 154 | """ 155 | Save a screenshot of the plot to the current directory with the specified file name. Also save a JSON file 156 | containing information about the data displayed in the plot 157 | """ 158 | 159 | import json 160 | 161 | # First, export a png of the plot 162 | export_png(p, 'fklearn/interface/exports/{}.png'.format(filename)) 163 | 164 | # Now create a dictionary of metadata pertaining to the current state of the plot 165 | plot_data = {'x_axis': x_axis.value, 'y_axis': y_axis.value} 166 | 167 | # Keep track of which models are visible on the plot 168 | all_models = df['model'].unique() 169 | visible_models = set(df[df['visible'] == True]['model'].unique()) 170 | plot_data['model_visibility'] = { m : m in visible_models for m in all_models } 171 | 172 | # Keep track of which checkboxes were checked when we export the screenshot 173 | plot_data['pareto_checkboxes'] = { attributes[i] : i in visible_attrs for i in range(len(attributes)) } 174 | 175 | with open('fklearn/interface/exports/{}.json'.format(filename), 'w') as f: 176 | json.dump(plot_data, f) 177 | 178 | 179 | 180 | # Create our toggle buttons to show/hide different models on the plot 181 | toggles = [] 182 | for model in colors: 183 | toggle = Toggle(label="{}".format(model), button_type="success", active=True, css_classes=['bk-btn-model-{}'.format(colors[model])]) 184 | toggle.on_click(create_toggle(model)) 185 | toggles.append(toggle) 186 | 187 | x_axis.on_change('value', lambda attr, old, new: update()) 188 | y_axis.on_change('value', lambda attr, old, new: update()) 189 | 190 | checkbox_group = CheckboxGroup(labels=attributes, active=list(range(len(attributes))), css_classes=['bk-checkbox-group']) 191 | checkbox_group.on_click(lambda checked_attrs: filter_optimality(checked_attrs)) 192 | 193 | screenshot_btn = Button(label="Export Plot", button_type="warning", css_classes=['screenshot-btn']) 194 | screenshot_btn.on_click(lambda: save_screenshot(visible_attrs=checkbox_group.active)) 195 | 196 | # Load metric explanations as tooltips for the checkboxes 197 | metric_dict = load_explanations() 198 | 199 | inputs = column(x_axis, y_axis, *toggles, checkbox_group, screenshot_btn, width=320, height=500, sizing_mode="fixed") 200 | plot_row = row(inputs, p, css_classes=['layout-container']) 201 | 202 | # NOTE: Super hacky way to do this, but it was the only easy way I could fine. 
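    # The snippet below builds a JS string that looks up the rendered checkbox
    # <label> elements and sets each one's `title` attribute to the matching
    # metric explanation, wrapped in a setTimeout so the DOM has mounted before
    # we query it; the callback is attached to an axis-range change and to
    # checkbox clicks so the tooltips get (re)applied.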
203 | metric_tooltip_js = """var checkboxes = document.querySelectorAll('.bk-checkbox-group .bk-input-group label.bk');\n""" 204 | for i in range(len(attributes)): 205 | metric_tooltip_js += """checkboxes[{}].setAttribute('title', `{}`);\n""".format(i, metric_dict[attributes[i]]) 206 | 207 | # Create a setTimeout wrapper around the function so the DOM has a chance to mount 208 | metric_tooltip_js = """setTimeout(function() {\n""" + metric_tooltip_js + """}, 200);\n""" 209 | explanation_callback = CustomJS(args=dict(), code=metric_tooltip_js) 210 | p.x_range.js_on_change('start', explanation_callback) 211 | checkbox_group.js_on_click(explanation_callback) 212 | 213 | # Initial load of our data 214 | filter_optimality(range(len(attributes))) 215 | 216 | return plot_row 217 | -------------------------------------------------------------------------------- /Experimental_UserStudy_Groups/Group_2_Notebooks/fklearn/interface/static/css/styles-notebook.css: -------------------------------------------------------------------------------- 1 | .bk-btn-model-red div button.bk-btn:not(.bk-active), 2 | .bk-btn-model-green div button.bk-btn:not(.bk-active), 3 | .bk-btn-model-blue div button.bk-btn:not(.bk-active), 4 | .bk-btn-model-orange div button.bk-btn:not(.bk-active), 5 | .bk-btn-model-yellow div button.bk-btn:not(.bk-active), 6 | .bk-btn-model-purple div button.bk-btn:not(.bk-active) { 7 | opacity: 0.5 !important; 8 | } 9 | 10 | .bk-btn-model-red div button { 11 | margin-top: 30px !important; 12 | background-color: red !important; 13 | border-style: none !important; 14 | } 15 | 16 | .bk-btn-model-green div button { 17 | margin-top: 30px !important; 18 | background-color: green !important; 19 | border-style: none !important; 20 | } 21 | 22 | .bk-btn-model-blue div button { 23 | margin-top: 30px !important; 24 | background-color: blue !important; 25 | border-style: none !important; 26 | } 27 | 28 | .bk-btn-model-orange div button { 29 | margin-top: 30px !important; 30 | background-color: orange !important; 31 | border-style: none !important; 32 | } 33 | 34 | .bk-btn-model-yellow div button { 35 | margin-top: 30px !important; 36 | background-color: yellow !important; 37 | border-style: none !important; 38 | } 39 | 40 | .bk-btn-model-purple div button { 41 | margin-top: 30px !important; 42 | background-color: purple !important; 43 | border-style: none !important; 44 | } 45 | 46 | .bk-checkbox-group .bk-input-group { 47 | margin-top: 30px !important; 48 | height: auto !important; 49 | } 50 | 51 | .bk-axis-select { 52 | height: 50px !important; 53 | } 54 | 55 | .layout-container { 56 | margin-bottom: 50px !important; 57 | border-top: 1px solid grey !important; 58 | } 59 | 60 | .bk.screenshot-btn { 61 | top: 420px !important; 62 | width: 100px !important; 63 | } -------------------------------------------------------------------------------- /Experimental_UserStudy_Groups/Group_2_Notebooks/fklearn/interface/static/css/styles.css: -------------------------------------------------------------------------------- 1 | .bk-btn-model-red div button.bk-btn:not(.bk-active), 2 | .bk-btn-model-green div button.bk-btn:not(.bk-active), 3 | .bk-btn-model-blue div button.bk-btn:not(.bk-active), 4 | .bk-btn-model-orange div button.bk-btn:not(.bk-active), 5 | .bk-btn-model-yellow div button.bk-btn:not(.bk-active), 6 | .bk-btn-model-purple div button.bk-btn:not(.bk-active) { 7 | opacity: 0.5 !important; 8 | } 9 | 10 | .bk-btn-model-red div button { 11 | margin-top: 30px !important; 12 | background-color: red !important; 13 
| border-style: none !important; 14 | } 15 | 16 | .bk-btn-model-green div button { 17 | margin-top: 30px !important; 18 | background-color: green !important; 19 | border-style: none !important; 20 | } 21 | 22 | .bk-btn-model-blue div button { 23 | margin-top: 30px !important; 24 | background-color: blue !important; 25 | border-style: none !important; 26 | } 27 | 28 | .bk-btn-model-orange div button { 29 | margin-top: 30px !important; 30 | background-color: orange !important; 31 | border-style: none !important; 32 | } 33 | 34 | .bk-btn-model-yellow div button { 35 | margin-top: 30px !important; 36 | background-color: yellow !important; 37 | border-style: none !important; 38 | } 39 | 40 | .bk-btn-model-purple div button { 41 | margin-top: 30px !important; 42 | background-color: purple !important; 43 | border-style: none !important; 44 | } 45 | 46 | .bk-checkbox-group { 47 | position: relative !important; 48 | } 49 | 50 | .bk-checkbox-group .bk-input-group { 51 | margin-top: 30px !important; 52 | height: 200px !important; 53 | position: absolute !important; 54 | cursor: pointer; 55 | } 56 | 57 | .bk-axis-select { 58 | height: 50px !important; 59 | } 60 | 61 | .layout-container { 62 | margin-bottom: 50px !important; 63 | border-top: 1px solid grey !important; 64 | } 65 | 66 | .bk.screenshot-btn { 67 | top: 420px !important; 68 | width: 100px !important; 69 | } -------------------------------------------------------------------------------- /Experimental_UserStudy_Groups/Group_2_Notebooks/fklearn/interface/static/data/explanations.json: -------------------------------------------------------------------------------- 1 | { 2 | "BinaryLabelDatasetMetric (num_positives)": "Compute the number of positives", 3 | "BinaryLabelDatasetMetric (num_negatives)": "Compute the number of negatives", 4 | "BinaryLabelDatasetMetric (base_rate)": "Compute the base rate, 'Pr(Y = 1) = P/(P+N)'", 5 | "BinaryLabelDatasetMetric (disparate_impact)": "'Pr(Y = 1 | D = unprivileged) / Pr(Y = 1 | D = privileged)'", 6 | "BinaryLabelDatasetMetric (statistical_parity_difference)": "'Pr(Y = 1 | D = unprivileged) - Pr(Y = 1 | D = privileged)'", 7 | "UnifiedMetricLibrary (num_true_positives)": "Return the number of instances in the dataset where both the predicted and true labels are 'favorable'", 8 | "UnifiedMetricLibrary (num_false_positives)": "Return the number of false positives", 9 | "UnifiedMetricLibrary (num_false_negatives)": "Return the number of false negatives", 10 | "UnifiedMetricLibrary (num_true_negatives)": "Return the number of true negatives", 11 | "UnifiedMetricLibrary (num_generalized_true_positives)": "Return the generalized number of true positives, the weighted sum of predicted scores where true labels are 'favorable'", 12 | "UnifiedMetricLibrary (num_generalized_false_positives)": "Return the generalized number of false positives, the weighted sum of predicted scores where true labels are 'favorable'", 13 | "UnifiedMetricLibrary (num_generalized_false_negatives)": "Return the generalized number of false negatives, the weighted sum of predicted scores where true labels are 'favorable'", 14 | "UnifiedMetricLibrary (num_generalized_true_negatives)": "Return the generalized number of true negatives, the weighted sum of predicted scores where true labels are 'favorable'", 15 | "UnifiedMetricLibrary (true_positive_rate)": "Return the ratio of true positives to positive examples in the dataset; 'TPR = TP/P'", 16 | "UnifiedMetricLibrary (false_positive_rate)": "'FPR = FP/N'", 17 | "UnifiedMetricLibrary 
(false_negative_rate)": "'FNR = FN/P'", 18 | "UnifiedMetricLibrary (true_negative_rate)": "'TNR = TN/N'", 19 | "UnifiedMetricLibrary (generalized_true_positive_rate)": "Return the ratio of generalized true positives to positive examples in the dataset; 'GTPR = GTP/P'", 20 | "UnifiedMetricLibrary (generalized_false_positive_rate)": "'GFPR = GFP/N'", 21 | "UnifiedMetricLibrary (generalized_false_negative_rate)": "'GFNR = GFN/P'", 22 | "UnifiedMetricLibrary (generalized_true_negative_rate)": "'GTNR = GTN/N'", 23 | "UnifiedMetricLibrary (positive_predictive_value)": "'PPV = TP/(TP + FP)'", 24 | "UnifiedMetricLibrary (false_discovery_rate)": "'FDR = FP/(TP + FP)'", 25 | "UnifiedMetricLibrary (false_omission_rate)": "'FOR = FN/(TN + FN)'", 26 | "UnifiedMetricLibrary (negative_predictive_value)": "'NPV = TN/(TN + FN)'", 27 | "UnifiedMetricLibrary (accuracy)": "'ACC = (TP + TN)/(P + N)'", 28 | "UnifiedMetricLibrary (error_rate)": "'ERR = (FP + FN)/(P + N)'", 29 | "UnifiedMetricLibrary (true_positive_rate_difference)": "'TPR(D = unprivileged) - TPR(D = privileged)'", 30 | "UnifiedMetricLibrary (false_positive_rate_difference)": "'FPR(D = unprivileged) - FPR(D = privileged)'", 31 | "UnifiedMetricLibrary (false_negative_rate_difference)": "'FNR(D = unprivileged) - FNR(D = privileged)'", 32 | "UnifiedMetricLibrary (false_omission_rate_difference)": "'FOR(D = unprivileged) - FOR(D = privileged)'", 33 | "UnifiedMetricLibrary (false_discovery_rate_difference)": "'FDR(D = unprivileged) - FDR(D = privileged)'", 34 | "UnifiedMetricLibrary (false_positive_rate_ratio)": "'FPR(D = unprivileged) / FPR(D = privileged)'", 35 | "UnifiedMetricLibrary (false_negative_rate_ratio)": "'FNR(D = unprivileged) / FNR(D = privileged)'", 36 | "UnifiedMetricLibrary (false_omission_rate_ratio)": "'FOR(D = unprivileged) / FOR(D = privileged)'", 37 | "UnifiedMetricLibrary (false_discovery_rate_ratio)": "'FDR(D = unprivileged) / FDR(D = privileged)'", 38 | "UnifiedMetricLibrary (average_odds_difference)": "Average of difference in FPR and TPR for unprivileged and privileged groups", 39 | "UnifiedMetricLibrary (average_abs_odds_difference)": "Average of absolute difference in FPR and TPR for unprivileged and privileged groups", 40 | "UnifiedMetricLibrary (error_rate_difference)": "Difference in error rates for unprivileged and privileged groups; ERR(D = unprivileged) - ERR(D = privileged)", 41 | "UnifiedMetricLibrary (error_rate_ratio)": "Ratio of error rates for unprivileged and privileged groups; 'ERR(D = unprivileged) / ERR(D = privileged)'", 42 | "UnifiedMetricLibrary (num_pred_positives)": "Return the number of predicted positives", 43 | "UnifiedMetricLibrary (num_pred_negatives)": "Return the number of predicted negatives", 44 | "UnifiedMetricLibrary (selection_rate)": "'Pr(Y_hat = favorable)'", 45 | "UnifiedMetricLibrary (disparate_impact)": "'Pr(Y_hat = 1 | D = unprivileged) / Pr(Y_hat = 1 | D = privileged)'", 46 | "UnifiedMetricLibrary (statistical_parity_difference)": "'Pr(Y_hat = 1 | D = unprivileged) - Pr(Y_hat = 1 | D = privileged)'", 47 | "UnifiedMetricLibrary (theil_index)": "Generalized entropy index with alpha = 1", 48 | "UnifiedMetricLibrary (coefficient_of_variation)": "Two times the square root of the generalized entropy index with alpha = 2", 49 | "UnifiedMetricLibrary (between_group_theil_index)": "The 'between group generalized entropy index' with alpha = 1", 50 | "UnifiedMetricLibrary (between_group_coefficient_of_variation)": "Two times the square root of the 'between group generalized entropy index' 
with alpha = 2", 51 | "UnifiedMetricLibrary (between_all_groups_theil_index)": "The 'between all groups generalized entropy index' with alpha = 1", 52 | "UnifiedMetricLibrary (between_all_groups_coefficient_of_variation)": "Two times the square root of the 'between all groups generalized entropy index' with alpha = 2", 53 | "UnifiedMetricLibrary (equal_opportunity_difference)": "'TPR(D = unprivileged) - TPR(D = privileged)'", 54 | "UnifiedMetricLibrary (power)": "Return the number of instances in the dataset where both the predicted and true labels are 'favorable'", 55 | "UnifiedMetricLibrary (precision)": "'Precision = TP/(TP + FP)'", 56 | "UnifiedMetricLibrary (recall)": "'Recall = TP/P'", 57 | "UnifiedMetricLibrary (sensitivity)": "'Sensitivity = Recall = TP/P'", 58 | "UnifiedMetricLibrary (specificity)": "'Specificity = TN/N'", 59 | "UnifiedMetricLibrary (accuracy_score)": "Fraction of correct predictions", 60 | "CausalDiscriminationScore": "The fraction of inputs for which changing at least one of those characteristics causes the output to change", 61 | "GroupDiscriminationScore": "A vaive Bayes approach for discrimination-free classification" 62 | } -------------------------------------------------------------------------------- /Experimental_UserStudy_Groups/Group_2_Notebooks/fklearn/metric_library.py: -------------------------------------------------------------------------------- 1 | from aif360.metrics import ClassificationMetric 2 | from sklearn.metrics import accuracy_score as accuracy 3 | import math 4 | 5 | def classifier_quality_score(model, test_data, 6 | unprivileged_groups, 7 | privileged_groups): 8 | 9 | classified_data = model.predict(test_data) 10 | metric_library = UnifiedMetricLibrary(test_data, classified_data, unprivileged_groups, privileged_groups) 11 | 12 | # call all metrics 13 | 14 | #accuracy 15 | 16 | acc = metric_library.accuracy_score() 17 | 18 | #fairness 19 | fairness_scores = [] 20 | 21 | # equal opportunity difference 22 | eq_opp_diff = metric_library.equal_opportunity_difference() 23 | fairness_scores.append(eq_opp_diff) 24 | 25 | # average odds difference 26 | avg_odds_diff = metric_library.average_odds_difference() 27 | fairness_scores.append(avg_odds_diff) 28 | 29 | # statistical parity difference 30 | stat_parity_diff = metric_library.statistical_parity_difference() 31 | fairness_scores.append(stat_parity_diff) 32 | 33 | # average odds difference 34 | avg_odds_diff = metric_library.average_odds_difference() 35 | fairness_scores.append(avg_odds_diff) 36 | 37 | # calculate & return overall quality score 38 | max_fair_score = max(fairness_scores) 39 | balance_val = acc * (1-max_fair_score) 40 | 41 | return math.sqrt(balance_val) 42 | 43 | 44 | class UnifiedMetricLibrary(): 45 | 46 | def __init__(self, test_data, classified_data, unprivileged_groups, privileged_groups): 47 | 48 | self.test_data = test_data 49 | self.classified_data = classified_data 50 | 51 | self.classification_metric = ClassificationMetric(test_data, classified_data, unprivileged_groups, privileged_groups) 52 | 53 | def accuracy_score(self): 54 | return accuracy(self.test_data.labels, self.classified_data.labels) 55 | 56 | def num_true_positives(self): 57 | return self.classification_metric.num_true_positives() 58 | 59 | def num_false_positives(self): 60 | return self.classification_metric.num_false_positives() 61 | 62 | def num_false_negatives(self): 63 | return self.classification_metric.num_false_negatives() 64 | 65 | def num_generalized_true_positives(self): 66 | return 
self.classification_metric.num_generalized_true_positives() 67 | 68 | def num_generalized_false_positives(self): 69 | return self.classification_metric.num_generalized_false_positives() 70 | 71 | def num_generalized_false_negatives(self): 72 | return self.classification_metric.num_generalized_false_negatives() 73 | 74 | def num_generalized_true_negatives(self): 75 | return self.classification_metric.num_generalized_true_negatives() 76 | 77 | def true_positive_rate(self): 78 | return self.classification_metric.true_positive_rate() 79 | 80 | def false_positive_rate(self): 81 | return self.classification_metric.false_positive_rate() 82 | 83 | def false_negative_rate(self): 84 | return self.classification_metric.false_negative_rate() 85 | 86 | def true_negative_rate(self): 87 | return self.classification_metric.true_negative_rate() 88 | 89 | def generalized_true_positive_rate(self): 90 | return self.classification_metric.generalized_true_positive_rate() 91 | 92 | def generalized_false_positive_rate(self): 93 | return self.classification_metric.generalized_false_positive_rate() 94 | 95 | def generalized_false_negative_rate(self): 96 | return self.classification_metric.generalized_false_negative_rate() 97 | 98 | def generalized_true_negative_rate(self): 99 | return self.classification_metric.generalized_true_negative_rate() 100 | 101 | def positive_predictive_value(self): 102 | return self.classification_metric.positive_predictive_value() 103 | 104 | def false_discovery_rate(self): 105 | return self.classification_metric.false_discovery_rate() 106 | 107 | def false_omission_rate(self): 108 | return self.classification_metric.false_omission_rate() 109 | 110 | def negative_predictive_value(self): 111 | return self.classification_metric.negative_predictive_value() 112 | 113 | def error_rate(self): 114 | return self.classification_metric.error_rate() 115 | 116 | def false_positive_rate_difference(self): 117 | return self.classification_metric.false_positive_rate_difference() 118 | 119 | def false_negative_rate_difference(self): 120 | return self.classification_metric.false_negative_rate_difference() 121 | 122 | def false_omission_rate_difference(self): 123 | return self.classification_metric.false_omission_rate_difference() 124 | 125 | def false_discovery_rate_difference(self): 126 | return self.classification_metric.false_discovery_rate_difference() 127 | 128 | def false_positive_rate_ratio(self): 129 | return self.classification_metric.false_positive_rate_ratio() 130 | 131 | def false_negative_rate_ratio(self): 132 | return self.classification_metric.false_negative_rate_ratio() 133 | 134 | def false_omission_rate_ratio(self): 135 | return self.classification_metric.false_omission_rate_ratio() 136 | 137 | def false_discovery_rate_ratio(self): 138 | return self.classification_metric.false_discovery_rate_ratio() 139 | 140 | def average_abs_odds_difference(self): 141 | return self.classification_metric.average_abs_odds_difference() 142 | 143 | def error_rate_difference(self): 144 | return self.classification_metric.error_rate_difference() 145 | 146 | def error_rate_ratio(self): 147 | return self.classification_metric.error_rate_ratio() 148 | 149 | def num_pred_positives(self): 150 | return self.classification_metric.num_pred_positives() 151 | 152 | def num_pred_negatives(self): 153 | return self.classification_metric.num_pred_negatives() 154 | 155 | def selection_rate(self): 156 | return self.classification_metric.selection_rate() 157 | 158 | def equal_opportunity_difference(self): 159 | return 
abs(self.classification_metric.equal_opportunity_difference()) 160 | 161 | def average_odds_difference(self): 162 | return abs(self.classification_metric.average_odds_difference()) 163 | 164 | def disparate_impact(self): 165 | return abs(self.classification_metric.disparate_impact()) 166 | 167 | def statistical_parity_difference(self): 168 | return abs(self.classification_metric.statistical_parity_difference()) 169 | -------------------------------------------------------------------------------- /Experimental_UserStudy_Groups/Group_2_Notebooks/fklearn/ml_pipeline.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.preprocessing import StandardScaler 3 | 4 | from aif360.metrics import ClassificationMetric 5 | from aif360.algorithms import Transformer 6 | from aif360.algorithms.postprocessing import CalibratedEqOddsPostprocessing 7 | 8 | class MLPipeline(object): 9 | 10 | """ 11 | Defines a machine-learning pipeline for evaluating fairness in predictors. For usage, see example at the bottom of the file. 12 | 13 | Args: 14 | model (sklearn.model | aif360.algorithms.inprocessing): An sklearn predictor OR an AIF360 inprocessing algorithm 15 | privileged (list[dict[str, float]]): A list of dictionaries with keys representing privileged attribute + value pairs 16 | unprivileged (list[dict[str, float]]): A list of dictionaries with keys representing unprivileged attribute + value pairs 17 | preprocessor (aif360.algorithms.preprocessing): An instance of an AIF360 preprocessing algorithm 18 | postprocessor (aif360.algorithms.postprocessing): An instance of an AIF360 postprocessing algorithm 19 | """ 20 | 21 | def __init__(self, model, privileged=[], unprivileged=[], preprocessor=None, postprocessor=None): 22 | self.model = model 23 | self.privileged = privileged 24 | self.unprivileged = unprivileged 25 | self.preprocessor = preprocessor 26 | self.postprocessor = postprocessor 27 | self.dataset_train = [] 28 | self.dataset_test = [] 29 | self.test_predictions = [] 30 | 31 | 32 | def fit(self, dataset, test_frac=0.3, threshold=0.5, feature_scaling=False): 33 | """ 34 | Trains our model on the dataset. Uses different control flow depending on if we are using an 35 | sklearn model or an AIF360 inprocessing algorithm 36 | 37 | Args: 38 | dataset (aif360.datasets.StructuredDataset): An instance of a structured dataset 39 | test_frac (float): A real number between 0 and 1 denoting the % of the dataset to be used as test data 40 | threshold (float): A real number between 0 and 1 denoting the threshold of acceptable class imbalance 41 | """ 42 | 43 | if test_frac < 0 or test_frac > 1: 44 | raise ValueError("Parameter test_frac must be between 0 and 1") 45 | 46 | dataset_train, dataset_test = dataset.split([1-test_frac], shuffle=False) 47 | 48 | # If a preprocessing algorithm was supplied, apply that transformations first 49 | if self.preprocessor: 50 | dataset_train = self.preprocessor.fit_transform(dataset_train) 51 | dataset_test = self.preprocessor.fit_transform(dataset_test) 52 | 53 | self.dataset_train = dataset_train 54 | self.dataset_test = dataset_test 55 | 56 | self.__fit_inprocessing(threshold, feature_scaling) 57 | 58 | def __fit_inprocessing(self, threshold, feature_scaling): 59 | """ 60 | Trains an AIF360 inprocessing model on the provided dataset. 
61 | 62 | Args: 63 | """ 64 | 65 | # Apply feature scaling if specified 66 | if feature_scaling: 67 | scaler = StandardScaler().fit(self.dataset_train.features) 68 | self.dataset_train.features = scaler.fit_transform(self.dataset_train.features) 69 | self.dataset_test.features = scaler.transform(self.dataset_test.features) 70 | 71 | self.model.fit(self.dataset_train) 72 | 73 | 74 | # Make our predictions, without thresholds for now 75 | dataset_test_pred = self.model.predict(self.dataset_test) 76 | 77 | # If a postprocessing algorithm was specified, transform the test results 78 | if self.postprocessor: 79 | dataset_test_pred = self.postprocessor.fit(self.dataset_test, dataset_test_pred) \ 80 | .predict(dataset_test_pred) 81 | 82 | self.classified_data = dataset_test_pred 83 | 84 | 85 | def evaluate(self, metric, submetric): 86 | """ 87 | Evaluates an AIF360 metric against the trained model. 88 | 89 | Args: 90 | metric (aif360.metrics.Metric): An AIF360 metric class 91 | submetric (str): A string denoting the metric evaluation function that is to be called on the provided metric class 92 | Returns: 93 | float: A float denoting the performance of each method evaluation within the specified metric on the trained model 94 | Raises: 95 | AttributeError: If a model has not been trained yet, or 96 | If the provided submetric function does not exist on the metric class, or 97 | If the provided submetric function contains arguments other than "privileged" 98 | 99 | """ 100 | 101 | from inspect import signature 102 | import re 103 | 104 | if not self.dataset_train: 105 | raise AttributeError("A model must be fit before evaluating a metric") 106 | 107 | curr_metric = metric(self.dataset_test, self.classified_data, unprivileged_groups=self.unprivileged, privileged_groups=self.privileged) 108 | 109 | # Retrieve the callable evalation function 'submetric' of this metric instance 110 | submetric_fn = getattr(curr_metric, submetric) 111 | 112 | return submetric_fn() 113 | -------------------------------------------------------------------------------- /Experimental_UserStudy_Groups/Group_2_Notebooks/fklearn/sample.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sklearn as skl 3 | import six 4 | from sklearn.svm import SVC 5 | from sklearn.linear_model import LogisticRegression 6 | 7 | from sklearn.metrics import accuracy_score 8 | from fair_metrics import causal_discrimination_score, group_discrimination_score, false_positive_rate_equality, false_negative_rate_equality 9 | from fair_model_selection import FairSearch 10 | 11 | from datasets import load_adult_income 12 | 13 | import os 14 | 15 | os.chdir("fklearn/") 16 | 17 | 18 | data = load_adult_income() 19 | models = {'LogisticRegression': LogisticRegression} 20 | metrics = {'Causal': group_discrimination_score, 'Accuracy': accuracy_score} 21 | parameters = { 22 | # 'SVC': {'kernel': ['rbf'], 'C': [1, 10], 'probability': [True]}, 23 | 'LogisticRegression': {'penalty': ['l1', 'l2'], 'C': [1, 10]} 24 | } 25 | 26 | thresholds = [i * 1.0/100 for i in range(10)] 27 | Search = FairSearch(models, metrics, metrics, parameters, thresholds) 28 | Search.fit(data[0]) 29 | 30 | print(Search) 31 | 32 | -------------------------------------------------------------------------------- /Experimental_UserStudy_Groups/Group_2_Notebooks/fklearn/sample_aif.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sklearn as skl 3 | import six 4 
| from sklearn.linear_model import LogisticRegression 5 | from sklearn.neighbors import KNeighborsClassifier 6 | from sklearn.svm import SVC 7 | 8 | from aif360.datasets import AdultDataset, GermanDataset 9 | from aif360.metrics import BinaryLabelDatasetMetric, ClassificationMetric 10 | from aif360.algorithms.preprocessing.optim_preproc_helpers.opt_tools import OptTools 11 | from aif360.algorithms.preprocessing.optim_preproc_helpers.distortion_functions import get_distortion_adult 12 | from aif360.algorithms.preprocessing import DisparateImpactRemover, LFR, Reweighing, OptimPreproc 13 | from aif360.algorithms.inprocessing import AdversarialDebiasing 14 | from aif360.algorithms.postprocessing import CalibratedEqOddsPostprocessing, EqOddsPostprocessing, RejectOptionClassification 15 | 16 | from fair_selection_aif import AIF360Search, DEFAULT_ADB_PARAMS 17 | 18 | import os 19 | 20 | dataset = GermanDataset() 21 | models = {'LogisticRegression': LogisticRegression, 'KNeighborsClassifier': KNeighborsClassifier} 22 | metrics = {'ClassificationMetric': [ClassificationMetric, 23 | 'num_generalized_true_positives', 24 | 'num_true_negatives', 25 | 'false_positive_rate', 26 | 'false_negative_rate', 27 | 'generalized_false_positive_rate' 28 | ] 29 | # 'BinaryLabelDatasetMetric': [BinaryLabelDatasetMetric, 'disparate_impact'] 30 | } 31 | unprivileged = [{'age': 0, 'sex': 0}] 32 | privileged = [{'age': 1, 'sex': 1}] 33 | preprocessor_args = {'unprivileged_groups': unprivileged, 'privileged_groups': privileged} 34 | 35 | # Hyperparameters may either be specified as a dictionary of string to lists, or by an empty dictionary to 36 | # use the default ones set by sklearn (or AIF360). The keys are the names of the hyperparameters, and the 37 | # values and lists of possible values to form a grid search over 38 | parameters = { 39 | 'LogisticRegression': {'penalty': ['l1', 'l2'], 'C': [0.1, 0.5, 1]}, 40 | 'KNeighborsClassifier': {} 41 | } 42 | thresholds = [i * 10.0/100 for i in range(5)] 43 | preprocessors=[DisparateImpactRemover(), Reweighing(**preprocessor_args)] 44 | postprocessors=[CalibratedEqOddsPostprocessing(**preprocessor_args), EqOddsPostprocessing(**preprocessor_args), RejectOptionClassification(**preprocessor_args)] 45 | 46 | Search = AIF360Search(models, metrics, parameters, thresholds) 47 | Search.grid_search(dataset, privileged=privileged, unprivileged=unprivileged, preprocessors=preprocessors, postprocessors=postprocessors) 48 | 49 | Search.to_csv("interface/static/data/test-file.csv") 50 | 51 | -------------------------------------------------------------------------------- /Experimental_UserStudy_Groups/Group_2_Notebooks/fklearn/scikit_learn_wrapper.py: -------------------------------------------------------------------------------- 1 | from sklearn.linear_model import LogisticRegression as lr 2 | from sklearn.neighbors import KNeighborsClassifier as knc 3 | from sklearn.ensemble import RandomForestClassifier as rfc 4 | from sklearn.svm import SVC as svc 5 | 6 | class ScikitLearnWrapper(): 7 | 8 | def __init__(self, model_class, **kwargs): 9 | self.model = model_class(**kwargs) 10 | 11 | 12 | def fit(self, dataset_train): 13 | self.model.fit(dataset_train.features, dataset_train.labels.ravel()) 14 | 15 | def predict(self, dataset_test): 16 | 17 | dataset_test_pred = dataset_test.copy() 18 | dataset_test_pred.labels = self.model.predict(dataset_test.features).reshape(-1,1) 19 | 20 | return dataset_test_pred 21 | 22 | 23 | LogisticRegression = lambda **kwargs : ScikitLearnWrapper(lr,**kwargs) 
24 | KNeighborsClassifier = lambda **kwargs : ScikitLearnWrapper(knc,**kwargs) 25 | RandomForestClassifier = lambda **kwargs : ScikitLearnWrapper(rfc,**kwargs) 26 | SVC = lambda **kwargs : ScikitLearnWrapper(svc,**kwargs) 27 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Fairkit-learn: A Python Model Fairness Evaluation Toolkit 2 | 3 | Fairkit-learn is an open-source, publicly available Python toolkit designed 4 | to help data scientists evaluate and explore machine learning models with 5 | respect to quality and fairness metrics simultaneously. 6 | 7 | Fairkit-learn builds on top of [scikit-learn](https://scikit-learn.org/stable/), the state-of-the-art tool suite 8 | for data mining and data analysis, and [AI Fairness 360](https://aif360.mybluemix.net/), the state-of-the-art 9 | Python toolkit for examining, reporting, and mitigating machine learning bias 10 | in individual models. 11 | 12 | Fairkit-learn supports all metrics and learning algorithms available in scikit-learn and AI Fairness 13 | 360, as well as all of the bias-mitigating pre- and post-processing algorithms available in AI Fairness 360, and provides extension points for adding more metrics and algorithms. 14 | 15 | # Installation 16 | 17 | To install fairkit-learn, run the following command: 18 | 19 | ``` pip install fairkit_learn==1.9``` 20 | 21 | # Using fairkit-learn 22 | 23 | To use fairkit-learn, first run the following command to install the necessary packages: 24 | 25 | ```pip install -r requirements.txt``` 26 | 27 | Sample code for how to use fairkit-learn can be found in the examples 28 | folder (e.g., Fairkit_learn_Tutorial.ipynb) in the repo. 29 | -------------------------------------------------------------------------------- /fklearn_pkg/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Brittany Johnson, Jesse Bartola, Rico Angell, Katherine Keith, Sam Witty, Stephen Giguere, and Yuriy Brun 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE.
22 | -------------------------------------------------------------------------------- /fklearn_pkg/README.md: -------------------------------------------------------------------------------- 1 | # fairkit-learn fairness toolkit 2 | 3 | Fairkit-learn is an open-source, publicly available Python toolkit designed 4 | to help data scientists evaluate and explore machine learning models with 5 | respect to quality and fairness metrics simultaneously. 6 | 7 | Fairkit-learn builds on top of [scikit-learn](https://scikit-learn.org/stable/), the state-of-the-art tool suite 8 | for data mining and data analysis, and [AI Fairness 360](https://aif360.mybluemix.net/), the state-of-the-art 9 | Python toolkit for examining, reporting, and mitigating machine learning bias 10 | in individual models. 11 | 12 | Fairkit-learn supports all metrics and learning algorithms available in scikit-learn and AI Fairness 13 | 360, and all of the bias mitigating pre- and post-processing algorithms available in AI Fairness 360, and provides extension points to add more metrics and algorithms. 14 | 15 | # Installation 16 | 17 | To install fairkit-learn, run the following command: 18 | 19 | ``` pip install fairkit-learn==1.0``` 20 | 21 | # Using fairkit-learn 22 | 23 | Sample code for how to use fairkit-learn can be found in the examples 24 | folder in the repo. 25 | -------------------------------------------------------------------------------- /fklearn_pkg/fairkit_learn: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | echo "hey there, this is my first pip package" 4 | -------------------------------------------------------------------------------- /fklearn_pkg/fklearn/NOTES.txt: -------------------------------------------------------------------------------- 1 | Goal: want to be able to use fairkit learn just like you would sklearn 2 | 3 | (1) One potentially useful feature (aside from the feature of finding the best model) would be able to import fairness metrics like you can import accuracy metrics in scikit-learn. This could be especially useful since in our class project we were able to show that having the metrics themselves was useful. 4 | 5 | http://scikit-learn.org/stable/modules/generated/sklearn.metrics.accuracy_score.html 6 | 7 | Ex scikit-learn: 8 | ``` 9 | from sklearn.metrics import roc_curve 10 | from sklearn.metrics import accuracy_score 11 | ``` 12 | 13 | Ex fairkit-learn: 14 | ``` 15 | from fklearn.fair_metrics import causal_fairness 16 | from fklearn.fair_metrics import false_postive_rate_equality 17 | ``` 18 | 19 | Sorelle A. Friedler (Haverford)'s work: 20 | 21 | This group has already implemented many of these but for the purpose of studying "fairness-enhancing interventions in machine learning" https://arxiv.org/pdf/1802.04422.pdf 22 | 23 | They have a repository with many of the metrics already implemented: 24 | https://github.com/algofairness/fairness-comparison 25 | Problems: 26 | -Not clear documentation on how to run this software as a stand alone package 27 | -How do we not overlap with what they did/ contribute something novel 28 | -Can we collaborate with them? 
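For (1), a rough illustrative sketch of the gap we want to close (toy data only; the hand-rolled block below is what people write today next to sklearn's metrics, and the one-line import is what we want fklearn to offer, with names as in sample.py and exact signatures still to be decided):
```
import numpy as np
from sklearn.metrics import accuracy_score

# toy labels, predictions, and a binary protected attribute
y_true = np.array([1, 0, 1, 1, 0, 1])
y_pred = np.array([1, 0, 0, 1, 0, 1])
sex = np.array([0, 0, 1, 1, 1, 0])

# performance is a one-liner today...
acc = accuracy_score(y_true, y_pred)

# ...but "group discrimination" (gap in positive-prediction rates between groups) is hand-rolled
gap = abs(y_pred[sex == 1].mean() - y_pred[sex == 0].mean())
print(acc, gap)
```
Goal: `from fklearn.fair_metrics import group_discrimination_score` (the import sample.py already assumes) should replace the hand-rolled gap computation above.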
29 | 30 | (2) For the full model search: 31 | Modeled after http://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html#sklearn.model_selection.GridSearchCV 32 | ``` 33 | from fklearn.fair_model_selection import FairSearch 34 | 35 | class FairSearch(model_classes, protected_attributes, metrics, hyperparameters): 36 | """ 37 | Description 38 | TODO 39 | 40 | Parameters 41 | ---------- 42 | model_classes : list of stings 43 | List of sklearn model classes that one wants to search over 44 | TODO: put list of supported packages (e.g. sklearn.linear_model.LogisticRegression) 45 | - do we want to implement/support any of the fair-aware ones ourselves? 46 | 47 | protected_attributes : list of ints 48 | List of integers corresponding to the index of the protected attributes in X 49 | TODO: or strings? 50 | 51 | metrics : list of strings 52 | e.g. sklearn supported [sklearn.metrics.accuracy_score, ...] 53 | and our fairness metrics [fklearn.fair_metrics.causal_fairness] 54 | 55 | hyperparameters : list of strings 56 | e.g ['l1', 'class_weight'] 57 | TODO: will need to put in some sort of error if they don't work with the sklearn piece 58 | 59 | Attributes 60 | ---------- 61 | TODO 62 | """ 63 | 64 | def fit(self, X, y): 65 | TODO: or do we want the protected attributes in this function instead? 66 | pass 67 | 68 | ``` 69 | What do we want to return? A model, a Parateo frontier, a visulization? 70 | 71 | 72 | (3) Other great features of sklearn that we would want to include as well: 73 | 74 | sklearn.datasets (fetch and loads popular datasets) 75 | ``` 76 | fklearn.datasets.fetch_propublica 77 | fklearn.datasets.fetch_propublica_vectorized 78 | fklearn.datasets.fetch_adult 79 | fklearn.datasets.fetch_adult_vectorized 80 | ``` 81 | 82 | TODO: 83 | -How many of the "21" definitions of fairness can we implement in this package? 84 | -will eventually want to release as a pip package correct? 85 | -One of the selling points could that it works seamlessly with sklearn 86 | 87 | THOUGHTS: 88 | -Contributions of our work: (1) very user-friendly library of fairness metrics (2) fair-aware models (2) model selection with fairness as a criteria under consideration 89 | 90 | -I think usability and examples are super important. For example, I think it's a big reason why sklearn is used so heavily. Example: http://scikit-learn.org/stable/modules/generated/sklearn.metrics.accuracy_score.html 91 | 92 | -Keep all our notation very similar to fairkit-learn (maybe eventually we can get a pull from them??) 93 | 94 | 95 | 96 | 97 | 98 | -------------------------------------------------------------------------------- /fklearn_pkg/fklearn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/INSPIRED-GMU/fairkit-learn/1c84e9500f8bf11bc2948d7aea8cd8ec0d1297c8/fklearn_pkg/fklearn/__init__.py -------------------------------------------------------------------------------- /fklearn_pkg/fklearn/datasets.py: -------------------------------------------------------------------------------- 1 | """ 2 | Basic IO for loading fair datasets 3 | 4 | Ex: http://scikit-learn.org/stable/datasets/twenty_newsgroups.html#newsgroups 5 | 6 | TODO: should we have "vectorized" and "non-vectorized" versions here like sklearn does? 
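Example (sketch; assumes the adult income train/test CSVs are checked out under ../data/adult_income/ relative to the working directory):

    >>> from fklearn.datasets import load_adult_income
    >>> data, raw_train = load_adult_income(train_val_split=0.5)
    >>> sorted(data['attribute_map'].keys())
    ['Race', 'Sex']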
7 | """ 8 | from __future__ import division 9 | import warnings 10 | import numpy as np 11 | import sklearn 12 | import pandas as pd 13 | 14 | def fetch_adult_income(): 15 | #directly downloads the data 16 | #calls load to load the data 17 | #TODO 18 | pass 19 | 20 | def load_adult_income_train_val(): 21 | #for the user study so they can't access the test set 22 | return load_adult_income(train_val_split=0.5, notest=True) 23 | 24 | def load_adult_income(train_val_split=0.5, notest=False): 25 | """ 26 | Load files and data from the propublica dataset 27 | 28 | Parameters 29 | ---------- 30 | train_val_split : float 31 | Amount to split the training set to create a train and validation set 32 | 33 | Returns 34 | ------- 35 | data : dict 36 | With keys 37 | 38 | X : 2-d ndarray 39 | 40 | y : 1-d ndarray 41 | 42 | (or X_train, y_train, X_test, y_test if subset=='all') 43 | 44 | feat_names : list of strs 45 | List of the feature names corresponding to the indices of the columns 46 | of X 47 | 48 | attribute_map : dict of dicts of dicts 49 | Denotes the protected attributes of the category of protected 50 | attribute (e.g. "Race") and maps the attribute name to the column and value that correspond 51 | to that attribute 52 | e.g. one-hot encoding for a one-hot encoding denoting the columns ("col") and values ("val") 53 | 54 | {"Race": {"Purple": {"col": 0, "val": 1}, "Green": {"col": 1, "val": 1}}, 55 | "Sex" : {"Female": {"col": 0, "val": 1}, "Male": {"col": 1, "val": 1}} 56 | 57 | e.g. categorical encoding {"Purple": {"col": 0, "val: 1"}, 58 | "Green": {"col": 0, "val: 2"}} 59 | 60 | Note: these MUST be mutually exclusive categories! 61 | 62 | is_categorical : boolean 63 | True if the y-values are categorical 64 | False otherwise (indicating a one-hot encoding) 65 | 66 | Examples 67 | -------- 68 | >>> from fklearn.datasets import load_adult_income 69 | """ 70 | data = {} 71 | data['is_categorical'] = False 72 | header_names = ["Age", "Workclass", "FNLWGT", "Education", "Education-Num", "Marital Status", "Occupation", "Relationship", "Race", "Sex", "Capital Gain", "Capital Loss", "Hours per Week", "Native Country", "Income"] 73 | 74 | train_ref = pd.read_csv("../data/adult_income/train.csv", index_col = False, delimiter=' *, *', engine='python', names = header_names) 75 | train_all = pd.get_dummies(train_ref) 76 | train_all.columns = train_all.columns.str.replace('_ ', '_') 77 | 78 | end_idx_train_val_split = int(np.floor(train_val_split*train_all.shape[0])) 79 | train = train_all[:end_idx_train_val_split] 80 | val = train_all[end_idx_train_val_split:] 81 | 82 | y_train = train["Income_<=50K"].copy() 83 | X_train = train.drop(["Income_<=50K","Income_>50K", "Native Country_Holand-Netherlands"], axis=1).copy() 84 | y_val = val["Income_<=50K"].copy() 85 | X_val = val.drop(["Income_<=50K","Income_>50K", "Native Country_Holand-Netherlands"], axis=1).copy() 86 | 87 | test_ref = pd.read_csv( "../data/adult_income/test.csv", index_col = False, delimiter=' *, *', engine='python', names = header_names) 88 | test = pd.get_dummies(test_ref) 89 | test.columns = test.columns.str.replace('_ ', '_') 90 | y_test = test["Income_<=50K."].copy() 91 | X_test = test.drop(["Income_<=50K.","Income_>50K."], axis=1).copy() 92 | 93 | data['feat_names'] = [str(col) for col in X_test.columns] 94 | 95 | assert X_train.shape[1] == X_val.shape[1] == X_test.shape[1] 96 | assert X_train.shape[0] == y_train.shape[0] 97 | assert X_val.shape[0] == y_val.shape[0] 98 | assert X_test.shape[0] == y_test.shape[0] 99 | 100 | 
data['X_train'] = X_train.values 101 | data['y_train'] = y_train.values 102 | data['X_val'] = X_val.values 103 | data['y_val'] = y_val.values 104 | data['X_test'] = X_test.values 105 | data['y_test'] = y_test.values 106 | 107 | attribute_map = {'Race': {}, 'Sex': {}} 108 | for ii, col in enumerate(X_train): 109 | if col.startswith('Race'): 110 | attribute_map['Race'][col] = {'col': ii, 'val': 1} 111 | elif col.startswith('Sex'): 112 | attribute_map['Sex'][col] = {'col': ii, 'val': 1} 113 | data['attribute_map'] = attribute_map 114 | 115 | if notest: 116 | del data['X_test'] 117 | del data['y_test'] 118 | 119 | unprocessed_train_data = train_ref 120 | 121 | return data, unprocessed_train_data 122 | 123 | def fetch_propublica(subset='train'): 124 | """ 125 | Load files and data from the propublica dataset 126 | 127 | Parameters 128 | ---------- 129 | subset : 'train' or 'test', 'all' 130 | Select which dataset to load 131 | 132 | Returns 133 | ------- 134 | X : 2-d ndarray 135 | 136 | y : 2-d ndarray 137 | 138 | attribute_map : dict of dicts 139 | Denotes the protected attributes of the category of protected 140 | attribute (e.g. "Race") to measure causal fairness 141 | maps the attribute name to the column and value that correspond 142 | to that attribute 143 | e.g. one-hot encoding {"Purple": {"col": 0, "val": 1}, 144 | "Green": {"col": 1, "val": 1}} 145 | 146 | e.g. categorical encoding {"Purple": {"col": 0, "val: 1"}, 147 | "Green": {"col": 0, "val: 2"}} 148 | 149 | Note: these MUST be mutually exclusive categories! 150 | 151 | is_categorical : boolean 152 | True if the y-values are categorical 153 | False otherwise (indicating a one-hot encoding) 154 | 155 | 156 | Examples 157 | -------- 158 | >>> from fklearn.datasets import fetch_propublica 159 | 160 | """ 161 | pass 162 | 163 | if __name__ == '__main__': 164 | load_adult_income() 165 | 166 | #TODO: other dataset functions 167 | 168 | -------------------------------------------------------------------------------- /fklearn_pkg/fklearn/fair_model_selection.py: -------------------------------------------------------------------------------- 1 | """ 2 | Used to search and return models along the Pareto frontier 3 | 4 | Inspiration: http://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html#sklearn.model_selection.GridSearchCV 5 | """ 6 | from __future__ import division 7 | import warnings 8 | import six 9 | import itertools 10 | import inspect 11 | import numpy as np 12 | import numpy.ma as ma 13 | 14 | RANDOM = 'random' 15 | GRID = 'grid' 16 | THRESHOLD_STR = 'threshold' 17 | 18 | SEARCH_STRATEGIES = [GRID, RANDOM] 19 | 20 | def filtered_arguments(func): 21 | required_args = six.viewkeys(inspect.signature(func).parameters) 22 | 23 | def inner(*args, **kwargs): 24 | kwargs = { k:v for k,v in six.iteritems(kwargs) if k in required_args } 25 | return func(*args, **kwargs) 26 | return inner 27 | 28 | class FairSearch(): 29 | """ 30 | Description 31 | TODO 32 | 33 | Parameters 34 | ---------- 35 | models : dict 36 | Dictionary of model names as keys and instantiations of model objects as values. 37 | e.g. { 38 | 'SVC': sklearn.svm.SVC(), 39 | 'LogisticRegression': sklearn.linear_model.LogisticRegression() 40 | } 41 | 42 | metrics : dict 43 | Dictionary of sklearn and fklearn fairness metrics 44 | e.g. 
{ 45 | 'Causal': fklearn.fair_metrics.causal_discrimination_score, 46 | 'Accuracy': sklearn.metrics.accuracy_score 47 | } 48 | 49 | parameters : dict of dicts of lists 50 | Dictionary with model names as keys and hyperparameter dicts as values. 51 | Each hyperparameter dict has hyperparameters as keys and hyperparameter 52 | settings to try as values. 53 | e.g. { 54 | 'SVC': {'kernel': ['rbf'], 'C': [1, 10]}, 55 | 'LogisticRegression': {'penalty': ['l1', 'l2'], 'C': [1, 10]} 56 | } 57 | 58 | 59 | thresholds : list of floats 60 | List of classifation thresholds to be applied to all classifiers. 61 | Usage is for classifiers that output a probability, rather than a 62 | hard classification. 63 | e.g. [i * 1.0/100 for i in range(100)] 64 | 65 | Attributes 66 | ---------- 67 | pareto_optimal_results : dict of masked arrays 68 | Keys strings describing the model parameter or score metric. 69 | e.g. {'param_C': masked_array(data = [0, --], mask = [False, True]), 70 | 'param_L1': masked_array(data = [0, --], mask = [False, True]), 71 | 'train_causal_fairness_score' : [0.8, 0.7], 72 | 'val_causal_fairness_score' : [0.71, 0.64], 73 | 'test_causal_fairness_score' : [0.7, 0.65], 74 | 'train_accuracy_score' : [0.6, 0.8], 75 | 'val_accuracy_score' : [0.57, 0.81], 76 | 'test_accuracy_score' : [0.55, 0.78], 77 | 'fit_time' : [0.08, 1.1]} 78 | 79 | Examples 80 | -------- 81 | >>> from fklearn.fair_model_selection import FairSearch 82 | """ 83 | 84 | def __init__(self, models, fairness_metrics, performance_metrics, parameters, thresholds): 85 | self.models = models 86 | self.fairness_metrics = { k:filtered_arguments(v) for k,v in six.iteritems(fairness_metrics) } 87 | self.performance_metrics = { k:filtered_arguments(v) for k,v in six.iteritems(performance_metrics) } 88 | self.parameters = parameters 89 | self.thresholds = thresholds 90 | self.search_results = {} 91 | self.pareto_optimal_results = {} 92 | 93 | def _build_grid_param_arrays(self): 94 | self.n_experiments = 0 95 | attribute_categories = [] 96 | 97 | for key, _ in six.iteritems(self.models): 98 | model = self.models[key] 99 | keys, values = zip(*self.parameters[key].items()) 100 | keys = keys + (THRESHOLD_STR, ) 101 | values = values + (self.thresholds, ) 102 | attribute_categories.extend(keys) 103 | self.n_experiments += len([dict(zip(keys, v)) for v in itertools.product(*values)]) 104 | 105 | for attribute in list(set(attribute_categories)): 106 | self.search_results["param_" + attribute] = [np.nan] * self.n_experiments 107 | 108 | return 109 | 110 | def _build_score_arrays(self, data): 111 | scores = {} 112 | 113 | for protected_attribute, _ in six.iteritems(data["attribute_map"]): 114 | for fairness_metric, _ in six.iteritems(self.fairness_metrics): 115 | self.search_results["score_" + protected_attribute + "_" + fairness_metric] = [np.nan] * self.n_experiments 116 | for performance_metric, _ in six.iteritems(self.performance_metrics): 117 | self.search_results["score_" + performance_metric] = [np.nan] * self.n_experiments 118 | 119 | return 120 | 121 | def _fit_grid(self, data, verbose=False, n_train_samples=None, n_val_samples=None): 122 | #TODO add verbose functionality 123 | i = -1 124 | args_dict = {} 125 | 126 | if n_train_samples: 127 | train_idx = np.random.choice(data["X_train"].shape[0], n_train_samples, replace=False) 128 | X_train = data["X_train"][train_idx, :] 129 | y_train = data["y_train"][train_idx] 130 | else: 131 | X_train = data["X_train"] 132 | y_train = data["y_train"] 133 | 134 | if n_val_samples: 135 | val_idx = 
np.random.choice(data["X_val"].shape[0], n_val_samples, replace=0) 136 | X_val = data["X_val"][val_idx, :] 137 | y_val = data["y_val"][val_idx] 138 | else: 139 | X_val = data["X_val"] 140 | y_val = data["y_val"] 141 | 142 | args_dict["X"] = X_val 143 | args_dict["y_true"] = y_val 144 | 145 | for model_key, model_family in six.iteritems(self.models): 146 | parameter_keys, parameter_values = zip(*self.parameters[model_key].items()) 147 | experiments = [dict(zip(parameter_keys, v)) for v in itertools.product(*parameter_values)] 148 | for experiment in experiments: 149 | # Train Model 150 | model = model_family(**experiment) 151 | model = model.fit(X_train, y_train) 152 | args_dict["y_pred_proba"] = model.predict_proba(X_val)[:, 1] 153 | args_dict["trained_model"] = model 154 | 155 | for threshold in self.thresholds: 156 | 157 | args_dict["threshold"] = threshold 158 | args_dict["y_pred"] = args_dict["y_pred_proba"] > threshold 159 | 160 | i += 1 161 | self.search_results["param_threshold"][i] = threshold 162 | # Fill in parameter values 163 | for experiment_key, experiment_value in six.iteritems(experiment): 164 | self.search_results["param_" + experiment_key][i] = experiment_value 165 | 166 | # Evaluate Model 167 | for protected_attribute, _ in six.iteritems(data["attribute_map"]): 168 | args_dict["attribute_map"] = data["attribute_map"][protected_attribute] 169 | for fairness_metric, fairness_metric_function in six.iteritems(self.fairness_metrics): 170 | self.search_results["score_" + protected_attribute + "_" + fairness_metric][i] = fairness_metric_function(**args_dict) 171 | 172 | for performance_metric, performance_metric_function in six.iteritems(self.performance_metrics): 173 | self.search_results["score_" + performance_metric][i] = performance_metric_function(**args_dict) 174 | 175 | for key, value in six.iteritems(self.search_results): 176 | # Hacky way to check for nans, but other ways seemed to break 177 | mask = [j != j for j in self.search_results[key]] 178 | self.search_results[key] = ma.array(self.search_results[key], mask=mask) 179 | 180 | self.pareto_optimal_results = self.filter_solution_set() 181 | 182 | def fit(self, data, verbose=1, search_strategy=GRID, n_random_models=None, n_train_samples=None, n_val_samples=None): 183 | """ 184 | Based in part on http://www.codiply.com/blog/hyperparameter-grid-search-across-multiple-models-in-scikit-learn/ 185 | 186 | Parameters 187 | ---------- 188 | X : 2d array-like 189 | Training dataset where rows are instances and columns are features. 190 | 191 | y : 1d array-like 192 | Classification labels 193 | 194 | 195 | attribute_map : dict of dicts 196 | denotes the protected attributes of the category of protected 197 | attribute (e.g. "Race") to measure causal fairness 198 | maps the attribute name to the column and value that correspond 199 | to that attribute 200 | e.g. one-hot encoding {"Purple": {"col": 0, "val": 1}, 201 | "Green": {"col": 1, "val": 1}} 202 | 203 | e.g. categorical encoding {"Purple": {"col": 0, "val: 1"}, 204 | "Green": {"col": 0, "val: 0"}} 205 | 206 | Note: these MUST be mutually exclusive categories! 207 | 208 | is_categorical : bool (optional) 209 | denotes whether the attribute map represents a categorical encoding. If False 210 | we assume that the encoding is one-hot. 
211 | 212 | max_models : None or int 213 | If None, return the entire Pareto frontier of models 214 | Otherwise, return int number of models, ties will be broken randomly 215 | 216 | search_strategy : str 217 | 'random', a random search over models/hyperparameters 218 | 'grid', enumerates the space of models/hyperparameters 219 | 'genetic_algorithms', uses genetic algorithms 220 | """ 221 | 222 | assert search_strategy in SEARCH_STRATEGIES 223 | 224 | if search_strategy == RANDOM: 225 | assert n_random_models > 0 226 | #TODO 227 | 228 | 229 | if search_strategy == GRID: 230 | self._build_grid_param_arrays() 231 | self._build_score_arrays(data) 232 | self._fit_grid(data, verbose=verbose, n_train_samples=n_train_samples, n_val_samples=n_val_samples) 233 | 234 | 235 | def filter_solution_set(self, omitted_score_list=[]): 236 | # Inspired by https://stackoverflow.com/questions/32791911/fast-calculation-of-pareto-front-in-python 237 | assert(self.search_results) 238 | 239 | costs = -1 * np.array([v for k,v in six.iteritems(self.search_results) if ((k[:5] == "score") & (k[6:] not in omitted_score_list))]).T 240 | 241 | is_efficient = np.ones(costs.shape[0], dtype = bool) 242 | for i, c in enumerate(costs): 243 | if is_efficient[i]: 244 | is_efficient[is_efficient] = np.any(costs[is_efficient]<=c, axis=1) # Remove dominated points 245 | 246 | return { k:v[is_efficient] for k,v in six.iteritems(self.search_results)} 247 | 248 | -------------------------------------------------------------------------------- /fklearn_pkg/fklearn/fair_selection_aif.py: -------------------------------------------------------------------------------- 1 | """ 2 | Used to search and return models along the Pareto frontier using AIF360 metrics 3 | """ 4 | import six 5 | import itertools 6 | import numpy as np 7 | import tensorflow as tf 8 | 9 | from fklearn.ml_pipeline import MLPipeline 10 | from fklearn.fair_model_selection import filtered_arguments 11 | from aif360.algorithms.inprocessing import AdversarialDebiasing 12 | 13 | # MANDATORY hyperparmaeters for adversarial debiasing 14 | def DEFAULT_ADB_PARAMS(privileged, unprivileged): 15 | """ 16 | Create a dictionary of mandatory hyperparameters for adversarial debiasing 17 | """ 18 | 19 | return {'unprivileged_groups': [unprivileged], 'privileged_groups': [privileged], 20 | 'scope_name': ['adb'], 'sess': [tf.Session()]} 21 | 22 | class ModelSearch(object): 23 | """ 24 | 25 | Parameters 26 | ---------- 27 | models : dict 28 | Dictionary of model names as keys and instantiations of model objects as values. 29 | e.g. { 30 | 'SVC': sklearn.svm.SVC(), 31 | 'LogisticRegression': sklearn.linear_model.LogisticRegression() 32 | } 33 | 34 | metrics : dict[str, (MetricClass, str)] 35 | Dictionary of sklearn/AIF360 fairness metrics. The keys are the display names of the metrics, and 36 | the values are 2-tuples with the first element containing the metric class object, and the second 37 | containing the name of the metric function to evaluate. 38 | e.g. { 39 | 'ClassificationMetric': (aif360.metrics.ClassificationMetric, 'num_generalized_true_positives'), 40 | 'BinaryLabelDatasetMetric': (aif360.metrics.BinaryLabelDatasetMetric, 'disparate_impact') 41 | } 42 | 43 | hyperparameters : dict of dicts of lists 44 | Dictionary with model names as keys and hyperparameter dicts as values. 45 | Each hyperparameter dict has hyperparameters as keys and hyperparameter 46 | settings to try as values. 47 | e.g. 
{ 48 | 'SVC': {'kernel': ['rbf'], 'C': [1, 10]}, 49 | 'LogisticRegression': {'penalty': ['l1', 'l2'], 'C': [1, 10]} 50 | } 51 | 52 | 53 | thresholds : list of floats 54 | List of classifation thresholds to be applied to all classifiers. 55 | Usage is for classifiers that output a probability, rather than a 56 | hard classification. 57 | e.g. [i * 1.0/100 for i in range(100)] 58 | """ 59 | 60 | def __init__(self, models, metrics, hyperparameters, thresholds): 61 | self.models = models 62 | self.metrics = metrics 63 | self.hyperparameters = hyperparameters 64 | self.thresholds = thresholds 65 | self.search_results = [] 66 | self.pareto_optimal_results = [] 67 | 68 | def grid_search(self, dataset, privileged=[], unprivileged=[], test_frac=0.3, preprocessors=[], postprocessors=[]): 69 | """ 70 | Performs a grid search over the specified model + hyperparameter pairs, calculating metric evalutations for each model. 71 | 72 | Args: 73 | dataset (aif360.datasets.StructuredDataset): An instance of a structured dataset 74 | test_frac (float): A real number between 0 and 1 denoting the % of the dataset to be used as test data 75 | privileged (list[dict]): A list of dictionaries containing privileged groups 76 | unprivileged (list[dict]): A list of dictionaries containing unprivileged groups 77 | """ 78 | 79 | # If any pre/postprocessors were supplied, add the option for None by default 80 | preprocessors += [None] 81 | postprocessors += [None] 82 | 83 | self.model_id = 0 84 | 85 | # Try each unique model 86 | for model_name, ModelClass in six.iteritems(self.models): 87 | 88 | # If no hyperparameters were specified, use the defaults. Otherwise setup a grid search 89 | if len(self.hyperparameters[model_name]) == 0: 90 | param_list = [{}] 91 | else: 92 | parameter_keys, parameter_values = zip(*self.hyperparameters[model_name].items()) 93 | param_list = [dict(zip(parameter_keys, v)) for v in itertools.product(*parameter_values)] 94 | 95 | # Grid search through hyperparameters in the current model 96 | for param_set in param_list: 97 | 98 | model = ModelClass(**param_set) 99 | 100 | # Go through each combination of pre/post processing algorithms 101 | for preprocessor, postprocessor in itertools.product(preprocessors, postprocessors): 102 | 103 | mlp = MLPipeline(model, privileged=privileged, unprivileged=unprivileged, preprocessor=preprocessor, postprocessor=postprocessor) 104 | 105 | # Create a new search result for each threshold value 106 | for threshold in self.thresholds: 107 | 108 | if model_name == 'AdversarialDebiasing': 109 | mlp.model.scope_name = str(self.model_id) 110 | self.model_id += 1 111 | 112 | mlp.model.sess.close() 113 | tf.reset_default_graph() 114 | mlp.model.sess = tf.Session() 115 | 116 | mlp.fit(dataset, test_frac=test_frac, threshold=threshold) 117 | search_result = {'model_class': model_name, 118 | 'hyperparameters': param_set, 119 | 'preprocessor': type(preprocessor).__name__ if preprocessor else 'None', 120 | 'postprocessor': type(postprocessor).__name__ if postprocessor else 'None', 121 | 'metrics': {} 122 | } 123 | 124 | # Populate metrics for this search result 125 | for metric_name, metric_args in six.iteritems(self.metrics): 126 | 127 | # The first metric argument is the Metric Class itself. 
The rest are the names of 128 | # submetric evaluation functions 129 | MetricClass = metric_args[0] 130 | 131 | for metric_fn in metric_args[1:]: 132 | metric_val = mlp.evaluate(MetricClass, metric_fn) 133 | metric_category = '{} ({})'.format(metric_name, metric_fn) 134 | search_result['metrics'][metric_category] = metric_val 135 | 136 | self.search_results.append(search_result) 137 | 138 | self.pareto_optimal_results = self.__filter_solution_set() 139 | 140 | def __filter_solution_set(self): 141 | # Inspired by https://stackoverflow.com/questions/32791911/fast-calculation-of-pareto-front-in-python 142 | assert(self.search_results) 143 | 144 | costs = -1 * np.array([[v for _, v in six.iteritems(result['metrics'])] for result in self.search_results]) 145 | 146 | is_efficient = np.ones(costs.shape[0], dtype = bool) 147 | for i, c in enumerate(costs): 148 | if is_efficient[i]: 149 | is_efficient[is_efficient] = np.any(costs[is_efficient]<=c, axis=1) # Remove dominated points 150 | 151 | return [result for i, result in enumerate(self.search_results) if is_efficient[i]] 152 | 153 | def to_csv(self, filename): 154 | """ 155 | Exports the search results as a CSV file 156 | 157 | Args: 158 | filename (str): The name of the file to save the results to 159 | Raises: 160 | AttributeError: If a grid search has not yet been performed, an AttributeError will be raised 161 | """ 162 | 163 | import csv 164 | 165 | if len(self.search_results) == 0: 166 | raise AttributeError("A grid search must be performed before exporting results to CSV") 167 | 168 | # Compute CSV headers for all metrics in the search results 169 | metric_headers = { metric for res in self.pareto_optimal_results for metric in res['metrics'] } 170 | 171 | with open(filename, mode='w') as csv_file: 172 | headers = ['model', 'hyperparameters', 'preprocessor', 'postprocessor', *list(metric_headers)] 173 | writer = csv.DictWriter(csv_file, fieldnames=headers, lineterminator='\n') 174 | writer.writeheader() 175 | 176 | for result in self.pareto_optimal_results: 177 | metric_dict = {metric_name: metric_val for metric_name, metric_val in six.iteritems(result['metrics'])} 178 | 179 | writer.writerow({'model': result['model_class'], 180 | 'preprocessor': result['preprocessor'], 181 | 'postprocessor': result['postprocessor'], 182 | 'hyperparameters': repr(result['hyperparameters'] or 'Default (see sklearn docs)'), 183 | **metric_dict}) 184 | 185 | 186 | -------------------------------------------------------------------------------- /fklearn_pkg/fklearn/interface/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/INSPIRED-GMU/fairkit-learn/1c84e9500f8bf11bc2948d7aea8cd8ec0d1297c8/fklearn_pkg/fklearn/interface/__init__.py -------------------------------------------------------------------------------- /fklearn_pkg/fklearn/interface/main.py: -------------------------------------------------------------------------------- 1 | from plot import * 2 | 3 | 4 | # Load custom styles 5 | custom_css = Div(text="") 6 | add_btn = Button(label="Add Plot", button_type="success") 7 | remove_btn = Button(label="Remove Plot", button_type="danger") 8 | 9 | # Construct our viewport 10 | l = layout([ 11 | [custom_css], 12 | create_plot(), 13 | [add_btn, remove_btn] 14 | ], sizing_mode="fixed", css_classes=["layout-container"]) 15 | 16 | def add_plot(): 17 | l.children.insert(len(l.children)-1, create_plot()) 18 | 19 | def remove_plot(): 20 | if len(l.children) > 3: 21 | 
l.children.pop(len(l.children)-2) 22 | 23 | add_btn.on_click(add_plot) 24 | remove_btn.on_click(remove_plot) 25 | 26 | curdoc().add_root(l) 27 | curdoc().title = "FKLEARN" 28 | -------------------------------------------------------------------------------- /fklearn_pkg/fklearn/interface/plot.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from os.path import dirname, join 3 | 4 | from bokeh.plotting import figure, output_file, show 5 | from bokeh.layouts import layout, row, column 6 | from bokeh.models import ColumnDataSource, Div 7 | from bokeh.models.widgets import Slider, Select, Toggle, CheckboxGroup, Button 8 | from bokeh.models.callbacks import CustomJS 9 | from bokeh.io import curdoc, export_png 10 | 11 | 12 | explanations_file='' 13 | 14 | def load_csv_data(filestr): 15 | """ 16 | Loads in the csv with our data in it, and returns it as a Pandas Dataframe 17 | """ 18 | import csv 19 | 20 | datas = [] 21 | attr_map = {} 22 | 23 | with open(filestr) as csvfile: 24 | csv_rows = csv.reader(csvfile, delimiter=',') 25 | 26 | # Map each index to its attribute name 27 | header = next(csv_rows) 28 | attr_map = { i : header[i] for i in range(2, len(header))} 29 | 30 | for row in csv_rows: 31 | model = row[0] 32 | hyperparams = row[1] 33 | preprocessor = row[2] 34 | postprocessor = row[3] 35 | 36 | # Create a new data point and add it to that model value 37 | datum = {'model': model, 'hyperparameters': hyperparams, 'preprocessor': preprocessor, 'postprocessor': postprocessor, **{ attr_map[i] : float(row[i]) for i in range(4, len(row))}} 38 | datas.append(datum) 39 | 40 | return pd.DataFrame(datas) 41 | 42 | 43 | def load_explanations(filestr): 44 | """ 45 | Loads in the metric explanations as a dictionary mapping strings to explanations 46 | """ 47 | 48 | import json 49 | 50 | with open(filestr) as f: 51 | return json.load(f) 52 | 53 | 54 | def create_plot(csvfile, jsonfile): 55 | """ 56 | Creates and returns a scatter plot from the given data provided by the out.csv file. Each column will appear as a 57 | checkbox to the left of the plot, allowing for hiding of non-optimal data points. Models may be toggled 58 | by clicking on the labeled buttons. As of now, three models are hard-coded (but this is to change in the 59 | future to make this more adaptable to general use cases). 60 | 61 | Args: 62 | csvfile (str): The path name of the csv file to load. 
By default, we assume that we are in the root directory and load "fklearn/test-file.csv" 63 | """ 64 | 65 | explanations_file=jsonfile 66 | 67 | MODEL_COLORS = ['purple', 'orange', 'magenta', 'purple', 'green', 'blue'] 68 | 69 | df = load_csv_data(csvfile) 70 | attributes = sorted(set(df.keys()) - {'model'} - {'hyperparameters'} - {'preprocessor'} - {'postprocessor'}) 71 | 72 | # Assign a color to each model, recycling if need be 73 | colors = {model: MODEL_COLORS[i % len(MODEL_COLORS)] for i, model in enumerate(df['model'].unique())} 74 | 75 | # Create a color column and set their respective values 76 | df['color'] = df['model'] 77 | df['visible'] = True 78 | df['optimal'] = True 79 | df.replace({'color': colors}, inplace=True) 80 | 81 | # Initialize the tooltips that will be displayed when hovering over a data point 82 | TOOLTIPS=[ 83 | ("x", "@x"), 84 | ("y", "@y"), 85 | ("params", "@hyperparameters"), 86 | ("preprocessor", "@preprocessor"), 87 | ("postprocessor", "@postprocessor") 88 | ] 89 | 90 | data_source = ColumnDataSource(data={'x': [], 'y': [], 'model': [], 'color': [], 'hyperparameters': [], 'preprocessor': [], 'postprocessor': []}) 91 | 92 | # Construct our scatter plot, receiving data from our data source with the given attributes 93 | p = figure(plot_height=500, plot_width=700, title="", toolbar_location=None, tooltips=TOOLTIPS, sizing_mode="scale_both") 94 | p.circle(x="x", y="y", color="color", source=data_source, size=12, line_color=None, alpha=1.0, legend="model") 95 | p.legend.location = "top_right" 96 | 97 | x_axis = Select(title="X Axis", options=attributes, value=attributes[0], css_classes=['bk-axis-select']) 98 | y_axis = Select(title="Y Axis", options=attributes, value=attributes[1], css_classes=['bk-axis-select']) 99 | 100 | def update(): 101 | """ 102 | Update the plot with specified data 103 | """ 104 | 105 | filtered_df = df[(df['visible'] == True) & (df['optimal'] == True)] 106 | x_name = x_axis.value 107 | y_name = y_axis.value 108 | 109 | p.xaxis.axis_label = x_name 110 | p.yaxis.axis_label = y_name 111 | p.title.text = "{} data selected".format(len(filtered_df)) 112 | data_source.data = { 113 | 'x': filtered_df[x_name].values.astype(float), 114 | 'y': filtered_df[y_name].values.astype(float), 115 | 'model': filtered_df['model'].values, 116 | 'color': filtered_df['color'].values, 117 | 'hyperparameters': filtered_df['hyperparameters'].values, 118 | 'preprocessor': filtered_df['preprocessor'].values, 119 | 'postprocessor': filtered_df['postprocessor'].values 120 | } 121 | 122 | def create_toggle(model): 123 | """ 124 | Creates a function that toggles the visibility of a given model on the plot 125 | """ 126 | 127 | def toggle(toggled): 128 | df.loc[df['model'] == model, 'visible'] = toggled 129 | update() 130 | 131 | return toggle 132 | 133 | def dominates(p1, p2, attributes): 134 | """ 135 | Returns true iff p1 dominates p2. 
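Here, p1 dominates p2 only when p1 is strictly smaller than p2 on every attribute in the given list; a tie or a larger value on any attribute means p1 does not dominate p2.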
136 | """ 137 | for attr in attributes: 138 | if p1[attr] >= p2[attr]: 139 | return False 140 | return True 141 | 142 | def filter_optimality(attrs): 143 | """ 144 | Filter by pareto optimality 145 | """ 146 | 147 | attr_values = [attributes[idx] for idx in attrs] 148 | df_list = list(df.iterrows()) 149 | df['optimal'] = True 150 | 151 | # A data point p2 is optimal only if it is not dominated by any other point p1 152 | for j, p2 in df_list: 153 | df.at[j, 'optimal'] = all([not dominates(p1, p2, attr_values) for _, p1 in df_list]) 154 | 155 | update() 156 | 157 | def save_screenshot(visible_attrs, filename='plot'): 158 | """ 159 | Save a screenshot of the plot to the current directory with the specified file name. Also save a JSON file 160 | containing information about the data displayed in the plot 161 | """ 162 | 163 | import json 164 | 165 | # First, export a png of the plot 166 | export_png(p, 'fklearn/interface/exports/{}.png'.format(filename)) 167 | 168 | # Now create a dictionary of metadata pertaining to the current state of the plot 169 | plot_data = {'x_axis': x_axis.value, 'y_axis': y_axis.value} 170 | 171 | # Keep track of which models are visible on the plot 172 | all_models = df['model'].unique() 173 | visible_models = set(df[df['visible'] == True]['model'].unique()) 174 | plot_data['model_visibility'] = { m : m in visible_models for m in all_models } 175 | 176 | # Keep track of which checkboxes were checked when we export the screenshot 177 | plot_data['pareto_checkboxes'] = { attributes[i] : i in visible_attrs for i in range(len(attributes)) } 178 | 179 | with open('output/{}.json'.format(filename), 'w') as f: 180 | json.dump(plot_data, f) 181 | 182 | 183 | 184 | # Create our toggle buttons to show/hide different models on the plot 185 | toggles = [] 186 | for model in colors: 187 | toggle = Toggle(label="{}".format(model), button_type="success", active=True, css_classes=['bk-btn-model-{}'.format(colors[model])]) 188 | toggle.on_click(create_toggle(model)) 189 | toggles.append(toggle) 190 | 191 | x_axis.on_change('value', lambda attr, old, new: update()) 192 | y_axis.on_change('value', lambda attr, old, new: update()) 193 | 194 | checkbox_group = CheckboxGroup(labels=attributes, active=list(range(len(attributes))), css_classes=['bk-checkbox-group']) 195 | checkbox_group.on_click(lambda checked_attrs: filter_optimality(checked_attrs)) 196 | 197 | screenshot_btn = Button(label="Export Plot", button_type="warning", css_classes=['screenshot-btn']) 198 | screenshot_btn.on_click(lambda: save_screenshot(visible_attrs=checkbox_group.active)) 199 | 200 | # Load metric explanations as tooltips for the checkboxes 201 | metric_dict = load_explanations(explanations_file) 202 | 203 | inputs = column(x_axis, y_axis, *toggles, checkbox_group, screenshot_btn, width=320, height=500, sizing_mode="fixed") 204 | plot_row = row(inputs, p, css_classes=['layout-container']) 205 | 206 | # NOTE: Super hacky way to do this, but it was the only easy way I could fine. 
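The JavaScript below waits briefly for the checkbox labels to mount in the DOM, then sets each label's title attribute to the matching metric explanation so that it appears as a hover tooltip.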
207 | metric_tooltip_js = """var checkboxes = document.querySelectorAll('.bk-checkbox-group .bk-input-group label.bk');\n""" 208 | for i in range(len(attributes)): 209 | metric_tooltip_js += """checkboxes[{}].setAttribute('title', `{}`);\n""".format(i, metric_dict[attributes[i]]) 210 | 211 | # Create a setTimeout wrapper around the function so the DOM has a chance to mount 212 | metric_tooltip_js = """setTimeout(function() {\n""" + metric_tooltip_js + """}, 200);\n""" 213 | explanation_callback = CustomJS(args=dict(), code=metric_tooltip_js) 214 | p.x_range.js_on_change('start', explanation_callback) 215 | checkbox_group.js_on_click(explanation_callback) 216 | 217 | # Initial load of our data 218 | filter_optimality(range(len(attributes))) 219 | 220 | return plot_row 221 | -------------------------------------------------------------------------------- /fklearn_pkg/fklearn/interface/static/css/styles-notebook.css: -------------------------------------------------------------------------------- 1 | .bk-btn-model-red div button.bk-btn:not(.bk-active), 2 | .bk-btn-model-green div button.bk-btn:not(.bk-active), 3 | .bk-btn-model-blue div button.bk-btn:not(.bk-active), 4 | .bk-btn-model-orange div button.bk-btn:not(.bk-active), 5 | .bk-btn-model-yellow div button.bk-btn:not(.bk-active), 6 | .bk-btn-model-purple div button.bk-btn:not(.bk-active) { 7 | opacity: 0.5 !important; 8 | } 9 | 10 | .bk-btn-model-red div button { 11 | margin-top: 30px !important; 12 | background-color: red !important; 13 | border-style: none !important; 14 | } 15 | 16 | .bk-btn-model-green div button { 17 | margin-top: 30px !important; 18 | background-color: green !important; 19 | border-style: none !important; 20 | } 21 | 22 | .bk-btn-model-blue div button { 23 | margin-top: 30px !important; 24 | background-color: blue !important; 25 | border-style: none !important; 26 | } 27 | 28 | .bk-btn-model-orange div button { 29 | margin-top: 30px !important; 30 | background-color: orange !important; 31 | border-style: none !important; 32 | } 33 | 34 | .bk-btn-model-yellow div button { 35 | margin-top: 30px !important; 36 | background-color: yellow !important; 37 | border-style: none !important; 38 | } 39 | 40 | .bk-btn-model-purple div button { 41 | margin-top: 30px !important; 42 | background-color: purple !important; 43 | border-style: none !important; 44 | } 45 | 46 | .bk-btn-model-pink div button { 47 | margin-top: 30px !important; 48 | background-color: pink !important; 49 | border-style: none !important; 50 | } 51 | 52 | .bk-btn-model-magenta div button { 53 | margin-top: 30px !important; 54 | background-color: magenta !important; 55 | border-style: none !important; 56 | } 57 | 58 | .bk-checkbox-group .bk-input-group { 59 | margin-top: 30px !important; 60 | height: auto !important; 61 | } 62 | 63 | .bk-axis-select { 64 | height: 50px !important; 65 | } 66 | 67 | .layout-container { 68 | margin-bottom: 50px !important; 69 | border-top: 1px solid grey !important; 70 | } 71 | 72 | .bk.screenshot-btn { 73 | top: 420px !important; 74 | width: 100px !important; 75 | } 76 | -------------------------------------------------------------------------------- /fklearn_pkg/fklearn/interface/static/css/styles.css: -------------------------------------------------------------------------------- 1 | .bk-btn-model-red div button.bk-btn:not(.bk-active), 2 | .bk-btn-model-green div button.bk-btn:not(.bk-active), 3 | .bk-btn-model-blue div button.bk-btn:not(.bk-active), 4 | .bk-btn-model-orange div button.bk-btn:not(.bk-active), 5 | 
.bk-btn-model-yellow div button.bk-btn:not(.bk-active), 6 | .bk-btn-model-purple div button.bk-btn:not(.bk-active) { 7 | opacity: 0.5 !important; 8 | } 9 | 10 | .bk-btn-model-red div button { 11 | margin-top: 30px !important; 12 | background-color: red !important; 13 | border-style: none !important; 14 | } 15 | 16 | .bk-btn-model-green div button { 17 | margin-top: 30px !important; 18 | background-color: green !important; 19 | border-style: none !important; 20 | } 21 | 22 | .bk-btn-model-blue div button { 23 | margin-top: 30px !important; 24 | background-color: blue !important; 25 | border-style: none !important; 26 | } 27 | 28 | .bk-btn-model-orange div button { 29 | margin-top: 30px !important; 30 | background-color: orange !important; 31 | border-style: none !important; 32 | } 33 | 34 | .bk-btn-model-yellow div button { 35 | margin-top: 30px !important; 36 | background-color: yellow !important; 37 | border-style: none !important; 38 | } 39 | 40 | .bk-btn-model-purple div button { 41 | margin-top: 30px !important; 42 | background-color: purple !important; 43 | border-style: none !important; 44 | } 45 | 46 | .bk-checkbox-group { 47 | position: relative !important; 48 | } 49 | 50 | .bk-checkbox-group .bk-input-group { 51 | margin-top: 30px !important; 52 | height: 200px !important; 53 | position: absolute !important; 54 | cursor: pointer; 55 | } 56 | 57 | .bk-axis-select { 58 | height: 50px !important; 59 | } 60 | 61 | .layout-container { 62 | margin-bottom: 50px !important; 63 | border-top: 1px solid grey !important; 64 | } 65 | 66 | .bk.screenshot-btn { 67 | top: 420px !important; 68 | width: 100px !important; 69 | } -------------------------------------------------------------------------------- /fklearn_pkg/fklearn/interface/static/data/explanations.json: -------------------------------------------------------------------------------- 1 | { 2 | "BinaryLabelDatasetMetric (num_positives)": "Compute the number of positives", 3 | "BinaryLabelDatasetMetric (num_negatives)": "Compute the number of negatives", 4 | "BinaryLabelDatasetMetric (base_rate)": "Compute the base rate, 'Pr(Y = 1) = P/(P+N)'", 5 | "BinaryLabelDatasetMetric (disparate_impact)": "'Pr(Y = 1 | D = unprivileged) / Pr(Y = 1 | D = privileged)'", 6 | "BinaryLabelDatasetMetric (statistical_parity_difference)": "'Pr(Y = 1 | D = unprivileged) - Pr(Y = 1 | D = privileged)'", 7 | "UnifiedMetricLibrary (num_true_positives)": "Return the number of instances in the dataset where both the predicted and true labels are 'favorable'", 8 | "UnifiedMetricLibrary (num_false_positives)": "Return the number of false positives", 9 | "UnifiedMetricLibrary (num_false_negatives)": "Return the number of false negatives", 10 | "UnifiedMetricLibrary (num_true_negatives)": "Return the number of true negatives", 11 | "UnifiedMetricLibrary (num_generalized_true_positives)": "Return the generalized number of true positives, the weighted sum of predicted scores where true labels are 'favorable'", 12 | "UnifiedMetricLibrary (num_generalized_false_positives)": "Return the generalized number of false positives, the weighted sum of predicted scores where true labels are 'unfavorable'", 13 | "UnifiedMetricLibrary (num_generalized_false_negatives)": "Return the generalized number of false negatives, the weighted sum of 1 - predicted scores where true labels are 'favorable'", 14 | "UnifiedMetricLibrary (num_generalized_true_negatives)": "Return the generalized number of true negatives, the weighted sum of 1 - predicted scores where true labels are 'unfavorable'", 15 |
"UnifiedMetricLibrary (true_positive_rate)": "Return the ratio of true positives to positive examples in the dataset; 'TPR = TP/P'", 16 | "UnifiedMetricLibrary (false_positive_rate)": "'FPR = FP/N'", 17 | "UnifiedMetricLibrary (false_negative_rate)": "'FNR = FN/P'", 18 | "UnifiedMetricLibrary (true_negative_rate)": "'TNR = TN/N'", 19 | "UnifiedMetricLibrary (generalized_true_positive_rate)": "Return the ratio of generalized true positives to positive examples in the dataset; 'GTPR = GTP/P'", 20 | "UnifiedMetricLibrary (generalized_false_positive_rate)": "'GFPR = GFP/N'", 21 | "UnifiedMetricLibrary (generalized_false_negative_rate)": "'GFNR = GFN/P'", 22 | "UnifiedMetricLibrary (generalized_true_negative_rate)": "'GTNR = GTN/N'", 23 | "UnifiedMetricLibrary (positive_predictive_value)": "'PPV = TP/(TP + FP)'", 24 | "UnifiedMetricLibrary (false_discovery_rate)": "'FDR = FP/(TP + FP)'", 25 | "UnifiedMetricLibrary (false_omission_rate)": "'FOR = FN/(TN + FN)'", 26 | "UnifiedMetricLibrary (negative_predictive_value)": "'NPV = TN/(TN + FN)'", 27 | "UnifiedMetricLibrary (accuracy)": "'ACC = (TP + TN)/(P + N)'", 28 | "UnifiedMetricLibrary (error_rate)": "'ERR = (FP + FN)/(P + N)'", 29 | "UnifiedMetricLibrary (true_positive_rate_difference)": "'TPR(D = unprivileged) - TPR(D = privileged)'", 30 | "UnifiedMetricLibrary (false_positive_rate_difference)": "'FPR(D = unprivileged) - FPR(D = privileged)'", 31 | "UnifiedMetricLibrary (false_negative_rate_difference)": "'FNR(D = unprivileged) - FNR(D = privileged)'", 32 | "UnifiedMetricLibrary (false_omission_rate_difference)": "'FOR(D = unprivileged) - FOR(D = privileged)'", 33 | "UnifiedMetricLibrary (false_discovery_rate_difference)": "'FDR(D = unprivileged) - FDR(D = privileged)'", 34 | "UnifiedMetricLibrary (false_positive_rate_ratio)": "'FPR(D = unprivileged) / FPR(D = privileged)'", 35 | "UnifiedMetricLibrary (false_negative_rate_ratio)": "'FNR(D = unprivileged) / FNR(D = privileged)'", 36 | "UnifiedMetricLibrary (false_omission_rate_ratio)": "'FOR(D = unprivileged) / FOR(D = privileged)'", 37 | "UnifiedMetricLibrary (false_discovery_rate_ratio)": "'FDR(D = unprivileged) / FDR(D = privileged)'", 38 | "UnifiedMetricLibrary (average_odds_difference)": "Average of difference in FPR and TPR for unprivileged and privileged groups", 39 | "UnifiedMetricLibrary (average_abs_odds_difference)": "Average of absolute difference in FPR and TPR for unprivileged and privileged groups", 40 | "UnifiedMetricLibrary (error_rate_difference)": "Difference in error rates for unprivileged and privileged groups; ERR(D = unprivileged) - ERR(D = privileged)", 41 | "UnifiedMetricLibrary (error_rate_ratio)": "Ratio of error rates for unprivileged and privileged groups; 'ERR(D = unprivileged) / ERR(D = privileged)'", 42 | "UnifiedMetricLibrary (num_pred_positives)": "Return the number of predicted positives", 43 | "UnifiedMetricLibrary (num_pred_negatives)": "Return the number of predicted negatives", 44 | "UnifiedMetricLibrary (selection_rate)": "'Pr(Y_hat = favorable)'", 45 | "UnifiedMetricLibrary (disparate_impact)": "'Pr(Y_hat = 1 | D = unprivileged) / Pr(Y_hat = 1 | D = privileged)'", 46 | "UnifiedMetricLibrary (statistical_parity_difference)": "'Pr(Y_hat = 1 | D = unprivileged) - Pr(Y_hat = 1 | D = privileged)'", 47 | "UnifiedMetricLibrary (theil_index)": "Generalized entropy index with alpha = 1", 48 | "UnifiedMetricLibrary (coefficient_of_variation)": "Two times the square root of the generalized entropy index with alpha = 2", 49 | "UnifiedMetricLibrary
(between_group_theil_index)": "The 'between group generalized entropy index' with alpha = 1", 50 | "UnifiedMetricLibrary (between_group_coefficient_of_variation)": "Two times the square root of the 'between group generalized entropy index' with alpha = 2", 51 | "UnifiedMetricLibrary (between_all_groups_theil_index)": "The 'between all groups generalized entropy index' with alpha = 1", 52 | "UnifiedMetricLibrary (between_all_groups_coefficient_of_variation)": "Two times the square root of the 'between all groups generalized entropy index' with alpha = 2", 53 | "UnifiedMetricLibrary (equal_opportunity_difference)": "'TPR(D = unprivileged) - TPR(D = privileged)'", 54 | "UnifiedMetricLibrary (power)": "Return the number of instances in the dataset where both the predicted and true labels are 'favorable'", 55 | "UnifiedMetricLibrary (precision)": "'Precision = TP/(TP + FP)'", 56 | "UnifiedMetricLibrary (recall)": "'Recall = TP/P'", 57 | "UnifiedMetricLibrary (sensitivity)": "'Sensitivity = Recall = TP/P'", 58 | "UnifiedMetricLibrary (specificity)": "'Specificity = TN/N'", 59 | "UnifiedMetricLibrary (accuracy_score)": "Fraction of correct predictions", 60 | "CausalDiscriminationScore": "The fraction of inputs for which changing at least one of those characteristics causes the output to change", 61 | "GroupDiscriminationScore": "A naive Bayes approach for discrimination-free classification" 62 | } -------------------------------------------------------------------------------- /fklearn_pkg/fklearn/metric_library.py: -------------------------------------------------------------------------------- 1 | from aif360.metrics import ClassificationMetric 2 | from sklearn.metrics import accuracy_score as accuracy 3 | import math 4 | 5 | def classifier_quality_score(model, test_data, 6 | unprivileged_groups, 7 | privileged_groups): 8 | 9 | classified_data = model.predict(test_data) 10 | metric_library = UnifiedMetricLibrary(test_data, classified_data, unprivileged_groups, privileged_groups) 11 | 12 | # call all metrics 13 | 14 | #accuracy 15 | 16 | acc = metric_library.accuracy_score() 17 | 18 | #fairness 19 | fairness_scores = [] 20 | 21 | # equal opportunity difference 22 | eq_opp_diff = metric_library.equal_opportunity_difference() 23 | fairness_scores.append(eq_opp_diff) 24 | 25 | # average odds difference 26 | avg_odds_diff = metric_library.average_odds_difference() 27 | fairness_scores.append(avg_odds_diff) 28 | 29 | # statistical parity difference 30 | stat_parity_diff = metric_library.statistical_parity_difference() 31 | fairness_scores.append(stat_parity_diff) 32 | 33 | # average odds difference 34 | avg_odds_diff = metric_library.average_odds_difference() 35 | fairness_scores.append(avg_odds_diff) 36 | 37 | # calculate & return overall quality score 38 | max_fair_score = max(fairness_scores) 39 | balance_val = acc * (1-max_fair_score) 40 | 41 | return math.sqrt(balance_val) 42 | 43 | 44 | class UnifiedMetricLibrary(): 45 | 46 | def __init__(self, test_data, classified_data, unprivileged_groups, privileged_groups): 47 | 48 | self.test_data = test_data 49 | self.classified_data = classified_data 50 | 51 | self.classification_metric = ClassificationMetric(test_data, classified_data, unprivileged_groups, privileged_groups) 52 | 53 | def accuracy_score(self): 54 | return accuracy(self.test_data.labels, self.classified_data.labels) 55 | 56 | def num_true_positives(self): 57 | return self.classification_metric.num_true_positives() 58 | 59 | def num_false_positives(self): 60 | return
self.classification_metric.num_false_positives() 61 | 62 | def num_false_negatives(self): 63 | return self.classification_metric.num_false_negatives() 64 | 65 | def num_generalized_true_positives(self): 66 | return self.classification_metric.num_generalized_true_positives() 67 | 68 | def num_generalized_false_positives(self): 69 | return self.classification_metric.num_generalized_false_positives() 70 | 71 | def num_generalized_false_negatives(self): 72 | return self.classification_metric.num_generalized_false_negatives() 73 | 74 | def num_generalized_true_negatives(self): 75 | return self.classification_metric.num_generalized_true_negatives() 76 | 77 | def true_positive_rate(self): 78 | return self.classification_metric.true_positive_rate() 79 | 80 | def false_positive_rate(self): 81 | return self.classification_metric.false_positive_rate() 82 | 83 | def false_negative_rate(self): 84 | return self.classification_metric.false_negative_rate() 85 | 86 | def true_negative_rate(self): 87 | return self.classification_metric.true_negative_rate() 88 | 89 | def generalized_true_positive_rate(self): 90 | return self.classification_metric.generalized_true_positive_rate() 91 | 92 | def generalized_false_positive_rate(self): 93 | return self.classification_metric.generalized_false_positive_rate() 94 | 95 | def generalized_false_negative_rate(self): 96 | return self.classification_metric.generalized_false_negative_rate() 97 | 98 | def generalized_true_negative_rate(self): 99 | return self.classification_metric.generalized_true_negative_rate() 100 | 101 | def positive_predictive_value(self): 102 | return self.classification_metric.positive_predictive_value() 103 | 104 | def false_discovery_rate(self): 105 | return self.classification_metric.false_discovery_rate() 106 | 107 | def false_omission_rate(self): 108 | return self.classification_metric.false_omission_rate() 109 | 110 | def negative_predictive_value(self): 111 | return self.classification_metric.negative_predictive_value() 112 | 113 | def error_rate(self): 114 | return self.classification_metric.error_rate() 115 | 116 | def false_positive_rate_difference(self): 117 | return self.classification_metric.false_positive_rate_difference() 118 | 119 | def false_negative_rate_difference(self): 120 | return self.classification_metric.false_negative_rate_difference() 121 | 122 | def false_omission_rate_difference(self): 123 | return self.classification_metric.false_omission_rate_difference() 124 | 125 | def false_discovery_rate_difference(self): 126 | return self.classification_metric.false_discovery_rate_difference() 127 | 128 | def false_positive_rate_ratio(self): 129 | return self.classification_metric.false_positive_rate_ratio() 130 | 131 | def false_negative_rate_ratio(self): 132 | return self.classification_metric.false_negative_rate_ratio() 133 | 134 | def false_omission_rate_ratio(self): 135 | return self.classification_metric.false_omission_rate_ratio() 136 | 137 | def false_discovery_rate_ratio(self): 138 | return self.classification_metric.false_discovery_rate_ratio() 139 | 140 | def average_abs_odds_difference(self): 141 | return self.classification_metric.average_abs_odds_difference() 142 | 143 | def error_rate_difference(self): 144 | return self.classification_metric.error_rate_difference() 145 | 146 | def error_rate_ratio(self): 147 | return self.classification_metric.error_rate_ratio() 148 | 149 | def num_pred_positives(self): 150 | return self.classification_metric.num_pred_positives() 151 | 152 | def num_pred_negatives(self): 153 | return 
self.classification_metric.num_pred_negatives() 154 | 155 | def selection_rate(self): 156 | return self.classification_metric.selection_rate() 157 | 158 | def equal_opportunity_difference(self): 159 | return abs(self.classification_metric.equal_opportunity_difference()) 160 | 161 | def average_odds_difference(self): 162 | return abs(self.classification_metric.average_odds_difference()) 163 | 164 | def disparate_impact(self): 165 | return abs(self.classification_metric.disparate_impact()) 166 | 167 | def statistical_parity_difference(self): 168 | return abs(self.classification_metric.statistical_parity_difference()) 169 | -------------------------------------------------------------------------------- /fklearn_pkg/fklearn/ml_pipeline.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.preprocessing import StandardScaler 3 | 4 | from aif360.metrics import ClassificationMetric 5 | from aif360.algorithms import Transformer 6 | from aif360.algorithms.postprocessing import CalibratedEqOddsPostprocessing 7 | 8 | class MLPipeline(object): 9 | 10 | """ 11 | Defines a machine-learning pipeline for evaluating fairness in predictors. For usage, see example at the bottom of the file. 12 | 13 | Args: 14 | model (sklearn.model | aif360.algorithms.inprocessing): An sklearn predictor OR an AIF360 inprocessing algorithm 15 | privileged (list[dict[str, float]]): A list of dictionaries with keys representing privileged attribute + value pairs 16 | unprivileged (list[dict[str, float]]): A list of dictionaries with keys representing unprivileged attribute + value pairs 17 | preprocessor (aif360.algorithms.preprocessing): An instance of an AIF360 preprocessing algorithm 18 | postprocessor (aif360.algorithms.postprocessing): An instance of an AIF360 postprocessing algorithm 19 | """ 20 | 21 | def __init__(self, model, privileged=[], unprivileged=[], preprocessor=None, postprocessor=None): 22 | self.model = model 23 | self.privileged = privileged 24 | self.unprivileged = unprivileged 25 | self.preprocessor = preprocessor 26 | self.postprocessor = postprocessor 27 | self.dataset_train = [] 28 | self.dataset_test = [] 29 | self.test_predictions = [] 30 | 31 | 32 | def fit(self, dataset, test_frac=0.3, threshold=0.5, feature_scaling=False): 33 | """ 34 | Trains our model on the dataset. Uses different control flow depending on if we are using an 35 | sklearn model or an AIF360 inprocessing algorithm 36 | 37 | Args: 38 | dataset (aif360.datasets.StructuredDataset): An instance of a structured dataset 39 | test_frac (float): A real number between 0 and 1 denoting the % of the dataset to be used as test data 40 | threshold (float): A real number between 0 and 1 denoting the threshold of acceptable class imbalance 41 | """ 42 | 43 | if test_frac < 0 or test_frac > 1: 44 | raise ValueError("Parameter test_frac must be between 0 and 1") 45 | 46 | dataset_train, dataset_test = dataset.split([1-test_frac], shuffle=False) 47 | 48 | # If a preprocessing algorithm was supplied, apply that transformations first 49 | if self.preprocessor: 50 | dataset_train = self.preprocessor.fit_transform(dataset_train) 51 | dataset_test = self.preprocessor.fit_transform(dataset_test) 52 | 53 | self.dataset_train = dataset_train 54 | self.dataset_test = dataset_test 55 | 56 | self.__fit_inprocessing(threshold, feature_scaling) 57 | 58 | def __fit_inprocessing(self, threshold, feature_scaling): 59 | """ 60 | Trains an AIF360 inprocessing model on the provided dataset. 
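Feature scaling is applied first when requested; the model is then fit on the training split, and its predictions on the held-out test split are passed through the configured postprocessor, if one was supplied.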
61 | 62 | Args: 63 | """ 64 | 65 | # Apply feature scaling if specified 66 | if feature_scaling: 67 | scaler = StandardScaler().fit(self.dataset_train.features) 68 | self.dataset_train.features = scaler.fit_transform(self.dataset_train.features) 69 | self.dataset_test.features = scaler.transform(self.dataset_test.features) 70 | 71 | self.model.fit(self.dataset_train) 72 | 73 | 74 | # Make our predictions, without thresholds for now 75 | dataset_test_pred = self.model.predict(self.dataset_test) 76 | 77 | # If a postprocessing algorithm was specified, transform the test results 78 | if self.postprocessor: 79 | dataset_test_pred = self.postprocessor.fit(self.dataset_test, dataset_test_pred) \ 80 | .predict(dataset_test_pred) 81 | 82 | self.classified_data = dataset_test_pred 83 | 84 | 85 | def evaluate(self, metric, submetric): 86 | """ 87 | Evaluates an AIF360 metric against the trained model. 88 | 89 | Args: 90 | metric (aif360.metrics.Metric): An AIF360 metric class 91 | submetric (str): A string denoting the metric evaluation function that is to be called on the provided metric class 92 | Returns: 93 | float: A float denoting the performance of each method evaluation within the specified metric on the trained model 94 | Raises: 95 | AttributeError: If a model has not been trained yet, or 96 | If the provided submetric function does not exist on the metric class, or 97 | If the provided submetric function contains arguments other than "privileged" 98 | 99 | """ 100 | 101 | from inspect import signature 102 | import re 103 | 104 | if not self.dataset_train: 105 | raise AttributeError("A model must be fit before evaluating a metric") 106 | 107 | curr_metric = metric(self.dataset_test, self.classified_data, unprivileged_groups=self.unprivileged, privileged_groups=self.privileged) 108 | 109 | # Retrieve the callable evalation function 'submetric' of this metric instance 110 | submetric_fn = getattr(curr_metric, submetric) 111 | 112 | return submetric_fn() 113 | -------------------------------------------------------------------------------- /fklearn_pkg/fklearn/sample.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sklearn as skl 3 | import six 4 | from sklearn.svm import SVC 5 | from sklearn.linear_model import LogisticRegression 6 | 7 | from sklearn.metrics import accuracy_score 8 | from fair_metrics import causal_discrimination_score, group_discrimination_score, false_positive_rate_equality, false_negative_rate_equality 9 | from fair_model_selection import FairSearch 10 | 11 | from datasets import load_adult_income 12 | 13 | import os 14 | 15 | os.chdir("fklearn/") 16 | 17 | 18 | data = load_adult_income() 19 | models = {'LogisticRegression': LogisticRegression} 20 | metrics = {'Causal': group_discrimination_score, 'Accuracy': accuracy_score} 21 | parameters = { 22 | # 'SVC': {'kernel': ['rbf'], 'C': [1, 10], 'probability': [True]}, 23 | 'LogisticRegression': {'penalty': ['l1', 'l2'], 'C': [1, 10]} 24 | } 25 | 26 | thresholds = [i * 1.0/100 for i in range(10)] 27 | Search = FairSearch(models, metrics, metrics, parameters, thresholds) 28 | Search.fit(data[0]) 29 | 30 | print(Search) 31 | 32 | -------------------------------------------------------------------------------- /fklearn_pkg/fklearn/sample_aif.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sklearn as skl 3 | import six 4 | from sklearn.linear_model import LogisticRegression 5 | from 
sklearn.neighbors import KNeighborsClassifier 6 | from sklearn.svm import SVC 7 | 8 | from aif360.datasets import AdultDataset, GermanDataset 9 | from aif360.metrics import BinaryLabelDatasetMetric, ClassificationMetric 10 | from aif360.algorithms.preprocessing.optim_preproc_helpers.opt_tools import OptTools 11 | from aif360.algorithms.preprocessing.optim_preproc_helpers.distortion_functions import get_distortion_adult 12 | from aif360.algorithms.preprocessing import DisparateImpactRemover, LFR, Reweighing, OptimPreproc 13 | from aif360.algorithms.inprocessing import AdversarialDebiasing 14 | from aif360.algorithms.postprocessing import CalibratedEqOddsPostprocessing, EqOddsPostprocessing, RejectOptionClassification 15 | 16 | from fair_selection_aif import AIF360Search, DEFAULT_ADB_PARAMS 17 | 18 | import os 19 | 20 | dataset = GermanDataset() 21 | models = {'LogisticRegression': LogisticRegression, 'KNeighborsClassifier': KNeighborsClassifier} 22 | metrics = {'ClassificationMetric': [ClassificationMetric, 23 | 'num_generalized_true_positives', 24 | 'num_true_negatives', 25 | 'false_positive_rate', 26 | 'false_negative_rate', 27 | 'generalized_false_positive_rate' 28 | ] 29 | # 'BinaryLabelDatasetMetric': [BinaryLabelDatasetMetric, 'disparate_impact'] 30 | } 31 | unprivileged = [{'age': 0, 'sex': 0}] 32 | privileged = [{'age': 1, 'sex': 1}] 33 | preprocessor_args = {'unprivileged_groups': unprivileged, 'privileged_groups': privileged} 34 | 35 | # Hyperparameters may either be specified as a dictionary of string to lists, or by an empty dictionary to 36 | # use the default ones set by sklearn (or AIF360). The keys are the names of the hyperparameters, and the 37 | # values and lists of possible values to form a grid search over 38 | parameters = { 39 | 'LogisticRegression': {'penalty': ['l1', 'l2'], 'C': [0.1, 0.5, 1]}, 40 | 'KNeighborsClassifier': {} 41 | } 42 | thresholds = [i * 10.0/100 for i in range(5)] 43 | preprocessors=[DisparateImpactRemover(), Reweighing(**preprocessor_args)] 44 | postprocessors=[CalibratedEqOddsPostprocessing(**preprocessor_args), EqOddsPostprocessing(**preprocessor_args), RejectOptionClassification(**preprocessor_args)] 45 | 46 | Search = AIF360Search(models, metrics, parameters, thresholds) 47 | Search.grid_search(dataset, privileged=privileged, unprivileged=unprivileged, preprocessors=preprocessors, postprocessors=postprocessors) 48 | 49 | Search.to_csv("interface/static/data/test-file.csv") 50 | 51 | -------------------------------------------------------------------------------- /fklearn_pkg/fklearn/scikit_learn_wrapper.py: -------------------------------------------------------------------------------- 1 | from sklearn.linear_model import LogisticRegression as lr 2 | from sklearn.neighbors import KNeighborsClassifier as knc 3 | from sklearn.ensemble import RandomForestClassifier as rfc 4 | from sklearn.svm import SVC as svc 5 | 6 | class ScikitLearnWrapper(): 7 | 8 | def __init__(self, model_class, **kwargs): 9 | self.model = model_class(**kwargs) 10 | 11 | 12 | def fit(self, dataset_train): 13 | self.model.fit(dataset_train.features, dataset_train.labels.ravel()) 14 | 15 | def predict(self, dataset_test): 16 | 17 | dataset_test_pred = dataset_test.copy() 18 | dataset_test_pred.labels = self.model.predict(dataset_test.features).reshape(-1,1) 19 | 20 | return dataset_test_pred 21 | 22 | 23 | LogisticRegression = lambda **kwargs : ScikitLearnWrapper(lr,**kwargs) 24 | KNeighborsClassifier = lambda **kwargs : ScikitLearnWrapper(knc,**kwargs) 25 | 
RandomForestClassifier = lambda **kwargs : ScikitLearnWrapper(rfc,**kwargs) 26 | SVC = lambda **kwargs : ScikitLearnWrapper(svc,**kwargs) 27 | -------------------------------------------------------------------------------- /fklearn_pkg/setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | with open("README.md", "r") as fh: 4 | long_description = fh.read() 5 | setuptools.setup( 6 | name='fairkit_learn', 7 | version='1.9', 8 | scripts=['fairkit_learn'], 9 | author="Brittany Johnson, Jesse Bartola, Rico Angell, Katherine Keith, Sam Witty, Stephen Giguere, and Yuriy Brun", 10 | author_email="bijohnsonphd@gmail.com", 11 | description="A machine learning fairness toolkit", 12 | long_description=long_description, 13 | long_description_content_type="text/markdown", 14 | url="https://github.com/brittjay0104/fairkit-learn", 15 | packages=setuptools.find_packages(), 16 | package_data={'fklearn': ['interface/static/data/explanations.json', 'interface/static/css/styles-notebook.css']}, 17 | include_package_data=True, 18 | classifiers=[ 19 | "Programming Language :: Python :: 3", 20 | "License :: OSI Approved :: MIT License", 21 | "Operating System :: OS Independent", 22 | ], 23 | ) 24 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | aif360==0.2.0 2 | bokeh==1.2.0 3 | BlackBoxAuditing==0.1.54 4 | numpy==1.15.4 5 | pandas==0.23.3 6 | six==1.11.0 7 | scikit_learn==0.20.0 8 | numba==0.45.0 9 | tensorflow==1.13.1 --------------------------------------------------------------------------------
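For orientation, the following is a minimal end-to-end sketch of how the pieces above fit together, modeled on fklearn/sample_aif.py. It is illustrative only and is not part of the repository: the import paths assume the package is installed as fklearn (per setup.py); the search class is used under the name it is defined with in fair_selection_aif.py (ModelSearch; note that sample_aif.py imports it as AIF360Search); the wrapped classifiers from scikit_learn_wrapper.py are used because MLPipeline passes AIF360 dataset objects directly to model.fit and model.predict; and the dataset, metric, and hyperparameter choices are arbitrary. Instantiating GermanDataset also assumes the AIF360 German credit data files have been downloaded as described in the AIF360 documentation.

# Illustrative sketch only (see assumptions above); not part of the repository.
from aif360.datasets import GermanDataset
from aif360.metrics import ClassificationMetric

from fklearn.fair_selection_aif import ModelSearch
from fklearn.scikit_learn_wrapper import LogisticRegression, KNeighborsClassifier

# Candidate models and their hyperparameter grids (an empty dict means library defaults)
models = {'LogisticRegression': LogisticRegression,
          'KNeighborsClassifier': KNeighborsClassifier}
parameters = {'LogisticRegression': {'penalty': ['l1', 'l2'], 'C': [0.1, 1.0]},
              'KNeighborsClassifier': {}}

# Display name -> [MetricClass, submetric names...], as consumed by grid_search
metrics = {'ClassificationMetric': [ClassificationMetric, 'accuracy', 'false_positive_rate']}

# Classification thresholds to sweep over
thresholds = [0.5]

privileged = [{'age': 1, 'sex': 1}]
unprivileged = [{'age': 0, 'sex': 0}]

search = ModelSearch(models, metrics, parameters, thresholds)
search.grid_search(GermanDataset(), privileged=privileged, unprivileged=unprivileged,
                   test_frac=0.3)

# Export the Pareto-optimal results; the CSV can then be loaded by the Bokeh
# interface under fklearn/interface (see plot.create_plot).
search.to_csv("search_output.csv")

Running AdversarialDebiasing through the same search additionally requires the mandatory parameters from DEFAULT_ADB_PARAMS (including a TensorFlow session), which grid_search special-cases by resetting the graph and session between runs.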