├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── examples.ipynb ├── requirements.txt └── stat_util.py /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.gitignore.io/api/vim,linux,macos,python,matlab,windows,pycharm+all,intellij+all 3 | 4 | ### Intellij+all ### 5 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm 6 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 7 | 8 | # User-specific stuff 9 | .idea/**/workspace.xml 10 | .idea/**/tasks.xml 11 | .idea/**/usage.statistics.xml 12 | .idea/**/dictionaries 13 | .idea/**/shelf 14 | 15 | # Sensitive or high-churn files 16 | .idea/**/dataSources/ 17 | .idea/**/dataSources.ids 18 | .idea/**/dataSources.local.xml 19 | .idea/**/sqlDataSources.xml 20 | .idea/**/dynamic.xml 21 | .idea/**/uiDesigner.xml 22 | .idea/**/dbnavigator.xml 23 | 24 | # Gradle 25 | .idea/**/gradle.xml 26 | .idea/**/libraries 27 | 28 | # Gradle and Maven with auto-import 29 | # When using Gradle or Maven with auto-import, you should exclude module files, 30 | # since they will be recreated, and may cause churn. Uncomment if using 31 | # auto-import. 
32 | # .idea/modules.xml 33 | # .idea/*.iml 34 | # .idea/modules 35 | 36 | # CMake 37 | cmake-build-*/ 38 | 39 | # Mongo Explorer plugin 40 | .idea/**/mongoSettings.xml 41 | 42 | # File-based project format 43 | *.iws 44 | 45 | # IntelliJ 46 | out/ 47 | 48 | # mpeltonen/sbt-idea plugin 49 | .idea_modules/ 50 | 51 | # JIRA plugin 52 | atlassian-ide-plugin.xml 53 | 54 | # Cursive Clojure plugin 55 | .idea/replstate.xml 56 | 57 | # Crashlytics plugin (for Android Studio and IntelliJ) 58 | com_crashlytics_export_strings.xml 59 | crashlytics.properties 60 | crashlytics-build.properties 61 | fabric.properties 62 | 63 | # Editor-based Rest Client 64 | .idea/httpRequests 65 | 66 | ### Intellij+all Patch ### 67 | # Ignores the whole .idea folder and all .iml files 68 | # See https://github.com/joeblau/gitignore.io/issues/186 and https://github.com/joeblau/gitignore.io/issues/360 69 | 70 | .idea/ 71 | 72 | # Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-249601023 73 | 74 | *.iml 75 | modules.xml 76 | .idea/misc.xml 77 | *.ipr 78 | 79 | ### Linux ### 80 | *~ 81 | 82 | # temporary files which can be created if a process still has a handle open of a deleted file 83 | .fuse_hidden* 84 | 85 | # KDE directory preferences 86 | .directory 87 | 88 | # Linux trash folder which might appear on any partition or disk 89 | .Trash-* 90 | 91 | # .nfs files are created when an open file is removed but is still being accessed 92 | .nfs* 93 | 94 | ### macOS ### 95 | # General 96 | .DS_Store 97 | .AppleDouble 98 | .LSOverride 99 | 100 | # Icon must end with two \r 101 | Icon 102 | 103 | # Thumbnails 104 | ._* 105 | 106 | # Files that might appear in the root of a volume 107 | .DocumentRevisions-V100 108 | .fseventsd 109 | .Spotlight-V100 110 | .TemporaryItems 111 | .Trashes 112 | .VolumeIcon.icns 113 | .com.apple.timemachine.donotpresent 114 | 115 | # Directories potentially created on remote AFP share 116 | .AppleDB 117 | .AppleDesktop 118 | Network Trash Folder 119 | 
Temporary Items 120 | .apdisk 121 | 122 | ### Matlab ### 123 | # Windows default autosave extension 124 | *.asv 125 | 126 | # OSX / *nix default autosave extension 127 | *.m~ 128 | 129 | # Compiled MEX binaries (all platforms) 130 | *.mex* 131 | 132 | # Packaged app and toolbox files 133 | *.mlappinstall 134 | *.mltbx 135 | 136 | # Generated helpsearch folders 137 | helpsearch*/ 138 | 139 | # Simulink code generation folders 140 | slprj/ 141 | sccprj/ 142 | 143 | # Matlab code generation folders 144 | codegen/ 145 | 146 | # Simulink autosave extension 147 | *.autosave 148 | 149 | # Octave session info 150 | octave-workspace 151 | 152 | ### Python ### 153 | # Byte-compiled / optimized / DLL files 154 | __pycache__/ 155 | *.py[cod] 156 | *$py.class 157 | 158 | # C extensions 159 | *.so 160 | 161 | # Distribution / packaging 162 | .Python 163 | build/ 164 | develop-eggs/ 165 | dist/ 166 | downloads/ 167 | eggs/ 168 | .eggs/ 169 | lib/ 170 | lib64/ 171 | parts/ 172 | sdist/ 173 | var/ 174 | wheels/ 175 | *.egg-info/ 176 | .installed.cfg 177 | *.egg 178 | MANIFEST 179 | 180 | # PyInstaller 181 | # Usually these files are written by a python script from a template 182 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
183 | *.manifest 184 | *.spec 185 | 186 | # Installer logs 187 | pip-log.txt 188 | pip-delete-this-directory.txt 189 | 190 | # Unit test / coverage reports 191 | htmlcov/ 192 | .tox/ 193 | .coverage 194 | .coverage.* 195 | .cache 196 | nosetests.xml 197 | coverage.xml 198 | *.cover 199 | .hypothesis/ 200 | .pytest_cache/ 201 | 202 | # Translations 203 | *.mo 204 | *.pot 205 | 206 | # Django stuff: 207 | *.log 208 | local_settings.py 209 | db.sqlite3 210 | 211 | # Flask stuff: 212 | instance/ 213 | .webassets-cache 214 | 215 | # Scrapy stuff: 216 | .scrapy 217 | 218 | # Sphinx documentation 219 | docs/_build/ 220 | 221 | # PyBuilder 222 | target/ 223 | 224 | # Jupyter Notebook 225 | .ipynb_checkpoints 226 | 227 | # pyenv 228 | .python-version 229 | 230 | # celery beat schedule file 231 | celerybeat-schedule 232 | 233 | # SageMath parsed files 234 | *.sage.py 235 | 236 | # Environments 237 | .env 238 | .venv 239 | env/ 240 | venv/ 241 | ENV/ 242 | env.bak/ 243 | venv.bak/ 244 | 245 | # Spyder project settings 246 | .spyderproject 247 | .spyproject 248 | 249 | # Rope project settings 250 | .ropeproject 251 | 252 | # mkdocs documentation 253 | /site 254 | 255 | # mypy 256 | .mypy_cache/ 257 | 258 | ### Python Patch ### 259 | .venv/ 260 | 261 | ### Vim ### 262 | # Swap 263 | [._]*.s[a-v][a-z] 264 | [._]*.sw[a-p] 265 | [._]s[a-rt-v][a-z] 266 | [._]ss[a-gi-z] 267 | [._]sw[a-p] 268 | 269 | # Session 270 | Session.vim 271 | 272 | # Temporary 273 | .netrwhist 274 | # Auto-generated tag files 275 | tags 276 | # Persistent undo 277 | [._]*.un~ 278 | 279 | ### Windows ### 280 | # Windows thumbnail cache files 281 | Thumbs.db 282 | ehthumbs.db 283 | ehthumbs_vista.db 284 | 285 | # Dump file 286 | *.stackdump 287 | 288 | # Folder config file 289 | [Dd]esktop.ini 290 | 291 | # Recycle Bin used on file shares 292 | $RECYCLE.BIN/ 293 | 294 | # Windows Installer files 295 | *.cab 296 | *.msi 297 | *.msix 298 | *.msm 299 | *.msp 300 | 301 | # Windows shortcuts 302 | *.lnk 303 | 304 | 
305 | # End of https://www.gitignore.io/api/vim,linux,macos,python,matlab,windows,pycharm+all,intellij+all 306 | 307 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.6 2 | 3 | 4 | COPY requirements.txt ./ 5 | 6 | RUN pip install --upgrade pip 7 | RUN pip install --no-cache-dir -r requirements.txt 8 | 9 | RUN useradd -ms /bin/bash jupyter 10 | USER jupyter 11 | 12 | WORKDIR /workspace 13 | 14 | ENTRYPOINT ["jupyter", "notebook", "--ip=0.0.0.0", "--port=8889"] -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Mateusz Buda 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Machine Learning Statistical Utils 2 | 3 | ## Docker setup for the example Jupyter notebook 4 | 5 | ``` 6 | docker build -t stat-util . 7 | ``` 8 | 9 | ``` 10 | docker run --rm -p 8889:8889 -v `pwd`:/workspace stat-util 11 | ``` 12 | 13 | ## Use cases 14 | 15 | Code for all use cases is provided in the `examples.ipynb` notebook. 16 | 17 | #### Evaluate a model with a 95% confidence interval 18 | 19 | ```python 20 | from sklearn.metrics import roc_auc_score 21 | 22 | import stat_util 23 | 24 | 25 | score, ci_lower, ci_upper, scores = stat_util.score_ci( 26 | y_true, y_pred, score_fun=roc_auc_score 27 | ) 28 | ``` 29 | 30 | #### Compute a p-value for the comparison of two models 31 | 32 | ```python 33 | from sklearn.metrics import roc_auc_score 34 | 35 | import stat_util 36 | 37 | 38 | p, z = stat_util.pvalue(y_true, y_pred1, y_pred2, score_fun=roc_auc_score) 39 | ``` 40 | 41 | #### Compute mean performance with a 95% confidence interval for a set of readers 42 | 43 | ```python 44 | import numpy as np 45 | from sklearn.metrics import roc_auc_score 46 | 47 | import stat_util 48 | 49 | 50 | mean_score, ci_lower, ci_upper, scores = stat_util.score_stat_ci( 51 | y_true, y_pred_readers, score_fun=roc_auc_score, stat_fun=np.mean 52 | ) 53 | ``` 54 | 55 | #### Compute a p-value for the comparison of one model and a set of readers 56 | 57 | ```python 58 | import numpy as np 59 | from sklearn.metrics import roc_auc_score 60 | 61 | import stat_util 62 | 63 | 64 | p, z = stat_util.pvalue_stat( 65 | y_true, y_pred, y_pred_readers, score_fun=roc_auc_score, stat_fun=np.mean 66 | ) 67 | ``` 68 | -------------------------------------------------------------------------------- /examples.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 
| "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import stat_util\n", 12 | "import numpy as np\n", 13 | "from sklearn.metrics import roc_auc_score\n", 14 | "import matplotlib.pyplot as plt\n", 15 | "%matplotlib inline" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "### Use case #1\n", 23 | "\n", 24 | "Compute AUC with a 95% confidence interval for a single model's predictions" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 2, 30 | "metadata": { 31 | "collapsed": true 32 | }, 33 | "outputs": [], 34 | "source": [ 35 | "np.random.seed(42)\n", 36 | "random_array = np.random.rand(100)\n", 37 | "# simulated ground truth\n", 38 | "y_true = np.round(random_array)\n", 39 | "# simulated predictions for a model\n", 40 | "y_pred = random_array + np.random.normal(loc=0.0, scale=0.2, size=len(y_true))" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 3, 46 | "metadata": { 47 | "collapsed": true 48 | }, 49 | "outputs": [], 50 | "source": [ 51 | "# AUC with 95% confidence interval for a single model\n", 52 | "score, ci_lower, ci_upper, scores = stat_util.score_ci(y_true, y_pred, \n", 53 | " score_fun=roc_auc_score,\n", 54 | " seed=42)" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 4, 60 | "metadata": {}, 61 | "outputs": [ 62 | { 63 | "name": "stdout", 64 | "output_type": "stream", 65 | "text": [ 66 | "AUC=0.94, 95% CI: 0.89-0.98\n" 67 | ] 68 | }, 69 | { 70 | "data": { 71 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYAAAAD8CAYAAAB+UHOxAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi40LCBodHRwOi8vbWF0cGxvdGxpYi5vcmcv7US4rQAAENVJREFUeJzt3XuQpFV9xvHvI8vFG3LbUGQXHCwxkVQpkg3BqNFAYRASQYOK15VssmXElJQaXeMfSaykCk0iSpky2RJ1oaKIRgMleCELlIkF6CJ3iLIQCLsCu3IzhDKK/vJHH8yw7ji9091Ob873U9XV5z3n9Nu/6emeZ9737bc7VYUkqT+PW+wCJEmLwwCQpE4ZAJLUKQNAkjplAEhSpwwASeqUASBJnTIAJKlTBoAkdWrJYhcAsN9++9XMzMxilyFJO5Wrrrrqu1W1dKG3n4oAmJmZYcOGDYtdhiTtVJLcMcrt3QUkSZ0yACSpUwaAJHXKAJCkTg0VAEluT3J9kmuSbGh9+yS5OMkt7Xrv1p8kZybZmOS6JIdP8geQJC3MjmwB/FZVHVZVK9ryGmB9VR0CrG/LAC8BDmmX1cBHxlWsJGl8RtkFdAKwrrXXASfO6j+7Bq4A9kpywAj3I0magGEDoICvJLkqyerWt39V3dXadwP7t/Yy4M5Zt93U+iRJU2TYE8GeX1Wbk/wCcHGSf589WFWVZIe+XLgFyWqAgw46aEduqgk5jdMA+CAfXORK9FPO/fvB9clvWtw6dkI+r+c2VABU1eZ2vSXJ54EjgHuSHFBVd7VdPFva9M3AgbNuvrz1bbvOtcBagBUrVvjN9FPgGq5Z7BI0y8yaC3/SPveBbwBw8jUHzjV9bG4//fiJ38fPk8/ruc27CyjJE5M8+dE28GLgBuACYGWbthI4v7UvAN7Q3g10JPDgrF1FkqQpMcwWwP7A55M8Ov+TVfWlJN8AzkuyCrgDeGWbfxFwHLAReBg4ZexVS5JGNm8AVNVtwLO3038vcPR2+gs4dSzVSZImxjOBJalTBoAkdcoAkKROGQCS1CkDQJI6ZQBIUqcMAEnqlAEgSZ0yACSpUwaAJHXKAJCkThkAktQpA0CSOmUASFKnDABJ6pQBIEmdMgAkqVMGgCR1ygCQpE4ZAJLUKQNAkjq1ZLELkKbZzJoLF7sEaWLcApCkThkAktQpA0CSOmUASFKnDABJ6pQBIEmdMgAkqVMGgCR1ygCQpE55JrCkx1jMs59vP/34RbvvHrkFIEmdGjoAkuyS5OokX2jLBye5MsnGJJ9Oslvr370tb2zjM5MpXZI0ih3ZAngrcPOs5fcBZ1TV04H7gVWtfxVwf+s/o82TJE2ZoQIgyXLgeOCjbTnAUcBn25R1wImtfUJbpo0f3eZLkqbIsFsAHwTeCfy4Le8LPFBVj7TlTcCy1l4G3AnQxh9s8x8jyeokG5Js2Lp16wLLlyQt1LwBkOR3gC1VddU477iq1lbViqpasXTp0nGuWpI0hGHeBvo84KVJjgP2APYEPgTslWRJ+y9/ObC5zd8MHAhsSrIEeApw79grlySNZN4tgKp6d1Utr6oZ4GTgkqp6LXApcFKbthI4v7UvaMu08UuqqsZatSRpZKOcB/Au4G1JNjLYx39W6z8L2Lf1vw1YM1qJkqRJ2KEzgavqMuCy1r4NOGI7c74PvGIMtUmSJsgzgSWpUwaAJHXKAJCkThkAktQpA0CSOmUASFKnDABJ6pQBIEmdMgAkqVMGgCR1ygCQpE4ZAJLUKQNAkjplAEhSpwwASeqUASBJnTIAJKlTBoAkdcoAkKROGQCS1CkDQJI6ZQBIUqcMAEnqlAEgSZ0yACSpUwaAJHXKAJCkThkAktQpA0CSOmUASFKnDABJ6pQBIEmdmjcAkuyR5OtJrk1yY5K/aP0HJ7kyycYkn06yW+vfvS1vbOMzk/0RJEkLMcwWwP8AR1XVs4HDgGOTHAm8Dzijqp4O3A+
savNXAfe3/jPaPEnSlJk3AGrgoba4a7sUcBTw2da/DjixtU9oy7Txo5NkbBVLksZiqGMASXZJcg2wBbgYuBV4oKoeaVM2ActaexlwJ0AbfxDYd5xFS5JGN1QAVNWPquowYDlwBPDLo95xktVJNiTZsHXr1lFXJ0naQTv0LqCqegC4FHgusFeSJW1oObC5tTcDBwK08acA925nXWurakVVrVi6dOkCy5ckLdQw7wJammSv1n48cAxwM4MgOKlNWwmc39oXtGXa+CVVVeMsWpI0uiXzT+EAYF2SXRgExnlV9YUkNwHnJvlL4GrgrDb/LOCcJBuB+4CTJ1C3JGlE8wZAVV0HPGc7/bcxOB6wbf/3gVeMpTpJ0sR4JrAkdcoAkKROGQCS1CkDQJI6ZQBIUqcMAEnqlAEgSZ0yACSpUwaAJHXKAJCkThkAktQpA0CSOmUASFKnDABJ6pQBIEmdMgAkqVMGgCR1apivhJQW3cyaCxe7BOn/HbcAJKlTBoAkdcoAkKROGQCS1CkDQJI6ZQBIUqcMAEnqlAEgSZ0yACSpUwaAJHXKAJCkThkAktQpA0CSOmUASFKnDABJ6tS8AZDkwCSXJrkpyY1J3tr690lycZJb2vXerT9JzkyyMcl1SQ6f9A8hSdpxw2wBPAK8vaoOBY4ETk1yKLAGWF9VhwDr2zLAS4BD2mU18JGxVy1JGtm8AVBVd1XVN1v7v4CbgWXACcC6Nm0dcGJrnwCcXQNXAHslOWDslUuSRrJDxwCSzADPAa4E9q+qu9rQ3cD+rb0MuHPWzTa1PknSFBk6AJI8Cfgn4LSq+t7ssaoqoHbkjpOsTrIhyYatW7fuyE0lSWMwVAAk2ZXBH/9/rKrPte57Ht210663tP7NwIGzbr689T1GVa2tqhVVtWLp0qULrV+StEDDvAsowFnAzVX1gVlDFwArW3slcP6s/je0dwMdCTw4a1eRJGlKLBlizvOA1wPXJ7mm9f0pcDpwXpJVwB3AK9vYRcBxwEbgYeCUsVYsSRqLeQOgqv4NyBzDR29nfgGnjliXJGnCPBNYkjplAEhSpwwASerUMAeBJennYmbNhWNf592r7x2se+3c67799OPHfr87A7cAJKlTBoAkdcoAkKROGQCS1CkDQJI6ZQBIUqcMAEnqlAEgSZ0yACSpUwaAJHXKAJCkThkAktQpA0CSOmUASFKnDABJ6pQBIEmdMgAkqVMGgCR1ygCQpE4ZAJLUKb8UXjtkEl/aLWlxuAUgSZ0yACSpUwaAJHXKAJCkThkAktQpA0CSOmUASFKnDABJ6tS8AZDkY0m2JLlhVt8+SS5Ocku73rv1J8mZSTYmuS7J4ZMsXpK0cMNsAXwCOHabvjXA+qo6BFjflgFeAhzSLquBj4ynTEnSuM0bAFX1VeC+bbpPANa19jrgxFn9Z9fAFcBeSQ4YV7GSpPFZ6DGA/avqrta+G9i/tZcBd86at6n1/ZQkq5NsSLJh69atCyxDkrRQIx8ErqoCagG3W1tVK6pqxdKlS0ctQ5K0gxYaAPc8umunXW9p/ZuBA2fNW976JElTZqEBcAGwsrVXAufP6n9DezfQkcCDs3YVSZKmyLzfB5DkU8CLgP2SbAL+DDgdOC/JKuAO4JVt+kXAccBG4GHglAnULEkag3kDoKpePcfQ0duZW8CpoxYlSZo8zwSWpE4ZAJLUKQNAkjplAEhSpwwASeqUASBJnTIAJKlTBoAkdcoAkKROGQCS1CkDQJI6ZQBIUqcMAEnqlAEgSZ0yACSpUwaAJHXKAJCkThkAktSpeb8SUtNnZs2FE1nv3avvHax/7WTWL2m6uAUgSZ0yACSpUwaAJHXKAJCkThkAktQpA0CSOuXbQCV1b1JvrR7G7acfv2j37RaAJHXKAJCkThkAktQpjwGMYDH3G0rSqNwCkKROGQCS1KmJBECSY5N8K8nGJGsmcR+SpNGM/RhAkl2AvwOOATYB30hyQVXdNO77AvfDS9JCTWIL4Ah
gY1XdVlU/AM4FTpjA/UiSRjCJAFgG3DlreVPrkyRNkVTVeFeYnAQcW1V/0JZfD/x6Vb1lm3mrgdVt8ZeAb421kNHsB3x3sYsY0s5Sq3WO385Sq3WO36O1PrWqli50JZM4D2AzcOCs5eWt7zGqai2wdgL3P7IkG6pqxWLXMYydpVbrHL+dpVbrHL9x1TqJXUDfAA5JcnCS3YCTgQsmcD+SpBGMfQugqh5J8hbgy8AuwMeq6sZx348kaTQT+SiIqroIuGgS6/45mcpdU3PYWWq1zvHbWWq1zvEbS61jPwgsSdo5+FEQktSp7gJgvo+pSHJQkkuTXJ3kuiTHzRp7VpLLk9yY5Poke0xbnUl2TbKu1XdzkndPqsYh63xqkvWtxsuSLJ81tjLJLe2ycpJ1jlJrksNm/d6vS/Kqaaxz1vieSTYl+fAk6xy11vYc/kp7nt6UZGZK63x/+93fnOTMJJlgnR9LsiXJDXOMp9WwsdV6+KyxHX89VVU3FwYHpW8FngbsBlwLHLrNnLXAH7X2ocDtrb0EuA54dlveF9hlCut8DXBuaz8BuB2YWcQ6PwOsbO2jgHNaex/gtna9d2vvvci/+7lqfQZwSGv/InAXsNe01Tlr/EPAJ4EPT+rxHEetwGXAMa39JOAJ01Yn8BvA19o6dgEuB140wcf0N4HDgRvmGD8O+CIQ4Ejgyta/oNdTb1sAw3xMRQF7tvZTgO+09ouB66rqWoCqureqfjSFdRbwxCRLgMcDPwC+t4h1Hgpc0tqXzhr/beDiqrqvqu4HLgaOnVCdI9VaVd+uqlta+zvAFmDBJ99Mqk6AJL8K7A98ZUL1jaXWJIcCS6rqYoCqeqiqHp62Ohm8nvZgEBy7A7sC90yoTqrqq8B9P2PKCcDZNXAFsFeSA1jg66m3ABjmYyr+HHhdkk0M3sn0x63/GUAl+XKSbyZ555TW+Vngvxn8l/qfwN9U1c96Qk26zmuBl7f2y4AnJ9l3yNuO0yi1/kSSIxj8Mbh12upM8jjgb4F3TKi2bY3ymD4DeCDJ59puzL/O4IMkp6rOqrqcQSDc1S5frqqbJ1TnMOb6WRb0euotAIbxauATVbWcwebWOe2FtQR4PvDadv2yJEcvXplz1nkE8CMGuyoOBt6e5GmLVybvAF6Y5GrghQzOCp/UltOofmat7T+tc4BTqurHi1MiMHedbwYuqqpNi1jbtuaqdQnwgjb+awx2z7xxkWqEOepM8nTgmQw+0WAZcFSSFyxemePV21dCDvMxFatom05VdXkGB3r3Y5CoX62q7wIkuYjBvrr1U1bna4AvVdUPgS1JvgasYLBP8OdeZ9tl8nKAJE8Cfq+qHkiyGXjRNre9bAI1jlxrW94TuBB4T9v0nro6kzwXeEGSNzPYp75bkoeqalLfyTFKrZuAa6rqtjb2zwz2aZ81ZXX+IXBFVT3Uxr4IPBf41wnUOYy5fpaFvZ4mdTBjGi8MAu82Bv8ZP3ow6Fe2mfNF4I2t/UwG+9bD4MDKNxkcWF0C/Atw/BTW+S7g463/icBNwLMWsc79gMe19l8B763/O2j1H+1x3bu191nk3/1cte7GIOhPm5Ln6Hbr3GbOG5n8QeBRHtNd2vylbfnjwKlTWOer2mt9CYP9/+uB353w4zrD3AeBj+exB4G/3voX9Hqa6JN5Gi8Mdpd8m8E+3Pe0vvcCL23tQxkc9b8WuAZ48azbvg64EbgBeP801sngP7/PtDpvAv5kkes8CbilzfkosPus2/4+sLFdTpmC3/12a22/9x+2x/nRy2HTVuc263gjEw6AMfz+j2HwzrrrgU8Au01bnQyC6h+Am9vr6QMTfjw/xeBYww8Z7HVYBbwJeFMbD4Mv3Lq1PW4rZt12h19PngksSZ3yILAkdcoAkKROGQCS1CkDQJI6ZQBIUqcMAEnqlAEgSZ0yACSpU/8L++5SCItQWPMAAAAASUVORK5CYII=\n", 72 | "text/plain": [ 73 | "
" 74 | ] 75 | }, 76 | "metadata": { 77 | "needs_background": "light" 78 | }, 79 | "output_type": "display_data" 80 | } 81 | ], 82 | "source": [ 83 | "bins = plt.hist(scores)\n", 84 | "plt.plot([score, score], [0, np.max(bins[0])], color=\"tomato\")\n", 85 | "plt.plot([ci_lower, ci_lower], [0, np.max(bins[0])], color=\"lime\")\n", 86 | "plt.plot([ci_upper, ci_upper], [0, np.max(bins[0])], color=\"lime\")\n", 87 | "print(\"AUC={:.2f}, 95% CI: {:.2f}-{:.2f}\".format(score, ci_lower, ci_upper))" 88 | ] 89 | }, 90 | { 91 | "cell_type": "markdown", 92 | "metadata": {}, 93 | "source": [ 94 | "### Use case #2\n", 95 | "\n", 96 | "Compare two models by computing p-value for a difference in their performance measured with AUC" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 5, 102 | "metadata": { 103 | "collapsed": true 104 | }, 105 | "outputs": [], 106 | "source": [ 107 | "np.random.seed(42)\n", 108 | "random_array = np.random.rand(100)\n", 109 | "# simulated ground truth\n", 110 | "y_true = np.round(random_array)\n", 111 | "# simulated predictions for model I\n", 112 | "y_pred1 = random_array + np.random.normal(loc=0.0, scale=0.2, size=len(y_true))\n", 113 | "# simulated predictions for model II\n", 114 | "y_pred2 = random_array + np.random.normal(loc=0.0, scale=0.3, size=len(y_true))" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": 6, 120 | "metadata": { 121 | "collapsed": true 122 | }, 123 | "outputs": [], 124 | "source": [ 125 | "# p-value comparing AUC for model I and model II\n", 126 | "p, z = stat_util.pvalue(y_true, y_pred1, y_pred2, \n", 127 | " score_fun=roc_auc_score,\n", 128 | " seed=42)" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 7, 134 | "metadata": {}, 135 | "outputs": [ 136 | { 137 | "name": "stdout", 138 | "output_type": "stream", 139 | "text": [ 140 | "p=0.01\n" 141 | ] 142 | }, 143 | { 144 | "data": { 145 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXoAAAD8CAYAAAB5Pm/hAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi40LCBodHRwOi8vbWF0cGxvdGxpYi5vcmcv7US4rQAADuBJREFUeJzt3X/MneVdx/H3RzrAn6PQx4a0ZQ/LagxL3A8rovPXhjp+zJVEtmDUNdikiWKyZibauT+Mxj/gH9ElZksjy4pRB04nzcAftTCNf8D2sHUwQOShg9AGaNcx5sTNsH3947m6HLo+fc7pOU/P81x9v5KTc93Xfd3n/l7czac3932f01QVkqR+fde0C5AkLS+DXpI6Z9BLUucMeknqnEEvSZ0z6CWpcwa9JHXOoJekzhn0ktS5NdMuAGDdunU1Ozs77TIkaVV58MEHv1RVM0uNWxFBPzs7y9zc3LTLkKRVJcnTw4zz0o0kdc6gl6TOGfSS1DmDXpI6Z9BLUucMeknqnEEvSZ0z6CWpcwb9Inbu3MnOnTunXYYkjW1FfDN2JTpw4MC0S9CA2V13T2W/T9187VT2K02SZ/SS1DmDXpI6Z9BLUucMeknqnEEvSZ3zqRuNZFpPv0g6fZ7RS1LnDHpJ6txQQZ/kqSQPJzmQZK71XZhkX5In2vva1p8kH0wyn+ShJG9ezglIkk5tlDP6t1bVG6tqS1veBeyvqs3A/rYMcDWwub12AB+aVLGSpNGNc+lmK7CntfcA1w30314L7gcuSHLxGPuRJI1h2KAv4F+SPJhkR+tbX1XPtvZzwPrW3gA8M7DtodYnSZqCYR+v/KmqOpzkB4F9Sf5zcGVVVZIaZcftL4wdAJdccskom0qSRjDUGX1VHW7vR4BPAJcDzx+/JNPej7Thh4FNA5tvbH0nfubuqtpSVVtmZmZOfwaSpFNaMuiTfG+S7z/eBn4R+AKwF9jWhm0D7mrtvcB72tM3VwAvDlzikSSdYcNculkPfCLJ8fF/XVX/lOQzwJ1JtgNPA+9u4+8BrgHmgZeAGydetSRpaEsGfVUdBN5wkv5jwJUn6S/gpolUJ0kam9+MlaTOGfSS1DmDXpI6Z9BLUucMeknqnEEvSZ0z6CWpcwa9JHXOoJekzhn0ktQ5g16SOmfQS1LnDHpJ6pxBL0mdM+glqXMGvSR1zqCXpM4Z9JLUOYNekjpn0EtS5wx6SeqcQS9JnTPoJalzBr0kdc6gl6TOGfSS1DmDXpI6Z9BLUucMeknqnEEvSZ0bOuiTnJPkc0k+2ZYvTfJAkvkkdyQ5t/Wf15bn2/rZ5SldkjSMUc7o3ws8NrB8C3BrVb0OeAHY3vq3Ay+0/lvbOEnSlAwV9Ek2AtcCf9GWA7wN+Hgbsge4rrW3tmXa+ivbeEnSFAx7Rv+nwO8C32rLFwFfqaqX2/IhYENrbwCeAWjrX2zjXyHJjiRzSeaOHj16muVLkpayZNAneQdwpKoenOSOq2p3VW2pqi0zMzOT/GhJ0oA1Q4x5C/DOJNcA5wM/APwZcEGSNe2sfSNwuI0/DGwCDiVZA7waODbxyiVJQ1nyjL6q3l9VG6tqFrgBuLeqfhW4D7i+DdsG3NXae9sybf29VVUTrVqSNLRxnqP/PeB9SeZZuAZ/W+u/Dbio9b8P2DVeiZKkcQxz6ebbqupTwKda+yBw+UnGfB141wRqkyRNgN+MlaTOGfSS1DmDXpI6Z9BLUucMeknqnEEvSZ0z6CWpcwa9JHXOoJekzhn0ktQ5g16SOmfQS1LnDHpJ6pxBL0mdM+glqXMGvSR1zqCXpM4Z9JLUuZH+KUHpbDO76+6p7fupm6+d2r7VF8/oJalzBr0kdc6gl6TOGfSS1DmDXpI6Z9BLUucMeknqnEEvSZ0z6CWpc34zdhWa5rc1Ja0+ntFLUueWDPok5yf5dJLPJ3kkyR+2/kuTPJBkPskdSc5t/ee15fm2fnZ5pyBJOpVhzui
/Abytqt4AvBG4KskVwC3ArVX1OuAFYHsbvx14ofXf2sZJkqZkyaCvBV9ri69qrwLeBny89e8BrmvtrW2Ztv7KJJlYxZKkkQx1jT7JOUkOAEeAfcCTwFeq6uU25BCwobU3AM8AtPUvAhed5DN3JJlLMnf06NHxZiFJWtRQQV9V36yqNwIbgcuBHx53x1W1u6q2VNWWmZmZcT9OkrSIkZ66qaqvAPcBPwFckOT445kbgcOtfRjYBNDWvxo4NpFqJUkjG+apm5kkF7T2dwO/ADzGQuBf34ZtA+5q7b1tmbb+3qqqSRYtSRreMF+YuhjYk+QcFv5iuLOqPpnkUeBjSf4Y+BxwWxt/G/CXSeaBLwM3LEPdkqQhLRn0VfUQ8KaT9B9k4Xr9if1fB941keokSWPzm7GS1DmDXpI6Z9BLUucMeknqnEEvSZ0z6CWpcwa9JHXOoJekzhn0ktQ5g16SOmfQS1LnDHpJ6pxBL0mdM+glqXMGvSR1zqCXpM4Z9JLUOYNekjpn0EtS5wx6SeqcQS9JnTPoJalzBr0kdc6gl6TOGfSS1DmDXpI6Z9BLUucMeknqnEEvSZ1bMuiTbEpyX5JHkzyS5L2t/8Ik+5I80d7Xtv4k+WCS+SQPJXnzck9CkrS4Yc7oXwZ+p6ouA64AbkpyGbAL2F9Vm4H9bRngamBze+0APjTxqiVJQ1sy6Kvq2ar6bGv/N/AYsAHYCuxpw/YA17X2VuD2WnA/cEGSiydeuSRpKCNdo08yC7wJeABYX1XPtlXPAetbewPwzMBmh1qfJGkKhg76JN8H/B2ws6q+OriuqgqoUXacZEeSuSRzR48eHWVTSdIIhgr6JK9iIeT/qqr+vnU/f/ySTHs/0voPA5sGNt/Y+l6hqnZX1Zaq2jIzM3O69UuSljDMUzcBbgMeq6o/GVi1F9jW2tuAuwb639OevrkCeHHgEo8k6QxbM8SYtwC/Djyc5EDr+33gZuDOJNuBp4F3t3X3ANcA88BLwI0TrViSNJIlg76q/gPIIquvPMn4Am4asy5J0oT4zVhJ6pxBL0mdM+glqXMGvSR1zqCXpM4Z9JLUOYNekjpn0EtS5wx6SeqcQS9JnTPoJalzBr0kdc6gl6TOGfSS1DmDXpI6N8w/PCJpCmZ33T2V/T5187VT2a+Wj2f0ktQ5g16SOmfQS1LnDHpJ6pxBL0mdM+glqXMGvSR1zqCXpM4Z9JLUOYNekjpn0EtS5wx6SeqcQS9JnTPoJalzSwZ9ko8kOZLkCwN9FybZl+SJ9r629SfJB5PMJ3koyZuXs3hJ0tKGOaP/KHDVCX27gP1VtRnY35YBrgY2t9cO4EOTKVOSdLqWDPqq+nfgyyd0bwX2tPYe4LqB/ttrwf3ABUkunlSxkqTRne41+vVV9WxrPwesb+0NwDMD4w61PknSlIx9M7aqCqhRt0uyI8lckrmjR4+OW4YkaRGnG/TPH78k096PtP7DwKaBcRtb33eoqt1VtaWqtszMzJxmGZKkpZxu0O8FtrX2NuCugf73tKdvrgBeHLjEI0magjVLDUjyN8DPAeuSHAL+ALgZuDPJduBp4N1t+D3ANcA88BJw4zLULEkawZJBX1W/ssiqK08ytoCbxi1KkjQ5fjNWkjpn0EtS5wx6SercktfotbjZXXdPuwRJWpJn9JLUOYNekjpn0EtS5wx6SeqcQS9JnTPoJalzBr0kdc6gl6TOGfSS1DmDXpI6Z9BLUucMeknqnEEvSZ3z1yslvcI0f5X1qZuvndq+e+YZvSR1zqCXpM4Z9JLUOYNekjpn0EtS5wx6SeqcQS9JnTPoJalzBr0kdc6gl6TOrfqfQFiur2s/d/DYsn6+JJ0pntFLUueWJeiTXJXk8STzSXYtxz4kScOZ+KWbJOcAfw78AnAI+EySvVX16KT3Jakv07pU2vuvZi7HGf3lwHxVHayq/wM+Bmxdhv1IkoawHDdjNwDPDCwfAn58GfYjSRPR+2/wT+2pmyQ7gB1t8Wt
JHp9WLaew7ulb3vGlaRcxIesA57LyOJeV6YzNJbeMtflrhhm0HEF/GNg0sLyx9b1CVe0Gdi/D/icmyVxVbZl2HZPgXFYm57Iy9TQXWJ5r9J8BNie5NMm5wA3A3mXYjyRpCBM/o6+ql5P8NvDPwDnAR6rqkUnvR5I0nGW5Rl9V9wD3LMdnn2Er+tLSiJzLyuRcVqae5kKqato1SJKWkT+BIEmdOyuDfqmfaEhyXpI72voHkswOrHt/6388ydvPZN0nc7pzSTKb5H+THGivD5/p2k80xFx+Jslnk7yc5PoT1m1L8kR7bTtzVZ/cmHP55sBxmfqDDEPM5X1JHk3yUJL9SV4zsG61HZdTzWVFHZeRVNVZ9WLhBvGTwGuBc4HPA5edMOa3gA+39g3AHa19WRt/HnBp+5xzVulcZoEvTPt4jDiXWeBHgNuB6wf6LwQOtve1rb12Nc6lrfvatI/HiHN5K/A9rf2bA3/GVuNxOelcVtpxGfV1Np7RD/MTDVuBPa39ceDKJGn9H6uqb1TVF4H59nnTMs5cVpol51JVT1XVQ8C3Ttj27cC+qvpyVb0A7AOuOhNFL2Kcuaw0w8zlvqp6qS3ez8J3Z2B1HpfF5rKqnY1Bf7KfaNiw2Jiqehl4EbhoyG3PpHHmAnBpks8l+bckP73cxS5hnP+2q/G4nMr5SeaS3J/kusmWNrJR57Id+MfT3Ha5jTMXWFnHZSSr/h8e0Wl7Frikqo4l+VHgH5K8vqq+Ou3CxGuq6nCS1wL3Jnm4qp6cdlFLSfJrwBbgZ6ddy7gWmcuqPC5wdp7RD/MTDd8ek2QN8Grg2JDbnkmnPZd2+ekYQFU9yMK1yx9a9ooXN85/29V4XBZVVYfb+0HgU8CbJlnciIaaS5KfBz4AvLOqvjHKtmfQOHNZacdlNNO+SXCmXyz8X8xBFm6mHr8h8/oTxtzEK29g3tnar+eVN2MPMt2bsePMZeZ47SzcnDoMXLiS5zIw9qN8583YL7Jww29ta6/WuawFzmvtdcATnHDDcKXNhYXAexLYfEL/qjsup5jLijouI8992gVM6YBfA/xXO6AfaH1/xMLf4ADnA3/Lws3WTwOvHdj2A227x4GrV+tcgF8GHgEOAJ8FfmkVzOXHWLiu+j8s/B/WIwPb/kab4zxw42qdC/CTwMMthB4Gtq+Cufwr8Hz7s3QA2LuKj8tJ57ISj8soL78ZK0mdOxuv0UvSWcWgl6TOGfSS1DmDXpI6Z9BLUucMeknqnEEvSZ0z6CWpc/8PgZV6LvaldRgAAAAASUVORK5CYII=\n", 146 | "text/plain": [ 147 | "
" 148 | ] 149 | }, 150 | "metadata": { 151 | "needs_background": "light" 152 | }, 153 | "output_type": "display_data" 154 | } 155 | ], 156 | "source": [ 157 | "bins = plt.hist(z)\n", 158 | "plt.plot([0, 0], [0, np.max(bins[0])], color=\"black\")\n", 159 | "print(\"p={}\".format(p))" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": {}, 165 | "source": [ 166 | "### Use case #3\n", 167 | "\n", 168 | "Compute mean AUC with 95% confidence interval for a set of 10 readers/models" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 8, 174 | "metadata": { 175 | "collapsed": true 176 | }, 177 | "outputs": [], 178 | "source": [ 179 | "np.random.seed(42)\n", 180 | "random_array = np.random.rand(100)\n", 181 | "# simulated ground truth\n", 182 | "y_true = np.round(random_array)\n", 183 | "# simulated predictions for readers\n", 184 | "y_pred_readers = []\n", 185 | "for _ in range(10):\n", 186 | " scale = np.random.normal(loc=0.3, scale=0.1)\n", 187 | " y_pred_r = random_array + np.random.normal(loc=0.0, scale=scale, size=len(y_true))\n", 188 | " y_pred_readers.append(y_pred_r)" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": 9, 194 | "metadata": { 195 | "collapsed": true 196 | }, 197 | "outputs": [], 198 | "source": [ 199 | "# mean AUC with 95% confidence interval for 10 readers/models\n", 200 | "mean_score, ci_lower, ci_upper, scores = stat_util.score_stat_ci(y_true, y_pred_readers, \n", 201 | " score_fun=roc_auc_score,\n", 202 | " stat_fun=np.mean,\n", 203 | " seed=42)" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": 10, 209 | "metadata": {}, 210 | "outputs": [ 211 | { 212 | "name": "stdout", 213 | "output_type": "stream", 214 | "text": [ 215 | "mean AUC=0.87, 95% CI: 0.82-0.91\n" 216 | ] 217 | }, 218 | { 219 | "data": { 220 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXoAAAD8CAYAAAB5Pm/hAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi40LCBodHRwOi8vbWF0cGxvdGxpYi5vcmcv7US4rQAAEOxJREFUeJzt3X+wXGV9x/H310TQqhhCrhlMghfHWBvrCPQW6VhHhVEhtCZVRBiFlKaTtoOOju3UWNqpdepM/KcoY0cnI0qgVaS0DBnBtpkA7bRjlBsJv6TIBcKQazCRX7WlUtFv/9jn0uVyb/Zsdvfu5pn3i9nZ5zznOed878nmk3PPOXuIzESSVK8XDLsASdJgGfSSVDmDXpIqZ9BLUuUMekmqnEEvSZUz6CWpcga9JFXOoJekyi0edgEAy5Yty/Hx8WGXIUlHlN27d/8oM8c6jRuJoB8fH2dycnLYZUjSESUiHmoyzlM3klQ5g16SKmfQS1LlDHpJqpxBL0mVM+glqXIGvSRVzqCXpMoZ9HrWR8t/WmBXf7H1Utf8zDYzEt+M1WjYw55hlzByxjffMPBtXP3ErQCct2fVwLfVxN4tZw+7hMb8zDbjEb0kVc6gl6TKGfSSVDmDXpIqZ9BLUuUMekmqnEEvSZUz6CWpcga9JFXOoJekyhn0klS5RkEfEXsj4s6I2BMRk6VvaUTsiIj7yvuxpT8i4rKImIqIOyLilEH+AJKkQ+vmiP7tmXlSZk6U6c3AzsxcDews0wBnAavLaxPwhX4VK0nqXi+nbtYB20p7G7C+rf/KbNkFLImI43vYjiSpB02DPoF/jojdEbGp9C3PzP2l/QiwvLRXAA+3Lbuv9EmShqDp8+h/PTOnI+IVwI6I+I/2mZmZEZHdbLj8g7EJ4IQTTuhmUUlSFxod0WfmdHk/AFwHnAr8cOaUTHk/UIZPA+3/B4WVpW/2Ordm5kRmToyNjR3+TyBJOqSOQR8RL4mIl820gXcCdwHbgQ1l2Abg+tLeDlxY7r45DXiy7RSPJGmBNTl1sxy4LiJmxn81M/8xIm4FromIjcBDwLll/I3AWmAKeAq4qO9VS5Ia6xj0mfkA8MY5+h8FzpijP4GL+1KdJKlnfjNWkipn0EtS5Qx6SaqcQS9JlTPoJalyBr0kVc6gl6TKGfSSVDmDXpIqZ9BLUuUMekmqnEEvSZUz6CWpcga9JFXOoJekyhn0klQ5g16SKmfQS1LlDHpJqpxBL0mVM+glqXIGvSRVzqCXpMoZ9JJUOYNekipn0EtS5Qx6SaqcQS9JlTPoJalyBr0kVa5x0EfEooi4LSK+UaZPjIhvR8RURHw9Io4q/UeX6akyf3wwpUuSmujmiP4jwD1t058BLs3M1wCPAxtL/0bg8dJ/aRknSRqSRkEfESuBs4EvlekATgeuLUO2AetLe12Zpsw/o4yXJA1B0yP6zwJ/DPy8TB8HPJGZz5TpfcCK0l4BPAxQ5j9Zxj9HRGyKiMmImDx48OBhli9J6qRj0EfEbwAHMnN3PzecmVszcyIzJ8bGxvq5aklSm8UNxrwZeHdErAVeBBwDfA5YEhGLy1H7SmC6jJ8GVgH7ImIx8HLg0b5XLklqpOMRfWZ+IjNXZuY4cB5wU2Z+ALgZOKcM2wBcX9rbyzRl/k2ZmX2tWpLUWC/30X8c+FhETNE6B3956b8cOK70fwzY3FuJkqReNDl186zMvAW4pbQfAE6dY8xPgPf1oTZJUh/4zVhJqpxBL0mVM+glqXIGvSRVzqCXpMoZ9JJUOYNekipn0EtS5Qx6SaqcQS9JlTPoJalyBr0kVc6gl6TKGfSSVDmDXpIqZ9BLUuUMekmqnEEvSZUz6CWpcga9JFXOoJekyhn0klQ5g16SKmfQS1LlFg+7AKmJ8c03DLsE6YjlEb0kVc6gl6TKGfSSVDmDXpIq1zHoI+JFEfGdiLg9Iu6OiL8o/SdGxLcjYioivh4RR5X+o8v
0VJk/PtgfQZJ0KE2O6J8GTs/MNwInAWdGxGnAZ4BLM/M1wOPAxjJ+I/B46b+0jJMkDUnHoM+W/yqTLyyvBE4Hri3924D1pb2uTFPmnxER0beKJUldaXSOPiIWRcQe4ACwA7gfeCIznylD9gErSnsF8DBAmf8kcNwc69wUEZMRMXnw4MHefgpJ0rwaBX1m/iwzTwJWAqcCr+t1w5m5NTMnMnNibGys19VJkubR1V03mfkEcDPwa8CSiJj5Zu1KYLq0p4FVAGX+y4FH+1KtJKlrTe66GYuIJaX9YuAdwD20Av+cMmwDcH1pby/TlPk3ZWb2s2hJUnNNnnVzPLAtIhbR+ofhmsz8RkR8D7g6Iv4SuA24vIy/HLgqIqaAx4DzBlC3JKmhjkGfmXcAJ8/R/wCt8/Wz+38CvK8v1UlacMN8gNzeLWcPbds185uxklQ5g16SKmfQS1LlDHpJqpxBL0mVM+glqXIGvSRVzqCXpMoZ9JJUOYNekipn0EtS5Qx6SaqcQS9JlTPoJalyBr0kVc6gl6TKGfSSVDmDXpIqZ9BLUuUMekmqnEEvSZUz6CWpcga9JFXOoJekyhn0klQ5g16SKmfQS1LlDHpJqpxBL0mV6xj0EbEqIm6OiO9FxN0R8ZHSvzQidkTEfeX92NIfEXFZRExFxB0RccqgfwhJ0vyaHNE/A/xhZq4BTgMujog1wGZgZ2auBnaWaYCzgNXltQn4Qt+rliQ11jHoM3N/Zn63tH8M3AOsANYB28qwbcD60l4HXJktu4AlEXF83yuXJDXS1Tn6iBgHTga+DSzPzP1l1iPA8tJeATzctti+0idJGoLGQR8RLwX+HvhoZv5n+7zMTCC72XBEbIqIyYiYPHjwYDeLSpK60CjoI+KFtEL+bzPzH0r3D2dOyZT3A6V/GljVtvjK0vccmbk1Mycyc2JsbOxw65ckddDkrpsALgfuycy/apu1HdhQ2huA69v6Lyx335wGPNl2ikeStMAWNxjzZuAC4M6I2FP6/gTYAlwTERuBh4Bzy7wbgbXAFPAUcFFfK5YkdaVj0GfmvwExz+wz5hifwMU91iVJ6hO/GStJlTPoJalyBr0kVc6gl6TKGfSSVDmDXpIqZ9BLUuUMekmqnEEvSZUz6CWpcga9JFXOoJekyhn0klQ5g16SKmfQS1LlDHpJqpxBL0mVM+glqXIGvSRVzqCXpMoZ9JJUOYNekiq3eNgF6MgyvvmGYZcgqUse0UtS5Qx6SaqcQS9JlTPoJalyBr0kVc6gl6TKdQz6iPhyRByIiLva+pZGxI6IuK+8H1v6IyIui4ipiLgjIk4ZZPGSpM6aHNFfAZw5q28zsDMzVwM7yzTAWcDq8toEfKE/ZUqSDlfHoM/MfwUem9W9DthW2tuA9W39V2bLLmBJRBzfr2IlSd073HP0yzNzf2k/Aiwv7RXAw23j9pU+SdKQ9HwxNjMTyG6Xi4hNETEZEZMHDx7stQxJ0jwON+h/OHNKprwfKP3TwKq2cStL3/Nk5tbMnMjMibGxscMsQ5LUyeEG/XZgQ2lvAK5v67+w3H1zGvBk2ykeSdIQdHx6ZUR8DXgbsCwi9gF/DmwBromIjcBDwLll+I3AWmAKeAq4aAA1S6pUt09HfWTTo63ltvb2VNW9W87uaflR1zHoM/P8eWadMcfYBC7utShJUv/4zVhJqpxBL0mVM+glqXIGvSRVzqCXpMoZ9JJUOYNekipn0EtS5Qx6SaqcQS9JlTPoJalyBr0kVc6gl6TKGfSSVLmOjynW6On2md1N9evZ3pJGi0f0klQ5g16SKmfQS1LlDHpJqpxBL0mVM+glqXIGvSRVzqCXpMoZ9JJUOYNekipn0EtS5XzWTQ8G9cwZSQtrmH+X9245e+Db8Ihekipn0EtS5QYS9BFxZkTcGxFTEbF5ENuQJDXT96CPiEXAXwNnAWuA8yNiTb+3I0lqZhBH9KcCU5n5QGb+L3A1sG4A25EkNTCIu25WAA+3Te8D3jS
A7QDe+SJJnURm9neFEecAZ2bm75bpC4A3ZeaHZo3bBGwqk78I3HsYm1sG/KiHcofFuhfekVq7dS+sI63uV2XmWKdBgziinwZWtU2vLH3PkZlbga29bCgiJjNzopd1DIN1L7wjtXbrXlhHat2dDOIc/a3A6og4MSKOAs4Dtg9gO5KkBvp+RJ+Zz0TEh4B/AhYBX87Mu/u9HUlSMwN5BEJm3gjcOIh1z9LTqZ8hsu6Fd6TWbt0L60it+5D6fjFWkjRafASCJFVuZIK+02MTIuLSiNhTXt+PiCdK/9vb+vdExE8iYn2Zd0VEPNg276Qh1X5CRNwcEbdFxB0RsbZt3ifKcvdGxLuarnOYdUfEOyJid0TcWd5Pb1vmlrLOmX3+ihGqezwi/qetti+2LfMr5eeZiojLIiJGqO4PzPqM/3zms7wQ+7th7a+KiJ2l7lsiYmXbvA0RcV95bWjrH4V9PmfdEXFSRHwrIu4u897ftsyC5EpfZebQX7Qu2t4PvBo4CrgdWHOI8R+mdZF3dv9S4DHgF8r0FcA5w66d1nm/PyjtNcDetvbtwNHAiWU9i7rdH0Oo+2TglaX9y8B02zK3ABMjur/HgbvmWe93gNOAAL4JnDUqdc8a8wbg/oXa313U/nfAhtI+HbiqtJcCD5T3Y0v72BHa5/PV/VpgdWm/EtgPLCnTVzDgXOn3a1SO6Lt9bML5wNfm6D8H+GZmPjWAGufTpPYEjintlwM/KO11wNWZ+XRmPghMlfUtxGMkDrvuzLwtM2d+hruBF0fE0X2ubz697O85RcTxwDGZuStbf5OvBNb3t+y+1X1+WXYhNal9DXBTad/cNv9dwI7MfCwzHwd2AGeO0D6fs+7M/H5m3lfaPwAOAB2/mDSqRiXo53pswoq5BkbEq2gd/d40x+zzeP4/AJ8uv3pdOqAwalL7J4EPRsQ+WncjfbjDso33Rw96qbvde4HvZubTbX1fKb/S/tkAfh3vte4Ty6mRf4mIt7Stc1+HdfaqX/v7/Tz/Mz7I/Q3Nar8deE9p/xbwsog47hDLjso+n6/uZ0XEqbR+I7i/rXvQudJXoxL03TgPuDYzf9beWY4Q3kDr/v0ZnwBeB/wqrV8dP75QRc5yPnBFZq4E1gJXRcSRsO8PWXdEvB74DPB7bct8IDPfALylvC5YwHpnzFf3fuCEzDwZ+Bjw1Yg45hDrWWid9vebgKcy8662ZUZhfwP8EfDWiLgNeCutb8P/7NCLjIRD1l1y5Srgosz8eekelVxpbFTCptFjE4q5jtoBzgWuy8yfznRk5v5seRr4Cq1f5fqtSe0bgWtKTd8CXkTrmRrzLdvN/jhcvdRNuWh1HXBhZj57pJOZ0+X9x8BX6f8+P+y6yymyR0v/blpHaK8ty69sW37k9nfxvM/+AuxvaFB7Zv4gM99T/hG9pPQ9cYhlR2KfH6JuykHADcAlmbmrbZmFyJX+GvZFgtbpORbTukhzIv9/0eT1c4x7HbCXcv//rHm7gLfP6ju+vAfwWWDLMGqndaHpt0v7l2idew3g9Tz3YuwDtC4gNdofQ6x7SRn/njnWuay0XwhcC/z+CNU9Biwq/a+m9Zd+aZmefWFw7ajUXaZfUOp99ULu7y5qXwa8oLQ/DXyqtJcCD9K6EHtsaY/SPp+v7qOAncBH51jvwHOl73+Gwy6gbeetBb5P6yjrktL3KeDdbWM+OddOpXU3xfTMH1hb/03AncBdwN8ALx1G7bQu+Px7+aDtAd7ZtuwlZbl7abvrYK51jkrdwJ8C/136Zl6vAF4C7AbuoHWR9nOUYB2Rut9b6toDfBf4zbZ1TpTPyf3A55njYGLIn5O3AbtmrW9B9nfD2s8B7itjvgQc3bbs79C60WCK1imQUdrnc9YNfBD46azP+Ell3oLkSj9ffjNWkio3KufoJUkDYtBLUuUMekmqnEEvSZUz6CWpcga9JFXOoJekyhn0klS5/wMWSvlK14FC5wAAAABJRU5ErkJggg==\n"
, 221 | "text/plain": [ 222 | "
" 223 | ] 224 | }, 225 | "metadata": { 226 | "needs_background": "light" 227 | }, 228 | "output_type": "display_data" 229 | } 230 | ], 231 | "source": [ 232 | "bins = plt.hist(scores)\n", 233 | "plt.plot([mean_score, mean_score], [0, np.max(bins[0])], color=\"tomato\")\n", 234 | "plt.plot([ci_lower, ci_lower], [0, np.max(bins[0])], color=\"lime\")\n", 235 | "plt.plot([ci_upper, ci_upper], [0, np.max(bins[0])], color=\"lime\")\n", 236 | "print(\"mean AUC={:.2f}, 95% CI: {:.2f}-{:.2f}\".format(mean_score, ci_lower, ci_upper))" 237 | ] 238 | }, 239 | { 240 | "cell_type": "markdown", 241 | "metadata": {}, 242 | "source": [ 243 | "### Use case #4\n", 244 | "\n", 245 | "Compare one model to a set of readers by computing p-value for a difference in their performance measured with AUC" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": 11, 251 | "metadata": { 252 | "collapsed": true 253 | }, 254 | "outputs": [], 255 | "source": [ 256 | "np.random.seed(42)\n", 257 | "random_array = np.random.rand(100)\n", 258 | "# simulated ground truth\n", 259 | "y_true = np.round(random_array)\n", 260 | "# simulated predictions for a model\n", 261 | "y_pred = random_array + np.random.normal(loc=0.0, scale=0.2, size=len(y_true))\n", 262 | "# simulated predictions for readers\n", 263 | "y_pred_readers = []\n", 264 | "for _ in range(10):\n", 265 | " scale = np.random.normal(loc=0.3, scale=0.1)\n", 266 | " y_pred_r = random_array + np.random.normal(loc=0.0, scale=scale, size=len(y_true))\n", 267 | " y_pred_readers.append(y_pred_r)" 268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": 12, 273 | "metadata": { 274 | "collapsed": true 275 | }, 276 | "outputs": [], 277 | "source": [ 278 | "# p-value comparing AUC for a model to mean AUC for 10 readers\n", 279 | "p, z = stat_util.pvalue_stat(y_true, y_pred, y_pred_readers, \n", 280 | " score_fun=roc_auc_score,\n", 281 | " stat_fun=np.mean,\n", 282 | " seed=42)" 283 | ] 284 | }, 285 | { 286 | 
"cell_type": "code", 287 | "execution_count": 13, 288 | "metadata": {}, 289 | "outputs": [ 290 | { 291 | "name": "stdout", 292 | "output_type": "stream", 293 | "text": [ 294 | "p=0.009\n" 295 | ] 296 | }, 297 | { 298 | "data": { 299 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAD8CAYAAAB5Pm/hAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi40LCBodHRwOi8vbWF0cGxvdGxpYi5vcmcv7US4rQAAD7JJREFUeJzt3X+s3XV9x/Hna1Rgc05aeq2kBS/GLgsuU9wdY3O/FJ38cJZkaFicNqxJs40lEpdsdWZZtuwPcMmYJoumkc2yzAnTORphP2qBLfsDtGgFgTGuFUIboBURdUwX9L0/7gd3qLe9595zbs+9H56P5OR8vp/v53u+n/c9p69+z/d77rmpKiRJ/fqBSU9AkrS8DHpJ6pxBL0mdM+glqXMGvSR1zqCXpM4Z9JLUOYNekjpn0EtS59ZMegIA69evr+np6UlPQ5JWlbvuuusrVTW10LgVEfTT09Ps27dv0tOQpFUlycPDjPPUjSR1zqCXpM4Z9JLUOYNekjpn0EtS5wx6SeqcQS9JnTPoJalzBv0qctVVV3HVVVdNehqSVpkV8ZuxGs7+/fsnPYXnnekdN09s3w9dfcnE9q2+eEQvSZ0z6CWpcwa9JHXOoJekzhn0ktQ5g16SOmfQS1LnDHpJ6pxBL0mdM+glqXMGvSR1zqCXpM4Z9JLUOYNekjpn0EtS54YK+iQPJbknyf4k+1rfuiR7kjzY7te2/iT5QJLZJHcnec1yFiBJOr7FHNG/rqpeXVUzbXkHsLeqNgN72zLARcDmdtsOfHBck5UkLd4op262ALtaexdw6UD/9TXnDuC0JGeMsB9J0giGDfoC/jXJXUm2t74NVfVoaz8GbGjtjcAjA9sebH2SpAkY9m/G/lxVHUryEmBPkv8cXFlVlaQWs+P2H8Z2gLPOOmsxm0qSFmGoI/qqOtTuDwOfBM4DHn/2lEy7P9yGHwLOHNh8U+s7+jF3VtVMVc1MTU0tvQJJ0nEtGPRJXpjkRc+2gV8GvgjsBra2YVuBm1p7N/DO9umb84GnBk7xSJJOsGFO3WwAPpnk2fEfrap/TvJZ4MYk24CHgbe18bcAFwOzwNPAFWOftSRpaAsGfVUdAF41T/8TwAXz9Bdw5VhmJ0ka2bAXY6WJmt5x86SnIK1afgWCJHXOoJekzhn0ktQ5g16SOmfQS1LnDHpJ6pxBL0mdM+glqXMGvSR1zqCXpM4Z9JLUOYNekjpn0EtS5wx6SeqcQS9JnTPoJalzBr0kdc6gl6TOGfSS1DmDXpI6Z9BLUucMeknqnEEvSZ0z6CWpcwa9JHXOoJekzhn0ktQ5g16SOjd00Cc5Kcnnk3yqLZ+d5M4ks0luSHJy6z+lLc+29dPLM3VJ0jAWc0T/LuD+geVrgGur6hXAk8C21r8NeLL1X9vGSZImZKigT7IJuAT4cFsO8Hrg423ILuDS1t7SlmnrL2jjJUkTMOwR/V8Avwd8ty2fDnytqp5pyweBja29EXgEoK1/qo1/jiTbk+xLsu/IkSNLnL4kaSELBn2SNwOHq+quce64qnZW1UxVzUxNTY3zoSVJA9YMMea1wFuSXAycCvwI8H7gtCRr2lH7JuBQG38IOBM4mGQN8GLgibHPXJI0lAWP6KvqPVW1qaqmgcuBW6vq7cBtwGVt2Fbgptbe3ZZp62+tqhrrrCVJQxvlc/S/D7w7ySxz5+Cva/3XAae3/ncDO0aboiRpFMOcuvmeqroduL21DwD
nzTPmW8BbxzA3SdIYLCroJZ040ztunsh+H7r6konsV8vHr0CQpM4Z9JLUOYNekjpn0EtS5wx6SeqcQS9JnTPoJalzBr0kdc6gl6TOGfSS1DmDXpI6Z9BLUucMeknqnEEvSZ0z6CWpcwa9JHXOoJekzhn0ktQ5g16SOmfQS1LnDHpJ6pxBL0mdM+glqXMGvSR1zqCXpM4Z9JLUOYNekjq3YNAnOTXJZ5J8Icm9Sf649Z+d5M4ks0luSHJy6z+lLc+29dPLW4Ik6XiGOaL/NvD6qnoV8GrgwiTnA9cA11bVK4AngW1t/DbgydZ/bRsnSZqQBYO+5nyzLb6g3Qp4PfDx1r8LuLS1t7Rl2voLkmRsM5YkLcpQ5+iTnJRkP3AY2AN8CfhaVT3ThhwENrb2RuARgLb+KeD0cU5akjS8oYK+qr5TVa8GNgHnAT826o6TbE+yL8m+I0eOjPpwkqRjWNSnbqrqa8BtwM8ApyVZ01ZtAg619iHgTIC2/sXAE/M81s6qmqmqmampqSVOX5K0kGE+dTOV5LTW/kHgjcD9zAX+ZW3YVuCm1t7dlmnrb62qGuekJUnDW7PwEM4AdiU5ibn/GG6sqk8luQ/4WJI/BT4PXNfGXwf8TZJZ4KvA5cswb0nSkBYM+qq6Gzh3nv4DzJ2vP7r/W8BbxzI7SdLI/M1YSeqcQS9JnTPoJalzBr0kdc6gl6TOGfSS1DmDXpI6Z9BLUucMeknqnEEvSZ0z6CWpcwa9JHXOoJekzhn0ktQ5g16SOmfQS1LnDHpJ6pxBL0mdM+glqXMGvSR1zqCXpM4Z9JLUOYNekjpn0EtS5wx6SeqcQS9JnVsz6QlodZnecfOkpyBpkTyil6TOLRj0Sc5McluS+5Lcm+RdrX9dkj1JHmz3a1t/knwgyWySu5O8ZrmLkCQd2zBH9M8Av1tV5wDnA1cmOQfYAeytqs3A3rYMcBGwud22Ax8c+6wlSUNbMOir6tGq+lxrfwO4H9gIbAF2tWG7gEtbewtwfc25AzgtyRljn7kkaSiLOkefZBo4F7gT2FBVj7ZVjwEbWnsj8MjAZgdbnyRpAoYO+iQ/DHwCuKqqvj64rqoKqMXsOMn2JPuS7Dty5MhiNpUkLcJQH69M8gLmQv5vq+ofWvfjSc6oqkfbqZnDrf8QcObA5pta33NU1U5gJ8DMzMyi/pOQtHwm+RHah66+ZGL77tkwn7oJcB1wf1X9+cCq3cDW1t4K3DTQ/8726ZvzgacGTvFIkk6wYY7oXwu8A7gnyf7W9wfA1cCNSbYBDwNva+tuAS4GZoGngSvGOmNJ0qIsGPRV9R9AjrH6gnnGF3DliPOSJI2JvxkrSZ0z6CWpcwa9JHXOoJekzhn0ktQ5g16SOmfQS1LnDHpJ6pxBL0mdM+glqXMGvSR1zqCXpM4Z9JLUOYNekjpn0EtS5wx6SeqcQS9JnTPoJalzBr0kdc6gl6TOGfSS1DmDXpI6Z9BLUucMeknqnEEvSZ0z6CWpcwa9JHXOoJekzhn0ktS5BYM+yV8lOZzkiwN965LsSfJgu1/b+pPkA0lmk9yd5DXLOXlJ0sKGOaL/CHDhUX07gL1VtRnY25YBLgI2t9t24IPjmaYkaakWDPqq+nfgq0d1bwF2tfYu4NKB/utrzh3AaUnOGNdkJUmLt9Rz9Buq6tHWfgzY0NobgUcGxh1sfd8nyfYk+5LsO3LkyBKnIUlayMgXY6uqgFrCdjuraqaqZqampkadhiTpGJYa9I8/e0qm3R9u/YeAMwfGbWp9kqQJWWrQ7wa2tvZW4KaB/ne2T9+cDzw1cIpHkjQBaxYakOTvgF8C1ic5CPwRcDVwY5JtwMPA29rwW4CLgVngaeCKZZizJGkRFgz6qvq1Y6y6YJ6xBVw56qR0fNM7bp70FCStIv5mrCR1zqCXpM4Z9JLUOYNekjpn0EtS5wx6SeqcQS9JnTPoJalzBr0kdc6gl6TOGfSS1LkFv+tGkk6USX2P00NXXzKR/Z4
oHtFLUucMeknqnEEvSZ0z6CWpcwa9JHXOoJekzhn0ktQ5g16SOmfQS1LnDHpJ6pxBL0mdM+glqXMGvSR1zqCXpM4Z9JLUOYNekjq3LH94JMmFwPuBk4APV9XVy7EfSRqHSf3BEzgxf/Rk7EGf5CTgL4E3AgeBzybZXVX3jXtfk3aiXxyPHXgCgJeef0J3K2mVW45TN+cBs1V1oKr+F/gYsGUZ9iNJGsJynLrZCDwysHwQ+Oll2A8w2bdckrQaTOyPgyfZDmxvi99M8sCID7ke+MqIj7EarH/4mjc/H+qE589zCtbao6HqzDUj7eNlwwxajqA/BJw5sLyp9T1HVe0Edo5rp0n2VdXMuB5vpXq+1AnW2qvnS60rqc7lOEf/WWBzkrOTnAxcDuxehv1IkoYw9iP6qnomye8A/8Lcxyv/qqruHfd+JEnDWZZz9FV1C3DLcjz2cYztNNAK93ypE6y1V8+XWldMnamqSc9BkrSM/AoESercqgn6JOuS7EnyYLtfe4xxW9uYB5NsHei/PckDSfa320tO3OyHk+TCNsfZJDvmWX9Kkhva+juTTA+se0/rfyDJm07kvJdiqbUmmU7yPwPP44dO9NwXY4g6fyHJ55I8k+Syo9bN+1peqUas9TsDz+mK//DGELW+O8l9Se5OsjfJywbWnfjntapWxQ14H7CjtXcA18wzZh1woN2vbe21bd3twMyk6zhOfScBXwJeDpwMfAE456gxvw18qLUvB25o7XPa+FOAs9vjnDTpmpap1mngi5OuYYx1TgM/AVwPXDbQf8zX8kq8jVJrW/fNSdcw5lpfB/xQa//WwOt3Is/rqjmiZ+5rFHa19i7g0nnGvAnYU1VfraongT3AhSdofqMa5qsjBn8GHwcuSJLW/7Gq+nZVfRmYbY+3Uo1S62qyYJ1V9VBV3Q1896htV9treZRaV5thar2tqp5ui3cw9/tEMKHndTUF/YaqerS1HwM2zDNmvq9f2Diw/NftreEfrsDQWGjuzxlTVc8ATwGnD7ntSjJKrQBnJ/l8kn9L8vPLPdkRjPK89PicHs+pSfYluSPJfAdxK8lia90G/NMStx2LiX0FwnySfBp46Tyr3ju4UFWVZLEfF3p7VR1K8iLgE8A7mHsLqdXlUeCsqnoiyU8C/5jklVX19UlPTCN5Wfv3+XLg1iT3VNWXJj2pUSX5dWAG+MVJzmNFHdFX1Ruq6sfnud0EPJ7kDIB2f3iehzjm1y9U1bP33wA+yso7tTHMV0d8b0ySNcCLgSeG3HYlWXKt7fTUEwBVdRdz50p/dNlnvDSjPC89PqfHNPDv8wBz19POHefkxmyoWpO8gbmD1LdU1bcXs+3YTfrCxiIugPwZz70Y+755xqwDvszcRY61rb2OuXcu69uYFzB3zvc3J13TUXNfw9yFmbP5/ws8rzxqzJU89wLlja39Sp57MfYAK/ti7Ci1Tj1bG3MXww4B6yZd01LrHBj7Eb7/Yuz3vZYnXdMy1boWOKW11wMPctTFzZV0G/L1ey5zByGbj+qfyPM68R/aIn64pwN724vg08/+cJh7W/ThgXG/wdzFyFngitb3QuAu4G7gXtpfv5p0TfPUeDHwX+0F8t7W9yfMHREAnAr8favtM8DLB7Z9b9vuAeCiSdeyXLUCv9qew/3A54BfmXQtI9b5U8ydp/1v5t6d3Xu81/JKvi21VuBngXtaYN4DbJt0LWOo9dPA4+11uh/YPcnn1d+MlaTOrahz9JKk8TPoJalzBr0kdc6gl6TOGfSS1DmDXpI6Z9BLUucMeknq3P8BAnOCxgQcQJUAAAAASUVORK5CYII=\n", 300 | "text/plain": [ 301 | "
# NOTE: in the flattened repository dump this span also carried the tail of examples.ipynb, all of
# requirements.txt and the stat_util.py file header. That non-Python text is kept verbatim in the
# comments below so that nothing from the dump is lost.
# " 302 | ] 303 | }, 304 | "metadata": { 305 | "needs_background": "light" 306 | }, 307 | "output_type": "display_data" 308 | } 309 | ], 310 | "source": [ 311 | "bins = plt.hist(z)\n", 312 | "plt.plot([0, 0], [0, np.max(bins[0])], color=\"black\")\n", 313 | "print(\"p={}\".format(p))" 314 | ] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "execution_count": null, 319 | "metadata": { 320 | "collapsed": true 321 | }, 322 | "outputs": [], 323 | "source": [] 324 | } 325 | ], 326 | "metadata": { 327 | "kernelspec": { 328 | "display_name": "Python 3", 329 | "language": "python", 330 | "name": "python3" 331 | }, 332 | "language_info": { 333 | "codemirror_mode": { 334 | "name": "ipython", 335 | "version": 2 336 | }, 337 | "file_extension": ".py", 338 | "mimetype": "text/x-python", 339 | "name": "python", 340 | "nbconvert_exporter": "python", 341 | "pygments_lexer": "ipython2", 342 | "version": "2.7.10" 343 | } 344 | }, 345 | "nbformat": 4, 346 | "nbformat_minor": 2 347 | } 348 |
# --------------------------------------------------------------------------------
# /requirements.txt:
# --------------------------------------------------------------------------------
# 1 | jupyter==1.0.0 2 | matplotlib==2.2.4 3 | numpy==1.16.2 4 | scikit-learn==0.20.3
# --------------------------------------------------------------------------------
# /stat_util.py:
# --------------------------------------------------------------------------------
import numpy as np
from scipy.stats import percentileofscore


def _stat_of_resampled_scores(y_true, y_preds, readers, indices, score_fun, stat_fun, sample_weight):
    """
    Evaluate score_fun for every resampled reader on the resampled cases and reduce the scores with stat_fun.
    :param y_true: 1D array of labels.
    :param y_preds: 2D array of predictions, one row per reader.
    :param readers: 1D array of row indices into y_preds (resampled readers).
    :param indices: 1D array of indices into y_true (resampled cases).
    :param score_fun: Score function called as score_fun(labels, predictions[, sample_weight=...]).
    :param stat_fun: Statistic applied to the list of per-reader scores.
    :param sample_weight: 1D array of sample weights or None; only passed to score_fun when not None.
    :return: Value of stat_fun over the per-reader scores.
    """
    # Only forward sample_weight when it was given, so score functions without that keyword keep working.
    extra = {} if sample_weight is None else {"sample_weight": sample_weight[indices]}
    y_sample = y_true[indices]
    return stat_fun([score_fun(y_sample, y_preds[r][indices], **extra) for r in readers])


def score_ci(
    y_true,
    y_pred,
    score_fun,
    sample_weight=None,
    n_bootstraps=2000,
    confidence_level=0.95,
    seed=None,
    reject_one_class_samples=True,
):
    """
    Compute confidence interval for given score function based on labels and predictions using bootstrapping.
    :param y_true: 1D list or array of labels.
    :param y_pred: 1D list or array of predictions corresponding to elements in y_true.
    :param score_fun: Score function for which confidence interval is computed. (e.g. sklearn.metrics.accuracy_score)
    :param sample_weight: 1D list or array of sample weights to pass to score_fun, see e.g. sklearn.metrics.roc_auc_score.
    :param n_bootstraps: The number of bootstraps. (default: 2000)
    :param confidence_level: Confidence level for computing confidence interval. (default: 0.95)
    :param seed: Random seed for reproducibility. (default: None)
    :param reject_one_class_samples: Whether to reject bootstrapped samples with only one label. For scores like AUC we
    need at least one positive and one negative sample. (default: True)
    :return: Score evaluated on labels and predictions, lower confidence interval, upper confidence interval, list of
    bootstrapped scores.
    :raises ValueError: If no bootstrapped sample was accepted (raised by score_stat_ci).
    """

    assert len(y_true) == len(y_pred)

    # The point estimate must use the same weighting as the bootstrapped scores it is reported with.
    if sample_weight is None:
        score = score_fun(y_true, y_pred)
    else:
        score = score_fun(y_true, y_pred, sample_weight=sample_weight)

    _, ci_lower, ci_upper, scores = score_stat_ci(
        y_true=y_true,
        y_preds=y_pred,
        score_fun=score_fun,
        sample_weight=sample_weight,
        n_bootstraps=n_bootstraps,
        confidence_level=confidence_level,
        seed=seed,
        reject_one_class_samples=reject_one_class_samples,
    )

    return score, ci_lower, ci_upper, scores


def score_stat_ci(
    y_true,
    y_preds,
    score_fun,
    stat_fun=np.mean,
    sample_weight=None,
    n_bootstraps=2000,
    confidence_level=0.95,
    seed=None,
    reject_one_class_samples=True,
):
    """
    Compute confidence interval for given statistic of a score function based on labels and predictions using
    bootstrapping. Both readers (rows of y_preds) and cases are resampled with replacement in every iteration.
    Calling this function reseeds numpy's global random generator with seed.
    :param y_true: 1D list or array of labels.
    :param y_preds: A list of lists or 2D array of predictions corresponding to elements in y_true.
    :param score_fun: Score function for which confidence interval is computed. (e.g. sklearn.metrics.accuracy_score)
    :param stat_fun: Statistic for which confidence interval is computed. (e.g. np.mean)
    :param sample_weight: 1D list or array of sample weights to pass to score_fun, see e.g. sklearn.metrics.roc_auc_score.
    :param n_bootstraps: The number of bootstraps. (default: 2000)
    :param confidence_level: Confidence level for computing confidence interval. (default: 0.95)
    :param seed: Random seed for reproducibility. (default: None)
    :param reject_one_class_samples: Whether to reject bootstrapped samples with only one label. For scores like AUC we
    need at least one positive and one negative sample. (default: True)
    :return: Mean of the bootstrapped score statistics, lower confidence interval, upper confidence interval, list of
    bootstrapped score statistics.
    :raises ValueError: If no bootstrapped sample was accepted (n_bootstraps is 0 or every sample was rejected).
    """

    y_true = np.array(y_true)
    y_preds = np.atleast_2d(y_preds)
    assert all(len(y_true) == len(y) for y in y_preds)
    if sample_weight is not None:
        # Plain lists cannot be indexed with an index array.
        sample_weight = np.asarray(sample_weight)

    np.random.seed(seed)
    scores = []
    for _ in range(n_bootstraps):
        # Draw order (readers, then cases) is kept so that seeded runs reproduce earlier results.
        readers = np.random.randint(0, len(y_preds), len(y_preds))
        indices = np.random.randint(0, len(y_true), len(y_true))
        if reject_one_class_samples and len(np.unique(y_true[indices])) < 2:
            continue
        scores.append(
            _stat_of_resampled_scores(y_true, y_preds, readers, indices, score_fun, stat_fun, sample_weight)
        )

    if not scores:
        raise ValueError("No bootstrapped sample was accepted; cannot compute a confidence interval.")

    mean_score = np.mean(scores)
    sorted_scores = np.array(sorted(scores))
    alpha = (1.0 - confidence_level) / 2.0
    n_scores = len(sorted_scores)
    # round((1 - alpha) * n) reaches n for small n (e.g. n < 20 at the 95% level), so clamp to a valid index.
    lower_index = min(max(int(round(alpha * n_scores)), 0), n_scores - 1)
    upper_index = min(max(int(round((1.0 - alpha) * n_scores)), 0), n_scores - 1)
    ci_lower = sorted_scores[lower_index]
    ci_upper = sorted_scores[upper_index]
    return mean_score, ci_lower, ci_upper, scores


def pvalue(
    y_true,
    y_pred1,
    y_pred2,
    score_fun,
    sample_weight=None,
    n_bootstraps=2000,
    two_tailed=True,
    seed=None,
    reject_one_class_samples=True,
):
    """
    Compute p-value for hypothesis that score function for model I predictions is higher than for model II predictions
    using bootstrapping.
    :param y_true: 1D list or array of labels.
    :param y_pred1: 1D list or array of predictions for model I corresponding to elements in y_true.
    :param y_pred2: 1D list or array of predictions for model II corresponding to elements in y_true.
    :param score_fun: Score function for which p-value is computed. (e.g. sklearn.metrics.accuracy_score)
    :param sample_weight: 1D list or array of sample weights to pass to score_fun, see e.g. sklearn.metrics.roc_auc_score.
    :param n_bootstraps: The number of bootstraps. (default: 2000)
    :param two_tailed: Whether to use two-tailed test. (default: True)
    :param seed: Random seed for reproducibility. (default: None)
    :param reject_one_class_samples: Whether to reject bootstrapped samples with only one label. For scores like AUC we
    need at least one positive and one negative sample. (default: True)
    :return: Computed p-value, list of bootstrapped differences of scores.
    :raises ValueError: If no bootstrapped sample was accepted (raised by pvalue_stat).
    """

    assert len(y_true) == len(y_pred1)
    assert len(y_true) == len(y_pred2)

    return pvalue_stat(
        y_true=y_true,
        y_preds1=y_pred1,
        y_preds2=y_pred2,
        score_fun=score_fun,
        sample_weight=sample_weight,
        n_bootstraps=n_bootstraps,
        two_tailed=two_tailed,
        seed=seed,
        reject_one_class_samples=reject_one_class_samples,
    )


def pvalue_stat(
    y_true,
    y_preds1,
    y_preds2,
    score_fun,
    stat_fun=np.mean,
    compare_fun=np.subtract,
    sample_weight=None,
    n_bootstraps=2000,
    two_tailed=True,
    seed=None,
    reject_one_class_samples=True,
):
    """
    Compute p-value for hypothesis that given statistic of score function for model I predictions is higher than for
    model II predictions using bootstrapping. The one-tailed p-value is the fraction of bootstrapped comparisons that
    are <= 0; the two-tailed p-value is twice the smaller of the two tail fractions, capped at 1.
    Calling this function reseeds numpy's global random generator with seed.
    :param y_true: 1D list or array of labels.
    :param y_preds1: A list of lists or 2D array of predictions for model I corresponding to elements in y_true.
    :param y_preds2: A list of lists or 2D array of predictions for model II corresponding to elements in y_true.
    :param score_fun: Score function for which p-value is computed. (e.g. sklearn.metrics.accuracy_score)
    :param stat_fun: Statistic for which p-value is computed. (e.g. np.mean)
    :param compare_fun: Function to determine relative performance. (default: score1 - score2)
    :param sample_weight: 1D list or array of sample weights to pass to score_fun, see e.g. sklearn.metrics.roc_auc_score.
    :param n_bootstraps: The number of bootstraps. (default: 2000)
    :param two_tailed: Whether to use two-tailed test. (default: True)
    :param seed: Random seed for reproducibility. (default: None)
    :param reject_one_class_samples: Whether to reject bootstrapped samples with only one label. For scores like AUC we
    need at least one positive and one negative sample. (default: True)
    :return: Computed p-value, list of bootstrapped differences of scores.
    :raises ValueError: If no bootstrapped sample was accepted (n_bootstraps is 0 or every sample was rejected).
    """

    y_true = np.array(y_true)
    y_preds1 = np.atleast_2d(y_preds1)
    y_preds2 = np.atleast_2d(y_preds2)
    assert all(len(y_true) == len(y) for y in y_preds1)
    assert all(len(y_true) == len(y) for y in y_preds2)
    if sample_weight is not None:
        # Plain lists cannot be indexed with an index array.
        sample_weight = np.asarray(sample_weight)

    np.random.seed(seed)
    z = []
    for _ in range(n_bootstraps):
        # Draw order (readers I, readers II, cases) is kept so that seeded runs reproduce earlier results.
        readers1 = np.random.randint(0, len(y_preds1), len(y_preds1))
        readers2 = np.random.randint(0, len(y_preds2), len(y_preds2))
        indices = np.random.randint(0, len(y_true), len(y_true))
        if reject_one_class_samples and len(np.unique(y_true[indices])) < 2:
            continue
        score1 = _stat_of_resampled_scores(y_true, y_preds1, readers1, indices, score_fun, stat_fun, sample_weight)
        score2 = _stat_of_resampled_scores(y_true, y_preds2, readers2, indices, score_fun, stat_fun, sample_weight)
        z.append(compare_fun(score1, score2))

    if not z:
        raise ValueError("No bootstrapped sample was accepted; cannot compute a p-value.")

    # Fraction of bootstrapped comparisons with z <= 0.
    p_lower = percentileofscore(z, 0.0, kind="weak") / 100.0
    if not two_tailed:
        return p_lower, z

    # Fraction with z >= 0. Doubling only p_lower would give values above 1 whenever model I is the worse one.
    p_upper = 1.0 - percentileofscore(z, 0.0, kind="strict") / 100.0
    p = min(1.0, 2.0 * min(p_lower, p_upper))
    return p, z