├── README.md
└── auc.ipynb


/README.md:
--------------------------------------------------------------------------------
1 | # metrics
2 | Python implementation of machine learning metrics
3 | 


--------------------------------------------------------------------------------
/auc.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Fast computation of roc auc metric in Python"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
  14 |     "Python code to compute the ROC AUC metric.  This code runs about twice as fast as the corresponding scikit-learn function."
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 1,
 20 |    "metadata": {
 21 |     "collapsed": true
 22 |    },
 23 |    "outputs": [],
 24 |    "source": [
 25 |     "import numpy as np \n",
 26 |     "from numba import jit\n",
 27 |     "\n",
 28 |     "@jit\n",
 29 |     "def fast_auc(y_true, y_prob):\n",
 30 |     "    y_true = np.asarray(y_true)\n",
 31 |     "    y_true = y_true[np.argsort(y_prob)]\n",
 32 |     "    nfalse = 0\n",
 33 |     "    auc = 0\n",
 34 |     "    n = len(y_true)\n",
 35 |     "    for i in range(n):\n",
 36 |     "        y_i = y_true[i]\n",
 37 |     "        nfalse += (1 - y_i)\n",
 38 |     "        auc += y_i * nfalse\n",
 39 |     "    auc /= (nfalse * (n - nfalse))\n",
 40 |     "    return auc"
 41 |    ]
 42 |   },
 43 |   {
 44 |    "cell_type": "markdown",
 45 |    "metadata": {},
 46 |    "source": [
 47 |     "Let's create a random example."
 48 |    ]
 49 |   },
 50 |   {
 51 |    "cell_type": "code",
 52 |    "execution_count": 2,
 53 |    "metadata": {
 54 |     "collapsed": true
 55 |    },
 56 |    "outputs": [],
 57 |    "source": [
 58 |     "y_true = np.random.randint(0,2,1000000)\n",
 59 |     "y_pred = np.random.rand(1000000)"
 60 |    ]
 61 |   },
 62 |   {
 63 |    "cell_type": "markdown",
 64 |    "metadata": {},
 65 |    "source": [
  66 |     "The ROC AUC should be close to 0.5 for random predictions."
 67 |    ]
 68 |   },
 69 |   {
 70 |    "cell_type": "code",
 71 |    "execution_count": 3,
 72 |    "metadata": {},
 73 |    "outputs": [
 74 |     {
 75 |      "data": {
 76 |       "text/plain": [
 77 |        "0.501004845745664"
 78 |       ]
 79 |      },
 80 |      "execution_count": 3,
 81 |      "metadata": {},
 82 |      "output_type": "execute_result"
 83 |     }
 84 |    ],
 85 |    "source": [
 86 |     "fast_auc(y_true, y_pred)"
 87 |    ]
 88 |   },
 89 |   {
 90 |    "cell_type": "markdown",
 91 |    "metadata": {},
 92 |    "source": [
 93 |     "It is the case.  Let's see what scikit-learn code does here."
 94 |    ]
 95 |   },
 96 |   {
 97 |    "cell_type": "code",
 98 |    "execution_count": 4,
 99 |    "metadata": {},
100 |    "outputs": [
101 |     {
102 |      "data": {
103 |       "text/plain": [
104 |        "0.50100484574566395"
105 |       ]
106 |      },
107 |      "execution_count": 4,
108 |      "metadata": {},
109 |      "output_type": "execute_result"
110 |     }
111 |    ],
112 |    "source": [
113 |     "from sklearn.metrics import roc_auc_score\n",
114 |     "\n",
115 |     "roc_auc_score(y_true, y_pred)"
116 |    ]
117 |   },
118 |   {
119 |    "cell_type": "markdown",
120 |    "metadata": {},
121 |    "source": [
122 |     "Seems we are in good shape as the result is very close.\n",
123 |     "\n",
124 |     "A little sanity check."
125 |    ]
126 |   },
127 |   {
128 |    "cell_type": "code",
129 |    "execution_count": 5,
130 |    "metadata": {},
131 |    "outputs": [
132 |     {
133 |      "data": {
134 |       "text/plain": [
135 |        "1.0"
136 |       ]
137 |      },
138 |      "execution_count": 5,
139 |      "metadata": {},
140 |      "output_type": "execute_result"
141 |     }
142 |    ],
143 |    "source": [
144 |     "fast_auc(y_true, y_true)"
145 |    ]
146 |   },
147 |   {
148 |    "cell_type": "markdown",
149 |    "metadata": {},
150 |    "source": [
151 |     "Which one is faster?"
152 |    ]
153 |   },
154 |   {
155 |    "cell_type": "code",
156 |    "execution_count": 6,
157 |    "metadata": {},
158 |    "outputs": [
159 |     {
160 |      "name": "stdout",
161 |      "output_type": "stream",
162 |      "text": [
163 |       "10 loops, best of 3: 130 ms per loop\n"
164 |      ]
165 |     }
166 |    ],
167 |    "source": [
168 |     "%timeit fast_auc(y_true, y_pred)"
169 |    ]
170 |   },
171 |   {
172 |    "cell_type": "code",
173 |    "execution_count": 7,
174 |    "metadata": {},
175 |    "outputs": [
176 |     {
177 |      "name": "stdout",
178 |      "output_type": "stream",
179 |      "text": [
180 |       "1 loop, best of 3: 275 ms per loop\n"
181 |      ]
182 |     }
183 |    ],
184 |    "source": [
185 |     "%timeit roc_auc_score(y_true, y_pred)"
186 |    ]
187 |   },
188 |   {
189 |    "cell_type": "markdown",
190 |    "metadata": {},
191 |    "source": [
192 |     "My code is more than twice as fast."
193 |    ]
194 |   },
195 |   {
196 |    "cell_type": "code",
197 |    "execution_count": null,
198 |    "metadata": {
199 |     "collapsed": true
200 |    },
201 |    "outputs": [],
202 |    "source": []
203 |   }
204 |  ],
205 |  "metadata": {
206 |   "anaconda-cloud": {},
207 |   "kernelspec": {
208 |    "display_name": "Python [conda root]",
209 |    "language": "python",
210 |    "name": "conda-root-py"
211 |   },
212 |   "language_info": {
213 |    "codemirror_mode": {
214 |     "name": "ipython",
215 |     "version": 3
216 |    },
217 |    "file_extension": ".py",
218 |    "mimetype": "text/x-python",
219 |    "name": "python",
220 |    "nbconvert_exporter": "python",
221 |    "pygments_lexer": "ipython3",
222 |    "version": "3.5.1"
223 |   }
224 |  },
225 |  "nbformat": 4,
226 |  "nbformat_minor": 1
227 | }
228 | 


--------------------------------------------------------------------------------