# University of Zagreb
# Faculty of Electrical Engineering and Computing
#
# Machine Learning
# http://www.fer.hr/predmet/su
#
# (c) 2015 Jan Snajder

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy as sp
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import PolynomialFeatures


class MultiColumnLabelEncoder:
    '''
    Encodes several columns of a table, using one LabelEncoder per column.

    Attributes:
        columns: names of the columns to encode, or None to encode every
            column of the table passed to fit().
        encoders: dict mapping column name -> fitted LabelEncoder. It is
            rebuilt from scratch on every call to fit().
    '''

    # NOTE: there used to be an `encoders()` accessor method here. It was
    # unreachable because __init__ always binds an instance attribute of the
    # same name (calling it raised "'dict' object is not callable"), so it
    # was dropped; read the `encoders` attribute directly.

    def __init__(self, columns=None):
        self.columns = columns  # array of column names to encode
        self.encoders = {}

    def fit(self, X, y=None):
        '''
        Fits one LabelEncoder per selected column of X.

        X must support X[colname] and, when no columns were specified,
        X.items() yielding (column name, column) pairs. y is ignored.
        Returns self.
        '''
        fitted = {}
        if self.columns is not None:
            for colname in self.columns:
                fitted[colname] = LabelEncoder().fit(X[colname])
        else:
            # items() instead of iteritems(): the latter no longer exists
            # in pandas >= 2.0.
            for colname, col in X.items():
                fitted[colname] = LabelEncoder().fit(col)
        # Replace rather than update, so that encoders from an earlier fit
        # on different columns do not leak into transform().
        self.encoders = fitted
        return self

    def transform(self, X):
        '''
        Transforms the columns of X for which an encoder was fitted, using
        LabelEncoder(). If no columns were specified, that is every column
        seen by fit(). Works on a copy of X; X itself is not modified.
        '''
        output = X.copy()
        for colname, encoder in self.encoders.items():
            output[colname] = encoder.transform(output[colname])
        return output

    def fit_transform(self, X, y=None):
        '''Fits on X and returns the transformed copy of X.'''
        return self.fit(X, y).transform(X)


class PolyRegression:
    '''
    Linear regression on polynomial features of a fixed order.

    Attributes:
        order: degree passed to PolynomialFeatures.
        h: the underlying LinearRegression model.
    '''

    def __init__(self, order):
        self.order = order
        self.h = LinearRegression()

    def _expand(self, X):
        '''Maps X to its polynomial features of degree self.order.'''
        return PolynomialFeatures(self.order).fit_transform(X)

    def fit(self, X, y):
        '''Fits the linear model on the polynomial expansion of X. Returns self.'''
        self.h.fit(self._expand(X), y)
        return self

    def predict(self, X):
        '''Returns the model's predictions for the rows of X.'''
        return self.h.predict(self._expand(X))

    def __call__(self, x):
        '''
        Returns the prediction for the first (typically only) sample in x.

        x is promoted with np.atleast_2d, so a single 1-D sample is treated
        as one row; 2-D input is passed through unchanged.
        '''
        return self.predict(np.atleast_2d(x))[0]


def plot_problem(X, y, h=None, surfaces=True):
    '''
    Plots a two-dimensional labeled dataset (X,y) and, if function h(x) is given,
    the decision boundaries (surfaces=False) or decision surfaces (surfaces=True)

    X is indexed as X[:, 0] / X[:, 1] and must have exactly two columns.
    h is called once per mesh point with a 1-D array [x0, x1]; the values it
    returns are what gets contoured.
    '''
    assert X.shape[1] == 2, "Dataset is not two-dimensional"
    if h is not None:
        # Create a mesh to plot in, padded by 1 around the data
        r = 0.02  # mesh resolution
        x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
        y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
        xx, yy = np.meshgrid(np.arange(x_min, x_max, r),
                             np.arange(y_min, y_max, r))
        XX = np.c_[xx.ravel(), yy.ravel()]
        # Apply h point by point. A list comprehension is required here:
        # on Python 3, map() is lazy and np.array(map(...)) would yield a
        # 0-d object array that cannot be reshaped.
        Z = np.array([h(point) for point in XX])
        # Put the result into a color plot
        Z = Z.reshape(xx.shape)
        if surfaces:
            plt.contourf(xx, yy, Z, cmap=plt.cm.Pastel1)
        else:
            plt.contour(xx, yy, Z)
    # Plot the dataset
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Paired, marker='o', s=50)
A Work made available under CC0 may be 30 | protected by copyright and related or neighboring rights ("Copyright and 31 | Related Rights"). Copyright and Related Rights include, but are not limited 32 | to, the following: 33 | 34 | i. the right to reproduce, adapt, distribute, perform, display, communicate, 35 | and translate a Work; 36 | 37 | ii. moral rights retained by the original author(s) and/or performer(s); 38 | 39 | iii. publicity and privacy rights pertaining to a person's image or likeness 40 | depicted in a Work; 41 | 42 | iv. rights protecting against unfair competition in regards to a Work, 43 | subject to the limitations in paragraph 4(a), below; 44 | 45 | v. rights protecting the extraction, dissemination, use and reuse of data in 46 | a Work; 47 | 48 | vi. database rights (such as those arising under Directive 96/9/EC of the 49 | European Parliament and of the Council of 11 March 1996 on the legal 50 | protection of databases, and under any national implementation thereof, 51 | including any amended or successor version of such directive); and 52 | 53 | vii. other similar, equivalent or corresponding rights throughout the world 54 | based on applicable law or treaty, and any national implementations thereof. 55 | 56 | 2. Waiver. 
To the greatest extent permitted by, but not in contravention of, 57 | applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and 58 | unconditionally waives, abandons, and surrenders all of Affirmer's Copyright 59 | and Related Rights and associated claims and causes of action, whether now 60 | known or unknown (including existing as well as future claims and causes of 61 | action), in the Work (i) in all territories worldwide, (ii) for the maximum 62 | duration provided by applicable law or treaty (including future time 63 | extensions), (iii) in any current or future medium and for any number of 64 | copies, and (iv) for any purpose whatsoever, including without limitation 65 | commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes 66 | the Waiver for the benefit of each member of the public at large and to the 67 | detriment of Affirmer's heirs and successors, fully intending that such Waiver 68 | shall not be subject to revocation, rescission, cancellation, termination, or 69 | any other legal or equitable action to disrupt the quiet enjoyment of the Work 70 | by the public as contemplated by Affirmer's express Statement of Purpose. 71 | 72 | 3. Public License Fallback. Should any part of the Waiver for any reason be 73 | judged legally invalid or ineffective under applicable law, then the Waiver 74 | shall be preserved to the maximum extent permitted taking into account 75 | Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver 76 | is so judged Affirmer hereby grants to each affected person a royalty-free, 77 | non transferable, non sublicensable, non exclusive, irrevocable and 78 | unconditional license to exercise Affirmer's Copyright and Related Rights in 79 | the Work (i) in all territories worldwide, (ii) for the maximum duration 80 | provided by applicable law or treaty (including future time extensions), (iii) 81 | in any current or future medium and for any number of copies, and (iv) for any 82 | purpose whatsoever, including without limitation commercial, advertising or 83 | promotional purposes (the "License"). The License shall be deemed effective as 84 | of the date CC0 was applied by Affirmer to the Work. Should any part of the 85 | License for any reason be judged legally invalid or ineffective under 86 | applicable law, such partial invalidity or ineffectiveness shall not 87 | invalidate the remainder of the License, and in such case Affirmer hereby 88 | affirms that he or she will not (i) exercise any of his or her remaining 89 | Copyright and Related Rights in the Work or (ii) assert any associated claims 90 | and causes of action with respect to the Work, in either case contrary to 91 | Affirmer's express Statement of Purpose. 92 | 93 | 4. Limitations and Disclaimers. 94 | 95 | a. No trademark or patent rights held by Affirmer are waived, abandoned, 96 | surrendered, licensed or otherwise affected by this document. 97 | 98 | b. Affirmer offers the Work as-is and makes no representations or warranties 99 | of any kind concerning the Work, express, implied, statutory or otherwise, 100 | including without limitation warranties of title, merchantability, fitness 101 | for a particular purpose, non infringement, or the absence of latent or 102 | other defects, accuracy, or the present or absence of errors, whether or not 103 | discoverable, all to the greatest extent permissible under applicable law. 104 | 105 | c. 
Affirmer disclaims responsibility for clearing rights of other persons 106 | that may apply to the Work or any use thereof, including without limitation 107 | any person's Copyright and Related Rights in the Work. Further, Affirmer 108 | disclaims responsibility for obtaining any necessary consents, permissions 109 | or other rights required for any use of the Work. 110 | 111 | d. Affirmer understands and acknowledges that Creative Commons is not a 112 | party to this document and has no duty or obligation with respect to this 113 | CC0 or use of the Work. 114 | 115 | For more information, please see 116 | 117 | -------------------------------------------------------------------------------- /ML-Example2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Example 2: Glass identification" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": { 14 | "collapsed": false 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "# Load the basic libraries...\n", 19 | "import scipy as sp\n", 20 | "import pandas as pd\n", 21 | "import sklearn\n", 22 | "%pylab inline" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "[`Glass Identification Data Set`](https://archive.ics.uci.edu/ml/datasets/Glass+Identification): chemical analysis of 214 glass samples. For the purpose of forensic analysis, we wish to classify these into six classes, each of which corresponds to one glass type." 
30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": null, 35 | "metadata": { 36 | "collapsed": false 37 | }, 38 | "outputs": [], 39 | "source": [ 40 | "D = pd.read_csv(\"/home/jan/Downloads/glass.data\"); D" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": { 47 | "collapsed": false 48 | }, 49 | "outputs": [], 50 | "source": [ 51 | "data = D.as_matrix()" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": { 58 | "collapsed": false 59 | }, 60 | "outputs": [], 61 | "source": [ 62 | "glass_X, glass_y = data[:,0:10], data[:,10]" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": { 69 | "collapsed": false 70 | }, 71 | "outputs": [], 72 | "source": [ 73 | "from sklearn.svm import SVC\n", 74 | "from sklearn.preprocessing import StandardScaler" 75 | ] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "metadata": {}, 80 | "source": [ 81 | "# Take 1" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": { 88 | "collapsed": false 89 | }, 90 | "outputs": [], 91 | "source": [ 92 | "svc = SVC()\n", 93 | "svc.fit(glass_X, glass_y)" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "metadata": { 100 | "collapsed": false 101 | }, 102 | "outputs": [], 103 | "source": [ 104 | "svc.score(glass_X, glass_y)" 105 | ] 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "metadata": {}, 110 | "source": [ 111 | "# Take 2" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": { 118 | "collapsed": false 119 | }, 120 | "outputs": [], 121 | "source": [ 122 | "glass_X, glass_y = data[:,1:10], data[:,10]" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "metadata": { 129 | "collapsed": false 130 | }, 131 | "outputs": [], 132 | "source": [ 133 | "svc = SVC()\n", 134 | 
"svc.fit(glass_X,glass_y)" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": { 141 | "collapsed": false 142 | }, 143 | "outputs": [], 144 | "source": [ 145 | "svc.score(glass_X,glass_y)" 146 | ] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": {}, 151 | "source": [ 152 | "# Take 3" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": null, 158 | "metadata": { 159 | "collapsed": false 160 | }, 161 | "outputs": [], 162 | "source": [ 163 | "from sklearn import cross_validation\n", 164 | "X_train, X_test, y_train, y_test = cross_validation.train_test_split(glass_X,glass_y,train_size=2.0/3,random_state=42)\n", 165 | "print X_train.shape, X_test.shape" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "metadata": { 172 | "collapsed": false 173 | }, 174 | "outputs": [], 175 | "source": [ 176 | "svc.fit(X_train,y_train)" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": null, 182 | "metadata": { 183 | "collapsed": false 184 | }, 185 | "outputs": [], 186 | "source": [ 187 | "svc.score(X_test,y_test)" 188 | ] 189 | }, 190 | { 191 | "cell_type": "markdown", 192 | "metadata": {}, 193 | "source": [ 194 | "# Take 4" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": null, 200 | "metadata": { 201 | "collapsed": false 202 | }, 203 | "outputs": [], 204 | "source": [ 205 | "from sklearn.grid_search import GridSearchCV\n", 206 | "\n", 207 | "param_grid = [{'C': [2**x for x in range(-5,16)], 'gamma': [2**x for x in range(-15,4)]}]\n", 208 | "model = GridSearchCV(SVC(), param_grid)\n", 209 | "scaler = StandardScaler()\n", 210 | "X_train_scaled = scaler.fit_transform(X_train)\n", 211 | "X_test_scaled = scaler.transform(X_test)\n", 212 | "model.fit(X_train_scaled,y_train)" 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": null, 218 | "metadata": { 219 | "collapsed": false 220 | }, 
221 | "outputs": [], 222 | "source": [ 223 | "model.best_estimator_" 224 | ] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "execution_count": null, 229 | "metadata": { 230 | "collapsed": false 231 | }, 232 | "outputs": [], 233 | "source": [ 234 | "from sklearn.metrics import accuracy_score\n", 235 | "model.score(X_train_scaled,y_train)" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": null, 241 | "metadata": { 242 | "collapsed": false 243 | }, 244 | "outputs": [], 245 | "source": [ 246 | "model.score(X_test_scaled,y_test)" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": null, 252 | "metadata": { 253 | "collapsed": false 254 | }, 255 | "outputs": [], 256 | "source": [ 257 | "from sklearn.learning_curve import learning_curve" 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": null, 263 | "metadata": { 264 | "collapsed": false 265 | }, 266 | "outputs": [], 267 | "source": [ 268 | "(_,_,on_test) = learning_curve(model,X_train,y_train); on_test" 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": null, 274 | "metadata": { 275 | "collapsed": false 276 | }, 277 | "outputs": [], 278 | "source": [ 279 | "avgs = apply_along_axis(mean,1,on_test)" 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": null, 285 | "metadata": { 286 | "collapsed": false 287 | }, 288 | "outputs": [], 289 | "source": [ 290 | "plot(avgs)" 291 | ] 292 | } 293 | ], 294 | "metadata": { 295 | "kernelspec": { 296 | "display_name": "Python 2", 297 | "language": "python", 298 | "name": "python2" 299 | }, 300 | "language_info": { 301 | "codemirror_mode": { 302 | "name": "ipython", 303 | "version": 2 304 | }, 305 | "file_extension": ".py", 306 | "mimetype": "text/x-python", 307 | "name": "python", 308 | "nbconvert_exporter": "python", 309 | "pygments_lexer": "ipython2", 310 | "version": "2.7.6" 311 | } 312 | }, 313 | "nbformat": 4, 314 | "nbformat_minor": 0 315 | } 316 | 
--------------------------------------------------------------------------------