├── lec ├── MNK.docx ├── lecture01.pdf ├── lecture02.pdf ├── lecture03.pdf ├── lecture04.pdf ├── lecture05.pdf ├── lecture06.pdf ├── lecture07.pdf ├── lecture08.pdf ├── lecture09.pdf └── lecture10.pdf ├── cheatsheets ├── numpy.pdf ├── pandas.pdf ├── python.pdf ├── scipy.pdf ├── matplotlib.pdf ├── scikit-learn.pdf ├── jupyter-notebook.pdf └── README.md ├── assignments ├── theory01.pdf ├── theory02.pdf ├── programming01 │ ├── programming01.ipynb │ └── test.csv └── programming02 │ ├── programming_assignment_02.ipynb │ └── data_description.txt ├── prac ├── linear_regression.ipynb ├── linear_classifier.ipynb ├── svm.ipynb ├── knn_example.ipynb ├── ensembles.ipynb └── metrics.ipynb └── README.md /lec/MNK.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlcoursemm/mlcoursemm2019spring/HEAD/lec/MNK.docx -------------------------------------------------------------------------------- /lec/lecture01.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlcoursemm/mlcoursemm2019spring/HEAD/lec/lecture01.pdf -------------------------------------------------------------------------------- /lec/lecture02.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlcoursemm/mlcoursemm2019spring/HEAD/lec/lecture02.pdf -------------------------------------------------------------------------------- /lec/lecture03.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlcoursemm/mlcoursemm2019spring/HEAD/lec/lecture03.pdf -------------------------------------------------------------------------------- /lec/lecture04.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlcoursemm/mlcoursemm2019spring/HEAD/lec/lecture04.pdf 
-------------------------------------------------------------------------------- /lec/lecture05.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlcoursemm/mlcoursemm2019spring/HEAD/lec/lecture05.pdf -------------------------------------------------------------------------------- /lec/lecture06.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlcoursemm/mlcoursemm2019spring/HEAD/lec/lecture06.pdf -------------------------------------------------------------------------------- /lec/lecture07.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlcoursemm/mlcoursemm2019spring/HEAD/lec/lecture07.pdf -------------------------------------------------------------------------------- /lec/lecture08.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlcoursemm/mlcoursemm2019spring/HEAD/lec/lecture08.pdf -------------------------------------------------------------------------------- /lec/lecture09.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlcoursemm/mlcoursemm2019spring/HEAD/lec/lecture09.pdf -------------------------------------------------------------------------------- /lec/lecture10.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlcoursemm/mlcoursemm2019spring/HEAD/lec/lecture10.pdf -------------------------------------------------------------------------------- /cheatsheets/numpy.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlcoursemm/mlcoursemm2019spring/HEAD/cheatsheets/numpy.pdf -------------------------------------------------------------------------------- 
/cheatsheets/pandas.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlcoursemm/mlcoursemm2019spring/HEAD/cheatsheets/pandas.pdf -------------------------------------------------------------------------------- /cheatsheets/python.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlcoursemm/mlcoursemm2019spring/HEAD/cheatsheets/python.pdf -------------------------------------------------------------------------------- /cheatsheets/scipy.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlcoursemm/mlcoursemm2019spring/HEAD/cheatsheets/scipy.pdf -------------------------------------------------------------------------------- /assignments/theory01.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlcoursemm/mlcoursemm2019spring/HEAD/assignments/theory01.pdf -------------------------------------------------------------------------------- /assignments/theory02.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlcoursemm/mlcoursemm2019spring/HEAD/assignments/theory02.pdf -------------------------------------------------------------------------------- /cheatsheets/matplotlib.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlcoursemm/mlcoursemm2019spring/HEAD/cheatsheets/matplotlib.pdf -------------------------------------------------------------------------------- /cheatsheets/scikit-learn.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlcoursemm/mlcoursemm2019spring/HEAD/cheatsheets/scikit-learn.pdf -------------------------------------------------------------------------------- 
/cheatsheets/jupyter-notebook.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlcoursemm/mlcoursemm2019spring/HEAD/cheatsheets/jupyter-notebook.pdf -------------------------------------------------------------------------------- /cheatsheets/README.md: -------------------------------------------------------------------------------- 1 | # Шпаргалки 2 | 3 | В данном разделе собраны краткие справочные материалы ("шпаргалки", cheat sheets) с сайтов [DataCamp](https://www.datacamp.com) и [DataScienceFree](http://www.datasciencefree.com) по языку программирования Python, библиотекам, используемым в машинном обучении, и средствам удобной работы и визуализации. 4 | -------------------------------------------------------------------------------- /prac/linear_regression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from sklearn.datasets import load_boston\n", 10 | "from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet\n", 11 | "from sklearn.metrics import mean_absolute_error, mean_squared_error\n", 12 | "from sklearn.model_selection import train_test_split" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": {}, 19 | "outputs": [ 20 | { 21 | "name": "stdout", 22 | "output_type": "stream", 23 | "text": [ 24 | "dict_keys(['data', 'target', 'feature_names', 'DESCR', 'filename'])\n" 25 | ] 26 | } 27 | ], 28 | "source": [ 29 | "data = load_boston()\n", 30 | "print(data.keys())" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 3, 36 | "metadata": {}, 37 | "outputs": [ 38 | { 39 | "data": { 40 | "text/plain": [ 41 | "\".. 
_boston_dataset:\\n\\nBoston house prices dataset\\n---------------------------\\n\\n**Data Set Characteristics:** \\n\\n :Number of Instances: 506 \\n\\n :Number of Attributes: 13 numeric/categorical predictive. Median Value (attribute 14) is usually the target.\\n\\n :Attribute Information (in order):\\n - CRIM per capita crime rate by town\\n - ZN proportion of residential land zoned for lots over 25,000 sq.ft.\\n - INDUS proportion of non-retail business acres per town\\n - CHAS Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)\\n - NOX nitric oxides concentration (parts per 10 million)\\n - RM average number of rooms per dwelling\\n - AGE proportion of owner-occupied units built prior to 1940\\n - DIS weighted distances to five Boston employment centres\\n - RAD index of accessibility to radial highways\\n - TAX full-value property-tax rate per $10,000\\n - PTRATIO pupil-teacher ratio by town\\n - B 1000(Bk - 0.63)^2 where Bk is the proportion of blacks by town\\n - LSTAT % lower status of the population\\n - MEDV Median value of owner-occupied homes in $1000's\\n\\n :Missing Attribute Values: None\\n\\n :Creator: Harrison, D. and Rubinfeld, D.L.\\n\\nThis is a copy of UCI ML housing dataset.\\nhttps://archive.ics.uci.edu/ml/machine-learning-databases/housing/\\n\\n\\nThis dataset was taken from the StatLib library which is maintained at Carnegie Mellon University.\\n\\nThe Boston house-price data of Harrison, D. and Rubinfeld, D.L. 'Hedonic\\nprices and the demand for clean air', J. Environ. Economics & Management,\\nvol.5, 81-102, 1978. Used in Belsley, Kuh & Welsch, 'Regression diagnostics\\n...', Wiley, 1980. N.B. Various transformations are used in the table on\\npages 244-261 of the latter.\\n\\nThe Boston house-price data has been used in many machine learning papers that address regression\\nproblems. \\n \\n.. 
topic:: References\\n\\n - Belsley, Kuh & Welsch, 'Regression diagnostics: Identifying Influential Data and Sources of Collinearity', Wiley, 1980. 244-261.\\n - Quinlan,R. (1993). Combining Instance-Based and Model-Based Learning. In Proceedings on the Tenth International Conference of Machine Learning, 236-243, University of Massachusetts, Amherst. Morgan Kaufmann.\\n\"" 42 | ] 43 | }, 44 | "execution_count": 3, 45 | "metadata": {}, 46 | "output_type": "execute_result" 47 | } 48 | ], 49 | "source": [ 50 | "data['DESCR']" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 4, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "x = data['data']\n", 60 | "y = data['target']" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 5, 66 | "metadata": {}, 67 | "outputs": [ 68 | { 69 | "name": "stdout", 70 | "output_type": "stream", 71 | "text": [ 72 | "(404, 13) (404,) (102, 13) (102,)\n" 73 | ] 74 | } 75 | ], 76 | "source": [ 77 | "trainX, testX, trainY, testY = train_test_split(x, y, random_state=42, test_size=0.2)\n", 78 | "print(trainX.shape, trainY.shape, testX.shape, testY.shape)" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 11, 84 | "metadata": {}, 85 | "outputs": [ 86 | { 87 | "data": { 88 | "text/plain": [ 89 | "21.488235294117644" 90 | ] 91 | }, 92 | "execution_count": 11, 93 | "metadata": {}, 94 | "output_type": "execute_result" 95 | } 96 | ], 97 | "source": [ 98 | "import numpy as np\n", 99 | "np.mean(testY)" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 6, 105 | "metadata": {}, 106 | "outputs": [ 107 | { 108 | "name": "stdout", 109 | "output_type": "stream", 110 | "text": [ 111 | "24.291119474973616\n", 112 | "3.189091965887853\n" 113 | ] 114 | } 115 | ], 116 | "source": [ 117 | "lr = LinearRegression()\n", 118 | "lr.fit(trainX, trainY)\n", 119 | "y_pred = lr.predict(testX)\n", 120 | "\n", 121 | "print(mean_squared_error(y_pred, testY))\n", 122 | 
"print(mean_absolute_error(y_pred, testY))" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 9, 128 | "metadata": {}, 129 | "outputs": [ 130 | { 131 | "name": "stdout", 132 | "output_type": "stream", 133 | "text": [ 134 | "24.377609808756368\n", 135 | "3.149302938467151\n" 136 | ] 137 | } 138 | ], 139 | "source": [ 140 | "lr = Ridge(alpha=0.5)\n", 141 | "lr.fit(trainX, trainY)\n", 142 | "y_pred = lr.predict(testX)\n", 143 | "\n", 144 | "print(mean_squared_error(y_pred, testY))\n", 145 | "print(mean_absolute_error(y_pred, testY))" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": 12, 151 | "metadata": {}, 152 | "outputs": [ 153 | { 154 | "name": "stdout", 155 | "output_type": "stream", 156 | "text": [ 157 | "24.409489761299696\n", 158 | "3.253539767368162\n" 159 | ] 160 | } 161 | ], 162 | "source": [ 163 | "lr = Lasso()\n", 164 | "lr.fit(trainX, trainY)\n", 165 | "y_pred = lr.predict(testX)\n", 166 | "\n", 167 | "print(mean_squared_error(y_pred, testY))\n", 168 | "print(mean_absolute_error(y_pred, testY))" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 13, 174 | "metadata": {}, 175 | "outputs": [ 176 | { 177 | "name": "stdout", 178 | "output_type": "stream", 179 | "text": [ 180 | "23.968733210204782\n", 181 | "3.244000641536975\n" 182 | ] 183 | } 184 | ], 185 | "source": [ 186 | "lr = ElasticNet()\n", 187 | "lr.fit(trainX, trainY)\n", 188 | "y_pred = lr.predict(testX)\n", 189 | "\n", 190 | "print(mean_squared_error(y_pred, testY))\n", 191 | "print(mean_absolute_error(y_pred, testY))" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": null, 197 | "metadata": {}, 198 | "outputs": [], 199 | "source": [] 200 | } 201 | ], 202 | "metadata": { 203 | "kernelspec": { 204 | "display_name": "Python 3", 205 | "language": "python", 206 | "name": "python3" 207 | }, 208 | "language_info": { 209 | "codemirror_mode": { 210 | "name": "ipython", 211 | "version": 3 212 | }, 
213 | "file_extension": ".py", 214 | "mimetype": "text/x-python", 215 | "name": "python", 216 | "nbconvert_exporter": "python", 217 | "pygments_lexer": "ipython3", 218 | "version": "3.7.2" 219 | } 220 | }, 221 | "nbformat": 4, 222 | "nbformat_minor": 2 223 | } 224 | -------------------------------------------------------------------------------- /prac/linear_classifier.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 35, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from sklearn.datasets import load_breast_cancer\n", 10 | "from sklearn.model_selection import train_test_split" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 36, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "X, y = load_breast_cancer(return_X_y=True)" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 37, 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "name": "stdout", 29 | "output_type": "stream", 30 | "text": [ 31 | "(455, 30) (455,) (114, 30) (114,)\n" 32 | ] 33 | } 34 | ], 35 | "source": [ 36 | "trainX, testX, trainY, testY = train_test_split(X, y, random_state=42, test_size=0.2)\n", 37 | "print(trainX.shape, trainY.shape, testX.shape, testY.shape)" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 38, 43 | "metadata": {}, 44 | "outputs": [ 45 | { 46 | "name": "stdout", 47 | "output_type": "stream", 48 | "text": [ 49 | "[1 0 0 1 1 0 0 0 1 1 1 0 1 0 1 0 1 1 1 0 0 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0\n", 50 | " 1 0 1 1 0 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 0 0 1 1 0 0 1 1 1 0 0 1 1 0 0 1 0\n", 51 | " 1 1 1 0 1 1 0 1 0 0 0 0 0 0 1 1 1 1 1 1 1 1 0 0 1 0 0 1 0 0 1 1 1 0 1 1 0\n", 52 | " 1 1 0]\n" 53 | ] 54 | } 55 | ], 56 | "source": [ 57 | "print(testY)" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "## Logistic Regression" 65 | ] 66 | }, 67 | { 68 | 
"cell_type": "code", 69 | "execution_count": 42, 70 | "metadata": {}, 71 | "outputs": [ 72 | { 73 | "name": "stdout", 74 | "output_type": "stream", 75 | "text": [ 76 | "[1 0 0 1 1 0 0 0 1 1 1 0 1 0 1 0 1 1 1 0 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0\n", 77 | " 1 0 1 1 0 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 0 1 1 1 0 0 1 1 1 0 0 1 1 0 0 1 0\n", 78 | " 1 1 1 1 1 1 0 1 1 0 0 0 0 0 1 1 1 1 1 1 1 1 0 0 1 0 0 1 0 0 1 1 1 0 1 1 0\n", 79 | " 1 0 0]\n" 80 | ] 81 | } 82 | ], 83 | "source": [ 84 | "from sklearn.linear_model import LogisticRegression\n", 85 | "clf = LogisticRegression(random_state=42, solver='liblinear').fit(trainX, trainY)\n", 86 | "out_y = clf.predict(testX)\n", 87 | "print(out_y)" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 43, 93 | "metadata": {}, 94 | "outputs": [ 95 | { 96 | "name": "stdout", 97 | "output_type": "stream", 98 | "text": [ 99 | "[0.16491424 0.83508576]\n" 100 | ] 101 | } 102 | ], 103 | "source": [ 104 | "out_prob = clf.predict_proba(testX) \n", 105 | "print(out_prob[0])" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 44, 111 | "metadata": {}, 112 | "outputs": [ 113 | { 114 | "name": "stdout", 115 | "output_type": "stream", 116 | "text": [ 117 | "Logistic score: 0.956140350877193\n" 118 | ] 119 | } 120 | ], 121 | "source": [ 122 | "out_score = clf.score(testX, testY)\n", 123 | "print('Logistic score:', out_score)" 124 | ] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "metadata": {}, 129 | "source": [ 130 | "## Perceptron" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 60, 136 | "metadata": {}, 137 | "outputs": [ 138 | { 139 | "name": "stdout", 140 | "output_type": "stream", 141 | "text": [ 142 | "[0 0 0 1 1 0 0 0 1 1 1 0 1 0 1 0 1 1 1 0 0 1 0 1 1 1 1 1 1 0 1 1 1 0 1 1 0\n", 143 | " 1 0 1 1 0 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 0 0 1 1 0 0 1 1 1 0 0 1 1 0 0 1 1\n", 144 | " 1 1 1 0 1 1 0 1 1 0 0 0 0 0 1 1 1 1 0 1 1 1 0 0 1 0 0 1 0 0 1 1 1 0 1 0 0\n", 145 | " 1 1 0]\n" 
146 | ] 147 | }, 148 | { 149 | "name": "stderr", 150 | "output_type": "stream", 151 | "text": [ 152 | "/Users/alex/Programming/pytenv/py3/lib/python3.7/site-packages/sklearn/linear_model/stochastic_gradient.py:166: FutureWarning: max_iter and tol parameters have been added in Perceptron in 0.19. If both are left unset, they default to max_iter=5 and tol=None. If tol is not None, max_iter defaults to max_iter=1000. From 0.21, default max_iter will be 1000, and default tol will be 1e-3.\n", 153 | " FutureWarning)\n" 154 | ] 155 | } 156 | ], 157 | "source": [ 158 | "from sklearn.linear_model import Perceptron\n", 159 | "clf = Perceptron(random_state=42).fit(trainX, trainY)\n", 160 | "out_y = clf.predict(testX)\n", 161 | "print(out_y)" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 61, 167 | "metadata": {}, 168 | "outputs": [ 169 | { 170 | "name": "stdout", 171 | "output_type": "stream", 172 | "text": [ 173 | "0.9473684210526315\n" 174 | ] 175 | } 176 | ], 177 | "source": [ 178 | "out_score = clf.score(testX, testY)\n", 179 | "print('Perceptron score:', out_score)" 180 | ] 181 | }, 182 | { 183 | "cell_type": "markdown", 184 | "metadata": {}, 185 | "source": [ 186 | "## SGD" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": 63, 192 | "metadata": {}, 193 | "outputs": [ 194 | { 195 | "name": "stdout", 196 | "output_type": "stream", 197 | "text": [ 198 | "Hinge SGD: [0 0 0 1 1 0 0 0 1 0 1 0 1 0 1 0 1 1 1 0 0 1 0 1 1 1 1 1 1 0 1 1 1 0 1 1 0\n", 199 | " 0 0 0 1 0 1 0 1 1 1 1 0 1 0 0 1 1 1 0 1 0 0 1 1 0 0 1 1 0 0 0 1 1 0 0 1 0\n", 200 | " 1 1 1 0 1 1 0 1 0 0 0 0 0 0 1 1 0 0 0 1 1 1 0 0 1 0 0 1 0 0 0 0 1 0 0 0 0\n", 201 | " 1 0 0]\n" 202 | ] 203 | }, 204 | { 205 | "name": "stderr", 206 | "output_type": "stream", 207 | "text": [ 208 | "/Users/alex/Programming/pytenv/py3/lib/python3.7/site-packages/sklearn/linear_model/stochastic_gradient.py:166: FutureWarning: max_iter and tol parameters have been added in SGDClassifier 
in 0.19. If both are left unset, they default to max_iter=5 and tol=None. If tol is not None, max_iter defaults to max_iter=1000. From 0.21, default max_iter will be 1000, and default tol will be 1e-3.\n", 209 | " FutureWarning)\n" 210 | ] 211 | } 212 | ], 213 | "source": [ 214 | "from sklearn.linear_model import SGDClassifier\n", 215 | "# Optimal learning rate: eta = 1.0 / (alpha * (t + t0))\n", 216 | "# Losses: \n", 217 | "# ‘log’ loss gives logistic regression, \n", 218 | "# ‘perceptron’ is the linear loss used by the perceptron algorithm\n", 219 | "# 'hinge' means SVM (wait for the next lecture): max(0, 1 - y * f(x))\n", 220 | "clf = SGDClassifier(loss='hinge', penalty='l2', alpha=1.0, learning_rate='optimal', eta0=1.0, random_state=42).fit(trainX, trainY) \n", 221 | "out_y = clf.predict(testX)\n", 222 | "print(out_y)" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": 64, 228 | "metadata": {}, 229 | "outputs": [ 230 | { 231 | "name": "stdout", 232 | "output_type": "stream", 233 | "text": [ 234 | "Hinge SGD: 0.8508771929824561\n" 235 | ] 236 | } 237 | ], 238 | "source": [ 239 | "out_score = clf.score(testX, testY)\n", 240 | "print('Hinge SGD:', out_score)" 241 | ] 242 | }, 243 | { 244 | "cell_type": "markdown", 245 | "metadata": {}, 246 | "source": [ 247 | "Смотрите другие параметры на страничке класса: https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.SGDClassifier.html" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": null, 253 | "metadata": {}, 254 | "outputs": [], 255 | "source": [] 256 | } 257 | ], 258 | "metadata": { 259 | "kernelspec": { 260 | "display_name": "Python 3", 261 | "language": "python", 262 | "name": "python3" 263 | }, 264 | "language_info": { 265 | "codemirror_mode": { 266 | "name": "ipython", 267 | "version": 3 268 | }, 269 | "file_extension": ".py", 270 | "mimetype": "text/x-python", 271 | "name": "python", 272 | "nbconvert_exporter": "python", 273 | "pygments_lexer": 
"ipython3", 274 | "version": "3.7.2" 275 | } 276 | }, 277 | "nbformat": 4, 278 | "nbformat_minor": 2 279 | } 280 | -------------------------------------------------------------------------------- /prac/svm.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from sklearn.datasets import load_breast_cancer\n", 10 | "from sklearn.model_selection import train_test_split" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "X, y = load_breast_cancer(return_X_y=True)" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 3, 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "name": "stdout", 29 | "output_type": "stream", 30 | "text": [ 31 | "(455, 30) (455,) (114, 30) (114,)\n" 32 | ] 33 | } 34 | ], 35 | "source": [ 36 | "trainX, testX, trainY, testY = train_test_split(X, y, random_state=42, test_size=0.2)\n", 37 | "print(trainX.shape, trainY.shape, testX.shape, testY.shape)" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 4, 43 | "metadata": {}, 44 | "outputs": [ 45 | { 46 | "name": "stdout", 47 | "output_type": "stream", 48 | "text": [ 49 | "[1 0 0 1 1 0 0 0 1 1 1 0 1 0 1 0 1 1 1 0 0 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0\n", 50 | " 1 0 1 1 0 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 0 0 1 1 0 0 1 1 1 0 0 1 1 0 0 1 0\n", 51 | " 1 1 1 0 1 1 0 1 0 0 0 0 0 0 1 1 1 1 1 1 1 1 0 0 1 0 0 1 0 0 1 1 1 0 1 1 0\n", 52 | " 1 1 0]\n" 53 | ] 54 | } 55 | ], 56 | "source": [ 57 | "print(testY)" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "## Linear SVM" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 5, 70 | "metadata": {}, 71 | "outputs": [ 72 | { 73 | "name": "stdout", 74 | "output_type": "stream", 75 | "text": [ 76 | "[1 0 0 1 1 0 
0 0 1 1 1 0 1 1 1 0 1 1 1 0 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0\n", 77 | " 1 0 1 1 0 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 0 1 1 1 0 0 1 1 1 0 0 1 1 1 0 1 1\n", 78 | " 1 1 1 1 1 1 0 1 1 0 0 1 1 0 1 1 1 1 1 1 1 1 0 0 1 0 0 1 0 0 1 1 1 0 1 1 0\n", 79 | " 1 1 0]\n" 80 | ] 81 | }, 82 | { 83 | "name": "stderr", 84 | "output_type": "stream", 85 | "text": [ 86 | "/Users/alex/Programming/pytenv/py3/lib/python3.7/site-packages/sklearn/svm/base.py:931: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n", 87 | " \"the number of iterations.\", ConvergenceWarning)\n" 88 | ] 89 | } 90 | ], 91 | "source": [ 92 | "from sklearn.svm import LinearSVC\n", 93 | "clf = LinearSVC(random_state=42).fit(trainX, trainY)\n", 94 | "out_y = clf.predict(testX)\n", 95 | "print(out_y)" 96 | ] 97 | }, 98 | { 99 | "cell_type": "markdown", 100 | "metadata": {}, 101 | "source": [ 102 | "Можно посмотреть сами значения, по которым принимается решение (берется sign)" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 6, 108 | "metadata": {}, 109 | "outputs": [ 110 | { 111 | "name": "stdout", 112 | "output_type": "stream", 113 | "text": [ 114 | "[ 0.69199762 -4.38759904 -1.01912235 1.93128467 1.89234461\n", 115 | " -3.2964059 -5.26378109 -0.40507223 1.70126381 1.27374216\n", 116 | " 1.18599942 -1.25734799 1.34757849 0.71183508 1.5975907\n", 117 | " -0.0931177 1.58225202 1.88117675 1.37601957 -2.6630822\n", 118 | " 0.63153195 1.26480914 -3.93314904 1.40809652 1.47619635\n", 119 | " 2.06311363 1.71906575 1.58762732 1.54328865 -3.88333982\n", 120 | " 1.61655342 1.65903265 1.11010878 0.87730645 1.66028412\n", 121 | " 1.41409448 -1.00371193 1.22816212 -2.6784796 0.91130064\n", 122 | " 1.60156099 -0.93456134 1.87849568 1.27700492 1.75674449\n", 123 | " 1.08976397 1.63900489 1.15090368 1.07731746 1.67746488\n", 124 | " -2.09635816 -3.38571881 1.38992968 1.76618642 1.84212336\n", 125 | " 1.08090589 1.68059145 -5.58415347 0.85295292 1.8812375\n", 126 | " 
1.33741148 -2.65809531 -4.32228106 1.23188758 1.42241936\n", 127 | " 1.07108724 -1.67409264 -1.76794146 1.62165487 1.3786163\n", 128 | " 0.02198963 -2.02151827 1.52016319 0.73570675 0.98269703\n", 129 | " 1.66407969 1.11386601 0.52615012 1.46775822 1.54645981\n", 130 | " -0.9525459 1.34676019 0.65319738 -4.30307439 -1.15092702\n", 131 | " 0.48107017 0.20521661 -2.07857954 1.82882935 1.51344997\n", 132 | " 1.18421614 1.00694626 1.01499903 1.3031594 1.70957936\n", 133 | " 1.45616033 -3.09235102 -2.50862498 1.87172908 -1.30467345\n", 134 | " -1.95359287 1.86567599 -3.14184634 -0.93838521 0.68323629\n", 135 | " 0.94823331 1.4522158 -8.79910534 1.26883739 0.8080791\n", 136 | " -1.48407701 1.57092397 0.64272291 -15.02194898]\n", 137 | "[ True True True True True True True True True True True True\n", 138 | " True True True True True True True True True True True True\n", 139 | " True True True True True True True True True True True True\n", 140 | " True True True True True True True True True True True True\n", 141 | " True True True True True True True True True True True True\n", 142 | " True True True True True True True True True True True True\n", 143 | " True True True True True True True True True True True True\n", 144 | " True True True True True True True True True True True True\n", 145 | " True True True True True True True True True True True True\n", 146 | " True True True True True True]\n" 147 | ] 148 | } 149 | ], 150 | "source": [ 151 | "import numpy as np\n", 152 | "scores = clf.decision_function(testX)\n", 153 | "print(scores)\n", 154 | "print((scores > 0).astype(np.int32) == out_y)" 155 | ] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": {}, 160 | "source": [ 161 | "Можно распечатать коэффициенты линейного классификатора и его свободный член" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 7, 167 | "metadata": {}, 168 | "outputs": [ 169 | { 170 | "name": "stdout", 171 | "output_type": "stream", 172 | 
"text": [ 173 | "w = [[ 3.43911231e-02 1.94773546e-02 7.65612661e-02 2.75380706e-04\n", 174 | " -6.25380147e-04 -4.17529562e-03 -5.63497625e-03 -2.30135205e-03\n", 175 | " -8.68646191e-04 -1.64102330e-04 1.02674676e-03 6.46553280e-03\n", 176 | " -6.54248109e-03 -9.10916795e-03 -2.66419626e-05 -1.01490971e-03\n", 177 | " -1.24637205e-03 -3.27696470e-04 -2.14486359e-04 -7.67908469e-05\n", 178 | " 3.54632257e-02 -3.85380912e-02 -1.62523046e-02 -6.15154097e-03\n", 179 | " -1.24146310e-03 -1.48087301e-02 -1.73480099e-02 -4.89012805e-03\n", 180 | " -3.60320839e-03 -1.19254538e-03]]\n", 181 | "w0 = [0.00605701]\n" 182 | ] 183 | } 184 | ], 185 | "source": [ 186 | "print('w =', clf.coef_)\n", 187 | "print('w0 =', clf.intercept_)" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": 8, 193 | "metadata": {}, 194 | "outputs": [ 195 | { 196 | "name": "stdout", 197 | "output_type": "stream", 198 | "text": [ 199 | "Linear score: 0.9210526315789473\n" 200 | ] 201 | } 202 | ], 203 | "source": [ 204 | "out_score = clf.score(testX, testY)\n", 205 | "print('Linear score:', out_score)" 206 | ] 207 | }, 208 | { 209 | "cell_type": "markdown", 210 | "metadata": {}, 211 | "source": [ 212 | "## Kernel SVM" 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": 9, 218 | "metadata": {}, 219 | "outputs": [ 220 | { 221 | "name": "stdout", 222 | "output_type": "stream", 223 | "text": [ 224 | "[1 0 0 1 1 0 0 0 1 1 1 0 1 0 1 0 1 1 1 0 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0\n", 225 | " 1 0 1 1 0 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 0 0 1 1 0 0 1 1 1 0 0 1 1 0 0 1 0\n", 226 | " 1 1 1 1 1 1 0 1 1 0 0 0 0 0 1 1 1 1 1 1 1 1 0 0 1 0 0 1 0 0 1 1 1 0 1 1 0\n", 227 | " 1 0 0]\n" 228 | ] 229 | } 230 | ], 231 | "source": [ 232 | "from sklearn.svm import SVC\n", 233 | "clf = SVC(random_state=42, C=1.0, kernel='poly', degree=3, gamma='scale').fit(trainX, trainY)\n", 234 | "out_y = clf.predict(testX)\n", 235 | "print(out_y)" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 
240 | "execution_count": 10, 241 | "metadata": {}, 242 | "outputs": [ 243 | { 244 | "name": "stdout", 245 | "output_type": "stream", 246 | "text": [ 247 | "Poly score: 0.9649122807017544\n" 248 | ] 249 | } 250 | ], 251 | "source": [ 252 | "out_score = clf.score(testX, testY)\n", 253 | "print('Poly score:', out_score)" 254 | ] 255 | }, 256 | { 257 | "cell_type": "markdown", 258 | "metadata": {}, 259 | "source": [ 260 | "## См. также" 261 | ] 262 | }, 263 | { 264 | "cell_type": "markdown", 265 | "metadata": {}, 266 | "source": [ 267 | "* **LibSVM** - https://www.csie.ntu.edu.tw/~cjlin/libsvm/ - C++/Java реализация SVM общего вида со множеством интерфейсов в сторонних ЯП\n", 268 | "* **LibLinear** - https://www.csie.ntu.edu.tw/~cjlin/liblinear/ - версия LibSVM исключительно для линейного ядра (работает в этом случае гораздо быстрее)" 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": null, 274 | "metadata": {}, 275 | "outputs": [], 276 | "source": [] 277 | } 278 | ], 279 | "metadata": { 280 | "kernelspec": { 281 | "display_name": "Python 3", 282 | "language": "python", 283 | "name": "python3" 284 | }, 285 | "language_info": { 286 | "codemirror_mode": { 287 | "name": "ipython", 288 | "version": 3 289 | }, 290 | "file_extension": ".py", 291 | "mimetype": "text/x-python", 292 | "name": "python", 293 | "nbconvert_exporter": "python", 294 | "pygments_lexer": "ipython3", 295 | "version": "3.7.2" 296 | } 297 | }, 298 | "nbformat": 4, 299 | "nbformat_minor": 2 300 | } 301 | -------------------------------------------------------------------------------- /prac/knn_example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Введение в scikit-learn" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "### Метод k ближайших соседей" 15 | ] 16 | }, 17 | { 18 | "cell_type": 
"markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "#### Скачивание данных" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 1, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "#! wget https://s3.amazonaws.com/img-datasets/mnist.npz" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 2, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "import numpy as np\n", 40 | "np.random.seed(123) # for reproducibility" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "#### Разделение данных на обучение и тест" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 3, 53 | "metadata": {}, 54 | "outputs": [ 55 | { 56 | "name": "stdout", 57 | "output_type": "stream", 58 | "text": [ 59 | "['x_test', 'x_train', 'y_train', 'y_test']\n", 60 | "(54000, 28, 28) (6000, 28, 28) (54000,) (6000,)\n" 61 | ] 62 | } 63 | ], 64 | "source": [ 65 | "from sklearn.model_selection import train_test_split\n", 66 | "\n", 67 | "\n", 68 | "f = np.load('mnist.npz')\n", 69 | "print(f.files)\n", 70 | "x_train, y_train = f['x_train'], f['y_train']\n", 71 | "\n", 72 | "X_train, X_test, y_train, y_test = train_test_split(\n", 73 | " x_train, y_train, test_size=0.1, random_state=42)\n", 74 | " \n", 75 | "print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)" 76 | ] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "metadata": {}, 81 | "source": [ 82 | "#### Визуализация данных" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 4, 88 | "metadata": {}, 89 | "outputs": [ 90 | { 91 | "data": { 92 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAP8AAAD8CAYAAAC4nHJkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAADn5JREFUeJzt3X+QVfV5x/HP47Kg4M+NlWwBxTBAh5GKyRadSq2JiTFMUswfcSSZBFtbkonk12SaOjYztX80Y9OqdZpISiqVtJaYmcRKpmJimGQcE0pZrRUQFXSIgshicOpaFZbdp3/sIV3dPd97995zz7m7z/s1s7P3nuece5458Nlz7/3ee77m7gIQz0lVNwCgGoQfCIrwA0ERfiAowg8ERfiBoAg/EBThB4Ii/EBQU8rc2VSb5idrRpm7BEJ5U/+rY37U6lm3qfCb2VWS7pDUIekf3f2W1Pona4Yutiua2SWAhG2+pe51G37ab2Ydkr4p6UOSFklaaWaLGn08AOVq5jX/Ukl73f05dz8m6buSVhTTFoBWayb8syS9MOL+/mzZW5jZajPrNbPeAR1tYncAitTyd/vdfZ2797h7T6emtXp3AOrUTPgPSJoz4v7sbBmACaCZ8G+XNN/MzjezqZKulbSpmLYAtFrDQ33uftzM1kj6kYaH+ta7+67COgPQUk2N87v7A5IeKKgXACXi471AUIQfCIrwA0ERfiAowg8ERfiBoAg/EBThB4Ii/EBQhB8IivADQRF+ICjCDwRV6qW7EU/Hgnm5tdfnd7V03zN2HcqtHd/3fEv3PRFw5geCIvxAUIQfCIrwA0ERfiAowg8ERfiBoBjnR1NS4/iSNPStN3NrDy38VtHtvMV7bv1cbq37Nsb5OfMDQRF+ICjCDwRF+IGgCD8QFOEHgiL8QFBNjfOb2T5J/ZIGJR13954imsLEMedfXkzWvzHrkZbt+2N7lyfrszcfzq0NFt3MBFTEh3ze6+4vF/A4AErE034gqGbD75J+bGaPmtnqIhoCUI5mn/Yvc/cDZnaOpIfM7Cl3f3jkCtkfhdWSdLKmN7k7AEVp6szv7gey332S7pO0dIx11rl7j7v3dGpaM7sDUKCGw29mM8zstBO3JV0paWdRjQForWae9s+UdJ+ZnXicf3X3BwvpCkDLNRx+d39O0oUF9oIKTDlvTrI+eM6ZyfqdszYk60Pj7uj/vTKUfy0ASbrozBeS9c3vuSy3dsbuPQ31NJkw1AcERfiBoAg/EBThB4Ii/EBQhB8Iikt3T3IdixYk63Znf7L+w/l319iDjauf8bj437+UrC/4zH8m62foP4psZ9LhzA8ERfiBoAg/EBThB4Ii/EBQhB8IivADQTHOPwl0vKMrtzb9H44kt934rh8V3U5hFv1lehrt4yX1MVlx5geCIvxAUIQfCIrwA0ERfiAowg8ERfiBoBjnnwSm/1v+3/BWj+O/4ceS9cUPrsmtLVz7RvrBDz/dSEuoE2d+ICjCDwRF+IGgCD8QFOEHgiL8QFCEHwiq5ji/ma2X9GFJfe5+QbasS9K9kuZK2ifpGnd/pXVtxjZ4+buT9T/uvqekTkZbvOnzyfqCz+ZfW9+LbgbjUs+Z/25JV71t2Y2Strj7fElbsvsAJpCa4Xf3hyW9/XIwKyRtyG5vkHR1wX0BaLFGX/PPdPeD2e2XJM0sqB8AJWn6DT93dyVevpnZajPrNbPeAR1tdncACtJo+A+ZWbckZb/78lZ093Xu3uPuPZ2a1uDuABSt0fBvkrQqu71K0v3FtAOgLDXDb2YbJW2VtNDM9pvZ9ZJukfQBM9sj6f3ZfQATSM1xfndfmVO6ouBeJq29t1+SrA+dMpSsf+bSnybrV5zy+rh7OmH5U+mBmue3zk7WF3x1a8P7btbAlT3pFYbyP0mw/31Tk5ue0mfJ+jv/7hfpfU8AfMIPCIrwA0ERfiAowg8ERfiBoAg/EBSX7i7BWU+mh4223ry
2pE5GqzWUN7fCobzOn3Un6yvfuSlZH/T8c9sHZ+xNbntoMD0UeNcnLkvWa3n2d95savsicOYHgiL8QFCEHwiK8ANBEX4gKMIPBEX4gaAY5y/CJb+dLN/71b9J1k/S9KZ2n5omu+altSscx6913O44/85kfe6U9HHrsPxz26Cnt+3uSJb197/Z3Fd6lyt9OfYycOYHgiL8QFCEHwiK8ANBEX4gKMIPBEX4gaAY56/TsQ/mXyZ6xW0/SW577pRTkvWhJierXvzgmtxaaorsInQsmJes773unNzaZ1dsTm7b7HEb8sFkvZUu2vapZH2WdpXUST7O/EBQhB8IivADQRF+ICjCDwRF+IGgCD8QVM1xfjNbL+nDkvrc/YJs2c2S/kTS4Wy1m9z9gVY12Q4O/H5nbu2GM58tsZPRFq59I7dW6xMEL33pd5P1ruUHkvUFZ7yYrN8/63s1OpiYFv/8umR93hcOJ+vHC+ylUfWc+e+WdNUYy2939yXZz6QOPjAZ1Qy/uz8s6UgJvQAoUTOv+deY2RNmtt7MziqsIwClaDT8ayXNk7RE0kFJt+ataGarzazXzHoHdLTB3QEoWkPhd/dD7j7o7kOSvi1paWLdde7e4+49nZrWaJ8ACtZQ+M1s5PSpH5W0s5h2AJSlnqG+jZIul3S2me2X9BeSLjezJRoeSdon6dMt7BFAC9QMv7uvHGPxXS3opVIdp5+erA90Vffd8AvXfi5ZP3fof3JrL34lPY6/ec3Xk/WZHenv1J8kS9aHktX29ZGn/yBZP//6fcn68f7+ArtpDT7hBwRF+IGgCD8QFOEHgiL8QFCEHwiKS3dnXv+9hcn6Mx9ZW1Inow0ufi1Zv/EPN+bWLj15oMajp4fyJquXB/O/Bi1Jz285L1mf09/cFN3tgDM/EBThB4Ii/EBQhB8IivADQRF+ICjCDwTFOP8EsHPZP1XdQq4OS58/qpwmO+V967+SrJ/7VxN/HL8WzvxAUIQfCIrwA0ERfiAowg8ERfiBoAg/EBTj/Jn3fu3nyfr+4/nf/549JeZ34iVp0Ft3ce7/OpZ+7E9t/6Nkfe5f528/d9djyW0n6iXHx4MzPxAU4QeCIvxAUIQfCIrwA0ERfiAowg8EVXOc38zmSPqOpJmSXNI6d7/DzLok3StprqR9kq5x91da12pr/eLCqcn6PV/709zarlXfKLqdCWPHsfS8AJ9/5tqGH/vUj+dPPS5J5/1qR7LuDdaiqOfMf1zSl919kaRLJN1gZosk3Shpi7vPl7Qluw9ggqgZfnc/6O6PZbf7Je2WNEvSCkkbstU2SLq6VU0CKN64XvOb2VxJF0naJmmmux/MSi9p+GUBgAmi7vCb2amSvi/pi+7+6siau7tyXkaZ2Woz6zWz3gEdbapZAMWpK/xm1qnh4N/j7j/IFh8ys+6s3i2pb6xt3X2du/e4e0+nphXRM4AC1Ay/mZmkuyTtdvfbRpQ2SVqV3V4l6f7i2wPQKvV8pfdSSZ+UtMPMHs+W3STpFknfM7PrJf1S0jWtabE9zP5Z/pDWBUNrktseOzt9+eoqp/9e/lT6fdrnt85O1qcftGT9nG82fgns9rzo9+RRM/zu/oikvH/hK4ptB0BZ+IQfEBThB4Ii/EBQhB8IivADQRF+ICgb/mRuOU63Lr/Y4o0OnnTaacn6G8t+q6RORpu+91fJ+uCe50rqBEXY5lv0qh9Jf/giw5kfCIrwA0ERfiAowg8ERfiBoAg/EBThB4Jiiu4SDPX3J+vTNm8vqZPR+M58XJz5gaAIPxAU4QeCIvxAUIQfCIrwA0ERfiAowg8ERfiBoAg/EBThB4Ii/EBQhB8IivADQRF+IKia4TezOWb2UzN70sx2mdkXsuU3m9kBM3s8+1ne+nYBFKWei3kcl/Rld3/MzE6T9KiZPZTVbnf3v21dewBapWb43f2gpIPZ7X4z2y1pVqsbA9Ba43rNb2ZzJV0kaVu2aI2ZPWFm683srJxtVptZr5n1Duh
oU80CKE7d4TezUyV9X9IX3f1VSWslzZO0RMPPDG4dazt3X+fuPe7e06lpBbQMoAh1hd/MOjUc/Hvc/QeS5O6H3H3Q3YckfVvS0ta1CaBo9bzbb5LukrTb3W8bsbx7xGoflbSz+PYAtEo97/ZfKumTknaY2ePZspskrTSzJZJc0j5Jn25JhwBaop53+x+RNNZ83w8U3w6AsvAJPyAowg8ERfiBoAg/EBThB4Ii/EBQhB8IivADQRF+ICjCDwRF+IGgCD8QFOEHgiL8QFDm7uXtzOywpF+OWHS2pJdLa2B82rW3du1LordGFdnbee7+G/WsWGr4R+3crNfdeyprIKFde2vXviR6a1RVvfG0HwiK8ANBVR3+dRXvP6Vde2vXviR6a1QlvVX6mh9Adao+8wOoSCXhN7OrzOxpM9trZjdW0UMeM9tnZjuymYd7K+5lvZn1mdnOEcu6zOwhM9uT/R5zmrSKemuLmZsTM0tXeuzabcbr0p/2m1mHpGckfUDSfknbJa109ydLbSSHme2T1OPulY8Jm9llkl6T9B13vyBb9nVJR9z9luwP51nu/mdt0tvNkl6reubmbEKZ7pEzS0u6WtJ1qvDYJfq6RhUctyrO/Esl7XX359z9mKTvSlpRQR9tz90flnTkbYtXSNqQ3d6g4f88pcvprS24+0F3fyy73S/pxMzSlR67RF+VqCL8syS9MOL+frXXlN8u6cdm9qiZra66mTHMzKZNl6SXJM2sspkx1Jy5uUxvm1m6bY5dIzNeF403/EZb5u7vlvQhSTdkT2/bkg+/Zmun4Zq6Zm4uyxgzS/9alceu0Rmvi1ZF+A9ImjPi/uxsWVtw9wPZ7z5J96n9Zh8+dGKS1Ox3X8X9/Fo7zdw81szSaoNj104zXlcR/u2S5pvZ+WY2VdK1kjZV0McoZjYjeyNGZjZD0pVqv9mHN0lald1eJen+Cnt5i3aZuTlvZmlVfOzabsZrdy/9R9JyDb/j/6ykP6+ih5y+3iXpv7OfXVX3Jmmjhp8GDmj4vZHrJb1D0hZJeyT9RFJXG/X2z5J2SHpCw0Hrrqi3ZRp+Sv+EpMezn+VVH7tEX5UcNz7hBwTFG35AUIQfCIrwA0ERfiAowg8ERfiBoAg/EBThB4L6P0eSVUgGD+hzAAAAAElFTkSuQmCC\n", 93 | "text/plain": [ 94 | "
" 95 | ] 96 | }, 97 | "metadata": { 98 | "needs_background": "light" 99 | }, 100 | "output_type": "display_data" 101 | }, 102 | { 103 | "name": "stdout", 104 | "output_type": "stream", 105 | "text": [ 106 | "label is 6\n" 107 | ] 108 | } 109 | ], 110 | "source": [ 111 | "import matplotlib.pyplot as plt\n", 112 | "%matplotlib inline \n", 113 | "\n", 114 | "index = 105\n", 115 | "\n", 116 | "plt.imshow(X_train[index])\n", 117 | "plt.show()\n", 118 | "print('label is %d' % y_train[index])" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "#### Препроцессинг данных" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": 5, 131 | "metadata": {}, 132 | "outputs": [ 133 | { 134 | "name": "stdout", 135 | "output_type": "stream", 136 | "text": [ 137 | "(54000, 784) (6000, 784)\n" 138 | ] 139 | } 140 | ], 141 | "source": [ 142 | "x_train = X_train.reshape((len(X_train), -1))\n", 143 | "x_test = X_test.reshape((len(X_test), -1))\n", 144 | "\n", 145 | "print(x_train.shape, x_test.shape)" 146 | ] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": {}, 151 | "source": [ 152 | "#### Обучение модели одного ближайшего соседа" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 6, 158 | "metadata": {}, 159 | "outputs": [], 160 | "source": [ 161 | "from sklearn.neighbors import KNeighborsClassifier" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 7, 167 | "metadata": {}, 168 | "outputs": [ 169 | { 170 | "data": { 171 | "text/plain": [ 172 | "KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',\n", 173 | " metric_params=None, n_jobs=-1, n_neighbors=1, p=2,\n", 174 | " weights='uniform')" 175 | ] 176 | }, 177 | "execution_count": 7, 178 | "metadata": {}, 179 | "output_type": "execute_result" 180 | } 181 | ], 182 | "source": [ 183 | "knn_model = KNeighborsClassifier(n_neighbors=1, n_jobs=-1)\n", 184 | "\n", 185 | 
"knn_model.fit(x_train, y_train)" 186 | ] 187 | }, 188 | { 189 | "cell_type": "markdown", 190 | "metadata": {}, 191 | "source": [ 192 | "#### Тестирование модели" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": 8, 198 | "metadata": {}, 199 | "outputs": [], 200 | "source": [ 201 | "N = 1000\n", 202 | "x_test = x_test[:N]\n", 203 | "y_test = y_test[:N]\n", 204 | "\n", 205 | "y_pred = knn_model.predict(x_test)" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": 9, 211 | "metadata": {}, 212 | "outputs": [ 213 | { 214 | "name": "stdout", 215 | "output_type": "stream", 216 | "text": [ 217 | "0.974\n" 218 | ] 219 | } 220 | ], 221 | "source": [ 222 | "from sklearn.metrics import accuracy_score\n", 223 | "\n", 224 | "accuracy = accuracy_score(y_test, y_pred)\n", 225 | "\n", 226 | "print(accuracy)" 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": 10, 232 | "metadata": {}, 233 | "outputs": [ 234 | { 235 | "name": "stdout", 236 | "output_type": "stream", 237 | "text": [ 238 | "0.973\n" 239 | ] 240 | } 241 | ], 242 | "source": [ 243 | "knn_model = KNeighborsClassifier(n_neighbors=5, n_jobs=-1)\n", 244 | "\n", 245 | "knn_model.fit(x_train, y_train)\n", 246 | "y_pred = knn_model.predict(x_test)\n", 247 | "accuracy = accuracy_score(y_test, y_pred)\n", 248 | "\n", 249 | "print(accuracy)" 250 | ] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "execution_count": 11, 255 | "metadata": {}, 256 | "outputs": [ 257 | { 258 | "name": "stdout", 259 | "output_type": "stream", 260 | "text": [ 261 | "0.976\n" 262 | ] 263 | } 264 | ], 265 | "source": [ 266 | "knn_model = KNeighborsClassifier(n_neighbors=5, weights='distance', n_jobs=-1)\n", 267 | "\n", 268 | "knn_model.fit(x_train, y_train)\n", 269 | "y_pred = knn_model.predict(x_test)\n", 270 | "accuracy = accuracy_score(y_test, y_pred)\n", 271 | "\n", 272 | "print(accuracy)" 273 | ] 274 | } 275 | ], 276 | "metadata": { 277 | "kernelspec": { 278 | 
"display_name": "Python 3", 279 | "language": "python", 280 | "name": "python3" 281 | }, 282 | "language_info": { 283 | "codemirror_mode": { 284 | "name": "ipython", 285 | "version": 3 286 | }, 287 | "file_extension": ".py", 288 | "mimetype": "text/x-python", 289 | "name": "python", 290 | "nbconvert_exporter": "python", 291 | "pygments_lexer": "ipython3", 292 | "version": "3.7.2" 293 | } 294 | }, 295 | "nbformat": 4, 296 | "nbformat_minor": 2 297 | } 298 | -------------------------------------------------------------------------------- /assignments/programming01/programming01.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Практическое задание №1" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "ФИО: Иванов Иван Иванович\n", 15 | "\n", 16 | "вуз: МГУ \n", 17 | "\n", 18 | "факультет: механико-математический\n", 19 | "\n", 20 | "курс: 3\n", 21 | "\n", 22 | "кафедра (если есть): МаТИС\n", 23 | "\n", 24 | "научный руководитель (если есть): Иванов И.И." 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "### Задача: Оценка качества вина по его характеристикам\n", 32 | "### Метрика качества: Accuracy" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "#### 1. Считывание данных (2 балла)\n", 40 | "\n", 41 | "Удобным и самым часто распространнёным форматом для чтения данных является формат csv (comma-separated values).\n", 42 | "Как правило для считывания и препроцессинга данных используется пакет pandas.\n", 43 | "Установить его можно следующей командой" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "! 
pip install pandas" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 1, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "import pandas as pd\n", 62 | "train_csv_path = 'train.csv'\n", 63 | "test_csv_path = 'test.csv'" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "Считывание csv-файла происходит при помощи функции pandas.read_csv\n", 71 | "Рекомендуем более подробно ознакомиться с аргументами функции тут https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_csv.html" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 2, 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "train_df = pd.read_csv(train_csv_path)\n", 81 | "test_df = pd.read_csv(test_csv_path)" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "type(train_df) " 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "train_df.info()" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "metadata": {}, 106 | "outputs": [], 107 | "source": [ 108 | "train_df.describe()" 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "metadata": {}, 114 | "source": [ 115 | "Тип возвращаемого объекта - DataFrame. Поскольку изучение pandas не является основной целью этого задания, сразу перейдём к numpy-массивам." 
116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": 8, 121 | "metadata": {}, 122 | "outputs": [ 123 | { 124 | "name": "stdout", 125 | "output_type": "stream", 126 | "text": [ 127 | "(, )\n", 128 | "((1279, 12), dtype('float64'), (320, 12), dtype('float64'))\n" 129 | ] 130 | } 131 | ], 132 | "source": [ 133 | "train = train_df.values\n", 134 | "test = test_df.values\n", 135 | "print(type(test), type(train))\n", 136 | "print(train.shape, train.dtype, test.shape, test.dtype)" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": 18, 142 | "metadata": {}, 143 | "outputs": [ 144 | { 145 | "name": "stdout", 146 | "output_type": "stream", 147 | "text": [ 148 | "((1279, 11), (1279,), (320, 11), (320,))\n" 149 | ] 150 | } 151 | ], 152 | "source": [ 153 | "# Последний столбец в массивах train и test - величина, которую мы должны предсказать. \n", 154 | "# Поэтому её надо выделить в отдельный массив.\n", 155 | "\n", 156 | "trainX = # ваш код\n", 157 | "trainY = # ваш код\n", 158 | "\n", 159 | "testX = # ваш код\n", 160 | "testY = # ваш код\n", 161 | "\n", 162 | "print(trainX.shape, trainY.shape, testX.shape, testY.shape)" 163 | ] 164 | }, 165 | { 166 | "cell_type": "markdown", 167 | "metadata": {}, 168 | "source": [ 169 | "На данном шаге у вас должны быть готовы данные для обучения первой модели. 
" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": null, 175 | "metadata": {}, 176 | "outputs": [], 177 | "source": [ 178 | "from sklearn.neighbors import KNeighborsClassifier\n", 179 | "from sklearn.metrics import accuracy_score\n", 180 | "\n", 181 | "baseline = KNeighborsClassifier(n_neighbors=1)\n", 182 | "baseline.fit(trainX, trainY)\n", 183 | "baseline_prediction = baseline.predict(testX)\n", 184 | "\n", 185 | "print('accuracy = %.2f' % accuracy_score(testY, baseline_prediction))" 186 | ] 187 | }, 188 | { 189 | "cell_type": "markdown", 190 | "metadata": {}, 191 | "source": [ 192 | "Можно ли считать этот результат хорошим?\n", 193 | "Какую точность можно получить используя совсем простые модели:\n", 194 | "- случайный выбор ответа\n", 195 | "- константная модель\n", 196 | "?" 197 | ] 198 | }, 199 | { 200 | "cell_type": "markdown", 201 | "metadata": {}, 202 | "source": [ 203 | "#### 2. Препроцессинг данных (3 балла)" 204 | ] 205 | }, 206 | { 207 | "cell_type": "markdown", 208 | "metadata": {}, 209 | "source": [ 210 | "Одна из причин, почему метод ближайшего соседа не даёт хороших результатов, это наличие признаков разного масштаба. 
\n", 211 | "Поэтому признаки необходимо приводить к одному и тому же диапазону значений.\n", 212 | "Изучите раздел документации\n", 213 | "https://scikit-learn.org/stable/modules/preprocessing.html#preprocessing\n", 214 | "и попробуйте применить к данным все методы из раздела 4.3.1" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": 47, 220 | "metadata": {}, 221 | "outputs": [], 222 | "source": [ 223 | "scaled_trainX = #ваш код\n", 224 | "scaled_testX = # ваш код\n" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": 48, 230 | "metadata": {}, 231 | "outputs": [ 232 | { 233 | "name": "stdout", 234 | "output_type": "stream", 235 | "text": [ 236 | "accuracy = 0.64\n" 237 | ] 238 | } 239 | ], 240 | "source": [ 241 | "baseline = KNeighborsClassifier(n_neighbors=1)\n", 242 | "baseline.fit(scaled_trainX, trainY)\n", 243 | "baseline_prediction = baseline.predict(scaled_testX)\n", 244 | "\n", 245 | "print('accuracy = %.2f' % accuracy_score(testY, baseline_prediction))" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": null, 251 | "metadata": {}, 252 | "outputs": [], 253 | "source": [] 254 | }, 255 | { 256 | "cell_type": "markdown", 257 | "metadata": {}, 258 | "source": [ 259 | "#### 3. 
Подбор параметров для метода ближайшего соседа (5 баллов)" 260 | ] 261 | }, 262 | { 263 | "cell_type": "markdown", 264 | "metadata": {}, 265 | "source": [ 266 | "1.Подберите лучшие параметры для метода ближайших соседей, используя 5-fold кросс-валидацию.\n", 267 | "Изучите главы документации:\n", 268 | "https://scikit-learn.org/stable/modules/cross_validation.html#cross-validation\n", 269 | "https://scikit-learn.org/stable/modules/grid_search.html#grid-search\n" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": null, 275 | "metadata": {}, 276 | "outputs": [], 277 | "source": [ 278 | "# ваш код" 279 | ] 280 | }, 281 | { 282 | "cell_type": "markdown", 283 | "metadata": {}, 284 | "source": [ 285 | "После подбора оптимальных параметров обучите модель с этими параметрами на всей обучающей выборке и посмотрите качество на тесте." 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "execution_count": null, 291 | "metadata": {}, 292 | "outputs": [], 293 | "source": [ 294 | "# ваш код" 295 | ] 296 | }, 297 | { 298 | "cell_type": "markdown", 299 | "metadata": {}, 300 | "source": [ 301 | "#### 4. Линейная регрессия (4 балла)" 302 | ] 303 | }, 304 | { 305 | "cell_type": "markdown", 306 | "metadata": {}, 307 | "source": [ 308 | "Напишите код функций для нахождения весов линейной регрессии (вспомните формулы из лекции 3), а также для применения этих весов (предсказание).\n", 309 | "\n", 310 | "Если эти функции будут правильно реализованы, то в дальнейшем можно будет применять класс MyLinearRegression со стандартным scikit-learn интерфейсом, в котором нас интересуют два метода: fit и predict." 
311 | ] 312 | }, 313 | { 314 | "cell_type": "code", 315 | "execution_count": null, 316 | "metadata": {}, 317 | "outputs": [], 318 | "source": [ 319 | "import numpy as np\n", 320 | "def solve_linear_regression(X, y):\n", 321 | " # ваш код\n", 322 | " # return weights, bias\n", 323 | "\n", 324 | "def make_prediction(weights, bias, testX):\n", 325 | " #ваш код\n", 326 | "\n", 327 | "class MyLinearRegression():\n", 328 | " def __init__(self,):\n", 329 | " self.name = 'LinearRegression'\n", 330 | " \n", 331 | " def fit(self, X, Y):\n", 332 | " self.weights, self.bias = solve_linear_regression(X, Y)\n", 333 | " print(self.weights)\n", 334 | " return self\n", 335 | " \n", 336 | " def predict(self, X):\n", 337 | " return make_prediction(self.weights, self.bias, X) " 338 | ] 339 | }, 340 | { 341 | "cell_type": "markdown", 342 | "metadata": {}, 343 | "source": [ 344 | "Протестируйте ваш класс на предложенном датасете" 345 | ] 346 | }, 347 | { 348 | "cell_type": "code", 349 | "execution_count": null, 350 | "metadata": {}, 351 | "outputs": [], 352 | "source": [ 353 | "baseline = MyLinearRegression()\n", 354 | "baseline.fit(scaled_trainX, trainY)\n", 355 | "baseline_prediction = baseline.predict(scaled_testX)\n", 356 | "# для подсчета точности необходимо сделать округление до ближайшего целого\n", 357 | "#ваш код" 358 | ] 359 | }, 360 | { 361 | "cell_type": "markdown", 362 | "metadata": {}, 363 | "source": [ 364 | "Сравните результаты вашего класса и аналогичного из scikit-learn\n", 365 | "sklearn.linear_model.LinearRegression" 366 | ] 367 | }, 368 | { 369 | "cell_type": "code", 370 | "execution_count": null, 371 | "metadata": {}, 372 | "outputs": [], 373 | "source": [ 374 | "#ваш код и заключение" 375 | ] 376 | }, 377 | { 378 | "cell_type": "markdown", 379 | "metadata": {}, 380 | "source": [ 381 | "#### 5. 
Лучшая модель (3 балла)\n" 382 | ] 383 | }, 384 | { 385 | "cell_type": "markdown", 386 | "metadata": {}, 387 | "source": [ 388 | "Попробуйте улучшить результат на тестовом датасете, используя все пройденные до 15 марта (первые 4 лекции) алгоритмы машинного обучения и любые методы препроцессинга и постпроцессинга. Если параметры не дефолтные, то подбор параметров должен осуществляться путём кросс-валидации без привлечения тестового датасета." 389 | ] 390 | }, 391 | { 392 | "cell_type": "code", 393 | "execution_count": null, 394 | "metadata": {}, 395 | "outputs": [], 396 | "source": [ 397 | "# ваш код " 398 | ] 399 | }, 400 | { 401 | "cell_type": "code", 402 | "execution_count": null, 403 | "metadata": {}, 404 | "outputs": [], 405 | "source": [] 406 | } 407 | ], 408 | "metadata": { 409 | "kernelspec": { 410 | "display_name": "Python 2", 411 | "language": "python", 412 | "name": "python2" 413 | }, 414 | "language_info": { 415 | "codemirror_mode": { 416 | "name": "ipython", 417 | "version": 2 418 | }, 419 | "file_extension": ".py", 420 | "mimetype": "text/x-python", 421 | "name": "python", 422 | "nbconvert_exporter": "python", 423 | "pygments_lexer": "ipython2", 424 | "version": "2.7.12" 425 | } 426 | }, 427 | "nbformat": 4, 428 | "nbformat_minor": 2 429 | } 430 | -------------------------------------------------------------------------------- /assignments/programming02/programming_assignment_02.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Практическое задание №2" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "ФИО: Иванов Иван Иванович\n", 15 | "\n", 16 | "вуз: МГУ \n", 17 | "\n", 18 | "факультет: механико-математический\n", 19 | "\n", 20 | "курс: 3\n", 21 | "\n", 22 | "кафедра (если есть): МаТИС\n", 23 | "\n", 24 | "научный руководитель (если есть): Иванов И.И." 
25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "#### Данное задание подготовлено с целью ознакомить слушателей с процедурой подготовки данных для алгоритмов машинного обучения на примере задачи оценки цены недвижимости. Описание данных можно найти в data_description.txt" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "### 1. Считывание данных (1 балл)" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "Считайте данные из файлов train.csv и test.csv в массивы:\n", 46 | "trainX (содержит признаки обучающего множества)\n", 47 | "trainY (содержит правильные ответы для обучающего множества)\n", 48 | "testX (содержит признаки для тестового множества)\n", 49 | "\n", 50 | "Первый столбец содержит порядковый номер объекта, поэтому его рекомендуется сразу удалить" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 16, 56 | "metadata": {}, 57 | "outputs": [ 58 | { 59 | "name": "stdout", 60 | "output_type": "stream", 61 | "text": [ 62 | "((1460, 79), (1460,), (1459, 79))\n" 63 | ] 64 | } 65 | ], 66 | "source": [ 67 | "# ваш код\n", 68 | "\n", 69 | "print(trainX.shape, trainY.shape, testX.shape)" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 17, 75 | "metadata": {}, 76 | "outputs": [ 77 | { 78 | "name": "stdout", 79 | "output_type": "stream", 80 | "text": [ 81 | "[60 'RL' 65.0 8450 'Pave' nan 'Reg' 'Lvl' 'AllPub' 'Inside' 'Gtl'\n", 82 | " 'CollgCr' 'Norm' 'Norm' '1Fam' '2Story' 7 5 2003 2003 'Gable' 'CompShg'\n", 83 | " 'VinylSd' 'VinylSd' 'BrkFace' 196.0 'Gd' 'TA' 'PConc' 'Gd' 'TA' 'No'\n", 84 | " 'GLQ' 706 'Unf' 0 150 856 'GasA' 'Ex' 'Y' 'SBrkr' 856 854 0 1710 1 0 2 1\n", 85 | " 3 1 'Gd' 8 'Typ' 0 nan 'Attchd' 2003.0 'RFn' 2 548 'TA' 'TA' 'Y' 0 61 0 0\n", 86 | " 0 0 nan nan nan 0 2 2008 'WD' 'Normal']\n" 87 | ] 88 | } 89 | ], 90 | "source": [ 91 | "print(trainX[0])" 92 | ] 93 | }, 94 | 
{ 95 
| "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "### 2. Предварительная обработка данных" 99 | ] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": {}, 104 | "source": [ 105 | "Как правило, \"реальные\" данные содержат пропущенные значения и прочие нечисловые признаки, поэтому прежде чем запустить методы fit и predict модели, необходимо сделать все признаки числовыми." 106 | ] 107 | }, 108 | { 109 | "cell_type": "markdown", 110 | "metadata": {}, 111 | "source": [ 112 | "#### 2.1 Обработка пропущенных значений (2 балла)" 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "Изучите раздел документации https://scikit-learn.org/stable/modules/impute.html#impute и параметры классов SimpleImputer и MissingIndicator" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 18, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [ 128 | "from sklearn.impute import MissingIndicator, SimpleImputer" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 19, 134 | "metadata": {}, 135 | "outputs": [ 136 | { 137 | "name": "stdout", 138 | "output_type": "stream", 139 | "text": [ 140 | "6965\n", 141 | "7000\n" 142 | ] 143 | } 144 | ], 145 | "source": [ 146 | "indicator = MissingIndicator()\n", 147 | "train_mask_missing_values_only = indicator.fit_transform(trainX)\n", 148 | "test_mask_missing_values_only = indicator.fit_transform(testX)\n", 149 | "print(np.sum(train_mask_missing_values_only))\n", 150 | "print(np.sum(test_mask_missing_values_only))" 151 | ] 152 | }, 153 | { 154 | "cell_type": "markdown", 155 | "metadata": {}, 156 | "source": [ 157 | "В данных были обнаружены пропущенные значения. \n", 158 | "При помощи класса SimpleImputer устраните пропущенные значения, использовав самый часто встречаемый элемент." 
159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": 20, 164 | "metadata": {}, 165 | "outputs": [], 166 | "source": [ 167 | "# ваш код\n", 168 | "\n", 169 | "trainX_without_missing = # ваш код\n", 170 | "testX_without_missing = # ваш код" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": 21, 176 | "metadata": {}, 177 | "outputs": [ 178 | { 179 | "name": "stdout", 180 | "output_type": "stream", 181 | "text": [ 182 | "0\n", 183 | "0\n" 184 | ] 185 | } 186 | ], 187 | "source": [ 188 | "indicator = MissingIndicator()\n", 189 | "train_mask_missing_values_only = indicator.fit_transform(trainX_without_missing)\n", 190 | "test_mask_missing_values_only = indicator.fit_transform(testX_without_missing)\n", 191 | "print(np.sum(train_mask_missing_values_only))\n", 192 | "print(np.sum(test_mask_missing_values_only))" 193 | ] 194 | }, 195 | { 196 | "cell_type": "markdown", 197 | "metadata": {}, 198 | "source": [ 199 | "Пропущенные значения удалены." 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": 22, 205 | "metadata": {}, 206 | "outputs": [ 207 | { 208 | "name": "stdout", 209 | "output_type": "stream", 210 | "text": [ 211 | "[60 'RL' 65.0 8450 'Pave' 'Grvl' 'Reg' 'Lvl' 'AllPub' 'Inside' 'Gtl'\n", 212 | " 'CollgCr' 'Norm' 'Norm' '1Fam' '2Story' 7 5 2003 2003 'Gable' 'CompShg'\n", 213 | " 'VinylSd' 'VinylSd' 'BrkFace' 196.0 'Gd' 'TA' 'PConc' 'Gd' 'TA' 'No'\n", 214 | " 'GLQ' 706 'Unf' 0 150 856 'GasA' 'Ex' 'Y' 'SBrkr' 856 854 0 1710 1 0 2 1\n", 215 | " 3 1 'Gd' 8 'Typ' 0 'Gd' 'Attchd' 2003.0 'RFn' 2 548 'TA' 'TA' 'Y' 0 61 0\n", 216 | " 0 0 0 'Gd' 'MnPrv' 'Shed' 0 2 2008 'WD' 'Normal']\n" 217 | ] 218 | } 219 | ], 220 | "source": [ 221 | "print(trainX_without_missing[0])" 222 | ] 223 | }, 224 | { 225 | "cell_type": "markdown", 226 | "metadata": {}, 227 | "source": [ 228 | "#### 2.2 Обработка категориальных значений (2 балла)" 229 | ] 230 | }, 231 | { 232 | "cell_type": "markdown", 233 | "metadata": {}, 234 
| "source": [ 235 | "Изучите раздел документации https://scikit-learn.org/stable/modules/preprocessing.html#encoding-categorical-features\n", 236 | "\n", 237 | "При помощи класса OrdinalEncoder удалите категориальные признаки." 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": null, 243 | "metadata": {}, 244 | "outputs": [], 245 | "source": [ 246 | "#ваш код\n", 247 | "trainX_without_missing_and_cat = #ваш код\n", 248 | "testX_without_missing_and_cat = # ваш код" 249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": 36, 254 | "metadata": {}, 255 | "outputs": [ 256 | { 257 | "name": "stdout", 258 | "output_type": "stream", 259 | "text": [ 260 | "[ 5. 3. 41. 619. 1. 0. 3. 3. 0. 4. 0. 5. 2. 2.\n", 261 | " 0. 5. 6. 4. 110. 53. 1. 1. 12. 13. 1. 148. 2. 4.\n", 262 | " 2. 2. 3. 3. 2. 512. 5. 0. 107. 261. 1. 0. 1. 4.\n", 263 | " 197. 356. 0. 766. 1. 0. 2. 1. 3. 1. 2. 6. 6. 0.\n", 264 | " 2. 1. 94. 1. 2. 283. 4. 4. 2. 0. 52. 0. 0. 0.\n", 265 | " 0. 2. 2. 2. 0. 1. 2. 8. 4.]\n" 266 | ] 267 | } 268 | ], 269 | "source": [ 270 | "print(trainX_without_missing_and_cat[0])" 271 | ] 272 | }, 273 | { 274 | "cell_type": "markdown", 275 | "metadata": {}, 276 | "source": [ 277 | "Теперь данные готовы для обучения моделей" 278 | ] 279 | }, 280 | { 281 | "cell_type": "markdown", 282 | "metadata": {}, 283 | "source": [ 284 | "Опишите достоинства и недостатки данного метода обработки категориальных признаков" 285 | ] 286 | }, 287 | { 288 | "cell_type": "markdown", 289 | "metadata": {}, 290 | "source": [ 291 | "ваш ответ\n" 292 | ] 293 | }, 294 | { 295 | "cell_type": "markdown", 296 | "metadata": {}, 297 | "source": [ 298 | "### 3. 
Разделение данных на обучение и валидацию (1 балл)" 299 | ] 300 | }, 301 | { 302 | "cell_type": "markdown", 303 | "metadata": {}, 304 | "source": [ 305 | "Разделите обучающее множество на обучение(75%) и валидацию(25%), воспользовавшись функцией train_test_split с random_state=42" 306 | ] 307 | }, 308 | { 309 | "cell_type": "code", 310 | "execution_count": null, 311 | "metadata": {}, 312 | "outputs": [], 313 | "source": [ 314 | "# ваш код" 315 | ] 316 | }, 317 | { 318 | "cell_type": "code", 319 | "execution_count": 46, 320 | "metadata": {}, 321 | "outputs": [ 322 | { 323 | "name": "stdout", 324 | "output_type": "stream", 325 | "text": [ 326 | "((1095, 79), (365, 79), (1095,), (365,))\n" 327 | ] 328 | } 329 | ], 330 | "source": [ 331 | "print(X_train.shape, X_val.shape, y_train.shape, y_val.shape)" 332 | ] 333 | }, 334 | { 335 | "cell_type": "markdown", 336 | "metadata": {}, 337 | "source": [ 338 | "### 4. Обучение моделей (4 балла)" 339 | ] 340 | }, 341 | { 342 | "cell_type": "markdown", 343 | "metadata": {}, 344 | "source": [ 345 | "Данные готовы для обучения алгоритмов машинного обучения. 
В качестве базовой модели возьмём линейную регрессию, метрика качества - mean_squared_log_error" 346 | ] 347 | }, 348 | { 349 | "cell_type": "code", 350 | "execution_count": 64, 351 | "metadata": {}, 352 | "outputs": [], 353 | "source": [ 354 | "from sklearn.linear_model import LinearRegression, Lasso\n", 355 | "from sklearn.metrics import mean_squared_log_error" 356 | ] 357 | }, 358 | { 359 | "cell_type": "code", 360 | "execution_count": 66, 361 | "metadata": {}, 362 | "outputs": [ 363 | { 364 | "name": "stdout", 365 | "output_type": "stream", 366 | "text": [ 367 | "0.3831817295997725\n" 368 | ] 369 | } 370 | ], 371 | "source": [ 372 | "lr = LinearRegression()\n", 373 | "lr.fit(X_train, y_train)\n", 374 | "y_pred = lr.predict(X_val)\n", 375 | "y_pred[y_pred < 0.0] = 0.0\n", 376 | "\n", 377 | "print(mean_squared_log_error(y_val, y_pred))" 378 | ] 379 | }, 380 | { 381 | "cell_type": "markdown", 382 | "metadata": {}, 383 | "source": [ 384 | "Текущую модель можно существенно улучшить. Добейтесь на валидации ошибки меньше 0.03 без каких-либо ограничений на алгоритмы" 385 | ] 386 | }, 387 | { 388 | "cell_type": "code", 389 | "execution_count": null, 390 | "metadata": {}, 391 | "outputs": [], 392 | "source": [ 393 | "# ваш код" 394 | ] 395 | }, 396 | { 397 | "cell_type": "code", 398 | "execution_count": null, 399 | "metadata": {}, 400 | "outputs": [], 401 | "source": [] 402 | }, 403 | { 404 | "cell_type": "markdown", 405 | "metadata": {}, 406 | "source": [ 407 | "### 5. Способы улучшения качества модели" 408 | ] 409 | }, 410 | { 411 | "cell_type": "markdown", 412 | "metadata": {}, 413 | "source": [ 414 | "1. Применить другую стратегию обработки пропущенных значений (1 балл + 1 балл, если получили прирост качества для алгоритма с ошибкой меньше 0.03)\n", 415 | "\n", 416 | "2. Добавить бинарные признаки отсутствия/присутствия значения для столбцов, где есть пропущенные значения (1 балл)\n", 417 | "\n", 418 | "3. 
Применить стратегию OneHotEncoder для обработки категориальных признаков (2 балла)\n", 419 | "\n", 420 | "4. Применить PCA (3 балла)\n" 421 | ] 422 | }, 423 | { 424 | "cell_type": "markdown", 425 | "metadata": {}, 426 | "source": [ 427 | "Реализуйте не менее двух идей из предложенных выше" 428 | ] 429 | }, 430 | { 431 | "cell_type": "code", 432 | "execution_count": null, 433 | "metadata": {}, 434 | "outputs": [], 435 | "source": [] 436 | } 437 | ], 438 | "metadata": { 439 | "kernelspec": { 440 | "display_name": "Python 3", 441 | "language": "python", 442 | "name": "python3" 443 | }, 444 | "language_info": { 445 | "codemirror_mode": { 446 | "name": "ipython", 447 | "version": 2 448 | }, 449 | "file_extension": ".py", 450 | "mimetype": "text/x-python", 451 | "name": "python", 452 | "nbconvert_exporter": "python", 453 | "pygments_lexer": "ipython2", 454 | "version": "2.7.12" 455 | } 456 | }, 457 | "nbformat": 4, 458 | "nbformat_minor": 2 459 | } 460 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [EN version](#en) 2 | 3 | # Введение в компьютерный интеллект. Машинное обучение. 4 | 5 | ## Содержание 6 | * [Новости](#news) 7 | * [Краткая информация](#info) 8 | * [Время и место](#ww) 9 | * [Связь с преподавателями](#feedback) 10 | * [Результаты выполнения заданий](#marks) 11 | * [Программа курса](#program) 12 | * [Список литературы](#lit) 13 | * [Полезные ссылки](#links) 14 | ## Новости 15 | * Подведены итоги курса - см. 
[детальную разбалловку](https://docs.google.com/spreadsheets/d/19s51nGSJ_-_K8niZpovrUZ5fPiDgU2gfZn_4Rik6Z8w/edit?ts=5c65b52b#gid=0) 16 | * Выложена [десятая лекция](lec/) и [соответствующий ноутбук](prac/) 17 | * Запустили соревнование по машинному обучению https://www.kaggle.com/t/af6a791ec09847f390bddbb03a86cea3 18 | * Выложены лекции с [седьмой по девятую](lec/) 19 | * Выложено [второе практическое задание](assignments/programming02). Дедлайн для получения полного балла - 17 мая 2019 20 | * Второе [теоретическое задание](assignments/theory02.pdf). Дедлайн для получения полного балла - 10 мая 2019 21 | * Выложены лекции с [четвертой по шестую](lec/) и [ноутбуки](prac/) с пятой и шестой лекций 22 | * Выложено [первое практическое задание](assignments/programming01). Дедлайн для получения полного балла - 08 апреля 2019 23 | * Третья [лекция](lec/lecture03.pdf) и [ноутбуки](prac/) с третьего занятия 24 | * Вторая [лекция](lec/lecture02.pdf), первое [теоретическое задание](assignments/theory01.pdf) (дедлайн для получения полного балла - 01 апреля 2019) и [ноутбуки](prac/) со второго занятия 25 | * Первая [лекция](lec/lecture01.pdf) 26 | * Первая лекция состоится в пятницу, 15 февраля, в 18:30 в аудитории 1225 (ГЗ МГУ). 27 | ## Краткая информация 28 | В весеннем семестре 2019 года на механико-математическом факультете МГУ им. М. В. Ломоносова начинается чтение нового спецкурса по выбору студента, посвященного классическим алгоритмам машинного обучения. 29 | 30 | Курс будет читаться на базе кафедры [Математической Теории Интеллектуальных Систем](http://intsys.msu.ru) под руководством д.ф.-м.н., профессора [Бабина Д. Н.](http://intsys.msu.ru/staff/babin/) Курс будут читать к.ф.-м.н. Петюшко А. А. и к.ф.-м.н. Иванов И. Е. 31 | ## Время и место 32 | Курс читается по пятницам, 18:30, ГЗ МГУ, аудитория 1225. 
33 | ## Связь с преподавателями 34 | * [Telegram-канал](https://t.me/joinchat/AAAAAEUmx5cJLOdLXsOt8g), в котором будут появляться все важные новости 35 | * Обратная связь - по почте mlcoursemm@gmail.com 36 | * Ну и всегда можно написать в [issues](https://github.com/mlcoursemm/mlcoursemm2019spring/issues) :) 37 | ## Результаты выполнения заданий 38 | * Соревнование по машинному обучению 39 | * Разбалловка, сроки проведения и правила участия можно найти [тут](https://www.kaggle.com/t/af6a791ec09847f390bddbb03a86cea3) 40 | * Второе практическое задание 41 | * Сроки сдачи: дедлайн 17 мая, если будет сдача после него - баллы умножаются на 0.5 42 | * Разбалловка указана в самом ноутбуке 43 | * Решения присылать на почту курса mlcoursemm@gmail.com 44 | * Формат: ноутбук с Вашим кодом 45 | * Второе теоретическое задание 46 | * Сроки сдачи: дедлайн 10 мая, если будет сдача после него - баллы умножаются на 0.5 47 | * Все остальные условия - как и в первом теоретическом задании 48 | * Первое практическое задание 49 | * Сроки сдачи: дедлайн 8 апреля, если будет сдача после него - баллы умножаются на 0.5 50 | * Разбалловка указана в самом ноутбуке 51 | * Решения присылать на почту курса mlcoursemm@gmail.com 52 | * Формат: ноутбук с Вашим кодом 53 | * Первое теоретическое задание 54 | * Сроки сдачи: дедлайн 1 апреля, если будет сдача после него - баллы умножаются на 0.5 55 | * Разбалловка: 1 балл за задачу, если в задаче подпункты - то по баллу за каждый подпункт 56 | * Решения присылать на почту курса mlcoursemm@gmail.com 57 | * Формат: лучше пдф-кой (сделать в ТеХе), но можно и просто сфотографировать решение на камеру 58 | * Итоговая табличка: Ссылка на [гуглдокс](https://docs.google.com/spreadsheets/d/19s51nGSJ_-_K8niZpovrUZ5fPiDgU2gfZn_4Rik6Z8w/edit?ts=5c65b52b#gid=0) 59 | ## Программа курса 60 | Тема 1. 61 | Постановка задач машинного обучения. Понятие искусственного интеллекта. 62 | 63 | Тема 2. 64 | Валидация алгоритмов, различные типы валидации. 65 | 66 | Тема 3. 
67 | Переобучение. Недообучение. Декомпозиция ошибки на смещение и разброс. 68 | 69 | Тема 4. 70 | Методы классификации. Байесовский подход. 71 | 72 | Тема 5. 73 | Введение в язык программирования Python. Обзор библиотеки scikit-learn. 74 | 75 | Тема 6. 76 | Методы классификации. Метрические методы. 77 | 78 | Тема 7. 79 | Стохастический градиентный спуск. 80 | 81 | Тема 8. 82 | Методы классификации. Линейные классификаторы. 83 | 84 | Тема 9. 85 | Введение в искусственные нейронные сети. Теорема Новикова. 86 | 87 | Тема 10. 88 | Машины опорных векторов. 89 | 90 | Тема 11. 91 | Решающие деревья и логические методы классификации. 92 | 93 | Тема 12. 94 | Методы восстановления регрессии. Линейные модели. 95 | 96 | Тема 13. 97 | Методы восстановления регрессии. Нелинейные модели. 98 | 99 | Тема 14. 100 | Композиции алгоритмов машинного обучения. Композиции классификаторов. 101 | 102 | Тема 15. 103 | Тестирование алгоритмов, метрики качества. 104 | 105 | Тема 16. 106 | Задачи компьютерного зрения. 107 | ## Список литературы 108 | 1. [Курс лекций по машинному обучению](http://www.machinelearning.ru/wiki/index.php?title=Машинное_обучение_%28курс_лекций%2C_К.В.Воронцов%29) на http://www.machinelearning.ru от Воронцова К. В. 109 | 1. Hastie, T. and Tibshirani, R. and Friedman, J. [The Elements of Statistical Learning](https://web.stanford.edu/~hastie/ElemStatLearn/printings/ESLII_print12.pdf), 2nd edition, Springer, 2009. 110 | 2. Bishop, C.M. [Pattern Recognition and Machine Learning](https://www.microsoft.com/en-us/research/uploads/prod/2006/01/Bishop-Pattern-Recognition-and-Machine-Learning-2006.pdf), Springer, 2006. 111 | ## Полезные ссылки 112 | ### Шпаргалки 113 | * Краткая справочная информация по Python, NumPy, SciPy, SciKit-learn, Pandas, MatPlotLib, Jupyter Notebook: см. 
в [соответствующей папке](./cheatsheets/) курса 114 | ### Введение в Python 115 | * Python Programming in 15 min: [Part1](https://towardsdatascience.com/python-programming-in-15-min-part-1-3ad2d773834c), [Part2](https://towardsdatascience.com/python-programming-in-15-min-part-2-480f78713544), [Part3](https://towardsdatascience.com/python-programming-in-15-min-part-3-ce882f9ab9b2) 116 | * Python Programming - A Modern Approach: [Code, notebooks and slides](https://github.com/vamsi/python-programming-modern-approach) 117 | * Playground and Cheatsheet for Learning Python: [github repo](https://github.com/trekhleb/learn-python) 118 | ### Введение в машинное обучение 119 | * Homemade Machine Learning: [github repo](https://github.com/trekhleb/homemade-machine-learning) 120 | * Machine learning: [Курс](https://www.coursera.org/learn/machine-learning) Andrew Ng на площадке https://www.coursera.org 121 | 122 | 123 | ### EN version 124 | 125 | # Introduction to Computer Intelligence. Machine learning. 126 | 127 | ## Content 128 | * [News](#news1) 129 | * [Short info](#info1) 130 | * [Time and place](#ww1) 131 | * [Communication with teachers](#feedback1) 132 | * [Task results](#marks1) 133 | * [Course program](#program1) 134 | * [Bibliography](#lit1) 135 | * [Useful links](#links1) 136 | ## News 137 | * The course results have been summed up - see the [detailed table](https://docs.google.com/spreadsheets/d/19s51nGSJ_-_K8niZpovrUZ5fPiDgU2gfZn_4Rik6Z8w/edit?ts=5c65b52b#gid=0) 138 | * Uploaded the [10th lecture](lec/) and [corresponding notebook](prac/) 139 | * Launched a machine learning competition on [kaggle](https://www.kaggle.com/t/af6a791ec09847f390bddbb03a86cea3) 140 | * Lectures are uploaded: [7th through 9th](lec/) 141 | * Uploaded the [second practical task](assignments/programming02). Deadline for getting the Full Score - May 17, 2019 142 | * Uploaded the [second theoretical task](assignments/theory02.pdf). 
Deadline for getting the Full Score - May 10, 2019 143 | * Lectures are uploaded: [from 4th to 6th](lec/) and [notebooks](prac/) with the 5th and 6th lectures 144 | * Uploaded the [first practical task](assignments/programming01). Deadline for getting the Full Score - April 08, 2019 145 | * Third [lecture](lec/lecture03.pdf) and [notebooks](prac/) from the third lesson are uploaded 146 | * Second [lecture](lec/lecture02.pdf), first [theoretical task](assignments/theory01.pdf) (deadline for getting the full score - April 01, 2019) and [notebooks](prac/) from the second lesson are uploaded 147 | * First [lecture](lec/lecture01.pdf) 148 | * The first lecture will take place on Friday, February 15, at 6:30 pm in room 1225 (MSU Main Building). 149 | ## Short info 150 | In the spring semester of 2019, the Faculty of Mechanics and Mathematics of Lomonosov Moscow State University begins offering a new elective special course dedicated to classical machine learning algorithms. 151 | 152 | The course will be taught at the department of [Mathematical Theory of Intelligent Systems](http://intsys.msu.ru) under the guidance of Doctor of Physical and Mathematical Sciences, Professor [Babin D.N.](http://intsys.msu.ru/staff/babin/) The course will be delivered by Ph.D. [Petiushko A.A.](https://petiushko.info) and Ph.D. Ivanov I.E. 153 | ## Time and place 154 | The lessons are taught on Fridays, 18:30, MSU Main Building, room 1225. 
155 | ## Communication with teachers 156 | * [Telegram-channel](https://t.me/joinchat/AAAAAEUmx5cJLOdLXsOt8g), where all important news will appear 157 | * Feedback - by email mlcoursemm@gmail.com 158 | * Well, you can always write in [issues](https://github.com/mlcoursemm/mlcoursemm2019spring/issues) :) 159 | ## Task results 160 | * Machine learning competition 161 | * The resulting table, timing and rules of participation can be found [here](https://www.kaggle.com/t/af6a791ec09847f390bddbb03a86cea3) 162 | * Second practical task 163 | * Timing: deadline on May 17, if there is a delivery after it - points are multiplied by 0.5 164 | * Each task scoring is indicated in the notebook itself 165 | * Send solutions to the course email mlcoursemm@gmail.com 166 | * Format: notebook with your code 167 | * Second theoretical task 168 | * Timing: deadline on May 10, if there is a delivery after it - points are multiplied by 0.5 169 | * All other conditions are the same as in the first theoretical task 170 | * First practical task 171 | * Timing: deadline on April 8, if there is a delivery after it - points are multiplied by 0.5 172 | * Each task scoring is indicated in the notebook itself 173 | * Send solutions to the course email mlcoursemm@gmail.com 174 | * Format: notebook with your code 175 | * The first theoretical task 176 | * Timing: deadline on April 1, if there is delivery after it - points are multiplied by 0.5 177 | * Scoring: 1 point for the task, if there are sub-points in the task, then 1 point for each sub-task 178 | * Send solutions to the course email mlcoursemm@gmail.com 179 | * Format: it is better pdf (done in TeX), but you can just take a picture of the solution with a camera 180 | * Summary table: Link to [googledocs](https://docs.google.com/spreadsheets/d/19s51nGSJ_-_K8niZpovrUZ5fPiDgU2gfZn_4Rik6Z8w/edit?ts=5c65b52b#gid=0) 181 | ## Course program 182 | Topic 1. 183 | Machine learning problems statement. Artificial intelligence concept. 
184 | 185 | Topic 2. 186 | Validation of algorithms, various types of validation. 187 | 188 | Topic 3. 189 | Overfitting. Underfitting. Decomposition of the error into bias and variance. 190 | 191 | Topic 4. 192 | Classification methods. Bayesian approach. 193 | 194 | Topic 5. 195 | An introduction to the Python programming language. An overview of the scikit-learn library. 196 | 197 | Topic 6. 198 | Classification methods. Metric methods. 199 | 200 | Topic 7. 201 | Stochastic gradient descent. 202 | 203 | Topic 8. 204 | Classification methods. Linear classifiers. 205 | 206 | Topic 9. 207 | An introduction to artificial neural networks. Novikov's theorem. 208 | 209 | Topic 10. 210 | Support vector machines. 211 | 212 | Topic 11. 213 | Decision trees and logical classification methods. 214 | 215 | Topic 12. 216 | Regression. Linear models. 217 | 218 | Topic 13. 219 | Regression. Non-linear models. 220 | 221 | Topic 14. 222 | Compositions of machine learning algorithms. Compositions of classifiers. 223 | 224 | Topic 15. 225 | Algorithm testing, quality metrics. 226 | 227 | Topic 16. 228 | Computer vision tasks. 229 | ## Bibliography 230 | 1. [Machine Learning Lecture Course](http://www.machinelearning.ru/wiki/index.php?title=Машинное_обучение_%28курс_лекций%2C_К.В.Воронцов%29) on http://www.machinelearning.ru from Vorontsov K.V. 231 | 1. Hastie, T. and Tibshirani, R. and Friedman, J. [The Elements of Statistical Learning](https://web.stanford.edu/~hastie/ElemStatLearn/printings/ESLII_print12.pdf), 2nd edition, Springer, 2009. 232 | 2. Bishop, C.M. [Pattern Recognition and Machine Learning](https://www.microsoft.com/en-us/research/uploads/prod/2006/01/Bishop-Pattern-Recognition-and-Machine-Learning-2006.pdf), Springer, 2006. 
233 | ## Useful links 234 | ### Cheat sheets 235 | * Quick reference information on Python, NumPy, SciPy, SciKit-learn, Pandas, MatPlotLib, Jupyter Notebook: see the [corresponding folder](./cheatsheets/) of the course 236 | ### Introduction to Python 237 | * Python Programming in 15 min: [Part1](https://towardsdatascience.com/python-programming-in-15-min-part-1-3ad2d773834c), [Part2](https://towardsdatascience.com/python-programming-in-15-min-part-2-480f78713544), [Part3](https://towardsdatascience.com/python-programming-in-15-min-part-3-ce882f9ab9b2) 238 | * Python Programming - A Modern Approach: [Code, notebooks and slides](https://github.com/vamsi/python-programming-modern-approach) 239 | * Playground and Cheatsheet for Learning Python: [github repo](https://github.com/trekhleb/learn-python) 240 | ### Introduction to machine learning 241 | * Homemade Machine Learning: [github repo](https://github.com/trekhleb/homemade-machine-learning) 242 | * Machine learning: [Course](https://www.coursera.org/learn/machine-learning) by Andrew Ng on the site https://www.coursera.org 243 | -------------------------------------------------------------------------------- /assignments/programming02/data_description.txt: -------------------------------------------------------------------------------- 1 | MSSubClass: Identifies the type of dwelling involved in the sale. 
2 | 3 | 20 1-STORY 1946 & NEWER ALL STYLES 4 | 30 1-STORY 1945 & OLDER 5 | 40 1-STORY W/FINISHED ATTIC ALL AGES 6 | 45 1-1/2 STORY - UNFINISHED ALL AGES 7 | 50 1-1/2 STORY FINISHED ALL AGES 8 | 60 2-STORY 1946 & NEWER 9 | 70 2-STORY 1945 & OLDER 10 | 75 2-1/2 STORY ALL AGES 11 | 80 SPLIT OR MULTI-LEVEL 12 | 85 SPLIT FOYER 13 | 90 DUPLEX - ALL STYLES AND AGES 14 | 120 1-STORY PUD (Planned Unit Development) - 1946 & NEWER 15 | 150 1-1/2 STORY PUD - ALL AGES 16 | 160 2-STORY PUD - 1946 & NEWER 17 | 180 PUD - MULTILEVEL - INCL SPLIT LEV/FOYER 18 | 190 2 FAMILY CONVERSION - ALL STYLES AND AGES 19 | 20 | MSZoning: Identifies the general zoning classification of the sale. 21 | 22 | A Agriculture 23 | C Commercial 24 | FV Floating Village Residential 25 | I Industrial 26 | RH Residential High Density 27 | RL Residential Low Density 28 | RP Residential Low Density Park 29 | RM Residential Medium Density 30 | 31 | LotFrontage: Linear feet of street connected to property 32 | 33 | LotArea: Lot size in square feet 34 | 35 | Street: Type of road access to property 36 | 37 | Grvl Gravel 38 | Pave Paved 39 | 40 | Alley: Type of alley access to property 41 | 42 | Grvl Gravel 43 | Pave Paved 44 | NA No alley access 45 | 46 | LotShape: General shape of property 47 | 48 | Reg Regular 49 | IR1 Slightly irregular 50 | IR2 Moderately Irregular 51 | IR3 Irregular 52 | 53 | LandContour: Flatness of the property 54 | 55 | Lvl Near Flat/Level 56 | Bnk Banked - Quick and significant rise from street grade to building 57 | HLS Hillside - Significant slope from side to side 58 | Low Depression 59 | 60 | Utilities: Type of utilities available 61 | 62 | AllPub All public Utilities (E,G,W,& S) 63 | NoSewr Electricity, Gas, and Water (Septic Tank) 64 | NoSeWa Electricity and Gas Only 65 | ELO Electricity only 66 | 67 | LotConfig: Lot configuration 68 | 69 | Inside Inside lot 70 | Corner Corner lot 71 | CulDSac Cul-de-sac 72 | FR2 Frontage on 2 sides of property 73 | FR3 Frontage on 3 sides of 
property 74 | 75 | LandSlope: Slope of property 76 | 77 | Gtl Gentle slope 78 | Mod Moderate Slope 79 | Sev Severe Slope 80 | 81 | Neighborhood: Physical locations within Ames city limits 82 | 83 | Blmngtn Bloomington Heights 84 | Blueste Bluestem 85 | BrDale Briardale 86 | BrkSide Brookside 87 | ClearCr Clear Creek 88 | CollgCr College Creek 89 | Crawfor Crawford 90 | Edwards Edwards 91 | Gilbert Gilbert 92 | IDOTRR Iowa DOT and Rail Road 93 | MeadowV Meadow Village 94 | Mitchel Mitchell 95 | Names North Ames 96 | NoRidge Northridge 97 | NPkVill Northpark Villa 98 | NridgHt Northridge Heights 99 | NWAmes Northwest Ames 100 | OldTown Old Town 101 | SWISU South & West of Iowa State University 102 | Sawyer Sawyer 103 | SawyerW Sawyer West 104 | Somerst Somerset 105 | StoneBr Stone Brook 106 | Timber Timberland 107 | Veenker Veenker 108 | 109 | Condition1: Proximity to various conditions 110 | 111 | Artery Adjacent to arterial street 112 | Feedr Adjacent to feeder street 113 | Norm Normal 114 | RRNn Within 200' of North-South Railroad 115 | RRAn Adjacent to North-South Railroad 116 | PosN Near positive off-site feature--park, greenbelt, etc. 117 | PosA Adjacent to positive off-site feature 118 | RRNe Within 200' of East-West Railroad 119 | RRAe Adjacent to East-West Railroad 120 | 121 | Condition2: Proximity to various conditions (if more than one is present) 122 | 123 | Artery Adjacent to arterial street 124 | Feedr Adjacent to feeder street 125 | Norm Normal 126 | RRNn Within 200' of North-South Railroad 127 | RRAn Adjacent to North-South Railroad 128 | PosN Near positive off-site feature--park, greenbelt, etc. 
129 | PosA Adjacent to positive off-site feature 130 | RRNe Within 200' of East-West Railroad 131 | RRAe Adjacent to East-West Railroad 132 | 133 | BldgType: Type of dwelling 134 | 135 | 1Fam Single-family Detached 136 | 2FmCon Two-family Conversion; originally built as one-family dwelling 137 | Duplx Duplex 138 | TwnhsE Townhouse End Unit 139 | TwnhsI Townhouse Inside Unit 140 | 141 | HouseStyle: Style of dwelling 142 | 143 | 1Story One story 144 | 1.5Fin One and one-half story: 2nd level finished 145 | 1.5Unf One and one-half story: 2nd level unfinished 146 | 2Story Two story 147 | 2.5Fin Two and one-half story: 2nd level finished 148 | 2.5Unf Two and one-half story: 2nd level unfinished 149 | SFoyer Split Foyer 150 | SLvl Split Level 151 | 152 | OverallQual: Rates the overall material and finish of the house 153 | 154 | 10 Very Excellent 155 | 9 Excellent 156 | 8 Very Good 157 | 7 Good 158 | 6 Above Average 159 | 5 Average 160 | 4 Below Average 161 | 3 Fair 162 | 2 Poor 163 | 1 Very Poor 164 | 165 | OverallCond: Rates the overall condition of the house 166 | 167 | 10 Very Excellent 168 | 9 Excellent 169 | 8 Very Good 170 | 7 Good 171 | 6 Above Average 172 | 5 Average 173 | 4 Below Average 174 | 3 Fair 175 | 2 Poor 176 | 1 Very Poor 177 | 178 | YearBuilt: Original construction date 179 | 180 | YearRemodAdd: Remodel date (same as construction date if no remodeling or additions) 181 | 182 | RoofStyle: Type of roof 183 | 184 | Flat Flat 185 | Gable Gable 186 | Gambrel Gambrel (Barn) 187 | Hip Hip 188 | Mansard Mansard 189 | Shed Shed 190 | 191 | RoofMatl: Roof material 192 | 193 | ClyTile Clay or Tile 194 | CompShg Standard (Composite) Shingle 195 | Membran Membrane 196 | Metal Metal 197 | Roll Roll 198 | Tar&Grv Gravel & Tar 199 | WdShake Wood Shakes 200 | WdShngl Wood Shingles 201 | 202 | Exterior1st: Exterior covering on house 203 | 204 | AsbShng Asbestos Shingles 205 | AsphShn Asphalt Shingles 206 | BrkComm Brick Common 207 | BrkFace Brick Face 208 | CBlock Cinder 
Block 209 | CemntBd Cement Board 210 | HdBoard Hard Board 211 | ImStucc Imitation Stucco 212 | MetalSd Metal Siding 213 | Other Other 214 | Plywood Plywood 215 | PreCast PreCast 216 | Stone Stone 217 | Stucco Stucco 218 | VinylSd Vinyl Siding 219 | Wd Sdng Wood Siding 220 | WdShing Wood Shingles 221 | 222 | Exterior2nd: Exterior covering on house (if more than one material) 223 | 224 | AsbShng Asbestos Shingles 225 | AsphShn Asphalt Shingles 226 | BrkComm Brick Common 227 | BrkFace Brick Face 228 | CBlock Cinder Block 229 | CemntBd Cement Board 230 | HdBoard Hard Board 231 | ImStucc Imitation Stucco 232 | MetalSd Metal Siding 233 | Other Other 234 | Plywood Plywood 235 | PreCast PreCast 236 | Stone Stone 237 | Stucco Stucco 238 | VinylSd Vinyl Siding 239 | Wd Sdng Wood Siding 240 | WdShing Wood Shingles 241 | 242 | MasVnrType: Masonry veneer type 243 | 244 | BrkCmn Brick Common 245 | BrkFace Brick Face 246 | CBlock Cinder Block 247 | None None 248 | Stone Stone 249 | 250 | MasVnrArea: Masonry veneer area in square feet 251 | 252 | ExterQual: Evaluates the quality of the material on the exterior 253 | 254 | Ex Excellent 255 | Gd Good 256 | TA Average/Typical 257 | Fa Fair 258 | Po Poor 259 | 260 | ExterCond: Evaluates the present condition of the material on the exterior 261 | 262 | Ex Excellent 263 | Gd Good 264 | TA Average/Typical 265 | Fa Fair 266 | Po Poor 267 | 268 | Foundation: Type of foundation 269 | 270 | BrkTil Brick & Tile 271 | CBlock Cinder Block 272 | PConc Poured Concrete 273 | Slab Slab 274 | Stone Stone 275 | Wood Wood 276 | 277 | BsmtQual: Evaluates the height of the basement 278 | 279 | Ex Excellent (100+ inches) 280 | Gd Good (90-99 inches) 281 | TA Typical (80-89 inches) 282 | Fa Fair (70-79 inches) 283 | Po Poor (<70 inches) 284 | NA No Basement 285 | 286 | BsmtCond: Evaluates the general condition of the basement 287 | 288 | Ex Excellent 289 | Gd Good 290 | TA Typical - slight dampness allowed 291 | Fa Fair - dampness or some cracking or 
settling 292 | Po Poor - Severe cracking, settling, or wetness 293 | NA No Basement 294 | 295 | BsmtExposure: Refers to walkout or garden level walls 296 | 297 | Gd Good Exposure 298 | Av Average Exposure (split levels or foyers typically score average or above) 299 | Mn Minimum Exposure 300 | No No Exposure 301 | NA No Basement 302 | 303 | BsmtFinType1: Rating of basement finished area 304 | 305 | GLQ Good Living Quarters 306 | ALQ Average Living Quarters 307 | BLQ Below Average Living Quarters 308 | Rec Average Rec Room 309 | LwQ Low Quality 310 | Unf Unfinished 311 | NA No Basement 312 | 313 | BsmtFinSF1: Type 1 finished square feet 314 | 315 | BsmtFinType2: Rating of basement finished area (if multiple types) 316 | 317 | GLQ Good Living Quarters 318 | ALQ Average Living Quarters 319 | BLQ Below Average Living Quarters 320 | Rec Average Rec Room 321 | LwQ Low Quality 322 | Unf Unfinished 323 | NA No Basement 324 | 325 | BsmtFinSF2: Type 2 finished square feet 326 | 327 | BsmtUnfSF: Unfinished square feet of basement area 328 | 329 | TotalBsmtSF: Total square feet of basement area 330 | 331 | Heating: Type of heating 332 | 333 | Floor Floor Furnace 334 | GasA Gas forced warm air furnace 335 | GasW Gas hot water or steam heat 336 | Grav Gravity furnace 337 | OthW Hot water or steam heat other than gas 338 | Wall Wall furnace 339 | 340 | HeatingQC: Heating quality and condition 341 | 342 | Ex Excellent 343 | Gd Good 344 | TA Average/Typical 345 | Fa Fair 346 | Po Poor 347 | 348 | CentralAir: Central air conditioning 349 | 350 | N No 351 | Y Yes 352 | 353 | Electrical: Electrical system 354 | 355 | SBrkr Standard Circuit Breakers & Romex 356 | FuseA Fuse Box over 60 AMP and all Romex wiring (Average) 357 | FuseF 60 AMP Fuse Box and mostly Romex wiring (Fair) 358 | FuseP 60 AMP Fuse Box and mostly knob & tube wiring (poor) 359 | Mix Mixed 360 | 361 | 1stFlrSF: First Floor square feet 362 | 363 | 2ndFlrSF: Second floor square feet 364 | 365 | LowQualFinSF: Low quality 
finished square feet (all floors) 366 | 367 | GrLivArea: Above grade (ground) living area square feet 368 | 369 | BsmtFullBath: Basement full bathrooms 370 | 371 | BsmtHalfBath: Basement half bathrooms 372 | 373 | FullBath: Full bathrooms above grade 374 | 375 | HalfBath: Half baths above grade 376 | 377 | Bedroom: Bedrooms above grade (does NOT include basement bedrooms) 378 | 379 | Kitchen: Kitchens above grade 380 | 381 | KitchenQual: Kitchen quality 382 | 383 | Ex Excellent 384 | Gd Good 385 | TA Typical/Average 386 | Fa Fair 387 | Po Poor 388 | 389 | TotRmsAbvGrd: Total rooms above grade (does not include bathrooms) 390 | 391 | Functional: Home functionality (Assume typical unless deductions are warranted) 392 | 393 | Typ Typical Functionality 394 | Min1 Minor Deductions 1 395 | Min2 Minor Deductions 2 396 | Mod Moderate Deductions 397 | Maj1 Major Deductions 1 398 | Maj2 Major Deductions 2 399 | Sev Severely Damaged 400 | Sal Salvage only 401 | 402 | Fireplaces: Number of fireplaces 403 | 404 | FireplaceQu: Fireplace quality 405 | 406 | Ex Excellent - Exceptional Masonry Fireplace 407 | Gd Good - Masonry Fireplace in main level 408 | TA Average - Prefabricated Fireplace in main living area or Masonry Fireplace in basement 409 | Fa Fair - Prefabricated Fireplace in basement 410 | Po Poor - Ben Franklin Stove 411 | NA No Fireplace 412 | 413 | GarageType: Garage location 414 | 415 | 2Types More than one type of garage 416 | Attchd Attached to home 417 | Basment Basement Garage 418 | BuiltIn Built-In (Garage part of house - typically has room above garage) 419 | CarPort Car Port 420 | Detchd Detached from home 421 | NA No Garage 422 | 423 | GarageYrBlt: Year garage was built 424 | 425 | GarageFinish: Interior finish of the garage 426 | 427 | Fin Finished 428 | RFn Rough Finished 429 | Unf Unfinished 430 | NA No Garage 431 | 432 | GarageCars: Size of garage in car capacity 433 | 434 | GarageArea: Size of garage in square feet 435 | 436 | GarageQual: Garage quality 
437 | 438 | Ex Excellent 439 | Gd Good 440 | TA Typical/Average 441 | Fa Fair 442 | Po Poor 443 | NA No Garage 444 | 445 | GarageCond: Garage condition 446 | 447 | Ex Excellent 448 | Gd Good 449 | TA Typical/Average 450 | Fa Fair 451 | Po Poor 452 | NA No Garage 453 | 454 | PavedDrive: Paved driveway 455 | 456 | Y Paved 457 | P Partial Pavement 458 | N Dirt/Gravel 459 | 460 | WoodDeckSF: Wood deck area in square feet 461 | 462 | OpenPorchSF: Open porch area in square feet 463 | 464 | EnclosedPorch: Enclosed porch area in square feet 465 | 466 | 3SsnPorch: Three season porch area in square feet 467 | 468 | ScreenPorch: Screen porch area in square feet 469 | 470 | PoolArea: Pool area in square feet 471 | 472 | PoolQC: Pool quality 473 | 474 | Ex Excellent 475 | Gd Good 476 | TA Average/Typical 477 | Fa Fair 478 | NA No Pool 479 | 480 | Fence: Fence quality 481 | 482 | GdPrv Good Privacy 483 | MnPrv Minimum Privacy 484 | GdWo Good Wood 485 | MnWw Minimum Wood/Wire 486 | NA No Fence 487 | 488 | MiscFeature: Miscellaneous feature not covered in other categories 489 | 490 | Elev Elevator 491 | Gar2 2nd Garage (if not described in garage section) 492 | Othr Other 493 | Shed Shed (over 100 SF) 494 | TenC Tennis Court 495 | NA None 496 | 497 | MiscVal: $Value of miscellaneous feature 498 | 499 | MoSold: Month Sold (MM) 500 | 501 | YrSold: Year Sold (YYYY) 502 | 503 | SaleType: Type of sale 504 | 505 | WD Warranty Deed - Conventional 506 | CWD Warranty Deed - Cash 507 | VWD Warranty Deed - VA Loan 508 | New Home just constructed and sold 509 | COD Court Officer Deed/Estate 510 | Con Contract 15% Down payment regular terms 511 | ConLw Contract Low Down payment and low interest 512 | ConLI Contract Low Interest 513 | ConLD Contract Low Down 514 | Oth Other 515 | 516 | SaleCondition: Condition of sale 517 | 518 | Normal Normal Sale 519 | Abnorml Abnormal Sale - trade, foreclosure, short sale 520 | AdjLand Adjoining Land Purchase 521 | Alloca Allocation - two linked properties 
with separate deeds, typically condo with a garage unit 522 | Family Sale between family members 523 | Partial Home was not completed when last assessed (associated with New Homes) 524 | -------------------------------------------------------------------------------- /assignments/programming01/test.csv: -------------------------------------------------------------------------------- 1 | "fixed.acidity","volatile.acidity","citric.acid","residual.sugar","chlorides","free.sulfur.dioxide","total.sulfur.dioxide","density","pH","sulphates","alcohol","quality" 2 | 10.0,0.46,0.44,2.9,0.065,4.0,8.0,0.99674,3.33,0.62,12.2,6.0 3 | 9.5,0.57,0.27,2.3,0.08199999999999999,23.0,144.0,0.99782,3.27,0.55,9.4,5.0 4 | 7.3,0.91,0.1,1.8,0.07400000000000001,20.0,56.0,0.99672,3.35,0.56,9.2,5.0 5 | 8.2,0.23,0.42,1.9,0.069,9.0,17.0,0.9937600000000001,3.21,0.54,12.3,6.0 6 | 7.9,0.37,0.23,1.8,0.077,23.0,49.0,0.9963,3.28,0.67,9.3,5.0 7 | 7.7,0.58,0.01,1.8,0.08800000000000001,12.0,18.0,0.9956799999999999,3.32,0.56,10.5,7.0 8 | 8.8,0.3,0.38,2.3,0.06,19.0,72.0,0.9954299999999999,3.39,0.72,11.8,6.0 9 | 7.1,0.46,0.2,1.9,0.077,28.0,54.0,0.9956,3.37,0.64,10.4,6.0 10 | 6.9,0.685,0.0,2.5,0.105,22.0,37.0,0.9966,3.46,0.57,10.6,6.0 11 | 10.7,0.4,0.48,2.1,0.125,15.0,49.0,0.998,3.03,0.81,9.7,6.0 12 | 7.7,0.69,0.49,1.8,0.115,20.0,112.0,0.9968,3.21,0.71,9.3,5.0 13 | 10.3,0.32,0.45,6.4,0.073,5.0,13.0,0.9976,3.23,0.82,12.6,8.0 14 | 6.9,0.52,0.25,2.6,0.081,10.0,37.0,0.99685,3.46,0.5,11.0,5.0 15 | 6.2,0.36,0.24,2.2,0.095,19.0,42.0,0.9946,3.57,0.57,11.7,6.0 16 | 9.4,0.5,0.34,3.6,0.08199999999999999,5.0,14.0,0.9987,3.29,0.52,10.7,6.0 17 | 11.9,0.39,0.69,2.8,0.095,17.0,35.0,0.9994,3.1,0.61,10.8,6.0 18 | 7.2,0.63,0.0,1.9,0.09699999999999999,14.0,38.0,0.99675,3.37,0.58,9.0,6.0 19 | 9.1,0.36,0.39,1.8,0.06,21.0,55.0,0.99495,3.18,0.82,11.0,7.0 20 | 8.5,0.34,0.44,1.7,0.079,6.0,12.0,0.99605,3.52,0.63,10.7,5.0 21 | 8.8,0.66,0.26,1.7,0.07400000000000001,4.0,23.0,0.9971,3.15,0.74,9.2,5.0 22 | 
6.9,0.39,0.24,2.1,0.102,4.0,7.0,0.9946200000000001,3.44,0.58,11.4,4.0 23 | 6.9,0.52,0.25,2.6,0.081,10.0,37.0,0.99685,3.46,0.5,11.0,5.0 24 | 6.8,0.59,0.06,6.0,0.06,11.0,18.0,0.9962,3.41,0.59,10.8,7.0 25 | 7.8,0.53,0.01,1.6,0.077,3.0,19.0,0.995,3.16,0.46,9.8,5.0 26 | 6.8,0.62,0.08,1.9,0.068,28.0,38.0,0.99651,3.42,0.82,9.5,6.0 27 | 7.0,0.55,0.13,2.2,0.075,15.0,35.0,0.9959,3.36,0.59,9.7,6.0 28 | 11.6,0.41,0.54,1.5,0.095,22.0,41.0,0.99735,3.02,0.76,9.9,7.0 29 | 8.8,0.45,0.43,1.4,0.076,12.0,21.0,0.99551,3.21,0.75,10.2,6.0 30 | 8.7,0.69,0.31,3.0,0.086,23.0,81.0,1.0002,3.48,0.74,11.6,6.0 31 | 7.5,0.49,0.2,2.6,0.332,8.0,14.0,0.9968,3.21,0.9,10.5,6.0 32 | 6.8,0.66,0.07,1.6,0.07,16.0,61.0,0.99572,3.29,0.6,9.3,5.0 33 | 8.9,0.28,0.45,1.7,0.067,7.0,12.0,0.99354,3.25,0.55,12.3,7.0 34 | 7.6,0.42,0.08,2.7,0.084,15.0,48.0,0.9968,3.21,0.59,10.0,5.0 35 | 7.0,0.62,0.1,1.4,0.071,27.0,63.0,0.996,3.28,0.61,9.2,5.0 36 | 10.1,0.31,0.35,1.6,0.075,9.0,28.0,0.99672,3.24,0.83,11.2,7.0 37 | 7.3,0.73,0.24,1.9,0.10800000000000001,18.0,102.0,0.9967,3.26,0.59,9.3,5.0 38 | 7.4,0.965,0.0,2.2,0.08800000000000001,16.0,32.0,0.99756,3.58,0.67,10.2,5.0 39 | 5.8,0.29,0.26,1.7,0.063,3.0,11.0,0.9915,3.39,0.54,13.5,6.0 40 | 7.5,0.77,0.2,8.1,0.098,30.0,92.0,0.99892,3.2,0.58,9.2,5.0 41 | 7.2,0.49,0.18,2.7,0.069,13.0,34.0,0.9967,3.29,0.48,9.2,6.0 42 | 7.3,0.67,0.05,3.6,0.107,6.0,20.0,0.9972,3.4,0.63,10.1,5.0 43 | 7.5,0.52,0.4,2.2,0.06,12.0,20.0,0.99474,3.26,0.64,11.8,6.0 44 | 10.8,0.47,0.43,2.1,0.171,27.0,66.0,0.9982,3.17,0.76,10.8,6.0 45 | 6.3,0.98,0.01,2.0,0.057,15.0,33.0,0.9948799999999999,3.6,0.46,11.2,6.0 46 | 8.7,0.675,0.1,1.6,0.09,4.0,11.0,0.99745,3.31,0.65,9.55,5.0 47 | 7.7,0.965,0.1,2.1,0.11199999999999999,11.0,22.0,0.9963,3.26,0.5,9.5,5.0 48 | 9.7,0.66,0.34,2.6,0.094,12.0,88.0,0.9979600000000001,3.26,0.66,10.1,5.0 49 | 7.3,0.55,0.03,1.6,0.07200000000000001,17.0,42.0,0.9956,3.37,0.48,9.0,4.0 50 | 6.9,0.45,0.11,2.4,0.043,6.0,12.0,0.99354,3.3,0.65,11.4,6.0 51 | 
9.9,0.54,0.45,2.3,0.071,16.0,40.0,0.9991,3.39,0.62,9.4,5.0 52 | 7.3,0.58,0.3,2.4,0.07400000000000001,15.0,55.0,0.9968,3.46,0.59,10.2,5.0 53 | 7.9,0.54,0.34,2.5,0.076,8.0,17.0,0.99235,3.2,0.72,13.1,8.0 54 | 6.7,0.76,0.02,1.8,0.078,6.0,12.0,0.996,3.55,0.63,9.95,3.0 55 | 5.1,0.51,0.18,2.1,0.042,16.0,101.0,0.9924,3.46,0.87,12.9,7.0 56 | 7.0,0.975,0.04,2.0,0.087,12.0,67.0,0.99565,3.35,0.6,9.4,4.0 57 | 10.8,0.29,0.42,1.6,0.084,19.0,27.0,0.99545,3.28,0.73,11.9,6.0 58 | 7.2,0.49,0.24,2.2,0.07,5.0,36.0,0.996,3.33,0.48,9.4,5.0 59 | 7.2,0.62,0.06,2.7,0.077,15.0,85.0,0.99746,3.51,0.54,9.5,5.0 60 | 7.2,0.54,0.27,2.6,0.084,12.0,78.0,0.9964,3.39,0.71,11.0,5.0 61 | 7.3,0.74,0.08,1.7,0.094,10.0,45.0,0.9957600000000001,3.24,0.5,9.8,5.0 62 | 8.8,0.6,0.29,2.2,0.098,5.0,15.0,0.9988,3.36,0.49,9.1,5.0 63 | 8.1,0.72,0.09,2.8,0.084,18.0,49.0,0.9994,3.43,0.72,11.1,6.0 64 | 7.8,0.645,0.0,5.5,0.086,5.0,18.0,0.9986,3.4,0.55,9.6,6.0 65 | 5.9,0.61,0.08,2.1,0.071,16.0,24.0,0.9937600000000001,3.56,0.77,11.1,6.0 66 | 9.3,0.38,0.48,3.8,0.132,3.0,11.0,0.99577,3.23,0.57,13.2,6.0 67 | 9.0,0.39,0.4,1.3,0.044000000000000004,25.0,50.0,0.9947799999999999,3.2,0.83,10.9,6.0 68 | 9.4,0.33,0.59,2.8,0.079,9.0,30.0,0.9976,3.12,0.54,12.0,6.0 69 | 7.0,0.59,0.0,1.7,0.052000000000000005,3.0,8.0,0.996,3.41,0.47,10.3,5.0 70 | 7.4,0.29,0.38,1.7,0.062,9.0,30.0,0.9968,3.41,0.53,9.5,6.0 71 | 9.8,0.66,0.39,3.2,0.083,21.0,59.0,0.9989,3.37,0.71,11.5,7.0 72 | 8.6,0.52,0.38,1.5,0.096,5.0,18.0,0.99666,3.2,0.52,9.4,5.0 73 | 8.2,0.635,0.1,2.1,0.073,25.0,60.0,0.9963799999999999,3.29,0.75,10.9,6.0 74 | 8.1,0.33,0.44,1.5,0.042,6.0,12.0,0.9954200000000001,3.35,0.61,10.7,5.0 75 | 11.5,0.31,0.51,2.2,0.079,14.0,28.0,0.9982,3.03,0.93,9.8,6.0 76 | 10.8,0.26,0.45,3.3,0.06,20.0,49.0,0.9972,3.13,0.54,9.6,5.0 77 | 8.6,0.83,0.0,2.8,0.095,17.0,43.0,0.9982200000000001,3.33,0.6,10.4,6.0 78 | 7.4,0.59,0.08,4.4,0.086,6.0,29.0,0.9974,3.38,0.5,9.0,4.0 79 | 8.1,0.575,0.22,2.1,0.077,12.0,65.0,0.9967,3.29,0.51,9.2,5.0 80 | 
8.0,0.62,0.35,2.8,0.086,28.0,52.0,0.997,3.31,0.62,10.8,5.0 81 | 7.6,0.74,0.0,1.9,0.1,6.0,12.0,0.99521,3.36,0.59,11.0,5.0 82 | 6.6,0.695,0.0,2.1,0.075,12.0,56.0,0.9968,3.49,0.67,9.2,5.0 83 | 8.0,0.58,0.28,3.2,0.066,21.0,114.0,0.9973,3.22,0.54,9.4,6.0 84 | 8.5,0.32,0.42,2.3,0.075,12.0,19.0,0.99434,3.14,0.71,11.8,7.0 85 | 7.0,0.42,0.35,1.6,0.08800000000000001,16.0,39.0,0.9961,3.34,0.55,9.2,5.0 86 | 7.1,0.46,0.14,2.8,0.076,15.0,37.0,0.99624,3.36,0.49,10.7,5.0 87 | 6.7,1.04,0.08,2.3,0.067,19.0,32.0,0.9964799999999999,3.52,0.57,11.0,4.0 88 | 7.4,0.49,0.27,2.1,0.071,14.0,25.0,0.9938799999999999,3.35,0.63,12.0,6.0 89 | 7.6,0.49,0.26,1.6,0.23600000000000002,10.0,88.0,0.9968,3.11,0.8,9.3,5.0 90 | 7.1,0.71,0.0,1.9,0.08,14.0,35.0,0.9972,3.47,0.55,9.4,5.0 91 | 7.3,1.07,0.09,1.7,0.17800000000000002,10.0,89.0,0.9962,3.3,0.57,9.0,5.0 92 | 9.9,0.57,0.25,2.0,0.10400000000000001,12.0,89.0,0.9963,3.04,0.9,10.1,5.0 93 | 6.3,0.68,0.01,3.7,0.10300000000000001,32.0,54.0,0.9958600000000001,3.51,0.66,11.3,6.0 94 | 9.9,0.35,0.41,2.3,0.083,11.0,61.0,0.9982,3.21,0.5,9.5,5.0 95 | 9.8,0.39,0.43,1.65,0.068,5.0,11.0,0.9947799999999999,3.19,0.46,11.4,5.0 96 | 8.9,0.35,0.4,3.6,0.11,12.0,24.0,0.99549,3.23,0.7,12.0,7.0 97 | 7.6,0.54,0.02,1.7,0.085,17.0,31.0,0.9958899999999999,3.37,0.51,10.4,6.0 98 | 7.9,0.3,0.68,8.3,0.05,37.5,278.0,0.99316,3.01,0.51,12.3,7.0 99 | 9.9,0.53,0.57,2.4,0.09300000000000001,30.0,52.0,0.9971,3.19,0.76,11.6,7.0 100 | 9.9,0.35,0.38,1.5,0.057999999999999996,31.0,47.0,0.9967600000000001,3.26,0.82,10.6,7.0 101 | 6.8,0.65,0.02,2.1,0.078,8.0,15.0,0.99498,3.35,0.62,10.4,6.0 102 | 8.2,0.32,0.42,2.3,0.098,3.0,9.0,0.9950600000000001,3.27,0.55,12.3,6.0 103 | 6.6,0.84,0.03,2.3,0.059000000000000004,32.0,48.0,0.9952,3.52,0.56,12.3,7.0 104 | 7.9,0.49,0.32,1.9,0.08199999999999999,17.0,144.0,0.9968,3.2,0.55,9.5,5.0 105 | 9.6,0.5,0.36,2.8,0.11599999999999999,26.0,55.0,0.9972200000000001,3.18,0.68,10.9,5.0 106 | 7.1,0.43,0.42,5.5,0.07,29.0,129.0,0.9973,3.42,0.72,10.5,5.0 107 | 
9.3,0.43,0.44,1.9,0.085,9.0,22.0,0.99708,3.28,0.55,9.5,5.0 108 | 8.0,0.64,0.22,2.4,0.094,5.0,33.0,0.9961200000000001,3.37,0.58,11.0,5.0 109 | 13.4,0.27,0.62,2.6,0.08199999999999999,6.0,21.0,1.0002,3.16,0.67,9.7,6.0 110 | 11.7,0.45,0.63,2.2,0.073,7.0,23.0,0.99974,3.21,0.69,10.9,6.0 111 | 8.2,0.74,0.09,2.0,0.067,5.0,10.0,0.99418,3.28,0.57,11.8,6.0 112 | 10.6,0.31,0.49,2.2,0.063,18.0,40.0,0.9976,3.14,0.51,9.8,6.0 113 | 6.3,0.55,0.15,1.8,0.077,26.0,35.0,0.9931399999999999,3.32,0.82,11.6,6.0 114 | 7.2,0.695,0.13,2.0,0.076,12.0,20.0,0.99546,3.29,0.54,10.1,5.0 115 | 10.2,0.44,0.42,2.0,0.071,7.0,20.0,0.99566,3.14,0.79,11.1,7.0 116 | 8.8,0.24,0.35,1.7,0.055,13.0,27.0,0.9939399999999999,3.14,0.59,11.3,7.0 117 | 9.7,0.53,0.6,2.0,0.039,5.0,19.0,0.99585,3.3,0.86,12.4,6.0 118 | 9.4,0.34,0.37,2.2,0.075,5.0,13.0,0.998,3.22,0.62,9.2,5.0 119 | 8.4,0.65,0.6,2.1,0.11199999999999999,12.0,90.0,0.9973,3.2,0.52,9.2,5.0 120 | 6.1,0.6,0.08,1.8,0.071,14.0,45.0,0.99336,3.38,0.54,11.0,5.0 121 | 7.2,0.73,0.02,2.5,0.076,16.0,42.0,0.9972,3.44,0.52,9.3,5.0 122 | 7.3,0.32,0.23,2.3,0.066,35.0,70.0,0.9958799999999999,3.43,0.62,10.1,5.0 123 | 11.1,0.45,0.73,3.2,0.066,6.0,22.0,0.9986,3.17,0.66,11.2,6.0 124 | 10.3,0.43,0.44,2.4,0.214,5.0,12.0,0.9994,3.19,0.63,9.5,6.0 125 | 9.6,0.77,0.12,2.9,0.08199999999999999,30.0,74.0,0.99865,3.3,0.64,10.4,6.0 126 | 6.5,0.53,0.06,2.0,0.063,29.0,44.0,0.9948899999999999,3.38,0.83,10.3,6.0 127 | 9.5,0.885,0.27,2.3,0.084,31.0,145.0,0.9978,3.24,0.53,9.4,5.0 128 | 5.3,0.47,0.11,2.2,0.048,16.0,89.0,0.99182,3.54,0.88,13.566666666666698,7.0 129 | 7.1,0.6,0.01,2.3,0.079,24.0,37.0,0.9951399999999999,3.4,0.61,10.9,6.0 130 | 5.2,0.645,0.0,2.15,0.08,15.0,28.0,0.99444,3.78,0.61,12.5,6.0 131 | 6.6,0.61,0.0,1.6,0.069,4.0,8.0,0.9939600000000001,3.33,0.37,10.4,4.0 132 | 7.8,0.61,0.29,1.6,0.114,9.0,29.0,0.9974,3.26,1.56,9.1,5.0 133 | 7.1,0.75,0.01,2.2,0.059000000000000004,11.0,18.0,0.9924200000000001,3.39,0.4,12.8,6.0 134 | 7.9,0.57,0.31,2.0,0.079,10.0,79.0,0.99677,3.29,0.69,9.5,6.0 135 
| 11.1,0.31,0.49,2.7,0.094,16.0,47.0,0.9986,3.12,1.02,10.6,7.0 136 | 10.4,0.64,0.24,2.8,0.105,29.0,53.0,0.9998,3.24,0.67,9.9,5.0 137 | 7.5,0.55,0.24,2.0,0.078,10.0,28.0,0.9983,3.45,0.78,9.5,6.0 138 | 9.9,0.54,0.26,2.0,0.111,7.0,60.0,0.9970899999999999,2.94,0.98,10.2,5.0 139 | 6.6,0.57,0.02,2.1,0.115,6.0,16.0,0.99654,3.38,0.69,9.5,5.0 140 | 11.8,0.33,0.49,3.4,0.09300000000000001,54.0,80.0,1.0002,3.3,0.76,10.7,7.0 141 | 9.3,0.39,0.4,2.6,0.073,10.0,26.0,0.9984,3.34,0.75,10.2,6.0 142 | 5.6,0.615,0.0,1.6,0.08900000000000001,16.0,59.0,0.9943,3.58,0.52,9.9,5.0 143 | 6.4,0.57,0.12,2.3,0.12,25.0,36.0,0.99519,3.47,0.71,11.3,7.0 144 | 8.2,0.43,0.29,1.6,0.081,27.0,45.0,0.99603,3.25,0.54,10.3,5.0 145 | 7.3,0.34,0.33,2.5,0.064,21.0,37.0,0.9952,3.35,0.77,12.1,7.0 146 | 7.0,0.6,0.3,4.5,0.068,20.0,110.0,0.9991399999999999,3.3,1.17,10.2,5.0 147 | 6.8,0.56,0.03,1.7,0.084,18.0,35.0,0.9968,3.44,0.63,10.0,6.0 148 | 6.4,0.885,0.0,2.3,0.166,6.0,12.0,0.99551,3.56,0.51,10.8,5.0 149 | 6.6,0.8,0.03,7.8,0.079,6.0,12.0,0.9963,3.52,0.5,12.2,5.0 150 | 7.9,0.33,0.23,1.7,0.077,18.0,45.0,0.99625,3.29,0.65,9.3,5.0 151 | 10.2,0.34,0.48,2.1,0.052000000000000005,5.0,9.0,0.9945799999999999,3.2,0.69,12.1,7.0 152 | 6.1,0.38,0.15,1.8,0.07200000000000001,6.0,19.0,0.9955,3.42,0.57,9.4,5.0 153 | 6.1,0.715,0.1,2.6,0.053,13.0,27.0,0.9936200000000001,3.57,0.5,11.9,5.0 154 | 10.5,0.28,0.51,1.7,0.08,10.0,24.0,0.9982,3.2,0.89,9.4,6.0 155 | 10.0,0.35,0.47,2.0,0.061,6.0,11.0,0.99585,3.23,0.52,12.0,6.0 156 | 9.1,0.5,0.3,1.9,0.065,8.0,17.0,0.99774,3.32,0.71,10.5,6.0 157 | 6.3,0.76,0.0,2.9,0.07200000000000001,26.0,52.0,0.99379,3.51,0.6,11.5,6.0 158 | 8.3,0.3,0.49,3.8,0.09,11.0,24.0,0.99498,3.27,0.64,12.1,7.0 159 | 6.1,0.32,0.25,1.8,0.086,5.0,32.0,0.99464,3.36,0.44,10.1,5.0 160 | 8.3,0.715,0.15,1.8,0.08900000000000001,10.0,52.0,0.9968,3.23,0.77,9.5,5.0 161 | 6.6,0.66,0.0,3.0,0.115,21.0,31.0,0.99629,3.45,0.63,10.3,5.0 162 | 7.3,0.69,0.32,2.2,0.069,35.0,104.0,0.9963200000000001,3.33,0.51,9.5,5.0 163 | 
9.8,0.66,0.39,3.2,0.083,21.0,59.0,0.9989,3.37,0.71,11.5,7.0 164 | 8.9,0.43,0.45,1.9,0.052000000000000005,6.0,16.0,0.9948,3.35,0.7,12.5,6.0 165 | 6.9,0.635,0.17,2.4,0.24100000000000002,6.0,18.0,0.9961,3.4,0.59,10.3,6.0 166 | 7.5,0.63,0.27,2.0,0.083,17.0,91.0,0.99616,3.26,0.58,9.8,6.0 167 | 9.6,0.56,0.23,3.4,0.102,37.0,92.0,0.9996,3.3,0.65,10.1,5.0 168 | 10.3,0.53,0.48,2.5,0.063,6.0,25.0,0.9998,3.12,0.59,9.3,6.0 169 | 7.4,0.63,0.07,2.4,0.09,11.0,37.0,0.9979,3.43,0.76,9.7,6.0 170 | 13.2,0.46,0.52,2.2,0.071,12.0,35.0,1.0006,3.1,0.56,9.0,6.0 171 | 8.9,0.59,0.5,2.0,0.337,27.0,81.0,0.9964,3.04,1.61,9.5,6.0 172 | 8.2,0.78,0.0,2.2,0.08900000000000001,13.0,26.0,0.9978,3.37,0.46,9.6,4.0 173 | 8.7,0.69,0.0,3.2,0.084,13.0,33.0,0.9992,3.36,0.45,9.4,5.0 174 | 7.8,0.6,0.14,2.4,0.086,3.0,15.0,0.9975,3.42,0.6,10.8,6.0 175 | 10.0,0.59,0.31,2.2,0.09,26.0,62.0,0.9994,3.18,0.63,10.2,6.0 176 | 6.0,0.31,0.47,3.6,0.067,18.0,42.0,0.99549,3.39,0.66,11.0,6.0 177 | 10.9,0.39,0.47,1.8,0.11800000000000001,6.0,14.0,0.9982,3.3,0.75,9.8,6.0 178 | 7.5,0.51,0.02,1.7,0.084,13.0,31.0,0.9953799999999999,3.36,0.54,10.5,6.0 179 | 9.9,0.35,0.41,2.3,0.083,11.0,61.0,0.9982,3.21,0.5,9.5,5.0 180 | 6.3,1.02,0.0,2.0,0.083,17.0,24.0,0.9943700000000001,3.59,0.55,11.2,4.0 181 | 9.0,0.46,0.23,2.8,0.092,28.0,104.0,0.9983,3.1,0.56,9.2,5.0 182 | 7.8,0.64,0.0,1.9,0.07200000000000001,27.0,55.0,0.9962,3.31,0.63,11.0,5.0 183 | 8.6,0.47,0.47,2.4,0.07400000000000001,7.0,29.0,0.9979,3.08,0.46,9.5,5.0 184 | 10.6,0.31,0.49,2.5,0.067,6.0,21.0,0.9987,3.26,0.86,10.7,6.0 185 | 9.9,0.72,0.55,1.7,0.136,24.0,52.0,0.9975200000000001,3.35,0.94,10.0,5.0 186 | 10.9,0.53,0.49,4.6,0.11800000000000001,10.0,17.0,1.0002,3.07,0.56,11.7,6.0 187 | 10.4,0.34,0.58,3.7,0.174,6.0,16.0,0.997,3.19,0.7,11.3,6.0 188 | 7.7,0.715,0.01,2.1,0.064,31.0,43.0,0.99371,3.41,0.57,11.8,6.0 189 | 6.2,0.56,0.09,1.7,0.053,24.0,32.0,0.9940200000000001,3.54,0.6,11.3,5.0 190 | 8.3,0.6,0.13,2.6,0.085,6.0,24.0,0.9984,3.31,0.59,9.2,6.0 191 | 
7.1,0.43,0.42,5.5,0.071,28.0,128.0,0.9973,3.42,0.71,10.5,5.0 192 | 13.0,0.47,0.49,4.3,0.085,6.0,47.0,1.0021,3.3,0.68,12.7,6.0 193 | 8.6,0.37,0.65,6.4,0.08,3.0,8.0,0.9981700000000001,3.27,0.58,11.0,5.0 194 | 8.2,0.35,0.33,2.4,0.076,11.0,47.0,0.9959899999999999,3.27,0.81,11.0,6.0 195 | 7.3,0.66,0.0,2.0,0.084,6.0,23.0,0.9983,3.61,0.96,9.9,6.0 196 | 7.2,1.0,0.0,3.0,0.102,7.0,16.0,0.9958600000000001,3.43,0.46,10.0,5.0 197 | 10.1,0.38,0.5,2.4,0.10400000000000001,6.0,13.0,0.9964299999999999,3.22,0.65,11.6,7.0 198 | 7.5,0.51,0.02,1.7,0.084,13.0,31.0,0.9953799999999999,3.36,0.54,10.5,6.0 199 | 5.6,0.66,0.0,2.2,0.087,3.0,11.0,0.9937799999999999,3.71,0.63,12.8,7.0 200 | 8.7,0.78,0.51,1.7,0.415,12.0,66.0,0.99623,3.0,1.17,9.2,5.0 201 | 7.8,0.55,0.0,1.7,0.07,7.0,17.0,0.99659,3.26,0.64,9.4,6.0 202 | 7.9,0.65,0.01,2.5,0.078,17.0,38.0,0.9963,3.34,0.74,11.7,7.0 203 | 7.5,0.38,0.57,2.3,0.106,5.0,12.0,0.99605,3.36,0.55,11.4,6.0 204 | 8.0,0.38,0.44,1.9,0.098,6.0,15.0,0.9956,3.3,0.64,11.4,6.0 205 | 8.8,0.31,0.4,2.8,0.109,7.0,16.0,0.9961399999999999,3.31,0.79,11.8,7.0 206 | 6.7,0.46,0.24,1.7,0.077,18.0,34.0,0.9948,3.39,0.6,10.6,6.0 207 | 12.6,0.31,0.72,2.2,0.07200000000000001,6.0,29.0,0.9987,2.88,0.82,9.8,8.0 208 | 6.8,0.91,0.06,2.0,0.06,4.0,11.0,0.99592,3.53,0.64,10.9,4.0 209 | 11.1,0.35,0.48,3.1,0.09,5.0,21.0,0.9986,3.17,0.53,10.5,5.0 210 | 9.6,0.56,0.31,2.8,0.08900000000000001,15.0,46.0,0.9979,3.11,0.92,10.0,6.0 211 | 7.1,0.755,0.15,1.8,0.107,20.0,84.0,0.99593,3.19,0.5,9.5,5.0 212 | 8.9,0.5,0.21,2.2,0.08800000000000001,21.0,39.0,0.99692,3.33,0.83,11.1,6.0 213 | 7.6,0.41,0.14,3.0,0.087,21.0,43.0,0.9964,3.32,0.57,10.5,6.0 214 | 12.7,0.6,0.65,2.3,0.063,6.0,25.0,0.9997,3.03,0.57,9.9,5.0 215 | 7.4,0.47,0.46,2.2,0.114,7.0,20.0,0.9964700000000001,3.32,0.63,10.5,5.0 216 | 8.3,0.49,0.36,1.8,0.222,6.0,16.0,0.998,3.18,0.6,9.5,6.0 217 | 10.6,1.025,0.43,2.8,0.08,21.0,84.0,0.9985,3.06,0.57,10.1,5.0 218 | 8.2,0.38,0.32,2.5,0.08,24.0,71.0,0.99624,3.27,0.85,11.0,6.0 219 | 
8.8,0.27,0.46,2.1,0.095,20.0,29.0,0.9948799999999999,3.26,0.56,11.3,6.0 220 | 8.0,0.705,0.05,1.9,0.07400000000000001,8.0,19.0,0.9962,3.34,0.95,10.5,6.0 221 | 5.6,0.5,0.09,2.3,0.049,17.0,99.0,0.9937,3.63,0.63,13.0,5.0 222 | 8.5,0.37,0.32,1.8,0.066,26.0,51.0,0.99456,3.38,0.72,11.8,6.0 223 | 7.0,0.43,0.36,1.6,0.08900000000000001,14.0,37.0,0.99615,3.34,0.56,9.2,6.0 224 | 10.0,0.69,0.11,1.4,0.084,8.0,24.0,0.9957799999999999,2.88,0.47,9.7,5.0 225 | 10.2,0.54,0.37,15.4,0.214,55.0,95.0,1.00369,3.18,0.77,9.0,6.0 226 | 6.1,0.58,0.23,2.5,0.044000000000000004,16.0,70.0,0.9935200000000001,3.46,0.65,12.5,6.0 227 | 7.0,0.22,0.3,1.8,0.065,16.0,20.0,0.99672,3.61,0.82,10.0,6.0 228 | 11.9,0.39,0.69,2.8,0.095,17.0,35.0,0.9994,3.1,0.61,10.8,6.0 229 | 7.9,0.51,0.25,2.9,0.077,21.0,45.0,0.9974,3.49,0.96,12.1,6.0 230 | 7.0,0.45,0.34,2.7,0.08199999999999999,16.0,72.0,0.998,3.55,0.6,9.5,5.0 231 | 8.3,0.85,0.14,2.5,0.09300000000000001,13.0,54.0,0.99724,3.36,0.54,10.1,5.0 232 | 8.2,0.34,0.38,2.5,0.08,12.0,57.0,0.9978,3.3,0.47,9.0,6.0 233 | 7.3,0.33,0.47,2.1,0.077,5.0,11.0,0.9958,3.33,0.53,10.3,6.0 234 | 8.0,0.58,0.16,2.0,0.12,3.0,7.0,0.99454,3.22,0.58,11.2,6.0 235 | 9.1,0.795,0.0,2.6,0.096,11.0,26.0,0.9994,3.35,0.83,9.4,6.0 236 | 8.8,0.41,0.64,2.2,0.09300000000000001,9.0,42.0,0.9986,3.54,0.66,10.5,5.0 237 | 10.6,1.02,0.43,2.9,0.076,26.0,88.0,0.9984,3.08,0.57,10.1,6.0 238 | 6.8,0.47,0.08,2.2,0.064,18.0,38.0,0.9955299999999999,3.3,0.65,9.6,6.0 239 | 7.2,0.58,0.54,2.1,0.114,3.0,9.0,0.9971899999999999,3.33,0.57,10.3,4.0 240 | 11.9,0.58,0.58,1.9,0.071,5.0,18.0,0.998,3.09,0.63,10.0,6.0 241 | 5.6,0.54,0.04,1.7,0.049,5.0,13.0,0.9942,3.72,0.58,11.4,5.0 242 | 7.5,0.49,0.19,1.9,0.076,10.0,44.0,0.9957,3.39,0.54,9.7,5.0 243 | 7.5,0.705,0.24,1.8,0.36,15.0,63.0,0.9964,3.0,1.59,9.5,5.0 244 | 10.8,0.4,0.41,2.2,0.084,7.0,17.0,0.9984,3.08,0.67,9.3,6.0 245 | 7.4,0.41,0.24,1.8,0.066,18.0,47.0,0.9956,3.37,0.62,10.4,5.0 246 | 6.0,0.5,0.04,2.2,0.092,13.0,26.0,0.9964700000000001,3.46,0.47,10.0,5.0 247 | 
7.8,0.7,0.06,1.9,0.079,20.0,35.0,0.9962799999999999,3.4,0.69,10.9,5.0 248 | 8.9,0.31,0.57,2.0,0.111,26.0,85.0,0.9971,3.26,0.53,9.7,5.0 249 | 8.2,0.28,0.4,2.4,0.052000000000000005,4.0,10.0,0.99356,3.33,0.7,12.8,7.0 250 | 7.2,0.725,0.05,4.65,0.086,4.0,11.0,0.9962,3.41,0.39,10.9,5.0 251 | 11.1,0.31,0.53,2.2,0.06,3.0,10.0,0.99572,3.02,0.83,10.9,7.0 252 | 9.4,0.4,0.31,2.2,0.09,13.0,62.0,0.9966,3.07,0.63,10.5,6.0 253 | 8.6,0.52,0.38,1.5,0.096,5.0,18.0,0.99666,3.2,0.52,9.4,5.0 254 | 7.6,0.68,0.02,1.3,0.07200000000000001,9.0,20.0,0.9965,3.17,1.08,9.2,4.0 255 | 9.5,0.78,0.22,1.9,0.077,6.0,32.0,0.9988,3.26,0.56,10.6,6.0 256 | 9.6,0.6,0.5,2.3,0.079,28.0,71.0,0.9997,3.5,0.57,9.7,5.0 257 | 7.1,0.22,0.49,1.8,0.039,8.0,18.0,0.99344,3.39,0.56,12.4,6.0 258 | 7.0,0.65,0.02,2.1,0.066,8.0,25.0,0.9972,3.47,0.67,9.5,6.0 259 | 6.6,0.96,0.0,1.8,0.08199999999999999,5.0,16.0,0.9936,3.5,0.44,11.9,6.0 260 | 7.0,0.685,0.0,1.9,0.099,9.0,22.0,0.9960600000000001,3.34,0.6,9.7,5.0 261 | 7.4,0.53,0.26,2.0,0.10099999999999999,16.0,72.0,0.9957,3.15,0.57,9.4,5.0 262 | 8.0,0.42,0.17,2.0,0.073,6.0,18.0,0.9972,3.29,0.61,9.2,6.0 263 | 6.4,0.4,0.23,1.6,0.066,5.0,12.0,0.9958,3.34,0.56,9.2,5.0 264 | 7.7,0.6,0.0,2.6,0.055,7.0,13.0,0.99639,3.38,0.56,10.8,5.0 265 | 6.1,0.705,0.1,2.8,0.081,13.0,28.0,0.99631,3.6,0.66,10.2,5.0 266 | 12.7,0.59,0.45,2.3,0.08199999999999999,11.0,22.0,1.0,3.0,0.7,9.3,6.0 267 | 15.5,0.645,0.49,4.2,0.095,10.0,23.0,1.00315,2.92,0.74,11.1,5.0 268 | 7.0,0.5,0.14,1.8,0.078,10.0,23.0,0.99636,3.53,0.61,10.4,5.0 269 | 10.0,0.32,0.59,2.2,0.077,3.0,15.0,0.9994,3.2,0.78,9.6,5.0 270 | 8.2,0.59,0.0,2.5,0.09300000000000001,19.0,58.0,1.0002,3.5,0.65,9.3,6.0 271 | 6.9,0.54,0.04,3.0,0.077,7.0,27.0,0.9987,3.69,0.91,9.4,6.0 272 | 10.4,0.33,0.63,2.8,0.084,5.0,22.0,0.9998,3.26,0.74,11.2,7.0 273 | 11.1,0.42,0.47,2.65,0.085,9.0,34.0,0.99736,3.24,0.77,12.1,7.0 274 | 7.9,0.765,0.0,2.0,0.084,9.0,22.0,0.9961899999999999,3.33,0.68,10.9,6.0 275 | 
7.3,0.305,0.39,1.2,0.059000000000000004,7.0,11.0,0.99331,3.29,0.52,11.5,6.0 276 | 8.4,0.37,0.43,2.3,0.063,12.0,19.0,0.9955,3.17,0.81,11.2,7.0 277 | 7.2,0.5,0.18,2.1,0.071,12.0,31.0,0.99761,3.52,0.72,9.6,6.0 278 | 7.6,0.41,0.49,2.0,0.08800000000000001,16.0,43.0,0.998,3.48,0.64,9.1,5.0 279 | 6.9,0.84,0.21,4.1,0.07400000000000001,16.0,65.0,0.9984200000000001,3.53,0.72,9.25,6.0 280 | 7.7,0.835,0.0,2.6,0.081,6.0,14.0,0.9975,3.3,0.52,9.3,5.0 281 | 6.4,0.79,0.04,2.2,0.061,11.0,17.0,0.9958799999999999,3.53,0.65,10.4,6.0 282 | 10.4,0.33,0.63,2.8,0.084,5.0,22.0,0.9998,3.26,0.74,11.2,7.0 283 | 7.1,0.66,0.0,2.4,0.052000000000000005,6.0,11.0,0.99318,3.35,0.66,12.7,7.0 284 | 7.1,0.63,0.06,2.0,0.083,8.0,29.0,0.99855,3.67,0.73,9.6,5.0 285 | 7.6,0.46,0.11,2.6,0.079,12.0,49.0,0.9968,3.21,0.57,10.0,5.0 286 | 6.8,0.51,0.01,2.1,0.07400000000000001,9.0,25.0,0.9958,3.33,0.56,9.5,6.0 287 | 7.0,0.69,0.08,1.8,0.09699999999999999,22.0,89.0,0.9959,3.34,0.54,9.2,6.0 288 | 10.9,0.53,0.49,4.6,0.11800000000000001,10.0,17.0,1.0002,3.07,0.56,11.7,6.0 289 | 6.6,0.52,0.08,2.4,0.07,13.0,26.0,0.9935799999999999,3.4,0.72,12.5,7.0 290 | 7.4,0.635,0.1,2.4,0.08,16.0,33.0,0.99736,3.58,0.69,10.8,7.0 291 | 7.2,0.38,0.31,2.0,0.055999999999999994,15.0,29.0,0.99472,3.23,0.76,11.3,8.0 292 | 9.3,0.33,0.45,1.5,0.057,19.0,37.0,0.99498,3.18,0.89,11.1,7.0 293 | 7.0,0.51,0.09,2.1,0.062,4.0,9.0,0.99584,3.35,0.54,10.5,5.0 294 | 8.4,0.29,0.4,1.7,0.067,8.0,20.0,0.99603,3.39,0.6,10.5,5.0 295 | 8.6,0.37,0.65,6.4,0.08,3.0,8.0,0.9981700000000001,3.27,0.58,11.0,5.0 296 | 9.2,0.43,0.49,2.4,0.086,23.0,116.0,0.9976,3.23,0.64,9.5,5.0 297 | 9.3,0.37,0.44,1.6,0.038,21.0,42.0,0.99526,3.24,0.81,10.8,7.0 298 | 12.8,0.84,0.63,2.4,0.08800000000000001,13.0,35.0,0.9997,3.1,0.6,10.4,6.0 299 | 8.3,0.845,0.01,2.2,0.07,5.0,14.0,0.9967,3.32,0.58,11.0,4.0 300 | 7.4,0.36,0.3,1.8,0.07400000000000001,17.0,24.0,0.99419,3.24,0.7,11.4,8.0 301 | 6.6,0.61,0.01,1.9,0.08,8.0,25.0,0.99746,3.69,0.73,10.5,5.0 302 | 
6.5,0.615,0.0,1.9,0.065,9.0,18.0,0.9972,3.46,0.65,9.2,5.0 303 | 8.1,0.78,0.23,2.6,0.059000000000000004,5.0,15.0,0.997,3.37,0.56,11.3,5.0 304 | 7.6,0.4,0.29,1.9,0.078,29.0,66.0,0.9971,3.45,0.59,9.5,6.0 305 | 9.8,0.44,0.47,2.5,0.063,9.0,28.0,0.9981,3.24,0.65,10.8,6.0 306 | 7.7,0.56,0.2,2.0,0.075,9.0,39.0,0.9987,3.48,0.62,9.3,5.0 307 | 8.9,0.48,0.24,2.85,0.094,35.0,106.0,0.9982,3.1,0.53,9.2,5.0 308 | 5.6,0.5,0.09,2.3,0.049,17.0,99.0,0.9937,3.63,0.63,13.0,5.0 309 | 7.7,0.6,0.06,2.0,0.079,19.0,41.0,0.99697,3.39,0.62,10.1,6.0 310 | 13.3,0.43,0.58,1.9,0.07,15.0,40.0,1.0004,3.06,0.49,9.0,5.0 311 | 10.6,0.36,0.6,2.2,0.152,7.0,18.0,0.9986,3.04,1.06,9.4,5.0 312 | 8.4,0.67,0.19,2.2,0.09300000000000001,11.0,75.0,0.99736,3.2,0.59,9.2,4.0 313 | 9.0,0.6,0.29,2.0,0.069,32.0,73.0,0.99654,3.34,0.57,10.0,5.0 314 | 5.4,0.74,0.09,1.7,0.08900000000000001,16.0,26.0,0.9940200000000001,3.67,0.56,11.6,6.0 315 | 9.1,0.4,0.57,4.6,0.08,6.0,20.0,0.9965200000000001,3.28,0.57,12.5,6.0 316 | 8.3,0.54,0.28,1.9,0.077,11.0,40.0,0.9978,3.39,0.61,10.0,6.0 317 | 9.4,0.615,0.28,3.2,0.087,18.0,72.0,1.0001,3.31,0.53,9.7,5.0 318 | 8.0,0.28,0.44,1.8,0.081,28.0,68.0,0.9950100000000001,3.36,0.66,11.2,5.0 319 | 6.6,0.44,0.09,2.2,0.063,9.0,18.0,0.99444,3.42,0.69,11.3,6.0 320 | 6.7,0.64,0.23,2.1,0.08,11.0,119.0,0.9953799999999999,3.36,0.7,10.9,5.0 321 | 6.6,0.725,0.2,7.8,0.073,29.0,79.0,0.9977,3.29,0.54,9.2,5.0 322 | -------------------------------------------------------------------------------- /prac/ensembles.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from sklearn.datasets import load_breast_cancer\n", 10 | "from sklearn.model_selection import train_test_split" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "X, y = 
load_breast_cancer(return_X_y=True)" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 3, 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "name": "stdout", 29 | "output_type": "stream", 30 | "text": [ 31 | "(455, 30) (455,) (114, 30) (114,)\n" 32 | ] 33 | } 34 | ], 35 | "source": [ 36 | "trainX, testX, trainY, testY = train_test_split(X, y, random_state=42, test_size=0.2)\n", 37 | "print(trainX.shape, trainY.shape, testX.shape, testY.shape)" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 4, 43 | "metadata": {}, 44 | "outputs": [ 45 | { 46 | "name": "stdout", 47 | "output_type": "stream", 48 | "text": [ 49 | "[1 0 0 1 1 0 0 0 1 1 1 0 1 0 1 0 1 1 1 0 0 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0\n", 50 | " 1 0 1 1 0 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 0 0 1 1 0 0 1 1 1 0 0 1 1 0 0 1 0\n", 51 | " 1 1 1 0 1 1 0 1 0 0 0 0 0 0 1 1 1 1 1 1 1 1 0 0 1 0 0 1 0 0 1 1 1 0 1 1 0\n", 52 | " 1 1 0]\n" 53 | ] 54 | } 55 | ], 56 | "source": [ 57 | "print(testY)" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "## Bagging" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 6, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "from sklearn.ensemble import BaggingClassifier" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 41, 79 | "metadata": {}, 80 | "outputs": [ 81 | { 82 | "data": { 83 | "text/plain": [ 84 | "BaggingClassifier(base_estimator=None, bootstrap=True,\n", 85 | " bootstrap_features=False, max_features=1.0, max_samples=1.0,\n", 86 | " n_estimators=10, n_jobs=-1, oob_score=False, random_state=42,\n", 87 | " verbose=0, warm_start=False)" 88 | ] 89 | }, 90 | "execution_count": 41, 91 | "metadata": {}, 92 | "output_type": "execute_result" 93 | } 94 | ], 95 | "source": [ 96 | "bg = BaggingClassifier(base_estimator=None, n_estimators=10, n_jobs=-1, random_state=42)\n", 97 | "bg.fit(trainX, trainY)" 98 | ] 99 | }, 100 | { 101 | "cell_type": 
"code", 102 | "execution_count": 43, 103 | "metadata": {}, 104 | "outputs": [ 105 | { 106 | "name": "stdout", 107 | "output_type": "stream", 108 | "text": [ 109 | "[1 0 0 1 1 0 0 0 0 1 1 0 1 0 1 0 1 1 1 0 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0\n", 110 | " 1 0 1 1 0 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 0 0 1 1 0 0 1 1 1 0 0 1 1 0 0 1 0\n", 111 | " 1 1 1 1 1 1 0 1 1 0 0 0 0 0 1 1 1 1 1 1 1 1 0 0 1 0 0 1 0 0 1 1 1 0 0 1 0\n", 112 | " 1 1 0]\n", 113 | "Bagging (10 tree) score: 0.956140350877193\n" 114 | ] 115 | } 116 | ], 117 | "source": [ 118 | "out_y = bg.predict(testX) # предсказываем классы\n", 119 | "print(out_y)\n", 120 | "out_score = bg.score(testX, testY) # считаем качество классификации\n", 121 | "print('Bagging (10 tree) score:', out_score)" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 44, 127 | "metadata": {}, 128 | "outputs": [ 129 | { 130 | "name": "stdout", 131 | "output_type": "stream", 132 | "text": [ 133 | "Bagging (100 tree) score: 0.956140350877193\n" 134 | ] 135 | } 136 | ], 137 | "source": [ 138 | "bg = BaggingClassifier(base_estimator=None, n_estimators=100, n_jobs=-1, random_state=42).fit(trainX, trainY)\n", 139 | "print('Bagging (100 tree) score:', bg.score(testX, testY))" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 45, 145 | "metadata": {}, 146 | "outputs": [], 147 | "source": [ 148 | "from sklearn.svm import LinearSVC\n", 149 | "clf = LinearSVC(random_state=42)" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": 46, 155 | "metadata": {}, 156 | "outputs": [ 157 | { 158 | "name": "stdout", 159 | "output_type": "stream", 160 | "text": [ 161 | "Bagging (10 svm) score: 0.8859649122807017\n" 162 | ] 163 | } 164 | ], 165 | "source": [ 166 | "bg = BaggingClassifier(base_estimator=clf, n_estimators=10, n_jobs=-1, random_state=42).fit(trainX, trainY)\n", 167 | "print('Bagging (10 svm) score:', bg.score(testX, testY))" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | 
"execution_count": 47, 173 | "metadata": {}, 174 | "outputs": [ 175 | { 176 | "name": "stdout", 177 | "output_type": "stream", 178 | "text": [ 179 | "Bagging (100 svm) score: 0.9385964912280702\n" 180 | ] 181 | } 182 | ], 183 | "source": [ 184 | "bg = BaggingClassifier(base_estimator=clf, n_estimators=100, n_jobs=-1, random_state=42).fit(trainX, trainY)\n", 185 | "print('Bagging (100 svm) score:', bg.score(testX, testY))" 186 | ] 187 | }, 188 | { 189 | "cell_type": "markdown", 190 | "metadata": {}, 191 | "source": [ 192 | "## AdaBoost" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": 49, 198 | "metadata": {}, 199 | "outputs": [], 200 | "source": [ 201 | "from sklearn.ensemble import AdaBoostClassifier" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": 72, 207 | "metadata": {}, 208 | "outputs": [ 209 | { 210 | "name": "stdout", 211 | "output_type": "stream", 212 | "text": [ 213 | "AdaBoost (10 tree) score: 0.9824561403508771\n" 214 | ] 215 | } 216 | ], 217 | "source": [ 218 | "ada = AdaBoostClassifier(base_estimator=None, n_estimators=10, random_state=42).fit(trainX, trainY)\n", 219 | "print('AdaBoost (10 tree) score:', ada.score(testX, testY))" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": 77, 225 | "metadata": {}, 226 | "outputs": [ 227 | { 228 | "data": { 229 | "text/plain": [ 230 | "Text(0, 0.5, 'Accuracy')" 231 | ] 232 | }, 233 | "execution_count": 77, 234 | "metadata": {}, 235 | "output_type": "execute_result" 236 | }, 237 | { 238 | "data": { 239 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYsAAAEKCAYAAADjDHn2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzt3Xl81PWdx/HXh4Rwh/uSG+UKHiARvO8qXtja3Xq0Vatbt61W69a22toq1Fa3uttLt6utLGqt1lrXxZOq4FFbE8IhGu5mQMKVkCFAOAJJPvvH/IIxAjNAfvlNMu/n48HD3/zm95v5ZJB553v9fubuiIiIHEibqAsQEZH0p7AQEZGkFBYiIpKUwkJERJJSWIiISFIKCxERSUphISIiSSksREQkKYWFiIgklR11AU2lV69ePnTo0KjLEBFpUebNm7fJ3XsnO67VhMXQoUMpKiqKugwRkRbFzFancpy6oUREJCmFhYiIJKWwEBGRpBQWIiKSlMJCRESSUliIiEhSCgsREUmq1ayzEJFwbd6+m2eK1rC9uibqUujcPpvL8wfTtWPbqEuJlLszq3gjlTt2c8XEwaG+l8JCRA6opraOpwo/4j9eW07ljj2YRV0RuMN/v1XCd84fxRfyB5HVJg2KambLN25j6gvFvLuyguMHd+PyEwZhIf7lKCxEZL/eK6ng7pnFLN2wjZOP7Mldl4xlVL8uUZdF8botTJ25mDue+4Dfv7eaqVPGkj+0R9RlNYstO/fwi9eX8/jfV9O5XTZTp4zli5MGhxoUAObu4b242WTgl0AW8Dt3v6/R80OA6UBvIA58yd1Lg+d+BlxEYlzlNeAWP0Cx+fn5rst9iDSNtZU7+enLS3hp0XoGdOvAnReNYfLR/UL/QjoY7s6Li9bz05eXsH7LLj477ghuv2AM/bq2j7q0UNTWOc8UreH+WcvYvGM3V00czLfPG0WPTjmH9bpmNs/d85MdF1rLwsyygIeAzwClwFwzm+nuixsc9gDwuLs/ZmZnA/cCXzazk4FTgGOD4/4KnAG8GVa9IgK79tTy8Fsl/OatlQDceu5I/vWM4bRvmxVxZZ9mZlxy3BGcM6YPv3nzHzz8dgl/WbyRG886iutPHZaWNR+qolVx7ppZTPG6rUwc2oO7puQx9oiuzVpDmN1QE4GV7l4CYGZPA5cCDcMiD/i3YHsO8Hyw7UB7IAcwoC2wMcRaRTKau/Pqhxu456UlrK3cyUXH9uf7F45hQLcOUZeWVMecbL59XmLs4p6XFnP/rGU8U7SGOy/K49wxfdKqNXSwNmzZxX2vLOH5hevol9ueX105nkuO7R/JzxRmWAwA1jR4XApManTM+8BlJLqqPgd0MbOe7v53M5sDrCcRFg+6+5IQaxXJWMs2JAZK//aPCkb368JTXz2Rk47sGXVZB21Qj448/OV8/rpiE1NfKOarjxdx+sje/OjiPI7q0znq8g7Krj21PPrXGA/NWUlNnfPNs4/i62ceScec6IaZox7gvg140MyuBd4G1gK1ZnYUMAYYGBz3mpmd5u7vNDzZzG4AbgAYPDjcaWMirc2WHXv4+evLeeK9xEDpjy8dy5UTB5Od1bKXX506ohcv33IaT/x9NT9/fTmTf/E21548lJvPHUFu+/SeauvuvL6kjHteWszqih2cP7YvP7gwj8E9O0ZdWqhhsRYY1ODxwGDfXu6+jkTLAjPrDHze3SvN7KvAe+5eFTz3CnAS8E6j8x8BHoHEAHdIP4dIq1Jb5/xx7hrun7WULTv3cNWkwXz7M6PofpgDpemkbVYbrjt1GFPGHcEDs5bx6Lsxnl+4lu+eP5p/mjCQNmk41XZlWRXTXlzM28vLOapPZ564fiKnjUh6T6JmE9psKDPLBpYD55AIibnAVe5e3OCYXkDc3evM7CdArbv/yMwuB74KTCbRDfUq8At3f2F/76fZUCLJzV0V5+76gdJhPbj7krHkHZEbdVmh+6B
0C3fN/JD5H1Vy3MCu3DVlLMcP7h51WQBs3bWHX72+ghl/W0WHnCxuPXckXz5pCG2bqYUX+Wwod68xs5uAWSSmzk5392IzmwYUuftM4EzgXjNzEt1QNwanPwucDXxAYrD71QMFhYgc2PotO7n35aXMfH8d/bu259dXjufiiAZKo3DMwK78+esn838L1/HTl5dw2X/9jcuOH8Dtk0fTJzeaqbZ1dc6z80r52aylVGzfzeX5g7jt/FH06twuknqSCXWdRXNSy0Lk0+oHSh+cvZJad752+nC+FvFAadSqqmt4aM5KHn0nRtss45vnjOArpwylXXbzTbWd/9Fmps4s5v3SLRw/uBtTpxzNMQObdypsvVRbFgoLkVbI3Xlt8UbueWkJH8V3MHlsP35w0RgG9Yh+oDRdrNq0nXteWszrS8oY1qsTP7o4j7NG9wn1Pcu27uK+V5fy3Py19OnSjjsuHM1nxw2ItIWnsBDJUCvLtjH1hcW8s2ITI/p05q5LxnLqiF5Rl5W23lxWxrQXF1NSvp2zRvXmhxfnMbx30061ra6p5X/eXcWv31jBnlrn+tOGceNZR9G5XfQtPIWFSIbZumsPv3x9BY8FA6X/9pmRfOnE5hsobcl219Tx2N9W8cs3VlBdU8t1pwzjprOPoksTTLWdszQRRrFN2zlndB/uvDiPYb06NUHVTUNhIZIh6uqcP81bw89eXUZ8x26uOGEwt503kp5pOlCazsq27eL+V5fxp3ml9O7Sju9NHs1l4wcc0lTbkvIqfvziYuYsK2d4r0788JI8zhoVbjfXoVBYiGSAeas3M/WFYhaVbmHCkO5MnTKWowdEM1DamixcU8ndM4tZuKaScYO6MXXKWI4b1C2lc6uqa/j17BVM/2uMdtlZ3HLOCK45eSg52enZwlNYiLRiG7fu4t9fWcpzC9bSN7cd379wDFOOOyJjpsI2h7o657kFa7nvlaVsqqrmnycM5LuTR9O7y75bbHV1zv8uWMt9ry6lfFs1/zRhIN+dPIo+XdL7KriRr7MQkaZXXVPL9L+u4sHZiYHSb5x5JDeedRSd0mCgtLVp08b4pwkDOX9sXx6cvZLp78Z49cMN3LyPlsL7ayq5K2iJHDeoG498eQLj02TRX1NRy0KkhZi9dCPTXljMqoodnDumLz+8eAxDeqbPQGlr94/yKqa9sJi3lpczvHdiqu3YI7py/6ylPFNUSq/O7fje5FF8/vj0vJzI/qgbSg7JotJK7p+1jNq61vH/RWuxbVcNH6zdsvdL6sw0HCjNBO7O7KVl/PjFRGjnZLehrs657tRhfLOJZk81N3VDySF5eu4aCmJxjotoNansW4ecLO68aAxXn5S+A6WZwMw4Z0xfTh3RixnvruIf5VXccPqRLe4S6IdCYSGfUBiLc9Lwnjx23cSoSxFJW+2ys/jXM46MuoxmpV9RZK9NVdWsLKti0vDMuPG9iKROYSF7zY3FAZg0rOXdJU1EwqWwkL0KYnHat23DMVrUJSKNKCxkr4JYnAlDumsAVUQ+Rd8KAiTux7x0w1YmDlUXlIh8msJCAChcFccdDW6LyD4pLASAwlgFOVltGJfixdJEJLMoLARIjFeMG9SN9m2b79aSItJyKCyEquoaPly7RV1QIrJfCguhaFWcOoeJwxQWIrJvCguhMBYnu40xYUjruqSyiDQdhYVQEItz9ICudMzRpcJEZN8UFhlu5+5aFpVWarxCRA5IYZHhFny0mT21ziSNV4jIASgsMtx7sThtDPKHKixEZP8UFhmuMFZB3hG55LbAO3yJSPNRWGSw6ppaFnxUqetBiUhSCosMtqh0C9U1dRrcFpGkFBYZrKCkAoATNF4hIkkoLDJYQSzOqL5d6NEpJ+pSRCTNKSwy1J7aOuat3qxLfIhIShQWGap43VZ27K7VeIWIpERhkaHqxyvUshCRVCgsMlRBLM7wXp3o06V91KWISAugsMhAtXXO3FVxdUGJSMpCDQszm2xmy8xspZn
dvo/nh5jZG2a2yMzeNLOBDZ4bbGZ/MbMlZrbYzIaGWWsmWbJ+K9t21agLSkRSFlpYmFkW8BBwAZAHXGlmeY0OewB43N2PBaYB9zZ47nHgfncfA0wEysKqNdMUxuIATBqmldsikpowWxYTgZXuXuLuu4GngUsbHZMHzA6259Q/H4RKtru/BuDuVe6+I8RaM0pBrIKB3TtwRLcOUZciIi1EmGExAFjT4HFpsK+h94HLgu3PAV3MrCcwEqg0s+fMbIGZ3R+0VOQwuTuFsbhaFSJyUKIe4L4NOMPMFgBnAGuBWiAbOC14/gRgOHBt45PN7AYzKzKzovLy8mYruiVbUVbF5h17NLgtIgclzLBYCwxq8HhgsG8vd1/n7pe5+3jgB8G+ShKtkIVBF1YN8DxwfOM3cPdH3D3f3fN79+4d1s/RqtSvr9DNjkTkYIQZFnOBEWY2zMxygCuAmQ0PMLNeZlZfwx3A9AbndjOz+gQ4G1gcYq0ZoyAWp19uewb36Bh1KSLSgoQWFkGL4CZgFrAEeMbdi81smplNCQ47E1hmZsuBvsBPgnNrSXRBvWFmHwAG/DasWjOFu1MQizNxWA/MLOpyRKQFyQ7zxd39ZeDlRvt+1GD7WeDZ/Zz7GnBsmPVlmlUVOyjfVq3xChE5aFEPcEsz0niFiBwqhUUGKYzF6dU5hyN7d466FBFpYRQWGUTjFSJyqBQWGaJ08w7WVu5kom6hKiKHQGGRIQpKgutBDdfKbRE5eAqLDFEQq6Brh7aM6tsl6lJEpAVSWGSIwlicE4b2oE0bjVeIyMFTWGSAjVt3sapih6bMisghU1hkgIL6+1doMZ6IHCKFRQYoKKmgc7ts8vrnRl2KiLRQCosMUBiLM2FId7Kz9NctIodG3x6tXEVVNSvKqtQFJSKHRWHRyn18v22FhYgcOoVFK1cQi9O+bRuOGdAt6lJEpAVTWLRyBbE4xw/uTk62/qpF5NDpG6QV27JjD0s3bGXSMF3iQ0QOj8KiFZu7Ko47TNR4hYgcJoVFK1a4Kk5OVhvGD9Z4hYgcHoVFK1ZQUsG4Qd1o3zYr6lJEpIVTWLRSVdU1fLhuq7qgRKRJKCxaqXmrN1Nb51qMJyJNQmHRShWUVJDVxjh+cPeoSxGRViBpWJjZN81M3zgtTGEszjEDutKpXXbUpYhIK5BKy6IvMNfMnjGzyWamu+ekuZ27a3m/tFKX+BCRJpM0LNz9TmAE8ChwLbDCzH5qZkeGXJscogVrNrOnVuMVItJ0UhqzcHcHNgR/aoDuwLNm9rMQa5NDVFASxwzyhyosRKRpJO3QNrNbgKuBTcDvgO+4+x4zawOsAL4bbolysApjcfL655Lbvm3UpYhIK5HK6GcP4DJ3X91wp7vXmdnF4ZQlh6q6ppb5H23mi5OGRF2KiLQiqXRDvQLE6x+YWa6ZTQJw9yVhFSaHZlHpFqpr6rQYT0SaVCph8RugqsHjqmCfpKH6mx0pLESkKaUSFhYMcAOJ7idS676SCLxXUsHIvp3p0Skn6lJEpBVJJSxKzOxmM2sb/LkFKAm7MDl4NbV1zFu9WfevEJEml0pYfA04GVgLlAKTgBvCLEoOzYfrtrJjd63WV4hIk0vaneTuZcAVzVCLHKbCWAWg8QoRaXqprLNoD1wPjAXa1+939+tCrEsOQUFJnOG9OtGnS/vkB4uIHIRUuqGeAPoB5wNvAQOBbWEWJQevts4pXBVXq0JEQpFKWBzl7j8Etrv7Y8BFJMYtkgouPLjMzFaa2e37eH6Imb1hZovM7E0zG9jo+VwzKzWzB1N5v0y2dMNWtu2q0XiFiIQilbDYE/y30syOBroCfZKdZGZZwEPABUAecKWZ5TU67AHgcXc/FpgG3Nvo+R8Db6dQY8YrKKlfX6GZUCLS9FIJi0eC+1ncCcwEFgP/nsJ5E4GV7l7i7ruBp4FLGx2TB8wOtuc0fN7MJpC4PPpfUnivjFcYizOwewcGdOsQdSki0godMCyCiwVudff
N7v62uw939z7u/nAKrz0AWNPgcWmwr6H3gcuC7c8BXcysZ/C+/wHcltJPkeHcE+MVWl8hImE5YFgEq7XDvKrsbcAZZrYAOIPEWo5a4BvAy+5eeqCTzewGMysys6Ly8vIQy0xvK8uqiG/frZsdiUhoUrlsx+tmdhvwR2B7/U53j+//FCDxxT+oweOBwb693H0dQcvCzDoDn3f3SjM7CTjNzL4BdAZyzKzK3W9vdP4jwCMA+fn5ToZ6L7gelAa3RSQsqYTF5cF/b2ywz4HhSc6bC4wws2EkQuIK4KqGB5hZLyAetGDuAKYDuPsXGxxzLZDfOCjkY4WxOH1z2zG4R8eoSxGRViqVFdzDDuWF3b3GzG4CZgFZwHR3LzazaUCRu88EzgTuNTMnMevpxv2+oOyTu1NQUsGJw3ui26OLSFhSWcF99b72u/vjyc5195eBlxvt+1GD7WeBZ5O8xgxgRrL3ylSrKnZQtq1ai/FEJFSpdEOd0GC7PXAOMB9IGhYSvvrrQZ2o8QoRCVEq3VDfbPjYzLqRWDMhaaCgJE7PTjkc2btz1KWISCuWyqK8xrYDhzSOIU2vIJa4HpTGK0QkTKmMWbxAYvYTJMIlD3gmzKIkNaWbd7C2cidfPU3ZLSLhSmXM4oEG2zXA6mSL5aR5fHy/ba3cFpFwpRIWHwHr3X0XgJl1MLOh7r4q1MokqYKSOLntsxndr0vUpYhIK5fKmMWfgLoGj2uDfRKx+vtXtGmj8QoRCVcqYZEdXDUWgGA7J7ySJBVlW3cR27RdFw8UkWaRSliUm9mU+gdmdimwKbySJBW6HpSINKdUxiy+BjzZ4G51pcA+V3VL8ymMVdC5XTZ5/XOjLkVEMkAqi/L+AZwYXBUWd68KvSpJqqAkzoQh3cnOOpSlMiIiByfpN42Z/dTMurl7lbtXmVl3M7unOYqTfauoqmZFWZWuByUizSaVX0svcPfK+gfuvhm4MLySJJm5qxLjFboelIg0l1TCIsvM2tU/MLMOQLsDHC8hK4jFad+2DccM6BZ1KSKSIVIZ4H4SeMPM/gcw4FrgsTCLkgMrKIlz/ODu5GRrvEJEmkfSbxt3/3fgHmAMMIrEzYyGhFyX7MeWHXtYsmGr1leISLNK9VfTjSQuJvjPwNnAktAqkgMqWh3HHQ1ui0iz2m83lJmNBK4M/mwC/giYu5/VTLXJPhTE4uRktWH8YI1XiEjzOdCYxVLgHeBid18JYGa3NktVsl8FsTjHDepK+7ZZUZciIhnkQN1QlwHrgTlm9lszO4fEALdEpKq6hg/XbtF4hYg0u/2Ghbs/7+5XAKOBOcC3gD5m9hszO6+5CpSPzV+9mdo613iFiDS7VGZDbXf3P7j7JcBAYAHwvdArk08piFWQ1caYMKR71KWISIY5qIn67r7Z3R9x93PCKkj2rzAW55gBXenULpXlMSIiTUerulqIXXtqeX/NFiapC0pEIqCwaCHmf7SZ3bV1un+FiERCYdFCFMbimMGEIQoLEWl+CosWoqAkTl7/XLp2aBt1KSKSgRQWLcDumjrmf7RZU2ZFJDIKixZgUWkl1TV1WownIpFRWLQABbHEzY7UshCRqCgsWoCCWJyRfTvTo1NO1KWISIZSWKS5mto65q2KqwtKRCKlsEhzxeu2sn13rbqgRCRSCos0VxCrANDKbRGJlMIizRXG4gzr1Yk+ue2jLkVEMpjCIo3V1jmFsbhaFSISuVDDwswmm9kyM1tpZrfv4/khZvaGmS0yszfNbGCwf5yZ/d3MioPnLg+zznS1bMM2tu6q0fWgRCRyoYWFmWUBDwEXAHnAlWaW1+iwB4DH3f1YYBpwb7B/B3C1u48FJgO/MLOMu+l0/XjFRM2EEpGIhdmymAisdPcSd98NPA1c2uiYPGB2sD2n/nl3X+7uK4LtdUAZ0DvEWtNSYSzOwO4dGNCtQ9SliEiGCzMsBgBrGjwuDfY19D6Je30DfA7oYmaf+DXazCYCOcA/QqozLbknxis
0ZVZE0kHUA9y3AWeY2QLgDGAtUFv/pJn1B54AvuLudY1PNrMbzKzIzIrKy8ubq+ZmsbKsiortuzlRXVAikgbCDIu1wKAGjwcG+/Zy93Xufpm7jwd+EOyrBDCzXOAl4Afu/t6+3iC4xWu+u+f37t26eql0PSgRSSdhhsVcYISZDTOzHOAKYGbDA8ysl5nV13AHMD3YnwP8L4nB72dDrDFtFcTi9M1tx5CeHaMuRUQkvLBw9xrgJmAWsAR4xt2LzWyamU0JDjsTWGZmy4G+wE+C/V8ATgeuNbOFwZ9xYdWabhLjFRVMGtYTM4u6HBERssN8cXd/GXi50b4fNdh+FvhUy8Hdfw/8Psza0tnqih1s3FqtLigRSRtRD3DLPhQG4xUnajGeiKQJhUUaei9WQc9OORzZu3PUpYiIAAqLtFS/vkLjFSKSLhQWaWZt5U5KN+/UeIWIpBWFRZopKKm/f4UW44lI+lBYpJnCWJzc9tmM7tcl6lJERPZSWKSZgmC8ok0bjVeISPpQWKSRsq27iG3ari4oEUk7Cos0outBiUi6UlikkcJYnE45WYw9IjfqUkREPkFhkUYKYhVMGNqD7Cz9tYhIetG3UpqIb9/N8o1VTFIXlIikIYVFGnB3fvbqUgBOG9Er4mpERD5NYZEGHn67hKfnruGms47i2IHdoi5HRORTFBYRe+WD9dz3ylIuPrY///aZkVGXIyKyTwqLCC1cU8m3/riQ4wd344F/Pk4L8UQkbSksIlK6eQf/8lgRfXLb8dur82nfNivqkkRE9ivUO+XJvm3dtYfrZsyluqaWp2+YRM/O7aIuSUTkgNSyaGZ7auu48cn5lJRv5+EvTeCoPrpgoIikP7UsmpG7c9fMYt5ZsYmfff5YTj5K02RFpGVQy6IZ/e6dGH8o+Iivn3kkXzhhUNTliIikTGHRTGYVb+CnryzhomP6853zRkVdjojIQVFYNINFpZXc8vQCjhvYjf/4gqbIikjLo7AI2drKnVz/WBG9OmuKrIi0XBrgDtG2XXu4fsZcdu2u5Q//MoneXTRFVkRaJoVFSGpq67jpDwtYUVbFjK+cwIi+miIrIi2XuqFC4O5MfWExby0v557PHs1pI3pHXZKIyGFRWIRg+rureOK91fzr6cO5cuLgqMsRETlsCosm9trijdzz0mImj+3H9yaPjrocEZEmobBoQh+u3cLNTy3g2AFd+fnl4zRFVkRaDYVFE1m/ZSfXPzaXHp1y+O01+XTI0RRZEWk9NBuqCVRV13DdjCK2V9fy569Pok+X9lGXJCLSpBQWh6mmto6bn1rA8o3bmH7tCYzqpymyItL6qBvqMN3z0hJmLy1j6pSxnDFSU2RFpHVSWByGGe/GmPG3VfzLqcP40olDoi5HRCQ0CotDNHvpRqa9uJjP5PXljgvHRF2OiEioQg0LM5tsZsvMbKWZ3b6P54eY2RtmtsjM3jSzgQ2eu8bMVgR/rgmzzoNVvG4LN/1hAWOP6MovrxhHlqbIikgrF1pYmFkW8BBwAZAHXGlmeY0OewB43N2PBaYB9wbn9gDuAiYBE4G7zKx7WLUejA1bdnH9jCK6dmjL767Jp2OO5giISOsXZstiIrDS3UvcfTfwNHBpo2PygNnB9pwGz58PvObucXffDLwGTA6x1pRsr67h+sfmsm3XHqZfewJ9czVFVkQyQ5hhMQBY0+BxabCvofeBy4LtzwFdzKxniuc2q9o655anF7Bk/VYevOp4xvTPjbIcEZFmFfUA923AGWa2ADgDWAvUpnqymd1gZkVmVlReXh5WjQD85KUlvL6kjLunjOWs0X1CfS8RkXQTZlisBQY1eDww2LeXu69z98vcfTzwg2BfZSrnBsc+4u757p7fu3d4axye+Psqpr8b4yunDOXqk4aG9j4iIukqzLCYC4wws2FmlgNcAcxseICZ9TKz+hruAKYH27OA88ysezCwfV6wr9nNWVbGXTOLOXdMH+68qPH4vIhIZggtLNy9Bri
JxJf8EuAZdy82s2lmNiU47ExgmZktB/oCPwnOjQM/JhE4c4Fpwb5mtWT9Vm56cj5j+ufyyyvGa4qsiGQsc/eoa2gS+fn5XlRU1GSvV7Z1F5996F3qHJ6/8RT6ddXMJxFpfcxsnrvnJztOiwT2YcfuGq5/rIjKnXv409dOUlCISMaLejZU2qmtc7719EKK123h11eOZ+wRXaMuSUQkcgqLRu57ZQl/WbyRH16cxzlj+kZdjohIWlBYNPBkwWp++06Ma04awldOGRZ1OSIiaUNhEXhreTk/+r9izhrVmx9erCmyIiINKSyAZRu2ceOT8xnZtwu/vup4srP0sYiINJTx34pl23Zx3Yy5dMzJ4tFr8uncThPEREQay/hvxnZZWYzu14VvnTuSI7p1iLocEZG0lPFh0bVjWx699oSoyxARSWsZ3w0lIiLJKSxERCQphYWIiCSlsBARkaQUFiIikpTCQkREklJYiIhIUgoLERFJqtXcKc/MyoHVh/ESvYBNTVROS6fP4pP0eXySPo+PtYbPYoi79052UKsJi8NlZkWp3FowE+iz+CR9Hp+kz+NjmfRZqBtKRESSUliIiEhSCouPPRJ1AWlEn8Un6fP4JH0eH8uYz0JjFiIikpRaFiIiklTGh4WZTTazZWa20sxuj7qeKJnZIDObY2aLzazYzG6JuqaomVmWmS0wsxejriVqZtbNzJ41s6VmtsTMToq6piiZ2a3Bv5MPzewpM2sfdU1hyuiwMLMs4CHgAiAPuNLM8qKtKlI1wLfdPQ84Ebgxwz8PgFuAJVEXkSZ+Cbzq7qOB48jgz8XMBgA3A/nufjSQBVwRbVXhyuiwACYCK929xN13A08Dl0ZcU2Tcfb27zw+2t5H4MhgQbVXRMbOBwEXA76KuJWpm1hU4HXgUwN13u3tltFVFLhvoYGbZQEdgXcT1hCrTw2IAsKbB41Iy+MuxITMbCowHCqKtJFK/AL4L1EVdSBoYBpQD/xN0y/3OzDpFXVRU3H0t8ADwEbAe2OLuf4m2qnBleljIPphZZ+DPwLfcfWvU9UTBzC4Gytx9XtS1pIls4HjgN+4+HtgOZOwYn5l1J9ELMQw4AuhkZl+KtqpwZXpYrAUGNXj3AdpIAAAEp0lEQVQ8MNiXscysLYmgeNLdn4u6ngidAkwxs1UkuifPNrPfR1tSpEqBUnevb2k+SyI8MtW5QMzdy919D/AccHLENYUq08NiLjDCzIaZWQ6JAaqZEdcUGTMzEn3SS9z9P6OuJ0rufoe7D3T3oST+v5jt7q36N8cDcfcNwBozGxXsOgdYHGFJUfsIONHMOgb/bs6hlQ/4Z0ddQJTcvcbMbgJmkZjNMN3diyMuK0qnAF8GPjCzhcG+77v7yxHWJOnjm8CTwS9WJcBXIq4nMu5eYGbPAvNJzCJcQCtfza0V3CIiklSmd0OJiEgKFBYiIpKUwkJERJJSWIiISFIKCxERSUphIS2SmdWa2UIze9/M5ptZky6IMrPvN3r8tyZ63Xwz+1WwfWZT1m1mQ83sqn29l8jh0tRZaZHMrMrdOwfb55NYD3JGGK8fFjO7G6hy9wcO4pxsd6/Zz3NnAre5+8VNU6HIx9SykNYgF9gMiVXoZnZ/cI+BD8zs8iT7+5vZ20Er5UMzO83M7iNxNdGFZvZkcFxV8N8zzezNBvd1eDJYwYuZXRjsm2dmv9rXPTCC818MLtT4NeDW4H1OM7PeZvZnM5sb/DklOOduM3vCzN4FnghaEO8ELaqGrar7gNOC17u1/r2C1+hhZs+b2SIze8/Mjm3w2tODn6nEzG4O5W9IWryMXsEtLVqHYJV5e6A/cHaw/zJgHIn7LfQC5prZ2ySu27Ov/VcBs9z9J8H9TTq6+ztmdpO7j9vPe48HxpK4JPW7wClmVgQ8DJzu7jEze+pAxbv7KjP7bxq0LMzsD8DP3f2vZjaYxJUFxgSn5AGnuvtOM+sIfMbdd5nZCOApIJ/Ehf32tiyClka9qcACd/+
smZ0NPB58HgCjgbOALsAyM/tNcL0jkb0UFtJS7az/MrfEHdseN7OjgVOBp9y9FthoZm8BJxxg/1xgenABxefdfeG+3qyRQncvDd57ITAUqAJK3D0WHPMUcMNB/kznAnlBQwUg1xJXAAaY6e47g+22wINmNg6oBUam8NqnAp8HcPfZZtbTzHKD515y92qg2szKgL4kLhwospfCQlo8d/+7mfUCeh/CuW+b2ekkbnI0w8z+090fT3JadYPtWpru31Eb4ER339VwZxAe2xvsuhXYSKKV1Ab4xPGHIKyfR1oRjVlIi2dmo0lcCLICeAe43BL3zu5N4u5uhfvbb2ZDgI3u/lsSd8Srv+z2nqC1kaplwPBgLALg8hTO2Uai66feX0hcrK/+59pfN1hXYL2715G48GPWfl6voXeALwaveyawKVPvVSKHRr9BSEtVP2YBYMA17l5rZv8LnAS8DzjwXXffcID91wDfMbM9JLqSrg5e8xFgkZnNd/cvJismGEv4BvCqmW0n0b2VzAvAs2Z2KYmQuBl4yMwWkfi3+TaJQfDG/gv4s5ldDbzKx62ORUCtmb0PzCBxJdR6d5PoblsE7ACuSaE+kb00dVakiZhZZ3evCmZHPQSscPefR12XSFNQN5RI0/lq0NopJtFV9HDE9Yg0GbUsREQkKbUsREQkKYWFiIgkpbAQEZGkFBYiIpKUwkJERJJSWIiISFL/D2oE/dRJZaodAAAAAElFTkSuQmCC\n", 240 | "text/plain": [ 241 | "
" 242 | ] 243 | }, 244 | "metadata": { 245 | "needs_background": "light" 246 | }, 247 | "output_type": "display_data" 248 | } 249 | ], 250 | "source": [ 251 | "s_hist = list(ada.staged_score(testX, testY)) # посмотрим на историю обучения\n", 252 | "import matplotlib.pyplot as plt \n", 253 | "%matplotlib inline \n", 254 | "plt.plot(s_hist)\n", 255 | "plt.xlabel('Boosting iteration')\n", 256 | "plt.ylabel('Accuracy')" 257 | ] 258 | }, 259 | { 260 | "cell_type": "code", 261 | "execution_count": 60, 262 | "metadata": {}, 263 | "outputs": [ 264 | { 265 | "name": "stdout", 266 | "output_type": "stream", 267 | "text": [ 268 | "AdaBoost (100 tree) score: 0.9736842105263158\n", 269 | "Overfit!\n" 270 | ] 271 | } 272 | ], 273 | "source": [ 274 | "ada = AdaBoostClassifier(base_estimator=None, n_estimators=100, random_state=42).fit(trainX, trainY)\n", 275 | "print('AdaBoost (100 tree) score:', ada.score(testX, testY))\n", 276 | "print(\"Overfit!\")" 277 | ] 278 | }, 279 | { 280 | "cell_type": "code", 281 | "execution_count": 78, 282 | "metadata": {}, 283 | "outputs": [], 284 | "source": [ 285 | "from sklearn.linear_model import LogisticRegression\n", 286 | "clf = LogisticRegression(random_state=42, solver='liblinear')" 287 | ] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "execution_count": 79, 292 | "metadata": {}, 293 | "outputs": [ 294 | { 295 | "name": "stdout", 296 | "output_type": "stream", 297 | "text": [ 298 | "AdaBoost (10 logr) score: 0.956140350877193\n" 299 | ] 300 | } 301 | ], 302 | "source": [ 303 | "ada = AdaBoostClassifier(base_estimator=clf, n_estimators=10, random_state=42).fit(trainX, trainY)\n", 304 | "print('AdaBoost (10 logr) score:', ada.score(testX, testY))" 305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": 80, 310 | "metadata": {}, 311 | "outputs": [ 312 | { 313 | "name": "stdout", 314 | "output_type": "stream", 315 | "text": [ 316 | "AdaBoost (100 logr) score: 0.9649122807017544\n" 317 | ] 318 | } 319 | ], 320 | 
"source": [ 321 | "ada = AdaBoostClassifier(base_estimator=clf, n_estimators=100, random_state=42).fit(trainX, trainY)\n", 322 | "print('AdaBoost (100 logr) score:', ada.score(testX, testY))" 323 | ] 324 | }, 325 | { 326 | "cell_type": "code", 327 | "execution_count": 81, 328 | "metadata": {}, 329 | "outputs": [ 330 | { 331 | "name": "stdout", 332 | "output_type": "stream", 333 | "text": [ 334 | "AdaBoost (1000 logr) score: 0.9649122807017544\n" 335 | ] 336 | } 337 | ], 338 | "source": [ 339 | "ada = AdaBoostClassifier(base_estimator=clf, n_estimators=1000, random_state=42).fit(trainX, trainY)\n", 340 | "print('AdaBoost (1000 logr) score:', ada.score(testX, testY))" 341 | ] 342 | }, 343 | { 344 | "cell_type": "markdown", 345 | "metadata": {}, 346 | "source": [ 347 | "## Gradient Boosting" 348 | ] 349 | }, 350 | { 351 | "cell_type": "code", 352 | "execution_count": 85, 353 | "metadata": {}, 354 | "outputs": [], 355 | "source": [ 356 | "from sklearn.ensemble import GradientBoostingClassifier" 357 | ] 358 | }, 359 | { 360 | "cell_type": "code", 361 | "execution_count": 86, 362 | "metadata": {}, 363 | "outputs": [ 364 | { 365 | "name": "stdout", 366 | "output_type": "stream", 367 | "text": [ 368 | "Gradient Boosting (10 tree) score: 0.956140350877193\n" 369 | ] 370 | } 371 | ], 372 | "source": [ 373 | "gb = GradientBoostingClassifier(learning_rate=0.1, n_estimators=10, random_state=42).fit(trainX, trainY)\n", 374 | "print('Gradient Boosting (10 tree) score:', gb.score(testX, testY))" 375 | ] 376 | }, 377 | { 378 | "cell_type": "code", 379 | "execution_count": 87, 380 | "metadata": {}, 381 | "outputs": [ 382 | { 383 | "name": "stdout", 384 | "output_type": "stream", 385 | "text": [ 386 | "Gradient Boosting (100 tree) score: 0.956140350877193\n" 387 | ] 388 | } 389 | ], 390 | "source": [ 391 | "gb = GradientBoostingClassifier(learning_rate=0.1, n_estimators=100, random_state=42).fit(trainX, trainY)\n", 392 | "print('Gradient Boosting (100 tree) score:', gb.score(testX, 
testY))" 393 | ] 394 | }, 395 | { 396 | "cell_type": "markdown", 397 | "metadata": {}, 398 | "source": [ 399 | "## XGBoost" 400 | ] 401 | }, 402 | { 403 | "cell_type": "code", 404 | "execution_count": 90, 405 | "metadata": {}, 406 | "outputs": [ 407 | { 408 | "name": "stdout", 409 | "output_type": "stream", 410 | "text": [ 411 | "Collecting xgboost\n", 412 | "\u001b[?25l Downloading https://files.pythonhosted.org/packages/2e/bd/ad3a963b630fa3ee72d1a672fd207263fa0a18113688273afe8298293535/xgboost-0.82.tar.gz (665kB)\n", 413 | "\u001b[K 100% |████████████████████████████████| 675kB 3.4MB/s ta 0:00:011\n", 414 | "\u001b[?25hRequirement already satisfied: numpy in /Users/alex/Programming/pytenv/py3/lib/python3.7/site-packages (from xgboost) (1.16.1)\n", 415 | "Requirement already satisfied: scipy in /Users/alex/Programming/pytenv/py3/lib/python3.7/site-packages (from xgboost) (1.2.0)\n", 416 | "Building wheels for collected packages: xgboost\n", 417 | " Building wheel for xgboost (setup.py) ... 
\u001b[?25ldone\n", 418 | "\u001b[?25h Stored in directory: /Users/alex/Library/Caches/pip/wheels/5d/ea/e9/4aef52d7294badf6bc26728a73d3e001c38e07f7dc3722c5eb\n", 419 | "Successfully built xgboost\n", 420 | "Installing collected packages: xgboost\n", 421 | "Successfully installed xgboost-0.82\n", 422 | "\u001b[33mYou are using pip version 19.0.1, however version 19.1 is available.\n", 423 | "You should consider upgrading via the 'pip install --upgrade pip' command.\u001b[0m\n" 424 | ] 425 | } 426 | ], 427 | "source": [ 428 | "!pip install xgboost" 429 | ] 430 | }, 431 | { 432 | "cell_type": "code", 433 | "execution_count": 91, 434 | "metadata": {}, 435 | "outputs": [], 436 | "source": [ 437 | "from xgboost import XGBClassifier" 438 | ] 439 | }, 440 | { 441 | "cell_type": "code", 442 | "execution_count": 93, 443 | "metadata": {}, 444 | "outputs": [ 445 | { 446 | "name": "stdout", 447 | "output_type": "stream", 448 | "text": [ 449 | "XGB (10 tree) score: 0.956140350877193\n" 450 | ] 451 | } 452 | ], 453 | "source": [ 454 | "xgb = XGBClassifier(learning_rate=0.1, n_estimators=10, n_jobs=-1).fit(trainX, trainY)\n", 455 | "print('XGB (10 tree) score:', xgb.score(testX, testY))" 456 | ] 457 | }, 458 | { 459 | "cell_type": "code", 460 | "execution_count": 96, 461 | "metadata": {}, 462 | "outputs": [ 463 | { 464 | "name": "stdout", 465 | "output_type": "stream", 466 | "text": [ 467 | "XGB (100 tree) score: 0.9649122807017544\n" 468 | ] 469 | } 470 | ], 471 | "source": [ 472 | "xgb = XGBClassifier(learning_rate=0.1, n_estimators=100, n_jobs=-1).fit(trainX, trainY)\n", 473 | "print('XGB (100 tree) score:', xgb.score(testX, testY))" 474 | ] 475 | }, 476 | { 477 | "cell_type": "code", 478 | "execution_count": 95, 479 | "metadata": {}, 480 | "outputs": [ 481 | { 482 | "name": "stdout", 483 | "output_type": "stream", 484 | "text": [ 485 | "XGB (1000 tree) score: 0.9736842105263158\n" 486 | ] 487 | } 488 | ], 489 | "source": [ 490 | "xgb = XGBClassifier(learning_rate=0.1, 
n_estimators=1000, n_jobs=-1).fit(trainX, trainY)\n", 491 | "print('XGB (1000 tree) score:', xgb.score(testX, testY))" 492 | ] 493 | }, 494 | { 495 | "cell_type": "code", 496 | "execution_count": 98, 497 | "metadata": {}, 498 | "outputs": [ 499 | { 500 | "name": "stdout", 501 | "output_type": "stream", 502 | "text": [ 503 | "XGB (100000 tree) score: 0.9736842105263158\n" 504 | ] 505 | } 506 | ], 507 | "source": [ 508 | "xgb = XGBClassifier(learning_rate=0.1, n_estimators=100000, n_jobs=-1).fit(trainX, trainY)\n", 509 | "print('XGB (100000 tree) score:', xgb.score(testX, testY))" 510 | ] 511 | }, 512 | { 513 | "cell_type": "markdown", 514 | "metadata": {}, 515 | "source": [ 516 | "## CatBoost" 517 | ] 518 | }, 519 | { 520 | "cell_type": "code", 521 | "execution_count": 102, 522 | "metadata": {}, 523 | "outputs": [ 524 | { 525 | "name": "stdout", 526 | "output_type": "stream", 527 | "text": [ 528 | "Collecting catboost\n", 529 | "\u001b[?25l Downloading https://files.pythonhosted.org/packages/97/81/0af439a6357933bd4ae2c1bded77bebd42198da92e8f0c132e6b60783bf1/catboost-0.14.2-cp37-none-macosx_10_6_intel.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl (8.0MB)\n", 530 | "\u001b[K 100% |████████████████████████████████| 8.0MB 1.9MB/s ta 0:00:011\n", 531 | "\u001b[?25hRequirement already satisfied: numpy>=1.11.1 in /Users/alex/Programming/pytenv/py3/lib/python3.7/site-packages (from catboost) (1.16.1)\n", 532 | "Collecting enum34 (from catboost)\n", 533 | " Downloading https://files.pythonhosted.org/packages/af/42/cb9355df32c69b553e72a2e28daee25d1611d2c0d9c272aa1d34204205b2/enum34-1.1.6-py3-none-any.whl\n", 534 | "Requirement already satisfied: six in /Users/alex/Programming/pytenv/py3/lib/python3.7/site-packages (from catboost) (1.12.0)\n", 535 | "Requirement already satisfied: pandas>=0.19.1 in /Users/alex/Programming/pytenv/py3/lib/python3.7/site-packages (from catboost) (0.24.1)\n", 536 | "Collecting graphviz (from catboost)\n", 537 | " 
Downloading https://files.pythonhosted.org/packages/1f/e2/ef2581b5b86625657afd32030f90cf2717456c1d2b711ba074bf007c0f1a/graphviz-0.10.1-py2.py3-none-any.whl\n", 538 | "Requirement already satisfied: python-dateutil>=2.5.0 in /Users/alex/Programming/pytenv/py3/lib/python3.7/site-packages (from pandas>=0.19.1->catboost) (2.8.0)\n", 539 | "Requirement already satisfied: pytz>=2011k in /Users/alex/Programming/pytenv/py3/lib/python3.7/site-packages (from pandas>=0.19.1->catboost) (2018.9)\n", 540 | "Installing collected packages: enum34, graphviz, catboost\n", 541 | "Successfully installed catboost-0.14.2 enum34-1.1.6 graphviz-0.10.1\n", 542 | "\u001b[33mYou are using pip version 19.0.1, however version 19.1 is available.\n", 543 | "You should consider upgrading via the 'pip install --upgrade pip' command.\u001b[0m\n" 544 | ] 545 | } 546 | ], 547 | "source": [ 548 | "!pip install catboost" 549 | ] 550 | }, 551 | { 552 | "cell_type": "code", 553 | "execution_count": 103, 554 | "metadata": {}, 555 | "outputs": [], 556 | "source": [ 557 | "from catboost import CatBoostClassifier" 558 | ] 559 | }, 560 | { 561 | "cell_type": "code", 562 | "execution_count": 107, 563 | "metadata": {}, 564 | "outputs": [ 565 | { 566 | "name": "stdout", 567 | "output_type": "stream", 568 | "text": [ 569 | "Learning rate set to 0.5\n", 570 | "0:\tlearn: 0.2269327\ttotal: 83.9ms\tremaining: 755ms\n", 571 | "1:\tlearn: 0.1709999\ttotal: 129ms\tremaining: 514ms\n", 572 | "2:\tlearn: 0.1230784\ttotal: 173ms\tremaining: 403ms\n", 573 | "3:\tlearn: 0.0986998\ttotal: 242ms\tremaining: 363ms\n", 574 | "4:\tlearn: 0.0863845\ttotal: 287ms\tremaining: 287ms\n", 575 | "5:\tlearn: 0.0832428\ttotal: 331ms\tremaining: 221ms\n", 576 | "6:\tlearn: 0.0744292\ttotal: 374ms\tremaining: 160ms\n", 577 | "7:\tlearn: 0.0587910\ttotal: 417ms\tremaining: 104ms\n", 578 | "8:\tlearn: 0.0492131\ttotal: 473ms\tremaining: 52.6ms\n", 579 | "9:\tlearn: 0.0414105\ttotal: 539ms\tremaining: 0us\n", 580 | "CB (10 tree) score: 
0.9649122807017544\n" 581 | ] 582 | } 583 | ], 584 | "source": [ 585 | "cb = CatBoostClassifier(n_estimators=10, random_state=42).fit(trainX, trainY)\n", 586 | "print('CB (10 tree) score:', cb.score(testX, testY))" 587 | ] 588 | }, 589 | { 590 | "cell_type": "code", 591 | "execution_count": 108, 592 | "metadata": {}, 593 | "outputs": [ 594 | { 595 | "name": "stdout", 596 | "output_type": "stream", 597 | "text": [ 598 | "Learning rate set to 0.104314\n", 599 | "0:\tlearn: 0.5347815\ttotal: 92ms\tremaining: 9.11s\n", 600 | "1:\tlearn: 0.4518249\ttotal: 138ms\tremaining: 6.75s\n", 601 | "2:\tlearn: 0.3641087\ttotal: 182ms\tremaining: 5.88s\n", 602 | "3:\tlearn: 0.3065524\ttotal: 251ms\tremaining: 6.03s\n", 603 | "4:\tlearn: 0.2526143\ttotal: 298ms\tremaining: 5.66s\n", 604 | "5:\tlearn: 0.2198175\ttotal: 341ms\tremaining: 5.35s\n", 605 | "6:\tlearn: 0.1959248\ttotal: 386ms\tremaining: 5.13s\n", 606 | "7:\tlearn: 0.1775122\ttotal: 428ms\tremaining: 4.92s\n", 607 | "8:\tlearn: 0.1605013\ttotal: 484ms\tremaining: 4.89s\n", 608 | "9:\tlearn: 0.1490967\ttotal: 543ms\tremaining: 4.89s\n", 609 | "10:\tlearn: 0.1379241\ttotal: 606ms\tremaining: 4.9s\n", 610 | "11:\tlearn: 0.1246053\ttotal: 654ms\tremaining: 4.8s\n", 611 | "12:\tlearn: 0.1143870\ttotal: 725ms\tremaining: 4.86s\n", 612 | "13:\tlearn: 0.1046069\ttotal: 775ms\tremaining: 4.76s\n", 613 | "14:\tlearn: 0.1008537\ttotal: 825ms\tremaining: 4.67s\n", 614 | "15:\tlearn: 0.0947220\ttotal: 876ms\tremaining: 4.6s\n", 615 | "16:\tlearn: 0.0898061\ttotal: 923ms\tremaining: 4.5s\n", 616 | "17:\tlearn: 0.0849487\ttotal: 977ms\tremaining: 4.45s\n", 617 | "18:\tlearn: 0.0819844\ttotal: 1.02s\tremaining: 4.36s\n", 618 | "19:\tlearn: 0.0782575\ttotal: 1.07s\tremaining: 4.27s\n", 619 | "20:\tlearn: 0.0760236\ttotal: 1.11s\tremaining: 4.18s\n", 620 | "21:\tlearn: 0.0718845\ttotal: 1.16s\tremaining: 4.1s\n", 621 | "22:\tlearn: 0.0688764\ttotal: 1.25s\tremaining: 4.17s\n", 622 | "23:\tlearn: 0.0655609\ttotal: 1.33s\tremaining: 
4.22s\n", 623 | "24:\tlearn: 0.0633699\ttotal: 1.44s\tremaining: 4.32s\n", 624 | "25:\tlearn: 0.0612275\ttotal: 1.55s\tremaining: 4.42s\n", 625 | "26:\tlearn: 0.0596354\ttotal: 1.65s\tremaining: 4.46s\n", 626 | "27:\tlearn: 0.0577796\ttotal: 1.75s\tremaining: 4.49s\n", 627 | "28:\tlearn: 0.0557171\ttotal: 1.84s\tremaining: 4.5s\n", 628 | "29:\tlearn: 0.0540168\ttotal: 1.88s\tremaining: 4.4s\n", 629 | "30:\tlearn: 0.0532458\ttotal: 1.96s\tremaining: 4.37s\n", 630 | "31:\tlearn: 0.0524705\ttotal: 2.01s\tremaining: 4.27s\n", 631 | "32:\tlearn: 0.0507212\ttotal: 2.09s\tremaining: 4.25s\n", 632 | "33:\tlearn: 0.0494067\ttotal: 2.14s\tremaining: 4.15s\n", 633 | "34:\tlearn: 0.0477757\ttotal: 2.18s\tremaining: 4.05s\n", 634 | "35:\tlearn: 0.0468910\ttotal: 2.22s\tremaining: 3.95s\n", 635 | "36:\tlearn: 0.0438993\ttotal: 2.29s\tremaining: 3.89s\n", 636 | "37:\tlearn: 0.0433843\ttotal: 2.34s\tremaining: 3.82s\n", 637 | "38:\tlearn: 0.0418492\ttotal: 2.4s\tremaining: 3.75s\n", 638 | "39:\tlearn: 0.0410830\ttotal: 2.44s\tremaining: 3.67s\n", 639 | "40:\tlearn: 0.0401280\ttotal: 2.49s\tremaining: 3.59s\n", 640 | "41:\tlearn: 0.0387474\ttotal: 2.58s\tremaining: 3.56s\n", 641 | "42:\tlearn: 0.0379306\ttotal: 2.65s\tremaining: 3.51s\n", 642 | "43:\tlearn: 0.0375002\ttotal: 2.7s\tremaining: 3.44s\n", 643 | "44:\tlearn: 0.0370929\ttotal: 2.75s\tremaining: 3.36s\n", 644 | "45:\tlearn: 0.0363206\ttotal: 2.81s\tremaining: 3.3s\n", 645 | "46:\tlearn: 0.0354847\ttotal: 2.91s\tremaining: 3.28s\n", 646 | "47:\tlearn: 0.0351608\ttotal: 3s\tremaining: 3.25s\n", 647 | "48:\tlearn: 0.0348527\ttotal: 3.09s\tremaining: 3.22s\n", 648 | "49:\tlearn: 0.0344221\ttotal: 3.19s\tremaining: 3.19s\n", 649 | "50:\tlearn: 0.0341765\ttotal: 3.35s\tremaining: 3.21s\n", 650 | "51:\tlearn: 0.0339006\ttotal: 3.44s\tremaining: 3.17s\n", 651 | "52:\tlearn: 0.0334341\ttotal: 3.52s\tremaining: 3.12s\n", 652 | "53:\tlearn: 0.0324877\ttotal: 3.61s\tremaining: 3.07s\n", 653 | "54:\tlearn: 0.0322419\ttotal: 
3.65s\tremaining: 2.99s\n", 654 | "55:\tlearn: 0.0312423\ttotal: 3.69s\tremaining: 2.9s\n", 655 | "56:\tlearn: 0.0308487\ttotal: 3.74s\tremaining: 2.82s\n", 656 | "57:\tlearn: 0.0308276\ttotal: 3.78s\tremaining: 2.74s\n", 657 | "58:\tlearn: 0.0299485\ttotal: 3.84s\tremaining: 2.67s\n", 658 | "59:\tlearn: 0.0295122\ttotal: 3.89s\tremaining: 2.59s\n", 659 | "60:\tlearn: 0.0287994\ttotal: 3.94s\tremaining: 2.52s\n", 660 | "61:\tlearn: 0.0277803\ttotal: 3.99s\tremaining: 2.45s\n", 661 | "62:\tlearn: 0.0271641\ttotal: 4.08s\tremaining: 2.4s\n", 662 | "63:\tlearn: 0.0267187\ttotal: 4.13s\tremaining: 2.32s\n", 663 | "64:\tlearn: 0.0260498\ttotal: 4.17s\tremaining: 2.25s\n", 664 | "65:\tlearn: 0.0258216\ttotal: 4.22s\tremaining: 2.17s\n", 665 | "66:\tlearn: 0.0249466\ttotal: 4.27s\tremaining: 2.1s\n", 666 | "67:\tlearn: 0.0240213\ttotal: 4.32s\tremaining: 2.03s\n", 667 | "68:\tlearn: 0.0238467\ttotal: 4.37s\tremaining: 1.96s\n", 668 | "69:\tlearn: 0.0227478\ttotal: 4.41s\tremaining: 1.89s\n", 669 | "70:\tlearn: 0.0224176\ttotal: 4.45s\tremaining: 1.82s\n", 670 | "71:\tlearn: 0.0220396\ttotal: 4.5s\tremaining: 1.75s\n", 671 | "72:\tlearn: 0.0216055\ttotal: 4.55s\tremaining: 1.68s\n", 672 | "73:\tlearn: 0.0214642\ttotal: 4.61s\tremaining: 1.62s\n", 673 | "74:\tlearn: 0.0210067\ttotal: 4.67s\tremaining: 1.55s\n", 674 | "75:\tlearn: 0.0202670\ttotal: 4.72s\tremaining: 1.49s\n", 675 | "76:\tlearn: 0.0196697\ttotal: 4.82s\tremaining: 1.44s\n", 676 | "77:\tlearn: 0.0191922\ttotal: 4.86s\tremaining: 1.37s\n", 677 | "78:\tlearn: 0.0188297\ttotal: 4.91s\tremaining: 1.3s\n", 678 | "79:\tlearn: 0.0181877\ttotal: 4.95s\tremaining: 1.24s\n", 679 | "80:\tlearn: 0.0176494\ttotal: 4.99s\tremaining: 1.17s\n", 680 | "81:\tlearn: 0.0170626\ttotal: 5.05s\tremaining: 1.11s\n", 681 | "82:\tlearn: 0.0168108\ttotal: 5.1s\tremaining: 1.04s\n", 682 | "83:\tlearn: 0.0166935\ttotal: 5.15s\tremaining: 980ms\n", 683 | "84:\tlearn: 0.0165572\ttotal: 5.19s\tremaining: 916ms\n", 684 | "85:\tlearn: 
0.0163113\ttotal: 5.27s\tremaining: 858ms\n", 685 | "86:\tlearn: 0.0162244\ttotal: 5.32s\tremaining: 795ms\n", 686 | "87:\tlearn: 0.0155334\ttotal: 5.37s\tremaining: 732ms\n", 687 | "88:\tlearn: 0.0154228\ttotal: 5.41s\tremaining: 669ms\n", 688 | "89:\tlearn: 0.0152083\ttotal: 5.46s\tremaining: 607ms\n", 689 | "90:\tlearn: 0.0148635\ttotal: 5.55s\tremaining: 549ms\n", 690 | "91:\tlearn: 0.0146341\ttotal: 5.62s\tremaining: 489ms\n", 691 | "92:\tlearn: 0.0145682\ttotal: 5.68s\tremaining: 428ms\n", 692 | "93:\tlearn: 0.0145237\ttotal: 5.74s\tremaining: 366ms\n", 693 | "94:\tlearn: 0.0141513\ttotal: 5.8s\tremaining: 305ms\n", 694 | "95:\tlearn: 0.0136066\ttotal: 5.84s\tremaining: 243ms\n", 695 | "96:\tlearn: 0.0133914\ttotal: 5.88s\tremaining: 182ms\n", 696 | "97:\tlearn: 0.0131526\ttotal: 5.93s\tremaining: 121ms\n", 697 | "98:\tlearn: 0.0128911\ttotal: 5.97s\tremaining: 60.3ms\n", 698 | "99:\tlearn: 0.0126152\ttotal: 6.03s\tremaining: 0us\n", 699 | "CB (100 tree) score: 0.9649122807017544\n" 700 | ] 701 | } 702 | ], 703 | "source": [ 704 | "cb = CatBoostClassifier(n_estimators=100, random_state=42).fit(trainX, trainY)\n", 705 | "print('CB (100 tree) score:', cb.score(testX, testY))" 706 | ] 707 | } 708 | ], 709 | "metadata": { 710 | "kernelspec": { 711 | "display_name": "Python 3", 712 | "language": "python", 713 | "name": "python3" 714 | }, 715 | "language_info": { 716 | "codemirror_mode": { 717 | "name": "ipython", 718 | "version": 3 719 | }, 720 | "file_extension": ".py", 721 | "mimetype": "text/x-python", 722 | "name": "python", 723 | "nbconvert_exporter": "python", 724 | "pygments_lexer": "ipython3", 725 | "version": "3.7.2" 726 | } 727 | }, 728 | "nbformat": 4, 729 | "nbformat_minor": 2 730 | } 731 | -------------------------------------------------------------------------------- /prac/metrics.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | 
"metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import matplotlib.pyplot as plt \n", 10 | "import numpy as np\n", 11 | "import sklearn.metrics\n", 12 | "%matplotlib inline " 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "y_true = [+1, -1, -1, +1, -1, +1, -1, -1, +1, -1]\n", 22 | "y_pred = [+1, +1, -1, -1, -1, -1, -1, -1, -1, +1]\n", 23 | "classes_order = [+1, -1]" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "## Матрица ошибок" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 3, 36 | "metadata": {}, 37 | "outputs": [ 38 | { 39 | "name": "stdout", 40 | "output_type": "stream", 41 | "text": [ 42 | "[[1 2]\n", 43 | " [3 4]]\n" 44 | ] 45 | } 46 | ], 47 | "source": [ 48 | "CM = sklearn.metrics.confusion_matrix(y_true, y_pred, labels=classes_order)\n", 49 | "CM = CM.T # По умолчанию строки соотв. правильным ответам, а столбцы - ответам алгоритма\n", 50 | "print(CM)" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 4, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "tp, fp, fn, tn = CM.ravel()" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 5, 65 | "metadata": {}, 66 | "outputs": [ 67 | { 68 | "data": { 69 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAQ0AAAD3CAYAAAAHbAHDAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAFhVJREFUeJzt3X+UXGV9x/H3JyEQRIFqoCKgUaBQtQqVomg9Ra2tUgpW4YgVlValWpAqSo94FBWP51RraUtBaVQELKW1WNvIQS1WEWgBSRCRQJVfolEwhigQ+ZndT/+4d3VYd2eeuzuzc3fm8zrnnsy9c+e5z06S7z6/H9kmIqLUkmFnICIWlwSNiGgkQSMiGknQiIhGEjQiopEEjYhoJEEjYsRJWirpG5IunOG9bST9q6SbJV0laWWv9BI0IkbfXwA3zvLe64Gf2N4T+FvgQ70SS9CIGGGSdgP+APjELLccBpxTv74AeJEkdUszQSNitP0d8JfA5Czv7wp8H8D2FuBu4HHdEtyqn7mLiPn5/Rds57s2TRTdu/a6B9cBD3RcWmV71dSJpEOADbbXSjqoX3lM0IhokY2bJrjqS7sV3btsl1sesL1/l1ueBxwq6WBgObC9pH+yfVTHPT8AdgfWS9oK2AG4q9tzUz2JaBUz4cmio2dK9km2d7O9EjgS+Mq0gAGwGnhd/frw+p6us1hT0ohoEQOTDHbmuaRTgDW2VwOfBD4t6WZgE1Vw6SpBI6JFjHnYZW0ajdK1LwEuqV+f3HH9AeCIJmklaES0zKBLGvOVNo0+kHSWpA2Srh92XkaZpH0kXSHpQUnvGHZ+BsHABC46hiVBoz/OBl4y7EyMgU3A8cBHhp2RQZrERcewJGj0ge1Lqf5BxwDZ3mD7auDhYedlUAxM2EXHsKRNI6JlenemDleCRkSLeMjtFSVSPYlWk3SspGvr4wnDzs+g2fBw4TEsKWlEq9k+Azhj2PlYOGKCrpNMhy4ljT6QdD5wBbC3pPWSXj/sPI0iSY+XtB44AXh3/V1vP+x89ZOBSZcdw5KSRh/YftWw8zAObN8JlM3mWsTaXtJI0IhokWpwV4JGRDQw6QSNiCiUkkZENGLEw1467Gx0ld6TPpJ0zLDzMOpG/TueKmmUHMOSoNFfI/0PuiVG/DsWE15SdAxLqicRLVKt3NXu3+WtDRpbaxsvZ7thZ6OR5TyK7fXYdk8c6LBlxeL6fgGWPfpXeNROuy+a7xjg/o3rN9reqfT+NITO0XK249l60bCzMdI2vuLAYWdhLFz7j2+/vfReW0OtepRobdCIGFeTKWlERCkjHnK7/1u2O3cRYyYNoRHR2ESGkUdEKSMmUtKIiCYm03sSEaWqYeQJGhFRaDFMWEvQiGgRm9YP7mp37iLGjpgsPHqmJC2X9HVJ35S0TtL7Z7jnaEk/7ljx/Q290k1JI6JFqh3W+va7/EHghbY3S1oGXC7pC7avnHbfv9o+rjTRBI2IlulXQ6htA5vr02X1Me/JfqmeRLSIEZMuO0pIWirpWmADcLHtq2a47RWSrpN0gaTde6WZoBHRMhMsKTqAFZLWdBy/tECR7Qnb+1Jt/XCApKdPu+XzwErbzwAuBs7plb9UTyJapGGX60bb+xela/9U0leBlwDXd1y/q+O2TwAf7pVWShoRLVLtsLak6OhF0k6Sdqxfbwu8GPi/affs0nF6KHBjr3RT0ohomT6u3LULcI6kpVQFhM/YvlDSKcAa26uB4yUdCmwBNgFH90o0QSOiRWz1be6J7euA/Wa4fnLH65OAk5qkm6AR0TJtHxGaoBHRItUiPFlPIyKKZWHhiGjAkFmuEVFuakRomyVoRLRMFhaOiGLVehopaUREA6meRESxqk0j1ZOIaCAbQEdEMSO2TKbLNSIayIjQiCiW3pOIaCwNoRFRLCNCI6KxtGlERLFqub8EjYgo5XS5RkQDWYQnIhpL9SQiii2GNo0F6xCWdJakDZK
u7313xPjq57aMg7CQo0jOptrdKSJm0e+9XAdhwaonti+VtHKhnhexKBm2ZERoRJRaDG0arQoa9a7XxwAs51FDzk3EcCRoNGB7FbAKYHs91kPOTsSCWwxzT3pWniQ9R9LVkjZLekjShKR7FiJzEePIVtExLCUtLqcDrwJuArYF3gCc0fRBks4HrgD2lrRe0uubphExDiZR0TEsRdUT2zdLWmp7AviUpG/QcKdp26+aSwYjxondvzYNScuBS4FtqP6vX2D7vdPu2QY4F3gWcBfwStvf7ZZuSdC4T9LWwLWSPgzcwcKO74gYI2Jism//vR4EXmh7s6RlwOWSvmD7yo57Xg/8xPaeko4EPgS8sluiJbl7DbAUOA74GbA78Iq5/AQR0Vu/2jRc2VyfLquP6R0MhwHn1K8vAF4kqWviPUsatm+vX94PvL9nTiNizhqO01ghaU3H+aq6B/LnJC0F1gJ7AmfYvmpaGrsC3wewvUXS3cDjgI2zPbRn0JB0G4+MTqrS91N6fTYiGnLVrlFoo+39uyZXtUPuK2lH4HOSnm57XvO/Sto0rgIeD/wz8Hngofk8MCK6G0TPiO2fSvoq1fyvzqDxA6omh/WStgJ2oGoQnVXPNg3bRwJ/RFUf+hRwnO2uiUbE3Jj+tWlI2qkuYSBpW+DFwP9Nu2018Lr69eHAV+zuZZ3SEaGT/HIDSkT0XV9HhO4CnFO3aywBPmP7QkmnAGtsrwY+CXxa0s3AJuDIXomWtGn8c/3w84GjgYckPdb2pjn/KBExq8nJ/gQN29cB+81w/eSO1w8ARzRJt6Sk8VyqUsZJwDupG0KBNIRG9JnNUIeIlyjpcl25APmIiNooTFi7ZiEyEhEVu+wYlpLqSbvDXsSIWfTVE6pZqdd1nE8N7nrGgPIUMbbMcKe9lygJGrcBfzjojEREpe1jG0qCxkMd808iYpAM7lOX66CUBI23DDwXEfFzo1A9OUDSAdMv2j51APmJGHvD7BkpURI03gPcDnxuwHmJGHtTc0/arCRo7EE1GvRFwCm2vzzYLEWMMQMtDxols1w32T6RaiLLEZK+KOm3Bp+1iPG06Ad3Sfo8v+gFEvBE4EqqJQAjot9GoE3jIwPPRUTUtPi7XG1/TdLjgQOoYuDVtu8ceM4ixtEimOVaMmHtDcDXgZdTrexzpaQ/HXTGIsaWC48hKamenAjsN7XEn6THAf8LnDXIjEWMr3aXNEqCxl3AvR3n99Jj4dGImIcRaAi9GbhK0n9S/TiHAddJOgEyMjSi70YgaNxSH1P+s/7zMf3PTsSYG4UJa7bfDyDp0fX55u6fiIh5aXlJo6T35On1LvHrgHWS1kp62uCzFjGmrLJjSEqqJ6uAE2x/FUDSQcDHqVYpj4g+U8tLGiVBY7upgAFg+xJJ2w0wTxHja8hjMEqUBI1bJb0H+HR9fhRw6+CyFDHOhlv1KNGzTQP4U2An4N+BzwIr6msRMQgjMCL0MbaPH3hOIqIyOewMdFdS0viPgeciIipTi/C0uPekJGi0u4IVMWLksqNnOtLukr4q6QZJ6yT9xQz3HCTpbknX1sfJM6XVqaR6sq2k/ZgWPGxnu8aIQehfe8UW4O22r5H0GGCtpItt3zDtvstsH1KaaEnQuBOYPr/EwAtLHzIXE3ttw92n7TnIR4y9tft+bNhZGAtL/3E4z7V9B3BH/fpeSTcCuwLTg0YjJcPID5rPAyKimQaDu1ZIWtNxvsr2qhnTlFYC+wFXzfD2gZK+CfwQeIftdd0eWrJG6GkzXU+PSsSAlDdybrS9f6+b6nljnwXeavueaW9fAzzJ9mZJB1N1fOzVLb2S6slhQM/GkYjoA9PXLldJy6gCxnm2//2XHtcRRGxfJOmjklbY3jhbmkWL8Ng+Z045jojG+jX3RJKATwI3zrbuTb3+749su95JcQk9FtkqCRr7SLoWeICqzvM/wBm2H2jyA0REof71njwPeA3wrfr/MMC7qLYhwfaZVOv+vlnSFuB
+4Ei7+64qJUHj16n2ONkWeAJwBPAJqjkoEdFvfQoati+nxzgr26cDpzdJt6T35PaO03XAxZI+1OQhEVGmdODWMJWUNACQtDOwvD49YzDZiYhFP8tV0qGSbgJuA74GfBe4aMD5ihhfLZ/lWjL35APAc4Dv2H4y1e7xMw0QiYg+0GTZMSwlQePheqOkJZKW1Kt49RxQEhFzUDhZbZjtHiVtGj+tR5RdCpwnaQPws8FmK2KMtbwhtKSkcRhwH/A24ItUe6D84SAzFTHWWt6mUdLlOlWqmAQyMjRiwNre5VpS0oiI+LnicRoRsUAWe0lD0jELkZGIoOo9GYEu1zcNPBcR8QuLvSEU2FHSy6dfnGlufkTMj2h/Q2hJ0NgBOIRHzpYz1eZJEdFvIxA0vmc7O6pFLIQRmeXadZHRiOizlgeNng2hto+S9CRJvwsgadt6D4WIGIBF33si6Y3ABcDU7g27ka0aIwan5b0nJV2ux1KtNXgPgO2bgJ0HmamIsVUaMFre5fqg7YeqhY1B0la0vtYVsXi1vSG0pKTxNUnvotrT9cXAvwGfH2y2IsZYy0saJUHjncCPgW8Bf0a11N+7B5mpiHG26BfhsT0JfBz4uKStgW167YsQEfPQ8v9dJb0nb5O0RtJrge8AN0k6cfBZixg/paWMVpc0qHpPjgS+Aqyk2mltDfDXg8tWxBhreUmjJGjcY3uNpFtsbwKQlC0ZIwak7b0nJUHjKZJWA0+u/xTw5MFmK2KMjUDQOKz+8286rn1kAHmJCBiJoPEC2+8bdEYigr7OcpW0O3Au8KtVyqyy/ffT7hHw98DBVLsOHG37mm7plozTOHROOY6Iuenf4K4twNttP5Vql8RjJT112j0vBfaqj2OAj/VKtKSksbOkE6ZftH1qwWcjoqF+zWC1fQdwR/36Xkk3ArsCN3Tcdhhwbj326kpJO0rapf7sjEqCxlLg0Txy5a6IGJAG1ZMVktZ0nK+yvWrGNKWVwH788j7MuwLf7zhfX1+bV9C40/YpBfdFxHw1m1ey0XbPfZXrbVU/C7zV9j1zz1ylJGhcPN+HREQDfew9kbSMKmCcN8ti4D8Adu84362+NquSuSd/KemZwPPrS5fZ/mZZliOiiX6uRl73jHwSuLFLG+Rq4DhJ/wI8G7i7W3sGlM09OR44j2rhnZ2Bf5L0loaZ30fSFZIelPSOJp+NGDv96z15HvAa4IWSrq2PgyW9SdLUfkYXAbcCN1NNTP3zXomWVE/eADx7aiNoSR8CrgD+oSjblU3A8cDLGnwmYiypT5PIbV9Ojw6Mutfk2CbplozTEDDRcT7RKyPT2d5g+2rg4Safixg7i2BbxpKSxqeAqyR9rj5/GVU9KSIGYbEPI7d9qqRLgN+uL/2J7W8MIjP1ZtPHAGy98/aDeERE643CLFfqsehdx6NPJ+lY4I316cG2f1jwnFXAKoDtfm2Xln91EQPS8n/5RUFjLmyfAZwxqPQjRtKIbMs4b5IeT7Xa1/bApKS3Ak/tx+i0iJGToAG276QaaRYRXfRzcNegLEjQiIhymmx31EjQiGiTIW+EVCJBI6Jlhjlwq0SCRkTbpKQREU2kITQiyhlo+a6nCRoRLZM2jYgolnEaEdGMnepJRDSTkkZENJOgERFNpKQREeUMZO5JRDSRLteIaCa9JxHRRNo0IqJcpsZHRBPViNB2R40EjYi2SUNoRDSRkkZElLNbP06jZC/XiFhActnRMx3pLEkbJF0/y/sHSbq7Y0f5k0vyl5JGRNv0r3pyNnA6cG6Xey6zfUiTRBM0ItrE/RsRavtSSSv7k9ovpHoS0TZTa2r0OvrjQEnflPQFSU8r+UBKGhFtUx4PVkha03G+qt5EvdQ1wJNsb5Z0MPAfwF69PpSgEdEyDbpcN9ref67P6dxL2fZFkj4qaYXtjd0+l6AR0SYGJhamy7XemP1Hti3pAKrmirt6fS5BI6JFhPs2uEvS+cBBVNWY9cB7gWUAts8EDgfeLGk
LcD9wpN374QkaEW3Tp6Bh+1U93j+dqku2kQSNiLbJMPKIKGYyYS0imsmEtYhoJkEjIorZMNnu+kmCRkTbtDtmJGhEtE3aNCKimQSNiCiWHdbm7r6b7tx49Uv/6vZh56OhFUDXyT5tsnTYGZibRfUd155Ufmtfp70PRGuDhu2dhp2HpiStmc+sw+htLL7jBI2IKGZgot3dJwkaEa1icILGOGmyalLMzeh/xy2vnmSN0D5quNTaoiLpaZIuk/R1SV2nXA/SKH/HwC96T0qOIUlJI4rYXgc8f9j5GAspaYwuSSsl3d+x2cxtks6u3ztb0pmS1kj6jqRDOj5zmaRr6uO59fXOjWtulXRCff1oSafXr/eWtEXS4R15+K6kb0m6YWpTHEnvk/SOGfJ79rTPXl/nZ+VMG+pI2tyRtwvr14+V9NNZ0l8p6SuSrpP035KeKGmPju9nouP1EyRdIunbdd6vlPSEOp1nSfqapLWSviRpF0nPrz93Q+d3Pue/vDZb2NXIG0vQmL9bbO9re1/gxGnvrQQOAP4AOFPScmAD8GLbvwm8Ejit4/7L6nReCRw1w7M+ANw47dpS4HeAg+f7gxQ6CfjeLO/9A3CO7WcA5wGn2e78fu6fem37h/VnXg08DfgxsL+kZXU6h9t+FnAW8EHbU9/NwTzyOx8tNkxMlB1DkurJYH3G9iRwk6RbgX2A24DTJe0LTAC/1nH/8+vfnnsCx3UmJGl/qiC/dtoztgUeALafdv1tko4Cfga83faV9fW/lvTu+vUeHffv0fGb+99sf3D6DyNpV+A5wOdm+XkPBF5ev/408OFZ7ut0HrANcA/wZWBv4OnAxZKgCop3FKQzOlI9GWvT//YNvA34EfBMYH9g6473p36brgTeX5dMpnwAeE9nYvX7S2zfN8Oz/7ZO673AqR3XT+z4LX1Lx/Vb6mvPBV4nae8Z0nxvnY9+/qt+te2VwGrgrYCAdR0lkt+w/Xt9fF77pXoy1o6QtETSHsBTgG8DOwB31CWQ1zDzaO77qEoQ29Tnv1N/ZnrV5HDgih55uItHBqZe7q+fv2za9T2Albb/q8tn/xc4sn79auCyBs+9h2qI+LeBnSQdCCBpWenOX6OhsOckvScj63vA16mqDm+y/YCkjwKflfRa4ItU1YcpU9WT5cCptu+ui+h7UbWL/JykPwLeDBw9y7OPlfQy4FFU7RC9PFnS5VTB6lLb19fPnrIP8Cc90ngL8ClJJ1K1UfS6H+A8SfdTBas/tv1Q3Vh7mqQdqP6N/h2wriCtxc/glg/uUsE2BzEHdS/KhbYvGHZeYvHYYaudfOD2Lyu690s/+cTaYczDSUkjom1a/os8QWNAbB897DzEIjTV5dpiCRoRLeMsLBwR5bIIT0Q0sQiW+8s4jYi28WTZ0YOksyRtmGleUf2+JJ0m6eZ6vtBvlmQvQSOiRQx40kVHgbOBl3R5/6VUY4D2Ao4BPlaSaIJGRJvYfStp2L4U2NTllsOAc125EthR0i690k2bRkTLeOG6XHcFvt9xvr6+1nWCYIJGRIvcy0++9GVfsKLw9uWS1nScr1qIlc0SNCJaxHa3Noh++wGwe8f5bvW1rtKmETG+VgOvrXtRngPcbbvn2iUpaUSMKEnnAwcBKyStp1oPZRmA7TOBi6hWQruZajmEklnJmeUaEc2kehIRjSRoREQjCRoR0UiCRkQ0kqAREY0kaEREIwkaEdFIgkZENPL/hcP+6tNh2eEAAAAASUVORK5CYII=\n", 70 | "text/plain": [ 71 | "
" 72 | ] 73 | }, 74 | "metadata": { 75 | "needs_background": "light" 76 | }, 77 | "output_type": "display_data" 78 | } 79 | ], 80 | "source": [ 81 | "ax = plt.matshow(CM)\n", 82 | "plt.xticks([0,1], classes_order) #, rotation='vertical'\n", 83 | "plt.yticks([0,1], classes_order) #, rotation='vertical'\n", 84 | "plt.xlabel('правильный ответ')\n", 85 | "plt.ylabel('ответ алгоритма')\n", 86 | "plt.colorbar()\n", 87 | "plt.show()" 88 | ] 89 | }, 90 | { 91 | "cell_type": "markdown", 92 | "metadata": {}, 93 | "source": [ 94 | "## Доля правильных ответов" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 6, 100 | "metadata": {}, 101 | "outputs": [ 102 | { 103 | "name": "stdout", 104 | "output_type": "stream", 105 | "text": [ 106 | "Accuracy1 = 0.5\n", 107 | "Accuracy2 = 0.5\n" 108 | ] 109 | } 110 | ], 111 | "source": [ 112 | "acc = sklearn.metrics.accuracy_score(y_true, y_pred)\n", 113 | "print('Accuracy1 =', acc)\n", 114 | "print('Accuracy2 =', (tp+tn)/(tp+tn+fp+fn))" 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "## Точность, полнота, F-мера" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 7, 127 | "metadata": {}, 128 | "outputs": [ 129 | { 130 | "name": "stdout", 131 | "output_type": "stream", 132 | "text": [ 133 | "Precision1 = 0.3333333333333333\n", 134 | "Precision2 = 0.3333333333333333\n" 135 | ] 136 | } 137 | ], 138 | "source": [ 139 | "prec = sklearn.metrics.precision_score(y_true, y_pred, pos_label=+1) #pos_label нужно для задания positive класса\n", 140 | "print('Precision1 =', prec)\n", 141 | "print('Precision2 =', tp/(tp+fp))" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": 8, 147 | "metadata": {}, 148 | "outputs": [ 149 | { 150 | "name": "stdout", 151 | "output_type": "stream", 152 | "text": [ 153 | "Recall1 = 0.25\n", 154 | "Recall2 = 0.25\n" 155 | ] 156 | } 157 | ], 158 | "source": [ 159 | "rec = 
sklearn.metrics.recall_score(y_true, y_pred, pos_label=+1)\n", 160 | "print('Recall1 =', rec)\n", 161 | "print('Recall2 =', tp/(tp+fn))" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 9, 167 | "metadata": {}, 168 | "outputs": [ 169 | { 170 | "name": "stdout", 171 | "output_type": "stream", 172 | "text": [ 173 | "F11 = 0.28571428571428575\n", 174 | "F12 = 0.28571428571428575\n" 175 | ] 176 | } 177 | ], 178 | "source": [ 179 | "f1 = sklearn.metrics.f1_score(y_true, y_pred, pos_label=+1)\n", 180 | "print('F11 =', f1)\n", 181 | "print('F12 =', 2*prec*rec/(prec+rec))" 182 | ] 183 | }, 184 | { 185 | "cell_type": "markdown", 186 | "metadata": {}, 187 | "source": [ 188 | "## ROC-кривая" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": 10, 194 | "metadata": {}, 195 | "outputs": [], 196 | "source": [ 197 | "y = np.array([-1, -1, +1, +1])\n", 198 | "scores = np.array([0.1, 0.4, 0.35, 0.8])\n", 199 | "fpr, tpr, thresholds = sklearn.metrics.roc_curve(y, scores, pos_label=+1)" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": 15, 205 | "metadata": {}, 206 | "outputs": [ 207 | { 208 | "data": { 209 | "image/png": "\n", 210 | "text/plain": [ 211 | "
" 212 | ] 213 | }, 214 | "metadata": { 215 | "needs_background": "light" 216 | }, 217 | "output_type": "display_data" 218 | } 219 | ], 220 | "source": [ 221 | "fig, axes = plt.subplots(1, 2)\n", 222 | "\n", 223 | "axes[0].plot(fpr, tpr)\n", 224 | "axes[0].set_title('ROC')\n", 225 | "\n", 226 | "axes[1].plot(thresholds, fpr, 'b-+', label='FPR')\n", 227 | "axes[1].plot(thresholds, tpr, 'g-x', label='TPR')\n", 228 | "axes[1].legend()\n", 229 | "\n", 230 | "axes[0].set(xlabel=\"FPR\", ylabel=\"TPR\")\n", 231 | "axes[1].set(xlabel=\"Threshold\")\n", 232 | "\n", 233 | "plt.show()" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": 16, 239 | "metadata": {}, 240 | "outputs": [ 241 | { 242 | "name": "stdout", 243 | "output_type": "stream", 244 | "text": [ 245 | "AUROC1 = 0.75\n", 246 | "AUROC2 = 0.75\n" 247 | ] 248 | } 249 | ], 250 | "source": [ 251 | "auroc = sklearn.metrics.roc_auc_score(y, scores) # positive класс - класс с большей меткой (+1)\n", 252 | "print('AUROC1 =', auroc)\n", 253 | "print('AUROC2 =', 1/2*1/2+1/2*1)" 254 | ] 255 | }, 256 | { 257 | "cell_type": "markdown", 258 | "metadata": {}, 259 | "source": [ 260 | "## PR-кривая" 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": 27, 266 | "metadata": {}, 267 | "outputs": [], 268 | "source": [ 269 | "y = np.array([-1, -1, +1, +1])\n", 270 | "scores = np.array([0.1, 0.4, 0.35, 0.8])\n", 271 | "p, r, th = sklearn.metrics.precision_recall_curve(y, scores, pos_label=+1)\n", 272 | "eps = 0.1\n", 273 | "th = np.append(th, th[-1] + eps)" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": 28, 279 | "metadata": {}, 280 | "outputs": [ 281 | { 282 | "data": { 283 | "image/png": "\n", 284 | "text/plain": [ 285 | "
" 286 | ] 287 | }, 288 | "metadata": { 289 | "needs_background": "light" 290 | }, 291 | "output_type": "display_data" 292 | } 293 | ], 294 | "source": [ 295 | "fig, axes = plt.subplots(1, 2)\n", 296 | "\n", 297 | "axes[0].plot(r, p)\n", 298 | "axes[0].set_title('PR')\n", 299 | "\n", 300 | "axes[1].plot(th, r, 'b-+', label='Recall')\n", 301 | "axes[1].plot(th, p, 'g-x', label='Precision')\n", 302 | "axes[1].legend()\n", 303 | "\n", 304 | "axes[0].set(xlabel=\"Recall\", ylabel=\"Precision\")\n", 305 | "axes[1].set(xlabel=\"Threshold\")\n", 306 | "\n", 307 | "plt.show()" 308 | ] 309 | }, 310 | { 311 | "cell_type": "code", 312 | "execution_count": 35, 313 | "metadata": {}, 314 | "outputs": [ 315 | { 316 | "name": "stdout", 317 | "output_type": "stream", 318 | "text": [ 319 | "AUPRC1 = 0.8333333333333333\n", 320 | "AUPRC2 = 0.8333333333333333\n" 321 | ] 322 | } 323 | ], 324 | "source": [ 325 | "auprc = sklearn.metrics.average_precision_score(y, scores, pos_label=+1)\n", 326 | "print('AUPRC1 =', auprc)\n", 327 | "print('AUPRC2 =', -np.sum(np.diff(r) * p[:-1])) # manual check: step-wise sum of P_k * (R_k - R_{k+1}) over every point of the PR curve, however many points it has" 328 | ] 329 | }, 330 | { 331 | "cell_type": "code", 332 | "execution_count": null, 333 | "metadata": {}, 334 | "outputs": [], 335 | "source": [] 336 | } 337 | ], 338 | "metadata": { 339 | "kernelspec": { 340 | "display_name": "Python 3", 341 | "language": "python", 342 | "name": "python3" 343 | }, 344 | "language_info": { 345 | "codemirror_mode": { 346 | "name": "ipython", 347 | "version": 3 348 | }, 349 | "file_extension": ".py", 350 | "mimetype": "text/x-python", 351 | "name": "python", 352 | "nbconvert_exporter": "python", 353 | "pygments_lexer": "ipython3", 354 | "version": "3.7.2" 355 | } 356 | }, 357 | "nbformat": 4, 358 | "nbformat_minor": 2 359 | } 360 | --------------------------------------------------------------------------------