├── README.md ├── mkb_benchmark.ipynb ├── dj_recomgenerator.ipynb ├── dj_sas_benchmark.ipynb ├── dj_benchmark_GMSC_01.ipynb ├── Benchmark_mmp_digital_reputation_challenge_1.ipynb ├── dj_invest_GMSC.ipynb └── dj_Benchmark_12trip.ipynb /README.md: -------------------------------------------------------------------------------- 1 | # notebooks 2 | Некоторые фрагменты кода 3 | 4 | 5 | [Бенчмарк для студентов 2 курса ВМК МГУ](dj_benchmark_GMSC_01.ipynb) для задачи https://www.kaggle.com/c/msu-iml-2018 6 | 7 | [Чуть подробнее для 4 курса ВМК МГУ](dj_invest_GMSC.ipynb) для задачи https://www.kaggle.com/c/msu-iml-2018 8 | 9 | [Бенчмарк для студентов 5 курса ВМК МГУ](Benchmark_mmp_digital_reputation_challenge_1.ipynb) для задачи https://boosters.pro/championship/digital_reputation_challenge/overview 10 | 11 | [Бенчмарк для студентов 5 курса ВМК МГУ](dj_sas_benchmark.ipynb) для задачи https://sascompetitions.ru 12 | 13 | [Бенчмарк для студентов 2 курса ВМК МГУ](dj_Benchmark_12trip.ipynb) для задачи 1 соревнования https://boosters.pro/championship/onetwotrip_challenge/overview 14 | 15 | [Бенчмарк для студентов ПЗАД](mkb_benchmark.ipynb) для задачи соревнования [Хакатон МКБ 2021](https://dsbattle.com/hackathons/mkb/) 16 | -------------------------------------------------------------------------------- /mkb_benchmark.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "3e7947ff-db80-4afb-a61d-b708c8db3ddc", 6 | "metadata": {}, 7 | "source": [ 8 | "# примитивный вариант решения задачи MKB\n", 9 | "\n", 10 | "автор: Александр Дьяконов (https://dyakonov.org/ag/)\n", 11 | "\n", 12 | "цель: для оценки студентов своего курса (нужно за неделю побить этот бенчмарк)\n", 13 | "\n", 14 | "* решение записано за 20 минут\n", 15 | "* практически нет генерации признаков\n", 16 | "* все категории кодируются по мощности\n", 17 | "* пропуски -> -1\n", 18 | "* одна модель - lgb\n", 19 | "\n", 20 
def makeX(data, seed=None):
    """Turn a raw MKB hackathon frame into a purely numeric feature frame.

    Steps, in order:
      1. ``CITIZENSHIP_NAME`` and ``SEX_NAME`` are mapped to small integer codes
         (values absent from the mapping become NaN and end up as -1 in step 3).
      2. Every group/date column and ``id_client`` is frequency-encoded: missing
         values become the sentinel -1, then each value is replaced by the number
         of times it occurs in *this* frame plus Gaussian noise (std 0.1).
      3. All remaining missing values are replaced by -1.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing at least the columns referenced below. It is NOT
        modified; the function works on a copy.
    seed : int or None, optional
        Seed for the noise added to the frequency codes. ``None`` (default)
        draws from NumPy's global random state, exactly as the original code did;
        an integer makes the result reproducible.

    Returns
    -------
    pandas.DataFrame
        New frame with the same columns as ``data`` and no missing values.

    Notes
    -----
    Counts are computed per call, so train and test frames processed separately
    get codes on different scales (counts depend on the frame size).
    """
    # Copy first: the original version overwrote the caller's columns in place,
    # which made re-running the cell encode already-encoded values.
    data = data.copy()

    # Global state when no seed is given (legacy behaviour), private generator otherwise.
    noise_source = np.random if seed is None else np.random.RandomState(seed)

    citizenship_codes = {-1: -1, 'Российская Федерация': 4, 'Таджикистан': 3, 'Казахстан': 2, 'Армения': 1}
    sex_codes = {0: 0, 'мужской': 1, 'женский': -1}
    data['CITIZENSHIP_NAME'] = data['CITIZENSHIP_NAME'].fillna(-1).map(citizenship_codes)
    data['SEX_NAME'] = data['SEX_NAME'].fillna(0).map(sex_codes)

    group_names = ['OKFS_GROUP', 'OKOPF_GROUP', 'OKOGU_GROUP'] + ['WORKERSRANGE', 'OKVED_CODE']
    date_names = ['SIGN_DATE', 'DATEFIRSTREG', 'TAXREG_REGDATE', 'TAXREGPAY_REGDATE', 'BIRTHDATE']
    for name in group_names + date_names + ['id_client']:
        filled = data[name].fillna(-1)
        counts = filled.value_counts()
        # Small noise breaks ties between categories that occur equally often.
        noisy_counts = counts + 0.1 * noise_source.randn(len(counts))
        data[name] = filled.map(noisy_counts)

    return data.fillna(-1)
# --- Feature preparation -----------------------------------------------------
# Each frame is encoded independently by makeX (defined in an earlier cell).
data_train = makeX(data_train) # process the training set
data_test = makeX(data_test) # process the test set

# --- Target / feature alignment ----------------------------------------------
y = data_train.pop('TARGET').values # target values (pop also removes the column from the features)
data_test = data_test[data_train.columns] # just in case the columns come in a different order
# NOTE(review): 'id_contract' is read from data_test below, so it is one of
# data_train.columns and is therefore passed to the model as a feature.

# --- Model ---------------------------------------------------------------------
import lightgbm as lgb

model = lgb.LGBMClassifier(num_leaves=31,
                        learning_rate=0.05,
                        n_estimators=200)

# --- Fit and predict -----------------------------------------------------------
model.fit(data_train, y)

a = model.predict_proba(data_test)[:, 1] # predicted values: second column of predict_proba

# --- Submission file -----------------------------------------------------------
df = pd.DataFrame({'id_contract': data_test.id_contract.values, 'TARGET': a})
df.to_csv('ans1.csv', sep=';', index=False) # save the answer (semicolon-separated, no index column)
# ---- Parameters of the letter (edit these) -----------------------------------
name = 'George'
surname = 'Kudryavtsev'
when = "September, 2014"
sex = "m"  # "m" selects masculine pronouns, anything else selects feminine ones
research = "recommender systems"
to = "the master's program at the Skolkovo Institute of Science and Technology"


def _pronouns(sex):
    """Return the pronoun forms used in the letter for the given ``sex`` flag."""
    if sex == "m":
        return {"his": "his", "he": "he", "He": "He", "His": "His", "him": "him"}
    return {"his": "her", "he": "she", "He": "She", "His": "Her", "him": "her"}


def build_letter(name, surname, when, sex, research, to, comment=""):
    """Compose the recommendation letter and return it as a single string.

    Parameters
    ----------
    name, surname : str
        First and last name of the student.
    when : str
        When the author first met the student, e.g. ``"September, 2014"``.
    sex : str
        ``"m"`` for masculine pronouns; any other value gives feminine ones.
    research : str
        Topic of the student's research.
    to : str
        Program or position the student is recommended for.
    comment : str, optional
        Extra sentence inserted after the research topic; omitted when empty.

    Returns
    -------
    str
        The letter. Printing it yields the same line layout as the original
        sequence of ``print`` calls (paragraphs joined by a newline).
    """
    p = _pronouns(sex)
    his, he, He, His, him = p["his"], p["he"], p["He"], p["His"], p["him"]
    fullname = name + ' ' + surname

    # Only add the optional sentence when there is one, so that an empty comment
    # does not leave a double space in the text.
    research_sentence = His + " research was devoted to " + research + "."
    if comment:
        research_sentence += " " + comment

    paragraphs = [
        "Dear Sir or Madame,\n\n",
        "I am pleased to provide you with this online letter of recommendation in support of " + fullname
        + ". I am supervisor of " + his + " research work at the department of Computational Mathematics"
        + " and Cybernetics at Lomonosov Moscow State University.\n",
        "I first met " + name + " in " + when + " when " + he + " came to learn about my scientific interests. "
        + He + " also spoke to my colleagues and demonstrated " + his + " deep interest to our scientific field. "
        + name + " had excellent academic progress and I was glad to become " + his + " research advisor. "
        + name + " attended lectures and seminars I gave at the department. I could positively say that " + name
        + " was among the top students of the group. I would like to make special mention of " + name
        # possessive was missing in the original ("George talent")
        + "’s talent to learn and structure new information. I also supervised " + name
        + "’s scientific research. I would like to mention " + his
        + " high motivation for research work and organizational abilities.\n",
        # pronoun was hard-coded as "his" in the original
        research_sentence + " In addition " + name + " employed " + his
        + " good programming skills to implement many algorithms.",
        # object pronoun ("him"/"her") instead of the hard-coded possessive "his"
        "I consider " + name + " as a very promising researcher and confidently recommend " + him
        + " for «" + to + "».\n",
        "If you require any further information, please do not hesitate to contact me: djakonov@mail.ru.\n",
        "D’yakonov A. G.",
        "Doctor of Sciences,",
        "Professor of Department of Mathematical Methods of Forecasting at Lomonosov Moscow State University",
    ]
    return "\n".join(paragraphs)


# Names kept at module level because the optional comment below refers to them.
fullname = name + ' ' + surname
_forms = _pronouns(sex)
his, he, He, His, him = _forms["his"], _forms["he"], _forms["He"], _forms["His"], _forms["him"]

comment = ""
# comment = "Since " + his + " research topic is not widespread in Russia, " + name + " has to study a large amount of relevant literature in English."

print(build_letter(name, surname, when, sex, research, to, comment))
def prepare_data(d, cf=True):
    """Aggregate item-level order rows into one feature row per ``OrderID``.

    The input has one row per (order, goods group). Order-level attributes are
    taken from the first row of each order; ``num`` is the number of rows
    (counted on ``GroupID``) and ``sum`` is the total of ``OrderCnt`` per order.

    Parameters
    ----------
    d : pandas.DataFrame
        Raw frame with the columns ``OrderID``, ``Interval``, ``OrderDate``,
        ``Date``, ``DeliveryType``, ``ChannelID``, ``ClientID``, ``prepay``,
        ``count_edit``, ``GroupID``, ``OrderCnt`` and, when ``cf`` is true,
        ``CancelFlag``. It is NOT modified; the function works on a copy.
    cf : bool, optional
        Whether the frame carries the ``CancelFlag`` target (training data).

    Returns
    -------
    (pandas.DataFrame, pandas.Series) when ``cf`` is true
        Features indexed by ``OrderID`` and the ``CancelFlag`` target.
    pandas.DataFrame when ``cf`` is false
        Features only.
    """
    # Work on a private copy. The original implementation deleted 'Interval',
    # 'Date' and 'OrderDate' from the caller's frame, so calling the function a
    # second time on the same frame (e.g. re-running the cell) raised KeyError.
    d = d.copy()

    print ('Time')
    # 'Interval' is split on '-'; the last character of the second part is
    # dropped before conversion (presumably a unit suffix - verify on the data).
    tmp = d['Interval'].str.split('-')
    d['int0'] = tmp.apply(lambda x: int(x[0]))
    d['int1'] = tmp.apply(lambda x: int(x[1][:-1]))

    print ('Date')
    d['OrderDate'] = pd.to_datetime(d['OrderDate'])
    d['Date'] = pd.to_datetime(d['Date'])

    # Whole days between the two dates (OrderDate minus Date).
    d['deltaT'] = (d['OrderDate'] - d['Date']).dt.days.astype(int).values

    print ('Day')
    d['Date_day'] = d['Date'].dt.day
    d['Date_month'] = d['Date'].dt.month
    d['Date_weekday'] = d['Date'].dt.weekday

    d['OrderDate_day'] = d['OrderDate'].dt.day
    d['OrderDate_month'] = d['OrderDate'].dt.month
    d['OrderDate_weekday'] = d['OrderDate'].dt.weekday

    # The raw columns are fully replaced by the derived ones.
    d = d.drop(columns=['Interval', 'Date', 'OrderDate'])

    print ('Type')
    # Delivery types outside this mapping become NaN.
    d['DeliveryType'] = d['DeliveryType'].map({'Обычная доставка': 0, 'Доставка День в День': 1})

    print ('Groupby')
    cols = ['ChannelID', 'ClientID', 'DeliveryType', 'prepay', 'count_edit', 'int0', 'int1', 'deltaT',
            'Date_day', 'Date_month', 'Date_weekday',
            'OrderDate_day', 'OrderDate_month', 'OrderDate_weekday']
    if cf:
        cols = cols + ['CancelFlag']

    grouped = d.groupby('OrderID')
    data = grouped[cols].first()

    print ('Num')
    data['num'] = grouped['GroupID'].count()
    data['sum'] = grouped['OrderCnt'].sum()
    data['num/sum'] = data['num'] / data['sum']

    if cf:
        y = data.pop('CancelFlag')
        return (data, y)
    return (data)
# --- Experiment 1: 4-fold cross-validation with 6 leaves per tree -------------
import lightgbm as lgb

# NOTE(review): 'n_estimators' is given here and 'num_boost_round' is given to
# lgb.cv below (both 1000); confirm which one the installed LightGBM honours.
param = {'n_estimators':1000, 'num_leaves':6, 'objective':'binary',
         'learning_rate': 0.1, 'colsample_bytree': 0.75, 'subsample': 0.75,
         'metric': 'auc'}

# `data` / `y` are the per-order features and target returned by prepare_data(train, cf=True).
w = lgb.cv(param, lgb.Dataset(data, label=y),
           stratified=False,
           num_boost_round=1000, nfold=4, verbose_eval=100)
max(w['auc-mean'])  # highest value in the 'auc-mean' history returned by lgb.cv

# Position of that maximum in the history.
np.argmax(w['auc-mean'])

# --- Experiment 2: same setup, 8 leaves per tree ------------------------------
import lightgbm as lgb

param = {'n_estimators':1000, 'num_leaves':8, 'objective':'binary',
         'learning_rate': 0.1, 'colsample_bytree': 0.75, 'subsample': 0.75,
         'metric': 'auc'}

w = lgb.cv(param, lgb.Dataset(data, label=y),
           stratified=False,
           num_boost_round=1000, nfold=4, verbose_eval=100)
max(w['auc-mean'])

# --- Final model: the 8-leaf configuration fitted on all training orders ------
model = lgb.LGBMClassifier(learning_rate=0.1, num_leaves=8,
                        n_estimators=1000,
                        colsample_bytree=0.75, subsample=0.75, random_state=1)
model.fit(data, y)
# `data2` is the feature frame returned by prepare_data(test, cf=False).
a = model.predict_proba(data2)[:,1]

# --- Submission file -----------------------------------------------------------
# NOTE(review): the trailing space after each ID and the leading space in the
# ' Score' header are reproduced as written - presumably the format expected by
# the competition; verify before changing.
pd.DataFrame({'ID': [str(x) + ' ' for x in data2.index], ' Score': a}).to_csv('constant.csv', index=False)
Бенчмарк\n", 8 | "\n", 9 | "задачи для студентов 2го курса ВМК МГУ\n", 10 | "\n", 11 | "https://www.kaggle.com/c/msu-iml-2018/\n", 12 | "\n", 13 | "2018, Александр Дьяконов https://dyakonov.org/ag/" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 21, 19 | "metadata": {}, 20 | "outputs": [ 21 | { 22 | "name": "stdout", 23 | "output_type": "stream", 24 | "text": [ 25 | "Populating the interactive namespace from numpy and matplotlib\n" 26 | ] 27 | } 28 | ], 29 | "source": [ 30 | "import pandas as pd\n", 31 | "import numpy as np\n", 32 | "%pylab inline\n", 33 | "plt.style.use('seaborn-dark')\n", 34 | "import warnings\n", 35 | "warnings.filterwarnings(\"ignore\") # отключение варнингов\n", 36 | "pd.set_option('display.max_columns', None) # pd.options.display.max_columns = None \n", 37 | "# pd.set_option('display.max_rows', None) # не прятать столбцы при выводе дата-фреймов\n", 38 | "import matplotlib.pyplot as plt\n", 39 | "import matplotlib as mpl\n", 40 | "plt.rc('font', size=14)" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "# загружаем данные\n", 48 | "\n", 49 | "не забудьте поменять каталоги" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 23, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "train = pd.read_csv('./data_GMSC/train.csv')\n", 59 | "test = pd.read_csv('./data_GMSC/test.csv')" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 24, 65 | "metadata": {}, 66 | "outputs": [ 67 | { 68 | "name": "stdout", 69 | "output_type": "stream", 70 | "text": [ 71 | "(112500, 11) (37500, 10)\n" 72 | ] 73 | } 74 | ], 75 | "source": [ 76 | "# размеры данных\n", 77 | "print(train.shape, test.shape)" 78 | ] 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "metadata": {}, 83 | "source": [ 84 | "# смотрим на данные" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": 25, 90 | "metadata": {}, 91 | "outputs": [ 92 | { 93 | 
"data": { 94 | "text/html": [ 95 | "
\n", 96 | "\n", 109 | "\n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | "
плохой_клиентлиниивозрастповедение_30-59_днейDebt_Ratioдоходчисло_кредитовповедение_90_днейнедвижимостьповедение_60-89_днейсемья
552200.1116734601.329588800.080102.0
8938500.0440976900.5351223800.0100100.0
8158600.0475987700.1696103000.070100.0
10510800.7611495812217.000000NaN40100.0
354300.6906845500.43255212416.070202.0
\n", 199 | "
" 200 | ], 201 | "text/plain": [ 202 | " плохой_клиент линии возраст поведение_30-59_дней Debt_Ratio \\\n", 203 | "5522 0 0.111673 46 0 1.329588 \n", 204 | "89385 0 0.044097 69 0 0.535122 \n", 205 | "81586 0 0.047598 77 0 0.169610 \n", 206 | "105108 0 0.761149 58 1 2217.000000 \n", 207 | "3543 0 0.690684 55 0 0.432552 \n", 208 | "\n", 209 | " доход число_кредитов поведение_90_дней недвижимость \\\n", 210 | "5522 800.0 8 0 1 \n", 211 | "89385 3800.0 10 0 1 \n", 212 | "81586 3000.0 7 0 1 \n", 213 | "105108 NaN 4 0 1 \n", 214 | "3543 12416.0 7 0 2 \n", 215 | "\n", 216 | " поведение_60-89_дней семья \n", 217 | "5522 0 2.0 \n", 218 | "89385 0 0.0 \n", 219 | "81586 0 0.0 \n", 220 | "105108 0 0.0 \n", 221 | "3543 0 2.0 " 222 | ] 223 | }, 224 | "execution_count": 25, 225 | "metadata": {}, 226 | "output_type": "execute_result" 227 | } 228 | ], 229 | "source": [ 230 | "train.sample(5)" 231 | ] 232 | }, 233 | { 234 | "cell_type": "markdown", 235 | "metadata": {}, 236 | "source": [ 237 | "# если хотите работать с numpy-массивом" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": 27, 243 | "metadata": {}, 244 | "outputs": [ 245 | { 246 | "data": { 247 | "text/plain": [ 248 | "array([[0.00000000e+00, 3.12582480e-02, 5.70000000e+01, 0.00000000e+00,\n", 249 | " 3.97520496e-01, 5.00000000e+03, 1.50000000e+01, 0.00000000e+00,\n", 250 | " 2.00000000e+00, 0.00000000e+00, 0.00000000e+00],\n", 251 | " [0.00000000e+00, 5.23315890e-02, 6.40000000e+01, 0.00000000e+00,\n", 252 | " 5.70000000e+01, nan, 2.00000000e+00, 0.00000000e+00,\n", 253 | " 0.00000000e+00, 0.00000000e+00, nan]])" 254 | ] 255 | }, 256 | "execution_count": 27, 257 | "metadata": {}, 258 | "output_type": "execute_result" 259 | } 260 | ], 261 | "source": [ 262 | "X = train.values\n", 263 | "X[:2,:]" 264 | ] 265 | }, 266 | { 267 | "cell_type": "markdown", 268 | "metadata": {}, 269 | "source": [ 270 | "# готовим данные" 271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "execution_count": 28, 
# --- Prepare the data ----------------------------------------------------------
y = train.pop('плохой_клиент') # target vector (pop also removes the column from the features)
train.shape, y.shape

# Replace missing values with the sentinel -11 in both sets.
train.fillna(-11, inplace=True)
test.fillna(-11, inplace=True)

# --- Train the model -----------------------------------------------------------
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier(n_estimators=100, max_depth=2, random_state=0)
model.fit(train, y)

a = model.predict_proba(test)[:,1] # probabilities: second column of predict_proba

# --- Save the solution ---------------------------------------------------------
# Ids are generated from the number of predictions instead of the hard-coded
# 37500, so the cell also works when the test set has a different number of rows
# (with a fixed length the DataFrame constructor fails on mismatched columns).
pd.DataFrame({'id': np.arange(len(a)), 'a':a}).to_csv('./data_GMSC/solution.csv', index=False)
'X3.csv')\n", 54 | "Y = pd.read_csv(TRAIN_PATH + 'Y.csv')\n", 55 | "print (X1.shape, X2.shape, X3.shape, Y.shape)" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 5, 61 | "metadata": {}, 62 | "outputs": [], 63 | "source": [ 64 | "Y.columns = ['Y' + s if s != 'id' else 'id' for s in Y.columns]" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 6, 70 | "metadata": {}, 71 | "outputs": [ 72 | { 73 | "name": "stdout", 74 | "output_type": "stream", 75 | "text": [ 76 | "(4058, 26) (470083, 2) (4058, 453)\n" 77 | ] 78 | } 79 | ], 80 | "source": [ 81 | "TEST_PATH = 'test/'\n", 82 | "X1_test = pd.read_csv(TEST_PATH + 'X1.csv')\n", 83 | "X2_test = pd.read_csv(TEST_PATH + 'X2.csv')\n", 84 | "X3_test = pd.read_csv(TEST_PATH + 'X3.csv')\n", 85 | "print (X1_test.shape, X2_test.shape, X3_test.shape)" 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": [ 92 | "# Подготовка обучения и теста\n", 93 | "\n", 94 | "используем только матрицу 1" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 7, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "X = X1.copy()\n", 104 | "X = X.merge(Y)\n", 105 | " \n", 106 | "id_ = X.pop('id')\n", 107 | "y1 = X.pop('Y1')\n", 108 | "y2 = X.pop('Y2')\n", 109 | "y3 = X.pop('Y3')\n", 110 | "y4 = X.pop('Y4')\n", 111 | "y5 = X.pop('Y5')\n", 112 | "\n", 113 | "X_test = X1_test.copy()\n", 114 | "id__ = X_test.pop('id')" 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "### Эксперименты\n", 122 | "\n", 123 | "делаются так..." 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 9, 129 | "metadata": {}, 130 | "outputs": [ 131 | { 132 | "name": "stderr", 133 | "output_type": "stream", 134 | "text": [ 135 | "/home/alexander/anaconda3/lib/python3.6/site-packages/lightgbm/engine.py:390: UserWarning: Found `n_estimators` in params. 
Will use it instead of argument\n", 136 | " warnings.warn(\"Found `{}` in params. Will use it instead of argument\".format(alias))\n" 137 | ] 138 | }, 139 | { 140 | "name": "stdout", 141 | "output_type": "stream", 142 | "text": [ 143 | "[100]\tcv_agg's auc: 0.600372 + 0.0184634\n", 144 | "[200]\tcv_agg's auc: 0.597381 + 0.0157782\n", 145 | "[300]\tcv_agg's auc: 0.5931 + 0.0141725\n", 146 | "[400]\tcv_agg's auc: 0.590727 + 0.0117742\n", 147 | "[500]\tcv_agg's auc: 0.587088 + 0.0124422\n", 148 | "[600]\tcv_agg's auc: 0.586458 + 0.0104644\n", 149 | "[700]\tcv_agg's auc: 0.587516 + 0.00971511\n", 150 | "[800]\tcv_agg's auc: 0.587225 + 0.0104176\n", 151 | "[900]\tcv_agg's auc: 0.585616 + 0.0111163\n", 152 | "[1000]\tcv_agg's auc: 0.584474 + 0.0119534\n" 153 | ] 154 | }, 155 | { 156 | "data": { 157 | "text/plain": [ 158 | "0.6026030903311532" 159 | ] 160 | }, 161 | "execution_count": 9, 162 | "metadata": {}, 163 | "output_type": "execute_result" 164 | } 165 | ], 166 | "source": [ 167 | "import lightgbm as lgb\n", 168 | "\n", 169 | "param = {'n_estimators':1000, 'num_leaves':6, 'objective':'binary',\n", 170 | " 'learning_rate': 0.05, 'colsample_bytree': 0.75, 'subsample': 0.75,\n", 171 | " 'metric': 'auc'}\n", 172 | " \n", 173 | "w = lgb.cv(param, lgb.Dataset(X, label=y1),\n", 174 | " stratified=False,\n", 175 | " num_boost_round=1000, nfold=4, verbose_eval=100)\n", 176 | "max(w['auc-mean'])" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": 10, 182 | "metadata": {}, 183 | "outputs": [ 184 | { 185 | "data": { 186 | "text/plain": [ 187 | "[]" 188 | ] 189 | }, 190 | "execution_count": 10, 191 | "metadata": {}, 192 | "output_type": "execute_result" 193 | }, 194 | { 195 | "data": { 196 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYQAAAD8CAYAAAB3u9PLAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3Xl4VNX5wPHvmz0sCUsCBAIkQJB9kbApooAgiAURtaKitG5tpVptbUVbd9yqtbVSq6Lgz7pTrYgoi4KoCBJkXwJhEcIaEkgIIeuc3x9zZzLZmMk6Se77eZ55mHvuMudwYd456xVjDEoppVSAvzOglFKqftCAoJRSCtCAoJRSyqIBQSmlFKABQSmllEUDglJKKUADglJKKYsGBKWUUoAGBKWUUpYgf2egMqKiokxcXJy/s6GUUg3K+vXrTxhjor0d16ACQlxcHElJSf7OhlJKNSgi8pMvx2mTkVJKKUADglJKKYsGBKWUUoAGBKWUUhYNCEoppQANCEoppSwaEJRSSgEaENwOpOewIvm4v7OhlFJ+owHBMnnOt/xi3jqKHPqMaaWUPWlAsJzMKQDg0Mmzfs6JUkr5hwaEUg6d0oCglLInDQilnM4tIPVkDgBzVqRwz/sb/ZwjpZSqGxoQKFkreHjhNkY8s4L07Dz+uiSZjzcc4mBGjh9zp5RSdUMDAnA0szggHMnMBWDd/pPutNe/3VfneVJKqbqmAQHIzisqk/ar/6x3v5+/ej85+YV1mSWllKpzGhCA11btLTe9b4dIrhzQHoC1+zLqMktKKVXnNCAA36acKDf954M7MrGfMyD8Yt46DusIJKVUI6YB4RwGdW6JMcUT1S54+is+3pDqxxwppVTtsX1AKCxyuN8vnHlhiX1tI8IY2qV1ibR/fpXC2fyyfQ5KKdXQ2T4gZOTkA/D4lX3oEt2sxL6WTYKJDA9m31OX8787ncFib9oZFqw/6Dz3TD7Lth+r2wwrpVQtCfJ3BvztxGlnQIhqGkKz0CCW3zuSdpHh5Bc6EBEARIQBHVswc1Q3XlqRwpZDmaz/6SSzPtrMrmPZbHxoLC2ahFT6sw9m5BDVLJTwkMAaLZNSSlWFrWsIhUUOPvrR2ScQ1TwUgG5tmtMsNIhWTct+wf/hsvMICQrgg6RUpr68ml3HsgHnjGbPRfEcpRbIO56Vy/NLkzl5Jt+dtmLncS56dgUPfryF55Yk8/flu2q8fEopVRk+BQQRGS8iySKSIiL3V3DMtSKyXUS2icg7Huk3i8hu63WzR/ogEdliXfNFcf0cr0PzV+9nrjXprHU5AaA8oUFl/8pe+2afO7Bk5hTQ7cHFvLXmJzYePMW/v97DkCe/5J9fpTDw8WXuwDF78Q4APtpwiJdWpPD35bs5kZ0HQG5BEftPnKl2+ZRSqjK8BgQRCQTmABOAXsA0EelV6pgEYBZwoTGmN/A7K70V8DAwFBgCPCwiLa3TXgZuBxKs1/iaKFBlpFlfwFBcQ/Bm1oSe7vdXD4p1v996KBNwDmF1GJj7zV5umb+Opz/fWeL89T+dxBjDgYwcpgzsUGYfwLNfJHPJcyvZnHqqcgVSSqlq8KWGMARIMcbsNcbkA+8Bk0sdcxswxxhzEsAY43rSzGXAMmNMhrVvGTBeRGKACGPM98Y5rvP/gCtroDyVEhke7H7fPNS37pSJ/WKY0Kcda2aN4blr+rPsnpF0b9uMtfsyWLT5MHe+8yMAP6XnkO7RRPTs1H4A3Dh3La9/u4/8Qgf9YyN5Zfog/jzRGWTeXnuAMc+v5I3vnLWWpxbvRCml6oovAaEDcNBjO9VK89Qd6C4i34nIGhEZ7+XcDtb7c12z1nkOH/W1xSoyPJiXbxxEu8gwABLaNmdS//bsPHqame9sKHP8yO7RfHLnhVw7uCMdWoSTX+Tgic+czUW92kdyWe923HpRF1o1DWHVrjT2pBU3FX2/N71Mf4RSStUWXwJCed+Upb+lgnA
2+1wCTAPmikiLc5zryzWdHy5yu4gkiUhSWlqaD9n13YnsfO8H+WBC3xj3+6sGduDtW4e6t6cMbE//ji0AWHLPSP5yRXFrW2Lnlu73nVo1KXHN2VP6ALBmX3qN5FEppbzxJSCkAh09tmOBw+Uc84kxpsAYsw9IxhkgKjo31Xp/rmsCYIx51RiTaIxJjI6O9iG7vlu1K40e7Zqz5ZFx1bpOV4/5Cw9P6s2F3aJY8YdL+MO47lxhLX0B0Cw0iFtGxLu3AwKK42KHFuEARDUL4b7LzuNn/Z3nXf/aWj5IOqg1BaVUrfOl4XwdkCAi8cAh4Drg+lLH/A9nzWC+iEThbELaC+wBnvToSB4HzDLGZIjIaREZBqwFbgL+We3SVNKJ7Dwm9ouheViw94O9WHzXRRQ6HO5+ifiopswcneDz+beN7EJcVBNmjkpwz0uYPqwzb635iT8u2MzetDPcP6FHtfOplFIV8RoQjDGFIjITWAIEAm8YY7aJyGNAkjFmobVvnIhsB4qA+4wx6QAi8jjOoALwmDHGtWzor4H5QDjwufWqM0UOQ16hg/DgmpkU1qt9hM/HjuweTXBAyVazAR1bMMBqWnJ5bHJvRiREccdb6/lq5zENCEqpWuXT0BpjzGJgcam0hzzeG+Be61X63DeAN8pJTwL6VDK/NeZsgbNDuYkfZgn/3y+H+HSciHBZ73bcPLwz7607SG5BEWE1FMCUUqo0285UfnP1fsA/AaGyEto2J6/QQY+/fOHvrCilGjHbBoS/LkkGIDyk/i/n1D+2uCkpv9BxjiOVUqrqbBsQXBpCDaFPhwi6RjcFYPTzK/2bGaVUo2XLgJBbUDwhramPM5T9SUT46NfO5bdTT+pT25RStcOWAeFUToH7fedSE8Lqq8gmwfxx/HkAXPWv7/RxnkqpGmfLgHAmv9D9vmMDCQgA0wZ3Ijw4kB8PnOKX89fx76/3+DtLSqlGxJYBwdUx+9jk3gQG1Pmq21XWsmkIWx4ZR+umIew8errESqp707LZfey0H3OnlGrobB0QXMtFNCRBgQEM61r8nOc73/mRlOOnGf3814x9YRWfbip3BRCllPLKlgGhoMgZEIIDG2bxZ03owT2Xdgfgs81HuPRvq9z7NCAopaqqYX4jVpOrhhBSztPPGoLYlk24+9IEvp81mvsuO6/EvqXbj7HxoPPBOrognlKqMur/mMtakFfUsAOCS0xkOHeO6sbkAe1p3TSUng85ZzJfOec7ggKEQodhx2Pj3YvlKaXUuTTsb8QqKnDVEBpok1FpsS2bEB4SyIe/Gu5OK7RqB6+s2kOOx6gqpZSqSOP4RqyEgxk57iaVhl5DKG1wXCueu6Z/ibS/L9/NC8t2+SlHSqmGxHZNRhc9u8L9vrHUEDz1aNccgIiwIDY8NI7Rz69kU2qmn3OllGoIbBcQPAU3shoCQO/2EUwb0olJ/dsTGCBc2C2KTzcdxhjj83OjlVL21Pi+ESuhMdYQRISnrurLcGuuQs+YCE7nFvJTeo6fc6aUqu8a3zeiFxclRLnfhwU3/uKPOi8aEef8hKzcAu8nKKVsq/F/I5ZSWOQcffPs1f1q5FnK9V1syyYM6NiC55ftot8jS/lk4yF/Z0kpVU/ZLyA4HAzv0pprEzv6Oyt15o6RXdzv31930I85UUrVZ7YLCAVFhqBAe3WujurRhkn929OiSTB70874OztKqXrKdgGhyGEIakArnNaE0KBAXpw2kLtGJ3A0K5ejmbn+zpJSqh6yVUA4nVvAlkOZ7lm8dtO/o/PZzPd+sNG9wJ9SSrnYKiC88vVeAL7ZfcLPOfGP3u0jCAwQVu9J56p/rea0jjpSSnmwVUCw+7yssOBAlt97MQBbDmXyN13SQinlwaeAICLjRSRZRFJE5P5y9s8QkTQR2Wi9bvXY94yIbLVeP/dIny8i+zzOGVAzRapYQ33+QU2Kj2rKDUM7ATDvu/36/ASllJvXb0gRCQTmABOAXsA0EelVzqHvG2M
GWK+51rkTgfOBAcBQ4D4RifA45z6PczZWtzDeaEBwmj2lr/v9Jxs1ICilnHz5hhwCpBhj9hpj8oH3gMk+Xr8X8LUxptAYcwbYBIyvWlarL9hmw03PZefj47miXwxr9qaTeVb7EpRSvgWEDoDnbKZUK620qSKyWUQWiIhr1tcmYIKINBGRKGAU4DkjbLZ1zgsiElqVAlSG1hCKhQUHcv2QTmTnFTLquZWsSD7u7ywppfzMl2/I8n5Wlx63+SkQZ4zpBywH3gQwxiwFFgOrgXeB7wHX01pmAT2AwUAr4E/lfrjI7SKSJCJJaWlpPmS3YnabkOaNaxhqxpl8bnszyc+5UUr5my8BIZWSv+pjgRINz8aYdGNMnrX5GjDIY99sq49gLM7gsttKP2Kc8oB5OJumyjDGvGqMSTTGJEZHR/tarnIZK4y1jaj1ykiD0DS0ePXzQodhzooUP+ZGKeVvvgSEdUCCiMSLSAhwHbDQ8wARifHYnATssNIDRaS19b4f0A9Y6nmOOBfpvxLYWr2ieFdkTUh7//bhXo60j6eu6su0Ic5RR6+u2uvn3Cil/MnrA3KMMYUiMhNYAgQCbxhjtonIY0CSMWYhcJeITMLZHJQBzLBODwa+sR7MkgXcaIxxNRm9LSLROGsNG4Ff1VyxyueanduyaUhtf1SDMW1IJ6YNgazcAj7bfIRXV+3h1hFdCLDZ8h5KKR+fmGaMWYyzL8Az7SGP97Nw9gmUPi8X50ij8q45ulI5rQGuGoKONiorsXNLPtt8hCcX72RzaiYdWzXhzlHdaBZq64fqKWUrtvrf7lrDKFB//ZZx3eBOHM3M5ZVVe1m0+QgAGdn5PHN1Pz/nTClVV2w1DtP1cJzgAFsV2yfhIYHMurwnd49JcKct2nwYh00XAlTKjmxWQ3AggraPn8M9Y7sT2zKcH/Zl8OH6VJKPnaZnTIT3E5VSDZ6tfioXOozWDnxwTWJH7ht/HiFBASxYn1pi3/82HGL1HnuuFqtUY2evGkKRQ/sPfNSmeRiDOrXk+z3pAJzKyWfii99y6NRZAPY/PdGf2VNK1QJb/VwudNjv8ZnVMbxra3YczeJ4Vi4DHlvmDgYAzy9N9mPOlFK1wVYBoaDIoesZVcLwrq0xBsb/45sy+/75VQrp2Xll0k/l5JN89HRdZE8pVcNs9e2Yk19EeHCgv7PRYPSLjQScax1dNbADG/4ylrUPjGHRb0cA8OXOsgvi/eHDzVz291X8sC+DPWnZ5OQXljlGKVU/2aoPIbegiPAQDQi+Cg0KZFiXVqzZm8GwLq3dM7zbNA+lfWQYS7Ye5drE4mWucguKWL7jGADXvvI9AJHhwSy9ZyRtI8LqvgBKqUqxVQ3hbH4RTTQgVMq7tw3jw18NZ+qgWHeaiPCzAe35cudxFm46TGGRg7zCIjYdPFXm/MyzBe4goZSq32wVEHLyiwjTJqNKEREGx7UqMzrr4gTnyrN3vbuBbg9+Tv9Hl7L7eDYAt4yIp2dMBIvvuojWTUPYeKBsoFBK1T+2azJq0UQXtqsJAzq1KLGdW+Dgw/WpBAUIsyb0IMjqvO8ZE0HyMe1kVqohsF0NQZuMakaTkCDm/WJwibRNB0/RsmmIOxgA9GjXnJ1HTrsXFlRK1V+2CghnC7TJqCaNOq8Nr9+cyNJ7RtLUCrQtmwSXOKZvbCT5RQ5+9s9vOZiR449sKqV8ZKuAUOQwuvR1DRvTsy3d2zZn48PjuHtMAk9O6Vti/xX92jOiWxTbj2Qx/fW17mdSKKXqH9sFhADRgFAbggMDuGdsdxLjWpVIDwwQ3rplCK9OH8T+9By+2HrUTzlUSnljq4DgMM5RM6puiQiX9mxLVLMQViSXncymlKofbBUQjDHoyhX+ERAgxEc1JfnoaX3GglL1lK2+Hh1Gm4z8qWOrJmw7nMWr3+z1d1aUUuWwVUDQPgT/+u1o59PYPlh30M85UUqVx1YBwRjQeOA/8VFNuWt0N/aeOMPKKvQlZJ4t4HR
uQS3kTCkFNgsIDmMI1IjgVzcM60zbiFBmzFvHt7vP/eS11SknmP76Wr7fk87Oo1n0f3Qpl72wyr0/M6eAW+av44lF2zFG+yWUqi5bLV3hMPo8ZX9rGxHGby7pxsMLt3Hj62uZNaEHd1zctcxx6/ZnMGPeOvKLHHzjETgOZ+YSd/9njO/djiNZue4F9eZ+u49moUGs+MMlRDcPrbPyKNWY2KqGUGSMNhnVAzcN78xfrugFwFOf7+RAeskZzG+v/Ylr/v09+UUObrsovtxrfLHtaJnVVbPzCnnju321k2mlbMCngCAi40UkWURSROT+cvbPEJE0EdlovW712PeMiGy1Xj/3SI8XkbUisltE3heRWl91zugoo3pBRLhlRLz7y/7u9zcAsPHgKeLu/4wHP94KwAOX9+DBib1YM2sM82YMZvMj49j/9EQm9o1xX+udW4eSMnsCS+8ZSZ8OEby8cg/vrD1Q94VSqhHwGhBEJBCYA0wAegHTRKRXOYe+b4wZYL3mWudOBM4HBgBDgftEJMI6/hngBWNMAnASuKXapfHCYdA+hHpkpjXqaMOBU+w7cYYr53zn3vfq9EHcPtLZlNQuMoxRPdoQEeZcJ2nODedz1+hu3DGyCxd0iyIoMIDubZtz95juADzw8Rb3XIdTOfm8/u0+XVxPKR/40ocwBEgxxuwFEJH3gMnAdh/O7QV8bYwpBApFZBMwXkQ+BEYD11vHvQk8ArxcuexXjnMeQm1+gqqMyPBgVt03ipF/XcFDn2x1pzcNCWRY19bnPPfeceeVSRt1XjSR4cFkni2gywOLiWoWSoeW4Ww6eIq2EaGM6dFWn5in1Dn40mTUAfAcOJ5qpZU2VUQ2i8gCEXE9V3ETMEFEmohIFDAK6Ai0Bk5ZgeJc16wxxhhr2KlGhPoktmU4gLvjOGX2BJL+PNZdG6iMoMAAFt99kXv7RHaeu5/hb0t30fOhL5irk+KUqpAvAaG8b9DS9e9PgThjTD9gOc5f/BhjlgKLgdXAu8D3QKGP13R+uMjtIpIkIklpaWk+ZLd8rhYD7UOoXzxHfd0xsgtBgQHV+hXfoUU4j0/uDUBIUAAJbZrRrU0z9p44A8CHSanVy7BSjZgvTUapOH/Vu8QChz0PMMake2y+hrN/wLVvNjAbQETeAXYDJ4AWIhJk1RLKXNPj/FeBVwESExOr3BDssMap61pG9c9Hv7mA0KAAeraL8H6wD6YPj2P68Dj39ubUU0x6ydk/sT/9DHmFRYQGadORUqX58vW4DkiwRgWFANcBCz0PEJEYj81JwA4rPVBEWlvv+wH9gKXGOYtoBXC1dc7NwCfVKYg3roCgTUb1z/mdWtK7fWStzRHpF9uCe8d2Z2K/GPIKHUyZs5onFm0nr7CoVj5PqYbKaw3BGFMoIjOBJUAg8IYxZpuIPAYkGWMWAneJyCSczUEZwAzr9GDgG+tLOAu40aPf4E/AeyLyBLABeL3milVeOZx/apORPd01JoET2Xl8tvkI249ksf1IFos2H+Gxyb0Z17udv7OnVL3g00xlY8xinH0BnmkPebyfBcwq57xcnCONyrvmXpwjmOqEa9ihjjKyr6hmoTzys14s23GM9pHhfLLpMA98vIWxvdpqzVEpbLR0RXEfgv7Ht7MZF8Yz40LnhLg+HSJ5eOE2Xv92H7+4MF7/bSjbs00Xq2uUkf4SVC7jercF4InPdjD15dWcysn3c46U8i/bBATXapj6I1C5xESG8/iVfQDnshnXvbrGzzlSyr9sExCK+xA0Iqhi04d1Zs+TlwOw8+hpXeJC2ZptAsJ71lO6dPlrVVpggPDzROdUm42lVlBVyk5sExD+uiQZ0CYjVb7rhjgDwtSXV/s5J0r5j20Cgos2GanyJLRt7n7/m7fX8+mmw3z0oy5zoezFNsNOXbSGoMrTLDSIJ6f05YGPt7B4y1EWbzkKQGLnVnRq3cTPuVOqbmgNQSnL9UM7lUkb+dcV5OQXurf3nThD8tHTtlr
2IuNMPu+sPUDS/gxblduObFhD0ICgKvbBHcPJziugTfMwPtl4iNe+2ceAR5dx28h4bh/ZlVHPrQRgYr8Y5lx/vn8zW0d+/8FGViQ7VxoOCQpg0W9HEBMZRvMqLFFeU4wxfLLxMP07tiA+qqnf8tHY2K+GYLsSq8oYEt+K0T3a0qdDJPdP6AlAfpGDOSv20P/Rpe7jPtt8xF9ZrFPGGLYeznJv5xc6GPfCKvo+spTvUk7w6KfbOHnGOaFv17HTxN3/GX9asJn07LxqfW7GmXwWrE8l40z5kwV3Hj3N797fyKjnVvL6t/oc7ZqiNQSlKhAYINw6Ip5Ch2H+6v3u9NE92vDVzuPsScuma3Qz/2WwFj37xU52Hj3N1PNjSTudx+wpfbh+SCf+tXKPe8TeDXPXAvDeDwcpdDgoKHLO4Xg/6SDvJx3k9ZsTuSghmpAg336FLd12lLDgQFo2CWHqv1eTX+ggtmU4i347ghZNih+5nltQxItf7nZvv73mJ24ZEe/e3nDgJA99so3AAGF8n3aMTIgmJjKM9DP5dGvTOO9XTRHXDN6GIDEx0SQlJVXp3Lj7PwPgn9MG8rP+7WsyW8oGjmSe5d21Bxjbqx1tI0IZ9dxKxvRsy68v6cq9H2wiIiyIwADh2av7Eduy/nZCnzyTT0R4sNd1m1z/XwBiIsP49LcjiGoWCjhrDW+t+Yll24+5n3TnMvX8WAyGj3485E7rGRPBvBmDaRcZVu5nnc4tICu3kAuf/gqAC7u1Zu3eDKYP78y87/bz6KTe3HxBHAAH0nO49IWvyS900C4ijOnDO/PXJcmsfWAMbSPCSM/OY9ATyyss154nL7flmlUist4Yk+jtOK0hKOWDmMjwEs9xnj48jn9/vYeFm0o+12nEMyt47/ZhDOty7mdC16X8QgcOY8gtKGLg48sY2KkFj0/uw4P/28rYnm2YOiiWzLMFfLH1KAM7tXQ/1hRgWJdWzL15MM1Ci78qRISbhsdx0/A41u3P4NY3k3j6qr5M6Fv8WJTdx7LZcigTgB1Hshj21Je8fetQTuUUMCS+FdHNncFl+utrywSV71LSmTKwAw9d0Yul247x8MJttI0I4+Lu0fzyzXXkFzoYGt+Kl64/n8OnzjoDwr4MJvVvX2L5kahmoZwo1XTV9YHFrHvwUvfnq5JsV0N4+YbzS/zDVaoqcvILGfP81xzJzC13/wd3DGdIfKs6zlVZ3+4+wY2vr63SuWtmjanwV703Gw+e4tNNhxl1XhsWbz3CO2sPuPdd2rMtc29O5Kudx/jlfOf/5x7tmnNtYkcOZOQwf/V+3rltKBd0jWLN3nT3l7zrC37mqG78flx3RITCIgfDnvqSM3lFLJx5IWNfWEXPmAg+L/Vs7UARBj6+DIBpQzoy+8q+tlq1QGsIFbDTPwJVe5qEBPHV7y/hWFYu7SLDKChycCqngIueXQHA51uP1HlA2H44iyXbjnLdkI7kFzo4mVNwzmDw7NR+/PG/m93bo3u0YUDHFuw6dpr+sS2qHAwABnRswYCOLQAYkRCFAB8kHaSgyLB8xzGeX5rM22sPEBoUwPJ7L6ZjK2czW25BETcN70wXq29mWJfWXNC1Nav3pHMiO49Xpg/iMo8HGgUFBvCri7vyxGc7GPvCKgBmTehRIi+upq6kP19K4hPLefeHg3SNbsatF3WpcvkaK1vUEIwxxM9yPt+nvlXnVeOy7XAmzy/dxYrk48y5/nwEarVG+sO+DK595XvevnUo/1qZwncp6WWOmTdjMGv3ZXA8K5fZU/qydPtR+sU6h2uezS9ic+ophtby/wmHw5Bf5GBl8nF+9Z8f3el/ntjT6xfz8axchjz5JRclRPHWLUPL7DfG8F1KOk99voOENs147pr+BFXw8PRdx05z9curKXQYNjw01jbP1va1hmCLgFBY5KDbg58DsOR3IzmvXXMvZyhVdSey8xg8e7n7sa23joind4cIpgyMrfHPuubfq1m3/2SF+3+e2JF
nru5X459bVQ6Hof9jSwkNCuTuSxO4YUgnn2rtK5OP0y+2Ba2ahng91psvth5xB6XnrulP5tkCOrdqQs/2EUSEBfl1fkVt0YDgIbegiB5/+QLAPRpBqdo0/7t9/PvrvRzNKu5j2P/0RK/nGWPIOltIZJOyX0rHsnK5/B/fkH4mn74dItl5NMs91HN873Y0CQlkWNfWvPfDAa4c2IEJfWJoFhpEeEj9+hWcW1BEgIjPw1FrmjGGF79M4YXlu8rdf/Pwztw/oSeFDgfNQoMaxUO1NCB4yM4rpM/DSwDY+fh4woLr138Q1Xg98PGWEh2qAPN+MZhR57VhS2omf/rvZs7kFxIWFEibiFD3iJvmoUEs/O0I2rcI41ROAQ9+vJXlO46VuX7HVuF8fvfIEqOAlG8+WHeQTamnOJKZy9e70ogMDy4zEe6Oi7swy5qg2JBpQPCQmVNA/8eWcnH3aN785ZBayJlS5TPGsCctmylzVnM6r7DEvmFdWrFmb0aF594yIp4T2Xl8srF4aGt081BemjaQ1XvS+eWIeMKDA/32S7sx8vzx6DKxbwxPTe1LRANuStKA4OFEdh6JTyznscm9uWl4XM1nTCkfHM3MZfbiHXxaau5CdPNQ0k47x8u/OG0gyUez2H44y71+kMuc689nfJ92tpxYVZc2HTzF7z/cxH2Xnccdb613p//nlqGMSIjyY86qTgOCh2NZuQx98ktmT+nDDUM710LOlPLd1kOZzPpoC1sOZbrnxWw7nEmHFuHuJRoyzxZw3atryDpbwCczLyQ9O18HQ/iBaxSXy+7ZEwiuYARTfabzEDy4npMb2Ag6h1TD16dDJH+7tj8/HjjJ+D7OMfW920eWOCYyPLjE5CrXWHpVt4bEt2LDX8Yy/Y21bD2UxayPtjBzVDd+MX8dw7q05qmr+vo7izXKFgHBYdWCdFKaqi8S2jYv8ZQ2VX+1bBrCot9exJ3v/MiC9aksWO98kt6+E2eYdXkPzuQVEh4cSPMw72tE1Xc+BQQRGQ/8AwgE5hpjni61fwbwV8C1otVLxpi51r5ngYk4l9peBtxtjDEishKIAc5a54wzxhyvVmkq4HA4/9R1jJRSVfX8Nf1p2zyMd384wMi2ufg6AAAOgklEQVTuUXy54zj9HileEr110xC+/dNoRJytEk0b4MgvrzkWkUBgDjAWSAXWichCY8z2Uoe+b4yZWercC4ALAdfMmG+Bi4GV1vYNxpiqLU5UCUVWDaEBNv0ppeqJsOBAHvpZLx76WS8AFm46zF3vbnDvTz+TT8+HnPOd+ndswf9+c0GDm8PgSwgbAqQYY/YCiMh7wGSgdEAojwHCgBBAgGCg7GDqWuZuMmpgN0cpVX9N6t+eofGtaNEkmJDAAH71n/Us2eb8ett08BSPfrqd+yf0aFDznnwJCB2Agx7bqUDZBUVgqoiMBHYB9xhjDhpjvheRFcARnAHhJWPMDo9z5olIEfBf4AlTzpAnEbkduB2gU6eyz7z1hcOhAUEpVfM8Vz146frzyc4tpGloEIlPLGP+6v3MX72fQZ1bkp6dx/70HP5x3QAmD+jgxxyfmy+NKOV9i5b+4v4UiDPG9AOWA28CiEg3oCcQizOwjLaCBjibi/oCF1mv6eV9uDHmVWNMojEmMTo62ofsllXcZKQBQSlVO4IDA2jZNISQoAAWzhzB6B5tAFj/00n2p+cA8Jf/baWwyOHzNYschjOlJjTWJl8CQirQ0WM7Figxs8YYk26McT2J4jVgkPV+CrDGGJNtjMkGPgeGWeccsv48DbyDs2mqVhR3KtfWJyilVLG4qKa8MWMwyU+M56qBHbh3bHduuyierNxChjz5JUu3HfXpOk8u3kHvh5eQebaglnPs5EtAWAckiEi8iIQA1wELPQ8QEc/1fScBrmahA8DFIhIkIsE4O5R3WNtR1rnBwBXA1uoVpWLah6CU8ofQoED+9vMB3DUmgT9cdh7PX9Oflk2Cuf2t9Yz/+yp2HTtd4bl
FDsPr3+4DIPGJZec8tqZ47UMwxhSKyExgCc5hp28YY7aJyGNAkjFmIXCXiEwCCoEMYIZ1+gJgNLAFZzPTF8aYT0WkKbDECgaBOJuZXqvZohVzaJORUsrPQoMCmToolpjIMK6fu5adR08z7oVVXNEvht7tIzmTV8j+9DPcNDyOZduPsu+Es5lpysAOhAUHElONBxb5yhZLV2w4cJIp/1rNvBmDGWW16ymllL/8Z81PLNp8+JyLG0Lx3IbqLmGuS1d40JnKSqn65MZhnblxWGe2Hc5k0eYjFDkMm1NPcSI7n5Tj2Vw9KJY+7SOYcn5snT7PwiYBwfmnrmWklKpPerePLLOOlT/ZYu5ukXsegp8zopRS9ZgtAoJ7YppGBKWUqpA9AoKryUgDglJKVcgWAaHIaJORUkp5Y4uAoGsZKaWUd/YICDoxTSmlvLJFQCjSGoJSSnlli4CgaxkppZR3NgkIzj+1yUgppSpmi4CgE9OUUso7WwQEXctIKaW8s1dA0D4EpZSqkC0CguuJdbq4nVJKVcwWAaG4ycjPGVFKqXrMFl+ROlNZKaW8s0VAKNKZykop5ZUtAoJrHoJWEJRSqmK2CAhYNQRBI4JSSlXEFgHBqiDoxDSllDoHWwQEV6eyaJuRUkpVyBYBwVVD0HCglFIVs0dAsCKCDjtVSqmK+RQQRGS8iCSLSIqI3F/O/hkikiYiG63XrR77nhWRbSKyQ0ReFKvdRkQGicgW65ru9NrgmpimVQSllKqY14AgIoHAHGAC0AuYJiK9yjn0fWPMAOs11zr3AuBCoB/QBxgMXGwd/zJwO5BgvcZXsyxeaaeyUkpVzJcawhAgxRiz1xiTD7wHTPbx+gYIA0KAUCAYOCYiMUCEMeZ7Y4wB/g+4stK595GrhqCdykopVTFfAkIH4KDHdqqVVtpUEdksIgtEpCOAMeZ7YAVwxHotMcbssM5P9eGaNUJbjJRSyjtfAkJ536Om1PanQJwxph+wHHgTQES6AT2BWJxf+KNFZKSP18S6xu0ikiQiSWlpaT5kt+ILa6eyUkpVzJeAkAp09NiOBQ57HmCMSTfG5FmbrwGDrPdTgDXGmGxjTDbwOTDMumbsua7pce1XjTGJxpjE6OhoH7JbVnGTUZVOV0opW/AlIKwDEkQkXkRCgOuAhZ4HWH0CLpOAHdb7A8DFIhIkIsE4O5R3GGOOAKdFZJg1uugm4JNqlqVCpty6h1JKKU9B3g4wxhSKyExgCRAIvGGM2SYijwFJxpiFwF0iMgkoBDKAGdbpC4DRwBacLTdfGGM+tfb9GpgPhOOsOXxeU4WqiDYZKaVUxbwGBABjzGJgcam0hzzezwJmlXNeEXBHBddMwjkUtdYVL11RF5+mlFINkz1mKlt/ajxQSqmK2SMg6NIVSinllS0Cgo4yUkop72wRENxNRhoRlFKqQvYICMZo7UAppbywSUDQDmWllPLGHgEBox3KSinlhS0CgsNoh7JSSnlji4DgbDLSiKCUUudij4CAdiorpZQ39ggI2mSklFJe2SQgaKeyUkp5Y4uA4NBhp0op5ZUtAoKzyUhDglJKnYs9AoJ2KiullFf2CAjaZKSUUl7ZJCAYbTJSSikv7BEQgACNB0opdU62CAgOrSEopZRXtggI2oeglFLe2SMgoMNOlVLKG3sEBH1AjlJKeWWTgKCdykop5Y0tAoLDGF3+WimlvPApIIjIeBFJFpEUEbm/nP0zRCRNRDZar1ut9FEeaRtFJFdErrT2zReRfR77BtRs0YrpaqdKKeVdkLcDRCQQmAOMBVKBdSKy0BizvdSh7xtjZnomGGNWAAOs67QCUoClHofcZ4xZUI38+8Q5D0EjglJKnYsvNYQhQIoxZq8xJh94D5hchc+6GvjcGJNThXOrxWFMXX+kUko1OL4EhA7AQY/tVCuttKkisllEFohIx3L2Xwe8WypttnXOCyIS6luWq0CbjJRSyitfAkJ5X6Wlf3J/CsQZY/oBy4E3S1xAJAb
oCyzxSJ4F9AAGA62AP5X74SK3i0iSiCSlpaX5kN3yM6tNRkopdW6+BIRUwPMXfyxw2PMAY0y6MSbP2nwNGFTqGtcCHxtjCjzOOWKc8oB5OJumyjDGvGqMSTTGJEZHR/uQ3bIcOg9BKaW88iUgrAMSRCReREJwNv0s9DzAqgG4TAJ2lLrGNEo1F7nOEecU4iuBrZXLuu+c8xA0Iiil1Ll4HWVkjCkUkZk4m3sCgTeMMdtE5DEgyRizELhLRCYBhUAGMMN1vojE4axhfF3q0m+LSDTOJqmNwK+qXZoKOOchKKWUOhevAQHAGLMYWFwq7SGP97Nw9gmUd+5+yumENsaMrkxGq8OArm6nlFJe2GKmMtpkpJRSXtkiIGiTkVJKeWeLgKBLVyillHf2CAgYbTJSSikvbBEQHLpyhVJKeWWLgOBsMtIaglJKnYstAgIYfUCOUkp5YYuA4NBOZaWU8soWAcEY7VRWSilvfJqp3NAlxrUiO6/Q39lQSql6zRYB4c5R3fydBaWUqvds0WSklFLKOw0ISimlAA0ISimlLBoQlFJKARoQlFJKWTQgKKWUAjQgKKWUsmhAUEopBYAY03DWhhaRNOCnKp4eBZyowew0BFpme9Ay20N1ytzZGBPt7aAGFRCqQ0SSjDGJ/s5HXdIy24OW2R7qoszaZKSUUgrQgKCUUspip4Dwqr8z4AdaZnvQMttDrZfZNn0ISimlzs1ONQSllFLnYIuAICLjRSRZRFJE5H5/56cmiEhHEVkhIjtEZJuI3G2ltxKRZSKy2/qzpZUuIvKi9XewWUTO928Jqk5EAkVkg4gssrbjRWStVeb3RSTESg+1tlOs/XH+zHdViUgLEVkgIjut+z28sd9nEbnH+ne9VUTeFZGwxnafReQNETkuIls90ip9X0XkZuv43SJyc3Xy1OgDgogEAnOACUAvYJqI9PJvrmpEIfB7Y0xPYBhwp1Wu+4EvjTEJwJfWNjjLn2C9bgdervss15i7gR0e288AL1hlPgncYqXfApw0xnQDXrCOa4j+AXxhjOkB9MdZ9kZ7n0WkA3AXkGiM6QMEAtfR+O7zfGB8qbRK3VcRaQU8DAwFhgAPu4JIlRhjGvULGA4s8dieBczyd75qoZyfAGOBZCDGSosBkq33rwDTPI53H9eQXkCs9R9lNLAIEJyTdYJK329gCTDceh9kHSf+LkMlyxsB7Cud78Z8n4EOwEGglXXfFgGXNcb7DMQBW6t6X4FpwCse6SWOq+yr0dcQKP7H5ZJqpTUaVhV5ILAWaGuMOQJg/dnGOqyx/D38Hfgj4LC2WwOnjDGuh2Z7lstdZmt/pnV8Q9IFSAPmWc1kc0WkKY34PhtjDgHPAQeAIzjv23oa9312qex9rdH7bYeAIOWkNZqhVSLSDPgv8DtjTNa5Di0nrUH9PYjIFcBxY8x6z+RyDjU+7GsogoDzgZeNMQOBMxQ3I5SnwZfZavKYDMQD7YGmOJtMSmtM99mbispYo2W3Q0BIBTp6bMcCh/2UlxolIsE4g8HbxpiPrORjIhJj7Y8BjlvpjeHv4UJgkojsB97D2Wz0d6CFiARZx3iWy11ma38kkFGXGa4BqUCqMWattb0AZ4BozPf5UmCfMSbNGFMAfARcQOO+zy6Vva81er/tEBDWAQnWCIUQnJ1TC/2cp2oTEQFeB3YYY/7msWsh4BppcDPOvgVX+k3WaIVhQKaratpQGGNmGWNijTFxOO/jV8aYG4AVwNXWYaXL7Pq7uNo6vkH9cjTGHAUOish5VtIYYDuN+D7jbCoaJiJNrH/nrjI32vvsobL3dQkwTkRaWjWrcVZa1fi7U6WOOm4uB3YBe4AH/Z2fGirTCJxVw83ARut1Oc620y+B3dafrazjBedoqz3AFpwjOPxejmqU/xJgkfW+C/ADkAJ8CIRa6WHWdoq1v4u/813Fsg4Akqx7/T+gZWO/z8CjwE5gK/AWENrY7jPwLs4+kgKcv/Rvqcp9BX5plT0F+EV
18qQzlZVSSgH2aDJSSinlAw0ISimlAA0ISimlLBoQlFJKARoQlFJKWTQgKKWUAjQgKKWUsmhAUEopBcD/AxrBTQho4PDRAAAAAElFTkSuQmCC\n", 197 | "text/plain": [ 198 | "
" 199 | ] 200 | }, 201 | "metadata": { 202 | "needs_background": "light" 203 | }, 204 | "output_type": "display_data" 205 | } 206 | ], 207 | "source": [ 208 | "plt.plot(w['auc-mean'])" 209 | ] 210 | }, 211 | { 212 | "cell_type": "markdown", 213 | "metadata": {}, 214 | "source": [ 215 | "# Подготовка ответа" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": 11, 221 | "metadata": {}, 222 | "outputs": [], 223 | "source": [ 224 | "a1 = 0.0\n", 225 | "for t in range(10):\n", 226 | " model1 = lgb.LGBMClassifier(learning_rate=0.01, num_leaves=6,\n", 227 | " n_estimators=290,\n", 228 | " colsample_bytree=0.75, subsample=0.75, random_state=t)\n", 229 | " model1.fit(X, y1)\n", 230 | " a = model1.predict_proba(X_test)[:,1]\n", 231 | " # print (a)\n", 232 | " a1 += a\n", 233 | "a1 = a1 / 10" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": 12, 239 | "metadata": {}, 240 | "outputs": [], 241 | "source": [ 242 | "a2 = 0.0\n", 243 | "for t in range(10):\n", 244 | " model2 = lgb.LGBMClassifier(learning_rate=0.03, num_leaves=2,\n", 245 | " n_estimators=378,\n", 246 | " colsample_bytree=0.75, subsample=0.75, random_state=t)\n", 247 | " model2.fit(X, y2)\n", 248 | " a = model2.predict_proba(X_test)[:,1]\n", 249 | " a2 += a\n", 250 | "a2 = a2 / 10" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": 13, 256 | "metadata": {}, 257 | "outputs": [], 258 | "source": [ 259 | "a3 = 0.0\n", 260 | "for t in range(10):\n", 261 | " model3 = lgb.LGBMClassifier(learning_rate=0.01, num_leaves=4,\n", 262 | " n_estimators=543,\n", 263 | " colsample_bytree=0.75, subsample=0.75, random_state=t)\n", 264 | " model3.fit(X, y3)\n", 265 | " a = model3.predict_proba(X_test)[:,1]\n", 266 | " a3 += a\n", 267 | "a3 = a3 / 10" 268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": 14, 273 | "metadata": {}, 274 | "outputs": [], 275 | "source": [ 276 | "a4 = 0.0\n", 277 | "for t in range(10):\n", 278 | " model4 = 
lgb.LGBMClassifier(learning_rate=0.003, num_leaves=6,\n", 279 | " n_estimators=618,\n", 280 | " colsample_bytree=0.75, subsample=0.75, random_state=t)\n", 281 | " model4.fit(X, y4)\n", 282 | " a = model4.predict_proba(X_test)[:,1]\n", 283 | " a4 += a\n", 284 | "a4 = a4 / 10" 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "execution_count": 15, 290 | "metadata": {}, 291 | "outputs": [], 292 | "source": [ 293 | "a5 = 0.0\n", 294 | "for t in range(10):\n", 295 | " model5 = lgb.LGBMClassifier(learning_rate=0.002, num_leaves=3,\n", 296 | " n_estimators=516,\n", 297 | " colsample_bytree=0.75, subsample=0.75, random_state=t)\n", 298 | " model5.fit(X, y5)\n", 299 | " a = model5.predict_proba(X_test)[:,1]\n", 300 | " #print (a)\n", 301 | " a5 += a\n", 302 | "a5 = a5 / 10" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": 16, 308 | "metadata": {}, 309 | "outputs": [ 310 | { 311 | "data": { 312 | "text/html": [ 313 | "
\n", 314 | "\n", 327 | "\n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | "
id12345
000.3415970.3362500.2534920.3006790.428683
110.3458120.2271940.3009290.2826220.466563
220.4377190.4842280.2260820.3072570.444477
340.3222150.2744120.2440450.3012360.394336
470.3669070.2536260.4355250.3803120.456793
\n", 387 | "
" 388 | ], 389 | "text/plain": [ 390 | " id 1 2 3 4 5\n", 391 | "0 0 0.341597 0.336250 0.253492 0.300679 0.428683\n", 392 | "1 1 0.345812 0.227194 0.300929 0.282622 0.466563\n", 393 | "2 2 0.437719 0.484228 0.226082 0.307257 0.444477\n", 394 | "3 4 0.322215 0.274412 0.244045 0.301236 0.394336\n", 395 | "4 7 0.366907 0.253626 0.435525 0.380312 0.456793" 396 | ] 397 | }, 398 | "execution_count": 16, 399 | "metadata": {}, 400 | "output_type": "execute_result" 401 | } 402 | ], 403 | "source": [ 404 | "df = pd.DataFrame({'id': X1_test.id.values,\n", 405 | " '1': a1,\n", 406 | " '2': a2,\n", 407 | " '3': a3,\n", 408 | " '4': a4,\n", 409 | " '5': a5})\n", 410 | "df.to_csv('mmp_baseline_.csv', index=False)\n", 411 | "df.head()" 412 | ] 413 | }, 414 | { 415 | "cell_type": "code", 416 | "execution_count": null, 417 | "metadata": {}, 418 | "outputs": [], 419 | "source": [] 420 | } 421 | ], 422 | "metadata": { 423 | "kernelspec": { 424 | "display_name": "Python 3", 425 | "language": "python", 426 | "name": "python3" 427 | }, 428 | "language_info": { 429 | "codemirror_mode": { 430 | "name": "ipython", 431 | "version": 3 432 | }, 433 | "file_extension": ".py", 434 | "mimetype": "text/x-python", 435 | "name": "python", 436 | "nbconvert_exporter": "python", 437 | "pygments_lexer": "ipython3", 438 | "version": "3.7.3" 439 | } 440 | }, 441 | "nbformat": 4, 442 | "nbformat_minor": 2 443 | } 444 | -------------------------------------------------------------------------------- /dj_invest_GMSC.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# этапы решения задачи на реальном примере\n", 8 | "\n", 9 | "для курса \"Машинное обучение и анализ данных\" https://github.com/Dyakonov/MLDM/\n", 10 | " \n", 11 | "2019, Александр Дьяконов https://dyakonov.org/ag/" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": {}, 18 
| "outputs": [ 19 | { 20 | "name": "stdout", 21 | "output_type": "stream", 22 | "text": [ 23 | "Populating the interactive namespace from numpy and matplotlib\n" 24 | ] 25 | } 26 | ], 27 | "source": [ 28 | "import pandas as pd\n", 29 | "import numpy as np\n", 30 | "%pylab inline\n", 31 | "plt.style.use('seaborn-dark')\n", 32 | "import warnings\n", 33 | "warnings.filterwarnings(\"ignore\") # отключение варнингов\n", 34 | "pd.set_option('display.max_columns', None) # pd.options.display.max_columns = None \n", 35 | "# pd.set_option('display.max_rows', None) # не прятать столбцы при выводе дата-фреймов\n", 36 | "import matplotlib.pyplot as plt\n", 37 | "import matplotlib as mpl\n", 38 | "plt.rc('font', size=14)" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "# загрузили данные" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 3, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "train = pd.read_csv('./data_GMSC/train.csv')\n", 55 | "test = pd.read_csv('./data_GMSC/test.csv')" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 4, 61 | "metadata": {}, 62 | "outputs": [ 63 | { 64 | "name": "stdout", 65 | "output_type": "stream", 66 | "text": [ 67 | "(112500, 11) (37500, 10)\n" 68 | ] 69 | } 70 | ], 71 | "source": [ 72 | "# размеры данных\n", 73 | "print(train.shape, test.shape)" 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": {}, 79 | "source": [ 80 | "# посмотрели" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 5, 86 | "metadata": {}, 87 | "outputs": [ 88 | { 89 | "data": { 90 | "text/html": [ 91 | "
\n", 92 | "\n", 105 | "\n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | "
плохой_клиентлиниивозрастповедение_30-59_днейDebt_Ratioдоходчисло_кредитовповедение_90_днейнедвижимостьповедение_60-89_днейсемья
2212600.3575965220.5320584600.0140104.0
5438700.05614560056.000000NaN40000.0
81500.4472244500.6536079009.0140303.0
1304300.0988105400.20373619166.0150204.0
7546900.6835543400.2641685416.090102.0
\n", 195 | "
" 196 | ], 197 | "text/plain": [ 198 | " плохой_клиент линии возраст поведение_30-59_дней Debt_Ratio \\\n", 199 | "22126 0 0.357596 52 2 0.532058 \n", 200 | "54387 0 0.056145 60 0 56.000000 \n", 201 | "815 0 0.447224 45 0 0.653607 \n", 202 | "13043 0 0.098810 54 0 0.203736 \n", 203 | "75469 0 0.683554 34 0 0.264168 \n", 204 | "\n", 205 | " доход число_кредитов поведение_90_дней недвижимость \\\n", 206 | "22126 4600.0 14 0 1 \n", 207 | "54387 NaN 4 0 0 \n", 208 | "815 9009.0 14 0 3 \n", 209 | "13043 19166.0 15 0 2 \n", 210 | "75469 5416.0 9 0 1 \n", 211 | "\n", 212 | " поведение_60-89_дней семья \n", 213 | "22126 0 4.0 \n", 214 | "54387 0 0.0 \n", 215 | "815 0 3.0 \n", 216 | "13043 0 4.0 \n", 217 | "75469 0 2.0 " 218 | ] 219 | }, 220 | "execution_count": 5, 221 | "metadata": {}, 222 | "output_type": "execute_result" 223 | } 224 | ], 225 | "source": [ 226 | "train.sample(5)" 227 | ] 228 | }, 229 | { 230 | "cell_type": "markdown", 231 | "metadata": {}, 232 | "source": [ 233 | "# особенности" 234 | ] 235 | }, 236 | { 237 | "cell_type": "markdown", 238 | "metadata": {}, 239 | "source": [ 240 | "Нам повезло: нет категориальных признаков - не надо думать о кодировках\n", 241 | " \n", 242 | "Но есть пропуски: пока не будем думать о них (попробуйте придумать что-то умнее) - заменим (-1)" 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "execution_count": 6, 248 | "metadata": {}, 249 | "outputs": [ 250 | { 251 | "data": { 252 | "text/plain": [ 253 | "((112500, 10), (112500,))" 254 | ] 255 | }, 256 | "execution_count": 6, 257 | "metadata": {}, 258 | "output_type": "execute_result" 259 | } 260 | ], 261 | "source": [ 262 | "y = train.pop('плохой_клиент') # целевой вектор\n", 263 | "train.shape, y.shape" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": 7, 269 | "metadata": {}, 270 | "outputs": [], 271 | "source": [ 272 | "# заменить пропуски на -1\n", 273 | "train.fillna(-1, inplace=True)\n", 274 | "test.fillna(-1, inplace=True)" 275 | ] 276
| }, 277 | { 278 | "cell_type": "markdown", 279 | "metadata": {}, 280 | "source": [ 281 | "у нас задача бинарной классификации:" 282 | ] 283 | }, 284 | { 285 | "cell_type": "code", 286 | "execution_count": 8, 287 | "metadata": {}, 288 | "outputs": [ 289 | { 290 | "data": { 291 | "text/plain": [ 292 | "array([0, 1])" 293 | ] 294 | }, 295 | "execution_count": 8, 296 | "metadata": {}, 297 | "output_type": "execute_result" 298 | } 299 | ], 300 | "source": [ 301 | "np.unique(y)" 302 | ] 303 | }, 304 | { 305 | "cell_type": "markdown", 306 | "metadata": {}, 307 | "source": [ 308 | "перечислим подходящие алгоритмы для бинарной классификации (тут, кстати, не все алгоритмы):" 309 | ] 310 | }, 311 | { 312 | "cell_type": "code", 313 | "execution_count": 16, 314 | "metadata": {}, 315 | "outputs": [], 316 | "source": [ 317 | "from sklearn.linear_model import LogisticRegression\n", 318 | "from sklearn.svm import LinearSVC\n", 319 | "from sklearn.linear_model import SGDClassifier\n", 320 | "from sklearn.neighbors import KNeighborsClassifier\n", 321 | "from sklearn.ensemble import RandomForestClassifier\n", 322 | "from sklearn.ensemble import ExtraTreesClassifier\n", 323 | "from sklearn.ensemble import GradientBoostingClassifier\n", 324 | "\n", 325 | "models = {'лог_регрессия': LogisticRegression(),\n", 326 | " 'лин_svm': LinearSVC(),\n", 327 | " 'SGD': SGDClassifier(),\n", 328 | " 'knn': KNeighborsClassifier(),\n", 329 | " 'RF': RandomForestClassifier(),\n", 330 | " 'ETC': ExtraTreesClassifier(),\n", 331 | " 'GBM': GradientBoostingClassifier()} " 332 | ] 333 | }, 334 | { 335 | "cell_type": "markdown", 336 | "metadata": {}, 337 | "source": [ 338 | "поэкспериментируем со всеми алгоритмами (параметры по умолчанию)" 339 | ] 340 | }, 341 | { 342 | "cell_type": "code", 343 | "execution_count": 22, 344 | "metadata": {}, 345 | "outputs": [ 346 | { 347 | "name": "stdout", 348 | "output_type": "stream", 349 | "text": [ 350 | "лог_регрессия auc=0.697 std=0.011\n", 351 | "лин_svm auc=0.565 
std=0.029\n", 352 | "SGD auc=0.537 std=0.036\n", 353 | "knn auc=0.568 std=0.008\n", 354 | "RF auc=0.777 std=0.007\n", 355 | "ETC auc=0.778 std=0.01\n", 356 | "GBM auc=0.866 std=0.002\n" 357 | ] 358 | } 359 | ], 360 | "source": [ 361 | "from sklearn.model_selection import cross_val_score\n", 362 | "from sklearn.model_selection import ShuffleSplit\n", 363 | "\n", 364 | "cv = ShuffleSplit(n_splits=5, test_size=0.1, train_size=None, random_state=1)\n", 365 | "\n", 366 | "for model_name in models:\n", 367 | " model = models[model_name]\n", 368 | " cvs = cross_val_score(model, train, y, cv=cv, scoring='roc_auc')\n", 369 | " print (model_name, f\"auc={np.round(np.mean(cvs), 3)}\", f\"std={np.round(np.std(cvs), 3)}\")" 370 | ] 371 | }, 372 | { 373 | "cell_type": "markdown", 374 | "metadata": {}, 375 | "source": [ 376 | "некоторые алгоритмы долго обучаются\n", 377 | "\n", 378 | "совет: поймите, какие и от чего это зависит!\n", 379 | "\n", 380 | "пока самый лучший алгоритм - **градиентный бустинг**\n", 381 | "\n", 382 | "здесь метрика качества - AUC ROC\n", 383 | "https://dyakonov.org/2017/07/28/auc-roc-%D0%BF%D0%BB%D0%BE%D1%89%D0%B0%D0%B4%D1%8C-%D0%BF%D0%BE%D0%B4-%D0%BA%D1%80%D0%B8%D0%B2%D0%BE%D0%B9-%D0%BE%D1%88%D0%B8%D0%B1%D0%BE%D0%BA/\n", 384 | "\n", 385 | "Метрик качества очень много!
Вот некоторые из них:" 386 | ] 387 | }, 388 | { 389 | "cell_type": "code", 390 | "execution_count": 14, 391 | "metadata": {}, 392 | "outputs": [ 393 | { 394 | "data": { 395 | "text/plain": [ 396 | "dict_keys(['explained_variance', 'r2', 'neg_median_absolute_error', 'neg_mean_absolute_error', 'neg_mean_squared_error', 'neg_mean_squared_log_error', 'accuracy', 'roc_auc', 'balanced_accuracy', 'average_precision', 'neg_log_loss', 'brier_score_loss', 'adjusted_rand_score', 'homogeneity_score', 'completeness_score', 'v_measure_score', 'mutual_info_score', 'adjusted_mutual_info_score', 'normalized_mutual_info_score', 'fowlkes_mallows_score', 'precision', 'precision_macro', 'precision_micro', 'precision_samples', 'precision_weighted', 'recall', 'recall_macro', 'recall_micro', 'recall_samples', 'recall_weighted', 'f1', 'f1_macro', 'f1_micro', 'f1_samples', 'f1_weighted'])" 397 | ] 398 | }, 399 | "execution_count": 14, 400 | "metadata": {}, 401 | "output_type": "execute_result" 402 | } 403 | ], 404 | "source": [ 405 | "from sklearn.metrics import SCORERS\n", 406 | "SCORERS.keys()" 407 | ] 408 | }, 409 | { 410 | "cell_type": "markdown", 411 | "metadata": {}, 412 | "source": [ 413 | "настроим параметры бустинга" 414 | ] 415 | }, 416 | { 417 | "cell_type": "code", 418 | "execution_count": 28, 419 | "metadata": {}, 420 | "outputs": [ 421 | { 422 | "data": { 423 | "text/plain": [ 424 | "RandomizedSearchCV(cv=ShuffleSplit(n_splits=5, random_state=None, test_size=0.1, train_size=None),\n", 425 | " error_score='raise-deprecating',\n", 426 | " estimator=GradientBoostingClassifier(criterion='friedman_mse', init=None,\n", 427 | " learning_rate=0.1, loss='deviance', max_depth=3,\n", 428 | " max_features=None, max_leaf_nodes=None,\n", 429 | " min_impurity_decrease=0.0, min_impurity_split=None,\n", 430 | " min_samples_leaf=1, min_sampl... 
subsample=1.0, tol=0.0001, validation_fraction=0.1,\n", 431 | " verbose=0, warm_start=False),\n", 432 | " fit_params=None, iid='warn', n_iter=10, n_jobs=-1,\n", 433 | " param_distributions={'learning_rate': [0.05, 0.1, 0.2], 'subsample': [0.5, 1.0], 'max_depth': [1, 2, 3, 4, 5], 'max_features': [0.5, 0.75, 1.0]},\n", 434 | " pre_dispatch='2*n_jobs', random_state=None, refit=True,\n", 435 | " return_train_score='warn', scoring='roc_auc', verbose=0)" 436 | ] 437 | }, 438 | "execution_count": 28, 439 | "metadata": {}, 440 | "output_type": "execute_result" 441 | } 442 | ], 443 | "source": [ 444 | "from sklearn.model_selection import RandomizedSearchCV\n", 445 | "\n", 446 | "params = {'learning_rate': [0.05, 0.1, 0.2], 'subsample': [0.5, 1.0], 'max_depth': [1, 2, 3, 4, 5], 'max_features': [0.5, 0.75, 1.0]}\n", 447 | "\n", 448 | "model = GradientBoostingClassifier()\n", 449 | "\n", 450 | "rs = RandomizedSearchCV(model, params, n_iter=10, scoring='roc_auc', n_jobs=-1, cv=cv)\n", 451 | "\n", 452 | "rs.fit(train, y)" 453 | ] 454 | }, 455 | { 456 | "cell_type": "code", 457 | "execution_count": 30, 458 | "metadata": {}, 459 | "outputs": [ 460 | { 461 | "data": { 462 | "text/html": [ 463 | "
\n", 464 | "\n", 477 | "\n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " 
\n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | " \n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | " \n", 725 | " \n", 726 | " \n", 727 | " \n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | "
mean_fit_timestd_fit_timemean_score_timestd_score_timeparam_subsampleparam_max_featuresparam_max_depthparam_learning_rateparamssplit0_test_scoresplit1_test_scoresplit2_test_scoresplit3_test_scoresplit4_test_scoremean_test_scorestd_test_scorerank_test_scoresplit0_train_scoresplit1_train_scoresplit2_train_scoresplit3_train_scoresplit4_train_scoremean_train_scorestd_train_score
06.8368090.2890800.0189130.00014810.7540.05{'subsample': 1.0, 'max_features': 0.75, 'max_...0.8643990.8688790.8622850.8519240.8465870.8588150.00826560.8685080.8678890.8684000.8690920.8699010.8687580.000688
16.1396521.1586660.0201980.00786710.7530.05{'subsample': 1.0, 'max_features': 0.75, 'max_...0.8631620.8675770.8601940.8505980.8453860.8573830.00818790.8639490.8636990.8644720.8653490.8658110.8646560.000808
27.0810660.4551160.0194970.0009240.50.540.05{'subsample': 0.5, 'max_features': 0.5, 'max_d...0.8646760.8690230.8621860.8523090.8463780.8589140.00832950.8671700.8669710.8673520.8690770.8693990.8679940.001028
34.4717080.4410730.0134200.0040680.50.7520.2{'subsample': 0.5, 'max_features': 0.75, 'max_...0.8646680.8679070.8619870.8505040.8472440.8584620.00811670.8641270.8650110.8651980.8663550.8665360.8654450.000895
42.1144950.0467760.0092470.00098810.7510.05{'subsample': 1.0, 'max_features': 0.75, 'max_...0.8527360.8553340.8479090.8402060.8363360.8465040.007236100.8515550.8506050.8507730.8528620.8536010.8518790.001173
55.2276250.4268650.0153710.0002380.50.7530.05{'subsample': 0.5, 'max_features': 0.75, 'max_...0.8626990.8683070.8608500.8498810.8463550.8576190.00821380.8635110.8634610.8642450.8647700.8662660.8644510.001030
68.5503110.6369350.0235140.00062010.550.05{'subsample': 1.0, 'max_features': 0.5, 'max_d...0.8651470.8692980.8625800.8542520.8481930.8598940.00764240.8726830.8730400.8731560.8741110.8747010.8735380.000749
75.7458840.2324850.0178840.0002160.50.540.1{'subsample': 0.5, 'max_features': 0.5, 'max_d...0.8639340.8694590.8631710.8547910.8498440.8602400.00700030.8704330.8706150.8720360.8726330.8721470.8715730.000882
89.2472650.2957920.0215390.00023410.7550.1{'subsample': 1.0, 'max_features': 0.75, 'max_...0.8664980.8693210.8632940.8546220.8491500.8605770.00754920.8793760.8785380.8799750.8800510.8806070.8797090.000704
95.2083280.1497210.0176660.00075410.540.2{'subsample': 1.0, 'max_features': 0.5, 'max_d...0.8674510.8700340.8645940.8523940.8495550.8608060.00825810.8768570.8770020.8779720.8779810.8793700.8778360.000900
\n", 780 | "
" 781 | ], 782 | "text/plain": [ 783 | " mean_fit_time std_fit_time mean_score_time std_score_time \\\n", 784 | "0 6.836809 0.289080 0.018913 0.000148 \n", 785 | "1 6.139652 1.158666 0.020198 0.007867 \n", 786 | "2 7.081066 0.455116 0.019497 0.000924 \n", 787 | "3 4.471708 0.441073 0.013420 0.004068 \n", 788 | "4 2.114495 0.046776 0.009247 0.000988 \n", 789 | "5 5.227625 0.426865 0.015371 0.000238 \n", 790 | "6 8.550311 0.636935 0.023514 0.000620 \n", 791 | "7 5.745884 0.232485 0.017884 0.000216 \n", 792 | "8 9.247265 0.295792 0.021539 0.000234 \n", 793 | "9 5.208328 0.149721 0.017666 0.000754 \n", 794 | "\n", 795 | " param_subsample param_max_features param_max_depth param_learning_rate \\\n", 796 | "0 1 0.75 4 0.05 \n", 797 | "1 1 0.75 3 0.05 \n", 798 | "2 0.5 0.5 4 0.05 \n", 799 | "3 0.5 0.75 2 0.2 \n", 800 | "4 1 0.75 1 0.05 \n", 801 | "5 0.5 0.75 3 0.05 \n", 802 | "6 1 0.5 5 0.05 \n", 803 | "7 0.5 0.5 4 0.1 \n", 804 | "8 1 0.75 5 0.1 \n", 805 | "9 1 0.5 4 0.2 \n", 806 | "\n", 807 | " params split0_test_score \\\n", 808 | "0 {'subsample': 1.0, 'max_features': 0.75, 'max_... 0.864399 \n", 809 | "1 {'subsample': 1.0, 'max_features': 0.75, 'max_... 0.863162 \n", 810 | "2 {'subsample': 0.5, 'max_features': 0.5, 'max_d... 0.864676 \n", 811 | "3 {'subsample': 0.5, 'max_features': 0.75, 'max_... 0.864668 \n", 812 | "4 {'subsample': 1.0, 'max_features': 0.75, 'max_... 0.852736 \n", 813 | "5 {'subsample': 0.5, 'max_features': 0.75, 'max_... 0.862699 \n", 814 | "6 {'subsample': 1.0, 'max_features': 0.5, 'max_d... 0.865147 \n", 815 | "7 {'subsample': 0.5, 'max_features': 0.5, 'max_d... 0.863934 \n", 816 | "8 {'subsample': 1.0, 'max_features': 0.75, 'max_... 0.866498 \n", 817 | "9 {'subsample': 1.0, 'max_features': 0.5, 'max_d... 
0.867451 \n", 818 | "\n", 819 | " split1_test_score split2_test_score split3_test_score split4_test_score \\\n", 820 | "0 0.868879 0.862285 0.851924 0.846587 \n", 821 | "1 0.867577 0.860194 0.850598 0.845386 \n", 822 | "2 0.869023 0.862186 0.852309 0.846378 \n", 823 | "3 0.867907 0.861987 0.850504 0.847244 \n", 824 | "4 0.855334 0.847909 0.840206 0.836336 \n", 825 | "5 0.868307 0.860850 0.849881 0.846355 \n", 826 | "6 0.869298 0.862580 0.854252 0.848193 \n", 827 | "7 0.869459 0.863171 0.854791 0.849844 \n", 828 | "8 0.869321 0.863294 0.854622 0.849150 \n", 829 | "9 0.870034 0.864594 0.852394 0.849555 \n", 830 | "\n", 831 | " mean_test_score std_test_score rank_test_score split0_train_score \\\n", 832 | "0 0.858815 0.008265 6 0.868508 \n", 833 | "1 0.857383 0.008187 9 0.863949 \n", 834 | "2 0.858914 0.008329 5 0.867170 \n", 835 | "3 0.858462 0.008116 7 0.864127 \n", 836 | "4 0.846504 0.007236 10 0.851555 \n", 837 | "5 0.857619 0.008213 8 0.863511 \n", 838 | "6 0.859894 0.007642 4 0.872683 \n", 839 | "7 0.860240 0.007000 3 0.870433 \n", 840 | "8 0.860577 0.007549 2 0.879376 \n", 841 | "9 0.860806 0.008258 1 0.876857 \n", 842 | "\n", 843 | " split1_train_score split2_train_score split3_train_score \\\n", 844 | "0 0.867889 0.868400 0.869092 \n", 845 | "1 0.863699 0.864472 0.865349 \n", 846 | "2 0.866971 0.867352 0.869077 \n", 847 | "3 0.865011 0.865198 0.866355 \n", 848 | "4 0.850605 0.850773 0.852862 \n", 849 | "5 0.863461 0.864245 0.864770 \n", 850 | "6 0.873040 0.873156 0.874111 \n", 851 | "7 0.870615 0.872036 0.872633 \n", 852 | "8 0.878538 0.879975 0.880051 \n", 853 | "9 0.877002 0.877972 0.877981 \n", 854 | "\n", 855 | " split4_train_score mean_train_score std_train_score \n", 856 | "0 0.869901 0.868758 0.000688 \n", 857 | "1 0.865811 0.864656 0.000808 \n", 858 | "2 0.869399 0.867994 0.001028 \n", 859 | "3 0.866536 0.865445 0.000895 \n", 860 | "4 0.853601 0.851879 0.001173 \n", 861 | "5 0.866266 0.864451 0.001030 \n", 862 | "6 0.874701 0.873538 0.000749 \n", 863 
| "7 0.872147 0.871573 0.000882 \n", 864 | "8 0.880607 0.879709 0.000704 \n", 865 | "9 0.879370 0.877836 0.000900 " 866 | ] 867 | }, 868 | "execution_count": 30, 869 | "metadata": {}, 870 | "output_type": "execute_result" 871 | } 872 | ], 873 | "source": [ 874 | "pd.DataFrame(rs.cv_results_)" 875 | ] 876 | }, 877 | { 878 | "cell_type": "markdown", 879 | "metadata": {}, 880 | "source": [ 881 | "лучшие параметры" 882 | ] 883 | }, 884 | { 885 | "cell_type": "code", 886 | "execution_count": 33, 887 | "metadata": {}, 888 | "outputs": [ 889 | { 890 | "data": { 891 | "text/plain": [ 892 | "{'subsample': 1.0, 'max_features': 0.5, 'max_depth': 4, 'learning_rate': 0.2}" 893 | ] 894 | }, 895 | "execution_count": 33, 896 | "metadata": {}, 897 | "output_type": "execute_result" 898 | } 899 | ], 900 | "source": [ 901 | "rs.best_params_" 902 | ] 903 | }, 904 | { 905 | "cell_type": "markdown", 906 | "metadata": {}, 907 | "source": [ 908 | "ожидаемое качество" 909 | ] 910 | }, 911 | { 912 | "cell_type": "code", 913 | "execution_count": 36, 914 | "metadata": {}, 915 | "outputs": [ 916 | { 917 | "data": { 918 | "text/plain": [ 919 | "0.8608056678859954" 920 | ] 921 | }, 922 | "execution_count": 36, 923 | "metadata": {}, 924 | "output_type": "execute_result" 925 | } 926 | ], 927 | "source": [ 928 | "rs.best_score_" 929 | ] 930 | }, 931 | { 932 | "cell_type": "markdown", 933 | "metadata": {}, 934 | "source": [ 935 | "Кстати, хуже параметров по умолчанию;)\n", 936 | "\n", 937 | "Мало экспериментов..." 
938 | ] 939 | }, 940 | { 941 | "cell_type": "markdown", 942 | "metadata": {}, 943 | "source": [ 944 | "# советы по улучшению\n", 945 | "\n", 946 | "раз лучшим оказался градиентный бустинг => смотрим его лучшие реализации\n", 947 | "\n", 948 | "* xgboost https://en.wikipedia.org/wiki/XGBoost\n", 949 | "* lightgbm https://github.com/Microsoft/LightGBM\n", 950 | "* catboost https://tech.yandex.ru/catboost/" 951 | ] 952 | } 953 | ], 954 | "metadata": { 955 | "kernelspec": { 956 | "display_name": "Python 3", 957 | "language": "python", 958 | "name": "python3" 959 | }, 960 | "language_info": { 961 | "codemirror_mode": { 962 | "name": "ipython", 963 | "version": 3 964 | }, 965 | "file_extension": ".py", 966 | "mimetype": "text/x-python", 967 | "name": "python", 968 | "nbconvert_exporter": "python", 969 | "pygments_lexer": "ipython3", 970 | "version": "3.6.8" 971 | } 972 | }, 973 | "nbformat": 4, 974 | "nbformat_minor": 2 975 | } 976 | -------------------------------------------------------------------------------- /dj_Benchmark_12trip.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Бенчмарк для задачи OneTwoTrip Contest\n", 8 | "\n", 9 | "https://boosters.pro/championship/onetwotrip_challenge/overview\n", 10 | " \n", 11 | " \n", 12 | "для студентов ВМК МГУ\n", 13 | "\n", 14 | "2019, Александр Дьяконов www.dyakonov.org/ag/" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 2, 20 | "metadata": {}, 21 | "outputs": [ 22 | { 23 | "name": "stdout", 24 | "output_type": "stream", 25 | "text": [ 26 | "Populating the interactive namespace from numpy and matplotlib\n" 27 | ] 28 | } 29 | ], 30 | "source": [ 31 | "# подгружаем все нужные пакеты\n", 32 | "import pandas as pd\n", 33 | "import numpy as np\n", 34 | "# для встроенных картинок\n", 35 | "%pylab inline\n", 36 | "# отключить предупреждения\n", 37 | "import warnings\n", 
38 | "warnings.filterwarnings('ignore')\n", 39 | "# прогресс-бар\n", 40 | "from tqdm import tqdm, tqdm_notebook" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "## загрузка данных" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 3, 53 | "metadata": {}, 54 | "outputs": [ 55 | { 56 | "name": "stdout", 57 | "output_type": "stream", 58 | "text": [ 59 | "размеры: (196056, 43) (455011, 37)\n" 60 | ] 61 | } 62 | ], 63 | "source": [ 64 | "data_train = pd.read_csv('onetwotrip_challenge_train.csv')\n", 65 | "data_test = pd.read_csv('onetwotrip_challenge_test.csv')\n", 66 | "print ('размеры:', data_train.shape, data_test.shape)" 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "## посмотрим на данные\n", 74 | "\n", 75 | "обратите внимание, как выводятся дата-фреймы" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 4, 81 | "metadata": {}, 82 | "outputs": [ 83 | { 84 | "data": { 85 | "text/html": [ 86 | "
\n", 87 | "\n", 100 | "\n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | "
orderiduseridfield0field1field2field3field4field5field6field7...indicator_goal22indicator_goal23indicator_goal24indicator_goal25goal21goal22goal23goal24goal25goal1
0010d654494cbe97bbb25d51ead2600679aff9e097924add...0-0.62650811121101...1011010000
114aafc0391f72bbcf60537aece62923baf9ce644b64ac36...144-0.393794572002...1010000000
22bac8ffef46348f587c8d17137ab01fb24aef21547c647d...134-0.548937232001...1011000000
330392247b4b87674aba2c32bf2292b105771a6a376871be...0-0.23865110111132...1011000000
44d1aeefef311bbeb4bd84876c8d49421f276674527d5578...0-0.7040798111101...1001000000
\n", 250 | "

5 rows × 43 columns

\n", 251 | "
" 252 | ], 253 | "text/plain": [ 254 | " orderid userid field0 \\\n", 255 | "0 0 10d654494cbe97bbb25d51ead2600679aff9e097924add... 0 \n", 256 | "1 1 4aafc0391f72bbcf60537aece62923baf9ce644b64ac36... 144 \n", 257 | "2 2 bac8ffef46348f587c8d17137ab01fb24aef21547c647d... 134 \n", 258 | "3 3 0392247b4b87674aba2c32bf2292b105771a6a376871be... 0 \n", 259 | "4 4 d1aeefef311bbeb4bd84876c8d49421f276674527d5578... 0 \n", 260 | "\n", 261 | " field1 field2 field3 field4 field5 field6 field7 ... \\\n", 262 | "0 -0.626508 11 12 1 1 0 1 ... \n", 263 | "1 -0.393794 5 7 2 0 0 2 ... \n", 264 | "2 -0.548937 2 3 2 0 0 1 ... \n", 265 | "3 -0.238651 10 11 1 1 3 2 ... \n", 266 | "4 -0.704079 8 11 1 1 0 1 ... \n", 267 | "\n", 268 | " indicator_goal22 indicator_goal23 indicator_goal24 indicator_goal25 \\\n", 269 | "0 1 0 1 1 \n", 270 | "1 1 0 1 0 \n", 271 | "2 1 0 1 1 \n", 272 | "3 1 0 1 1 \n", 273 | "4 1 0 0 1 \n", 274 | "\n", 275 | " goal21 goal22 goal23 goal24 goal25 goal1 \n", 276 | "0 0 1 0 0 0 0 \n", 277 | "1 0 0 0 0 0 0 \n", 278 | "2 0 0 0 0 0 0 \n", 279 | "3 0 0 0 0 0 0 \n", 280 | "4 0 0 0 0 0 0 \n", 281 | "\n", 282 | "[5 rows x 43 columns]" 283 | ] 284 | }, 285 | "execution_count": 4, 286 | "metadata": {}, 287 | "output_type": "execute_result" 288 | } 289 | ], 290 | "source": [ 291 | "data_train.head()" 292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "execution_count": 5, 297 | "metadata": {}, 298 | "outputs": [ 299 | { 300 | "data": { 301 | "text/html": [ 302 | "
\n", 303 | "\n", 316 | "\n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | "
orderiduseridfield0field1field2field3field4field5field6field7...field25field26field27field28field29indicator_goal21indicator_goal22indicator_goal23indicator_goal24indicator_goal25
003a6a6af55e097d3f92705936a7ea3ca8aef651f5966832...0-0.54893710101101...1110411001
112df10f61960163da94a4294773ed9c865296e37c330304...82-0.626508343001...1110111011
2220dc3fec5b5eb42fbfe08119063c3a0010a73c7ec94abb...0-0.548937681102...35110210011
33ed75b3496977bac207eccb59dc91fe9a8d6a27777a6422...60.3043487720101...1310311011
44a346d08351c5fd0bda82984ed7c8b12b6395829da5b857...115-0.471365332001...1110111011
\n", 466 | "

5 rows × 37 columns

\n", 467 | "
" 468 | ], 469 | "text/plain": [ 470 | " orderid userid field0 \\\n", 471 | "0 0 3a6a6af55e097d3f92705936a7ea3ca8aef651f5966832... 0 \n", 472 | "1 1 2df10f61960163da94a4294773ed9c865296e37c330304... 82 \n", 473 | "2 2 20dc3fec5b5eb42fbfe08119063c3a0010a73c7ec94abb... 0 \n", 474 | "3 3 ed75b3496977bac207eccb59dc91fe9a8d6a27777a6422... 6 \n", 475 | "4 4 a346d08351c5fd0bda82984ed7c8b12b6395829da5b857... 115 \n", 476 | "\n", 477 | " field1 field2 field3 field4 field5 field6 field7 ... field25 \\\n", 478 | "0 -0.548937 10 10 1 1 0 1 ... 1 \n", 479 | "1 -0.626508 3 4 3 0 0 1 ... 1 \n", 480 | "2 -0.548937 6 8 1 1 0 2 ... 35 \n", 481 | "3 0.304348 7 7 2 0 10 1 ... 1 \n", 482 | "4 -0.471365 3 3 2 0 0 1 ... 1 \n", 483 | "\n", 484 | " field26 field27 field28 field29 indicator_goal21 indicator_goal22 \\\n", 485 | "0 1 1 0 4 1 1 \n", 486 | "1 1 1 0 1 1 1 \n", 487 | "2 1 1 0 2 1 0 \n", 488 | "3 3 1 0 3 1 1 \n", 489 | "4 1 1 0 1 1 1 \n", 490 | "\n", 491 | " indicator_goal23 indicator_goal24 indicator_goal25 \n", 492 | "0 0 0 1 \n", 493 | "1 0 1 1 \n", 494 | "2 0 1 1 \n", 495 | "3 0 1 1 \n", 496 | "4 0 1 1 \n", 497 | "\n", 498 | "[5 rows x 37 columns]" 499 | ] 500 | }, 501 | "execution_count": 5, 502 | "metadata": {}, 503 | "output_type": "execute_result" 504 | } 505 | ], 506 | "source": [ 507 | "data_test.head()" 508 | ] 509 | }, 510 | { 511 | "cell_type": "markdown", 512 | "metadata": {}, 513 | "source": [ 514 | "## получаем таблички для обучения" 515 | ] 516 | }, 517 | { 518 | "cell_type": "code", 519 | "execution_count": 6, 520 | "metadata": {}, 521 | "outputs": [], 522 | "source": [ 523 | "# удаляем ненужные признаки\n", 524 | "ids = data_test.pop('orderid') # сохраняем id для теста\n", 525 | "data_test.drop(['userid'], inplace=True, axis=1)" 526 | ] 527 | }, 528 | { 529 | "cell_type": "code", 530 | "execution_count": 7, 531 | "metadata": {}, 532 | "outputs": [ 533 | { 534 | "data": { 535 | "text/plain": [ 536 | "Index(['field0', 'field1', 'field2', 'field3', 'field4', 
'field5', 'field6',\n", 537 | " 'field7', 'field8', 'field9', 'field10', 'field11', 'field12',\n", 538 | " 'field13', 'field14', 'field15', 'field16', 'field17', 'field18',\n", 539 | " 'field19', 'field20', 'field21', 'field22', 'field23', 'field24',\n", 540 | " 'field25', 'field26', 'field27', 'field28', 'field29',\n", 541 | " 'indicator_goal21', 'indicator_goal22', 'indicator_goal23',\n", 542 | " 'indicator_goal24', 'indicator_goal25'],\n", 543 | " dtype='object')" 544 | ] 545 | }, 546 | "execution_count": 7, 547 | "metadata": {}, 548 | "output_type": "execute_result" 549 | } 550 | ], 551 | "source": [ 552 | "cols = data_test.columns # значимые колонки\n", 553 | "cols" 554 | ] 555 | }, 556 | { 557 | "cell_type": "code", 558 | "execution_count": 8, 559 | "metadata": {}, 560 | "outputs": [], 561 | "source": [ 562 | "y = data_train.pop('goal1') # целевая переменная для первой задачи" 563 | ] 564 | }, 565 | { 566 | "cell_type": "code", 567 | "execution_count": 9, 568 | "metadata": {}, 569 | "outputs": [], 570 | "source": [ 571 | "y = y.values # мне так спокойней - в numpy.array" 572 | ] 573 | }, 574 | { 575 | "cell_type": "code", 576 | "execution_count": 10, 577 | "metadata": {}, 578 | "outputs": [], 579 | "source": [ 580 | "data_train = data_train[cols] # оставить только нужные колонки" 581 | ] 582 | }, 583 | { 584 | "cell_type": "code", 585 | "execution_count": 11, 586 | "metadata": {}, 587 | "outputs": [ 588 | { 589 | "data": { 590 | "text/html": [ 591 | "
\n", 592 | "\n", 605 | "\n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | " \n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | " \n", 725 | " \n", 726 | " \n", 727 | " \n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | "
field0field1field2field3field4field5field6field7field8field9...field25field26field27field28field29indicator_goal21indicator_goal22indicator_goal23indicator_goal24indicator_goal25
00-0.6265081112110110...1210411011
1144-0.39379457200210...41310211010
2134-0.54893723200110...11170111011
30-0.2386511011113210...18110411011
40-0.704079811110110...1110311001
\n", 755 | "

5 rows × 35 columns

\n", 756 | "
" 757 | ], 758 | "text/plain": [ 759 | " field0 field1 field2 field3 field4 field5 field6 field7 field8 \\\n", 760 | "0 0 -0.626508 11 12 1 1 0 1 1 \n", 761 | "1 144 -0.393794 5 7 2 0 0 2 1 \n", 762 | "2 134 -0.548937 2 3 2 0 0 1 1 \n", 763 | "3 0 -0.238651 10 11 1 1 3 2 1 \n", 764 | "4 0 -0.704079 8 11 1 1 0 1 1 \n", 765 | "\n", 766 | " field9 ... field25 field26 field27 field28 field29 indicator_goal21 \\\n", 767 | "0 0 ... 1 2 1 0 4 1 \n", 768 | "1 0 ... 41 3 1 0 2 1 \n", 769 | "2 0 ... 1 11 7 0 1 1 \n", 770 | "3 0 ... 18 1 1 0 4 1 \n", 771 | "4 0 ... 1 1 1 0 3 1 \n", 772 | "\n", 773 | " indicator_goal22 indicator_goal23 indicator_goal24 indicator_goal25 \n", 774 | "0 1 0 1 1 \n", 775 | "1 1 0 1 0 \n", 776 | "2 1 0 1 1 \n", 777 | "3 1 0 1 1 \n", 778 | "4 1 0 0 1 \n", 779 | "\n", 780 | "[5 rows x 35 columns]" 781 | ] 782 | }, 783 | "execution_count": 11, 784 | "metadata": {}, 785 | "output_type": "execute_result" 786 | } 787 | ], 788 | "source": [ 789 | "# что получилось\n", 790 | "data_train.head()" 791 | ] 792 | }, 793 | { 794 | "cell_type": "markdown", 795 | "metadata": {}, 796 | "source": [ 797 | "### Эксперименты\n", 798 | "\n", 799 | "сначала делим выборку на обучение и тест\n", 800 | "\n", 801 | "не самое лучшее решение, но для быстроты экспериментов сгодится" 802 | ] 803 | }, 804 | { 805 | "cell_type": "code", 806 | "execution_count": 12, 807 | "metadata": {}, 808 | "outputs": [], 809 | "source": [ 810 | "from sklearn.model_selection import train_test_split\n", 811 | "X_train, X_test, y_train, y_test = train_test_split(data_train, y, test_size=0.3, random_state=1)" 812 | ] 813 | }, 814 | { 815 | "cell_type": "markdown", 816 | "metadata": {}, 817 | "source": [ 818 | "### Случайный лес\n", 819 | "\n", 820 | "\n", 821 | "строим по одному дереву и вычисляем метрику качества (ROC AUC)\n", 822 | "\n", 823 | "обратите внимание на прогресс-бар" 824 | ] 825 | }, 826 | { 827 | "cell_type": "code", 828 | "execution_count": 14, 829 | "metadata": {}, 830 | "outputs": 
[ 831 | { 832 | "data": { 833 | "application/vnd.jupyter.widget-view+json": { 834 | "model_id": "f2be4ed53aa84261896b78ba8c029131", 835 | "version_major": 2, 836 | "version_minor": 0 837 | }, 838 | "text/plain": [ 839 | "HBox(children=(IntProgress(value=0), HTML(value='')))" 840 | ] 841 | }, 842 | "metadata": {}, 843 | "output_type": "display_data" 844 | }, 845 | { 846 | "name": "stdout", 847 | "output_type": "stream", 848 | "text": [ 849 | "\n" 850 | ] 851 | }, 852 | { 853 | "data": { 854 | "text/plain": [ 855 | "[]" 856 | ] 857 | }, 858 | "execution_count": 14, 859 | "metadata": {}, 860 | "output_type": "execute_result" 861 | }, 862 | { 863 | "data": { 864 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAD8CAYAAACb4nSYAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3Xl8VPW9//HXJwlZgIQ9ARIg7IuCIBEU3Fsp7ra1dWmrtCrtT7na3rZe7WZrt3t7u1zb0nvrWmyraK212FIRLe6KBASFQEhYE7ZshBBClsl8fn9ksDEEM0CSSWbez8djHuac+Z6Zz/GE95x853u+x9wdERGJDXGRLkBERDqPQl9EJIYo9EVEYohCX0Qkhij0RURiiEJfRCSGKPRFRGJIWKFvZnPNLN/MCs3srmO0+bSZ5ZnZBjN7rMVzaWa2y8x+3R5Fi4jIiUloq4GZxQMLgYuAYmCVmS1x97xmbcYCdwOz3X2/maW3eJnvAy+3X9kiInIi2gx9YAZQ6O5bAcxsMXAlkNeszS3AQnffD+DuJUeeMLPpQAbwHJDT1psNHDjQs7Ozw61fRESA1atXl7n7oLbahRP6mUBRs+ViYGaLNuMAzOx1IB74rrs/Z2ZxwM+AzwEfOdYbmNl8YD7A8OHDyc3NDaMsERE5wsx2hNMunD59a2Vdywl7EoCxwPnAdcCDZtYXuBVY6u5FfAh3v9/dc9w9Z9CgNj+oRETkBIVzpl8MDGu2nAXsbqXNW+7eAGwzs3yaPgTOAs4xs1uB3kCimVW7e6tfBouISMcK50x/FTDWzEaaWSJwLbCkRZtngAsAzGwgTd09W939M+4+3N2zga8BjyrwRUQip83Qd/cAsABYBmwEnnT3DWZ2r5ldEWq2DCg3szxgBfB1dy/vqKJFROTEWFebTz8nJ8f1Ra6IyPExs9Xu3uYISV2RKyISQxT6IiIxJJzROyIi8iGCQWd7+SEKSqpJTIhjQK9E+vVMpL4xSGVNPZU1DVTVNlB1OEDV4Qb69uzB+MFpjM9IpU/PHp1aq0JfRKKSu7Nsw152lNcwuE8yQ/qkUFlTz5tby3lrawX1gUaumprJJ6dnMbRvynG/fm1DI0vW7eZPuUWs31XF4YbGE6qzf69EBqclM7RvMpMz+3LHR8ee0OuES6EvIt2Cu/N6YTlpKQlMGpJGQvyxe6dzt1fw/b9vZF1R5VHPJSXEMX1EPxqDCfxs+WZ+/sJmzho1gAsnpHPeuEGMSe9NZU0DxfsPs7+mnqF9U8jql0JSQhz7qurYuKeKldsqeDK3iIpD9
YxN7801Zwxj0pA0xg1OpTEYpOJQA/sP1ZPUI46+PRPpk9KDvik9SEvpQe+kBMoP1bFp70Hy9x6kqKKGvQdq2VVZS3zcgY78Xwho9I6IdAOlB+u486l1rMgvBaBXYjynj+hHv56JNDQGaWgMUhcIUh8IUl0XYMPuKjLSkvjanPHMOWUwJVW17D5QS3JCHKcN60tyj3gAdpbX8NSaYpa+t4fCkmoAEhPiqA8Ej6qhV2I8h+qbzubN4KKJGcybnc1ZowZg1trEBZ0r3NE7Cn0R6bIag87yvL188y/rqa4LcOfcCWSkJbFyawW5O/ZT29BIQpyREB9HUkLTIzEhjpkj+/OFs0fSMzH8zoxdlYd5ZXMphSXVDOmTzLD+Pemb0oM9B2rZWVFDxaF6Rg7sxYTBqUwYkkaflM7ti29LuKGv7h0R6VLqA0GeWl3My5tLeHNLOVW1ASYOSePxa6cyLiMVgMumDG33983sm8J1M4a3++t2NQp9EekydlceZsFja1izs5LMvilcMnkIs8cMZM4pGSQlxEe6vKig0BeRDnGoLkDujv28uaWcypp6PpWTxenD+7Xa/+3uvFpQxpefWEtdQyO/vn4al04e0iX6yqONQl9E2oW7k7enipc3l/JSfilrduwnEHR6xBuJ8XEsXlXElKw+XD5lKBU19eysqGF35WFKD9ZRVl1HbUOQ8Rmp/OazpzN6UO9I707UUuiLyAlpaGzqe3+toIytZYfYXnbo/bHqpwxN45ZzRzFr9ACmj+iHOzy9pphH3tjOD5duJCHOyOqXQma/FM7I7s/A3olk9k3hmjOGk5KobpyOpNAXkeMSDDpL1+/hZ89vZlvZIYb1T2FseiqzRg9gwuBUzhs3iPS05KO2+9xZ2Xxm5gjKDtUxoFcS8XHquokEhb6ItKku0Mjb2yp4Kb+UFZtK2Fp2iPEZqTx0Yw4XTkgPu+89Ls5ITz36A0E6j0JfRD7Usg17+c5f17Ovqo7E+DhmjurPbReM4appmTpb74YU+iLSqpKDtdzz1w38Y/1eJg5J4wdXTWb2mAHHdcGTdD06eiLyAbsqD/PQq9tYvGongaDz9Y+NZ/65o+jxIXPdSPeh0BeJYe7Opr0HKSipZlvpIfL2HOCFjSUAXD5lCLd/ZCyjNHwyqij0RaJYTX2AxPi492ekbGgMsrvyMAX7qlmRX8KLG0vYW1ULNE0iNrRPCjeelc0Xzs4mq1/PSJYuHUShLxKF3J1f/7OQn7+wGXdITUqgZ1I8pQfrCIbmWOyZGM+5Ywdx4cR0pmT1IXtAr/dnn5TopdAXiTLuzn8+t4nfvryVuacMZuKQNCoP13OoLsDgtKbZI0cM6MWUrD4K+Rik0BeJIsGg891nN/Domzv4zMzhfP/KU4nTsEppJqyv481srpnlm1mhmd11jDafNrM8M9tgZo+F1k01szdD6941s2vas3gR+ZfqugC3/nENj765g1vOGckPrlLgy9HaPNM3s3hgIXARUAysMrMl7p7XrM1Y4G5gtrvvN7P00FM1wA3uXmBmQ4HVZrbM3Y++h5mIhK0x6JRV1zGodxJxcca2skPMfzSXLaXVfOvSidx09kjNUCmtCqd7ZwZQ6O5bAcxsMXAlkNeszS3AQnffD+DuJaH/bj7SwN13m1kJMAhQ6IucoM37DvLlxWvJ21NFUkIcIwf2YlflYRLijD/cNJNZYwZGukTpwsIJ/UygqNlyMTCzRZtxAGb2OhAPfNfdn2vewMxmAInAlpZvYGbzgfkAw4dH/51rRE5EMOg88sZ2/uu5TaQmJfAfcydQXl0XmvSsJ/dcPknDLKVN4YR+a38jtryxbgIwFjgfyAJeNbNTj3TjmNkQ4PfAje5+1B2H3f1+4H5oukdu2NWLxIiGxiB3LH6Hpe/t5aMT0/nxJ6YwKDUp0mVJNxRO6BcDw5otZwG7W2nzlrs3ANvMLJ+mD4FVZpYG/B34lru/1Q41i8SUQGOQLz+xlqXv7eXuiycw/9xR6
q+XExbO6J1VwFgzG2lmicC1wJIWbZ4BLgAws4E0dfdsDbX/C/Cou/+p/coWiS6vbC7l92/tIHd7BQdrG95f3xh0vvqndfz93T1885KJfPG80Qp8OSltnum7e8DMFgDLaOqvf9jdN5jZvUCuuy8JPTfHzPKARuDr7l5uZp8FzgUGmNm80EvOc/e1HbEzIt3RX9fu4o7FH/wnkZqc8P4EZxWH6rlz7nhuOXdUJMqTKGPuXasLPScnx3NzcyNdhkir3J0tpdXUB5z4OCMpIY7h/Xue8Hj4lzeXctPvVjF9RD9+cvUUCkuq2binirLqegLBIIFG5/Th/fj0GcPafjGJaWa22t1z2mqnK3JFwtQYdO586l3+vKb4A+v79uzBjOz+TB/Rj/g4o7ahkfpGZ8LgVM7I7n/ML1zf2bmf//eH1YzNSOWBG3NIS+7BiAG9+MjEjM7YHYlRCn2RMDQ0BvnKE2v527t7mH/uKE4f3pfGIBysbWD1jv2s3FbB83n7Wt121KBejBzQi/S0JAb0SmJnRQ1riyrZWVHDsP4pLPrCGaQl9+jkPZJYpdAX+RDuTsnBOr79zHqez9vH3RdP4Ivnjf5Am2tnNF1bcqCmgbg43p/EbP2uA6zcVkHu9v3sqjzMuuJKyg/VMzgtmanD+nLdjOF84vRM3TNWOpVCX6QVbxSW8dPn8ykoqeZgbQCA711xCjfOyj7mNn16fvBsfdrwfkwb3g/O+9e6xqDrvrISUQp9kRb2Hqjl1sfWkJqcwFVTMxmb0Ztpw/oxOavPSb+2Al8iTaEv0kxj0PnKE2upDwRZ9PkZulWgRB2Fvkgzv31lC29uLecnV09R4EtU0u3tRULWFlXy8+c3c+mUIXxqelakyxHpEAp9EWBH+SFuXpRLRloyP7pqsqY6kKil0JeYV3KwlhsefpvGYJBFX5hx1CgckWiiPn2JaVW1Dcx7eBUlVXU8dstMxqSrH1+im870Jabd/fR7bN53kP/73PSmMfUiUU6hLzFr/a4D/P3dPdx6wRjOGzco0uWIdAqFvsSsnz6fT5+UHtx8zshIlyLSaRT6EpNyt1fwUn4pXzpvtCY7k5ii0JeYsK6oklXbK3B33J2fPp/PwN5J3DhrRKRLE+lUGr0jUa0u0MhPnsvnode2ATAmvTfnjB3IW1sr+O7lk+iZqH8CElv0Gy9Rq2DfQe5YvJa8PVXccNYITs3sw2Mrd/LI69vJ7JvCdTOHR7pEkU6n0JeoEmgMsiK/lD+u3MHLm0vpm9KDB2/I4aOTmu5G9emcYWzcU0WvxASSEuIjXK1I51PoS7dW29DI8rx9rC2qZNPeKjbsrqKypoH01CT+7YIxfPasEUfdpGTikLQIVSsSeQp96ZaKKmr4w8odPLmqiP01DSQlxDF+cCpzJmVw4YR0PjIxgx7xGqcg0pJCX7qNA4cb+Md7e3j6nV28va2C+DjjookZfO6sEZw5aoBuUCIShrBC38zmAvcB8cCD7v6frbT5NPBdwIF17n59aP2NwLdCzX7g7ovaoW6JMUvW7ebOp9ZR2xBk1KBefPWicVydk8WQPimRLk2kW2kz9M0sHlgIXAQUA6vMbIm75zVrMxa4G5jt7vvNLD20vj9wD5BD04fB6tC2+9t/VyQauTu/+mchP1++mTOy+/HtyyYxObOPpj4WOUHhnOnPAArdfSuAmS0GrgTymrW5BVh4JMzdvSS0/mPAcnevCG27HJgLPN4+5Us0q21o5BtPv8fT7+ziE9My+fEnJ2vEjchJCif0M4GiZsvFwMwWbcYBmNnrNHUBfdfdnzvGtpkt38DM5gPzAYYP19jpWFAXaGT9rgPUBYI0Bp2eiQlMG9aXuFC//J4Dh/nSH9awrqiSf79oHP924Rid3Yu0g3BCv7V/ad7K64wFzgeygFfN7NQwt8Xd7wfuB8jJyTnqeYkua4sq+eqTa9lSeugD60cP6sW82SMZ0b8n//7kWg7XN/J/n53O3FMHR6hSkegTTugXA
8OaLWcBu1tp85a7NwDbzCyfpg+BYpo+CJpv+9KJFivdW12gkfteKOD/Xt7C4LRk7rt2KhlpySTEGTvKa1j05na+/cx6AEYN7MXjt5zJ2IzUyBYtEmXCCf1VwFgzGwnsAq4Frm/R5hngOuB3ZjaQpu6ercAW4EdmduTuFHNo+sJXYkxlTT03L8old8d+Pp2Txbcum/SB2S1zsvvzidMzWb1jP29uKefG2dma/VKkA7QZ+u4eMLMFwDKa+usfdvcNZnYvkOvuS0LPzTGzPKAR+Lq7lwOY2fdp+uAAuPfIl7oSO/YcOMwND73NjvIafnXdNC4/bWir7cyMnOz+5GT37+QKRWKHuXetLvScnBzPzc2NdBnSTgpLqrnhoZVU1Qa4/4bpzBo9MNIliUQlM1vt7jlttdMVudJhGhqD3PbHNdQ3Bnnii2dyytA+kS5JJOYp9KXDPPL6NvL3HeSBG3IU+CJdhGakkg6xu/Iw//NCAR+dmM5FoWmNRSTyFPrSIe59No+gO/dcfkqkSxGRZhT60q7cnb+9u5vnNuzl3y4cy7D+PSNdkog0oz59aRclB2t5anUxT6/ZRWFJNeMyenPLOaMiXZaItKDQl5P2Un4Jdyxey4HDDeSM6MePPj6Zy04bQmKC/pAU6WoU+nLCgkFn4YpCfv7CZsZnpPLUl87StAkiXZxCX05IdV2ArzyxluV5+7hq6lB+/IkppCRq2mORrk6hL8dtR/khbnk0ly2lh/jOZZP4/OxsTXss0k0o9OW4vFFYxq2PrcEdHv3CDGaP0bQKIt2JQl/C9sw7u/jan9YxcmAvHrwxhxEDekW6JBE5Tgp9OUpNfYDv/y2PUQN784nTMxnQO4kHX93KD/6+kTNH9ef+G3I07bFIN6XQlw9wd/7jz+/x7Lqm++T8ZNkmpmT1ZfWO/VwyeTC/uGaq7lMr0o0p9OUDHnptG8+u282dc8fzkQkZPLGqiCXrdjFvVjbfvmwS8XH6wlakO9N8+vK+NwrL+NzDbzNnUga/+czpGpEj0o1oPn0JW8G+gzy7bjeL3tzBqIG9+O9PnabAF4lSCv0Ylru9gu/8dQN5e6qIMzhr9AB+eNVkeifp10IkWulfdwwKNAZZuGIL9724mcx+Kdxz+SQunTKE9NTkSJcmIh1MoR9jCvYd5Jt/Wc/b2yv4+LRM7r3yFFI1/FIkZij0Y8TeA7X8zwubeTK3iF6JCfzimtP4+LSsSJclIp0srLlvzWyumeWbWaGZ3dXK8/PMrNTM1oYeNzd77idmtsHMNprZL03fEHa6N7eUc/5PV/DnNcXcOCubl++8QIEvEqPaPNM3s3hgIXARUAysMrMl7p7XoukT7r6gxbazgNnAlNCq14DzgJdOsm4JU2PQ+d6zGxiUmsQfbzqT4QN0JyuRWBbOmf4MoNDdt7p7PbAYuDLM13cgGUgEkoAewL4TKVROzF/e2cWmvQf5+scmKPBFJKzQzwSKmi0Xh9a19Ekze9fMnjKzYQDu/iawAtgTeixz940nWbOEqbahkZ8/n8/kzD5cNnlIpMsRkS4gnNBvrQ++5WW8zwLZ7j4FeAFYBGBmY4CJQBZNHxQXmtm5R72B2XwzyzWz3NLS0uOpXz7Eoje2s/tALXdfPIE4TZ8gIoQX+sXAsGbLWcDu5g3cvdzd60KLDwDTQz9/HHjL3avdvRr4B3Bmyzdw9/vdPcfdcwYNGnS8+yCtqKypZ+GKQs4bN4hZmvNeRELCCf1VwFgzG2lmicC1wJLmDcysed/BFcCRLpydwHlmlmBmPWj6ElfdOx1sz4HDzHtkFdV1Ae66eEKkyxGRLqTN0TvuHjCzBcAyIB542N03mNm9QK67LwFuN7MrgABQAcwLbf4UcCHwHk1dQs+5+7PtvxtyxFtby1nw2BoO1zey8PrTmTgkLdIliUgXolk2o0RJVS0PvraNh17bxoj+Pfnt56YzNiM10mWJSCfRLJsxouRgLb98sYAnc4sJNAa5alom37tCU
yuISOsU+t2Yu3PbH9ewtqiSq6dn8cVzR5M9UPetFZFjU+h3Y0vf28uq7fv50ccnc/3M4ZEuR0S6gbDm3pGup7ahkR8t3ciEwalcc8awtjcQEUGh32099No2dlUe5juX6761IhI+hX43VFJVy8IVhXzslAxmjdaFVyISPoV+N/SDv28k0Oh845KJkS5FRLoZhX4388w7u1iybje3XTCGEQM0UkdEjo9CvxspqqjhW8+sJ2dEP267YHSkyxGRbkih300EGoPcsfgdDPjFNVNJiNehE5Hjp3H63cR9LxawZmcl9107lWH9dTMUETkxOl3sBv66dhe/+mchn5qexZVTW7t/jYhIeBT6XdzKreV8/U/vMnNkf37w8VMjXY6IdHMK/S5sS2k183+/mqz+Kdz/uRySEuIjXZKIdHMK/S7s359cR0Kc8bt5M+jTU7NmisjJU+h3UUUVNawrquSL541i+AB9cSsi7UOh30Utz9sHwJxJgyNciYhEE4V+F7U8bx9j03trfnwRaVcK/S6osqaet7dXcNGkjEiXIiJRRqHfBa3IL6Ex6Mw5RV07ItK+FPpd0PK8faSnJjEls0+kSxGRKKPQ72JqGxp5Kb+Uj07KIE43RxGRdhZW6JvZXDPLN7NCM7urlefnmVmpma0NPW5u9txwM3vezDaaWZ6ZZbdf+dFhybrdvLK5FHfnzS3l1NQ3Mkf9+SLSAdqccM3M4oGFwEVAMbDKzJa4e16Lpk+4+4JWXuJR4IfuvtzMegPBky06mqzcWs7tj78DwJj03qQmJ9A7KYGzRg+IcGUiEo3COdOfARS6+1Z3rwcWA1eG8+JmNglIcPflAO5e7e41J1xtlGkMOt97No/Mvin899VTSOkRzzs7K7lgQrqmXBCRDhHO1MqZQFGz5WJgZivtPmlm5wKbga+4exEwDqg0s6eBkcALwF3u3nhyZUeHJ3OLyNtTxa+vn8ZlU4Zy9fQs8vZUkdk3JdKliUiUCudMv7VvE73F8rNAtrtPoSnYF4XWJwDnAF8DzgBGAfOOegOz+WaWa2a5paWlYZbevR043MB/L8tnRnZ/Lp08BAAz45ShfejbMzHC1YlItAon9IuBYc2Ws4DdzRu4e7m714UWHwCmN9v2nVDXUAB4Bji95Ru4+/3unuPuOYMGDTrefeiWfvliAftr6vnO5ZMw0ygdEekc4XTvrALGmtlIYBdwLXB98wZmNsTd94QWrwA2Ntu2n5kNcvdS4EIgt10q76bW7NzPr14sYEV+KdfNGMapGosvIp2ozdB394CZLQCWAfHAw+6+wczuBXLdfQlwu5ldAQSACkJdOO7eaGZfA160ptPZ1TT9JRBzqusCLHhsDS/ll9KvZw++NmccN58zKtJliUiMMfeW3fORlZOT47m50fXHgLtz22NrWLZhH1+bM54bzhpBryTdnlhE2o+ZrXb3nLbaKXk6wUOvbWPpe3v5xiUTmH/u6EiXIyIxTNMwdLC3t1Xw439sYu4pg7lF3TkiEmEK/Q5UWVPPbY+tYXj/nvzkU1M0SkdEIk7dOx3ojyt3UnqwjodvPIO0ZN3jVkQiT2f6HaShMcjv39zB2WMGMjlLwzJFpGtQ6HeQf6zfy96qWj4/OzvSpYiIvE+h30EeeX0b2QN6csH49EiXIiLyPoV+B3hn537e2VnJvFnZuhGKiHQpCv0O8Mjr20lNSuDqnGFtNxYR6UQavdNOtpcdYsPuKjbuqWLpe3u4cVY2vXXVrYh0MUqldvCL5Zu578UCAOLjjFOHpnHzOSMjXJWIyNEU+ifpgVe2ct+LBXx8WiY3nT2SMem9Se6hu16JSNek0D8Jj7+9kx8u3cilk4fw00+dRry+tBWRLk6hfxw27D7Ab17awv5D9VTWNLBxbxXnjx/EL66ZqsAXkW5BoR+mmvoA/+8Pa6isqWdcRipD+yYza/QAvjpnPIkJGgQlIt2DQj9MP3t+Mzsranj8ljM5a/SASJcjInJCdIoahjU79/Pw6
9v47JnDFfgi0q0p9NtQ29DInU+9y9A+Kdx18cRIlyMiclLUvdOGhSsKKSypZtEXZuhiKxHp9nSm/yG2llbz25e3ctXUoZw3blCkyxEROWkK/WNwd+5ZsoGkhDi+cam6dUQkOij0j+Ef6/fyakEZX50zjvTU5EiXIyLSLsIKfTOba2b5ZlZoZne18vw8Mys1s7Whx80tnk8zs11m9uv2KrwjHaoLcO+zeUwaksZnzxwR6XJERNpNm99Mmlk8sBC4CCgGVpnZEnfPa9H0CXdfcIyX+T7w8klV2onue7GAvVW1LPzM6STE648hEYke4STaDKDQ3be6ez2wGLgy3Dcws+lABvD8iZXYudYVVfLgq1u5bsYwpo/oF+lyRETaVTihnwkUNVsuDq1r6ZNm9q6ZPWVmwwDMLA74GfD1k660E9QHgvzHn98lPTWZuy/Rl7ciEn3CCf3WZhLzFsvPAtnuPgV4AVgUWn8rsNTdi/gQZjbfzHLNLLe0tDSMkjrGb14qZNPeg/zgqlNJS+4RsTpERDpKOFcbFQPN7/uXBexu3sDdy5stPgD8V+jns4BzzOxWoDeQaGbV7n5Xi+3vB+4HyMnJafmB0iny9x5k4YpCrjhtKB+dlBGJEkREOlw4ob8KGGtmI4FdwLXA9c0bmNkQd98TWrwC2Ajg7p9p1mYekNMy8LuKnzy3idTkHtxz+aRIlyIi0mHaDH13D5jZAmAZEA887O4bzOxeINfdlwC3m9kVQACoAOZ1YM3trrahkdcKy7h+5nAG9E6KdDkiIh0mrMlk3H0psLTFuu80+/lu4O42XuN3wO+Ou8JOsHJbBXWBoKZaEJGop0HowCubS0lMiGPmSE2bLCLRTaEPvLy5lJkj+5OSqBuai0h0i/nQ31V5mMKSanXtiEhMiPnQf2Vz03UBCn0RiQUxH/ov55cytE8yY9J7R7oUEZEOF9Oh39AY5PXCMs4dNwiz1i48FhGJLjEd+muLKjlYF1DXjojEjJgO/ZfzS4mPM2aNGRjpUkREOkVsh/7mUqYN60ufFE2uJiKxIWZDv6Sqlvd2HeCCCemRLkVEpNPEbOi/lN80VPOC8Qp9EYkdMRv6/9xUwpA+yUwckhrpUkREOk1Mhn59IMhrhWWcPz5dQzVFJKbEZOiv2l5BdV2AC9WfLyIxJiZD/5+bSkhMiGP2GM2qKSKxJSZDf8WmEs4cNYCeiWHdTkBEJGrEXOhvLzvE1rJDXDheV+GKSOyJudD/56YSAC6coJufi0jsibnQf3HTPkYP6sXwAT0jXYqISKeLqdB/Y0sZrxeWc+XUzEiXIiISETET+vWBIN9+Zj3D+/dk/rmjIl2OiEhExMzwlQde3cqW0kM88vkzSO6he+GKSGwK60zfzOaaWb6ZFZrZXa08P8/MSs1sbehxc2j9VDN708w2mNm7ZnZNe+9AOIoqavjliwVcfOpgzbUjIjGtzTN9M4sHFgIXAcXAKjNb4u55LZo+4e4LWqyrAW5w9wIzGwqsNrNl7l7ZHsWH63vPbiAhzvjO5ZM6821FRLqccM70ZwCF7r7V3euBxcCV4by4u29294LQz7uBEqBTB8hXHKrnhY0l3HT2SIb0SenMtxYR6XLCCf1MoKjZcnFoXUufDHXhPGVmw1o+aWYzgERgywlVeoJeLywD4MKJGpcvIhJO6Lc2DaW3WH4WyHb3KcALwKIPvIDZEOD3wOfdPXjUG5jNN7NcM8stLS0Nr/IwvV5YRlpyApMz+7Tr64qIdEfhhH4x0PzMPQvY3byBu5e7e11o8QFg+pHnzCwN+DvwLXfe8ckpAAAHDElEQVR/q7U3cPf73T3H3XMGDWq/3h9359WCMmaNHkh8nKZQFhEJJ/RXAWPNbKSZJQLXAkuaNwidyR9xBbAxtD4R+AvwqLv/qX1KDt+O8hp2VR5m9ljd+FxEBMIYvePuATNbACwD4oGH3X2Dmd0L5Lr7EuB2M7sCCAAVwLzQ5p8GzgUGmNmRdfPcfW377
kbrXg31558zRqEvIgJhXpzl7kuBpS3WfafZz3cDd7ey3R+AP5xkjSfs9YIyMvumMELz7IiIAFE8DUNj0HljSxnnjB2oWyKKiIREbei/t+sAVbUBZqtrR0TkfVEb+q8VNA39VOiLiPxL1Ib+qwVlnDI0jf69EiNdiohIlxGVoV9TH2DNzv2crbN8EZEPiMrQX7m1goZG55yxug+uiEhzURn6rxaUkZQQR052v0iXIiLSpURl6L9WWMqMkf11sxQRkRaiLvT3Hqhl875qztHUCyIiR4m60H8tNPXC2WPUny8i0lL0hX5BKQN7JzFhcGqkSxER6XKiKvSDQee1wjLOHjOAOE2lLCJylKgK/U17D1JWXc/ZGqopItKqqAr9V0NTL+hLXBGR1kVV6L9WWMa4jN5kpCVHuhQRkS4pakK/tqGRt7dV6CpcEZEPETWhX1XbwMdOGcxHJqZHuhQRkS4rrDtndQfpqcn88rppkS5DRKRLi5ozfRERaZtCX0Qkhij0RURiiEJfRCSGhBX6ZjbXzPLNrNDM7mrl+XlmVmpma0OPm5s9d6OZFYQeN7Zn8SIicnzaHL1jZvHAQuAioBhYZWZL3D2vRdMn3H1Bi237A/cAOYADq0Pb7m+X6kVE5LiEc6Y/Ayh0963uXg8sBq4M8/U/Bix394pQ0C8H5p5YqSIicrLCCf1MoKjZcnFoXUufNLN3zewpMxt2nNuKiEgnCOfirNbmKPYWy88Cj7t7nZl9CVgEXBjmtpjZfGB+aLHazPLDqKu5gUDZcW7T3cXiPkNs7ncs7jPE5n6fzD6PCKdROKFfDAxrtpwF7G7ewN3Lmy0+APxXs23Pb7HtSy3fwN3vB+4Po5ZWmVmuu+ec6PbdUSzuM8TmfsfiPkNs7ndn7HM43TurgLFmNtLMEoFrgSXNG5jZkGaLVwAbQz8vA+aYWT8z6wfMCa0TEZEIaPNM390DZraAprCOBx529w1mdi+Q6+5LgNvN7AogAFQA80LbVpjZ92n64AC4190rOmA/REQkDOZ+VBd7t2Nm80NdRDEjFvcZYnO/Y3GfITb3uzP2OSpCX0REwqNpGEREYki3Dv22poeIFmY2zMxWmNlGM9tgZneE1vc3s+WhKS6Wh74sjypmFm9m75jZ30LLI81sZWifnwgNLogaZtY3dK3LptDxPitGjvNXQr/b683scTNLjsZjbWYPm1mJma1vtq7V42tNfhnKt3fN7PT2qKHbhn6z6SEuBiYB15nZpMhW1WECwFfdfSJwJnBbaF/vAl5097HAi6HlaHMH/xoNBk3DgX8R2uf9wE0Rqarj3Ac85+4TgNNo2veoPs5mlgncDuS4+6k0DRi5lug81r/j6FkJjnV8LwbGhh7zgf9tjwK6behzctNDdCvuvsfd14R+PkhTEGTStL+LQs0WAVdFpsKOYWZZwKXAg6Flo+miv6dCTaJqn80sDTgXeAjA3evdvZIoP84hCUCKmSUAPYE9ROGxdvdXaBrh2Nyxju+VwKPe5C2gb4vh8SekO4d+TE7xYGbZwDRgJZDh7nug6YMBiLYbBP8PcCcQDC0PACrdPRBajrZjPgooBR4JdWk9aGa9iPLj7O67gJ8CO2kK+wPAaqL7WDd3rOPbIRnXnUM/rCkeoomZ9Qb+DHzZ3asiXU9HMrPLgBJ3X918dStNo+mYJwCnA//r7tOAQ0RZV05rQn3YVwIjgaFAL5q6NlqKpmMdjg75fe/Ood/m9BDRxMx60BT4f3T3p0Or9x35cy/035JI1dcBZgNXmNl2mrruLqTpzL9vqAsAou+YFwPF7r4ytPwUTR8C0XycAT4KbHP3UndvAJ4GZhHdx7q5Yx3fDsm47hz6bU4PES1CfdkPARvd/efNnloCHLkxzY3AXzu7to7i7ne7e5a7Z9N0bP/p7p8BVgBXh5pF2z7vBYrMbHxo1UeAPKL4OIfsBM40s56h3/Uj+x21x7qFYx3fJcANoVE8ZwIHjnQDnRR377YP4BJgM
7AF+Gak6+nA/Tybpj/r3gXWhh6X0NTH/SJQEPpv/0jX2kH7fz7wt9DPo4C3gULgT0BSpOtr532dCuSGjvUzQL9YOM7A94BNwHrg90BSNB5r4HGavrdooOlM/qZjHV+auncWhvLtPZpGN510DboiV0QkhnTn7h0RETlOCn0RkRii0BcRiSEKfRGRGKLQFxGJIQp9EZEYotAXEYkhCn0RkRjy/wGle//ClVohuwAAAABJRU5ErkJggg==\n", 865 | "text/plain": [ 866 | "
" 867 | ] 868 | }, 869 | "metadata": { 870 | "needs_background": "light" 871 | }, 872 | "output_type": "display_data" 873 | } 874 | ], 875 | "source": [ 876 | "from sklearn.ensemble import RandomForestClassifier\n", 877 | "from sklearn.metrics import roc_auc_score\n", 878 | "\n", 879 | "model = RandomForestClassifier(max_features=1, n_estimators=1, oob_score=False, warm_start=True, random_state=1)\n", 880 | "\n", 881 | "aucs = []\n", 882 | "for t in tqdm_notebook(list(range(1, 101))):\n", 883 | " model.set_params(n_estimators=t)\n", 884 | " model.fit(X_train, y_train)\n", 885 | " a = model.predict_proba(X_test)[:, 1]\n", 886 | " q = roc_auc_score(y_test, a)\n", 887 | " aucs.append(q)\n", 888 | " \n", 889 | "plt.plot(range(1, 101), aucs)" 890 | ] 891 | }, 892 | { 893 | "cell_type": "code", 894 | "execution_count": 46, 895 | "metadata": {}, 896 | "outputs": [ 897 | { 898 | "data": { 899 | "application/vnd.jupyter.widget-view+json": { 900 | "model_id": "08b2382018ab4201b3b2c5904899732f", 901 | "version_major": 2, 902 | "version_minor": 0 903 | }, 904 | "text/plain": [ 905 | "HBox(children=(IntProgress(value=0), HTML(value='')))" 906 | ] 907 | }, 908 | "metadata": {}, 909 | "output_type": "display_data" 910 | }, 911 | { 912 | "data": { 913 | "text/plain": [ 914 | "[]" 915 | ] 916 | }, 917 | "execution_count": 46, 918 | "metadata": {}, 919 | "output_type": "execute_result" 920 | }, 921 | { 922 | "data": { 923 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAX0AAAD8CAYAAACb4nSYAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3Xl8VfWd//HXh6yQhSUbS4CwhF0EjYCiYB0XKhVr7aLWqdRa2mltp50uo62/Lo7t1LbTxdZxapWpVquOuOEublRckKCAbIGwhwAJhEAIZL2f3x+5tDHGcoEkJ/fe9/PxyIN7zv3eez+Hk7xz8jmbuTsiIhIfegRdgIiIdB2FvohIHFHoi4jEEYW+iEgcUeiLiMQRhb6ISBxR6IuIxBGFvohIHFHoi4jEkcSgC2grOzvbCwoKgi5DRCSqLF++fK+75xxrXLcL/YKCAoqLi4MuQ0QkqpjZtkjGqb0jIhJHFPoiInFEoS8iEkcU+iIicUShLyISRxT6IiJxRKEvIhJHut1x+iIisayusZnKmnpqG5o4VNdE9eFG9tXWs6+2gT49k7lq6pBO/XyFvohIJyvbf5gX1uzhlZIKlm6uoqE51O6404b0UeiLiESz10v3ct09xRxpbGZEThqfO3Moo/IySE9NJC0lkd49k8hKSyYrPZleyZ0fyQp9EZFO8mpJBV/683IKstK44+rTGJ6THnRJCn0RkY6wqqyanz27nrSURE7N701GahI/eXodI3PTue+6qfRLSw66REChLyIxpKq2gTc37SOzZyJnDs8iMeHkD1AMhZwd+w+zs/oIo/IyyE5P+cDzdy3ZzC+eL6Fvr2TSUxJZtHYPAKfm9+bea6fSu1fSSdfRURT6IhIVGppCbK86zNtbqnh9016WbakiKaEHg/r0pH/vVLbsrWV1+QHcW8ZnpSVz8SkDOHVwHxJ6QA8z+memctrQviR9yC+DxRsqeWLFTuoam6lrDFF9uIGS3TXUNjT/bcyQfr04ZVBveiYnYMCWvbUUb9vPRePzuPXyifTplcyBI42UVhxi/MBMUpMSuuB/J3LmR/+HuomioiLXpZVFxN1ZvKGS/319K6UVh9h14AihcFzlZqRw5ogsEszYWX2E8gNHGJDZk3MKs5lemE3FwXqeXFXOS+v2UNf4/iNlMlMTmTk6lxmF2Zw+tC/DstPYc7Ce/3hqLU+/t4t+aclkpSWTktSDjJQkRuWlM3ZAJgP69GT9roO8u72adbsP0tgUwoHEBONLM0bw2alDMLOu/48KM7Pl7l50rHHa0heRbuVQfRNvb9nH714u5d3t1Qzq05Mpw/oxuF8vBvftyeQhfRiRk37MgJ01oT+HG5qorKkn5NAcckorDvHy+j28vL6SJ1eWA9AvLZn6xmaaQs63LhjFvJnDSUlsf+t85qhj3qOk24so9M1sFvBbIAG4y91/1s6YTwM/AhxY6e5Xmdkk4A4gE2gGfuLuD3VQ7SIS5eoam3l3ezVvb6li+fb9bNxTw64DdQAM6tOTn152Cp88PZ/kxBPrzfdKTmRo1t9jbmRuOrMm9CcUckorD7F8237e2baf5pDzjfNHMSSrV4csV3d2zPaOmSUAG4ALgDJgGXClu69tNaYQ+D/gPHffb2a57l5hZqMAd/eNZjYQWA6MdffqD/s8tXdEYl9FTR2/e6mUh4p30NAUwgzG9M9kbP8MRuSmMzovg3NGZX/oFrd8UEe2d6YApe6+OfzGDwKXAmtbjfkicLu77wdw94rwvxuODnD3cjOrAHKADw19EYlNDU0hNu89xDOrdnHXki3UN4X41On5XDg+j9OH9qN3z+5zhEssiyT0BwE7Wk2XAVPbjBkFYGav09IC+pG7P9d6gJlNAZKBTW0/wMzmAfMAhgzp3FOQRaTrbNxTw4J3ylhcUsmmykM0Nrd0FmZPHMC3LxzNsOy0gCuMP5GEfnt7S9r2hBKBQuBcIB94zcwmHG3jmNk
A4M/ANe7+gYtOuPudwJ3Q0t6JuHoR6XZCIefJVeXMf30rK3dUk9DDOGtEFh8Zk8uY/hmcmt+HAoV9YCIJ/TJgcKvpfKC8nTFvuXsjsMXMSmj5JbDMzDKBp4Gb3P2tDqhZRLqp1zZW8rNn17Om/CCFuencNHssl04aRE5GyrFfLF0iktBfBhSa2TBgJ3AFcFWbMY8DVwJ/MrNsWto9m80sGXgMuNfdH+64skUkKO7OxopDvLlpH29u2se2qsPU1jdRU9fI/sON5PftyW+vmMQlEwfSo0dwx61L+44Z+u7eZGbXA8/T0q+f7+5rzOxmoNjdF4afu9DM1tJyaOZ33H2fmV0NzACyzGxu+C3nuvuKzlgYEekcNXWN/HXDXl4tqWDxhkoqauqBlsMqxw7IICM1ifSUREb1z+DTRfk66qYb0xm5IvIPVdU2cMnvlrCz+giZqYmcMyqHGYXZnDUim8H9Yv+49mihM3JF5KS5O99+eCWVNfXMn1vEjMKcDrmImQRHa09EPtTdS7bw8voKvj97LOeNyVPgxwCtQRFp14odLdeHv2h8Hp87c2jQ5UgHUXtHRN6npq6RB97ezh8WbyYvM5WfX35qoFePlI6l0BeJY3WNzfxq0QYqa+pJSexBc8h5bvVuauqbOHN4Fj+cM65b3QBETp5CXySO/WrRBu7862by+/akoSlEY3OIGaNz+NKM4UzM7xN0edIJFPoicertLVX88bXNXDV1CD+97JSgy5Euoh25InHoUH0T33p4BYP79uL7F48NuhzpQtrSF4kzjc0hbnlqLWX7j/B/XzqTtBTFQDzR2haJUXWNzbyxaS/l1XVU1NSzq/oI63YfZMPuQzQ0h/jSjOGcUdAv6DKliyn0RWJMRU0d9725jfuWbqeqtgEAM8hOT2F0XgZzpxcwMb83s8b3D7hSCYJCXyRG7DtUz+9eLuUvS7fTGApx/tiWk6oKczPISk8mSWfTCgp9kagVCjn7ahvYc7COl9dX8IfFm6gL34LwSzNH6K5U0i6FvkiUcXd++UIJf1i8mabQ36+SO2t8f7590WhG5qYHWJ10dwp9kSjz25c2cvsrm5h9ygCmDe9HTkYqI3PTGJmbEXRpEgUU+iJR5K7XNvObFzfyqdPzufXyibozlRw3hb5IN7NiRzXrdh2kf+9U+memUtfYzOryg7y7fT+PvrOT2acM4GcKfDlBCn2RbuTBt7fz/cdX0xz64B3tevdM4lOn5/OTy04hQYEvJ0ihL9INuDu/XrSB214uZeaoHH40ZzxVtfXsPlBPYoIxfmAmg/r01CWO5aQp9EUCUNfYzL8/soqNew6RnNiDhqYQa3cd5NNFLVvySQk9dMildAqFvkgXc3e+u2AVC1eWc+7oHJpDTn1TiBs/OoZ5M4Zra146lUJfpIv9etEGFq4s599njeFfzh0RdDkSZxT6Ip3kwJFGVu88wNrygyQmGP3SkimvruO2l0v5TNFgvjxzeNAlShxS6It0sBU7qrnhkVWs313T7vPTR2Zxy2UT1MaRQCj0RTrQguVlfO+x98jNSOE7F41mYn5vJgzsjQNVtfUcrGtiwsDeuviZBEahL9IBDhxp5NeLNvCnN7Zy1ogsbr/qNPqmJb9vTL820yJBiCj0zWwW8FsgAbjL3X/WzphPAz8CHFjp7leF518D3BQedou739MBdYsEJhRyDjc2c7i+ie1Vh3lo2Q6eXFVOXWOIa6cP43sXjyFRW/LSTR0z9M0sAbgduAAoA5aZ2UJ3X9tqTCFwIzDd3febWW54fj/gh0ARLb8Mlodfu7/jF0Wk8z3w9nZ+uHANDU2hv83rlZzAZZMHcdWUoZyS3zvA6kSOLZIt/SlAqbtvBjCzB4FLgbWtxnwRuP1omLt7RXj+RcAid68Kv3YRMAt4oGPKF+k6C1eW873H3uPM4VmcOzqHXsmJ9OmVxMxROWSkJgVdnkhEIgn9QcCOVtNlwNQ2Y0YBmNnrtLSAfuTuz33Iawe1/QA
zmwfMAxgyZEiktYt0mVfWV/BvD63gjIJ+zJ97BqlJCUGXJHJCImk8tndcWdurQSUChcC5wJXAXWbWJ8LX4u53unuRuxfl5OREUJJI13ln+37+5f7ljBmQwd3XFCnwJapFEvplwOBW0/lAeTtjnnD3RnffApTQ8ksgkteKdFvVhxv42l/eJTcjlXs+P0VtHIl6kYT+MqDQzIaZWTJwBbCwzZjHgY8AmFk2Le2ezcDzwIVm1tfM+gIXhueJdHvuzr8/soqKmjp+f9VkstJTgi5J5KQds6fv7k1mdj0tYZ0AzHf3NWZ2M1Ds7gv5e7ivBZqB77j7PgAz+w9afnEA3Hx0p65Id3ffW9t4fs0ebpo9lon5fYIuR6RDmPsHb9YQpKKiIi8uLg66DIlzq3ce4BN3vMH0EVncfc0ZukuVdHtmttzdi441TmeQiLSxckc1V9+9lKy0ZH75qVMV+BJTFPoirSzdvI/P3rWUjNREHpp3pvr4EnN07R2JW6UVNfzi+RLWlB8kNyOFnIwUFm+oZFCfntx/3TT6904NukSRDqfQl7hTUVPHrxdt5KFl20lLTmTm6Bz2H25gU2UtZxT04zefmaQtfIlZCn2JK5sqD3HVH9+iqraBa84q4GvnFerqlxJXFPoSN9bvPsjVdy0F4Imvns24gZkBVyTS9RT6EpPeKzvAfW9to66pmeHZ6eRmpnDrc+tJTUzg/i9OZUROetAligRCoS8xY9+hepaU7uX+t7bz9tYq0pIT6NMrmSdWtFz5Y1CfnjzwxWkMyeoVcKUiwVHoS1TbureWP7+1jdc2VrJhzyEA8vv25KbZY/n0GYPJTE3iSEMz26pqGdy3F2kp+paX+KafAIk6zSFnxY5q5i/ZwrOrd5HYowfTRmTx8cmDmDosi0mD+5DQ6oSqnskJjOmv/r0IKPSlG3N3lm/bT0VNPQePNLL3UD3vbK9m2dYqauqayEhN5MszRzD3rAJyM3VMvUgkFPrSLdU3NfPdBav+1o8/akROGh+bOIAzCvpx4fj+pKtdI3Jc9BMj3c7+2ga+9OflvL21im+eP4qLJuSRmZpE755J6smLnCT9BEngGptDLNtaRVnVEcqqj/DkynJ2Vh/htisnM+fUgUGXJxJTFPoSqHe27+d7j77H+t01AJjBsKw0/nLdVIoK+gVcnUjsUehLIA4caeRXL5Rw71vbyMtI5bdXTGLy4L70751KcqIu/irSWRT60mmamkPMf30L63fXcMHYPD4yJpfmkPOnN7Zy5183c7CukWvOLOBbF47SvWdFuohCXzrF5spDfOvhlby7vZr0lEQefWcnvZITSEnswf7DjfzTmFy+ecEoJgzqHXSpInFFoS8dan9tA/cv3cbvXyklJTGB266czMUT+rN0SxVPrSrnwJFGrjtnOKcN6Rt0qSJxSaEvHWLr3lr+Z/EmHnt3J/VNIc4fm8tPLjuFvPBJU9NHZjN9ZHbAVYqIQl9OWvXhBj71hzepqWvkE6flM/esAkb3zwi6LBFph0JfTtqPFq5hf20Dj391unr0It2cjo2Tk/LCmt08vqKc688bqcAXiQIKfTlh+2sb+N5jqxk3IJOvfmRk0OWISATU3pETcqShme8sWEX14QbuvXYKSQnafhCJBgp9OW4b99Tw1b+8w4Y9h/jBx8bpXrMiUSSizTMzm2VmJWZWamY3tPP8XDOrNLMV4a/rWj33czNbY2brzOw2M7O2r5fo8cjyMi75/RL2HWrgnmuncO3Zw4IuSUSOwzG39M0sAbgduAAoA5aZ2UJ3X9tm6EPufn2b154FTAcmhmctAWYCr55k3RKA+Uu2cPNTa5k2vB+3XTFZNy4RiUKRtHemAKXuvhnAzB4ELgXahn57HEgFkgEDkoA9J1aqBOmu1zZzy9PrmDW+P7+7arJ6+CJRKpKf3EHAjlbTZeF5bV1uZqvMbIGZDQZw9zeBV4Bd4a/n3X1d2xea2TwzKzaz4srKyuNeCOk8oZBzx6ubuOX
pdXx0ggJfJNpF8tPbXg/e20w/CRS4+0TgReAeADMbCYwF8mn5RXGemc34wJu53+nuRe5elJOTczz1Sydxd14tqeBjv1vCrc+tZ/bEAdx2pQJfJNpF0t4pAwa3ms4H3nfjUnff12ryj8Ct4ceXAW+5+yEAM3sWmAb89UQLls5XU9fIV+5/h9c27mVwv5789opJXDJxID16aB+8SLSLZLNtGVBoZsPMLBm4AljYeoCZDWg1OQc42sLZDsw0s0QzS6JlJ+4H2jvSfTQ1h/jaA+/yxqZ9/OBj43jp387l0kmDFPgiMeKYW/ru3mRm1wPPAwnAfHdfY2Y3A8XuvhD4upnNAZqAKmBu+OULgPOA92hpCT3n7k92/GJIR7nl6XW8WlLJTy87haumDgm6HBHpYObetj0frKKiIi8uLg66jLhQU9fIvW9uo2dSAsNy0ijZXcPPnl3PdWcP46aPjQu6PBE5Dma23N2LjjVOZ+TGqeaQ840HV/DS+or3zT9/bB43Xjw2oKpEpLMp9OPUf71QwkvrK7j50vHMPmUAW/bWsq+2gZmjckhQ/14kZin049DCleX896ubuHLKYP552lDMjKz0lKDLEpEuoNCPI6GQs+CdMn7wxGrOKOjLj+dMQJdCEokvCv04sXxbFT9+ci2ryg5w2pA+3HH16SQn6kQrkXij0I9xa8oPcNtLG3l+zR7yMlP4zWcmcemkgdrCF4lTCv0oV1XbwIvr9rBiRzUrtldTUVPP0KxeDMtOo/pwIy+u20NGaiLfOL+QL54znLQUrXKReKYEiFLNIecvS7fxi+dLOFjXREZqIpMG92H8wEy2Vx3mtY2V1DeF+Mb5hXx++jB690wKumQR6QYU+lGoZHcN33p4Bat3HmT6yCxu/OhYxg3I1KUSROSYFPpR6LuPrGJXdR2/v2oys08ZoP68iERMh29Emc2Vh1i5o5ovzxzBxyZqh6yIHB+FfpR5/N2d9DCYM2lg0KWISBRS6EcRd+exFTuZPjKbPN2fVkROgEI/iryzfT87qo7w8Unt3a1SROTYFPpR5NF3dtIzKYFZE/oHXYqIRCmFfpRoaArx1KpdXDg+TydYicgJU+hHiVdLKjhwpJGPT1ZrR0ROnEI/CoRCzj1vbiU7PZlzRmYHXY6IRDGFfhS49fn1vF66j3/9p0ISE7TKROTEKUG6uYeWbecPizfzz9OGcvW0oUGXIyJRTnsEuyl359WSSr7/2GrOKczmh5eM09m3InLSFPrdzNtbqnhixU5eXl/BrgN1jMxN5/bPnqa2joh0CIV+N1Hf1Mytz5Yw//Ut9EpO4JzCbL55/igumtCfzFRdFllEOoZCvxvYXHmIrz3wLmvKDzL3rAJu+OgYUpMSgi5LRGKQQj9g+2sbuPyONwC463NFnD8uL+CKRCSWKfQDdvsrpRw40sjTXz+HsQMygy5HRGJcRHsHzWyWmZWYWamZ3dDO83PNrNLMVoS/rmv13BAze8HM1pnZWjMr6Ljyo9uOqsPc++Y2Pnl6vgJfRLrEMbf0zSwBuB24ACgDlpnZQndf22boQ+5+fTtvcS/wE3dfZGbpQOhki44Vv3yhhB494JsXjAq6FBGJE5Fs6U8BSt19s7s3AA8Cl0by5mY2Dkh090UA7n7I3Q+fcLUxZPXOAzyxopxrpw9jQO+eQZcjInEiktAfBOxoNV0WntfW5Wa2yswWmNng8LxRQLWZPWpm75rZL8J/ObyPmc0zs2IzK66srDzuhYhG//nsOvr2SuLL544IuhQRiSORhH57p4F6m+kngQJ3nwi8CNwTnp8InAN8GzgDGA7M/cCbud/p7kXuXpSTkxNh6dFrTfkBXi/dx1fOHalj8EWkS0US+mXA4FbT+UB56wHuvs/d68OTfwROb/Xad8OtoSbgceC0kys5+j2yfCfJCT345On5QZciInEmktBfBhSa2TAzSwauABa2HmBmA1pNzgHWtXptXzM7uvl+HtB2B3BcaWgK8fiKnZw/Lpe
+aclBlyMiceaYR++4e5OZXQ88DyQA8919jZndDBS7+0Lg62Y2B2gCqgi3cNy92cy+DbxkLVcLW07LXwJx69WSCqpqG7SVLyKBiOjkLHd/BnimzbwftHp8I3Djh7x2ETDxJGqMKQuWl5GdnsKMwtjfdyEi3Y8u3diF9h2q5+X1FXzitEG6aqaIBELJ04WeWFFOU8i5/DS1dkQkGAr9LrRgeRkT83szun9G0KWISJxS6HeRtzbvY+2ug9qBKyKBUuh3gVDI+ekz6xjYO5VPFw0+9gtERDqJQr8LPLmqnFVlB/jWhaN1cxQRCZRCvxO4//0qFXWNzfz8uRLGDcjkssntXbJIRKTr6CYqHeyV9RV8+b7lnDs6h89PH8aqsmp2Vh/h55+cSI8e7V3GSESk6yj0O9DeQ/V8Z8FKstNTWLqliufX7MEMzh2dw/SR2UGXJyKi0O8o7s4Nj7zHwSNNLPzaVIb2S+OJFTtZtHYP35s9NujyREQAhX6HeWjZDl5ct4ebZo9lTP+WWx9eMWUIV0wZEnBlIiJ/px25HWDbvlpufmotZ43I4trpw4IuR0TkQyn0T9LRtk6CGb/81KnaWSsi3ZpC/yQ9XFzGm5v3cePFYxnYR/e6FZHuTaF/Eipq6rjl6bVMGdaPK87QmbYi0v1pR+5x2H2gjmdX72J0XgYT8nvz44VrqWsK8Z+fOEVtHRGJCgr943D7K6X8+a1t75v3nYtGMyInPaCKRESOj0L/OCwp3cv0kVnMmzGCVTuqOVTfxLwZw4MuS0QkYgr9CJXtP8yWvbVcPW0oM0flMHOUbncoItFHO3Ij9HrpXgDOKdTlFEQkein0I/Taxr3kZaZQmKv+vYhEL4V+BEIh541N+5g+MhszHaUjItFLoR+BtbsOUlXboNaOiEQ9hX4EXtvY0s+fPkKhLyLRTaEfgSWllYzOyyA3MzXoUkRETopC/xjqGptZtnU/Z6u1IyIxIKLQN7NZZlZiZqVmdkM7z881s0ozWxH+uq7N85lmttPMft9RhXeVZVuraGgKKfRFJCYc8+QsM0sAbgcuAMqAZWa20N3Xthn6kLtf/yFv8x/A4pOqNCCLSypJSjCmDusXdCkiIictki39KUCpu2929wbgQeDSSD/AzE4H8oAXTqzE4IRCztPv7WJGYQ69knXysohEv0hCfxCwo9V0WXheW5eb2SozW2BmgwHMrAfwX8B3/tEHmNk8Mys2s+LKysoIS+98xdv2s+tAHXMmDQy6FBGRDhFJ6Ld3NpK3mX4SKHD3icCLwD3h+V8BnnH3HfwD7n6nuxe5e1FOTve5ps0TK3bSMymB88fmBV2KiEiHiKRnUQa0vkNIPlDeeoC772s1+Ufg1vDjM4FzzOwrQDqQbGaH3P0DO4O7m8bmEM+8t4vzx+WRlqLWjojEhkjSbBlQaGbDgJ3AFcBVrQeY2QB33xWenAOsA3D3z7YaMxcoiobAh5bLKO8/3Milp6q1IyKx45ih7+5NZnY98DyQAMx39zVmdjNQ7O4Lga+b2RygCagC5nZizV1i4YpyevdMYoYuoSwiMSSivoW7PwM802beD1o9vhG48Rjv8SfgT8ddYQCONDTzwprdXHLqQJITdf6aiMQOJVo7Xlq/h9qGZh21IyIxR6HfjoeLy8jLTGHqsKygSxER6VAK/TY27qlh8YZKrp46lIQeuna+iMQWhX4b81/fQkpiDz47bWjQpYiIdDiFfiv7DtXz6Ds7+cRp+fRLSw66HBGRDqfQb+X+pdupbwrxhbMLgi5FRKRTKPTD6hqbuffNrXxkdA4jczOCLkdEpFMo9MMWrixn76EGrjtneNCliIh0GoU+UFXbwK9e2MDYAZmcNUKHaYpI7Ir70Hd3vv3wSqpqG/jFJydipsM0RSR2xX3o371kCy+vr+D7s8cyYVDvoMsREelUcR36K3ZUc+tz67lofB6fO1PH5YtI7Ivb0K9rbOabD60gNyOVn19+qto6IhIX4vbuIP+zeBN
b9tZy3xem0rtXUtDliIh0ibjc0t+6t5b/fnUTl5w6kLMLs4MuR0Sky8Rd6Ls7/++J1aQk9OD/zR4bdDkiIl0q7kL/6fd28drGvXz7otHkZqYGXY6ISJeKq9BvaArxk6fXMWFQJlfrKpoiEofiKvQfe7eMXQfq+O5FY3StfBGJS3ET+s0h538Wb2bCoEzO0c5bEYlTcRP6z67exZa9tXz13JE6Jl9E4lZchL6789+vbGJ4ThoXje8fdDkiIoGJi9B/dUMla3cd5F9mjqCHevkiEsfiIvTveHUTg/r05OOTBwVdiohIoGI+9HdWH+HtLVVcPW0oSQkxv7giIv9QzKfgojW7AZg1Qb18EZGIQt/MZplZiZmVmtkN7Tw/18wqzWxF+Ou68PxJZvamma0xs1Vm9pmOXoBjWbRuDyNz0xmWndbVHy0i0u0c8yqbZpYA3A5cAJQBy8xsobuvbTP0IXe/vs28w8Dn3H2jmQ0ElpvZ8+5e3RHFH8uBw428tbmKeTN031sREYhsS38KUOrum929AXgQuDSSN3f3De6+Mfy4HKgAck602OP1SkkFzSHnwnF5XfWRIiLdWiShPwjY0Wq6LDyvrcvDLZwFZja47ZNmNgVIBja189w8Mys2s+LKysoISz+2F9buJjcjhVPz+3TYe4qIRLNIQr+9A9u9zfSTQIG7TwReBO553xuYDQD+DHze3UMfeDP3O929yN2LcnI65g+BusZmFpdUcv64PB2bLyISFknolwGtt9zzgfLWA9x9n7vXhyf/CJx+9DkzywSeBm5y97dOrtzIvblpH7UNzVyg1o6IyN9EEvrLgEIzG2ZmycAVwMLWA8Jb8kfNAdaF5ycDjwH3uvvDHVNyZF5Yu4e05ATOGpHVlR8rItKtHfPoHXdvMrPrgeeBBGC+u68xs5uBYndfCHzdzOYATUAVMDf88k8DM4AsMzs6b667r+jYxXi/puYQL67bw7mjc0lJTOjMjxIRiSoR3Rjd3Z8Bnmkz7wetHt8I3NjO6+4D7jvJGo/borV7qKyp55JTB3b1R4uIdGsxeUbu3Uu2MKRfL/XzRUTaiLnQX7mjmuJt+5l7VoHujiUi0kbMhf7dS7aQnpLIp4rygy4eQfAlAAAEmUlEQVRFRKTbianQ33XgCM+8t4vPnDGYjNSkoMsREel2Yir073ljGyF35p5VEHQpIiLdUsyE/uGGJh54ezsXje/P4H69gi5HRKRbiuiQzWhQU9fE2SOzufbsgqBLERHptmIm9PMyU7n9s6cFXYaISLcWM+0dERE5NoW+iEgcUeiLiMQRhb6ISBxR6IuIxBGFvohIHFHoi4jEEYW+iEgcMfe29zgPlplVAtuO82XZwN5OKKc7i8dlhvhc7nhcZojP5T6ZZR7q7jnHGtTtQv9EmFmxuxcFXUdXisdlhvhc7nhcZojP5e6KZVZ7R0Qkjij0RUTiSKyE/p1BFxCAeFxmiM/ljsdlhvhc7k5f5pjo6YuISGRiZUtfREQiENWhb2azzKzEzErN7Iag6+ksZjbYzF4xs3VmtsbM/jU8v5+ZLTKzjeF/+wZda0czswQze9fMngpPDzOzpeFlfsjMkoOusSOZWR8zW2Bm68Pr+8w4Wc/fDH9vrzazB8wsNRbXtZnNN7MKM1vdal6769da3BbOt1Vm1iE3DIna0DezBOB24KPAOOBKMxsXbFWdpgn4lruPBaYBXw0v6w3AS+5eCLwUno41/wqsazV9K/Dr8DLvB74QSFWd57fAc+4+BjiVlmWP6fVsZoOArwNF7j4BSACuIDbX9Z+AWW3mfdj6/ShQGP6aB9zREQVEbegDU4BSd9/s7g3Ag8ClAdfUKdx9l7u/E35cQ0sQDKJlee8JD7sH+HgwFXYOM8sHZgN3hacNOA9YEB4SU8tsZpnADOBuAHdvcPdqYnw9hyUCPc0sEegF7CIG17W7/xWoajP7w9bvpcC93uItoI+ZDTjZGqI59AcBO1pNl4XnxTQzKwAmA0u
BPHffBS2/GIDc4CrrFL8BvguEwtNZQLW7N4WnY22dDwcqgf8Nt7TuMrM0Ynw9u/tO4JfAdlrC/gCwnNhe16192PrtlIyL5tC3dubF9KFIZpYOPAJ8w90PBl1PZzKzjwEV7r689ex2hsbSOk8ETgPucPfJQC0x1sppT7iHfSkwDBgIpNHS2mgrltZ1JDrl+z2aQ78MGNxqOh8oD6iWTmdmSbQE/v3u/mh49p6jf+6F/60Iqr5OMB2YY2ZbaWndnUfLln+fcAsAYm+dlwFl7r40PL2All8CsbyeAc4Htrh7pbs3Ao8CZxHb67q1D1u/nZJx0Rz6y4DC8B7+ZFp2/CwMuKZOEe5l3w2sc/dftXpqIXBN+PE1wBNdXVtncfcb3T3f3QtoWbcvu/tngVeAT4aHxdoy7wZ2mNno8Kx/AtYSw+s5bDswzcx6hb/Xjy53zK7rNj5s/S4EPhc+imcacOBoG+ikuHvUfgEXAxuATcD3g66nE5fzbFr+rFsFrAh/XUxLj/slYGP4335B19pJy38u8FT48XDgbaAUeBhICbq+Dl7WSUBxeF0/DvSNh/UM/BhYD6wG/gykxOK6Bh6gZb9FIy1b8l/4sPVLS3vn9nC+vUfL0U0nXYPOyBURiSPR3N4REZHjpNAXEYkjCn0RkTii0BcRiSMKfRGROKLQFxGJIwp9EZE4otAXEYkj/x8pJ+TfosO9XgAAAABJRU5ErkJggg==\n", 924 | "text/plain": [ 925 | "
" 926 | ] 927 | }, 928 | "metadata": { 929 | "needs_background": "light" 930 | }, 931 | "output_type": "display_data" 932 | } 933 | ], 934 | "source": [ 935 | "# аналогичные эксперименты с max_features=2\n", 936 | "\n", 937 | "model = RandomForestClassifier(max_features=2, n_estimators=1, oob_score=False, warm_start=True, random_state=1)\n", 938 | "\n", 939 | "aucs = []\n", 940 | "for t in tqdm_notebook(list(range(1, 101))):\n", 941 | " model.set_params(n_estimators=t)\n", 942 | " model.fit(X_train, y_train)\n", 943 | " a = model.predict_proba(X_test)[:, 1]\n", 944 | " q = roc_auc_score(y_test, a)\n", 945 | " aucs.append(q)\n", 946 | " \n", 947 | "plt.plot(range(1, 101), aucs) " 948 | ] 949 | }, 950 | { 951 | "cell_type": "markdown", 952 | "metadata": {}, 953 | "source": [ 954 | "# Козырь\n", 955 | "\n", 956 | "более хорошая модель из другой библиотеки" 957 | ] 958 | }, 959 | { 960 | "cell_type": "code", 961 | "execution_count": 15, 962 | "metadata": {}, 963 | "outputs": [], 964 | "source": [ 965 | "import lightgbm as lgb\n", 966 | "model = lgb.LGBMClassifier(num_leaves=6, learning_rate=0.1, n_estimators=300)" 967 | ] 968 | }, 969 | { 970 | "cell_type": "code", 971 | "execution_count": 16, 972 | "metadata": {}, 973 | "outputs": [ 974 | { 975 | "name": "stdout", 976 | "output_type": "stream", 977 | "text": [ 978 | "[50]\tvalid_0's auc: 0.68674\tvalid_1's auc: 0.665981\n", 979 | "[100]\tvalid_0's auc: 0.71142\tvalid_1's auc: 0.675922\n", 980 | "[150]\tvalid_0's auc: 0.727589\tvalid_1's auc: 0.676426\n", 981 | "[200]\tvalid_0's auc: 0.739755\tvalid_1's auc: 0.678309\n", 982 | "[250]\tvalid_0's auc: 0.748912\tvalid_1's auc: 0.678582\n", 983 | "[300]\tvalid_0's auc: 0.757664\tvalid_1's auc: 0.678602\n", 984 | "[350]\tvalid_0's auc: 0.764987\tvalid_1's auc: 0.677316\n", 985 | "[400]\tvalid_0's auc: 0.771289\tvalid_1's auc: 0.676125\n", 986 | "[450]\tvalid_0's auc: 0.77833\tvalid_1's auc: 0.675192\n", 987 | "[500]\tvalid_0's auc: 0.784456\tvalid_1's auc: 0.674098\n" 988 | 
] 989 | } 990 | ], 991 | "source": [ 992 | "# binary objective; AUC is the metric reported for every entry of valid_sets\n", 993 | "param = {'num_leaves': 6, 'objective': 'binary', 'learning_rate': 0.1, 'metric': 'auc'}\n", 994 | "\n", 995 | "booster = lgb.train(param, train_set=lgb.Dataset(X_train, y_train), num_boost_round=500,\n", 996 | "                    valid_sets=[lgb.Dataset(X_train, y_train), lgb.Dataset(X_test, y_test)],  # logged as valid_0, valid_1\n", 997 | "                    verbose_eval=50)  # print the metrics every 50 boosting rounds" 998 | ] 999 | }, 1000 | { 1001 | "cell_type": "markdown", 1002 | "metadata": {}, 1003 | "source": [ 1004 | "тут сразу качество лучше" 1005 | ] 1006 | }, 1007 | { 1008 | "cell_type": "markdown", 1009 | "metadata": {}, 1010 | "source": [ 1011 | "### Обучение и формирование ответа" 1012 | ] 1013 | }, 1014 | { 1015 | "cell_type": "code", 1016 | "execution_count": 17, 1017 | "metadata": {}, 1018 | "outputs": [ 1019 | { 1020 | "data": { 1021 | "text/plain": [ 1022 | "LGBMClassifier(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,\n", 1023 | " learning_rate=0.1, max_depth=-1, min_child_samples=20,\n", 1024 | " min_child_weight=0.001, min_split_gain=0.0, n_estimators=300,\n", 1025 | " n_jobs=-1, num_leaves=6, objective=None, random_state=None,\n", 1026 | " reg_alpha=0.0, reg_lambda=0.0, silent=True, subsample=1.0,\n", 1027 | " subsample_for_bin=200000, subsample_freq=1)" 1028 | ] 1029 | }, 1030 | "execution_count": 17, 1031 | "metadata": {}, 1032 | "output_type": "execute_result" 1033 | } 1034 | ], 1035 | "source": [ 1036 | "model.fit(data_train, y)  # model is the LGBMClassifier built above (see the repr in this cell's output)" 1037 | ] 1038 | }, 1039 | { 1040 | "cell_type": "code", 1041 | "execution_count": 18, 1042 | "metadata": {}, 1043 | "outputs": [], 1044 | "source": [ 1045 | "test_proba = model.predict_proba(data_test)[:, 1]  # probabilities of class 1" 1046 | ] 1047 | }, 1048 | { 1049 | "cell_type": "code", 1050 | "execution_count": 19, 1051 | "metadata": {}, 1052 | "outputs": [], 1053 | "source": [ 1054 | "pd.DataFrame(test_proba, columns=['proba'], index=ids.values).to_csv('dj1_01_.csv')" 1055 | ] 1056 | } 1057 | ], 1058 | "metadata": { 1059 | "kernelspec": { 1060 | "display_name": 
"Python 3", 1061 | "language": "python", 1062 | "name": "python3" 1063 | }, 1064 | "language_info": { 1065 | "codemirror_mode": { 1066 | "name": "ipython", 1067 | "version": 3 1068 | }, 1069 | "file_extension": ".py", 1070 | "mimetype": "text/x-python", 1071 | "name": "python", 1072 | "nbconvert_exporter": "python", 1073 | "pygments_lexer": "ipython3", 1074 | "version": "3.6.7" 1075 | } 1076 | }, 1077 | "nbformat": 4, 1078 | "nbformat_minor": 2 1079 | } 1080 | --------------------------------------------------------------------------------