├── requirements.txt
├── README.md
├── LICENSE.md
├── Zimnat-lgb_best_score.ipynb
└── Zimnat_insurance_best_multy_overall.ipynb
/requirements.txt:
--------------------------------------------------------------------------------
1 | pandas==1.1.0
2 | numpy==1.18.5
3 | lightgbm==3.0.0
4 | scikit-learn==0.23.1
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Zimnat_Insurance_top-12
2 | My solution, ranked #12 on the private leaderboard. Score=0.0260809843625832
3 |
4 |
5 | 1. Run Zimnat-lgb_best_score.ipynb. It contains the LightGBM and XGBoost models along with some preprocessing.
6 | 2. Run Zimnat_insurance_cat_target+multy.ipynb. It repeats part of that preprocessing, adds some more, and trains two CatBoost models: first a multiclass model, then a binary model with target values.
7 | 3. Run Zimnat_insurance_best_multy_overall.ipynb. It contains the same preprocessing and a new CatBoost model. At the end of the notebook all models are blended and some statistics-based postprocessing is applied.
8 |
9 |
10 |
11 | P.S: bro, you are the best
12 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 Roman Zaev
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/Zimnat-lgb_best_score.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Import libraries and load data"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 1,
13 | "metadata": {
14 | "_cell_guid": "79c7e3d0-c299-4dcb-8224-4455121ee9b0",
15 | "_uuid": "d629ff2d2480ee46fbb7e2d37f6b5fab8052498a"
16 | },
17 | "outputs": [],
18 | "source": [
19 | "import pandas as pd\n",
20 | "import numpy as np\n",
21 | "import copy\n",
22 | "from itertools import combinations\n",
23 | "\n",
24 | "from lightgbm import LGBMClassifier\n",
25 | "from xgboost import XGBClassifier\n",
26 | "\n",
27 | "from sklearn.metrics import log_loss\n",
28 | "from sklearn.model_selection import StratifiedKFold, KFold, GroupKFold\n",
29 | "from sklearn.preprocessing import LabelEncoder\n",
30 | "\n",
31 | "from tqdm import tqdm, tqdm_notebook\n",
32 | "\n",
33 | "pd.set_option('display.max_columns', 100)\n",
34 | "\n",
35 | "import warnings\n",
36 | "warnings.filterwarnings(\"ignore\")\n",
37 | "import time"
38 | ]
39 | },
40 | {
41 | "cell_type": "code",
42 | "execution_count": 171,
43 | "metadata": {},
44 | "outputs": [],
45 | "source": [
46 | "train = pd.read_csv('Train.csv')\n",
47 | "test = pd.read_csv('Test.csv')\n",
48 | "sub = pd.read_csv('SampleSubmission.csv')"
49 | ]
50 | },
51 | {
52 | "cell_type": "markdown",
53 | "metadata": {},
54 | "source": [
55 | "# Checking the data"
56 | ]
57 | },
58 | {
59 | "cell_type": "code",
60 | "execution_count": 172,
61 | "metadata": {},
62 | "outputs": [
63 | {
64 | "data": {
65 | "text/html": [
66 | "
\n",
67 | "\n",
80 | "
\n",
81 | " \n",
82 | " \n",
83 | " | \n",
84 | " ID | \n",
85 | " join_date | \n",
86 | " sex | \n",
87 | " marital_status | \n",
88 | " birth_year | \n",
89 | " branch_code | \n",
90 | " occupation_code | \n",
91 | " occupation_category_code | \n",
92 | " P5DA | \n",
93 | " RIBP | \n",
94 | " 8NN1 | \n",
95 | " 7POT | \n",
96 | " 66FJ | \n",
97 | " GYSR | \n",
98 | " SOP4 | \n",
99 | " RVSZ | \n",
100 | " PYUQ | \n",
101 | " LJR9 | \n",
102 | " N2MW | \n",
103 | " AHXO | \n",
104 | " BSTQ | \n",
105 | " FM3X | \n",
106 | " K6QO | \n",
107 | " QBOL | \n",
108 | " JWFN | \n",
109 | " JZ9D | \n",
110 | " J9JW | \n",
111 | " GHYX | \n",
112 | " ECY3 | \n",
113 | "
\n",
114 | " \n",
115 | " \n",
116 | " \n",
117 | " | 0 | \n",
118 | " 4WKQSBB | \n",
119 | " 1/2/2019 | \n",
120 | " F | \n",
121 | " M | \n",
122 | " 1987 | \n",
123 | " 1X1H | \n",
124 | " 2A7I | \n",
125 | " T4MS | \n",
126 | " 0 | \n",
127 | " 0 | \n",
128 | " 0 | \n",
129 | " 0 | \n",
130 | " 0 | \n",
131 | " 0 | \n",
132 | " 0 | \n",
133 | " 1 | \n",
134 | " 0 | \n",
135 | " 0 | \n",
136 | " 0 | \n",
137 | " 0 | \n",
138 | " 0 | \n",
139 | " 0 | \n",
140 | " 1 | \n",
141 | " 0 | \n",
142 | " 0 | \n",
143 | " 0 | \n",
144 | " 0 | \n",
145 | " 0 | \n",
146 | " 0 | \n",
147 | "
\n",
148 | " \n",
149 | " | 1 | \n",
150 | " CP5S02H | \n",
151 | " 1/6/2019 | \n",
152 | " F | \n",
153 | " M | \n",
154 | " 1981 | \n",
155 | " UAOD | \n",
156 | " 2A7I | \n",
157 | " T4MS | \n",
158 | " 0 | \n",
159 | " 0 | \n",
160 | " 0 | \n",
161 | " 0 | \n",
162 | " 0 | \n",
163 | " 0 | \n",
164 | " 0 | \n",
165 | " 1 | \n",
166 | " 0 | \n",
167 | " 0 | \n",
168 | " 0 | \n",
169 | " 0 | \n",
170 | " 0 | \n",
171 | " 0 | \n",
172 | " 1 | \n",
173 | " 0 | \n",
174 | " 0 | \n",
175 | " 0 | \n",
176 | " 0 | \n",
177 | " 0 | \n",
178 | " 0 | \n",
179 | "
\n",
180 | " \n",
181 | "
\n",
182 | "
"
183 | ],
184 | "text/plain": [
185 | " ID join_date sex marital_status birth_year branch_code \\\n",
186 | "0 4WKQSBB 1/2/2019 F M 1987 1X1H \n",
187 | "1 CP5S02H 1/6/2019 F M 1981 UAOD \n",
188 | "\n",
189 | " occupation_code occupation_category_code P5DA RIBP 8NN1 7POT 66FJ \\\n",
190 | "0 2A7I T4MS 0 0 0 0 0 \n",
191 | "1 2A7I T4MS 0 0 0 0 0 \n",
192 | "\n",
193 | " GYSR SOP4 RVSZ PYUQ LJR9 N2MW AHXO BSTQ FM3X K6QO QBOL JWFN \\\n",
194 | "0 0 0 1 0 0 0 0 0 0 1 0 0 \n",
195 | "1 0 0 1 0 0 0 0 0 0 1 0 0 \n",
196 | "\n",
197 | " JZ9D J9JW GHYX ECY3 \n",
198 | "0 0 0 0 0 \n",
199 | "1 0 0 0 0 "
200 | ]
201 | },
202 | "execution_count": 172,
203 | "metadata": {},
204 | "output_type": "execute_result"
205 | }
206 | ],
207 | "source": [
208 | "train.head(2)"
209 | ]
210 | },
211 | {
212 | "cell_type": "code",
213 | "execution_count": 173,
214 | "metadata": {},
215 | "outputs": [
216 | {
217 | "data": {
218 | "text/html": [
219 | "\n",
220 | "\n",
233 | "
\n",
234 | " \n",
235 | " \n",
236 | " | \n",
237 | " ID | \n",
238 | " join_date | \n",
239 | " sex | \n",
240 | " marital_status | \n",
241 | " birth_year | \n",
242 | " branch_code | \n",
243 | " occupation_code | \n",
244 | " occupation_category_code | \n",
245 | " P5DA | \n",
246 | " RIBP | \n",
247 | " 8NN1 | \n",
248 | " 7POT | \n",
249 | " 66FJ | \n",
250 | " GYSR | \n",
251 | " SOP4 | \n",
252 | " RVSZ | \n",
253 | " PYUQ | \n",
254 | " LJR9 | \n",
255 | " N2MW | \n",
256 | " AHXO | \n",
257 | " BSTQ | \n",
258 | " FM3X | \n",
259 | " K6QO | \n",
260 | " QBOL | \n",
261 | " JWFN | \n",
262 | " JZ9D | \n",
263 | " J9JW | \n",
264 | " GHYX | \n",
265 | " ECY3 | \n",
266 | "
\n",
267 | " \n",
268 | " \n",
269 | " \n",
270 | " | 0 | \n",
271 | " F86J5PC | \n",
272 | " 1/12/2018 | \n",
273 | " M | \n",
274 | " M | \n",
275 | " 1984 | \n",
276 | " 94KC | \n",
277 | " DZRV | \n",
278 | " 90QI | \n",
279 | " 0 | \n",
280 | " 0 | \n",
281 | " 0 | \n",
282 | " 0 | \n",
283 | " 0 | \n",
284 | " 0 | \n",
285 | " 0 | \n",
286 | " 1 | \n",
287 | " 0 | \n",
288 | " 0 | \n",
289 | " 0 | \n",
290 | " 0 | \n",
291 | " 0 | \n",
292 | " 0 | \n",
293 | " 0 | \n",
294 | " 0 | \n",
295 | " 0 | \n",
296 | " 0 | \n",
297 | " 0 | \n",
298 | " 0 | \n",
299 | " 0 | \n",
300 | "
\n",
301 | " \n",
302 | " | 1 | \n",
303 | " H6141K3 | \n",
304 | " 1/10/2019 | \n",
305 | " M | \n",
306 | " M | \n",
307 | " 1996 | \n",
308 | " 1X1H | \n",
309 | " J9SY | \n",
310 | " 90QI | \n",
311 | " 0 | \n",
312 | " 0 | \n",
313 | " 0 | \n",
314 | " 0 | \n",
315 | " 0 | \n",
316 | " 0 | \n",
317 | " 0 | \n",
318 | " 1 | \n",
319 | " 0 | \n",
320 | " 0 | \n",
321 | " 0 | \n",
322 | " 0 | \n",
323 | " 0 | \n",
324 | " 0 | \n",
325 | " 1 | \n",
326 | " 0 | \n",
327 | " 0 | \n",
328 | " 0 | \n",
329 | " 0 | \n",
330 | " 0 | \n",
331 | " 0 | \n",
332 | "
\n",
333 | " \n",
334 | "
\n",
335 | "
"
336 | ],
337 | "text/plain": [
338 | " ID join_date sex marital_status birth_year branch_code \\\n",
339 | "0 F86J5PC 1/12/2018 M M 1984 94KC \n",
340 | "1 H6141K3 1/10/2019 M M 1996 1X1H \n",
341 | "\n",
342 | " occupation_code occupation_category_code P5DA RIBP 8NN1 7POT 66FJ \\\n",
343 | "0 DZRV 90QI 0 0 0 0 0 \n",
344 | "1 J9SY 90QI 0 0 0 0 0 \n",
345 | "\n",
346 | " GYSR SOP4 RVSZ PYUQ LJR9 N2MW AHXO BSTQ FM3X K6QO QBOL JWFN \\\n",
347 | "0 0 0 1 0 0 0 0 0 0 0 0 0 \n",
348 | "1 0 0 1 0 0 0 0 0 0 1 0 0 \n",
349 | "\n",
350 | " JZ9D J9JW GHYX ECY3 \n",
351 | "0 0 0 0 0 \n",
352 | "1 0 0 0 0 "
353 | ]
354 | },
355 | "execution_count": 173,
356 | "metadata": {},
357 | "output_type": "execute_result"
358 | }
359 | ],
360 | "source": [
361 | "test.head(2)"
362 | ]
363 | },
364 | {
365 | "cell_type": "code",
366 | "execution_count": 174,
367 | "metadata": {},
368 | "outputs": [
369 | {
370 | "data": {
371 | "text/html": [
372 | "\n",
373 | "\n",
386 | "
\n",
387 | " \n",
388 | " \n",
389 | " | \n",
390 | " ID X PCODE | \n",
391 | " Label | \n",
392 | "
\n",
393 | " \n",
394 | " \n",
395 | " \n",
396 | " | 0 | \n",
397 | " F86J5PC X P5DA | \n",
398 | " 0 | \n",
399 | "
\n",
400 | " \n",
401 | " | 1 | \n",
402 | " F86J5PC X RIBP | \n",
403 | " 0 | \n",
404 | "
\n",
405 | " \n",
406 | "
\n",
407 | "
"
408 | ],
409 | "text/plain": [
410 | " ID X PCODE Label\n",
411 | "0 F86J5PC X P5DA 0\n",
412 | "1 F86J5PC X RIBP 0"
413 | ]
414 | },
415 | "execution_count": 174,
416 | "metadata": {},
417 | "output_type": "execute_result"
418 | }
419 | ],
420 | "source": [
421 | "sub.head(2)"
422 | ]
423 | },
424 | {
425 | "cell_type": "markdown",
426 | "metadata": {},
427 | "source": [
428 | "# Replacing occupation codes not shared by train and test with the occupation category code"
429 | ]
430 | },
431 | {
432 | "cell_type": "code",
433 | "execution_count": 175,
434 | "metadata": {},
435 | "outputs": [],
436 | "source": [
437 | "# Occupation codes that occur in only one of train/test are blanked out and\n",
438 | "# then back-filled with the row's occupation_category_code.\n",
439 | "replace_train = list(\n",
440 | "    set(train['occupation_code'].unique().tolist()) -\n",
441 | "    set(test['occupation_code']))\n",
442 | "replace_test = list(\n",
443 | "    set(test['occupation_code'].unique().tolist()) -\n",
444 | "    set(train['occupation_code']))\n",
445 | "# Assign the filled column back instead of a chained `inplace=True` fillna,\n",
446 | "# which does not reach the parent frame when pandas Copy-on-Write is enabled.\n",
447 | "for frame, one_sided in ((train, replace_train), (test, replace_test)):\n",
448 | "    blanked = frame['occupation_code'].replace(one_sided, np.nan)\n",
449 | "    frame['occupation_code'] = blanked.fillna(frame['occupation_category_code'])"
450 | ]
451 | },
452 | {
453 | "cell_type": "markdown",
454 | "metadata": {},
455 | "source": [
456 | "# Getting the data into the right format"
457 | ]
458 | },
459 | {
460 | "cell_type": "code",
461 | "execution_count": 176,
462 | "metadata": {},
463 | "outputs": [],
464 | "source": [
465 | "# Split each training client into one row per product they own: that product\n",
466 | "# is zeroed in the flags and becomes the row's `target`, reproducing the test\n",
467 | "# situation where one owned product is hidden.\n",
468 | "N_INFO_COLS = 8  # ID ... occupation_category_code\n",
469 | "N_PRODUCT_COLS = 21  # P5DA ... ECY3\n",
470 | "# Slice the product flags by explicit position rather than `[8:-1]`: on the\n",
471 | "# 29-column frame shown above a negative end would drop ECY3 and leave every\n",
472 | "# row one value short of the 31 column names assigned below.\n",
473 | "X_train_columns = train.columns[:N_INFO_COLS + N_PRODUCT_COLS]\n",
474 | "X_train = []\n",
475 | "client_index = 0  # running id of the generated rows, stored as `ID2`\n",
476 | "\n",
477 | "for line in tqdm_notebook(train.values):\n",
478 | "\n",
479 | "    info = list(line[:N_INFO_COLS])\n",
480 | "    info_products = list(line[N_INFO_COLS:N_INFO_COLS + N_PRODUCT_COLS])\n",
481 | "\n",
482 | "    for i, owned in enumerate(info_products):\n",
483 | "        if owned != 1:\n",
484 | "            continue\n",
485 | "\n",
486 | "        client_index += 1\n",
487 | "        info_products_transformed = list(info_products)\n",
488 | "        info_products_transformed[i] = 0  # hide the product to be predicted\n",
489 | "\n",
490 | "        X_train.append(info + info_products_transformed +\n",
491 | "                       [X_train_columns[N_INFO_COLS + i]] + [client_index])\n",
492 | "X_train = pd.DataFrame(X_train)\n",
493 | "X_train.columns = [\n",
494 | "    'ID', 'join_date', 'sex', 'marital_status', 'birth_year', 'branch_code',\n",
495 | "    'occupation_code', 'occupation_category_code', 'P5DA', 'RIBP', '8NN1',\n",
496 | "    '7POT', '66FJ', 'GYSR', 'SOP4', 'RVSZ', 'PYUQ', 'LJR9', 'N2MW', 'AHXO',\n",
497 | "    'BSTQ', 'FM3X', 'K6QO', 'QBOL', 'JWFN', 'JZ9D', 'J9JW', 'GHYX', 'ECY3',\n",
498 | "    'target', 'ID2'\n",
499 | "]\n",
500 | "train = X_train.copy()"
501 | ]
502 | },
503 | {
504 | "cell_type": "code",
505 | "execution_count": 177,
506 | "metadata": {},
507 | "outputs": [],
508 | "source": [
509 | "# Keep one row per test client and record every product the client already\n",
510 | "# owns as an 'ID X PCODE' key in `true_values`.\n",
511 | "N_INFO_COLS = 8  # ID ... occupation_category_code\n",
512 | "N_PRODUCT_COLS = 21  # P5DA ... ECY3\n",
513 | "product_names = test.columns[N_INFO_COLS:N_INFO_COLS + N_PRODUCT_COLS]\n",
514 | "X_test = []\n",
515 | "true_values = []\n",
516 | "\n",
517 | "for client_index, line in enumerate(tqdm_notebook(test.values), start=1):\n",
518 | "    info = list(line[:N_INFO_COLS])\n",
519 | "    # Explicit bounds rather than `[8:-1]`, which would cut ECY3 from the\n",
520 | "    # 29-column frame and leave rows shorter than the 30 names set below.\n",
521 | "    info_products = list(line[N_INFO_COLS:N_INFO_COLS + N_PRODUCT_COLS])\n",
522 | "    X_test.append(info + info_products + [client_index])\n",
523 | "\n",
524 | "    for name, owned in zip(product_names, info_products):\n",
525 | "        if owned == 1:\n",
526 | "            true_values.append(line[0] + ' X ' + name)\n",
527 | "X_test = pd.DataFrame(X_test)\n",
528 | "X_test.columns = [\n",
529 | "    'ID', 'join_date', 'sex', 'marital_status', 'birth_year', 'branch_code',\n",
530 | "    'occupation_code', 'occupation_category_code', 'P5DA', 'RIBP', '8NN1',\n",
531 | "    '7POT', '66FJ', 'GYSR', 'SOP4', 'RVSZ', 'PYUQ', 'LJR9', 'N2MW', 'AHXO',\n",
532 | "    'BSTQ', 'FM3X', 'K6QO', 'QBOL', 'JWFN', 'JZ9D', 'J9JW', 'GHYX', 'ECY3',\n",
533 | "    'ID2'\n",
534 | "]\n",
535 | "test = X_test.copy()"
536 | ]
537 | },
538 | {
539 | "cell_type": "code",
540 | "execution_count": 178,
541 | "metadata": {},
542 | "outputs": [],
543 | "source": [
544 | "train['marital_status'] = train['marital_status'].replace(['f'], ['F'])"
545 | ]
546 | },
547 | {
548 | "cell_type": "code",
549 | "execution_count": 179,
550 | "metadata": {},
551 | "outputs": [],
552 | "source": [
553 | "df = pd.concat([train, test])  # test stacked under train, index labels kept; DataFrame.append is gone in pandas 2"
554 | ]
555 | },
556 | {
557 | "cell_type": "markdown",
558 | "metadata": {},
559 | "source": [
560 | "# Feature Engineering"
561 | ]
562 | },
563 | {
564 | "cell_type": "code",
565 | "execution_count": 180,
566 | "metadata": {},
567 | "outputs": [],
568 | "source": [
569 | "def create_date_featues(df):\n",
570 | "\n",
571 | " df['Join_Year'] = pd.to_datetime(df['join_date']).dt.year\n",
572 | "\n",
573 | " df['Join_Month'] = pd.to_datetime(df['join_date']).dt.month\n",
574 | "\n",
575 | " df['Join_Day'] = pd.to_datetime(df['join_date']).dt.day\n",
576 | "\n",
577 | " df['DayOfyear'] = pd.to_datetime(df['join_date']).dt.dayofyear\n",
578 | "\n",
579 | " return df"
580 | ]
581 | },
582 | {
583 | "cell_type": "code",
584 | "execution_count": 181,
585 | "metadata": {},
586 | "outputs": [],
587 | "source": [
588 | "df = create_date_featues(df)"
589 | ]
590 | },
591 | {
592 | "cell_type": "code",
593 | "execution_count": 182,
594 | "metadata": {},
595 | "outputs": [],
596 | "source": [
597 | "df['birth_year_bin'] = pd.cut(df['birth_year'], bins=5)"
598 | ]
599 | },
600 | {
601 | "cell_type": "code",
602 | "execution_count": 183,
603 | "metadata": {},
604 | "outputs": [
605 | {
606 | "name": "stdout",
607 | "output_type": "stream",
608 | "text": [
609 | "Wall time: 116 ms\n"
610 | ]
611 | }
612 | ],
613 | "source": [
614 | "%%time\n",
615 | "columns = [  # the 21 product flag columns\n",
616 | "    'P5DA', 'RIBP', '8NN1', '7POT', '66FJ', 'GYSR', 'SOP4', 'RVSZ', 'PYUQ',\n",
617 | "    'LJR9', 'N2MW', 'AHXO', 'BSTQ', 'FM3X', 'K6QO', 'QBOL', 'JWFN', 'JZ9D',\n",
618 | "    'J9JW', 'GHYX', 'ECY3'\n",
619 | "]\n",
620 | "for col in columns:  # per-branch total of each product flag ('sum' by name, not the builtin)\n",
621 | "    df[col + '_sum'] = df.groupby('branch_code')[col].transform('sum')"
622 | ]
623 | },
624 | {
625 | "cell_type": "code",
626 | "execution_count": 184,
627 | "metadata": {},
628 | "outputs": [],
629 | "source": [
630 | "for col in columns:  # per-join-year total of each product flag ('sum' by name, not the builtin)\n",
631 | "    yearly_total = df.groupby('Join_Year')[col].transform('sum')\n",
632 | "    df[col + '_Join_year_sum'] = yearly_total"
633 | ]
634 | },
635 | {
636 | "cell_type": "code",
637 | "execution_count": 189,
638 | "metadata": {},
639 | "outputs": [],
640 | "source": [
641 | "# Days between each row's join date and the earliest join date among the\n",
642 | "# rows that hold the product.\n",
643 | "df['join_date'] = pd.to_datetime(df['join_date'])\n",
644 | "for col in columns:\n",
645 | "    df['from_arise_col_' + col] = (df['join_date'] - df['join_date'][df[col] == 1].min()).dt.days"
646 | ]
647 | },
648 | {
649 | "cell_type": "code",
650 | "execution_count": 194,
651 | "metadata": {},
652 | "outputs": [],
653 | "source": [
654 | "df['Number_of_Insurance_Bought'] = df[columns].sum(axis=1)  # product flags selected by name, not by position 8:29\n",
655 | "\n",
656 | "\n",
657 | "def mapper(df):\n",
658 | " if df['Number_of_Insurance_Bought'] == 1:\n",
659 | " return 'One'\n",
660 | " elif (df['Number_of_Insurance_Bought'] >\n",
661 | " 1) & (df['Number_of_Insurance_Bought'] < 5):\n",
662 | " return 'Medium'\n",
663 | " elif (df['Number_of_Insurance_Bought'] >\n",
664 | " 4) & (df['Number_of_Insurance_Bought'] < 8):\n",
665 | " return 'High'\n",
666 | " else:\n",
667 | " return 'Too High'\n",
668 | "\n",
669 | "\n",
670 | "df['Insurance_Count'] = df.apply(mapper, axis=1)  # coarse label per row\n",
671 | "df.drop(columns='Number_of_Insurance_Bought', inplace=True)"
672 | ]
673 | },
674 | {
675 | "cell_type": "code",
676 | "execution_count": 195,
677 | "metadata": {},
678 | "outputs": [],
679 | "source": [
680 | "# Years from a branch's earliest Join_Year up to 2020.\n",
681 | "first_join_year = df.groupby('branch_code')['Join_Year'].transform('min')\n",
682 | "df['branch_since'] = 2020 - first_join_year"
683 | ]
684 | },
685 | {
686 | "cell_type": "code",
687 | "execution_count": 196,
688 | "metadata": {},
689 | "outputs": [],
690 | "source": [
691 | "by_branch = df.groupby('branch_code')  # reused for the four per-branch distinct counts\n",
692 | "df['Unique_customers_per_branch'] = by_branch['ID'].transform('nunique')\n",
693 | "df['Unique_Insurance_per_branch'] = by_branch['target'].transform('nunique')\n",
694 | "df['Unique_year_per_branch'] = by_branch['Join_Year'].transform('nunique')\n",
695 | "df['Unique_month_per_branch'] = by_branch['Join_Month'].transform('nunique')\n",
696 | "df['Unique_branch_per_year'] = df.groupby('Join_Year')['branch_code'].transform('nunique')"
697 | ]
698 | },
699 | {
700 | "cell_type": "code",
701 | "execution_count": 197,
702 | "metadata": {},
703 | "outputs": [],
704 | "source": [
705 | "# Age at joining; only needed to derive the group averages below, so the\n",
706 | "# helper column is removed again at the end of the cell.\n",
707 | "df['Age'] = df['Join_Year'] - df['birth_year']\n",
708 | "for new_col, key in [('Average_Age_per_branch', 'branch_code'),\n",
709 | "                     ('Average_Age_per_occupation', 'occupation_code')]:\n",
710 | "    df[new_col] = df.groupby(key)['Age'].transform('mean')\n",
711 | "for col in columns:\n",
712 | "    df[col + '_meanAge'] = df.groupby(col)['Age'].transform('mean')\n",
713 | "del df['Age']"
714 | ]
715 | },
716 | {
717 | "cell_type": "code",
718 | "execution_count": 198,
719 | "metadata": {},
720 | "outputs": [],
721 | "source": [
722 | "df.reset_index(drop=True, inplace=True)"
723 | ]
724 | },
725 | {
726 | "cell_type": "code",
727 | "execution_count": 199,
728 | "metadata": {},
729 | "outputs": [],
730 | "source": [
731 | "names_products = [\n",
732 | " 'P5DA', 'RIBP', '8NN1', '7POT', '66FJ', 'GYSR', 'SOP4', 'RVSZ', 'PYUQ',\n",
733 | " 'LJR9', 'N2MW', 'AHXO', 'BSTQ', 'FM3X', 'K6QO', 'QBOL', 'JWFN', 'JZ9D',\n",
734 | " 'J9JW', 'GHYX', 'ECY3'\n",
735 | "]"
736 | ]
737 | },
738 | {
739 | "cell_type": "code",
740 | "execution_count": 200,
741 | "metadata": {},
742 | "outputs": [
743 | {
744 | "data": {
745 | "application/vnd.jupyter.widget-view+json": {
746 | "model_id": "fb3a491c747e40fba6e9220d679c2d94",
747 | "version_major": 2,
748 | "version_minor": 0
749 | },
750 | "text/plain": [
751 | "HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))"
752 | ]
753 | },
754 | "metadata": {},
755 | "output_type": "display_data"
756 | },
757 | {
758 | "name": "stdout",
759 | "output_type": "stream",
760 | "text": [
761 | "\n"
762 | ]
763 | }
764 | ],
765 | "source": [
766 | "# Glue the codes of all products a row holds into one key, e.g. 'K6QO_RVSZ'.\n",
767 | "# Built in one pass rather than writing row by row through `df.loc`, which\n",
768 | "# costs a full index scan per row (quadratic in the number of rows).\n",
769 | "ordered_products = sorted(names_products)  # the joined codes are in sorted order\n",
770 | "owned_flags = df[ordered_products].eq(1).values\n",
771 | "df['product_comb'] = [\n",
772 | "    '_'.join(code for code, owned in zip(ordered_products, flags) if owned)\n",
773 | "    for flags in tqdm_notebook(owned_flags)]"
774 | ]
775 | },
776 | {
777 | "cell_type": "code",
778 | "execution_count": 201,
779 | "metadata": {},
780 | "outputs": [
781 | {
782 | "data": {
783 | "text/html": [
784 | "\n",
785 | "\n",
798 | "
\n",
799 | " \n",
800 | " \n",
801 | " | \n",
802 | " ID | \n",
803 | " join_date | \n",
804 | " sex | \n",
805 | " marital_status | \n",
806 | " birth_year | \n",
807 | " branch_code | \n",
808 | " occupation_code | \n",
809 | " occupation_category_code | \n",
810 | " P5DA | \n",
811 | " RIBP | \n",
812 | " 8NN1 | \n",
813 | " 7POT | \n",
814 | " 66FJ | \n",
815 | " GYSR | \n",
816 | " SOP4 | \n",
817 | " RVSZ | \n",
818 | " PYUQ | \n",
819 | " LJR9 | \n",
820 | " N2MW | \n",
821 | " AHXO | \n",
822 | " BSTQ | \n",
823 | " FM3X | \n",
824 | " K6QO | \n",
825 | " QBOL | \n",
826 | " JWFN | \n",
827 | " JZ9D | \n",
828 | " J9JW | \n",
829 | " GHYX | \n",
830 | " ECY3 | \n",
831 | " target | \n",
832 | " ID2 | \n",
833 | " Join_Year | \n",
834 | " Join_Month | \n",
835 | " Join_Day | \n",
836 | " DayOfyear | \n",
837 | " birth_year_bin | \n",
838 | " P5DA_sum | \n",
839 | " RIBP_sum | \n",
840 | " 8NN1_sum | \n",
841 | " 7POT_sum | \n",
842 | " 66FJ_sum | \n",
843 | " GYSR_sum | \n",
844 | " SOP4_sum | \n",
845 | " RVSZ_sum | \n",
846 | " PYUQ_sum | \n",
847 | " LJR9_sum | \n",
848 | " N2MW_sum | \n",
849 | " AHXO_sum | \n",
850 | " BSTQ_sum | \n",
851 | " FM3X_sum | \n",
852 | " ... | \n",
853 | " from_arise_col_8NN1 | \n",
854 | " from_arise_col_7POT | \n",
855 | " from_arise_col_66FJ | \n",
856 | " from_arise_col_GYSR | \n",
857 | " from_arise_col_SOP4 | \n",
858 | " from_arise_col_RVSZ | \n",
859 | " from_arise_col_PYUQ | \n",
860 | " from_arise_col_LJR9 | \n",
861 | " from_arise_col_N2MW | \n",
862 | " from_arise_col_AHXO | \n",
863 | " from_arise_col_BSTQ | \n",
864 | " from_arise_col_FM3X | \n",
865 | " from_arise_col_K6QO | \n",
866 | " from_arise_col_QBOL | \n",
867 | " from_arise_col_JWFN | \n",
868 | " from_arise_col_JZ9D | \n",
869 | " from_arise_col_J9JW | \n",
870 | " from_arise_col_GHYX | \n",
871 | " from_arise_col_ECY3 | \n",
872 | " Insurance_Count | \n",
873 | " branch_since | \n",
874 | " Unique_customers_per_branch | \n",
875 | " Unique_Insurance_per_branch | \n",
876 | " Unique_year_per_branch | \n",
877 | " Unique_month_per_branch | \n",
878 | " Unique_branch_per_year | \n",
879 | " Average_Age_per_branch | \n",
880 | " Average_Age_per_occupation | \n",
881 | " P5DA_meanAge | \n",
882 | " RIBP_meanAge | \n",
883 | " 8NN1_meanAge | \n",
884 | " 7POT_meanAge | \n",
885 | " 66FJ_meanAge | \n",
886 | " GYSR_meanAge | \n",
887 | " SOP4_meanAge | \n",
888 | " RVSZ_meanAge | \n",
889 | " PYUQ_meanAge | \n",
890 | " LJR9_meanAge | \n",
891 | " N2MW_meanAge | \n",
892 | " AHXO_meanAge | \n",
893 | " BSTQ_meanAge | \n",
894 | " FM3X_meanAge | \n",
895 | " K6QO_meanAge | \n",
896 | " QBOL_meanAge | \n",
897 | " JWFN_meanAge | \n",
898 | " JZ9D_meanAge | \n",
899 | " J9JW_meanAge | \n",
900 | " GHYX_meanAge | \n",
901 | " ECY3_meanAge | \n",
902 | " product_comb | \n",
903 | "
\n",
904 | " \n",
905 | " \n",
906 | " \n",
907 | " | 0 | \n",
908 | " 4WKQSBB | \n",
909 | " 2019-01-02 | \n",
910 | " F | \n",
911 | " M | \n",
912 | " 1987 | \n",
913 | " 1X1H | \n",
914 | " 2A7I | \n",
915 | " T4MS | \n",
916 | " 0 | \n",
917 | " 0 | \n",
918 | " 0 | \n",
919 | " 0 | \n",
920 | " 0 | \n",
921 | " 0 | \n",
922 | " 0 | \n",
923 | " 0 | \n",
924 | " 0 | \n",
925 | " 0 | \n",
926 | " 0 | \n",
927 | " 0 | \n",
928 | " 0 | \n",
929 | " 0 | \n",
930 | " 1 | \n",
931 | " 0 | \n",
932 | " 0 | \n",
933 | " 0 | \n",
934 | " 0 | \n",
935 | " 0 | \n",
936 | " 0 | \n",
937 | " RVSZ | \n",
938 | " 1 | \n",
939 | " 2019.0 | \n",
940 | " 1.0 | \n",
941 | " 2.0 | \n",
942 | " 2.0 | \n",
943 | " (1979.4, 1995.2] | \n",
944 | " 0 | \n",
945 | " 14 | \n",
946 | " 0 | \n",
947 | " 5 | \n",
948 | " 1 | \n",
949 | " 0 | \n",
950 | " 0 | \n",
951 | " 1598 | \n",
952 | " 8 | \n",
953 | " 0 | \n",
954 | " 4 | \n",
955 | " 0 | \n",
956 | " 8 | \n",
957 | " 0 | \n",
958 | " ... | \n",
959 | " 3284.0 | \n",
960 | " 3284.0 | \n",
961 | " 3282.0 | \n",
962 | " 2191.0 | \n",
963 | " 3282.0 | \n",
964 | " 3284.0 | \n",
965 | " 3284.0 | \n",
966 | " 3284.0 | \n",
967 | " 2919.0 | \n",
968 | " 3278.0 | \n",
969 | " 3277.0 | \n",
970 | " 2554.0 | \n",
971 | " 3282.0 | \n",
972 | " 3284.0 | \n",
973 | " 2912.0 | \n",
974 | " 3278.0 | \n",
975 | " 3278.0 | \n",
976 | " 3277.0 | \n",
977 | " 3281.0 | \n",
978 | " One | \n",
979 | " 2.0 | \n",
980 | " 1738 | \n",
981 | " 14 | \n",
982 | " 3 | \n",
983 | " 1 | \n",
984 | " 14.0 | \n",
985 | " 37.532588 | \n",
986 | " 38.035144 | \n",
987 | " 38.412348 | \n",
988 | " 38.350148 | \n",
989 | " 38.411398 | \n",
990 | " 38.41096 | \n",
991 | " 38.405632 | \n",
992 | " 38.414079 | \n",
993 | " 38.429297 | \n",
994 | " 38.36121 | \n",
995 | " 38.396291 | \n",
996 | " 38.419403 | \n",
997 | " 38.365864 | \n",
998 | " 38.382695 | \n",
999 | " 38.413804 | \n",
1000 | " 38.413013 | \n",
1001 | " 38.384302 | \n",
1002 | " 37.673714 | \n",
1003 | " 38.4074 | \n",
1004 | " 38.429089 | \n",
1005 | " 38.431485 | \n",
1006 | " 38.408466 | \n",
1007 | " 38.360615 | \n",
1008 | " K6QO | \n",
1009 | "
\n",
1010 | " \n",
1011 | " | 1 | \n",
1012 | " 4WKQSBB | \n",
1013 | " 2019-01-02 | \n",
1014 | " F | \n",
1015 | " M | \n",
1016 | " 1987 | \n",
1017 | " 1X1H | \n",
1018 | " 2A7I | \n",
1019 | " T4MS | \n",
1020 | " 0 | \n",
1021 | " 0 | \n",
1022 | " 0 | \n",
1023 | " 0 | \n",
1024 | " 0 | \n",
1025 | " 0 | \n",
1026 | " 0 | \n",
1027 | " 1 | \n",
1028 | " 0 | \n",
1029 | " 0 | \n",
1030 | " 0 | \n",
1031 | " 0 | \n",
1032 | " 0 | \n",
1033 | " 0 | \n",
1034 | " 0 | \n",
1035 | " 0 | \n",
1036 | " 0 | \n",
1037 | " 0 | \n",
1038 | " 0 | \n",
1039 | " 0 | \n",
1040 | " 0 | \n",
1041 | " K6QO | \n",
1042 | " 2 | \n",
1043 | " 2019.0 | \n",
1044 | " 1.0 | \n",
1045 | " 2.0 | \n",
1046 | " 2.0 | \n",
1047 | " (1979.4, 1995.2] | \n",
1048 | " 0 | \n",
1049 | " 14 | \n",
1050 | " 0 | \n",
1051 | " 5 | \n",
1052 | " 1 | \n",
1053 | " 0 | \n",
1054 | " 0 | \n",
1055 | " 1598 | \n",
1056 | " 8 | \n",
1057 | " 0 | \n",
1058 | " 4 | \n",
1059 | " 0 | \n",
1060 | " 8 | \n",
1061 | " 0 | \n",
1062 | " ... | \n",
1063 | " 3284.0 | \n",
1064 | " 3284.0 | \n",
1065 | " 3282.0 | \n",
1066 | " 2191.0 | \n",
1067 | " 3282.0 | \n",
1068 | " 3284.0 | \n",
1069 | " 3284.0 | \n",
1070 | " 3284.0 | \n",
1071 | " 2919.0 | \n",
1072 | " 3278.0 | \n",
1073 | " 3277.0 | \n",
1074 | " 2554.0 | \n",
1075 | " 3282.0 | \n",
1076 | " 3284.0 | \n",
1077 | " 2912.0 | \n",
1078 | " 3278.0 | \n",
1079 | " 3278.0 | \n",
1080 | " 3277.0 | \n",
1081 | " 3281.0 | \n",
1082 | " One | \n",
1083 | " 2.0 | \n",
1084 | " 1738 | \n",
1085 | " 14 | \n",
1086 | " 3 | \n",
1087 | " 1 | \n",
1088 | " 14.0 | \n",
1089 | " 37.532588 | \n",
1090 | " 38.035144 | \n",
1091 | " 38.412348 | \n",
1092 | " 38.350148 | \n",
1093 | " 38.411398 | \n",
1094 | " 38.41096 | \n",
1095 | " 38.405632 | \n",
1096 | " 38.414079 | \n",
1097 | " 38.429297 | \n",
1098 | " 38.46995 | \n",
1099 | " 38.396291 | \n",
1100 | " 38.419403 | \n",
1101 | " 38.365864 | \n",
1102 | " 38.382695 | \n",
1103 | " 38.413804 | \n",
1104 | " 38.413013 | \n",
1105 | " 38.436159 | \n",
1106 | " 37.673714 | \n",
1107 | " 38.4074 | \n",
1108 | " 38.429089 | \n",
1109 | " 38.431485 | \n",
1110 | " 38.408466 | \n",
1111 | " 38.360615 | \n",
1112 | " RVSZ | \n",
1113 | "
\n",
1114 | " \n",
1115 | " | 2 | \n",
1116 | " CP5S02H | \n",
1117 | " 2019-01-06 | \n",
1118 | " F | \n",
1119 | " M | \n",
1120 | " 1981 | \n",
1121 | " UAOD | \n",
1122 | " 2A7I | \n",
1123 | " T4MS | \n",
1124 | " 0 | \n",
1125 | " 0 | \n",
1126 | " 0 | \n",
1127 | " 0 | \n",
1128 | " 0 | \n",
1129 | " 0 | \n",
1130 | " 0 | \n",
1131 | " 0 | \n",
1132 | " 0 | \n",
1133 | " 0 | \n",
1134 | " 0 | \n",
1135 | " 0 | \n",
1136 | " 0 | \n",
1137 | " 0 | \n",
1138 | " 1 | \n",
1139 | " 0 | \n",
1140 | " 0 | \n",
1141 | " 0 | \n",
1142 | " 0 | \n",
1143 | " 0 | \n",
1144 | " 0 | \n",
1145 | " RVSZ | \n",
1146 | " 3 | \n",
1147 | " 2019.0 | \n",
1148 | " 1.0 | \n",
1149 | " 6.0 | \n",
1150 | " 6.0 | \n",
1151 | " (1979.4, 1995.2] | \n",
1152 | " 0 | \n",
1153 | " 54 | \n",
1154 | " 6 | \n",
1155 | " 13 | \n",
1156 | " 6 | \n",
1157 | " 0 | \n",
1158 | " 18 | \n",
1159 | " 4191 | \n",
1160 | " 360 | \n",
1161 | " 44 | \n",
1162 | " 25 | \n",
1163 | " 8 | \n",
1164 | " 37 | \n",
1165 | " 0 | \n",
1166 | " ... | \n",
1167 | " 3288.0 | \n",
1168 | " 3288.0 | \n",
1169 | " 3286.0 | \n",
1170 | " 2195.0 | \n",
1171 | " 3286.0 | \n",
1172 | " 3288.0 | \n",
1173 | " 3288.0 | \n",
1174 | " 3288.0 | \n",
1175 | " 2923.0 | \n",
1176 | " 3282.0 | \n",
1177 | " 3281.0 | \n",
1178 | " 2558.0 | \n",
1179 | " 3286.0 | \n",
1180 | " 3288.0 | \n",
1181 | " 2916.0 | \n",
1182 | " 3282.0 | \n",
1183 | " 3282.0 | \n",
1184 | " 3281.0 | \n",
1185 | " 3285.0 | \n",
1186 | " One | \n",
1187 | " 9.0 | \n",
1188 | " 4714 | \n",
1189 | " 18 | \n",
1190 | " 10 | \n",
1191 | " 1 | \n",
1192 | " 14.0 | \n",
1193 | " 37.961993 | \n",
1194 | " 38.035144 | \n",
1195 | " 38.412348 | \n",
1196 | " 38.350148 | \n",
1197 | " 38.411398 | \n",
1198 | " 38.41096 | \n",
1199 | " 38.405632 | \n",
1200 | " 38.414079 | \n",
1201 | " 38.429297 | \n",
1202 | " 38.36121 | \n",
1203 | " 38.396291 | \n",
1204 | " 38.419403 | \n",
1205 | " 38.365864 | \n",
1206 | " 38.382695 | \n",
1207 | " 38.413804 | \n",
1208 | " 38.413013 | \n",
1209 | " 38.384302 | \n",
1210 | " 37.673714 | \n",
1211 | " 38.4074 | \n",
1212 | " 38.429089 | \n",
1213 | " 38.431485 | \n",
1214 | " 38.408466 | \n",
1215 | " 38.360615 | \n",
1216 | " K6QO | \n",
1217 | "
\n",
1218 | " \n",
1219 | " | 3 | \n",
1220 | " CP5S02H | \n",
1221 | " 2019-01-06 | \n",
1222 | " F | \n",
1223 | " M | \n",
1224 | " 1981 | \n",
1225 | " UAOD | \n",
1226 | " 2A7I | \n",
1227 | " T4MS | \n",
1228 | " 0 | \n",
1229 | " 0 | \n",
1230 | " 0 | \n",
1231 | " 0 | \n",
1232 | " 0 | \n",
1233 | " 0 | \n",
1234 | " 0 | \n",
1235 | " 1 | \n",
1236 | " 0 | \n",
1237 | " 0 | \n",
1238 | " 0 | \n",
1239 | " 0 | \n",
1240 | " 0 | \n",
1241 | " 0 | \n",
1242 | " 0 | \n",
1243 | " 0 | \n",
1244 | " 0 | \n",
1245 | " 0 | \n",
1246 | " 0 | \n",
1247 | " 0 | \n",
1248 | " 0 | \n",
1249 | " K6QO | \n",
1250 | " 4 | \n",
1251 | " 2019.0 | \n",
1252 | " 1.0 | \n",
1253 | " 6.0 | \n",
1254 | " 6.0 | \n",
1255 | " (1979.4, 1995.2] | \n",
1256 | " 0 | \n",
1257 | " 54 | \n",
1258 | " 6 | \n",
1259 | " 13 | \n",
1260 | " 6 | \n",
1261 | " 0 | \n",
1262 | " 18 | \n",
1263 | " 4191 | \n",
1264 | " 360 | \n",
1265 | " 44 | \n",
1266 | " 25 | \n",
1267 | " 8 | \n",
1268 | " 37 | \n",
1269 | " 0 | \n",
1270 | " ... | \n",
1271 | " 3288.0 | \n",
1272 | " 3288.0 | \n",
1273 | " 3286.0 | \n",
1274 | " 2195.0 | \n",
1275 | " 3286.0 | \n",
1276 | " 3288.0 | \n",
1277 | " 3288.0 | \n",
1278 | " 3288.0 | \n",
1279 | " 2923.0 | \n",
1280 | " 3282.0 | \n",
1281 | " 3281.0 | \n",
1282 | " 2558.0 | \n",
1283 | " 3286.0 | \n",
1284 | " 3288.0 | \n",
1285 | " 2916.0 | \n",
1286 | " 3282.0 | \n",
1287 | " 3282.0 | \n",
1288 | " 3281.0 | \n",
1289 | " 3285.0 | \n",
1290 | " One | \n",
1291 | " 9.0 | \n",
1292 | " 4714 | \n",
1293 | " 18 | \n",
1294 | " 10 | \n",
1295 | " 1 | \n",
1296 | " 14.0 | \n",
1297 | " 37.961993 | \n",
1298 | " 38.035144 | \n",
1299 | " 38.412348 | \n",
1300 | " 38.350148 | \n",
1301 | " 38.411398 | \n",
1302 | " 38.41096 | \n",
1303 | " 38.405632 | \n",
1304 | " 38.414079 | \n",
1305 | " 38.429297 | \n",
1306 | " 38.46995 | \n",
1307 | " 38.396291 | \n",
1308 | " 38.419403 | \n",
1309 | " 38.365864 | \n",
1310 | " 38.382695 | \n",
1311 | " 38.413804 | \n",
1312 | " 38.413013 | \n",
1313 | " 38.436159 | \n",
1314 | " 37.673714 | \n",
1315 | " 38.4074 | \n",
1316 | " 38.429089 | \n",
1317 | " 38.431485 | \n",
1318 | " 38.408466 | \n",
1319 | " 38.360615 | \n",
1320 | " RVSZ | \n",
1321 | "
\n",
1322 | " \n",
1323 | " | 4 | \n",
1324 | " 2YKDILJ | \n",
1325 | " 2013-01-06 | \n",
1326 | " M | \n",
1327 | " U | \n",
1328 | " 1991 | \n",
1329 | " 748L | \n",
1330 | " QZYX | \n",
1331 | " 90QI | \n",
1332 | " 0 | \n",
1333 | " 0 | \n",
1334 | " 0 | \n",
1335 | " 0 | \n",
1336 | " 0 | \n",
1337 | " 0 | \n",
1338 | " 0 | \n",
1339 | " 1 | \n",
1340 | " 0 | \n",
1341 | " 0 | \n",
1342 | " 0 | \n",
1343 | " 0 | \n",
1344 | " 0 | \n",
1345 | " 0 | \n",
1346 | " 0 | \n",
1347 | " 0 | \n",
1348 | " 0 | \n",
1349 | " 0 | \n",
1350 | " 0 | \n",
1351 | " 0 | \n",
1352 | " 1 | \n",
1353 | " SOP4 | \n",
1354 | " 5 | \n",
1355 | " 2013.0 | \n",
1356 | " 1.0 | \n",
1357 | " 6.0 | \n",
1358 | " 6.0 | \n",
1359 | " (1979.4, 1995.2] | \n",
1360 | " 7 | \n",
1361 | " 2031 | \n",
1362 | " 144 | \n",
1363 | " 259 | \n",
1364 | " 366 | \n",
1365 | " 2 | \n",
1366 | " 416 | \n",
1367 | " 10711 | \n",
1368 | " 1142 | \n",
1369 | " 254 | \n",
1370 | " 161 | \n",
1371 | " 38 | \n",
1372 | " 391 | \n",
1373 | " 133 | \n",
1374 | " ... | \n",
1375 | " 1097.0 | \n",
1376 | " 1097.0 | \n",
1377 | " 1095.0 | \n",
1378 | " 4.0 | \n",
1379 | " 1095.0 | \n",
1380 | " 1097.0 | \n",
1381 | " 1097.0 | \n",
1382 | " 1097.0 | \n",
1383 | " 732.0 | \n",
1384 | " 1091.0 | \n",
1385 | " 1090.0 | \n",
1386 | " 367.0 | \n",
1387 | " 1095.0 | \n",
1388 | " 1097.0 | \n",
1389 | " 725.0 | \n",
1390 | " 1091.0 | \n",
1391 | " 1091.0 | \n",
1392 | " 1090.0 | \n",
1393 | " 1094.0 | \n",
1394 | " Medium | \n",
1395 | " 10.0 | \n",
1396 | " 10919 | \n",
1397 | " 21 | \n",
1398 | " 11 | \n",
1399 | " 6 | \n",
1400 | " 7.0 | \n",
1401 | " 38.560827 | \n",
1402 | " 31.423673 | \n",
1403 | " 38.412348 | \n",
1404 | " 38.350148 | \n",
1405 | " 38.411398 | \n",
1406 | " 38.41096 | \n",
1407 | " 38.405632 | \n",
1408 | " 38.414079 | \n",
1409 | " 38.429297 | \n",
1410 | " 38.46995 | \n",
1411 | " 38.396291 | \n",
1412 | " 38.419403 | \n",
1413 | " 38.365864 | \n",
1414 | " 38.382695 | \n",
1415 | " 38.413804 | \n",
1416 | " 38.413013 | \n",
1417 | " 38.436159 | \n",
1418 | " 37.673714 | \n",
1419 | " 38.4074 | \n",
1420 | " 38.429089 | \n",
1421 | " 38.431485 | \n",
1422 | " 38.408466 | \n",
1423 | " 40.115056 | \n",
1424 | " ECY3_RVSZ | \n",
1425 | "
\n",
1426 | " \n",
1427 | "
\n",
1428 | "
5 rows × 130 columns
\n",
1429 | "
"
1430 | ],
1431 | "text/plain": [
1432 | " ID join_date sex marital_status birth_year branch_code \\\n",
1433 | "0 4WKQSBB 2019-01-02 F M 1987 1X1H \n",
1434 | "1 4WKQSBB 2019-01-02 F M 1987 1X1H \n",
1435 | "2 CP5S02H 2019-01-06 F M 1981 UAOD \n",
1436 | "3 CP5S02H 2019-01-06 F M 1981 UAOD \n",
1437 | "4 2YKDILJ 2013-01-06 M U 1991 748L \n",
1438 | "\n",
1439 | " occupation_code occupation_category_code P5DA RIBP 8NN1 7POT 66FJ \\\n",
1440 | "0 2A7I T4MS 0 0 0 0 0 \n",
1441 | "1 2A7I T4MS 0 0 0 0 0 \n",
1442 | "2 2A7I T4MS 0 0 0 0 0 \n",
1443 | "3 2A7I T4MS 0 0 0 0 0 \n",
1444 | "4 QZYX 90QI 0 0 0 0 0 \n",
1445 | "\n",
1446 | " GYSR SOP4 RVSZ PYUQ LJR9 N2MW AHXO BSTQ FM3X K6QO QBOL JWFN \\\n",
1447 | "0 0 0 0 0 0 0 0 0 0 1 0 0 \n",
1448 | "1 0 0 1 0 0 0 0 0 0 0 0 0 \n",
1449 | "2 0 0 0 0 0 0 0 0 0 1 0 0 \n",
1450 | "3 0 0 1 0 0 0 0 0 0 0 0 0 \n",
1451 | "4 0 0 1 0 0 0 0 0 0 0 0 0 \n",
1452 | "\n",
1453 | " JZ9D J9JW GHYX ECY3 target ID2 Join_Year Join_Month Join_Day \\\n",
1454 | "0 0 0 0 0 RVSZ 1 2019.0 1.0 2.0 \n",
1455 | "1 0 0 0 0 K6QO 2 2019.0 1.0 2.0 \n",
1456 | "2 0 0 0 0 RVSZ 3 2019.0 1.0 6.0 \n",
1457 | "3 0 0 0 0 K6QO 4 2019.0 1.0 6.0 \n",
1458 | "4 0 0 0 1 SOP4 5 2013.0 1.0 6.0 \n",
1459 | "\n",
1460 | " DayOfyear birth_year_bin P5DA_sum RIBP_sum 8NN1_sum 7POT_sum \\\n",
1461 | "0 2.0 (1979.4, 1995.2] 0 14 0 5 \n",
1462 | "1 2.0 (1979.4, 1995.2] 0 14 0 5 \n",
1463 | "2 6.0 (1979.4, 1995.2] 0 54 6 13 \n",
1464 | "3 6.0 (1979.4, 1995.2] 0 54 6 13 \n",
1465 | "4 6.0 (1979.4, 1995.2] 7 2031 144 259 \n",
1466 | "\n",
1467 | " 66FJ_sum GYSR_sum SOP4_sum RVSZ_sum PYUQ_sum LJR9_sum N2MW_sum \\\n",
1468 | "0 1 0 0 1598 8 0 4 \n",
1469 | "1 1 0 0 1598 8 0 4 \n",
1470 | "2 6 0 18 4191 360 44 25 \n",
1471 | "3 6 0 18 4191 360 44 25 \n",
1472 | "4 366 2 416 10711 1142 254 161 \n",
1473 | "\n",
1474 | " AHXO_sum BSTQ_sum FM3X_sum ... from_arise_col_8NN1 \\\n",
1475 | "0 0 8 0 ... 3284.0 \n",
1476 | "1 0 8 0 ... 3284.0 \n",
1477 | "2 8 37 0 ... 3288.0 \n",
1478 | "3 8 37 0 ... 3288.0 \n",
1479 | "4 38 391 133 ... 1097.0 \n",
1480 | "\n",
1481 | " from_arise_col_7POT from_arise_col_66FJ from_arise_col_GYSR \\\n",
1482 | "0 3284.0 3282.0 2191.0 \n",
1483 | "1 3284.0 3282.0 2191.0 \n",
1484 | "2 3288.0 3286.0 2195.0 \n",
1485 | "3 3288.0 3286.0 2195.0 \n",
1486 | "4 1097.0 1095.0 4.0 \n",
1487 | "\n",
1488 | " from_arise_col_SOP4 from_arise_col_RVSZ from_arise_col_PYUQ \\\n",
1489 | "0 3282.0 3284.0 3284.0 \n",
1490 | "1 3282.0 3284.0 3284.0 \n",
1491 | "2 3286.0 3288.0 3288.0 \n",
1492 | "3 3286.0 3288.0 3288.0 \n",
1493 | "4 1095.0 1097.0 1097.0 \n",
1494 | "\n",
1495 | " from_arise_col_LJR9 from_arise_col_N2MW from_arise_col_AHXO \\\n",
1496 | "0 3284.0 2919.0 3278.0 \n",
1497 | "1 3284.0 2919.0 3278.0 \n",
1498 | "2 3288.0 2923.0 3282.0 \n",
1499 | "3 3288.0 2923.0 3282.0 \n",
1500 | "4 1097.0 732.0 1091.0 \n",
1501 | "\n",
1502 | " from_arise_col_BSTQ from_arise_col_FM3X from_arise_col_K6QO \\\n",
1503 | "0 3277.0 2554.0 3282.0 \n",
1504 | "1 3277.0 2554.0 3282.0 \n",
1505 | "2 3281.0 2558.0 3286.0 \n",
1506 | "3 3281.0 2558.0 3286.0 \n",
1507 | "4 1090.0 367.0 1095.0 \n",
1508 | "\n",
1509 | " from_arise_col_QBOL from_arise_col_JWFN from_arise_col_JZ9D \\\n",
1510 | "0 3284.0 2912.0 3278.0 \n",
1511 | "1 3284.0 2912.0 3278.0 \n",
1512 | "2 3288.0 2916.0 3282.0 \n",
1513 | "3 3288.0 2916.0 3282.0 \n",
1514 | "4 1097.0 725.0 1091.0 \n",
1515 | "\n",
1516 | " from_arise_col_J9JW from_arise_col_GHYX from_arise_col_ECY3 \\\n",
1517 | "0 3278.0 3277.0 3281.0 \n",
1518 | "1 3278.0 3277.0 3281.0 \n",
1519 | "2 3282.0 3281.0 3285.0 \n",
1520 | "3 3282.0 3281.0 3285.0 \n",
1521 | "4 1091.0 1090.0 1094.0 \n",
1522 | "\n",
1523 | " Insurance_Count branch_since Unique_customers_per_branch \\\n",
1524 | "0 One 2.0 1738 \n",
1525 | "1 One 2.0 1738 \n",
1526 | "2 One 9.0 4714 \n",
1527 | "3 One 9.0 4714 \n",
1528 | "4 Medium 10.0 10919 \n",
1529 | "\n",
1530 | " Unique_Insurance_per_branch Unique_year_per_branch \\\n",
1531 | "0 14 3 \n",
1532 | "1 14 3 \n",
1533 | "2 18 10 \n",
1534 | "3 18 10 \n",
1535 | "4 21 11 \n",
1536 | "\n",
1537 | " Unique_month_per_branch Unique_branch_per_year Average_Age_per_branch \\\n",
1538 | "0 1 14.0 37.532588 \n",
1539 | "1 1 14.0 37.532588 \n",
1540 | "2 1 14.0 37.961993 \n",
1541 | "3 1 14.0 37.961993 \n",
1542 | "4 6 7.0 38.560827 \n",
1543 | "\n",
1544 | " Average_Age_per_occupation P5DA_meanAge RIBP_meanAge 8NN1_meanAge \\\n",
1545 | "0 38.035144 38.412348 38.350148 38.411398 \n",
1546 | "1 38.035144 38.412348 38.350148 38.411398 \n",
1547 | "2 38.035144 38.412348 38.350148 38.411398 \n",
1548 | "3 38.035144 38.412348 38.350148 38.411398 \n",
1549 | "4 31.423673 38.412348 38.350148 38.411398 \n",
1550 | "\n",
1551 | " 7POT_meanAge 66FJ_meanAge GYSR_meanAge SOP4_meanAge RVSZ_meanAge \\\n",
1552 | "0 38.41096 38.405632 38.414079 38.429297 38.36121 \n",
1553 | "1 38.41096 38.405632 38.414079 38.429297 38.46995 \n",
1554 | "2 38.41096 38.405632 38.414079 38.429297 38.36121 \n",
1555 | "3 38.41096 38.405632 38.414079 38.429297 38.46995 \n",
1556 | "4 38.41096 38.405632 38.414079 38.429297 38.46995 \n",
1557 | "\n",
1558 | " PYUQ_meanAge LJR9_meanAge N2MW_meanAge AHXO_meanAge BSTQ_meanAge \\\n",
1559 | "0 38.396291 38.419403 38.365864 38.382695 38.413804 \n",
1560 | "1 38.396291 38.419403 38.365864 38.382695 38.413804 \n",
1561 | "2 38.396291 38.419403 38.365864 38.382695 38.413804 \n",
1562 | "3 38.396291 38.419403 38.365864 38.382695 38.413804 \n",
1563 | "4 38.396291 38.419403 38.365864 38.382695 38.413804 \n",
1564 | "\n",
1565 | " FM3X_meanAge K6QO_meanAge QBOL_meanAge JWFN_meanAge JZ9D_meanAge \\\n",
1566 | "0 38.413013 38.384302 37.673714 38.4074 38.429089 \n",
1567 | "1 38.413013 38.436159 37.673714 38.4074 38.429089 \n",
1568 | "2 38.413013 38.384302 37.673714 38.4074 38.429089 \n",
1569 | "3 38.413013 38.436159 37.673714 38.4074 38.429089 \n",
1570 | "4 38.413013 38.436159 37.673714 38.4074 38.429089 \n",
1571 | "\n",
1572 | " J9JW_meanAge GHYX_meanAge ECY3_meanAge product_comb \n",
1573 | "0 38.431485 38.408466 38.360615 K6QO \n",
1574 | "1 38.431485 38.408466 38.360615 RVSZ \n",
1575 | "2 38.431485 38.408466 38.360615 K6QO \n",
1576 | "3 38.431485 38.408466 38.360615 RVSZ \n",
1577 | "4 38.431485 38.408466 40.115056 ECY3_RVSZ \n",
1578 | "\n",
1579 | "[5 rows x 130 columns]"
1580 | ]
1581 | },
1582 | "execution_count": 201,
1583 | "metadata": {},
1584 | "output_type": "execute_result"
1585 | }
1586 | ],
1587 | "source": [
1588 | "df.head()"
1589 | ]
1590 | },
1591 | {
1592 | "cell_type": "markdown",
1593 | "metadata": {},
1594 | "source": [
1595 | "## Interaction Feature"
1596 | ]
1597 | },
1598 | {
1599 | "cell_type": "code",
1600 | "execution_count": 202,
1601 | "metadata": {},
1602 | "outputs": [],
1603 | "source": [
1604 | "for col_name in ('Join_Year', 'birth_year_bin'):\n",
1605 | "    df[col_name] = df[col_name].astype(str)  # cast both columns to plain strings"
1606 | ]
1607 | },
1608 | {
1609 | "cell_type": "code",
1610 | "execution_count": 203,
1611 | "metadata": {},
1612 | "outputs": [],
1613 | "source": [
1614 | "for feat, partner in (('bc_oc', 'occupation_code'), ('bc_occ', 'occupation_category_code')):\n",
1615 | "    df[feat] = df['branch_code'] + '_' + df[partner]  # '<branch>_<code>' interaction key"
1616 | ]
1617 | },
1618 | {
1619 | "cell_type": "markdown",
1620 | "metadata": {},
1621 | "source": [
1622 | "## Label Encoding"
1623 | ]
1624 | },
1625 | {
1626 | "cell_type": "code",
1627 | "execution_count": 204,
1628 | "metadata": {},
1629 | "outputs": [],
1630 | "source": [
1631 | "from sklearn.preprocessing import LabelEncoder\n",
1632 | "le = LabelEncoder()\n",
1633 | "# Replace each categorical column with integer codes; `le` is refit on every column.\n",
1634 | "label_encoded_cols = ('product_comb', 'Insurance_Count', 'sex', 'marital_status',\n",
1635 | "                      'branch_code', 'occupation_category_code', 'occupation_code')\n",
1636 | "for col in label_encoded_cols:\n",
1637 | "    df[col] = le.fit_transform(df[col])"
1638 | ]
1639 | },
1640 | {
1641 | "cell_type": "markdown",
1642 | "metadata": {},
1643 | "source": [
1644 | "## Frequency Encoding"
1645 | ]
1646 | },
1647 | {
1648 | "cell_type": "code",
1649 | "execution_count": 205,
1650 | "metadata": {},
1651 | "outputs": [],
1652 | "source": [
1653 | "fe_pol = df.groupby('product_comb').size() / len(df)  # share of all rows per product_comb value\n",
1654 | "df['product_comb_fe'] = df['product_comb'].map(fe_pol)  # vectorised lookup instead of a per-row lambda"
1655 | ]
1656 | },
1657 | {
1658 | "cell_type": "code",
1659 | "execution_count": 206,
1660 | "metadata": {},
1661 | "outputs": [],
1662 | "source": [
1663 | "fe_pol = df.groupby('bc_occ').size() / len(df)  # share of all rows per bc_occ key\n",
1664 | "df['bc_occ'] = df['bc_occ'].map(fe_pol)  # overwrites the string key with its frequency"
1665 | ]
1666 | },
1667 | {
1668 | "cell_type": "code",
1669 | "execution_count": 207,
1670 | "metadata": {},
1671 | "outputs": [],
1672 | "source": [
1673 | "fe_pol = df.groupby('bc_oc').size() / len(df)  # share of all rows per bc_oc key\n",
1674 | "df['bc_oc'] = df['bc_oc'].map(fe_pol)  # overwrites the string key with its frequency"
1675 | ]
1676 | },
1677 | {
1678 | "cell_type": "code",
1679 | "execution_count": 208,
1680 | "metadata": {},
1681 | "outputs": [],
1682 | "source": [
1683 | "fe_pol = df.groupby('birth_year_bin').size() / len(df)  # share of all rows per birth-year bin\n",
1684 | "df['birth_year_bin'] = df['birth_year_bin'].map(fe_pol)  # overwrites the bin label with its frequency\n",
1685 | "df['birth_year_bin'] = df['birth_year_bin'].astype(float)"
1686 | ]
1687 | },
1688 | {
1689 | "cell_type": "code",
1690 | "execution_count": 209,
1691 | "metadata": {},
1692 | "outputs": [],
1693 | "source": [
1694 | "fe_pol = df.groupby('occupation_code').size() / len(df)  # share of all rows per occupation code\n",
1695 | "df['occupation_code_fe'] = df['occupation_code'].map(fe_pol)  # vectorised lookup instead of a per-row lambda"
1696 | ]
1697 | },
1698 | {
1699 | "cell_type": "code",
1700 | "execution_count": 210,
1701 | "metadata": {},
1702 | "outputs": [],
1703 | "source": [
1704 | "fe_pol = df.groupby('occupation_category_code').size() / len(df)  # share of all rows per category\n",
1705 | "df['occupation_category_code'] = df['occupation_category_code'].map(fe_pol)  # code replaced by its frequency"
1706 | ]
1707 | },
1708 | {
1709 | "cell_type": "code",
1710 | "execution_count": 211,
1711 | "metadata": {},
1712 | "outputs": [],
1713 | "source": [
1714 | "fe_pol = df.groupby('sex').size() / len(df)  # share of all rows per sex code\n",
1715 | "df['sex_fe'] = df['sex'].map(fe_pol)  # vectorised lookup instead of a per-row lambda"
1716 | ]
1717 | },
1718 | {
1719 | "cell_type": "code",
1720 | "execution_count": 212,
1721 | "metadata": {},
1722 | "outputs": [],
1723 | "source": [
1724 | "fe_pol = df.groupby('Insurance_Count').size() / len(df)  # share of all rows per Insurance_Count code\n",
1725 | "df['Insurance_Count_fe'] = df['Insurance_Count'].map(fe_pol)  # vectorised lookup instead of a per-row lambda"
1726 | ]
1727 | },
1728 | {
1729 | "cell_type": "code",
1730 | "execution_count": 213,
1731 | "metadata": {},
1732 | "outputs": [],
1733 | "source": [
1734 | "df['Join_Year'] = df['Join_Year'].astype(float)"
1735 | ]
1736 | },
1737 | {
1738 | "cell_type": "code",
1739 | "execution_count": 214,
1740 | "metadata": {},
1741 | "outputs": [],
1742 | "source": [
1743 | "# Relationship between targets: for each ordered pair (a, b) taken from `columns`, sum b within groups of a.\n",
1744 | "for col in columns:\n",
1745 | "    by_col = df.groupby(col)  # group once per key column instead of once per pair\n",
1746 | "    for cols in columns:\n",
1747 | "        if col != cols:\n",
1748 | "            df[col + '_' + cols] = by_col[cols].transform('sum')"
1749 | ]
1750 | },
1751 | {
1752 | "cell_type": "code",
1753 | "execution_count": 215,
1754 | "metadata": {},
1755 | "outputs": [],
1756 | "source": [
1757 | "df['num_freq'] = df.groupby('product_comb_fe')['ID'].transform('count')"
1758 | ]
1759 | },
1760 | {
1761 | "cell_type": "markdown",
1762 | "metadata": {},
1763 | "source": [
1764 | "## Getting back train and test"
1765 | ]
1766 | },
1767 | {
1768 | "cell_type": "code",
1769 | "execution_count": 239,
1770 | "metadata": {},
1771 | "outputs": [],
1772 | "source": [
1773 | "train = df.iloc[:train.shape[0]].copy()  # first rows; copied so later column writes do not touch a slice of df\n",
1774 | "test = df.iloc[-test.shape[0]:].copy()  # last rows; sizes come from the previous train/test frames"
1775 | ]
1776 | },
1777 | {
1778 | "cell_type": "code",
1779 | "execution_count": 48,
1780 | "metadata": {},
1781 | "outputs": [
1782 | {
1783 | "data": {
1784 | "text/plain": [
1785 | "(66353, 21)"
1786 | ]
1787 | },
1788 | "execution_count": 48,
1789 | "metadata": {},
1790 | "output_type": "execute_result"
1791 | }
1792 | ],
1793 | "source": [
1794 | "len(train),train['target'].nunique()"
1795 | ]
1796 | },
1797 | {
1798 | "cell_type": "markdown",
1799 | "metadata": {},
1800 | "source": [
1801 | "## Removing records whose (branch_code, target) pair occurs fewer than 3 times"
1802 | ]
1803 | },
1804 | {
1805 | "cell_type": "code",
1806 | "execution_count": 240,
1807 | "metadata": {},
1808 | "outputs": [],
1809 | "source": [
1810 | "# Count rows per (branch_code, target) pair and keep only pairs that occur more than twice.\n",
1811 | "target_count = train.groupby(['branch_code', 'target'])['target'].transform('count')\n",
1812 | "train = train[target_count > 2].copy()  # copy so later assignments modify an independent frame\n",
1813 | "del target_count"
1814 | ]
1815 | },
1816 | {
1817 | "cell_type": "code",
1818 | "execution_count": 54,
1819 | "metadata": {},
1820 | "outputs": [
1821 | {
1822 | "data": {
1823 | "text/plain": [
1824 | "(66290, 20)"
1825 | ]
1826 | },
1827 | "execution_count": 54,
1828 | "metadata": {},
1829 | "output_type": "execute_result"
1830 | }
1831 | ],
1832 | "source": [
1833 | "len(train), train['target'].nunique()"
1834 | ]
1835 | },
1836 | {
1837 | "cell_type": "markdown",
1838 | "metadata": {},
1839 | "source": [
1840 | "## Label Encoding Target"
1841 | ]
1842 | },
1843 | {
1844 | "cell_type": "code",
1845 | "execution_count": 241,
1846 | "metadata": {},
1847 | "outputs": [],
1848 | "source": [
1849 | "te = LabelEncoder()  # kept as a global: inverse_transform later turns class ids back into product codes\n",
1850 | "train = train.assign(target=te.fit_transform(train['target']))  # new frame, no write into a filtered slice"
1851 | ]
1852 | },
1853 | {
1854 | "cell_type": "markdown",
1855 | "metadata": {},
1856 | "source": [
1857 | "# StratifiedKFold"
1858 | ]
1859 | },
1860 | {
1861 | "cell_type": "code",
1862 | "execution_count": 60,
1863 | "metadata": {},
1864 | "outputs": [
1865 | {
1866 | "name": "stdout",
1867 | "output_type": "stream",
1868 | "text": [
1869 | "Training until validation scores don't improve for 20 rounds\n",
1870 | "[200]\ttraining's multi_logloss: 0.425229\tvalid_1's multi_logloss: 0.442599\n",
1871 | "[400]\ttraining's multi_logloss: 0.342772\tvalid_1's multi_logloss: 0.377401\n",
1872 | "[600]\ttraining's multi_logloss: 0.312031\tvalid_1's multi_logloss: 0.363581\n",
1873 | "[800]\ttraining's multi_logloss: 0.291081\tvalid_1's multi_logloss: 0.358438\n",
1874 | "[1000]\ttraining's multi_logloss: 0.27363\tvalid_1's multi_logloss: 0.355791\n",
1875 | "[1200]\ttraining's multi_logloss: 0.258673\tvalid_1's multi_logloss: 0.354585\n",
1876 | "Early stopping, best iteration is:\n",
1877 | "[1366]\ttraining's multi_logloss: 0.247805\tvalid_1's multi_logloss: 0.354216\n",
1878 | "1 err_lgm: 0.35421631499025946\n",
1879 | "[LightGBM] [Warning] min_data_in_leaf is set=40, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=40\n",
1880 | "[LightGBM] [Warning] bagging_fraction is set=0.9, subsample=1.0 will be ignored. Current value: bagging_fraction=0.9\n",
1881 | "[LightGBM] [Warning] bagging_freq is set=2, subsample_freq=0 will be ignored. Current value: bagging_freq=2\n",
1882 | "Training until validation scores don't improve for 20 rounds\n",
1883 | "[200]\ttraining's multi_logloss: 0.418809\tvalid_1's multi_logloss: 0.455144\n",
1884 | "[400]\ttraining's multi_logloss: 0.33489\tvalid_1's multi_logloss: 0.393109\n",
1885 | "[600]\ttraining's multi_logloss: 0.3046\tvalid_1's multi_logloss: 0.381151\n",
1886 | "[800]\ttraining's multi_logloss: 0.283089\tvalid_1's multi_logloss: 0.377217\n",
1887 | "[1000]\ttraining's multi_logloss: 0.265824\tvalid_1's multi_logloss: 0.375818\n",
1888 | "Early stopping, best iteration is:\n",
1889 | "[1073]\ttraining's multi_logloss: 0.260279\tvalid_1's multi_logloss: 0.375593\n",
1890 | "1 err_lgm: 0.375593067922245\n",
1891 | "[LightGBM] [Warning] min_data_in_leaf is set=40, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=40\n",
1892 | "[LightGBM] [Warning] bagging_fraction is set=0.9, subsample=1.0 will be ignored. Current value: bagging_fraction=0.9\n",
1893 | "[LightGBM] [Warning] bagging_freq is set=2, subsample_freq=0 will be ignored. Current value: bagging_freq=2\n",
1894 | "Training until validation scores don't improve for 20 rounds\n",
1895 | "[200]\ttraining's multi_logloss: 0.420918\tvalid_1's multi_logloss: 0.451048\n",
1896 | "[400]\ttraining's multi_logloss: 0.339036\tvalid_1's multi_logloss: 0.386804\n",
1897 | "[600]\ttraining's multi_logloss: 0.308682\tvalid_1's multi_logloss: 0.373044\n",
1898 | "[800]\ttraining's multi_logloss: 0.28747\tvalid_1's multi_logloss: 0.367751\n",
1899 | "[1000]\ttraining's multi_logloss: 0.269769\tvalid_1's multi_logloss: 0.365496\n",
1900 | "[1200]\ttraining's multi_logloss: 0.254697\tvalid_1's multi_logloss: 0.36462\n",
1901 | "Early stopping, best iteration is:\n",
1902 | "[1261]\ttraining's multi_logloss: 0.250606\tvalid_1's multi_logloss: 0.36449\n",
1903 | "1 err_lgm: 0.3644898381125798\n"
1904 | ]
1905 | },
1906 | {
1907 | "data": {
1908 | "text/plain": [
1909 | "0.36476640700836144"
1910 | ]
1911 | },
1912 | "execution_count": 60,
1913 | "metadata": {},
1914 | "output_type": "execute_result"
1915 | }
1916 | ],
1917 | "source": [
1918 | "# LGB model: 3-fold stratified CV; test-set probabilities are averaged over the folds.\n",
1919 | "#\n",
1920 | "err = []\n",
1921 | "y_pred_tot_lgb = 0\n",
1922 | "\n",
1923 | "fold = StratifiedKFold(n_splits=3, shuffle=True, random_state=1997)\n",
1924 | "non_feature_cols = ['join_date', 'ID', 'ID2', 'target']  # columns excluded from the feature matrix\n",
1925 | "x = train.drop(columns=non_feature_cols)\n",
1926 | "y = train[['target']]\n",
1927 | "ID = test['ID']\n",
1928 | "testing = test.drop(columns=non_feature_cols)\n",
1929 | "for i, (train_index, test_index) in enumerate(fold.split(x, y), start=1):  # i = fold number shown in the log\n",
1930 | "    x_train, x_val = x.iloc[train_index], x.iloc[test_index]\n",
1931 | "    y_train, y_val = y.iloc[train_index], y.iloc[test_index]\n",
1932 | "    m = LGBMClassifier(n_estimators=10000,\n",
1933 | "                       n_jobs=-1,\n",
1934 | "                       random_state=69,\n",
1935 | "                       learning_rate=0.01,\n",
1936 | "                       max_depth=5,\n",
1937 | "                       num_leaves=128,\n",
1938 | "                       colsample_bytree=0.5,\n",
1939 | "                       colsample_bynode=0.5,\n",
1940 | "                       min_data_in_leaf=40,\n",
1941 | "                       bagging_freq=2,\n",
1942 | "                       bagging_fraction=0.9,\n",
1943 | "                       reg_alpha=0.5,\n",
1944 | "                       reg_lambda=1)\n",
1945 | "    m.fit(x_train,\n",
1946 | "          y_train,\n",
1947 | "          eval_set=[(x_train, y_train), (x_val, y_val)],\n",
1948 | "          early_stopping_rounds=20,\n",
1949 | "          eval_metric='multi_logloss',\n",
1950 | "          verbose=200)\n",
1951 | "    pred_y = m.predict_proba(x_val)\n",
1952 | "    fold_loss = log_loss(y_val, pred_y)  # computed once, used for both the log line and the average\n",
1953 | "    print(i, \" err_lgm: \", fold_loss)\n",
1954 | "    err.append(fold_loss)\n",
1955 | "    y_pred_tot_lgb += m.predict_proba(testing)\n",
1956 | "y_pred_tot_lgb = y_pred_tot_lgb / fold.get_n_splits()  # divide by the actual number of folds\n",
1957 | "sum(err) / len(err)"
1958 | ]
1959 | },
1960 | {
1961 | "cell_type": "code",
1962 | "execution_count": 62,
1963 | "metadata": {},
1964 | "outputs": [
1965 | {
1966 | "name": "stdout",
1967 | "output_type": "stream",
1968 | "text": [
1969 | "[0]\tvalidation_0-mlogloss:2.23928\tvalidation_1-mlogloss:2.24216\n",
1970 | "Multiple eval metrics have been passed: 'validation_1-mlogloss' will be used for early stopping.\n",
1971 | "\n",
1972 | "Will train until validation_1-mlogloss hasn't improved in 20 rounds.\n",
1973 | "[20]\tvalidation_0-mlogloss:0.55627\tvalidation_1-mlogloss:0.58586\n",
1974 | "[40]\tvalidation_0-mlogloss:0.36262\tvalidation_1-mlogloss:0.41120\n",
1975 | "[60]\tvalidation_0-mlogloss:0.30980\tvalidation_1-mlogloss:0.37351\n",
1976 | "[80]\tvalidation_0-mlogloss:0.28610\tvalidation_1-mlogloss:0.36247\n",
1977 | "[100]\tvalidation_0-mlogloss:0.27054\tvalidation_1-mlogloss:0.35819\n",
1978 | "[120]\tvalidation_0-mlogloss:0.25927\tvalidation_1-mlogloss:0.35662\n",
1979 | "[140]\tvalidation_0-mlogloss:0.25005\tvalidation_1-mlogloss:0.35568\n",
1980 | "[160]\tvalidation_0-mlogloss:0.24265\tvalidation_1-mlogloss:0.35481\n",
1981 | "[180]\tvalidation_0-mlogloss:0.23612\tvalidation_1-mlogloss:0.35453\n",
1982 | "[200]\tvalidation_0-mlogloss:0.23081\tvalidation_1-mlogloss:0.35423\n",
1983 | "[220]\tvalidation_0-mlogloss:0.22614\tvalidation_1-mlogloss:0.35409\n",
1984 | "[240]\tvalidation_0-mlogloss:0.22208\tvalidation_1-mlogloss:0.35406\n",
1985 | "[260]\tvalidation_0-mlogloss:0.21838\tvalidation_1-mlogloss:0.35401\n",
1986 | "[280]\tvalidation_0-mlogloss:0.21490\tvalidation_1-mlogloss:0.35399\n",
1987 | "[300]\tvalidation_0-mlogloss:0.21182\tvalidation_1-mlogloss:0.35393\n",
1988 | "Stopping. Best iteration:\n",
1989 | "[295]\tvalidation_0-mlogloss:0.21251\tvalidation_1-mlogloss:0.35388\n",
1990 | "\n",
1991 | "1 err_lgm: 0.35387566261402437\n",
1992 | "[0]\tvalidation_0-mlogloss:2.23749\tvalidation_1-mlogloss:2.24493\n",
1993 | "Multiple eval metrics have been passed: 'validation_1-mlogloss' will be used for early stopping.\n",
1994 | "\n",
1995 | "Will train until validation_1-mlogloss hasn't improved in 20 rounds.\n",
1996 | "[20]\tvalidation_0-mlogloss:0.55083\tvalidation_1-mlogloss:0.59409\n",
1997 | "[40]\tvalidation_0-mlogloss:0.35619\tvalidation_1-mlogloss:0.42096\n",
1998 | "[60]\tvalidation_0-mlogloss:0.30289\tvalidation_1-mlogloss:0.38610\n",
1999 | "[80]\tvalidation_0-mlogloss:0.27864\tvalidation_1-mlogloss:0.37726\n",
2000 | "[100]\tvalidation_0-mlogloss:0.26256\tvalidation_1-mlogloss:0.37437\n",
2001 | "[120]\tvalidation_0-mlogloss:0.25081\tvalidation_1-mlogloss:0.37333\n",
2002 | "[140]\tvalidation_0-mlogloss:0.24170\tvalidation_1-mlogloss:0.37294\n",
2003 | "[160]\tvalidation_0-mlogloss:0.23452\tvalidation_1-mlogloss:0.37292\n",
2004 | "Stopping. Best iteration:\n",
2005 | "[156]\tvalidation_0-mlogloss:0.23581\tvalidation_1-mlogloss:0.37285\n",
2006 | "\n",
2007 | "1 err_lgm: 0.3728546684019317\n",
2008 | "[0]\tvalidation_0-mlogloss:2.23681\tvalidation_1-mlogloss:2.24268\n",
2009 | "Multiple eval metrics have been passed: 'validation_1-mlogloss' will be used for early stopping.\n",
2010 | "\n",
2011 | "Will train until validation_1-mlogloss hasn't improved in 20 rounds.\n",
2012 | "[20]\tvalidation_0-mlogloss:0.55195\tvalidation_1-mlogloss:0.59279\n",
2013 | "[40]\tvalidation_0-mlogloss:0.35853\tvalidation_1-mlogloss:0.41876\n",
2014 | "[60]\tvalidation_0-mlogloss:0.30635\tvalidation_1-mlogloss:0.38258\n",
2015 | "[80]\tvalidation_0-mlogloss:0.28186\tvalidation_1-mlogloss:0.37189\n",
2016 | "[100]\tvalidation_0-mlogloss:0.26577\tvalidation_1-mlogloss:0.36806\n",
2017 | "[120]\tvalidation_0-mlogloss:0.25558\tvalidation_1-mlogloss:0.36647\n",
2018 | "[140]\tvalidation_0-mlogloss:0.24680\tvalidation_1-mlogloss:0.36596\n",
2019 | "[160]\tvalidation_0-mlogloss:0.23923\tvalidation_1-mlogloss:0.36540\n",
2020 | "[180]\tvalidation_0-mlogloss:0.23289\tvalidation_1-mlogloss:0.36518\n",
2021 | "[200]\tvalidation_0-mlogloss:0.22699\tvalidation_1-mlogloss:0.36497\n",
2022 | "[220]\tvalidation_0-mlogloss:0.22182\tvalidation_1-mlogloss:0.36473\n",
2023 | "[240]\tvalidation_0-mlogloss:0.21746\tvalidation_1-mlogloss:0.36472\n",
2024 | "[260]\tvalidation_0-mlogloss:0.21355\tvalidation_1-mlogloss:0.36466\n",
2025 | "Stopping. Best iteration:\n",
2026 | "[251]\tvalidation_0-mlogloss:0.21522\tvalidation_1-mlogloss:0.36453\n",
2027 | "\n",
2028 | "1 err_lgm: 0.3645286154121357\n"
2029 | ]
2030 | },
2031 | {
2032 | "data": {
2033 | "text/plain": [
2034 | "0.36375298214269725"
2035 | ]
2036 | },
2037 | "execution_count": 62,
2038 | "metadata": {},
2039 | "output_type": "execute_result"
2040 | }
2041 | ],
2042 | "source": [
2043 | "# XGB model: same 3-fold stratified split; reuses x, y and testing built in the LGB cell.\n",
2044 | "#\n",
2045 | "err = []\n",
2046 | "y_pred_tot_xgb = 0\n",
2047 | "\n",
2048 | "fold = StratifiedKFold(n_splits=3, shuffle=True, random_state=1997)\n",
2049 | "\n",
2050 | "# enumerate() supplies the fold number, so each log line carries its own fold index.\n",
2051 | "for i, (train_index, test_index) in enumerate(fold.split(x, y), start=1):\n",
2052 | "    x_train, x_val = x.iloc[train_index], x.iloc[test_index]\n",
2053 | "    y_train, y_val = y.iloc[train_index], y.iloc[test_index]\n",
2054 | "    m = XGBClassifier(\n",
2055 | "        n_estimators=10000,\n",
2056 | "        eta=0.1,\n",
2057 | "        n_jobs=-1,\n",
2058 | "        random_state=69,\n",
2059 | "        reg_alpha=0.5,  #reg_lambda=1.2 \n",
2060 | "        colsample_bytree=0.8,\n",
2061 | "        colsample_bylevel=0.8,\n",
2062 | "        colsample_bynode=0.8,\n",
2063 | "        subsample=0.9,\n",
2064 | "        gamma=1.5,\n",
2065 | "        max_depth=7)\n",
2066 | "    m.fit(x_train,\n",
2067 | "          y_train,\n",
2068 | "          eval_set=[(x_train, y_train), (x_val, y_val)],\n",
2069 | "          early_stopping_rounds=20,\n",
2070 | "          eval_metric='mlogloss',\n",
2071 | "          verbose=20)\n",
2072 | "    pred_y = m.predict_proba(x_val)\n",
2073 | "    fold_loss = log_loss(y_val, pred_y)  # computed once, used for both the log line and the average\n",
2074 | "    print(i, \" err_xgb: \", fold_loss)\n",
2075 | "    err.append(fold_loss)\n",
2076 | "    y_pred_tot_xgb += m.predict_proba(testing)\n",
2077 | "y_pred_tot_xgb = y_pred_tot_xgb / fold.get_n_splits()  # divide by the actual number of folds\n",
2078 | "sum(err) / len(err)"
2079 | ]
2080 | },
2081 | {
2082 | "cell_type": "markdown",
2083 | "metadata": {},
2084 | "source": [
2085 | "# Averaging submission"
2086 | ]
2087 | },
2088 | {
2089 | "cell_type": "code",
2090 | "execution_count": 66,
2091 | "metadata": {},
2092 | "outputs": [],
2093 | "source": [
2094 | "# Weighted blend of the fold-averaged LGB and XGB class probabilities.\n",
2095 | "pred = 0.6 * y_pred_tot_lgb + 0.4 * y_pred_tot_xgb\n",
2096 | "y_test = pd.DataFrame(pred, columns=te.inverse_transform(list(range(pred.shape[1]))))"
2097 | ]
2098 | },
2099 | {
2100 | "cell_type": "code",
2101 | "execution_count": 69,
2102 | "metadata": {},
2103 | "outputs": [
2104 | {
2105 | "data": {
2106 | "application/vnd.jupyter.widget-view+json": {
2107 | "model_id": "fdc2159a005045a98a782bbdd871d84b",
2108 | "version_major": 2,
2109 | "version_minor": 0
2110 | },
2111 | "text/plain": [
2112 | "HBox(children=(FloatProgress(value=0.0, max=200000.0), HTML(value='')))"
2113 | ]
2114 | },
2115 | "metadata": {},
2116 | "output_type": "display_data"
2117 | },
2118 | {
2119 | "name": "stdout",
2120 | "output_type": "stream",
2121 | "text": [
2122 | "\n",
2123 | "Wall time: 37 s\n"
2124 | ]
2125 | },
2126 | {
2127 | "data": {
2128 | "text/html": [
2129 | "\n",
2130 | "\n",
2143 | "
\n",
2144 | " \n",
2145 | " \n",
2146 | " | \n",
2147 | " ID X PCODE | \n",
2148 | " Label | \n",
2149 | "
\n",
2150 | " \n",
2151 | " \n",
2152 | " \n",
2153 | " | 0 | \n",
2154 | " F86J5PC X 66FJ | \n",
2155 | " 0.000077 | \n",
2156 | "
\n",
2157 | " \n",
2158 | " | 1 | \n",
2159 | " F86J5PC X 7POT | \n",
2160 | " 0.000107 | \n",
2161 | "
\n",
2162 | " \n",
2163 | " | 2 | \n",
2164 | " F86J5PC X 8NN1 | \n",
2165 | " 0.000014 | \n",
2166 | "
\n",
2167 | " \n",
2168 | " | 3 | \n",
2169 | " F86J5PC X AHXO | \n",
2170 | " 0.000102 | \n",
2171 | "
\n",
2172 | " \n",
2173 | " | 4 | \n",
2174 | " F86J5PC X BSTQ | \n",
2175 | " 0.000021 | \n",
2176 | "
\n",
2177 | " \n",
2178 | "
\n",
2179 | "
"
2180 | ],
2181 | "text/plain": [
2182 | " ID X PCODE Label\n",
2183 | "0 F86J5PC X 66FJ 0.000077\n",
2184 | "1 F86J5PC X 7POT 0.000107\n",
2185 | "2 F86J5PC X 8NN1 0.000014\n",
2186 | "3 F86J5PC X AHXO 0.000102\n",
2187 | "4 F86J5PC X BSTQ 0.000021"
2188 | ]
2189 | },
2190 | "execution_count": 69,
2191 | "metadata": {},
2192 | "output_type": "execute_result"
2193 | }
2194 | ],
2195 | "source": [
2196 | "%%time\n",
2197 | "# Long-format submission: one row per (test ID, product code), in row-major order of y_test.\n",
2198 | "test['ID'] = ID  # restore the ID column once, outside any loop\n",
2199 | "ids = test['ID'].to_numpy()\n",
2200 | "codes = list(y_test.columns)\n",
2201 | "df_answer = pd.DataFrame({\n",
2202 | "    'ID X PCODE': [cust + ' X ' + code for cust in ids for code in codes],\n",
2203 | "    'Label': y_test.to_numpy().ravel(),\n",
2204 | "})\n",
2205 | "\n",
2206 | "# Keys found in true_values are forced to 1.0 (`in` keeps that container's own membership rules).\n",
2207 | "is_known = df_answer['ID X PCODE'].map(lambda key: key in true_values)\n",
2208 | "df_answer.loc[is_known, 'Label'] = 1.0  # single .loc write instead of chained indexing\n",
2209 | "df_answer.head()"
2210 | ]
2211 | },
2212 | {
2213 | "cell_type": "code",
2214 | "execution_count": 70,
2215 | "metadata": {},
2216 | "outputs": [],
2217 | "source": [
2218 | "# Two-column submission frame with a fresh 0..n-1 index.\n",
2219 | "sub1 = df_answer.loc[:, ['ID X PCODE', 'Label']].reset_index(drop=True)"
2220 | ]
2221 | },
2222 | {
2223 | "cell_type": "code",
2224 | "execution_count": 71,
2225 | "metadata": {},
2226 | "outputs": [],
2227 | "source": [
2228 | "sub = pd.read_csv('SampleSubmission.csv')"
2229 | ]
2230 | },
2231 | {
2232 | "cell_type": "code",
2233 | "execution_count": 72,
2234 | "metadata": {},
2235 | "outputs": [],
2236 | "source": [
2237 | "sub = sub.sort_values(by='ID X PCODE')  # order the sample file by key\n",
2238 | "sub1 = sub1.sort_values(by='ID X PCODE')  # order the predictions by the same key"
2239 | ]
2240 | },
2241 | {
2242 | "cell_type": "code",
2243 | "execution_count": 73,
2244 | "metadata": {},
2245 | "outputs": [],
2246 | "source": [
2247 | "actual = sub1\n",
2248 | "label_by_key = dict(zip(actual['ID X PCODE'].values, actual['Label'].values))  # key -> predicted label\n",
2249 | "known = sub['ID X PCODE'].isin(list(label_by_key))  # sample-file rows for which a prediction exists\n",
2250 | "sub.loc[known, 'Label'] = sub.loc[known, 'ID X PCODE'].map(label_by_key)  # matched by key, not by row position"
2251 | ]
2252 | },
2253 | {
2254 | "cell_type": "code",
2255 | "execution_count": 81,
2256 | "metadata": {},
2257 | "outputs": [],
2258 | "source": [
2259 | "#Make submission\n",
2260 | "sub.to_csv('submiss.csv',index=False)"
2261 | ]
2262 | },
2263 | {
2264 | "cell_type": "markdown",
2265 | "metadata": {},
2266 | "source": [
2267 | "Next, open and run Zimnat_insurance_cat_target+multy.ipynb."
2268 | ]
2269 | }
2270 | ],
2271 | "metadata": {
2272 | "kernelspec": {
2273 | "display_name": "Python 3",
2274 | "language": "python",
2275 | "name": "python3"
2276 | },
2277 | "language_info": {
2278 | "codemirror_mode": {
2279 | "name": "ipython",
2280 | "version": 3
2281 | },
2282 | "file_extension": ".py",
2283 | "mimetype": "text/x-python",
2284 | "name": "python",
2285 | "nbconvert_exporter": "python",
2286 | "pygments_lexer": "ipython3",
2287 | "version": "3.8.3"
2288 | },
2289 | "toc": {
2290 | "base_numbering": 1,
2291 | "nav_menu": {},
2292 | "number_sections": true,
2293 | "sideBar": true,
2294 | "skip_h1_title": false,
2295 | "title_cell": "Table of Contents",
2296 | "title_sidebar": "Contents",
2297 | "toc_cell": false,
2298 | "toc_position": {},
2299 | "toc_section_display": true,
2300 | "toc_window_display": false
2301 | }
2302 | },
2303 | "nbformat": 4,
2304 | "nbformat_minor": 4
2305 | }
2306 |
--------------------------------------------------------------------------------
/Zimnat_insurance_best_multy_overall.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "colab_type": "text",
7 | "id": "82PpxfmqglcE"
8 | },
9 | "source": [
10 | "# Download libraries and data"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": null,
16 | "metadata": {},
17 | "outputs": [],
18 | "source": [
19 | "#Install the pinned CatBoost version; %pip targets the environment of the running kernel\n",
20 | "#\n",
21 | "%pip install catboost==0.23.2"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": 1,
27 | "metadata": {
28 | "ExecuteTime": {
29 | "end_time": "2020-08-07T19:29:03.461280Z",
30 | "start_time": "2020-08-07T19:29:03.186558Z"
31 | },
32 | "colab": {},
33 | "colab_type": "code",
34 | "id": "3arOHSJwCtNq"
35 | },
36 | "outputs": [],
37 | "source": [
38 | "#Import libraries\n",
39 | "#\n",
40 | "import pandas as pd, os, gc\n",
41 | "import numpy as np\n",
42 | "import math\n",
43 | "import copy\n",
44 | "from itertools import combinations\n",
45 | "\n",
46 | "import matplotlib.pyplot as plt\n",
47 | "\n",
48 | "from sklearn.preprocessing import LabelEncoder, StandardScaler, RobustScaler\n",
49 | "from sklearn.metrics import roc_curve, auc, log_loss\n",
50 | "\n",
51 | "from tqdm import tqdm, tqdm_notebook\n",
52 | "\n",
53 | "from sklearn.model_selection import GroupShuffleSplit, StratifiedKFold, train_test_split, GroupKFold\n",
54 | "from catboost import CatBoostClassifier\n",
55 | "from xgboost import XGBClassifier\n",
56 | "\n",
57 | "%matplotlib inline\n",
58 | "import warnings\n",
59 | "warnings.filterwarnings('ignore')"
60 | ]
61 | },
62 | {
63 | "cell_type": "code",
64 | "execution_count": 3,
65 | "metadata": {
66 | "ExecuteTime": {
67 | "end_time": "2020-08-07T19:29:03.664112Z",
68 | "start_time": "2020-08-07T19:29:03.501157Z"
69 | },
70 | "colab": {},
71 | "colab_type": "code",
72 | "id": "s3ZCXtzKDiDe"
73 | },
74 | "outputs": [],
75 | "source": [
76 | "#Load data\n",
77 | "#\n",
78 | "train = pd.read_csv('Train.csv')\n",
79 | "test = pd.read_csv('Test.csv')\n",
80 | "sub = pd.read_csv('SampleSubmission.csv')"
81 | ]
82 | },
83 | {
84 | "cell_type": "code",
85 | "execution_count": 4,
86 | "metadata": {},
87 | "outputs": [
88 | {
89 | "data": {
90 | "text/html": [
91 | "\n",
92 | "\n",
105 | "
\n",
106 | " \n",
107 | " \n",
108 | " | \n",
109 | " ID | \n",
110 | " join_date | \n",
111 | " sex | \n",
112 | " marital_status | \n",
113 | " birth_year | \n",
114 | " branch_code | \n",
115 | " occupation_code | \n",
116 | " occupation_category_code | \n",
117 | " P5DA | \n",
118 | " RIBP | \n",
119 | " ... | \n",
120 | " AHXO | \n",
121 | " BSTQ | \n",
122 | " FM3X | \n",
123 | " K6QO | \n",
124 | " QBOL | \n",
125 | " JWFN | \n",
126 | " JZ9D | \n",
127 | " J9JW | \n",
128 | " GHYX | \n",
129 | " ECY3 | \n",
130 | "
\n",
131 | " \n",
132 | " \n",
133 | " \n",
134 | " | 0 | \n",
135 | " 4WKQSBB | \n",
136 | " 1/2/2019 | \n",
137 | " F | \n",
138 | " M | \n",
139 | " 1987 | \n",
140 | " 1X1H | \n",
141 | " 2A7I | \n",
142 | " T4MS | \n",
143 | " 0 | \n",
144 | " 0 | \n",
145 | " ... | \n",
146 | " 0 | \n",
147 | " 0 | \n",
148 | " 0 | \n",
149 | " 1 | \n",
150 | " 0 | \n",
151 | " 0 | \n",
152 | " 0 | \n",
153 | " 0 | \n",
154 | " 0 | \n",
155 | " 0 | \n",
156 | "
\n",
157 | " \n",
158 | " | 1 | \n",
159 | " CP5S02H | \n",
160 | " 1/6/2019 | \n",
161 | " F | \n",
162 | " M | \n",
163 | " 1981 | \n",
164 | " UAOD | \n",
165 | " 2A7I | \n",
166 | " T4MS | \n",
167 | " 0 | \n",
168 | " 0 | \n",
169 | " ... | \n",
170 | " 0 | \n",
171 | " 0 | \n",
172 | " 0 | \n",
173 | " 1 | \n",
174 | " 0 | \n",
175 | " 0 | \n",
176 | " 0 | \n",
177 | " 0 | \n",
178 | " 0 | \n",
179 | " 0 | \n",
180 | "
\n",
181 | " \n",
182 | " | 2 | \n",
183 | " 2YKDILJ | \n",
184 | " 1/6/2013 | \n",
185 | " M | \n",
186 | " U | \n",
187 | " 1991 | \n",
188 | " 748L | \n",
189 | " QZYX | \n",
190 | " 90QI | \n",
191 | " 0 | \n",
192 | " 0 | \n",
193 | " ... | \n",
194 | " 0 | \n",
195 | " 0 | \n",
196 | " 0 | \n",
197 | " 0 | \n",
198 | " 0 | \n",
199 | " 0 | \n",
200 | " 0 | \n",
201 | " 0 | \n",
202 | " 0 | \n",
203 | " 1 | \n",
204 | "
\n",
205 | " \n",
206 | " | 3 | \n",
207 | " 2S9E81J | \n",
208 | " 1/8/2019 | \n",
209 | " M | \n",
210 | " M | \n",
211 | " 1990 | \n",
212 | " 1X1H | \n",
213 | " BP09 | \n",
214 | " 56SI | \n",
215 | " 0 | \n",
216 | " 0 | \n",
217 | " ... | \n",
218 | " 0 | \n",
219 | " 0 | \n",
220 | " 0 | \n",
221 | " 1 | \n",
222 | " 0 | \n",
223 | " 0 | \n",
224 | " 0 | \n",
225 | " 0 | \n",
226 | " 0 | \n",
227 | " 0 | \n",
228 | "
\n",
229 | " \n",
230 | " | 4 | \n",
231 | " BHDYVFT | \n",
232 | " 1/8/2019 | \n",
233 | " M | \n",
234 | " M | \n",
235 | " 1990 | \n",
236 | " 748L | \n",
237 | " NO3L | \n",
238 | " T4MS | \n",
239 | " 0 | \n",
240 | " 0 | \n",
241 | " ... | \n",
242 | " 0 | \n",
243 | " 0 | \n",
244 | " 0 | \n",
245 | " 0 | \n",
246 | " 0 | \n",
247 | " 0 | \n",
248 | " 1 | \n",
249 | " 1 | \n",
250 | " 0 | \n",
251 | " 0 | \n",
252 | "
\n",
253 | " \n",
254 | "
\n",
255 | "
5 rows × 29 columns
\n",
256 | "
"
257 | ],
258 | "text/plain": [
259 | " ID join_date sex marital_status birth_year branch_code \\\n",
260 | "0 4WKQSBB 1/2/2019 F M 1987 1X1H \n",
261 | "1 CP5S02H 1/6/2019 F M 1981 UAOD \n",
262 | "2 2YKDILJ 1/6/2013 M U 1991 748L \n",
263 | "3 2S9E81J 1/8/2019 M M 1990 1X1H \n",
264 | "4 BHDYVFT 1/8/2019 M M 1990 748L \n",
265 | "\n",
266 | " occupation_code occupation_category_code P5DA RIBP ... AHXO BSTQ FM3X \\\n",
267 | "0 2A7I T4MS 0 0 ... 0 0 0 \n",
268 | "1 2A7I T4MS 0 0 ... 0 0 0 \n",
269 | "2 QZYX 90QI 0 0 ... 0 0 0 \n",
270 | "3 BP09 56SI 0 0 ... 0 0 0 \n",
271 | "4 NO3L T4MS 0 0 ... 0 0 0 \n",
272 | "\n",
273 | " K6QO QBOL JWFN JZ9D J9JW GHYX ECY3 \n",
274 | "0 1 0 0 0 0 0 0 \n",
275 | "1 1 0 0 0 0 0 0 \n",
276 | "2 0 0 0 0 0 0 1 \n",
277 | "3 1 0 0 0 0 0 0 \n",
278 | "4 0 0 0 1 1 0 0 \n",
279 | "\n",
280 | "[5 rows x 29 columns]"
281 | ]
282 | },
283 | "execution_count": 4,
284 | "metadata": {},
285 | "output_type": "execute_result"
286 | }
287 | ],
288 | "source": [
289 | "train.head()"
290 | ]
291 | },
292 | {
293 | "cell_type": "code",
294 | "execution_count": 5,
295 | "metadata": {},
296 | "outputs": [
297 | {
298 | "data": {
299 | "text/html": [
300 | "\n",
301 | "\n",
314 | "
\n",
315 | " \n",
316 | " \n",
317 | " | \n",
318 | " ID | \n",
319 | " join_date | \n",
320 | " sex | \n",
321 | " marital_status | \n",
322 | " birth_year | \n",
323 | " branch_code | \n",
324 | " occupation_code | \n",
325 | " occupation_category_code | \n",
326 | " P5DA | \n",
327 | " RIBP | \n",
328 | " ... | \n",
329 | " AHXO | \n",
330 | " BSTQ | \n",
331 | " FM3X | \n",
332 | " K6QO | \n",
333 | " QBOL | \n",
334 | " JWFN | \n",
335 | " JZ9D | \n",
336 | " J9JW | \n",
337 | " GHYX | \n",
338 | " ECY3 | \n",
339 | "
\n",
340 | " \n",
341 | " \n",
342 | " \n",
343 | " | 0 | \n",
344 | " F86J5PC | \n",
345 | " 1/12/2018 | \n",
346 | " M | \n",
347 | " M | \n",
348 | " 1984 | \n",
349 | " 94KC | \n",
350 | " DZRV | \n",
351 | " 90QI | \n",
352 | " 0 | \n",
353 | " 0 | \n",
354 | " ... | \n",
355 | " 0 | \n",
356 | " 0 | \n",
357 | " 0 | \n",
358 | " 0 | \n",
359 | " 0 | \n",
360 | " 0 | \n",
361 | " 0 | \n",
362 | " 0 | \n",
363 | " 0 | \n",
364 | " 0 | \n",
365 | "
\n",
366 | " \n",
367 | " | 1 | \n",
368 | " H6141K3 | \n",
369 | " 1/10/2019 | \n",
370 | " M | \n",
371 | " M | \n",
372 | " 1996 | \n",
373 | " 1X1H | \n",
374 | " J9SY | \n",
375 | " 90QI | \n",
376 | " 0 | \n",
377 | " 0 | \n",
378 | " ... | \n",
379 | " 0 | \n",
380 | " 0 | \n",
381 | " 0 | \n",
382 | " 1 | \n",
383 | " 0 | \n",
384 | " 0 | \n",
385 | " 0 | \n",
386 | " 0 | \n",
387 | " 0 | \n",
388 | " 0 | \n",
389 | "
\n",
390 | " \n",
391 | " | 2 | \n",
392 | " RBAYUXZ | \n",
393 | " 1/1/2020 | \n",
394 | " F | \n",
395 | " W | \n",
396 | " 1968 | \n",
397 | " UAOD | \n",
398 | " 2A7I | \n",
399 | " T4MS | \n",
400 | " 0 | \n",
401 | " 0 | \n",
402 | " ... | \n",
403 | " 0 | \n",
404 | " 0 | \n",
405 | " 0 | \n",
406 | " 1 | \n",
407 | " 0 | \n",
408 | " 0 | \n",
409 | " 0 | \n",
410 | " 0 | \n",
411 | " 0 | \n",
412 | " 0 | \n",
413 | "
\n",
414 | " \n",
415 | " | 3 | \n",
416 | " KCBILBQ | \n",
417 | " 1/2/2019 | \n",
418 | " M | \n",
419 | " M | \n",
420 | " 1989 | \n",
421 | " 94KC | \n",
422 | " 2A7I | \n",
423 | " T4MS | \n",
424 | " 0 | \n",
425 | " 0 | \n",
426 | " ... | \n",
427 | " 0 | \n",
428 | " 0 | \n",
429 | " 0 | \n",
430 | " 0 | \n",
431 | " 0 | \n",
432 | " 0 | \n",
433 | " 0 | \n",
434 | " 0 | \n",
435 | " 0 | \n",
436 | " 0 | \n",
437 | "
\n",
438 | " \n",
439 | " | 4 | \n",
440 | " LSEC1ZJ | \n",
441 | " 1/2/2020 | \n",
442 | " F | \n",
443 | " M | \n",
444 | " 1982 | \n",
445 | " UAOD | \n",
446 | " 0KID | \n",
447 | " T4MS | \n",
448 | " 0 | \n",
449 | " 0 | \n",
450 | " ... | \n",
451 | " 0 | \n",
452 | " 0 | \n",
453 | " 0 | \n",
454 | " 0 | \n",
455 | " 0 | \n",
456 | " 0 | \n",
457 | " 1 | \n",
458 | " 0 | \n",
459 | " 0 | \n",
460 | " 0 | \n",
461 | "
\n",
462 | " \n",
463 | "
\n",
464 | "
5 rows × 29 columns
\n",
465 | "
"
466 | ],
467 | "text/plain": [
468 | " ID join_date sex marital_status birth_year branch_code \\\n",
469 | "0 F86J5PC 1/12/2018 M M 1984 94KC \n",
470 | "1 H6141K3 1/10/2019 M M 1996 1X1H \n",
471 | "2 RBAYUXZ 1/1/2020 F W 1968 UAOD \n",
472 | "3 KCBILBQ 1/2/2019 M M 1989 94KC \n",
473 | "4 LSEC1ZJ 1/2/2020 F M 1982 UAOD \n",
474 | "\n",
475 | " occupation_code occupation_category_code P5DA RIBP ... AHXO BSTQ FM3X \\\n",
476 | "0 DZRV 90QI 0 0 ... 0 0 0 \n",
477 | "1 J9SY 90QI 0 0 ... 0 0 0 \n",
478 | "2 2A7I T4MS 0 0 ... 0 0 0 \n",
479 | "3 2A7I T4MS 0 0 ... 0 0 0 \n",
480 | "4 0KID T4MS 0 0 ... 0 0 0 \n",
481 | "\n",
482 | " K6QO QBOL JWFN JZ9D J9JW GHYX ECY3 \n",
483 | "0 0 0 0 0 0 0 0 \n",
484 | "1 1 0 0 0 0 0 0 \n",
485 | "2 1 0 0 0 0 0 0 \n",
486 | "3 0 0 0 0 0 0 0 \n",
487 | "4 0 0 0 1 0 0 0 \n",
488 | "\n",
489 | "[5 rows x 29 columns]"
490 | ]
491 | },
492 | "execution_count": 5,
493 | "metadata": {},
494 | "output_type": "execute_result"
495 | }
496 | ],
497 | "source": [
498 | "test.head()"
499 | ]
500 | },
501 | {
502 | "cell_type": "markdown",
503 | "metadata": {},
504 | "source": [
505 | "# Data preparing"
506 | ]
507 | },
508 | {
509 | "cell_type": "code",
510 | "execution_count": 6,
511 | "metadata": {},
512 | "outputs": [],
513 | "source": [
514 | "#Occupation codes that occur in only one of the two sets are replaced by the row's occupation category\n",
515 | "replace_train=list(set(train['occupation_code'].unique().tolist())-set(test['occupation_code']))\n",
516 | "replace_test=list(set(test['occupation_code'].unique().tolist())-set(train['occupation_code']))\n",
517 | "\n",
518 | "for df, unshared_codes in [(train, replace_train), (test, replace_test)]:\n",
519 | "    #Assign the result back: fillna(inplace=True) on a selected column is chained assignment\n",
520 | "    df['occupation_code'] = df['occupation_code'].replace(unshared_codes, np.nan).fillna(df['occupation_category_code'])"
521 | ]
522 | },
523 | {
524 | "cell_type": "code",
525 | "execution_count": 7,
526 | "metadata": {},
527 | "outputs": [],
528 | "source": [
529 | "#Number of products each client holds (+1 for test, where one owned product is missing)\n",
530 | "#Columns 8..28 are the 21 product flags; the explicit bound keeps a re-run from adding 'sum' into itself\n",
531 | "train['sum'] = train.iloc[:, 8:29].sum(axis=1)\n",
532 | "\n",
533 | "test['sum'] = test.iloc[:, 8:29].sum(axis=1) + 1"
534 | ]
535 | },
536 | {
537 | "cell_type": "code",
538 | "execution_count": 8,
539 | "metadata": {},
540 | "outputs": [],
541 | "source": [
542 | "train.loc[train.marital_status == 'f', 'marital_status'] = 'F'"
543 | ]
544 | },
545 | {
546 | "cell_type": "code",
547 | "execution_count": 9,
548 | "metadata": {},
549 | "outputs": [],
550 | "source": [
551 | "#Append the column name to every categorical value so equal codes from different columns stay distinct\n",
552 | "#\n",
553 | "suffixes = {\n",
554 | "    'sex': '_sex',\n",
555 | "    'marital_status': '_marital_status',\n",
556 | "    'branch_code': '_branch_code',\n",
557 | "    'occupation_code': '_occupation_code',\n",
558 | "    'occupation_category_code': '_occupation_category_code',\n",
559 | "}\n",
560 | "for frame in (train, test):\n",
561 | "    for column, suffix in suffixes.items():\n",
562 | "        frame[column] += suffix"
563 | ]
564 | },
565 | {
566 | "cell_type": "code",
567 | "execution_count": 10,
568 | "metadata": {},
569 | "outputs": [],
570 | "source": [
571 | "names_products = [\n",
572 | " 'P5DA', 'RIBP', '8NN1', '7POT', '66FJ', 'GYSR', 'SOP4', 'RVSZ', 'PYUQ',\n",
573 | " 'LJR9', 'N2MW', 'AHXO', 'BSTQ', 'FM3X', 'K6QO', 'QBOL', 'JWFN', 'JZ9D',\n",
574 | " 'J9JW', 'GHYX', 'ECY3'\n",
575 | "]"
576 | ]
577 | },
578 | {
579 | "cell_type": "code",
580 | "execution_count": 16,
581 | "metadata": {
582 | "ExecuteTime": {
583 | "end_time": "2020-08-07T19:29:05.039157Z",
584 | "start_time": "2020-08-07T19:29:04.237473Z"
585 | },
586 | "colab": {},
587 | "colab_type": "code",
588 | "id": "oxjj_QAfEyZw"
589 | },
590 | "outputs": [
591 | {
592 | "data": {
593 | "application/vnd.jupyter.widget-view+json": {
594 | "model_id": "6d7164fbcd5143bda3ad980cd86f12b6",
595 | "version_major": 2,
596 | "version_minor": 0
597 | },
598 | "text/plain": [
599 | "HBox(children=(FloatProgress(value=0.0, max=29132.0), HTML(value='')))"
600 | ]
601 | },
602 | "metadata": {},
603 | "output_type": "display_data"
604 | },
605 | {
606 | "name": "stdout",
607 | "output_type": "stream",
608 | "text": [
609 | "\n"
610 | ]
611 | }
612 | ],
613 | "source": [
614 | "#Split every train client into one row per owned product: that product is hidden (set to 0)\n",
615 | "#and becomes the target, reproducing the test setup where one owned product is missing\n",
616 | "X_train = []\n",
617 | "X_train_columns = train.columns[:-1]\n",
618 | "df_train_true = []\n",
619 | "client_index = 0\n",
620 | "\n",
621 | "for line in tqdm_notebook(train.values):\n",
622 | "\n",
623 | "    info = line[:8]\n",
624 | "    #The last column is skipped: it is the 'sum' feature, not a product flag\n",
625 | "    info_products = line[8:-1]\n",
626 | "    indexes = [k for k, i in enumerate(info_products) if i == 1]\n",
627 | "\n",
628 | "    for i in indexes:\n",
629 | "        #ID2 numbers the generated rows, so it is unique per row rather than per client\n",
630 | "        client_index += 1\n",
631 | "\n",
632 | "        #The untouched product vector is the ground truth for this row\n",
633 | "        df_train_true.append(info_products)\n",
634 | "\n",
635 | "        #Hide product i in a copy; indexing it directly replaces the former scan over every position\n",
636 | "        info_products_transformed = list(info_products)\n",
637 | "        info_products_transformed[i] = 0\n",
638 | "\n",
639 | "        X_train.append(\n",
640 | "            list(info) + info_products_transformed +\n",
641 | "            [X_train_columns[8 + i]] + [client_index])\n",
642 | "\n",
643 | "X_train = pd.DataFrame(X_train)\n",
644 | "df_train_true = pd.DataFrame(df_train_true)\n",
645 | "df_train_true.columns = [\n",
646 | "    'P5DA', 'RIBP', '8NN1', '7POT', '66FJ', 'GYSR', 'SOP4', 'RVSZ', 'PYUQ',\n",
647 | "    'LJR9', 'N2MW', 'AHXO', 'BSTQ', 'FM3X', 'K6QO', 'QBOL', 'JWFN', 'JZ9D',\n",
648 | "    'J9JW', 'GHYX', 'ECY3'\n",
649 | "]\n",
650 | "X_train.columns = [\n",
651 | "    'ID', 'join_date', 'sex', 'marital_status', 'birth_year', 'branch_code',\n",
652 | "    'occupation_code', 'occupation_category_code', 'P5DA', 'RIBP', '8NN1',\n",
653 | "    '7POT', '66FJ', 'GYSR', 'SOP4', 'RVSZ', 'PYUQ', 'LJR9', 'N2MW', 'AHXO',\n",
654 | "    'BSTQ', 'FM3X', 'K6QO', 'QBOL', 'JWFN', 'JZ9D', 'J9JW', 'GHYX', 'ECY3',\n",
655 | "    'product_pred', 'ID2'\n",
656 | "]"
657 | ]
658 | },
659 | {
660 | "cell_type": "code",
661 | "execution_count": 17,
662 | "metadata": {
663 | "ExecuteTime": {
664 | "end_time": "2020-08-07T19:29:10.768064Z",
665 | "start_time": "2020-08-07T19:29:10.494815Z"
666 | },
667 | "colab": {},
668 | "colab_type": "code",
669 | "id": "URdSMgJeOnLE"
670 | },
671 | "outputs": [
672 | {
673 | "data": {
674 | "application/vnd.jupyter.widget-view+json": {
675 | "model_id": "4597b1fe75884ba6ba095cbddad5b809",
676 | "version_major": 2,
677 | "version_minor": 0
678 | },
679 | "text/plain": [
680 | "HBox(children=(FloatProgress(value=0.0, max=10000.0), HTML(value='')))"
681 | ]
682 | },
683 | "metadata": {},
684 | "output_type": "display_data"
685 | },
686 | {
687 | "name": "stdout",
688 | "output_type": "stream",
689 | "text": [
690 | "\n"
691 | ]
692 | }
693 | ],
694 | "source": [
695 | "#Build the test matrix and collect every 'ID X PCODE' pair whose product flag is already 1\n",
696 | "#\n",
697 | "X_test = []\n",
698 | "true_values = []\n",
699 | "client_index = 0\n",
700 | "product_columns = test.columns[8:]\n",
701 | "\n",
702 | "for line in tqdm_notebook(test.values):\n",
703 | "\n",
704 | "    #ID2: running 1-based row number\n",
705 | "    client_index += 1\n",
706 | "    info, info_products = line[:8], line[8:-1]\n",
707 | "    indexes = [pos for pos, flag in enumerate(info_products) if flag == 1]\n",
708 | "\n",
709 | "    X_test.append([*info, *info_products, client_index])\n",
710 | "    true_values.extend(line[0] + ' X ' + true for true in product_columns[indexes])\n",
711 | "\n",
712 | "X_test = pd.DataFrame(\n",
713 | "    X_test,\n",
714 | "    columns=[\n",
715 | "        'ID', 'join_date', 'sex', 'marital_status', 'birth_year', 'branch_code',\n",
716 | "        'occupation_code', 'occupation_category_code', 'P5DA', 'RIBP', '8NN1',\n",
717 | "        '7POT', '66FJ', 'GYSR', 'SOP4', 'RVSZ', 'PYUQ', 'LJR9', 'N2MW', 'AHXO',\n",
718 | "        'BSTQ', 'FM3X', 'K6QO', 'QBOL', 'JWFN', 'JZ9D', 'J9JW', 'GHYX', 'ECY3',\n",
719 | "        'ID2'\n",
720 | "    ])"
721 | ]
722 | },
723 | {
724 | "cell_type": "code",
725 | "execution_count": 18,
726 | "metadata": {
727 | "ExecuteTime": {
728 | "end_time": "2020-08-07T19:29:11.136106Z",
729 | "start_time": "2020-08-07T19:29:11.129621Z"
730 | }
731 | },
732 | "outputs": [
733 | {
734 | "data": {
735 | "text/plain": [
736 | "((29132, 30), (66353, 31))"
737 | ]
738 | },
739 | "execution_count": 18,
740 | "metadata": {},
741 | "output_type": "execute_result"
742 | }
743 | ],
744 | "source": [
745 | "#Checking shapes\n",
746 | "#\n",
747 | "train.shape, X_train.shape"
748 | ]
749 | },
750 | {
751 | "cell_type": "code",
752 | "execution_count": 19,
753 | "metadata": {
754 | "ExecuteTime": {
755 | "end_time": "2020-08-07T19:29:07.853673Z",
756 | "start_time": "2020-08-07T19:29:07.785086Z"
757 | }
758 | },
759 | "outputs": [
760 | {
761 | "data": {
762 | "text/html": [
763 | "\n",
764 | "\n",
777 | "
\n",
778 | " \n",
779 | " \n",
780 | " | \n",
781 | " ID | \n",
782 | " join_date | \n",
783 | " sex | \n",
784 | " marital_status | \n",
785 | " birth_year | \n",
786 | " branch_code | \n",
787 | " occupation_code | \n",
788 | " occupation_category_code | \n",
789 | " P5DA | \n",
790 | " RIBP | \n",
791 | " ... | \n",
792 | " FM3X | \n",
793 | " K6QO | \n",
794 | " QBOL | \n",
795 | " JWFN | \n",
796 | " JZ9D | \n",
797 | " J9JW | \n",
798 | " GHYX | \n",
799 | " ECY3 | \n",
800 | " product_pred | \n",
801 | " ID2 | \n",
802 | "
\n",
803 | " \n",
804 | " \n",
805 | " \n",
806 | " | 0 | \n",
807 | " 4WKQSBB | \n",
808 | " 1/2/2019 | \n",
809 | " F_sex | \n",
810 | " M_marital_status | \n",
811 | " 1987 | \n",
812 | " 1X1H_branch_code | \n",
813 | " 2A7I_occupation_code | \n",
814 | " T4MS_occupation_category_code | \n",
815 | " 0 | \n",
816 | " 0 | \n",
817 | " ... | \n",
818 | " 0 | \n",
819 | " 1 | \n",
820 | " 0 | \n",
821 | " 0 | \n",
822 | " 0 | \n",
823 | " 0 | \n",
824 | " 0 | \n",
825 | " 0 | \n",
826 | " RVSZ | \n",
827 | " 1 | \n",
828 | "
\n",
829 | " \n",
830 | " | 1 | \n",
831 | " 4WKQSBB | \n",
832 | " 1/2/2019 | \n",
833 | " F_sex | \n",
834 | " M_marital_status | \n",
835 | " 1987 | \n",
836 | " 1X1H_branch_code | \n",
837 | " 2A7I_occupation_code | \n",
838 | " T4MS_occupation_category_code | \n",
839 | " 0 | \n",
840 | " 0 | \n",
841 | " ... | \n",
842 | " 0 | \n",
843 | " 0 | \n",
844 | " 0 | \n",
845 | " 0 | \n",
846 | " 0 | \n",
847 | " 0 | \n",
848 | " 0 | \n",
849 | " 0 | \n",
850 | " K6QO | \n",
851 | " 2 | \n",
852 | "
\n",
853 | " \n",
854 | "
\n",
855 | "
2 rows × 31 columns
\n",
856 | "
"
857 | ],
858 | "text/plain": [
859 | " ID join_date sex marital_status birth_year branch_code \\\n",
860 | "0 4WKQSBB 1/2/2019 F_sex M_marital_status 1987 1X1H_branch_code \n",
861 | "1 4WKQSBB 1/2/2019 F_sex M_marital_status 1987 1X1H_branch_code \n",
862 | "\n",
863 | " occupation_code occupation_category_code P5DA RIBP ... FM3X \\\n",
864 | "0 2A7I_occupation_code T4MS_occupation_category_code 0 0 ... 0 \n",
865 | "1 2A7I_occupation_code T4MS_occupation_category_code 0 0 ... 0 \n",
866 | "\n",
867 | " K6QO QBOL JWFN JZ9D J9JW GHYX ECY3 product_pred ID2 \n",
868 | "0 1 0 0 0 0 0 0 RVSZ 1 \n",
869 | "1 0 0 0 0 0 0 0 K6QO 2 \n",
870 | "\n",
871 | "[2 rows x 31 columns]"
872 | ]
873 | },
874 | "execution_count": 19,
875 | "metadata": {},
876 | "output_type": "execute_result"
877 | }
878 | ],
879 | "source": [
880 | "#Preview of the train data after the transformation\n",
881 | "#\n",
882 | "X_train.head(2)"
883 | ]
884 | },
885 | {
886 | "cell_type": "code",
887 | "execution_count": 20,
888 | "metadata": {
889 | "ExecuteTime": {
890 | "end_time": "2020-08-07T19:29:12.081590Z",
891 | "start_time": "2020-08-07T19:29:12.053903Z"
892 | }
893 | },
894 | "outputs": [
895 | {
896 | "data": {
897 | "text/html": [
898 | "\n",
899 | "\n",
912 | "
\n",
913 | " \n",
914 | " \n",
915 | " | \n",
916 | " ID | \n",
917 | " join_date | \n",
918 | " sex | \n",
919 | " marital_status | \n",
920 | " birth_year | \n",
921 | " branch_code | \n",
922 | " occupation_code | \n",
923 | " occupation_category_code | \n",
924 | " P5DA | \n",
925 | " RIBP | \n",
926 | " ... | \n",
927 | " BSTQ | \n",
928 | " FM3X | \n",
929 | " K6QO | \n",
930 | " QBOL | \n",
931 | " JWFN | \n",
932 | " JZ9D | \n",
933 | " J9JW | \n",
934 | " GHYX | \n",
935 | " ECY3 | \n",
936 | " ID2 | \n",
937 | "
\n",
938 | " \n",
939 | " \n",
940 | " \n",
941 | " | 0 | \n",
942 | " F86J5PC | \n",
943 | " 1/12/2018 | \n",
944 | " M_sex | \n",
945 | " M_marital_status | \n",
946 | " 1984 | \n",
947 | " 94KC_branch_code | \n",
948 | " DZRV_occupation_code | \n",
949 | " 90QI_occupation_category_code | \n",
950 | " 0 | \n",
951 | " 0 | \n",
952 | " ... | \n",
953 | " 0 | \n",
954 | " 0 | \n",
955 | " 0 | \n",
956 | " 0 | \n",
957 | " 0 | \n",
958 | " 0 | \n",
959 | " 0 | \n",
960 | " 0 | \n",
961 | " 0 | \n",
962 | " 1 | \n",
963 | "
\n",
964 | " \n",
965 | " | 1 | \n",
966 | " H6141K3 | \n",
967 | " 1/10/2019 | \n",
968 | " M_sex | \n",
969 | " M_marital_status | \n",
970 | " 1996 | \n",
971 | " 1X1H_branch_code | \n",
972 | " J9SY_occupation_code | \n",
973 | " 90QI_occupation_category_code | \n",
974 | " 0 | \n",
975 | " 0 | \n",
976 | " ... | \n",
977 | " 0 | \n",
978 | " 0 | \n",
979 | " 1 | \n",
980 | " 0 | \n",
981 | " 0 | \n",
982 | " 0 | \n",
983 | " 0 | \n",
984 | " 0 | \n",
985 | " 0 | \n",
986 | " 2 | \n",
987 | "
\n",
988 | " \n",
989 | "
\n",
990 | "
2 rows × 30 columns
\n",
991 | "
"
992 | ],
993 | "text/plain": [
994 | " ID join_date sex marital_status birth_year branch_code \\\n",
995 | "0 F86J5PC 1/12/2018 M_sex M_marital_status 1984 94KC_branch_code \n",
996 | "1 H6141K3 1/10/2019 M_sex M_marital_status 1996 1X1H_branch_code \n",
997 | "\n",
998 | " occupation_code occupation_category_code P5DA RIBP ... BSTQ \\\n",
999 | "0 DZRV_occupation_code 90QI_occupation_category_code 0 0 ... 0 \n",
1000 | "1 J9SY_occupation_code 90QI_occupation_category_code 0 0 ... 0 \n",
1001 | "\n",
1002 | " FM3X K6QO QBOL JWFN JZ9D J9JW GHYX ECY3 ID2 \n",
1003 | "0 0 0 0 0 0 0 0 0 1 \n",
1004 | "1 0 1 0 0 0 0 0 0 2 \n",
1005 | "\n",
1006 | "[2 rows x 30 columns]"
1007 | ]
1008 | },
1009 | "execution_count": 20,
1010 | "metadata": {},
1011 | "output_type": "execute_result"
1012 | }
1013 | ],
1014 | "source": [
1015 | "#Preview of the test data after the transformation\n",
1016 | "#\n",
1017 | "X_test.head(2)"
1018 | ]
1019 | },
1020 | {
1021 | "cell_type": "code",
1022 | "execution_count": 21,
1023 | "metadata": {},
1024 | "outputs": [
1025 | {
1026 | "data": {
1027 | "text/html": [
1028 | "\n",
1029 | "\n",
1042 | "
\n",
1043 | " \n",
1044 | " \n",
1045 | " | \n",
1046 | " P5DA | \n",
1047 | " RIBP | \n",
1048 | " 8NN1 | \n",
1049 | " 7POT | \n",
1050 | " 66FJ | \n",
1051 | " GYSR | \n",
1052 | " SOP4 | \n",
1053 | " RVSZ | \n",
1054 | " PYUQ | \n",
1055 | " LJR9 | \n",
1056 | " ... | \n",
1057 | " AHXO | \n",
1058 | " BSTQ | \n",
1059 | " FM3X | \n",
1060 | " K6QO | \n",
1061 | " QBOL | \n",
1062 | " JWFN | \n",
1063 | " JZ9D | \n",
1064 | " J9JW | \n",
1065 | " GHYX | \n",
1066 | " ECY3 | \n",
1067 | "
\n",
1068 | " \n",
1069 | " \n",
1070 | " \n",
1071 | " | 0 | \n",
1072 | " 0 | \n",
1073 | " 0 | \n",
1074 | " 0 | \n",
1075 | " 0 | \n",
1076 | " 0 | \n",
1077 | " 0 | \n",
1078 | " 0 | \n",
1079 | " 1 | \n",
1080 | " 0 | \n",
1081 | " 0 | \n",
1082 | " ... | \n",
1083 | " 0 | \n",
1084 | " 0 | \n",
1085 | " 0 | \n",
1086 | " 1 | \n",
1087 | " 0 | \n",
1088 | " 0 | \n",
1089 | " 0 | \n",
1090 | " 0 | \n",
1091 | " 0 | \n",
1092 | " 0 | \n",
1093 | "
\n",
1094 | " \n",
1095 | " | 1 | \n",
1096 | " 0 | \n",
1097 | " 0 | \n",
1098 | " 0 | \n",
1099 | " 0 | \n",
1100 | " 0 | \n",
1101 | " 0 | \n",
1102 | " 0 | \n",
1103 | " 1 | \n",
1104 | " 0 | \n",
1105 | " 0 | \n",
1106 | " ... | \n",
1107 | " 0 | \n",
1108 | " 0 | \n",
1109 | " 0 | \n",
1110 | " 1 | \n",
1111 | " 0 | \n",
1112 | " 0 | \n",
1113 | " 0 | \n",
1114 | " 0 | \n",
1115 | " 0 | \n",
1116 | " 0 | \n",
1117 | "
\n",
1118 | " \n",
1119 | "
\n",
1120 | "
2 rows × 21 columns
\n",
1121 | "
"
1122 | ],
1123 | "text/plain": [
1124 | " P5DA RIBP 8NN1 7POT 66FJ GYSR SOP4 RVSZ PYUQ LJR9 ... AHXO \\\n",
1125 | "0 0 0 0 0 0 0 0 1 0 0 ... 0 \n",
1126 | "1 0 0 0 0 0 0 0 1 0 0 ... 0 \n",
1127 | "\n",
1128 | " BSTQ FM3X K6QO QBOL JWFN JZ9D J9JW GHYX ECY3 \n",
1129 | "0 0 0 1 0 0 0 0 0 0 \n",
1130 | "1 0 0 1 0 0 0 0 0 0 \n",
1131 | "\n",
1132 | "[2 rows x 21 columns]"
1133 | ]
1134 | },
1135 | "execution_count": 21,
1136 | "metadata": {},
1137 | "output_type": "execute_result"
1138 | }
1139 | ],
1140 | "source": [
1141 | "#True product flags for each train row\n",
1142 | "#\n",
1143 | "df_train_true.head(2)"
1144 | ]
1145 | },
1146 | {
1147 | "cell_type": "markdown",
1148 | "metadata": {
1149 | "colab_type": "text",
1150 | "id": "s1KcI9I6g1de"
1151 | },
1152 | "source": [
1153 | "# Reshaping data"
1154 | ]
1155 | },
1156 | {
1157 | "cell_type": "code",
1158 | "execution_count": 22,
1159 | "metadata": {
1160 | "ExecuteTime": {
1161 | "end_time": "2020-08-07T19:29:13.305572Z",
1162 | "start_time": "2020-08-07T19:29:13.262176Z"
1163 | },
1164 | "code_folding": [],
1165 | "colab": {},
1166 | "colab_type": "code",
1167 | "id": "ttUKNdnAczTd"
1168 | },
1169 | "outputs": [],
1170 | "source": [
1171 | "#Pull every kept column out as an (n, 1) array; the next cell joins them back in this order\n",
1172 | "#\n",
1173 | "append_features = [\n",
1174 | "    'P5DA', 'RIBP', '8NN1', '7POT', '66FJ', 'GYSR', 'SOP4', 'RVSZ', 'PYUQ',\n",
1175 | "    'LJR9', 'N2MW', 'AHXO', 'BSTQ', 'FM3X', 'K6QO', 'QBOL', 'JWFN', 'JZ9D',\n",
1176 | "    'J9JW', 'GHYX', 'ECY3', 'ID', 'ID2', 'join_date', 'sex', 'marital_status',\n",
1177 | "    'branch_code', 'occupation_code', 'occupation_category_code', 'birth_year'\n",
1178 | "]\n",
1179 | "\n",
1180 | "features_train = [\n",
1181 | "    X_train[name].values.reshape(-1, 1) for name in append_features\n",
1182 | "]\n",
1183 | "features_test = [\n",
1184 | "    X_test[name].values.reshape(-1, 1) for name in append_features\n",
1185 | "]\n",
1186 | "columns = [\n",
1187 | "    np.array([name]) for name in append_features\n",
1188 | "]\n",
1189 | "\n",
1190 | "y_train = X_train[['product_pred']]"
1191 | ]
1192 | },
1193 | {
1194 | "cell_type": "code",
1195 | "execution_count": 23,
1196 | "metadata": {
1197 | "ExecuteTime": {
1198 | "end_time": "2020-08-07T19:29:13.988566Z",
1199 | "start_time": "2020-08-07T19:29:13.870945Z"
1200 | },
1201 | "colab": {},
1202 | "colab_type": "code",
1203 | "id": "kWCK5LrfkPp-"
1204 | },
1205 | "outputs": [],
1206 | "source": [
1207 | "#Join the (n, 1) column arrays side by side and rebuild both frames with the same column labels\n",
1208 | "features_train, features_test = (\n",
1209 | "    np.concatenate(parts, axis=1) for parts in (features_train, features_test)\n",
1210 | ")\n",
1211 | "columns = np.concatenate(np.array(columns))\n",
1212 | "\n",
1213 | "X_train = pd.DataFrame(features_train, columns=columns)\n",
1214 | "\n",
1215 | "X_test = pd.DataFrame(features_test, columns=columns)"
1216 | ]
1217 | },
1218 | {
1219 | "cell_type": "markdown",
1220 | "metadata": {
1221 | "colab_type": "text",
1222 | "id": "YVUKKo3llI0y"
1223 | },
1224 | "source": [
1225 | "# Add new features"
1226 | ]
1227 | },
1228 | {
1229 | "cell_type": "code",
1230 | "execution_count": 24,
1231 | "metadata": {
1232 | "ExecuteTime": {
1233 | "end_time": "2020-08-07T19:29:17.239570Z",
1234 | "start_time": "2020-08-07T19:29:16.140241Z"
1235 | },
1236 | "colab": {},
1237 | "colab_type": "code",
1238 | "id": "NhfA7yullH76"
1239 | },
1240 | "outputs": [],
1241 | "source": [
1242 | "#Derive calendar features from the join date (day, month, year, weekday, day of year) and the age at joining\n",
1243 | "#\n",
1244 | "for df in [X_train, X_test]:\n",
1245 | "    df['join_date'] = pd.to_datetime(df.join_date, format='%d/%m/%Y')\n",
1246 | "    #Days elapsed since 2010-01-01; pd.Timestamp replaces the pd.datetime alias, which pandas 2.0 removed\n",
1247 | "    df['from_begin'] = (df.join_date - pd.Timestamp(2010, 1, 1)).dt.days\n",
1248 | "\n",
1249 | "    df['join_day'] = df['join_date'].dt.day\n",
1250 | "    df['join_month'] = df['join_date'].dt.month\n",
1251 | "    df['join_year'] = df['join_date'].dt.year\n",
1252 | "    df['dayofweek'] = df['join_date'].dt.weekday\n",
1253 | "    df['day_of_year'] = df['join_date'].dt.dayofyear\n",
1254 | "\n",
1255 | "    df['age'] = (df['join_year'] - df['birth_year']).astype(float)"
1256 | ]
1257 | },
1258 | {
1259 | "cell_type": "code",
1260 | "execution_count": 25,
1261 | "metadata": {
1262 | "ExecuteTime": {
1263 | "end_time": "2020-08-07T19:31:04.854098Z",
1264 | "start_time": "2020-08-07T19:31:04.805496Z"
1265 | },
1266 | "code_folding": []
1267 | },
1268 | "outputs": [],
1269 | "source": [
1270 | "#Stack train and test rows into one frame; pd.concat keeps the row labels exactly as DataFrame.append did\n",
1271 | "#\n",
1272 | "common = pd.concat([X_train, X_test])"
1273 | ]
1274 | },
1275 | {
1276 | "cell_type": "code",
1277 | "execution_count": 26,
1278 | "metadata": {},
1279 | "outputs": [],
1280 | "source": [
1281 | "common['branch_start']=pd.Timestamp.now().year-common.groupby('branch_code')['join_year'].transform('min')  #years between the earliest join year seen for the branch and the current year"
1282 | ]
1283 | },
1284 | {
1285 | "cell_type": "code",
1286 | "execution_count": 27,
1287 | "metadata": {},
1288 | "outputs": [],
1289 | "source": [
1290 | "def transform(df, row):\n",
1291 | "    '''Add a group-wise aggregate column to df in place.\n",
1292 | "\n",
1293 | "    row is [new_column, group_key, source_column, aggregation]: df is grouped\n",
1294 | "    by group_key and the aggregation of source_column is broadcast back to\n",
1295 | "    every row of its group. Returns None.\n",
1296 | "    '''\n",
1297 | "    new_column, group_key, source_column, aggregation = row[:4]\n",
1298 | "    #Group the frame that was passed in rather than the notebook-level common\n",
1299 | "    df[new_column] = df.groupby(group_key)[source_column].transform(aggregation)"
1292 | ]
1293 | },
1294 | {
1295 | "cell_type": "code",
1296 | "execution_count": 28,
1297 | "metadata": {},
1298 | "outputs": [],
1299 | "source": [
1300 |     "#Group-wise aggregates to add: [new column, group-by column, source column, aggregation]\n",
1301 |     "row_features = [\n",
1302 |     "    ['nuniq_people', 'branch_code', 'ID', 'nunique'],\n",
1303 |     "    ['nuniq_branch_in_year', 'join_year', 'branch_code', 'nunique'],\n",
1304 |     "    ['nuniq_year', 'branch_code', 'join_year', 'nunique'],\n",
1305 |     "    ['nuniq_month', 'branch_code', 'join_month', 'nunique'],\n",
1306 |     "]\n",
1307 |     "#Age statistics (mean, std, median) per branch, then per occupation\n",
1308 |     "for group_col, label in [('branch_code', 'branch'), ('occupation_code', 'occupation')]:\n",
1309 |     "    row_features += [[agg + '_age_in_' + label, group_col, 'age', agg] for agg in ('mean', 'std', 'median')]\n",
1310 |     "for row in row_features:\n",
1311 |     "    transform(common, row)"
1312 | ]
1313 | },
1314 | {
1315 | "cell_type": "code",
1316 | "execution_count": 29,
1317 | "metadata": {},
1318 | "outputs": [],
1319 | "source": [
1320 |     "#Cut birth_year into 5 bins, then build combined branch/occupation keys joined by '_'\n",
1321 |     "common['birth_year_binary'] = pd.cut(common['birth_year'], bins=5)\n",
1322 |     "for combo, other in [('branch_ocupation', 'occupation_code'), ('branch_ocupcode', 'occupation_category_code')]:\n",
1323 |     "    common[combo] = common['branch_code'] + '_' + common[other]"
1324 | ]
1325 | },
1326 | {
1327 | "cell_type": "code",
1328 | "execution_count": 30,
1329 | "metadata": {},
1330 | "outputs": [],
1331 | "source": [
1332 |     "common['Number_of_Insurance_Bought'] = common.iloc[:, :21].sum(axis=1)  # NOTE(review): presumably the first 21 columns are the product flags - confirm\n",
1333 |     "def mapper(common):\n",
1334 |     "    \"\"\"Return the bucket label ('One', 'Medium', 'High' or 'Too High') for a row's Number_of_Insurance_Bought.\"\"\"\n",
1335 |     "    bought = common['Number_of_Insurance_Bought']\n",
1336 |     "    if bought == 1:\n",
1337 |     "        return 'One'\n",
1338 |     "    if 1 < bought < 5:\n",
1339 |     "        return 'Medium'\n",
1340 |     "    if 4 < bought < 8:\n",
1341 |     "        return 'High'\n",
1342 |     "    return 'Too High'  # any other value, e.g. 0 or 8 and above\n",
1343 |     "common['Insurance_Count'] = common.apply(mapper, axis=1)\n",
1344 |     "del common['Number_of_Insurance_Bought']"
1345 | ]
1346 | },
1347 | {
1348 | "cell_type": "code",
1349 | "execution_count": 31,
1350 | "metadata": {},
1351 | "outputs": [],
1352 | "source": [
1353 |     "#Frequency encoding: share of all rows that carry each value of the column\n",
1354 |     "freq_columns = ['sex', 'marital_status', 'occupation_code', 'occupation_category_code',\n",
1355 |     "                'birth_year_binary', 'branch_ocupation', 'branch_ocupcode', 'Insurance_Count']\n",
1356 |     "for name in freq_columns:\n",
1357 |     "    freq = common.groupby(name).size().div(len(common))\n",
1358 |     "    looked_up = common[name].apply(lambda value: freq[value])\n",
1359 |     "    common[name + '_freq'] = looked_up.astype(float)"
1360 | ]
1361 | },
1362 | {
1363 | "cell_type": "code",
1364 | "execution_count": 32,
1365 | "metadata": {},
1366 | "outputs": [],
1367 | "source": [
1368 |     "le_ins = LabelEncoder().fit(common['Insurance_Count'])  # learn integer codes for the Insurance_Count labels\n",
1369 |     "common['Insurance_Count'] = le_ins.transform(common['Insurance_Count'])"
1370 | ]
1371 | },
1372 | {
1373 | "cell_type": "code",
1374 | "execution_count": 33,
1375 | "metadata": {},
1376 | "outputs": [
1377 | {
1378 | "data": {
1379 | "application/vnd.jupyter.widget-view+json": {
1380 | "model_id": "814282d1539a46fe838138df39f56a12",
1381 | "version_major": 2,
1382 | "version_minor": 0
1383 | },
1384 | "text/plain": [
1385 | "HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))"
1386 | ]
1387 | },
1388 | "metadata": {},
1389 | "output_type": "display_data"
1390 | },
1391 | {
1392 | "name": "stdout",
1393 | "output_type": "stream",
1394 | "text": [
1395 | "\n"
1396 | ]
1397 | }
1398 | ],
1399 | "source": [
1400 |     "#Label for each row: sorted names of the products whose flag equals 1, joined by '_'.\n",
1401 |     "#Rows are processed by position in a single pass, so the result does not depend on index\n",
1402 |     "#labels (which may repeat between the train and test parts of the concatenated frame)\n",
1403 |     "owned = common[names_products].to_numpy() == 1\n",
1404 |     "common['product_comb'] = ['_'.join(sorted(c for c, has in zip(names_products, flags) if has))\n",
1405 |     "                          for flags in owned]\n",
1406 |     "common['product_comb'] = le_ins.fit_transform(common['product_comb'])"
1407 | ]
1408 | },
1409 | {
1410 | "cell_type": "code",
1411 | "execution_count": 34,
1412 | "metadata": {},
1413 | "outputs": [
1414 | {
1415 | "data": {
1416 | "application/vnd.jupyter.widget-view+json": {
1417 | "model_id": "e2b5ae2003cc4d2396effe9199d79d3a",
1418 | "version_major": 2,
1419 | "version_minor": 0
1420 | },
1421 | "text/plain": [
1422 | "HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))"
1423 | ]
1424 | },
1425 | "metadata": {},
1426 | "output_type": "display_data"
1427 | },
1428 | {
1429 | "name": "stdout",
1430 | "output_type": "stream",
1431 | "text": [
1432 | "\n"
1433 | ]
1434 | }
1435 | ],
1436 | "source": [
1437 |     "#For every ordered product pair (col, other): total of `other` within each group of `col` values\n",
1438 |     "for col in tqdm_notebook(names_products):\n",
1439 |     "    for other in (p for p in names_products if p != col):\n",
1440 |     "        common[col+'_'+other] = common.groupby(col)[other].transform('sum')  # 'sum' by name: passing the builtin sum is a deprecated alias"
1441 | ]
1442 | },
1443 | {
1444 | "cell_type": "code",
1445 | "execution_count": 35,
1446 | "metadata": {},
1447 | "outputs": [],
1448 | "source": [
1449 |     "#Drop the intermediate grouping columns (their '_freq' encodings stay in the frame)\n",
1450 |     "helper_columns = ['birth_year_binary', 'branch_ocupation', 'branch_ocupcode']\n",
1451 |     "common.drop(columns=helper_columns, inplace=True)"
1452 | ]
1453 | },
1454 | {
1455 | "cell_type": "code",
1456 | "execution_count": 36,
1457 | "metadata": {
1458 | "scrolled": true
1459 | },
1460 | "outputs": [
1461 | {
1462 | "data": {
1463 | "application/vnd.jupyter.widget-view+json": {
1464 | "model_id": "a1a0bab252f64d8992e09fd5c35fe805",
1465 | "version_major": 2,
1466 | "version_minor": 0
1467 | },
1468 | "text/plain": [
1469 | "HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))"
1470 | ]
1471 | },
1472 | "metadata": {},
1473 | "output_type": "display_data"
1474 | },
1475 | {
1476 | "name": "stdout",
1477 | "output_type": "stream",
1478 | "text": [
1479 | "\n"
1480 | ]
1481 | }
1482 | ],
1483 | "source": [
1484 |     "#Per branch: offset of from_begin from the branch's smallest from_begin, and from the\n",
1485 |     "#smallest from_begin among that branch's rows where each product flag equals 1\n",
1486 |     "for code in tqdm_notebook(common.branch_code.unique()):\n",
1487 |     "    in_branch = common.branch_code == code\n",
1488 |     "    branch_begin = common.loc[in_branch, 'from_begin']\n",
1489 |     "    common.loc[in_branch, 'from_arise_branch'] = branch_begin - branch_begin.min()\n",
1490 |     "    for product in names_products:\n",
1491 |     "        common.loc[in_branch, 'from_arise_product_'+product+'_in_branch'] = branch_begin - common.loc[in_branch & (common[product]==1), 'from_begin'].min()"
1492 | ]
1493 | },
1494 | {
1495 | "cell_type": "code",
1496 | "execution_count": 37,
1497 | "metadata": {
1498 | "code_folding": []
1499 | },
1500 | "outputs": [
1501 | {
1502 | "data": {
1503 | "application/vnd.jupyter.widget-view+json": {
1504 | "model_id": "5956fe24f4ff4ed19eac512f2988de2f",
1505 | "version_major": 2,
1506 | "version_minor": 0
1507 | },
1508 | "text/plain": [
1509 | "HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))"
1510 | ]
1511 | },
1512 | "metadata": {},
1513 | "output_type": "display_data"
1514 | },
1515 | {
1516 | "name": "stdout",
1517 | "output_type": "stream",
1518 | "text": [
1519 | "\n"
1520 | ]
1521 | }
1522 | ],
1523 | "source": [
1524 |     "for product in tqdm_notebook(names_products):  # aggregations are given by name: passing the builtin sum to transform is a deprecated alias\n",
1525 |     "    first_join = common.loc[common[product] == 1, 'join_date'].min()  # earliest join_date among rows where the product flag is 1\n",
1526 |     "    common['from_arise_product_'+product] = (common['join_date'] - first_join).dt.days\n",
1527 |     "    common[product+'_sum_in_branch'] = common.groupby('branch_code')[product].transform('sum')\n",
1528 |     "    for stat in ('mean', 'std', 'median'):  # age statistics within each value of the product flag\n",
1529 |     "        common[product+'__age_'+stat] = common.groupby(product)['age'].transform(stat)\n",
1530 |     "    common[product+'__sum_join_year'] = common.groupby('join_year')[product].transform('sum')"
1531 | ]
1532 | },
1533 | {
1534 | "cell_type": "code",
1535 | "execution_count": 41,
1536 | "metadata": {},
1537 | "outputs": [],
1538 | "source": [
1539 |     "#Split the concatenated frame back into its train and test parts by row position\n",
1540 |     "#NOTE(review): 66353 is presumably the number of train rows - confirm\n",
1541 |     "n_train_rows = 66353\n",
1542 |     "X_train, X_test = common.iloc[:n_train_rows], common.iloc[n_train_rows:]"
1543 | ]
1544 | },
1545 | {
1546 | "cell_type": "markdown",
1547 | "metadata": {
1548 | "colab_type": "text",
1549 | "id": "GDTy7qyulLoP"
1550 | },
1551 | "source": [
1552 | "# Encoding"
1553 | ]
1554 | },
1555 | {
1556 | "cell_type": "code",
1557 | "execution_count": 46,
1558 | "metadata": {},
1559 | "outputs": [],
1560 | "source": [
1561 |     "#Encode the target labels (first column of y_train) as integers\n",
1562 |     "#NOTE(review): the labels are presumably the names of the missing products - confirm\n",
1563 |     "raw_target = y_train.iloc[:, 0]\n",
1564 |     "le = LabelEncoder()\n",
1565 |     "encoded_target = le.fit_transform(raw_target)\n",
1566 |     "\n",
1567 |     "y_train = pd.DataFrame({'target': encoded_target})"
1568 | ]
1569 | },
1570 | {
1571 | "cell_type": "code",
1572 | "execution_count": 48,
1573 | "metadata": {
1574 | "code_folding": []
1575 | },
1576 | "outputs": [],
1577 | "source": [
1578 |     "#Merge in the 'sum' column (amount of purchased products) from train/test\n",
1579 |     "#(default inner merge on the columns the two frames have in common)\n",
1580 |     "X_train = pd.merge(X_train, train[['ID', 'sum']])\n",
1581 |     "X_test = pd.merge(X_test, test[['ID', 'sum']])"
1582 | ]
1583 | },
1584 | {
1585 | "cell_type": "code",
1586 | "execution_count": 49,
1587 | "metadata": {
1588 | "code_folding": []
1589 | },
1590 | "outputs": [],
1591 | "source": [
1592 |     "#String copies of columns so they can be used as cat_features\n",
1593 |     "#(birth_year is converted in place; the others are written to a new '_cat' column)\n",
1594 |     "str_columns = {'dayofweek_cat': 'dayofweek', 'from_begin_cat': 'from_begin',\n",
1595 |     "               'birth_year': 'birth_year', 'join_year_cat': 'join_year',\n",
1596 |     "               'sum_cat': 'sum', 'day_of_year_cat': 'day_of_year',\n",
1597 |     "               }\n",
1598 |     "for df in [X_train, X_test]:\n",
1599 |     "    for target_col, source_col in str_columns.items():\n",
1600 |     "        df[target_col] = df[source_col].astype(str)"
1601 | ]
1602 | },
1603 | {
1604 | "cell_type": "markdown",
1605 | "metadata": {
1606 | "colab_type": "text",
1607 | "id": "qn9zHq0iqhA3"
1608 | },
1609 | "source": [
1610 | "# Model"
1611 | ]
1612 | },
1613 | {
1614 | "cell_type": "markdown",
1615 | "metadata": {},
1616 | "source": [
1617 | "## Model main"
1618 | ]
1619 | },
1620 | {
1621 | "cell_type": "code",
1622 | "execution_count": 50,
1623 | "metadata": {},
1624 | "outputs": [],
1625 | "source": [
1626 |     "#Names of the columns to treat as categorical features\n",
1627 |     "#NOTE(review): 'day_of_year_cat' is created by the astype(str) step but is not listed here - confirm intended\n",
1628 |     "cat_features = [\n",
1629 |     "    # columns not produced by the astype(str) step\n",
1630 |     "    'sex', 'marital_status',\n",
1631 |     "    'branch_code',\n",
1632 |     "    'occupation_category_code',\n",
1633 |     "    'occupation_code',\n",
1634 |     "    # columns written by the astype(str) step\n",
1635 |     "    'dayofweek_cat', 'from_begin_cat',\n",
1636 |     "    'sum_cat',\n",
1637 |     "    'birth_year',\n",
1638 |     "    'join_year_cat',\n",
1639 |     "]"
1640 | ]
1641 | },
1642 | {
1643 | "cell_type": "code",
1644 | "execution_count": 51,
1645 | "metadata": {
1646 | "code_folding": [],
1647 | "scrolled": true
1648 | },
1649 | "outputs": [
1650 | {
1651 | "name": "stdout",
1652 | "output_type": "stream",
1653 | "text": [
1654 | "Fold 1\n",
1655 | "0:\tlearn: 2.9445576\ttest: 2.9428904\tbest: 2.9428904 (0)\ttotal: 77.7ms\tremaining: 19m 26s\n",
1656 | "100:\tlearn: 0.9699479\ttest: 0.9618732\tbest: 0.9618732 (100)\ttotal: 5.8s\tremaining: 14m 15s\n",
1657 | "200:\tlearn: 0.6838025\ttest: 0.6798409\tbest: 0.6798409 (200)\ttotal: 11.4s\tremaining: 13m 58s\n",
1658 | "300:\tlearn: 0.5793816\ttest: 0.5790330\tbest: 0.5790330 (300)\ttotal: 16.8s\tremaining: 13m 41s\n",
1659 | "400:\tlearn: 0.5248620\ttest: 0.5276316\tbest: 0.5276316 (400)\ttotal: 22s\tremaining: 13m 21s\n",
1660 | "500:\tlearn: 0.4913727\ttest: 0.4965360\tbest: 0.4965360 (500)\ttotal: 27.3s\tremaining: 13m 11s\n",
1661 | "600:\tlearn: 0.4687746\ttest: 0.4759556\tbest: 0.4759556 (600)\ttotal: 32.6s\tremaining: 13m 1s\n",
1662 | "700:\tlearn: 0.4517229\ttest: 0.4605643\tbest: 0.4605643 (700)\ttotal: 37.9s\tremaining: 12m 53s\n",
1663 | "800:\tlearn: 0.4381602\ttest: 0.4486232\tbest: 0.4486232 (800)\ttotal: 43.2s\tremaining: 12m 45s\n",
1664 | "900:\tlearn: 0.4274216\ttest: 0.4394789\tbest: 0.4394789 (900)\ttotal: 48.4s\tremaining: 12m 37s\n",
1665 | "1000:\tlearn: 0.4178731\ttest: 0.4313687\tbest: 0.4313687 (1000)\ttotal: 53.6s\tremaining: 12m 29s\n",
1666 | "1100:\tlearn: 0.4100848\ttest: 0.4250238\tbest: 0.4250238 (1100)\ttotal: 58.9s\tremaining: 12m 23s\n",
1667 | "1200:\tlearn: 0.4027702\ttest: 0.4192038\tbest: 0.4192038 (1200)\ttotal: 1m 4s\tremaining: 12m 17s\n",
1668 | "1300:\tlearn: 0.3964511\ttest: 0.4144097\tbest: 0.4144097 (1300)\ttotal: 1m 9s\tremaining: 12m 11s\n",
1669 | "1400:\tlearn: 0.3907707\ttest: 0.4101905\tbest: 0.4101905 (1400)\ttotal: 1m 14s\tremaining: 12m 5s\n",
1670 | "1500:\tlearn: 0.3856775\ttest: 0.4065659\tbest: 0.4065659 (1500)\ttotal: 1m 19s\tremaining: 11m 59s\n",
1671 | "1600:\tlearn: 0.3810719\ttest: 0.4034603\tbest: 0.4034603 (1600)\ttotal: 1m 25s\tremaining: 11m 53s\n",
1672 | "1700:\tlearn: 0.3768876\ttest: 0.4007674\tbest: 0.4007674 (1700)\ttotal: 1m 30s\tremaining: 11m 47s\n",
1673 | "1800:\tlearn: 0.3732356\ttest: 0.3986237\tbest: 0.3986237 (1800)\ttotal: 1m 35s\tremaining: 11m 41s\n",
1674 | "1900:\tlearn: 0.3697762\ttest: 0.3965820\tbest: 0.3965820 (1900)\ttotal: 1m 40s\tremaining: 11m 34s\n",
1675 | "2000:\tlearn: 0.3666192\ttest: 0.3947294\tbest: 0.3947294 (2000)\ttotal: 1m 45s\tremaining: 11m 28s\n",
1676 | "2100:\tlearn: 0.3636188\ttest: 0.3931982\tbest: 0.3931982 (2100)\ttotal: 1m 51s\tremaining: 11m 21s\n",
1677 | "2200:\tlearn: 0.3609654\ttest: 0.3918206\tbest: 0.3918206 (2200)\ttotal: 1m 56s\tremaining: 11m 15s\n",
1678 | "2300:\tlearn: 0.3584496\ttest: 0.3905929\tbest: 0.3905929 (2300)\ttotal: 2m 1s\tremaining: 11m 9s\n",
1679 | "2400:\tlearn: 0.3560133\ttest: 0.3894057\tbest: 0.3894057 (2400)\ttotal: 2m 6s\tremaining: 11m 2s\n",
1680 | "2500:\tlearn: 0.3538295\ttest: 0.3882776\tbest: 0.3882776 (2500)\ttotal: 2m 11s\tremaining: 10m 56s\n",
1681 | "2600:\tlearn: 0.3518477\ttest: 0.3873623\tbest: 0.3873623 (2600)\ttotal: 2m 16s\tremaining: 10m 49s\n",
1682 | "2700:\tlearn: 0.3497690\ttest: 0.3863694\tbest: 0.3863694 (2700)\ttotal: 2m 21s\tremaining: 10m 43s\n",
1683 | "2800:\tlearn: 0.3478093\ttest: 0.3854819\tbest: 0.3854819 (2800)\ttotal: 2m 26s\tremaining: 10m 37s\n",
1684 | "2900:\tlearn: 0.3460032\ttest: 0.3848065\tbest: 0.3848065 (2900)\ttotal: 2m 31s\tremaining: 10m 31s\n",
1685 | "3000:\tlearn: 0.3442535\ttest: 0.3841118\tbest: 0.3841080 (2999)\ttotal: 2m 36s\tremaining: 10m 25s\n",
1686 | "3100:\tlearn: 0.3425588\ttest: 0.3834379\tbest: 0.3834379 (3100)\ttotal: 2m 41s\tremaining: 10m 19s\n",
1687 | "3200:\tlearn: 0.3410767\ttest: 0.3829064\tbest: 0.3829029 (3198)\ttotal: 2m 46s\tremaining: 10m 13s\n",
1688 | "3300:\tlearn: 0.3395174\ttest: 0.3823415\tbest: 0.3823415 (3300)\ttotal: 2m 51s\tremaining: 10m 6s\n",
1689 | "3400:\tlearn: 0.3379953\ttest: 0.3817752\tbest: 0.3817748 (3399)\ttotal: 2m 56s\tremaining: 10m 1s\n",
1690 | "3500:\tlearn: 0.3365688\ttest: 0.3813758\tbest: 0.3813758 (3500)\ttotal: 3m 1s\tremaining: 9m 55s\n",
1691 | "3600:\tlearn: 0.3352206\ttest: 0.3809488\tbest: 0.3809488 (3600)\ttotal: 3m 6s\tremaining: 9m 49s\n",
1692 | "3700:\tlearn: 0.3339196\ttest: 0.3804761\tbest: 0.3804761 (3700)\ttotal: 3m 11s\tremaining: 9m 43s\n",
1693 | "3800:\tlearn: 0.3326430\ttest: 0.3801036\tbest: 0.3801036 (3800)\ttotal: 3m 15s\tremaining: 9m 37s\n",
1694 | "3900:\tlearn: 0.3313024\ttest: 0.3797104\tbest: 0.3797104 (3900)\ttotal: 3m 20s\tremaining: 9m 31s\n",
1695 | "4000:\tlearn: 0.3301287\ttest: 0.3793236\tbest: 0.3793236 (4000)\ttotal: 3m 25s\tremaining: 9m 25s\n",
1696 | "4100:\tlearn: 0.3288722\ttest: 0.3789769\tbest: 0.3789769 (4100)\ttotal: 3m 30s\tremaining: 9m 19s\n",
1697 | "4200:\tlearn: 0.3275755\ttest: 0.3786657\tbest: 0.3786657 (4200)\ttotal: 3m 35s\tremaining: 9m 14s\n",
1698 | "4300:\tlearn: 0.3263775\ttest: 0.3783198\tbest: 0.3783198 (4300)\ttotal: 3m 40s\tremaining: 9m 8s\n",
1699 | "4400:\tlearn: 0.3252476\ttest: 0.3780543\tbest: 0.3780543 (4400)\ttotal: 3m 45s\tremaining: 9m 2s\n",
1700 | "4500:\tlearn: 0.3241109\ttest: 0.3777310\tbest: 0.3777310 (4500)\ttotal: 3m 50s\tremaining: 8m 56s\n",
1701 | "4600:\tlearn: 0.3229418\ttest: 0.3775090\tbest: 0.3775086 (4599)\ttotal: 3m 55s\tremaining: 8m 51s\n",
1702 | "4700:\tlearn: 0.3218174\ttest: 0.3772874\tbest: 0.3772870 (4699)\ttotal: 4m\tremaining: 8m 45s\n",
1703 | "4800:\tlearn: 0.3207294\ttest: 0.3770379\tbest: 0.3770379 (4800)\ttotal: 4m 4s\tremaining: 8m 40s\n",
1704 | "4900:\tlearn: 0.3196369\ttest: 0.3767789\tbest: 0.3767789 (4900)\ttotal: 4m 9s\tremaining: 8m 34s\n",
1705 | "5000:\tlearn: 0.3185608\ttest: 0.3765598\tbest: 0.3765598 (5000)\ttotal: 4m 14s\tremaining: 8m 29s\n",
1706 | "5100:\tlearn: 0.3174650\ttest: 0.3762987\tbest: 0.3762987 (5100)\ttotal: 4m 19s\tremaining: 8m 23s\n",
1707 | "5200:\tlearn: 0.3164677\ttest: 0.3760948\tbest: 0.3760948 (5200)\ttotal: 4m 24s\tremaining: 8m 18s\n",
1708 | "5300:\tlearn: 0.3155009\ttest: 0.3759666\tbest: 0.3759542 (5289)\ttotal: 4m 29s\tremaining: 8m 13s\n",
1709 | "5400:\tlearn: 0.3145589\ttest: 0.3758027\tbest: 0.3758027 (5400)\ttotal: 4m 34s\tremaining: 8m 7s\n",
1710 | "5500:\tlearn: 0.3136438\ttest: 0.3756387\tbest: 0.3756387 (5500)\ttotal: 4m 39s\tremaining: 8m 2s\n",
1711 | "5600:\tlearn: 0.3126727\ttest: 0.3754576\tbest: 0.3754576 (5600)\ttotal: 4m 44s\tremaining: 7m 56s\n",
1712 | "5700:\tlearn: 0.3117880\ttest: 0.3752937\tbest: 0.3752937 (5700)\ttotal: 4m 48s\tremaining: 7m 51s\n",
1713 | "5800:\tlearn: 0.3107715\ttest: 0.3751184\tbest: 0.3751184 (5800)\ttotal: 4m 53s\tremaining: 7m 46s\n",
1714 | "5900:\tlearn: 0.3099772\ttest: 0.3749611\tbest: 0.3749544 (5897)\ttotal: 4m 58s\tremaining: 7m 40s\n",
1715 | "6000:\tlearn: 0.3090775\ttest: 0.3748101\tbest: 0.3748101 (6000)\ttotal: 5m 3s\tremaining: 7m 35s\n",
1716 | "6100:\tlearn: 0.3082600\ttest: 0.3746978\tbest: 0.3746978 (6100)\ttotal: 5m 8s\tremaining: 7m 30s\n",
1717 | "6200:\tlearn: 0.3073831\ttest: 0.3746079\tbest: 0.3746074 (6199)\ttotal: 5m 13s\tremaining: 7m 24s\n",
1718 | "6300:\tlearn: 0.3065163\ttest: 0.3744541\tbest: 0.3744541 (6300)\ttotal: 5m 18s\tremaining: 7m 19s\n",
1719 | "6400:\tlearn: 0.3056747\ttest: 0.3743266\tbest: 0.3743266 (6400)\ttotal: 5m 23s\tremaining: 7m 14s\n",
1720 | "6500:\tlearn: 0.3048217\ttest: 0.3741856\tbest: 0.3741856 (6500)\ttotal: 5m 28s\tremaining: 7m 8s\n",
1721 | "6600:\tlearn: 0.3039494\ttest: 0.3740515\tbest: 0.3740515 (6600)\ttotal: 5m 33s\tremaining: 7m 3s\n",
1722 | "6700:\tlearn: 0.3031394\ttest: 0.3739326\tbest: 0.3739326 (6700)\ttotal: 5m 38s\tremaining: 6m 58s\n",
1723 | "6800:\tlearn: 0.3023120\ttest: 0.3738246\tbest: 0.3738200 (6792)\ttotal: 5m 42s\tremaining: 6m 53s\n",
1724 | "6900:\tlearn: 0.3014222\ttest: 0.3737430\tbest: 0.3737430 (6900)\ttotal: 5m 47s\tremaining: 6m 48s\n",
1725 | "7000:\tlearn: 0.3006848\ttest: 0.3736238\tbest: 0.3736229 (6999)\ttotal: 5m 52s\tremaining: 6m 43s\n",
1726 | "7100:\tlearn: 0.2999227\ttest: 0.3735453\tbest: 0.3735441 (7095)\ttotal: 5m 57s\tremaining: 6m 37s\n",
1727 | "7200:\tlearn: 0.2990862\ttest: 0.3734714\tbest: 0.3734714 (7200)\ttotal: 6m 2s\tremaining: 6m 32s\n",
1728 | "7300:\tlearn: 0.2982340\ttest: 0.3733713\tbest: 0.3733706 (7296)\ttotal: 6m 7s\tremaining: 6m 27s\n",
1729 | "7400:\tlearn: 0.2974178\ttest: 0.3732681\tbest: 0.3732632 (7393)\ttotal: 6m 12s\tremaining: 6m 22s\n",
1730 | "7500:\tlearn: 0.2965426\ttest: 0.3731930\tbest: 0.3731913 (7499)\ttotal: 6m 17s\tremaining: 6m 17s\n",
1731 | "7600:\tlearn: 0.2957518\ttest: 0.3731020\tbest: 0.3731008 (7597)\ttotal: 6m 22s\tremaining: 6m 12s\n",
1732 | "7700:\tlearn: 0.2949449\ttest: 0.3730386\tbest: 0.3730376 (7695)\ttotal: 6m 27s\tremaining: 6m 7s\n",
1733 | "7800:\tlearn: 0.2941542\ttest: 0.3730005\tbest: 0.3729990 (7765)\ttotal: 6m 32s\tremaining: 6m 2s\n",
1734 | "7900:\tlearn: 0.2934190\ttest: 0.3729462\tbest: 0.3729462 (7900)\ttotal: 6m 37s\tremaining: 5m 56s\n",
1735 | "8000:\tlearn: 0.2926701\ttest: 0.3729496\tbest: 0.3729383 (7953)\ttotal: 6m 42s\tremaining: 5m 51s\n",
1736 | "8100:\tlearn: 0.2919350\ttest: 0.3729189\tbest: 0.3729189 (8100)\ttotal: 6m 47s\tremaining: 5m 46s\n",
1737 | "8200:\tlearn: 0.2911466\ttest: 0.3728935\tbest: 0.3728867 (8195)\ttotal: 6m 52s\tremaining: 5m 41s\n",
1738 | "8300:\tlearn: 0.2904501\ttest: 0.3728195\tbest: 0.3728104 (8281)\ttotal: 6m 57s\tremaining: 5m 36s\n",
1739 | "8400:\tlearn: 0.2897403\ttest: 0.3727076\tbest: 0.3727076 (8400)\ttotal: 7m 2s\tremaining: 5m 31s\n",
1740 | "8500:\tlearn: 0.2889573\ttest: 0.3726123\tbest: 0.3726123 (8500)\ttotal: 7m 7s\tremaining: 5m 26s\n",
1741 | "8600:\tlearn: 0.2882222\ttest: 0.3725166\tbest: 0.3725107 (8592)\ttotal: 7m 11s\tremaining: 5m 21s\n",
1742 | "8700:\tlearn: 0.2874155\ttest: 0.3724603\tbest: 0.3724588 (8696)\ttotal: 7m 16s\tremaining: 5m 16s\n"
1743 | ]
1744 | },
1745 | {
1746 | "name": "stdout",
1747 | "output_type": "stream",
1748 | "text": [
1749 | "8800:\tlearn: 0.2866961\ttest: 0.3724647\tbest: 0.3724497 (8715)\ttotal: 7m 21s\tremaining: 5m 11s\n",
1750 | "8900:\tlearn: 0.2859461\ttest: 0.3724325\tbest: 0.3724298 (8897)\ttotal: 7m 26s\tremaining: 5m 6s\n",
1751 | "9000:\tlearn: 0.2852204\ttest: 0.3723815\tbest: 0.3723774 (8996)\ttotal: 7m 31s\tremaining: 5m 1s\n",
1752 | "9100:\tlearn: 0.2845444\ttest: 0.3723280\tbest: 0.3723280 (9100)\ttotal: 7m 36s\tremaining: 4m 56s\n",
1753 | "9200:\tlearn: 0.2837712\ttest: 0.3722565\tbest: 0.3722565 (9200)\ttotal: 7m 41s\tremaining: 4m 50s\n",
1754 | "9300:\tlearn: 0.2830607\ttest: 0.3722009\tbest: 0.3722009 (9300)\ttotal: 7m 46s\tremaining: 4m 45s\n",
1755 | "9400:\tlearn: 0.2823363\ttest: 0.3721711\tbest: 0.3721694 (9358)\ttotal: 7m 51s\tremaining: 4m 40s\n",
1756 | "9500:\tlearn: 0.2816317\ttest: 0.3721397\tbest: 0.3721264 (9494)\ttotal: 7m 56s\tremaining: 4m 35s\n",
1757 | "9600:\tlearn: 0.2809993\ttest: 0.3721344\tbest: 0.3721242 (9538)\ttotal: 8m 1s\tremaining: 4m 30s\n",
1758 | "9700:\tlearn: 0.2803175\ttest: 0.3721418\tbest: 0.3721211 (9633)\ttotal: 8m 6s\tremaining: 4m 25s\n",
1759 | "9800:\tlearn: 0.2796034\ttest: 0.3721264\tbest: 0.3721145 (9724)\ttotal: 8m 11s\tremaining: 4m 20s\n",
1760 | "9900:\tlearn: 0.2788351\ttest: 0.3721505\tbest: 0.3721145 (9724)\ttotal: 8m 16s\tremaining: 4m 15s\n",
1761 | "bestTest = 0.3721145043\n",
1762 | "bestIteration = 9724\n",
1763 | "Shrink model to first 9725 iterations.\n",
1764 | "Fold 2\n",
1765 | "0:\tlearn: 2.9443681\ttest: 2.9434765\tbest: 2.9434765 (0)\ttotal: 61ms\tremaining: 15m 14s\n",
1766 | "100:\tlearn: 0.9694982\ttest: 0.9623182\tbest: 0.9623182 (100)\ttotal: 5.77s\tremaining: 14m 10s\n",
1767 | "200:\tlearn: 0.6842900\ttest: 0.6783291\tbest: 0.6783291 (200)\ttotal: 11.5s\tremaining: 14m 4s\n",
1768 | "300:\tlearn: 0.5802920\ttest: 0.5734318\tbest: 0.5734318 (300)\ttotal: 17s\tremaining: 13m 48s\n",
1769 | "400:\tlearn: 0.5268177\ttest: 0.5189816\tbest: 0.5189816 (400)\ttotal: 22.2s\tremaining: 13m 29s\n",
1770 | "500:\tlearn: 0.4928743\ttest: 0.4865865\tbest: 0.4865865 (500)\ttotal: 27.7s\tremaining: 13m 20s\n",
1771 | "600:\tlearn: 0.4699431\ttest: 0.4655205\tbest: 0.4655205 (600)\ttotal: 33s\tremaining: 13m 11s\n",
1772 | "700:\tlearn: 0.4532967\ttest: 0.4507689\tbest: 0.4507689 (700)\ttotal: 38.3s\tremaining: 13m 2s\n",
1773 | "800:\tlearn: 0.4395780\ttest: 0.4393647\tbest: 0.4393647 (800)\ttotal: 43.8s\tremaining: 12m 55s\n",
1774 | "900:\tlearn: 0.4282105\ttest: 0.4301656\tbest: 0.4301656 (900)\ttotal: 49.2s\tremaining: 12m 49s\n",
1775 | "1000:\tlearn: 0.4188862\ttest: 0.4229607\tbest: 0.4229607 (1000)\ttotal: 54.5s\tremaining: 12m 42s\n",
1776 | "1100:\tlearn: 0.4108868\ttest: 0.4169308\tbest: 0.4169308 (1100)\ttotal: 59.8s\tremaining: 12m 34s\n",
1777 | "1200:\tlearn: 0.4035234\ttest: 0.4117374\tbest: 0.4117374 (1200)\ttotal: 1m 5s\tremaining: 12m 27s\n",
1778 | "1300:\tlearn: 0.3969228\ttest: 0.4073188\tbest: 0.4073188 (1300)\ttotal: 1m 10s\tremaining: 12m 21s\n",
1779 | "1400:\tlearn: 0.3913798\ttest: 0.4038983\tbest: 0.4038983 (1400)\ttotal: 1m 15s\tremaining: 12m 14s\n",
1780 | "1500:\tlearn: 0.3866217\ttest: 0.4009698\tbest: 0.4009698 (1500)\ttotal: 1m 20s\tremaining: 12m 7s\n",
1781 | "1600:\tlearn: 0.3821708\ttest: 0.3983890\tbest: 0.3983890 (1600)\ttotal: 1m 26s\tremaining: 12m\n",
1782 | "1700:\tlearn: 0.3781741\ttest: 0.3960898\tbest: 0.3960898 (1700)\ttotal: 1m 31s\tremaining: 11m 53s\n",
1783 | "1800:\tlearn: 0.3742509\ttest: 0.3940105\tbest: 0.3940105 (1800)\ttotal: 1m 36s\tremaining: 11m 47s\n",
1784 | "1900:\tlearn: 0.3709104\ttest: 0.3922476\tbest: 0.3922476 (1900)\ttotal: 1m 41s\tremaining: 11m 40s\n",
1785 | "2000:\tlearn: 0.3678909\ttest: 0.3907634\tbest: 0.3907634 (2000)\ttotal: 1m 46s\tremaining: 11m 32s\n",
1786 | "2100:\tlearn: 0.3650593\ttest: 0.3893629\tbest: 0.3893629 (2100)\ttotal: 1m 51s\tremaining: 11m 26s\n",
1787 | "2200:\tlearn: 0.3623068\ttest: 0.3880129\tbest: 0.3880129 (2200)\ttotal: 1m 56s\tremaining: 11m 19s\n",
1788 | "2300:\tlearn: 0.3599148\ttest: 0.3868767\tbest: 0.3868767 (2300)\ttotal: 2m 1s\tremaining: 11m 12s\n",
1789 | "2400:\tlearn: 0.3577155\ttest: 0.3859346\tbest: 0.3859346 (2400)\ttotal: 2m 6s\tremaining: 11m 5s\n",
1790 | "2500:\tlearn: 0.3553781\ttest: 0.3849259\tbest: 0.3849259 (2500)\ttotal: 2m 11s\tremaining: 10m 59s\n",
1791 | "2600:\tlearn: 0.3533105\ttest: 0.3840945\tbest: 0.3840945 (2600)\ttotal: 2m 17s\tremaining: 10m 53s\n",
1792 | "2700:\tlearn: 0.3513950\ttest: 0.3833142\tbest: 0.3833142 (2700)\ttotal: 2m 22s\tremaining: 10m 46s\n",
1793 | "2800:\tlearn: 0.3496242\ttest: 0.3826326\tbest: 0.3826326 (2800)\ttotal: 2m 26s\tremaining: 10m 39s\n",
1794 | "2900:\tlearn: 0.3478921\ttest: 0.3819524\tbest: 0.3819506 (2898)\ttotal: 2m 31s\tremaining: 10m 33s\n",
1795 | "3000:\tlearn: 0.3462332\ttest: 0.3813759\tbest: 0.3813759 (3000)\ttotal: 2m 36s\tremaining: 10m 27s\n",
1796 | "3100:\tlearn: 0.3445562\ttest: 0.3807373\tbest: 0.3807373 (3100)\ttotal: 2m 41s\tremaining: 10m 21s\n",
1797 | "3200:\tlearn: 0.3429904\ttest: 0.3801625\tbest: 0.3801625 (3200)\ttotal: 2m 46s\tremaining: 10m 14s\n",
1798 | "3300:\tlearn: 0.3414159\ttest: 0.3795433\tbest: 0.3795433 (3300)\ttotal: 2m 51s\tremaining: 10m 8s\n",
1799 | "3400:\tlearn: 0.3400266\ttest: 0.3791237\tbest: 0.3791181 (3398)\ttotal: 2m 56s\tremaining: 10m 2s\n",
1800 | "3500:\tlearn: 0.3386484\ttest: 0.3786599\tbest: 0.3786599 (3500)\ttotal: 3m 1s\tremaining: 9m 56s\n",
1801 | "3600:\tlearn: 0.3374751\ttest: 0.3782948\tbest: 0.3782948 (3600)\ttotal: 3m 6s\tremaining: 9m 49s\n",
1802 | "3700:\tlearn: 0.3361114\ttest: 0.3778727\tbest: 0.3778727 (3700)\ttotal: 3m 11s\tremaining: 9m 44s\n",
1803 | "3800:\tlearn: 0.3348361\ttest: 0.3774884\tbest: 0.3774855 (3799)\ttotal: 3m 16s\tremaining: 9m 37s\n",
1804 | "3900:\tlearn: 0.3336625\ttest: 0.3771435\tbest: 0.3771435 (3900)\ttotal: 3m 21s\tremaining: 9m 31s\n",
1805 | "4000:\tlearn: 0.3325035\ttest: 0.3767501\tbest: 0.3767501 (4000)\ttotal: 3m 25s\tremaining: 9m 25s\n",
1806 | "4100:\tlearn: 0.3313293\ttest: 0.3764553\tbest: 0.3764553 (4100)\ttotal: 3m 30s\tremaining: 9m 20s\n",
1807 | "4200:\tlearn: 0.3299892\ttest: 0.3760755\tbest: 0.3760755 (4200)\ttotal: 3m 35s\tremaining: 9m 14s\n",
1808 | "4300:\tlearn: 0.3287937\ttest: 0.3757468\tbest: 0.3757460 (4299)\ttotal: 3m 40s\tremaining: 9m 8s\n",
1809 | "4400:\tlearn: 0.3276672\ttest: 0.3754384\tbest: 0.3754382 (4396)\ttotal: 3m 45s\tremaining: 9m 2s\n",
1810 | "4500:\tlearn: 0.3266092\ttest: 0.3751657\tbest: 0.3751657 (4500)\ttotal: 3m 50s\tremaining: 8m 57s\n",
1811 | "4600:\tlearn: 0.3254817\ttest: 0.3748788\tbest: 0.3748761 (4599)\ttotal: 3m 55s\tremaining: 8m 51s\n",
1812 | "4700:\tlearn: 0.3243820\ttest: 0.3746932\tbest: 0.3746932 (4700)\ttotal: 4m\tremaining: 8m 46s\n",
1813 | "4800:\tlearn: 0.3233590\ttest: 0.3744619\tbest: 0.3744619 (4800)\ttotal: 4m 4s\tremaining: 8m 40s\n",
1814 | "4900:\tlearn: 0.3224071\ttest: 0.3742858\tbest: 0.3742853 (4899)\ttotal: 4m 9s\tremaining: 8m 34s\n",
1815 | "5000:\tlearn: 0.3213104\ttest: 0.3740207\tbest: 0.3740207 (5000)\ttotal: 4m 14s\tremaining: 8m 29s\n",
1816 | "5100:\tlearn: 0.3202703\ttest: 0.3738921\tbest: 0.3738919 (5097)\ttotal: 4m 19s\tremaining: 8m 23s\n",
1817 | "5200:\tlearn: 0.3192828\ttest: 0.3736484\tbest: 0.3736484 (5200)\ttotal: 4m 24s\tremaining: 8m 18s\n",
1818 | "5300:\tlearn: 0.3182278\ttest: 0.3734363\tbest: 0.3734353 (5297)\ttotal: 4m 29s\tremaining: 8m 12s\n",
1819 | "5400:\tlearn: 0.3171807\ttest: 0.3732738\tbest: 0.3732724 (5390)\ttotal: 4m 34s\tremaining: 8m 7s\n",
1820 | "5500:\tlearn: 0.3162452\ttest: 0.3731329\tbest: 0.3731325 (5494)\ttotal: 4m 38s\tremaining: 8m 1s\n",
1821 | "5600:\tlearn: 0.3153347\ttest: 0.3730231\tbest: 0.3730224 (5597)\ttotal: 4m 43s\tremaining: 7m 56s\n",
1822 | "5700:\tlearn: 0.3142878\ttest: 0.3728379\tbest: 0.3728379 (5700)\ttotal: 4m 48s\tremaining: 7m 51s\n",
1823 | "5800:\tlearn: 0.3133186\ttest: 0.3726643\tbest: 0.3726610 (5789)\ttotal: 4m 53s\tremaining: 7m 45s\n",
1824 | "5900:\tlearn: 0.3123658\ttest: 0.3724978\tbest: 0.3724978 (5900)\ttotal: 4m 58s\tremaining: 7m 40s\n",
1825 | "6000:\tlearn: 0.3114211\ttest: 0.3723352\tbest: 0.3723309 (5995)\ttotal: 5m 3s\tremaining: 7m 34s\n",
1826 | "6100:\tlearn: 0.3105598\ttest: 0.3721834\tbest: 0.3721793 (6098)\ttotal: 5m 8s\tremaining: 7m 29s\n",
1827 | "6200:\tlearn: 0.3096669\ttest: 0.3720013\tbest: 0.3720013 (6200)\ttotal: 5m 13s\tremaining: 7m 24s\n",
1828 | "6300:\tlearn: 0.3087257\ttest: 0.3718906\tbest: 0.3718861 (6294)\ttotal: 5m 18s\tremaining: 7m 19s\n",
1829 | "6400:\tlearn: 0.3078272\ttest: 0.3717507\tbest: 0.3717507 (6400)\ttotal: 5m 22s\tremaining: 7m 13s\n",
1830 | "6500:\tlearn: 0.3069387\ttest: 0.3716658\tbest: 0.3716632 (6499)\ttotal: 5m 27s\tremaining: 7m 8s\n",
1831 | "6600:\tlearn: 0.3061115\ttest: 0.3715509\tbest: 0.3715509 (6600)\ttotal: 5m 32s\tremaining: 7m 3s\n",
1832 | "6700:\tlearn: 0.3052217\ttest: 0.3713980\tbest: 0.3713980 (6700)\ttotal: 5m 37s\tremaining: 6m 58s\n",
1833 | "6800:\tlearn: 0.3043711\ttest: 0.3713464\tbest: 0.3713464 (6800)\ttotal: 5m 42s\tremaining: 6m 52s\n",
1834 | "6900:\tlearn: 0.3035667\ttest: 0.3712873\tbest: 0.3712718 (6869)\ttotal: 5m 47s\tremaining: 6m 47s\n",
1835 | "7000:\tlearn: 0.3026999\ttest: 0.3712250\tbest: 0.3712218 (6998)\ttotal: 5m 52s\tremaining: 6m 42s\n",
1836 | "7100:\tlearn: 0.3018933\ttest: 0.3711512\tbest: 0.3711507 (7087)\ttotal: 5m 57s\tremaining: 6m 37s\n",
1837 | "7200:\tlearn: 0.3010557\ttest: 0.3710529\tbest: 0.3710529 (7200)\ttotal: 6m 2s\tremaining: 6m 32s\n",
1838 | "7300:\tlearn: 0.3002674\ttest: 0.3709802\tbest: 0.3709802 (7300)\ttotal: 6m 7s\tremaining: 6m 27s\n",
1839 | "7400:\tlearn: 0.2994336\ttest: 0.3708955\tbest: 0.3708939 (7399)\ttotal: 6m 11s\tremaining: 6m 21s\n"
1840 | ]
1841 | },
1842 | {
1843 | "name": "stdout",
1844 | "output_type": "stream",
1845 | "text": [
1846 | "7500:\tlearn: 0.2986146\ttest: 0.3708438\tbest: 0.3708386 (7471)\ttotal: 6m 16s\tremaining: 6m 16s\n",
1847 | "7600:\tlearn: 0.2978320\ttest: 0.3707964\tbest: 0.3707964 (7600)\ttotal: 6m 21s\tremaining: 6m 11s\n",
1848 | "7700:\tlearn: 0.2971115\ttest: 0.3707068\tbest: 0.3707068 (7700)\ttotal: 6m 26s\tremaining: 6m 6s\n",
1849 | "7800:\tlearn: 0.2962688\ttest: 0.3706367\tbest: 0.3706354 (7799)\ttotal: 6m 31s\tremaining: 6m 1s\n",
1850 | "7900:\tlearn: 0.2954515\ttest: 0.3706102\tbest: 0.3705969 (7888)\ttotal: 6m 36s\tremaining: 5m 56s\n",
1851 | "8000:\tlearn: 0.2946311\ttest: 0.3705748\tbest: 0.3705715 (7997)\ttotal: 6m 41s\tremaining: 5m 51s\n",
1852 | "8100:\tlearn: 0.2938061\ttest: 0.3705295\tbest: 0.3705261 (8087)\ttotal: 6m 46s\tremaining: 5m 46s\n",
1853 | "8200:\tlearn: 0.2930550\ttest: 0.3704819\tbest: 0.3704692 (8189)\ttotal: 6m 51s\tremaining: 5m 41s\n",
1854 | "8300:\tlearn: 0.2922324\ttest: 0.3704177\tbest: 0.3704177 (8300)\ttotal: 6m 56s\tremaining: 5m 35s\n",
1855 | "8400:\tlearn: 0.2914426\ttest: 0.3703328\tbest: 0.3703322 (8395)\ttotal: 7m 1s\tremaining: 5m 30s\n",
1856 | "8500:\tlearn: 0.2906545\ttest: 0.3702983\tbest: 0.3702943 (8489)\ttotal: 7m 6s\tremaining: 5m 25s\n",
1857 | "8600:\tlearn: 0.2898853\ttest: 0.3702520\tbest: 0.3702520 (8600)\ttotal: 7m 11s\tremaining: 5m 20s\n",
1858 | "8700:\tlearn: 0.2891316\ttest: 0.3702383\tbest: 0.3702377 (8697)\ttotal: 7m 16s\tremaining: 5m 15s\n",
1859 | "8800:\tlearn: 0.2883580\ttest: 0.3701953\tbest: 0.3701936 (8789)\ttotal: 7m 21s\tremaining: 5m 10s\n",
1860 | "8900:\tlearn: 0.2876060\ttest: 0.3702348\tbest: 0.3701914 (8824)\ttotal: 7m 25s\tremaining: 5m 5s\n",
1861 | "9000:\tlearn: 0.2869364\ttest: 0.3702237\tbest: 0.3701914 (8824)\ttotal: 7m 30s\tremaining: 5m\n",
1862 | "bestTest = 0.370191368\n",
1863 | "bestIteration = 8824\n",
1864 | "Shrink model to first 8825 iterations.\n",
1865 | "Fold 3\n",
1866 | "0:\tlearn: 2.9437895\ttest: 2.9439809\tbest: 2.9439809 (0)\ttotal: 64.5ms\tremaining: 16m 7s\n",
1867 | "100:\tlearn: 0.9666337\ttest: 0.9737121\tbest: 0.9737121 (100)\ttotal: 5.76s\tremaining: 14m 10s\n",
1868 | "200:\tlearn: 0.6822880\ttest: 0.6907236\tbest: 0.6907236 (200)\ttotal: 11.6s\tremaining: 14m 15s\n",
1869 | "300:\tlearn: 0.5780329\ttest: 0.5866115\tbest: 0.5866115 (300)\ttotal: 17.3s\tremaining: 14m 7s\n",
1870 | "400:\tlearn: 0.5249610\ttest: 0.5340124\tbest: 0.5340124 (400)\ttotal: 22.7s\tremaining: 13m 46s\n",
1871 | "500:\tlearn: 0.4903229\ttest: 0.5009206\tbest: 0.5009206 (500)\ttotal: 28.2s\tremaining: 13m 37s\n",
1872 | "600:\tlearn: 0.4671883\ttest: 0.4791335\tbest: 0.4791335 (600)\ttotal: 33.8s\tremaining: 13m 28s\n",
1873 | "700:\tlearn: 0.4507938\ttest: 0.4639248\tbest: 0.4639248 (700)\ttotal: 39.2s\tremaining: 13m 20s\n",
1874 | "800:\tlearn: 0.4375317\ttest: 0.4518411\tbest: 0.4518411 (800)\ttotal: 44.7s\tremaining: 13m 12s\n",
1875 | "900:\tlearn: 0.4269307\ttest: 0.4425353\tbest: 0.4425353 (900)\ttotal: 50.2s\tremaining: 13m 5s\n",
1876 | "1000:\tlearn: 0.4181567\ttest: 0.4349669\tbest: 0.4349669 (1000)\ttotal: 55.6s\tremaining: 12m 57s\n",
1877 | "1100:\tlearn: 0.4105997\ttest: 0.4286424\tbest: 0.4286424 (1100)\ttotal: 1m 1s\tremaining: 12m 50s\n",
1878 | "1200:\tlearn: 0.4033486\ttest: 0.4228499\tbest: 0.4228499 (1200)\ttotal: 1m 6s\tremaining: 12m 41s\n",
1879 | "1300:\tlearn: 0.3969627\ttest: 0.4179884\tbest: 0.4179884 (1300)\ttotal: 1m 11s\tremaining: 12m 33s\n",
1880 | "1400:\tlearn: 0.3912228\ttest: 0.4138862\tbest: 0.4138862 (1400)\ttotal: 1m 17s\tremaining: 12m 28s\n",
1881 | "1500:\tlearn: 0.3859352\ttest: 0.4100195\tbest: 0.4100195 (1500)\ttotal: 1m 22s\tremaining: 12m 21s\n",
1882 | "1600:\tlearn: 0.3815999\ttest: 0.4070405\tbest: 0.4070405 (1600)\ttotal: 1m 27s\tremaining: 12m 13s\n",
1883 | "1700:\tlearn: 0.3776680\ttest: 0.4044705\tbest: 0.4044705 (1700)\ttotal: 1m 32s\tremaining: 12m 5s\n",
1884 | "1800:\tlearn: 0.3740485\ttest: 0.4020943\tbest: 0.4020943 (1800)\ttotal: 1m 37s\tremaining: 11m 57s\n",
1885 | "1900:\tlearn: 0.3708849\ttest: 0.4001783\tbest: 0.4001783 (1900)\ttotal: 1m 42s\tremaining: 11m 49s\n",
1886 | "2000:\tlearn: 0.3679143\ttest: 0.3983328\tbest: 0.3983328 (2000)\ttotal: 1m 48s\tremaining: 11m 41s\n",
1887 | "2100:\tlearn: 0.3652782\ttest: 0.3967781\tbest: 0.3967781 (2100)\ttotal: 1m 53s\tremaining: 11m 34s\n",
1888 | "2200:\tlearn: 0.3627526\ttest: 0.3953468\tbest: 0.3953468 (2200)\ttotal: 1m 58s\tremaining: 11m 26s\n",
1889 | "2300:\tlearn: 0.3603579\ttest: 0.3938987\tbest: 0.3938987 (2300)\ttotal: 2m 2s\tremaining: 11m 18s\n",
1890 | "2400:\tlearn: 0.3581742\ttest: 0.3926778\tbest: 0.3926778 (2400)\ttotal: 2m 7s\tremaining: 11m 11s\n",
1891 | "2500:\tlearn: 0.3562758\ttest: 0.3916742\tbest: 0.3916742 (2500)\ttotal: 2m 12s\tremaining: 11m 3s\n",
1892 | "2600:\tlearn: 0.3543680\ttest: 0.3907456\tbest: 0.3907456 (2600)\ttotal: 2m 17s\tremaining: 10m 56s\n",
1893 | "2700:\tlearn: 0.3528549\ttest: 0.3900831\tbest: 0.3900831 (2700)\ttotal: 2m 22s\tremaining: 10m 49s\n",
1894 | "2800:\tlearn: 0.3512045\ttest: 0.3892952\tbest: 0.3892952 (2800)\ttotal: 2m 27s\tremaining: 10m 44s\n",
1895 | "2900:\tlearn: 0.3498431\ttest: 0.3886999\tbest: 0.3886999 (2900)\ttotal: 2m 32s\tremaining: 10m 37s\n",
1896 | "3000:\tlearn: 0.3481895\ttest: 0.3879855\tbest: 0.3879855 (3000)\ttotal: 2m 37s\tremaining: 10m 31s\n",
1897 | "3100:\tlearn: 0.3466946\ttest: 0.3873528\tbest: 0.3873528 (3100)\ttotal: 2m 43s\tremaining: 10m 25s\n",
1898 | "3200:\tlearn: 0.3451060\ttest: 0.3866692\tbest: 0.3866692 (3200)\ttotal: 2m 47s\tremaining: 10m 19s\n",
1899 | "3300:\tlearn: 0.3438039\ttest: 0.3862147\tbest: 0.3862147 (3300)\ttotal: 2m 52s\tremaining: 10m 12s\n",
1900 | "3400:\tlearn: 0.3423666\ttest: 0.3856366\tbest: 0.3856366 (3400)\ttotal: 2m 57s\tremaining: 10m 7s\n",
1901 | "3500:\tlearn: 0.3410860\ttest: 0.3851325\tbest: 0.3851325 (3500)\ttotal: 3m 2s\tremaining: 10m\n",
1902 | "3600:\tlearn: 0.3398489\ttest: 0.3846819\tbest: 0.3846819 (3600)\ttotal: 3m 7s\tremaining: 9m 54s\n",
1903 | "3700:\tlearn: 0.3385640\ttest: 0.3841184\tbest: 0.3841184 (3700)\ttotal: 3m 12s\tremaining: 9m 49s\n",
1904 | "3800:\tlearn: 0.3373945\ttest: 0.3837013\tbest: 0.3837013 (3800)\ttotal: 3m 17s\tremaining: 9m 42s\n",
1905 | "3900:\tlearn: 0.3362733\ttest: 0.3832844\tbest: 0.3832844 (3900)\ttotal: 3m 22s\tremaining: 9m 36s\n",
1906 | "4000:\tlearn: 0.3351111\ttest: 0.3828893\tbest: 0.3828883 (3999)\ttotal: 3m 27s\tremaining: 9m 30s\n",
1907 | "4100:\tlearn: 0.3339635\ttest: 0.3825139\tbest: 0.3825139 (4100)\ttotal: 3m 32s\tremaining: 9m 24s\n",
1908 | "4200:\tlearn: 0.3328368\ttest: 0.3821965\tbest: 0.3821947 (4198)\ttotal: 3m 37s\tremaining: 9m 18s\n",
1909 | "4300:\tlearn: 0.3317781\ttest: 0.3818511\tbest: 0.3818511 (4300)\ttotal: 3m 41s\tremaining: 9m 12s\n",
1910 | "4400:\tlearn: 0.3307150\ttest: 0.3815458\tbest: 0.3815458 (4400)\ttotal: 3m 46s\tremaining: 9m 6s\n",
1911 | "4500:\tlearn: 0.3297377\ttest: 0.3813009\tbest: 0.3813009 (4500)\ttotal: 3m 51s\tremaining: 9m\n",
1912 | "4600:\tlearn: 0.3286289\ttest: 0.3809880\tbest: 0.3809880 (4600)\ttotal: 3m 56s\tremaining: 8m 54s\n",
1913 | "4700:\tlearn: 0.3275877\ttest: 0.3806923\tbest: 0.3806863 (4696)\ttotal: 4m 1s\tremaining: 8m 48s\n",
1914 | "4800:\tlearn: 0.3265512\ttest: 0.3803794\tbest: 0.3803779 (4798)\ttotal: 4m 6s\tremaining: 8m 42s\n",
1915 | "4900:\tlearn: 0.3256019\ttest: 0.3801802\tbest: 0.3801802 (4900)\ttotal: 4m 11s\tremaining: 8m 37s\n",
1916 | "5000:\tlearn: 0.3246328\ttest: 0.3798789\tbest: 0.3798789 (5000)\ttotal: 4m 15s\tremaining: 8m 31s\n",
1917 | "5100:\tlearn: 0.3237276\ttest: 0.3796492\tbest: 0.3796492 (5100)\ttotal: 4m 20s\tremaining: 8m 25s\n",
1918 | "5200:\tlearn: 0.3228442\ttest: 0.3794166\tbest: 0.3794166 (5200)\ttotal: 4m 25s\tremaining: 8m 20s\n",
1919 | "5300:\tlearn: 0.3219078\ttest: 0.3791499\tbest: 0.3791499 (5300)\ttotal: 4m 30s\tremaining: 8m 14s\n",
1920 | "5400:\tlearn: 0.3209660\ttest: 0.3789839\tbest: 0.3789788 (5391)\ttotal: 4m 35s\tremaining: 8m 9s\n",
1921 | "5500:\tlearn: 0.3200584\ttest: 0.3788319\tbest: 0.3788319 (5500)\ttotal: 4m 40s\tremaining: 8m 3s\n",
1922 | "5600:\tlearn: 0.3191029\ttest: 0.3785811\tbest: 0.3785811 (5600)\ttotal: 4m 44s\tremaining: 7m 58s\n",
1923 | "5700:\tlearn: 0.3182321\ttest: 0.3784223\tbest: 0.3784223 (5698)\ttotal: 4m 49s\tremaining: 7m 52s\n",
1924 | "5800:\tlearn: 0.3173450\ttest: 0.3782003\tbest: 0.3782003 (5800)\ttotal: 4m 54s\tremaining: 7m 47s\n",
1925 | "5900:\tlearn: 0.3164450\ttest: 0.3780575\tbest: 0.3780575 (5900)\ttotal: 4m 59s\tremaining: 7m 41s\n",
1926 | "6000:\tlearn: 0.3155760\ttest: 0.3778265\tbest: 0.3778217 (5998)\ttotal: 5m 4s\tremaining: 7m 36s\n",
1927 | "6100:\tlearn: 0.3147287\ttest: 0.3776652\tbest: 0.3776587 (6098)\ttotal: 5m 9s\tremaining: 7m 31s\n",
1928 | "6200:\tlearn: 0.3138946\ttest: 0.3775094\tbest: 0.3775094 (6200)\ttotal: 5m 14s\tremaining: 7m 25s\n",
1929 | "6300:\tlearn: 0.3130381\ttest: 0.3773897\tbest: 0.3773864 (6299)\ttotal: 5m 18s\tremaining: 7m 20s\n",
1930 | "6400:\tlearn: 0.3122034\ttest: 0.3772815\tbest: 0.3772779 (6390)\ttotal: 5m 23s\tremaining: 7m 14s\n",
1931 | "6500:\tlearn: 0.3113488\ttest: 0.3771375\tbest: 0.3771361 (6490)\ttotal: 5m 28s\tremaining: 7m 9s\n",
1932 | "6600:\tlearn: 0.3104638\ttest: 0.3769675\tbest: 0.3769675 (6599)\ttotal: 5m 33s\tremaining: 7m 4s\n",
1933 | "6700:\tlearn: 0.3096569\ttest: 0.3768008\tbest: 0.3768002 (6698)\ttotal: 5m 38s\tremaining: 6m 58s\n",
1934 | "6800:\tlearn: 0.3089298\ttest: 0.3767119\tbest: 0.3767119 (6800)\ttotal: 5m 43s\tremaining: 6m 53s\n",
1935 | "6900:\tlearn: 0.3081403\ttest: 0.3765886\tbest: 0.3765880 (6898)\ttotal: 5m 47s\tremaining: 6m 48s\n",
1936 | "7000:\tlearn: 0.3074183\ttest: 0.3764605\tbest: 0.3764600 (6997)\ttotal: 5m 52s\tremaining: 6m 43s\n"
1937 | ]
1938 | },
1939 | {
1940 | "name": "stdout",
1941 | "output_type": "stream",
1942 | "text": [
1943 | "7100:\tlearn: 0.3066156\ttest: 0.3763372\tbest: 0.3763372 (7100)\ttotal: 5m 57s\tremaining: 6m 37s\n",
1944 | "7200:\tlearn: 0.3058966\ttest: 0.3762786\tbest: 0.3762786 (7200)\ttotal: 6m 2s\tremaining: 6m 32s\n",
1945 | "7300:\tlearn: 0.3051038\ttest: 0.3761495\tbest: 0.3761495 (7300)\ttotal: 6m 7s\tremaining: 6m 27s\n",
1946 | "7400:\tlearn: 0.3043295\ttest: 0.3760555\tbest: 0.3760538 (7399)\ttotal: 6m 12s\tremaining: 6m 22s\n",
1947 | "7500:\tlearn: 0.3035415\ttest: 0.3759647\tbest: 0.3759647 (7500)\ttotal: 6m 18s\tremaining: 6m 18s\n",
1948 | "7600:\tlearn: 0.3027972\ttest: 0.3758383\tbest: 0.3758347 (7596)\ttotal: 6m 23s\tremaining: 6m 13s\n",
1949 | "7700:\tlearn: 0.3019187\ttest: 0.3756992\tbest: 0.3756992 (7700)\ttotal: 6m 28s\tremaining: 6m 8s\n",
1950 | "7800:\tlearn: 0.3011553\ttest: 0.3755568\tbest: 0.3755561 (7799)\ttotal: 6m 33s\tremaining: 6m 2s\n",
1951 | "7900:\tlearn: 0.3003440\ttest: 0.3754536\tbest: 0.3754521 (7898)\ttotal: 6m 38s\tremaining: 5m 57s\n",
1952 | "8000:\tlearn: 0.2995573\ttest: 0.3753286\tbest: 0.3753271 (7998)\ttotal: 6m 43s\tremaining: 5m 52s\n",
1953 | "8100:\tlearn: 0.2987896\ttest: 0.3752060\tbest: 0.3752060 (8100)\ttotal: 6m 47s\tremaining: 5m 47s\n",
1954 | "8200:\tlearn: 0.2980186\ttest: 0.3751054\tbest: 0.3751047 (8181)\ttotal: 6m 52s\tremaining: 5m 42s\n",
1955 | "8300:\tlearn: 0.2972517\ttest: 0.3750124\tbest: 0.3750084 (8298)\ttotal: 6m 57s\tremaining: 5m 37s\n",
1956 | "8400:\tlearn: 0.2965652\ttest: 0.3749325\tbest: 0.3749325 (8400)\ttotal: 7m 2s\tremaining: 5m 31s\n",
1957 | "8500:\tlearn: 0.2958502\ttest: 0.3748635\tbest: 0.3748634 (8499)\ttotal: 7m 7s\tremaining: 5m 26s\n",
1958 | "8600:\tlearn: 0.2950783\ttest: 0.3747830\tbest: 0.3747830 (8600)\ttotal: 7m 12s\tremaining: 5m 21s\n",
1959 | "8700:\tlearn: 0.2943076\ttest: 0.3747100\tbest: 0.3747099 (8697)\ttotal: 7m 17s\tremaining: 5m 16s\n",
1960 | "8800:\tlearn: 0.2935542\ttest: 0.3746214\tbest: 0.3746214 (8800)\ttotal: 7m 22s\tremaining: 5m 11s\n",
1961 | "8900:\tlearn: 0.2928097\ttest: 0.3745509\tbest: 0.3745509 (8900)\ttotal: 7m 26s\tremaining: 5m 6s\n",
1962 | "9000:\tlearn: 0.2921266\ttest: 0.3744730\tbest: 0.3744730 (9000)\ttotal: 7m 31s\tremaining: 5m 1s\n",
1963 | "9100:\tlearn: 0.2914218\ttest: 0.3743784\tbest: 0.3743755 (9086)\ttotal: 7m 36s\tremaining: 4m 55s\n",
1964 | "9200:\tlearn: 0.2906955\ttest: 0.3743307\tbest: 0.3743295 (9199)\ttotal: 7m 41s\tremaining: 4m 50s\n",
1965 | "9300:\tlearn: 0.2899799\ttest: 0.3742830\tbest: 0.3742819 (9291)\ttotal: 7m 46s\tremaining: 4m 45s\n",
1966 | "9400:\tlearn: 0.2892462\ttest: 0.3742510\tbest: 0.3742505 (9399)\ttotal: 7m 51s\tremaining: 4m 40s\n",
1967 | "9500:\tlearn: 0.2884749\ttest: 0.3741921\tbest: 0.3741838 (9495)\ttotal: 7m 56s\tremaining: 4m 35s\n",
1968 | "9600:\tlearn: 0.2877788\ttest: 0.3741160\tbest: 0.3741160 (9600)\ttotal: 8m\tremaining: 4m 30s\n",
1969 | "9700:\tlearn: 0.2871115\ttest: 0.3741111\tbest: 0.3741040 (9657)\ttotal: 8m 5s\tremaining: 4m 25s\n",
1970 | "9800:\tlearn: 0.2863541\ttest: 0.3740104\tbest: 0.3740085 (9799)\ttotal: 8m 10s\tremaining: 4m 20s\n",
1971 | "9900:\tlearn: 0.2856196\ttest: 0.3739835\tbest: 0.3739822 (9899)\ttotal: 8m 15s\tremaining: 4m 15s\n",
1972 | "10000:\tlearn: 0.2849435\ttest: 0.3739559\tbest: 0.3739547 (9998)\ttotal: 8m 20s\tremaining: 4m 10s\n",
1973 | "10100:\tlearn: 0.2842390\ttest: 0.3739441\tbest: 0.3739186 (10084)\ttotal: 8m 25s\tremaining: 4m 5s\n",
1974 | "10200:\tlearn: 0.2834556\ttest: 0.3739202\tbest: 0.3739152 (10196)\ttotal: 8m 30s\tremaining: 4m\n",
1975 | "10300:\tlearn: 0.2827621\ttest: 0.3738768\tbest: 0.3738734 (10298)\ttotal: 8m 35s\tremaining: 3m 54s\n",
1976 | "10400:\tlearn: 0.2821135\ttest: 0.3738436\tbest: 0.3738386 (10386)\ttotal: 8m 39s\tremaining: 3m 49s\n",
1977 | "10500:\tlearn: 0.2813812\ttest: 0.3737976\tbest: 0.3737973 (10499)\ttotal: 8m 44s\tremaining: 3m 44s\n",
1978 | "10600:\tlearn: 0.2806919\ttest: 0.3737563\tbest: 0.3737528 (10598)\ttotal: 8m 49s\tremaining: 3m 39s\n",
1979 | "10700:\tlearn: 0.2799841\ttest: 0.3737778\tbest: 0.3737528 (10598)\ttotal: 8m 54s\tremaining: 3m 34s\n",
1980 | "10800:\tlearn: 0.2793340\ttest: 0.3737218\tbest: 0.3737211 (10799)\ttotal: 8m 59s\tremaining: 3m 29s\n",
1981 | "10900:\tlearn: 0.2786476\ttest: 0.3736935\tbest: 0.3736915 (10897)\ttotal: 9m 4s\tremaining: 3m 24s\n",
1982 | "11000:\tlearn: 0.2780168\ttest: 0.3736679\tbest: 0.3736679 (11000)\ttotal: 9m 9s\tremaining: 3m 19s\n",
1983 | "11100:\tlearn: 0.2773362\ttest: 0.3736590\tbest: 0.3736512 (11010)\ttotal: 9m 14s\tremaining: 3m 14s\n",
1984 | "11200:\tlearn: 0.2767015\ttest: 0.3736327\tbest: 0.3736327 (11200)\ttotal: 9m 19s\tremaining: 3m 9s\n",
1985 | "11300:\tlearn: 0.2760368\ttest: 0.3735720\tbest: 0.3735720 (11300)\ttotal: 9m 24s\tremaining: 3m 4s\n",
1986 | "11400:\tlearn: 0.2753389\ttest: 0.3735426\tbest: 0.3735282 (11386)\ttotal: 9m 30s\tremaining: 2m 59s\n",
1987 | "11500:\tlearn: 0.2747149\ttest: 0.3735154\tbest: 0.3735116 (11468)\ttotal: 9m 35s\tremaining: 2m 54s\n",
1988 | "11600:\tlearn: 0.2740372\ttest: 0.3735139\tbest: 0.3735049 (11595)\ttotal: 9m 40s\tremaining: 2m 49s\n",
1989 | "11700:\tlearn: 0.2733493\ttest: 0.3734869\tbest: 0.3734767 (11687)\ttotal: 9m 44s\tremaining: 2m 44s\n",
1990 | "11800:\tlearn: 0.2726567\ttest: 0.3734749\tbest: 0.3734634 (11773)\ttotal: 9m 49s\tremaining: 2m 39s\n",
1991 | "11900:\tlearn: 0.2719464\ttest: 0.3734793\tbest: 0.3734602 (11826)\ttotal: 9m 54s\tremaining: 2m 34s\n",
1992 | "12000:\tlearn: 0.2712719\ttest: 0.3734943\tbest: 0.3734503 (11921)\ttotal: 9m 59s\tremaining: 2m 29s\n",
1993 | "12100:\tlearn: 0.2705960\ttest: 0.3734967\tbest: 0.3734503 (11921)\ttotal: 10m 4s\tremaining: 2m 24s\n",
1994 | "bestTest = 0.3734503482\n",
1995 | "bestIteration = 11921\n",
1996 | "Shrink model to first 11922 iterations.\n",
1997 | "Fold 4\n",
1998 | "0:\tlearn: 2.9441145\ttest: 2.9428512\tbest: 2.9428512 (0)\ttotal: 63.7ms\tremaining: 15m 55s\n",
1999 | "100:\tlearn: 0.9713663\ttest: 0.9542919\tbest: 0.9542919 (100)\ttotal: 5.82s\tremaining: 14m 19s\n",
2000 | "200:\tlearn: 0.6868976\ttest: 0.6694787\tbest: 0.6694787 (200)\ttotal: 11.6s\tremaining: 14m 11s\n",
2001 | "300:\tlearn: 0.5816151\ttest: 0.5668115\tbest: 0.5668115 (300)\ttotal: 17.1s\tremaining: 13m 55s\n",
2002 | "400:\tlearn: 0.5277489\ttest: 0.5168683\tbest: 0.5168683 (400)\ttotal: 22.4s\tremaining: 13m 34s\n",
2003 | "500:\tlearn: 0.4930415\ttest: 0.4849697\tbest: 0.4849697 (500)\ttotal: 27.8s\tremaining: 13m 24s\n",
2004 | "600:\tlearn: 0.4702242\ttest: 0.4644594\tbest: 0.4644594 (600)\ttotal: 33.3s\tremaining: 13m 16s\n",
2005 | "700:\tlearn: 0.4531113\ttest: 0.4495191\tbest: 0.4495191 (700)\ttotal: 38.6s\tremaining: 13m 7s\n",
2006 | "800:\tlearn: 0.4396474\ttest: 0.4380675\tbest: 0.4380675 (800)\ttotal: 44s\tremaining: 12m 59s\n",
2007 | "900:\tlearn: 0.4285257\ttest: 0.4289758\tbest: 0.4289758 (900)\ttotal: 49.3s\tremaining: 12m 52s\n",
2008 | "1000:\tlearn: 0.4196144\ttest: 0.4218432\tbest: 0.4218432 (1000)\ttotal: 54.6s\tremaining: 12m 43s\n",
2009 | "1100:\tlearn: 0.4113955\ttest: 0.4157216\tbest: 0.4157216 (1100)\ttotal: 1m\tremaining: 12m 37s\n",
2010 | "1200:\tlearn: 0.4042766\ttest: 0.4105635\tbest: 0.4105635 (1200)\ttotal: 1m 5s\tremaining: 12m 32s\n",
2011 | "1300:\tlearn: 0.3978758\ttest: 0.4063071\tbest: 0.4063071 (1300)\ttotal: 1m 10s\tremaining: 12m 26s\n",
2012 | "1400:\tlearn: 0.3920750\ttest: 0.4025975\tbest: 0.4025975 (1400)\ttotal: 1m 16s\tremaining: 12m 19s\n",
2013 | "1500:\tlearn: 0.3869891\ttest: 0.3994454\tbest: 0.3994454 (1500)\ttotal: 1m 21s\tremaining: 12m 13s\n",
2014 | "1600:\tlearn: 0.3826395\ttest: 0.3967433\tbest: 0.3967433 (1600)\ttotal: 1m 26s\tremaining: 12m 6s\n",
2015 | "1700:\tlearn: 0.3786445\ttest: 0.3945701\tbest: 0.3945701 (1700)\ttotal: 1m 32s\tremaining: 11m 59s\n",
2016 | "1800:\tlearn: 0.3750828\ttest: 0.3924128\tbest: 0.3924128 (1800)\ttotal: 1m 37s\tremaining: 11m 52s\n",
2017 | "1900:\tlearn: 0.3713657\ttest: 0.3902837\tbest: 0.3902837 (1900)\ttotal: 1m 42s\tremaining: 11m 45s\n",
2018 | "2000:\tlearn: 0.3681296\ttest: 0.3884967\tbest: 0.3884967 (2000)\ttotal: 1m 47s\tremaining: 11m 39s\n",
2019 | "2100:\tlearn: 0.3653758\ttest: 0.3871591\tbest: 0.3871591 (2100)\ttotal: 1m 52s\tremaining: 11m 32s\n",
2020 | "2200:\tlearn: 0.3626846\ttest: 0.3858827\tbest: 0.3858827 (2200)\ttotal: 1m 57s\tremaining: 11m 25s\n",
2021 | "2300:\tlearn: 0.3603069\ttest: 0.3847996\tbest: 0.3847979 (2299)\ttotal: 2m 2s\tremaining: 11m 17s\n",
2022 | "2400:\tlearn: 0.3581784\ttest: 0.3838213\tbest: 0.3838213 (2400)\ttotal: 2m 7s\tremaining: 11m 10s\n",
2023 | "2500:\tlearn: 0.3561337\ttest: 0.3829761\tbest: 0.3829761 (2500)\ttotal: 2m 12s\tremaining: 11m 3s\n",
2024 | "2600:\tlearn: 0.3541508\ttest: 0.3821290\tbest: 0.3821290 (2600)\ttotal: 2m 17s\tremaining: 10m 56s\n",
2025 | "2700:\tlearn: 0.3523220\ttest: 0.3813719\tbest: 0.3813719 (2700)\ttotal: 2m 22s\tremaining: 10m 49s\n",
2026 | "2800:\tlearn: 0.3505456\ttest: 0.3806785\tbest: 0.3806785 (2800)\ttotal: 2m 27s\tremaining: 10m 42s\n",
2027 | "2900:\tlearn: 0.3487332\ttest: 0.3800245\tbest: 0.3800245 (2900)\ttotal: 2m 32s\tremaining: 10m 36s\n",
2028 | "3000:\tlearn: 0.3471686\ttest: 0.3794431\tbest: 0.3794427 (2999)\ttotal: 2m 37s\tremaining: 10m 30s\n",
2029 | "3100:\tlearn: 0.3456715\ttest: 0.3789680\tbest: 0.3789680 (3100)\ttotal: 2m 42s\tremaining: 10m 23s\n",
2030 | "3200:\tlearn: 0.3442029\ttest: 0.3785018\tbest: 0.3785018 (3200)\ttotal: 2m 47s\tremaining: 10m 17s\n",
2031 | "3300:\tlearn: 0.3428733\ttest: 0.3780782\tbest: 0.3780762 (3299)\ttotal: 2m 52s\tremaining: 10m 11s\n",
2032 | "3400:\tlearn: 0.3415839\ttest: 0.3776550\tbest: 0.3776550 (3400)\ttotal: 2m 57s\tremaining: 10m 4s\n",
2033 | "3500:\tlearn: 0.3404091\ttest: 0.3773172\tbest: 0.3773172 (3500)\ttotal: 3m 2s\tremaining: 9m 58s\n"
2034 | ]
2035 | },
2036 | {
2037 | "name": "stdout",
2038 | "output_type": "stream",
2039 | "text": [
2040 | "3600:\tlearn: 0.3392124\ttest: 0.3769761\tbest: 0.3769761 (3600)\ttotal: 3m 7s\tremaining: 9m 52s\n",
2041 | "3700:\tlearn: 0.3379441\ttest: 0.3765148\tbest: 0.3765148 (3700)\ttotal: 3m 11s\tremaining: 9m 45s\n",
2042 | "3800:\tlearn: 0.3366381\ttest: 0.3761183\tbest: 0.3761183 (3800)\ttotal: 3m 16s\tremaining: 9m 39s\n",
2043 | "3900:\tlearn: 0.3353981\ttest: 0.3757377\tbest: 0.3757377 (3900)\ttotal: 3m 21s\tremaining: 9m 33s\n",
2044 | "4000:\tlearn: 0.3341714\ttest: 0.3754291\tbest: 0.3754283 (3997)\ttotal: 3m 26s\tremaining: 9m 27s\n",
2045 | "4100:\tlearn: 0.3329611\ttest: 0.3751036\tbest: 0.3751036 (4100)\ttotal: 3m 31s\tremaining: 9m 22s\n",
2046 | "4200:\tlearn: 0.3319368\ttest: 0.3748113\tbest: 0.3748090 (4197)\ttotal: 3m 36s\tremaining: 9m 16s\n",
2047 | "4300:\tlearn: 0.3308520\ttest: 0.3745480\tbest: 0.3745480 (4300)\ttotal: 3m 41s\tremaining: 9m 10s\n",
2048 | "4400:\tlearn: 0.3297650\ttest: 0.3743114\tbest: 0.3743106 (4399)\ttotal: 3m 46s\tremaining: 9m 4s\n",
2049 | "4500:\tlearn: 0.3286908\ttest: 0.3740695\tbest: 0.3740695 (4500)\ttotal: 3m 51s\tremaining: 8m 58s\n",
2050 | "4600:\tlearn: 0.3276579\ttest: 0.3738413\tbest: 0.3738413 (4600)\ttotal: 3m 55s\tremaining: 8m 53s\n",
2051 | "4700:\tlearn: 0.3266524\ttest: 0.3736030\tbest: 0.3736030 (4700)\ttotal: 4m\tremaining: 8m 47s\n",
2052 | "4800:\tlearn: 0.3255082\ttest: 0.3733491\tbest: 0.3733491 (4800)\ttotal: 4m 5s\tremaining: 8m 41s\n",
2053 | "4900:\tlearn: 0.3245424\ttest: 0.3731565\tbest: 0.3731565 (4900)\ttotal: 4m 10s\tremaining: 8m 35s\n",
2054 | "5000:\tlearn: 0.3235796\ttest: 0.3729578\tbest: 0.3729578 (5000)\ttotal: 4m 15s\tremaining: 8m 30s\n",
2055 | "5100:\tlearn: 0.3226926\ttest: 0.3727767\tbest: 0.3727767 (5100)\ttotal: 4m 20s\tremaining: 8m 24s\n",
2056 | "5200:\tlearn: 0.3218391\ttest: 0.3726883\tbest: 0.3726883 (5200)\ttotal: 4m 24s\tremaining: 8m 19s\n",
2057 | "5300:\tlearn: 0.3209391\ttest: 0.3725385\tbest: 0.3725376 (5299)\ttotal: 4m 29s\tremaining: 8m 13s\n",
2058 | "5400:\tlearn: 0.3199756\ttest: 0.3723152\tbest: 0.3723142 (5397)\ttotal: 4m 34s\tremaining: 8m 8s\n",
2059 | "5500:\tlearn: 0.3190305\ttest: 0.3721521\tbest: 0.3721521 (5500)\ttotal: 4m 39s\tremaining: 8m 2s\n",
2060 | "5600:\tlearn: 0.3180663\ttest: 0.3719317\tbest: 0.3719317 (5600)\ttotal: 4m 44s\tremaining: 7m 57s\n",
2061 | "5700:\tlearn: 0.3172743\ttest: 0.3718330\tbest: 0.3718330 (5700)\ttotal: 4m 49s\tremaining: 7m 51s\n",
2062 | "5800:\tlearn: 0.3163399\ttest: 0.3716938\tbest: 0.3716938 (5800)\ttotal: 4m 53s\tremaining: 7m 46s\n",
2063 | "5900:\tlearn: 0.3153997\ttest: 0.3714888\tbest: 0.3714886 (5899)\ttotal: 4m 58s\tremaining: 7m 40s\n",
2064 | "6000:\tlearn: 0.3145143\ttest: 0.3713565\tbest: 0.3713443 (5991)\ttotal: 5m 3s\tremaining: 7m 35s\n",
2065 | "6100:\tlearn: 0.3135688\ttest: 0.3711795\tbest: 0.3711768 (6099)\ttotal: 5m 8s\tremaining: 7m 29s\n",
2066 | "6200:\tlearn: 0.3127407\ttest: 0.3710886\tbest: 0.3710886 (6200)\ttotal: 5m 13s\tremaining: 7m 24s\n",
2067 | "6300:\tlearn: 0.3118820\ttest: 0.3709710\tbest: 0.3709710 (6300)\ttotal: 5m 18s\tremaining: 7m 19s\n",
2068 | "6400:\tlearn: 0.3110969\ttest: 0.3708528\tbest: 0.3708528 (6400)\ttotal: 5m 22s\tremaining: 7m 13s\n",
2069 | "6500:\tlearn: 0.3102707\ttest: 0.3707461\tbest: 0.3707448 (6498)\ttotal: 5m 27s\tremaining: 7m 8s\n",
2070 | "6600:\tlearn: 0.3094866\ttest: 0.3706898\tbest: 0.3706898 (6600)\ttotal: 5m 32s\tremaining: 7m 3s\n",
2071 | "6700:\tlearn: 0.3086424\ttest: 0.3706255\tbest: 0.3706218 (6690)\ttotal: 5m 37s\tremaining: 6m 57s\n",
2072 | "6800:\tlearn: 0.3078532\ttest: 0.3704983\tbest: 0.3704947 (6798)\ttotal: 5m 42s\tremaining: 6m 52s\n",
2073 | "6900:\tlearn: 0.3069886\ttest: 0.3703896\tbest: 0.3703828 (6892)\ttotal: 5m 47s\tremaining: 6m 47s\n",
2074 | "7000:\tlearn: 0.3062019\ttest: 0.3702855\tbest: 0.3702855 (7000)\ttotal: 5m 52s\tremaining: 6m 42s\n",
2075 | "7100:\tlearn: 0.3054384\ttest: 0.3702080\tbest: 0.3702076 (7098)\ttotal: 5m 56s\tremaining: 6m 37s\n",
2076 | "7200:\tlearn: 0.3046275\ttest: 0.3701393\tbest: 0.3701393 (7200)\ttotal: 6m 1s\tremaining: 6m 31s\n",
2077 | "7300:\tlearn: 0.3039194\ttest: 0.3700597\tbest: 0.3700597 (7300)\ttotal: 6m 6s\tremaining: 6m 26s\n",
2078 | "7400:\tlearn: 0.3030846\ttest: 0.3700079\tbest: 0.3699990 (7392)\ttotal: 6m 11s\tremaining: 6m 21s\n",
2079 | "7500:\tlearn: 0.3022570\ttest: 0.3698628\tbest: 0.3698628 (7500)\ttotal: 6m 16s\tremaining: 6m 16s\n",
2080 | "7600:\tlearn: 0.3014671\ttest: 0.3697234\tbest: 0.3697209 (7597)\ttotal: 6m 21s\tremaining: 6m 11s\n",
2081 | "7700:\tlearn: 0.3007301\ttest: 0.3696408\tbest: 0.3696349 (7695)\ttotal: 6m 26s\tremaining: 6m 6s\n",
2082 | "7800:\tlearn: 0.2999650\ttest: 0.3696314\tbest: 0.3696284 (7799)\ttotal: 6m 31s\tremaining: 6m 1s\n",
2083 | "7900:\tlearn: 0.2992056\ttest: 0.3696034\tbest: 0.3696034 (7900)\ttotal: 6m 36s\tremaining: 5m 56s\n",
2084 | "8000:\tlearn: 0.2983549\ttest: 0.3694847\tbest: 0.3694847 (8000)\ttotal: 6m 41s\tremaining: 5m 51s\n",
2085 | "8100:\tlearn: 0.2975575\ttest: 0.3693905\tbest: 0.3693832 (8094)\ttotal: 6m 46s\tremaining: 5m 46s\n",
2086 | "8200:\tlearn: 0.2968142\ttest: 0.3693207\tbest: 0.3693207 (8200)\ttotal: 6m 51s\tremaining: 5m 41s\n",
2087 | "8300:\tlearn: 0.2960877\ttest: 0.3692626\tbest: 0.3692597 (8294)\ttotal: 6m 56s\tremaining: 5m 36s\n",
2088 | "8400:\tlearn: 0.2953211\ttest: 0.3692038\tbest: 0.3691988 (8398)\ttotal: 7m 1s\tremaining: 5m 31s\n",
2089 | "8500:\tlearn: 0.2945980\ttest: 0.3691641\tbest: 0.3691545 (8476)\ttotal: 7m 7s\tremaining: 5m 26s\n",
2090 | "8600:\tlearn: 0.2938322\ttest: 0.3691234\tbest: 0.3691212 (8596)\ttotal: 7m 12s\tremaining: 5m 21s\n",
2091 | "8700:\tlearn: 0.2930071\ttest: 0.3690673\tbest: 0.3690580 (8658)\ttotal: 7m 17s\tremaining: 5m 16s\n",
2092 | "8800:\tlearn: 0.2922303\ttest: 0.3690234\tbest: 0.3690234 (8800)\ttotal: 7m 22s\tremaining: 5m 11s\n",
2093 | "8900:\tlearn: 0.2914938\ttest: 0.3689460\tbest: 0.3689460 (8900)\ttotal: 7m 27s\tremaining: 5m 6s\n",
2094 | "9000:\tlearn: 0.2907710\ttest: 0.3688827\tbest: 0.3688799 (8997)\ttotal: 7m 31s\tremaining: 5m 1s\n",
2095 | "9100:\tlearn: 0.2900229\ttest: 0.3688352\tbest: 0.3688352 (9100)\ttotal: 7m 36s\tremaining: 4m 56s\n",
2096 | "9200:\tlearn: 0.2892442\ttest: 0.3687413\tbest: 0.3687413 (9200)\ttotal: 7m 41s\tremaining: 4m 50s\n",
2097 | "9300:\tlearn: 0.2885062\ttest: 0.3687262\tbest: 0.3687207 (9269)\ttotal: 7m 46s\tremaining: 4m 45s\n",
2098 | "9400:\tlearn: 0.2877793\ttest: 0.3686913\tbest: 0.3686891 (9397)\ttotal: 7m 51s\tremaining: 4m 40s\n",
2099 | "9500:\tlearn: 0.2869538\ttest: 0.3685994\tbest: 0.3685987 (9498)\ttotal: 7m 56s\tremaining: 4m 35s\n",
2100 | "9600:\tlearn: 0.2861795\ttest: 0.3685772\tbest: 0.3685734 (9592)\ttotal: 8m 1s\tremaining: 4m 30s\n",
2101 | "9700:\tlearn: 0.2854078\ttest: 0.3685484\tbest: 0.3685396 (9696)\ttotal: 8m 6s\tremaining: 4m 25s\n",
2102 | "9800:\tlearn: 0.2846896\ttest: 0.3685243\tbest: 0.3685205 (9782)\ttotal: 8m 11s\tremaining: 4m 20s\n",
2103 | "9900:\tlearn: 0.2838710\ttest: 0.3684569\tbest: 0.3684540 (9895)\ttotal: 8m 16s\tremaining: 4m 15s\n",
2104 | "10000:\tlearn: 0.2831418\ttest: 0.3684233\tbest: 0.3684208 (9993)\ttotal: 8m 21s\tremaining: 4m 10s\n",
2105 | "10100:\tlearn: 0.2825218\ttest: 0.3684175\tbest: 0.3684037 (10082)\ttotal: 8m 26s\tremaining: 4m 5s\n",
2106 | "10200:\tlearn: 0.2817390\ttest: 0.3683958\tbest: 0.3683908 (10183)\ttotal: 8m 31s\tremaining: 4m\n",
2107 | "10300:\tlearn: 0.2809499\ttest: 0.3683933\tbest: 0.3683905 (10212)\ttotal: 8m 36s\tremaining: 3m 55s\n",
2108 | "10400:\tlearn: 0.2802800\ttest: 0.3683796\tbest: 0.3683796 (10400)\ttotal: 8m 41s\tremaining: 3m 50s\n",
2109 | "10500:\tlearn: 0.2795632\ttest: 0.3683903\tbest: 0.3683783 (10401)\ttotal: 8m 46s\tremaining: 3m 45s\n",
2110 | "10600:\tlearn: 0.2788819\ttest: 0.3683868\tbest: 0.3683642 (10527)\ttotal: 8m 51s\tremaining: 3m 40s\n",
2111 | "10700:\tlearn: 0.2781030\ttest: 0.3683809\tbest: 0.3683576 (10677)\ttotal: 8m 56s\tremaining: 3m 35s\n",
2112 | "10800:\tlearn: 0.2774174\ttest: 0.3683718\tbest: 0.3683576 (10677)\ttotal: 9m 1s\tremaining: 3m 30s\n",
2113 | "10900:\tlearn: 0.2767312\ttest: 0.3683335\tbest: 0.3683310 (10898)\ttotal: 9m 7s\tremaining: 3m 25s\n",
2114 | "11000:\tlearn: 0.2760596\ttest: 0.3682948\tbest: 0.3682884 (10996)\ttotal: 9m 12s\tremaining: 3m 20s\n",
2115 | "11100:\tlearn: 0.2753065\ttest: 0.3682629\tbest: 0.3682595 (11086)\ttotal: 9m 17s\tremaining: 3m 15s\n",
2116 | "11200:\tlearn: 0.2746245\ttest: 0.3682473\tbest: 0.3682406 (11171)\ttotal: 9m 22s\tremaining: 3m 10s\n",
2117 | "11300:\tlearn: 0.2739585\ttest: 0.3682471\tbest: 0.3682365 (11217)\ttotal: 9m 27s\tremaining: 3m 5s\n",
2118 | "11400:\tlearn: 0.2732829\ttest: 0.3682331\tbest: 0.3682325 (11386)\ttotal: 9m 33s\tremaining: 3m\n",
2119 | "11500:\tlearn: 0.2725566\ttest: 0.3682264\tbest: 0.3682215 (11496)\ttotal: 9m 38s\tremaining: 2m 55s\n",
2120 | "11600:\tlearn: 0.2719067\ttest: 0.3682170\tbest: 0.3682170 (11600)\ttotal: 9m 43s\tremaining: 2m 50s\n",
2121 | "11700:\tlearn: 0.2712049\ttest: 0.3682375\tbest: 0.3682127 (11627)\ttotal: 9m 48s\tremaining: 2m 45s\n",
2122 | "11800:\tlearn: 0.2704886\ttest: 0.3681876\tbest: 0.3681869 (11798)\ttotal: 9m 54s\tremaining: 2m 41s\n",
2123 | "11900:\tlearn: 0.2697195\ttest: 0.3681579\tbest: 0.3681394 (11858)\ttotal: 10m\tremaining: 2m 36s\n",
2124 | "12000:\tlearn: 0.2690434\ttest: 0.3681903\tbest: 0.3681394 (11858)\ttotal: 10m 7s\tremaining: 2m 31s\n",
2125 | "bestTest = 0.3681394479\n",
2126 | "bestIteration = 11858\n",
2127 | "Shrink model to first 11859 iterations.\n",
2128 | "Fold 5\n",
2129 | "0:\tlearn: 2.9424549\ttest: 2.9442838\tbest: 2.9442838 (0)\ttotal: 77.9ms\tremaining: 19m 28s\n",
2130 | "100:\tlearn: 0.9617585\ttest: 0.9881392\tbest: 0.9881392 (100)\ttotal: 7.21s\tremaining: 17m 43s\n"
2131 | ]
2132 | },
2133 | {
2134 | "name": "stdout",
2135 | "output_type": "stream",
2136 | "text": [
2137 | "200:\tlearn: 0.6761444\ttest: 0.7050895\tbest: 0.7050895 (200)\ttotal: 14.5s\tremaining: 17m 47s\n",
2138 | "300:\tlearn: 0.5715305\ttest: 0.6024932\tbest: 0.6024932 (300)\ttotal: 21.5s\tremaining: 17m 29s\n",
2139 | "400:\tlearn: 0.5181647\ttest: 0.5508480\tbest: 0.5508480 (400)\ttotal: 28.1s\tremaining: 17m 4s\n",
2140 | "500:\tlearn: 0.4840808\ttest: 0.5186106\tbest: 0.5186106 (500)\ttotal: 34.9s\tremaining: 16m 50s\n",
2141 | "600:\tlearn: 0.4617105\ttest: 0.4980052\tbest: 0.4980052 (600)\ttotal: 41.7s\tremaining: 16m 38s\n",
2142 | "700:\tlearn: 0.4452419\ttest: 0.4829969\tbest: 0.4829969 (700)\ttotal: 48.5s\tremaining: 16m 28s\n",
2143 | "800:\tlearn: 0.4315767\ttest: 0.4711263\tbest: 0.4711263 (800)\ttotal: 57s\tremaining: 16m 50s\n",
2144 | "900:\tlearn: 0.4202764\ttest: 0.4615710\tbest: 0.4615710 (900)\ttotal: 1m 3s\tremaining: 16m 36s\n",
2145 | "1000:\tlearn: 0.4112617\ttest: 0.4540898\tbest: 0.4540898 (1000)\ttotal: 1m 10s\tremaining: 16m 22s\n",
2146 | "1100:\tlearn: 0.4027244\ttest: 0.4473620\tbest: 0.4473620 (1100)\ttotal: 1m 17s\tremaining: 16m 12s\n",
2147 | "1200:\tlearn: 0.3954945\ttest: 0.4418959\tbest: 0.4418959 (1200)\ttotal: 1m 23s\tremaining: 16m 1s\n",
2148 | "1300:\tlearn: 0.3892795\ttest: 0.4374623\tbest: 0.4374623 (1300)\ttotal: 1m 30s\tremaining: 15m 51s\n",
2149 | "1400:\tlearn: 0.3835470\ttest: 0.4335617\tbest: 0.4335617 (1400)\ttotal: 1m 37s\tremaining: 15m 42s\n",
2150 | "1500:\tlearn: 0.3784162\ttest: 0.4302539\tbest: 0.4302539 (1500)\ttotal: 1m 43s\tremaining: 15m 33s\n",
2151 | "1600:\tlearn: 0.3741321\ttest: 0.4274136\tbest: 0.4274136 (1600)\ttotal: 1m 50s\tremaining: 15m 23s\n",
2152 | "1700:\tlearn: 0.3701685\ttest: 0.4250229\tbest: 0.4250229 (1700)\ttotal: 1m 56s\tremaining: 15m 14s\n",
2153 | "1800:\tlearn: 0.3667586\ttest: 0.4230024\tbest: 0.4230024 (1800)\ttotal: 2m 3s\tremaining: 15m 4s\n",
2154 | "1900:\tlearn: 0.3637877\ttest: 0.4213772\tbest: 0.4213772 (1900)\ttotal: 2m 9s\tremaining: 14m 54s\n",
2155 | "2000:\tlearn: 0.3608081\ttest: 0.4197317\tbest: 0.4197317 (2000)\ttotal: 2m 16s\tremaining: 14m 47s\n",
2156 | "2100:\tlearn: 0.3581836\ttest: 0.4184556\tbest: 0.4184556 (2100)\ttotal: 2m 24s\tremaining: 14m 45s\n",
2157 | "2200:\tlearn: 0.3556847\ttest: 0.4172725\tbest: 0.4172725 (2200)\ttotal: 2m 30s\tremaining: 14m 35s\n",
2158 | "2300:\tlearn: 0.3534802\ttest: 0.4162296\tbest: 0.4162296 (2300)\ttotal: 2m 36s\tremaining: 14m 26s\n",
2159 | "2400:\tlearn: 0.3513135\ttest: 0.4152141\tbest: 0.4152141 (2400)\ttotal: 2m 43s\tremaining: 14m 16s\n",
2160 | "2500:\tlearn: 0.3494048\ttest: 0.4144232\tbest: 0.4144232 (2500)\ttotal: 2m 49s\tremaining: 14m 7s\n",
2161 | "2600:\tlearn: 0.3474297\ttest: 0.4134853\tbest: 0.4134853 (2600)\ttotal: 2m 55s\tremaining: 13m 58s\n",
2162 | "2700:\tlearn: 0.3455128\ttest: 0.4126680\tbest: 0.4126680 (2700)\ttotal: 3m 2s\tremaining: 13m 50s\n",
2163 | "2800:\tlearn: 0.3438843\ttest: 0.4120378\tbest: 0.4120378 (2800)\ttotal: 3m 8s\tremaining: 13m 41s\n",
2164 | "2900:\tlearn: 0.3423575\ttest: 0.4114719\tbest: 0.4114719 (2900)\ttotal: 3m 14s\tremaining: 13m 32s\n",
2165 | "3000:\tlearn: 0.3407821\ttest: 0.4109110\tbest: 0.4109110 (3000)\ttotal: 3m 21s\tremaining: 13m 24s\n",
2166 | "3100:\tlearn: 0.3391884\ttest: 0.4103074\tbest: 0.4103074 (3100)\ttotal: 3m 27s\tremaining: 13m 16s\n",
2167 | "3200:\tlearn: 0.3376242\ttest: 0.4097487\tbest: 0.4097487 (3200)\ttotal: 3m 33s\tremaining: 13m 8s\n",
2168 | "3300:\tlearn: 0.3362089\ttest: 0.4093418\tbest: 0.4093418 (3300)\ttotal: 3m 40s\tremaining: 13m\n",
2169 | "3400:\tlearn: 0.3347776\ttest: 0.4088332\tbest: 0.4088332 (3400)\ttotal: 3m 46s\tremaining: 12m 52s\n",
2170 | "3500:\tlearn: 0.3334630\ttest: 0.4084179\tbest: 0.4084131 (3499)\ttotal: 3m 52s\tremaining: 12m 45s\n",
2171 | "3600:\tlearn: 0.3321925\ttest: 0.4079383\tbest: 0.4079383 (3600)\ttotal: 4m 1s\tremaining: 12m 45s\n",
2172 | "3700:\tlearn: 0.3309955\ttest: 0.4075805\tbest: 0.4075805 (3700)\ttotal: 4m 7s\tremaining: 12m 36s\n",
2173 | "3800:\tlearn: 0.3298875\ttest: 0.4072638\tbest: 0.4072638 (3800)\ttotal: 4m 14s\tremaining: 12m 28s\n",
2174 | "3900:\tlearn: 0.3287322\ttest: 0.4069309\tbest: 0.4069309 (3900)\ttotal: 4m 20s\tremaining: 12m 20s\n",
2175 | "4000:\tlearn: 0.3275012\ttest: 0.4065555\tbest: 0.4065555 (4000)\ttotal: 4m 26s\tremaining: 12m 13s\n",
2176 | "4100:\tlearn: 0.3264662\ttest: 0.4062378\tbest: 0.4062378 (4100)\ttotal: 4m 34s\tremaining: 12m 9s\n",
2177 | "4200:\tlearn: 0.3253504\ttest: 0.4059299\tbest: 0.4059266 (4196)\ttotal: 4m 40s\tremaining: 12m 1s\n",
2178 | "4300:\tlearn: 0.3242990\ttest: 0.4056054\tbest: 0.4056049 (4299)\ttotal: 4m 46s\tremaining: 11m 53s\n",
2179 | "4400:\tlearn: 0.3232095\ttest: 0.4053209\tbest: 0.4053209 (4400)\ttotal: 4m 53s\tremaining: 11m 46s\n",
2180 | "4500:\tlearn: 0.3222201\ttest: 0.4050968\tbest: 0.4050943 (4499)\ttotal: 4m 59s\tremaining: 11m 38s\n",
2181 | "4600:\tlearn: 0.3212505\ttest: 0.4048767\tbest: 0.4048767 (4600)\ttotal: 5m 5s\tremaining: 11m 31s\n",
2182 | "4700:\tlearn: 0.3203304\ttest: 0.4046258\tbest: 0.4046240 (4697)\ttotal: 5m 11s\tremaining: 11m 23s\n",
2183 | "4800:\tlearn: 0.3192728\ttest: 0.4043751\tbest: 0.4043751 (4800)\ttotal: 5m 18s\tremaining: 11m 16s\n",
2184 | "4900:\tlearn: 0.3183672\ttest: 0.4041917\tbest: 0.4041917 (4900)\ttotal: 5m 24s\tremaining: 11m 8s\n",
2185 | "5000:\tlearn: 0.3174759\ttest: 0.4040201\tbest: 0.4040149 (4994)\ttotal: 5m 31s\tremaining: 11m 3s\n",
2186 | "5100:\tlearn: 0.3165674\ttest: 0.4038492\tbest: 0.4038492 (5100)\ttotal: 5m 38s\tremaining: 10m 56s\n",
2187 | "5200:\tlearn: 0.3156905\ttest: 0.4036690\tbest: 0.4036625 (5193)\ttotal: 5m 44s\tremaining: 10m 48s\n",
2188 | "5300:\tlearn: 0.3147239\ttest: 0.4034269\tbest: 0.4034269 (5300)\ttotal: 5m 50s\tremaining: 10m 41s\n",
2189 | "5400:\tlearn: 0.3137961\ttest: 0.4032637\tbest: 0.4032550 (5392)\ttotal: 5m 56s\tremaining: 10m 34s\n",
2190 | "5500:\tlearn: 0.3128559\ttest: 0.4030648\tbest: 0.4030648 (5500)\ttotal: 6m 4s\tremaining: 10m 29s\n",
2191 | "5600:\tlearn: 0.3119545\ttest: 0.4028348\tbest: 0.4028348 (5600)\ttotal: 6m 10s\tremaining: 10m 22s\n",
2192 | "5700:\tlearn: 0.3111319\ttest: 0.4026802\tbest: 0.4026802 (5700)\ttotal: 6m 17s\tremaining: 10m 15s\n",
2193 | "5800:\tlearn: 0.3101547\ttest: 0.4024679\tbest: 0.4024647 (5798)\ttotal: 6m 23s\tremaining: 10m 8s\n",
2194 | "5900:\tlearn: 0.3092607\ttest: 0.4023143\tbest: 0.4023143 (5900)\ttotal: 6m 30s\tremaining: 10m 1s\n",
2195 | "6000:\tlearn: 0.3083620\ttest: 0.4021604\tbest: 0.4021546 (5998)\ttotal: 6m 36s\tremaining: 9m 54s\n",
2196 | "6100:\tlearn: 0.3075716\ttest: 0.4019895\tbest: 0.4019892 (6097)\ttotal: 6m 43s\tremaining: 9m 47s\n",
2197 | "6200:\tlearn: 0.3066833\ttest: 0.4018588\tbest: 0.4018588 (6200)\ttotal: 6m 49s\tremaining: 9m 41s\n",
2198 | "6300:\tlearn: 0.3058037\ttest: 0.4017364\tbest: 0.4017361 (6299)\ttotal: 6m 57s\tremaining: 9m 36s\n",
2199 | "6400:\tlearn: 0.3050026\ttest: 0.4016023\tbest: 0.4016011 (6399)\ttotal: 7m 3s\tremaining: 9m 29s\n",
2200 | "6500:\tlearn: 0.3041733\ttest: 0.4014706\tbest: 0.4014706 (6500)\ttotal: 7m 10s\tremaining: 9m 22s\n",
2201 | "6600:\tlearn: 0.3033654\ttest: 0.4013718\tbest: 0.4013692 (6599)\ttotal: 7m 16s\tremaining: 9m 15s\n",
2202 | "6700:\tlearn: 0.3025699\ttest: 0.4013270\tbest: 0.4013242 (6693)\ttotal: 7m 22s\tremaining: 9m 8s\n",
2203 | "6800:\tlearn: 0.3017861\ttest: 0.4012242\tbest: 0.4012234 (6798)\ttotal: 7m 29s\tremaining: 9m 1s\n",
2204 | "6900:\tlearn: 0.3009621\ttest: 0.4010854\tbest: 0.4010819 (6895)\ttotal: 7m 37s\tremaining: 8m 56s\n",
2205 | "7000:\tlearn: 0.3001619\ttest: 0.4010114\tbest: 0.4010114 (7000)\ttotal: 7m 44s\tremaining: 8m 50s\n",
2206 | "7100:\tlearn: 0.2993572\ttest: 0.4009370\tbest: 0.4009341 (7097)\ttotal: 7m 50s\tremaining: 8m 43s\n",
2207 | "7200:\tlearn: 0.2985159\ttest: 0.4008075\tbest: 0.4008075 (7200)\ttotal: 7m 57s\tremaining: 8m 37s\n",
2208 | "7300:\tlearn: 0.2977427\ttest: 0.4007471\tbest: 0.4007430 (7288)\ttotal: 8m 4s\tremaining: 8m 30s\n",
2209 | "7400:\tlearn: 0.2969435\ttest: 0.4006814\tbest: 0.4006808 (7399)\ttotal: 8m 10s\tremaining: 8m 23s\n",
2210 | "7500:\tlearn: 0.2961321\ttest: 0.4006056\tbest: 0.4005972 (7489)\ttotal: 8m 17s\tremaining: 8m 17s\n",
2211 | "7600:\tlearn: 0.2953712\ttest: 0.4005281\tbest: 0.4005214 (7596)\ttotal: 8m 25s\tremaining: 8m 12s\n",
2212 | "7700:\tlearn: 0.2945888\ttest: 0.4004464\tbest: 0.4004449 (7699)\ttotal: 8m 30s\tremaining: 8m 4s\n",
2213 | "7800:\tlearn: 0.2938290\ttest: 0.4003592\tbest: 0.4003577 (7793)\ttotal: 8m 35s\tremaining: 7m 55s\n",
2214 | "7900:\tlearn: 0.2930511\ttest: 0.4002797\tbest: 0.4002797 (7900)\ttotal: 8m 40s\tremaining: 7m 47s\n",
2215 | "8000:\tlearn: 0.2922237\ttest: 0.4001815\tbest: 0.4001756 (7996)\ttotal: 8m 45s\tremaining: 7m 39s\n",
2216 | "8100:\tlearn: 0.2915275\ttest: 0.4001321\tbest: 0.4001321 (8100)\ttotal: 8m 50s\tremaining: 7m 31s\n",
2217 | "8200:\tlearn: 0.2907568\ttest: 0.4000595\tbest: 0.4000480 (8187)\ttotal: 8m 55s\tremaining: 7m 23s\n",
2218 | "8300:\tlearn: 0.2900331\ttest: 0.3999890\tbest: 0.3999875 (8299)\ttotal: 9m\tremaining: 7m 16s\n",
2219 | "8400:\tlearn: 0.2892897\ttest: 0.3999585\tbest: 0.3999585 (8400)\ttotal: 9m 5s\tremaining: 7m 8s\n",
2220 | "8500:\tlearn: 0.2885418\ttest: 0.3998574\tbest: 0.3998548 (8497)\ttotal: 9m 10s\tremaining: 7m\n",
2221 | "8600:\tlearn: 0.2878089\ttest: 0.3998342\tbest: 0.3998304 (8597)\ttotal: 9m 15s\tremaining: 6m 52s\n",
2222 | "8700:\tlearn: 0.2870797\ttest: 0.3998222\tbest: 0.3998087 (8641)\ttotal: 9m 19s\tremaining: 6m 45s\n",
2223 | "8800:\tlearn: 0.2863412\ttest: 0.3997846\tbest: 0.3997766 (8794)\ttotal: 9m 24s\tremaining: 6m 37s\n",
2224 | "8900:\tlearn: 0.2856493\ttest: 0.3997696\tbest: 0.3997518 (8854)\ttotal: 9m 29s\tremaining: 6m 30s\n"
2225 | ]
2226 | },
2227 | {
2228 | "name": "stdout",
2229 | "output_type": "stream",
2230 | "text": [
2231 | "9000:\tlearn: 0.2848238\ttest: 0.3997300\tbest: 0.3997300 (8999)\ttotal: 9m 34s\tremaining: 6m 22s\n",
2232 | "9100:\tlearn: 0.2841335\ttest: 0.3997097\tbest: 0.3997093 (9098)\ttotal: 9m 39s\tremaining: 6m 15s\n",
2233 | "9200:\tlearn: 0.2834343\ttest: 0.3997057\tbest: 0.3996978 (9150)\ttotal: 9m 44s\tremaining: 6m 8s\n",
2234 | "9300:\tlearn: 0.2826697\ttest: 0.3996663\tbest: 0.3996599 (9260)\ttotal: 9m 49s\tremaining: 6m 1s\n",
2235 | "9400:\tlearn: 0.2818948\ttest: 0.3996026\tbest: 0.3996021 (9399)\ttotal: 9m 54s\tremaining: 5m 53s\n",
2236 | "9500:\tlearn: 0.2811680\ttest: 0.3995901\tbest: 0.3995835 (9493)\ttotal: 9m 59s\tremaining: 5m 46s\n",
2237 | "9600:\tlearn: 0.2804824\ttest: 0.3995799\tbest: 0.3995784 (9599)\ttotal: 10m 3s\tremaining: 5m 39s\n",
2238 | "9700:\tlearn: 0.2797218\ttest: 0.3995514\tbest: 0.3995498 (9697)\ttotal: 10m 8s\tremaining: 5m 32s\n",
2239 | "9800:\tlearn: 0.2790184\ttest: 0.3995658\tbest: 0.3995486 (9725)\ttotal: 10m 13s\tremaining: 5m 25s\n",
2240 | "9900:\tlearn: 0.2783224\ttest: 0.3995481\tbest: 0.3995409 (9897)\ttotal: 10m 18s\tremaining: 5m 18s\n",
2241 | "10000:\tlearn: 0.2776369\ttest: 0.3995318\tbest: 0.3995216 (9981)\ttotal: 10m 23s\tremaining: 5m 11s\n",
2242 | "10100:\tlearn: 0.2769338\ttest: 0.3994822\tbest: 0.3994822 (10100)\ttotal: 10m 28s\tremaining: 5m 4s\n",
2243 | "10200:\tlearn: 0.2762047\ttest: 0.3994717\tbest: 0.3994592 (10138)\ttotal: 10m 33s\tremaining: 4m 58s\n",
2244 | "10300:\tlearn: 0.2754445\ttest: 0.3994536\tbest: 0.3994536 (10300)\ttotal: 10m 38s\tremaining: 4m 51s\n",
2245 | "10400:\tlearn: 0.2747585\ttest: 0.3994544\tbest: 0.3994426 (10388)\ttotal: 10m 43s\tremaining: 4m 44s\n",
2246 | "10500:\tlearn: 0.2740382\ttest: 0.3994316\tbest: 0.3994234 (10488)\ttotal: 10m 48s\tremaining: 4m 37s\n",
2247 | "10600:\tlearn: 0.2733845\ttest: 0.3994220\tbest: 0.3994157 (10593)\ttotal: 10m 53s\tremaining: 4m 31s\n",
2248 | "10700:\tlearn: 0.2726328\ttest: 0.3994026\tbest: 0.3994018 (10697)\ttotal: 10m 58s\tremaining: 4m 24s\n",
2249 | "10800:\tlearn: 0.2719250\ttest: 0.3994230\tbest: 0.3993875 (10720)\ttotal: 11m 3s\tremaining: 4m 17s\n",
2250 | "10900:\tlearn: 0.2712554\ttest: 0.3993818\tbest: 0.3993764 (10893)\ttotal: 11m 8s\tremaining: 4m 11s\n",
2251 | "11000:\tlearn: 0.2705212\ttest: 0.3993897\tbest: 0.3993764 (10893)\ttotal: 11m 13s\tremaining: 4m 4s\n",
2252 | "bestTest = 0.3993763864\n",
2253 | "bestIteration = 10893\n",
2254 | "Shrink model to first 10894 iterations.\n",
2255 | "MEAN SCORE = 0.37665441095147056\n"
2256 | ]
2257 | }
2258 | ],
2259 | "source": [
2260 | "#CatBoost multiclass model; the same estimator object is fitted once per CV fold below\n",
2261 | "model_cat = CatBoostClassifier(\n",
2262 | "    depth=5,\n",
2263 | "    n_estimators=15000,\n",
2264 | "    learning_rate=0.01,\n",
2265 | "    random_state=567,\n",
2266 | "    task_type='GPU',\n",
2267 | "    thread_count=1,\n",
2268 | "    verbose=100,\n",
2269 | "    use_best_model=True,\n",
2270 | "    nan_mode='Max',\n",
2271 | ")\n",
2272 | "\n",
2273 | "probs = []        #per-fold class probabilities for X_test\n",
2274 | "probs_train = []  #per-fold class probabilities for the whole of X_train\n",
2275 | "scoring = 0\n",
2276 | "group_kfold = GroupKFold(n_splits=5)\n",
2277 | "#The identifier and date columns are not passed to the model\n",
2278 | "cols = X_train.drop(columns=['ID', 'ID2', 'join_date']).columns\n",
2279 | "#Grouping by 'ID' keeps all rows of one ID on the same side of every split\n",
2280 | "folds = group_kfold.split(X_train, y_train, np.array(X_train['ID']))\n",
2281 | "for i, (train_index, test_index) in enumerate(folds, start=1):\n",
2282 | "    X_real_train, X_valid = X_train.iloc[train_index], X_train.iloc[test_index]\n",
2283 | "    y_real_train, y_valid = y_train.iloc[train_index], y_train.iloc[test_index]\n",
2284 | "    print('Fold', i)\n",
2285 | "    model_cat.fit(\n",
2286 | "        X_real_train[cols],\n",
2287 | "        y_real_train,\n",
2288 | "        cat_features=cat_features,\n",
2289 | "        eval_set=[(X_valid[cols], y_valid)],\n",
2290 | "        early_stopping_rounds=200,\n",
2291 | "    )\n",
2292 | "    scoring += model_cat.get_best_score()['validation']['MultiClass']\n",
2293 | "    proba = model_cat.predict_proba(X_test[cols])\n",
2294 | "    probs.append(proba)\n",
2295 | "    probs_train.append(model_cat.predict_proba(X_train[cols]))\n",
2296 | "#Average over the configured number of folds rather than a hard-coded 5\n",
2297 | "scoring /= group_kfold.get_n_splits()\n",
2298 | "print('MEAN SCORE =', scoring)"
2299 | ]
2300 | },
2301 | {
2302 | "cell_type": "code",
2303 | "execution_count": 52,
2304 | "metadata": {},
2305 | "outputs": [
2306 | {
2307 | "data": {
2308 | "text/html": [
2309 | "\n",
2310 | "\n",
2323 | "
\n",
2324 | " \n",
2325 | " \n",
2326 | " | \n",
2327 | " importance | \n",
2328 | "
\n",
2329 | " \n",
2330 | " \n",
2331 | " \n",
2332 | " | RVSZ | \n",
2333 | " 3.425022 | \n",
2334 | "
\n",
2335 | " \n",
2336 | " | sex | \n",
2337 | " 1.030568 | \n",
2338 | "
\n",
2339 | " \n",
2340 | " | marital_status | \n",
2341 | " 6.894693 | \n",
2342 | "
\n",
2343 | " \n",
2344 | " | occupation_code | \n",
2345 | " 2.663958 | \n",
2346 | "
\n",
2347 | " \n",
2348 | " | birth_year | \n",
2349 | " 2.524650 | \n",
2350 | "
\n",
2351 | " \n",
2352 | " | from_begin | \n",
2353 | " 1.836085 | \n",
2354 | "
\n",
2355 | " \n",
2356 | " | day_of_year | \n",
2357 | " 1.021888 | \n",
2358 | "
\n",
2359 | " \n",
2360 | " | age | \n",
2361 | " 2.946634 | \n",
2362 | "
\n",
2363 | " \n",
2364 | " | mean_age_in_occupation | \n",
2365 | " 1.728786 | \n",
2366 | "
\n",
2367 | " \n",
2368 | " | std_age_in_occupation | \n",
2369 | " 2.435206 | \n",
2370 | "
\n",
2371 | " \n",
2372 | " | occupation_code_freq | \n",
2373 | " 1.001891 | \n",
2374 | "
\n",
2375 | " \n",
2376 | " | branch_ocupation_freq | \n",
2377 | " 1.370450 | \n",
2378 | "
\n",
2379 | " \n",
2380 | " | branch_ocupcode_freq | \n",
2381 | " 1.119399 | \n",
2382 | "
\n",
2383 | " \n",
2384 | " | product_comb | \n",
2385 | " 3.557339 | \n",
2386 | "
\n",
2387 | " \n",
2388 | " | RVSZ_JZ9D | \n",
2389 | " 2.484825 | \n",
2390 | "
\n",
2391 | " \n",
2392 | " | RVSZ_ECY3 | \n",
2393 | " 1.351959 | \n",
2394 | "
\n",
2395 | " \n",
2396 | " | from_arise_product_AHXO_in_branch | \n",
2397 | " 1.080058 | \n",
2398 | "
\n",
2399 | " \n",
2400 | " | RVSZ__age_mean | \n",
2401 | " 3.978749 | \n",
2402 | "
\n",
2403 | " \n",
2404 | " | RVSZ__age_std | \n",
2405 | " 2.444638 | \n",
2406 | "
\n",
2407 | " \n",
2408 | " | dayofweek_cat | \n",
2409 | " 1.230625 | \n",
2410 | "
\n",
2411 | " \n",
2412 | " | from_begin_cat | \n",
2413 | " 2.898488 | \n",
2414 | "
\n",
2415 | " \n",
2416 | "
\n",
2417 | "
"
2418 | ],
2419 | "text/plain": [
2420 | " importance\n",
2421 | "RVSZ 3.425022\n",
2422 | "sex 1.030568\n",
2423 | "marital_status 6.894693\n",
2424 | "occupation_code 2.663958\n",
2425 | "birth_year 2.524650\n",
2426 | "from_begin 1.836085\n",
2427 | "day_of_year 1.021888\n",
2428 | "age 2.946634\n",
2429 | "mean_age_in_occupation 1.728786\n",
2430 | "std_age_in_occupation 2.435206\n",
2431 | "occupation_code_freq 1.001891\n",
2432 | "branch_ocupation_freq 1.370450\n",
2433 | "branch_ocupcode_freq 1.119399\n",
2434 | "product_comb 3.557339\n",
2435 | "RVSZ_JZ9D 2.484825\n",
2436 | "RVSZ_ECY3 1.351959\n",
2437 | "from_arise_product_AHXO_in_branch 1.080058\n",
2438 | "RVSZ__age_mean 3.978749\n",
2439 | "RVSZ__age_std 2.444638\n",
2440 | "dayofweek_cat 1.230625\n",
2441 | "from_begin_cat 2.898488"
2442 | ]
2443 | },
2444 | "execution_count": 52,
2445 | "metadata": {},
2446 | "output_type": "execute_result"
2447 | }
2448 | ],
2449 | "source": [
2450 | "#Feature importances of model_cat as left by the last fold fit; only values above 1 are shown\n",
2451 | "pd.DataFrame({'importance': model_cat.feature_importances_},\n",
2452 | "             index=cols).loc[lambda imp: imp['importance'] > 1]"
2453 | ]
2454 | },
2455 | {
2456 | "cell_type": "code",
2457 | "execution_count": 53,
2458 | "metadata": {
2459 | "code_folding": []
2460 | },
2461 | "outputs": [],
2462 | "source": [
2463 | "#Average the per-fold test probabilities\n",
2464 | "#\n",
2465 | "#np.mean stacks the equally shaped fold arrays and averages over the fold axis,\n",
2466 | "#so neither the (10000, 21) shape nor the fold count of 5 is hard-coded any more\n",
2467 | "#(an empty probs list now yields NaN instead of an all-zero array)\n",
2468 | "new_a = np.mean(probs, axis=0)"
2469 | ]
2470 | },
2471 | {
2472 | "cell_type": "code",
2473 | "execution_count": 54,
2474 | "metadata": {},
2475 | "outputs": [],
2476 | "source": [
2477 | "#Name the probability columns with the labels le.inverse_transform returns for positions 0..n-1\n",
2478 | "y_test = pd.DataFrame(new_a, columns=le.inverse_transform(np.arange(new_a.shape[1])))"
2479 | ]
2480 | },
2481 | {
2482 | "cell_type": "code",
2483 | "execution_count": 55,
2484 | "metadata": {
2485 | "code_folding": []
2486 | },
2487 | "outputs": [
2488 | {
2489 | "data": {
2490 | "application/vnd.jupyter.widget-view+json": {
2491 | "model_id": "8dc756e81aba45dda072f754c119c765",
2492 | "version_major": 2,
2493 | "version_minor": 0
2494 | },
2495 | "text/plain": [
2496 | "HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))"
2497 | ]
2498 | },
2499 | "metadata": {},
2500 | "output_type": "display_data"
2501 | },
2502 | {
2503 | "name": "stdout",
2504 | "output_type": "stream",
2505 | "text": [
2506 | "\n"
2507 | ]
2508 | }
2509 | ],
2510 | "source": [
2511 | "#Post-process the averaged probabilities with the product flags in X_test:\n",
2512 | "# - a product flagged 1 in X_test gets probability 1.0;\n",
2513 | "# - the remaining probabilities of that row are divided by their own sum.\n",
2514 | "#Vectorised form of the former per-row loop, which rebuilt a boolean mask over\n",
2515 | "#the whole index for every cell; pandas aligns the frames on index and columns.\n",
2516 | "#NOTE(review): assumes X_test and y_test carry the same row index - confirm\n",
2517 | "\n",
2518 | "owned = X_test[names_products].eq(1)\n",
2519 | "probs_df = y_test[names_products]\n",
2520 | "rest = probs_df.mask(owned, 0.0).sum(axis=1)\n",
2521 | "y_test[names_products] = probs_df.div(rest, axis=0).mask(owned, 1.0)"
2522 | ]
2523 | },
2524 | {
2525 | "cell_type": "code",
2526 | "execution_count": 56,
2527 | "metadata": {},
2528 | "outputs": [
2529 | {
2530 | "data": {
2531 | "text/html": [
2532 | "\n",
2533 | "\n",
2546 | "
\n",
2547 | " \n",
2548 | " \n",
2549 | " | \n",
2550 | " ID X PCODE | \n",
2551 | " Label | \n",
2552 | "
\n",
2553 | " \n",
2554 | " \n",
2555 | " \n",
2556 | " | 0 | \n",
2557 | " F86J5PC X 66FJ | \n",
2558 | " 0.000079 | \n",
2559 | "
\n",
2560 | " \n",
2561 | " | 1 | \n",
2562 | " F86J5PC X 7POT | \n",
2563 | " 0.000080 | \n",
2564 | "
\n",
2565 | " \n",
2566 | " | 2 | \n",
2567 | " F86J5PC X 8NN1 | \n",
2568 | " 0.000002 | \n",
2569 | "
\n",
2570 | " \n",
2571 | " | 3 | \n",
2572 | " F86J5PC X AHXO | \n",
2573 | " 0.000017 | \n",
2574 | "
\n",
2575 | " \n",
2576 | " | 4 | \n",
2577 | " F86J5PC X BSTQ | \n",
2578 | " 0.000023 | \n",
2579 | "
\n",
2580 | " \n",
2581 | "
\n",
2582 | "
"
2583 | ],
2584 | "text/plain": [
2585 | " ID X PCODE Label\n",
2586 | "0 F86J5PC X 66FJ 0.000079\n",
2587 | "1 F86J5PC X 7POT 0.000080\n",
2588 | "2 F86J5PC X 8NN1 0.000002\n",
2589 | "3 F86J5PC X AHXO 0.000017\n",
2590 | "4 F86J5PC X BSTQ 0.000023"
2591 | ]
2592 | },
2593 | "execution_count": 56,
2594 | "metadata": {},
2595 | "output_type": "execute_result"
2596 | }
2597 | ],
2598 | "source": [
2599 | "#Reshape the wide probability table into long rows keyed 'ID X PCODE'\n",
2600 | "#Rows of X_test and y_test are paired by position, as in the former index loop.\n",
2601 | "#The loop variable is no longer named `id`, which replaced the builtin id()\n",
2602 | "#in the notebook namespace for every cell run afterwards.\n",
2603 | "answer_mass = [\n",
2604 | "    [row_id + ' X ' + code, prob]\n",
2605 | "    for row_id, row_probs in zip(X_test['ID'], y_test.to_numpy())\n",
2606 | "    for code, prob in zip(y_test.columns, row_probs)\n",
2607 | "]\n",
2608 | "df_answer = pd.DataFrame(answer_mass, columns=['ID X PCODE', 'Label'])\n",
2609 | "df_answer.head()"
2610 | ]
2611 | },
2612 | {
2613 | "cell_type": "code",
2614 | "execution_count": 72,
2615 | "metadata": {},
2616 | "outputs": [
2617 | {
2618 | "data": {
2619 | "text/html": [
2620 | "\n",
2621 | "\n",
2634 | "
\n",
2635 | " \n",
2636 | " \n",
2637 | " | \n",
2638 | " ID X PCODE | \n",
2639 | " Label | \n",
2640 | "
\n",
2641 | " \n",
2642 | " \n",
2643 | " \n",
2644 | " | 0 | \n",
2645 | " 0021EE1 X P5DA | \n",
2646 | " 0.000005 | \n",
2647 | "
\n",
2648 | " \n",
2649 | " | 1 | \n",
2650 | " 0029J1L X P5DA | \n",
2651 | " 0.000114 | \n",
2652 | "
\n",
2653 | " \n",
2654 | " | 2 | \n",
2655 | " 004QK71 X P5DA | \n",
2656 | " 0.000091 | \n",
2657 | "
\n",
2658 | " \n",
2659 | " | 3 | \n",
2660 | " 005AP9V X P5DA | \n",
2661 | " 0.000021 | \n",
2662 | "
\n",
2663 | " \n",
2664 | " | 4 | \n",
2665 | " 0096G27 X P5DA | \n",
2666 | " 0.000015 | \n",
2667 | "
\n",
2668 | " \n",
2669 | "
\n",
2670 | "
"
2671 | ],
2672 | "text/plain": [
2673 | " ID X PCODE Label\n",
2674 | "0 0021EE1 X P5DA 0.000005\n",
2675 | "1 0029J1L X P5DA 0.000114\n",
2676 | "2 004QK71 X P5DA 0.000091\n",
2677 | "3 005AP9V X P5DA 0.000021\n",
2678 | "4 0096G27 X P5DA 0.000015"
2679 | ]
2680 | },
2681 | "execution_count": 72,
2682 | "metadata": {},
2683 | "output_type": "execute_result"
2684 | }
2685 | ],
2686 | "source": [
2687 | "#Blending models\n",
2688 | "#\n",
2689 | "#Fixed-weight blend; net weights: submiss.csv 0.48, df_answer 0.12, submis_1.csv 0.40.\n",
2690 | "#Each merge joins on the columns the two frames have in common.\n",
2691 | "df_new = pd.read_csv('submiss.csv')\n",
2692 | "df = df_new.merge(df_answer.rename(columns={'Label': 'lbl'}))\n",
2693 | "df = df.assign(Label=0.8 * df['Label'] + 0.2 * df['lbl'])\n",
2694 | "df = df.drop(columns='lbl')\n",
2695 | "df_new = pd.read_csv('submis_1.csv')\n",
2696 | "df = df.merge(df_new.rename(columns={'Label': 'lbl'}))\n",
2697 | "df = df.assign(Label=0.6 * df['Label'] + 0.4 * df['lbl'])\n",
2698 | "df = df.drop(columns='lbl')\n",
2699 | "df.head()"
2700 | ]
2701 | },
2702 | {
2703 | "cell_type": "code",
2704 | "execution_count": 73,
2705 | "metadata": {},
2706 | "outputs": [],
2707 | "source": [
2708 | "#Back to the wide view: split 'ID X PCODE' and pivot to one row per ID, one column per PCODE\n",
2709 | "df_replaced = df.copy()\n",
2710 | "id_and_code = [key.split(' X ') for key in df_replaced['ID X PCODE']]\n",
2711 | "df_replaced['ID'] = [parts[0] for parts in id_and_code]\n",
2712 | "df_replaced['PCODE'] = [parts[1] for parts in id_and_code]\n",
2713 | "df_replaced = df_replaced.set_index(['ID','PCODE'])['Label'].unstack().reset_index()"
2714 | ]
2715 | },
2716 | {
2717 | "cell_type": "code",
2718 | "execution_count": 74,
2719 | "metadata": {},
2720 | "outputs": [],
2721 | "source": [
2722 | "#Extra read data\n",
2723 | "#(Train.csv and Test.csv are loaded into their own frames, train_base and test_base)\n",
2724 | "train_base, test_base = (pd.read_csv(csv_name)\n",
2725 | "                         for csv_name in ('Train.csv', 'Test.csv'))"
2726 | ]
2727 | },
2728 | {
2729 | "cell_type": "code",
2730 | "execution_count": 76,
2731 | "metadata": {},
2732 | "outputs": [],
2733 | "source": [
2734 | "#Making dict with known values from statistics\n",
2735 | "#For every ordered pair (pr1, pr2): among train rows with pr1 == 1, if pr2 takes the\n",
2736 | "#same value (0 or 1) in more than 99.9 % of over 100 such rows, store\n",
2737 | "#dict_post[(pr1, pr2)] = (1, that value).\n",
2738 | "threshold = 0.999\n",
2739 | "dict_post = {}\n",
2740 | "for pr1 in names_products:\n",
2741 | "    holders = train_base[train_base[pr1] == 1]\n",
2742 | "    for pr2 in names_products:\n",
2743 | "        if pr1 == pr2:\n",
2744 | "            continue\n",
2745 | "        stats = holders[pr2].value_counts()\n",
2746 | "        stats0, stats1 = stats.get(0, 0), stats.get(1, 0)\n",
2747 | "        total = stats0 + stats1\n",
2748 | "        if total <= 100:\n",
2749 | "            continue\n",
2750 | "        if stats0 / total > threshold:\n",
2751 | "            dict_post[(pr1, pr2)] = (1, 0)\n",
2752 | "        elif stats1 / total > threshold:\n",
2753 | "            dict_post[(pr1, pr2)] = (1, 1)"
2754 | ]
2755 | },
2756 | {
2757 | "cell_type": "code",
2758 | "execution_count": 77,
2759 | "metadata": {},
2760 | "outputs": [
2761 | {
2762 | "data": {
2763 | "application/vnd.jupyter.widget-view+json": {
2764 | "model_id": "a673810336d3439dbdcb8d61dd1b0039",
2765 | "version_major": 2,
2766 | "version_minor": 0
2767 | },
2768 | "text/plain": [
2769 | "HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))"
2770 | ]
2771 | },
2772 | "metadata": {},
2773 | "output_type": "display_data"
2774 | },
2775 | {
2776 | "name": "stdout",
2777 | "output_type": "stream",
2778 | "text": [
2779 | "\n"
2780 | ]
2781 | }
2782 | ],
2783 | "source": [
2784 | "#Replacing known values from our predictions\n",
2785 | "#For a rule 'pr1 == 1 implies pr2 == 1': pr2 is set to 0.99...9 and every other product whose value is not 1 to 1e-53.\n",
2786 | "#NOTE(review): rules implying 0 are never applied, and the 0.99...9 literal parses to exactly 1.0 - confirm intended\n",
2787 | "for i, row in tqdm_notebook(df_replaced.iterrows()):\n",
2788 | "    for (pr1, pr2), (_flag, implied) in dict_post.items():\n",
2789 | "        if row[pr1] != 1 or implied != 1 or row[pr2] == implied:\n",
2790 | "            continue\n",
2791 | "        for c in names_products:\n",
2792 | "            if c == pr2:\n",
2793 | "                df_replaced.loc[df_replaced.index == i, pr2] = 0.9999999999999999999999999999999999\n",
2794 | "            elif row[c] != 1:\n",
2795 | "                df_replaced.loc[df_replaced.index == i, c] = 1e-53"
2796 | ]
2797 | },
2798 | {
2799 | "cell_type": "code",
2800 | "execution_count": 78,
2801 | "metadata": {},
2802 | "outputs": [
2803 | {
2804 | "data": {
2805 | "text/html": [
2806 | "\n",
2807 | "\n",
2820 | "
\n",
2821 | " \n",
2822 | " \n",
2823 | " | \n",
2824 | " ID X PCODE | \n",
2825 | " Label | \n",
2826 | "
\n",
2827 | " \n",
2828 | " \n",
2829 | " \n",
2830 | " | 0 | \n",
2831 | " 0021EE1 X P5DA | \n",
2832 | " 1.000000e-53 | \n",
2833 | "
\n",
2834 | " \n",
2835 | " | 1 | \n",
2836 | " 0029J1L X P5DA | \n",
2837 | " 1.138897e-04 | \n",
2838 | "
\n",
2839 | " \n",
2840 | " | 2 | \n",
2841 | " 004QK71 X P5DA | \n",
2842 | " 9.056966e-05 | \n",
2843 | "
\n",
2844 | " \n",
2845 | " | 3 | \n",
2846 | " 005AP9V X P5DA | \n",
2847 | " 2.055958e-05 | \n",
2848 | "
\n",
2849 | " \n",
2850 | " | 4 | \n",
2851 | " 0096G27 X P5DA | \n",
2852 | " 1.524471e-05 | \n",
2853 | "
\n",
2854 | " \n",
2855 | "
\n",
2856 | "
"
2857 | ],
2858 | "text/plain": [
2859 | " ID X PCODE Label\n",
2860 | "0 0021EE1 X P5DA 1.000000e-53\n",
2861 | "1 0029J1L X P5DA 1.138897e-04\n",
2862 | "2 004QK71 X P5DA 9.056966e-05\n",
2863 | "3 005AP9V X P5DA 2.055958e-05\n",
2864 | "4 0096G27 X P5DA 1.524471e-05"
2865 | ]
2866 | },
2867 | "execution_count": 78,
2868 | "metadata": {},
2869 | "output_type": "execute_result"
2870 | }
2871 | ],
2872 | "source": [
2873 | "#Making submission view: melt to long rows; value_vars gets column labels, not a whole DataFrame\n",
2874 | "product_codes = ['P5DA', 'RIBP', '8NN1', '7POT', '66FJ', 'GYSR', 'SOP4',\n",
2875 | "                 'RVSZ', 'PYUQ', 'LJR9', 'N2MW', 'AHXO', 'BSTQ', 'FM3X',\n",
2876 | "                 'K6QO', 'QBOL', 'JWFN', 'JZ9D', 'J9JW', 'GHYX', 'ECY3']\n",
2877 | "products = df_replaced[product_codes]\n",
2878 | "df_replaced = df_replaced.melt(id_vars=['ID'], value_vars=product_codes, var_name='PCODE', value_name='Label')\n",
2879 | "df_replaced['ID X PCODE'] = df_replaced['ID'] + ' X ' + df_replaced['PCODE']\n",
2880 | "df_replaced = df_replaced[['ID X PCODE', 'Label']]\n",
2881 | "df_replaced.head()"
2882 | ]
2883 | },
2884 | {
2885 | "cell_type": "code",
2886 | "execution_count": 79,
2887 | "metadata": {},
2888 | "outputs": [],
2889 | "source": [
2890 | "df_replaced.to_csv(path_or_buf='submis.csv', index=False)  #write the long-format predictions without the row index"
2891 | ]
2892 | },
2893 | {
2894 | "cell_type": "markdown",
2895 | "metadata": {},
2896 | "source": [
2897 | "Submit"
2898 | ]
2899 | }
2900 | ],
2901 | "metadata": {
2902 | "colab": {
2903 | "collapsed_sections": [],
2904 | "include_colab_link": true,
2905 | "name": "Baseline1.ipynb",
2906 | "provenance": []
2907 | },
2908 | "kernelspec": {
2909 | "display_name": "Python 3",
2910 | "language": "python",
2911 | "name": "python3"
2912 | },
2913 | "language_info": {
2914 | "codemirror_mode": {
2915 | "name": "ipython",
2916 | "version": 3
2917 | },
2918 | "file_extension": ".py",
2919 | "mimetype": "text/x-python",
2920 | "name": "python",
2921 | "nbconvert_exporter": "python",
2922 | "pygments_lexer": "ipython3",
2923 | "version": "3.8.3"
2924 | },
2925 | "toc": {
2926 | "base_numbering": 1,
2927 | "nav_menu": {
2928 | "height": "142px",
2929 | "width": "160px"
2930 | },
2931 | "number_sections": true,
2932 | "sideBar": true,
2933 | "skip_h1_title": false,
2934 | "title_cell": "Table of Contents",
2935 | "title_sidebar": "Contents",
2936 | "toc_cell": false,
2937 | "toc_position": {
2938 | "height": "calc(100% - 180px)",
2939 | "left": "10px",
2940 | "top": "150px",
2941 | "width": "220.6px"
2942 | },
2943 | "toc_section_display": true,
2944 | "toc_window_display": true
2945 | }
2946 | },
2947 | "nbformat": 4,
2948 | "nbformat_minor": 1
2949 | }
2950 |
--------------------------------------------------------------------------------