├── Readme.md └── baseline_solution.ipynb /Readme.md: -------------------------------------------------------------------------------- 1 | # Онлайн-хакатон Райффайзенбанка в области Data Science 2 | 3 | 4 | Ключевая информация: 5 | 6 | * Скор на паблике: 1.4241 7 | * Примерное место на паблике: 43 8 | * Обучение только на `price_type == 1` 9 | * Рассказ про adversarial validation 10 | * Рассказ про схему валидации 11 | * Визуализация геоданных с помощью библиотеки keplergl 12 | -------------------------------------------------------------------------------- /baseline_solution.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Онлайн-хакатон Райффайзенбанка в области Data Science\n", 8 | "\n", 9 | "\n", 10 | "Ключевая информация:\n", 11 | "\n", 12 | "* Скор на паблике: 1.4241\n", 13 | "* Примерное место на паблике: 43\n", 14 | "* Обучение только на `price_type == 1`\n", 15 | "* Рассказ про adversarial validation\n", 16 | "* Рассказ про схему валидации\n", 17 | "* Визуализация геоданных с помощью библиотеки keplergl\n", 18 | "\n", 19 | "\n", 20 | "----" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "Установим необходимые библиотеки" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 1, 33 | "metadata": { 34 | "ExecuteTime": { 35 | "end_time": "2021-09-25T18:31:26.101470Z", 36 | "start_time": "2021-09-25T18:31:24.514293Z" 37 | } 38 | }, 39 | "outputs": [ 40 | { 41 | "name": "stdout", 42 | "output_type": "stream", 43 | "text": [ 44 | "Requirement already satisfied: tqdm in /usr/local/lib/python3.8/site-packages (4.50.2)\n", 45 | "Requirement already satisfied: matplotlib in /usr/local/lib/python3.8/site-packages (3.3.2)\n", 46 | "Requirement already satisfied: catboost in /usr/local/lib/python3.8/site-packages (0.26)\n", 47 | "Requirement already satisfied: keplergl in /usr/local/lib/python3.8/site-packages (0.2.1)\n", 48 | "Requirement already satisfied: six in /usr/local/lib/python3.8/site-packages (from catboost) (1.15.0)\n", 49 | "Requirement already satisfied: numpy>=1.16.0 in /usr/local/lib/python3.8/site-packages (from catboost) (1.19.2)\n", 50 | "Requirement already satisfied: graphviz in /usr/local/lib/python3.8/site-packages (from catboost) (0.16)\n", 51 | "Requirement already satisfied: scipy in /usr/local/lib/python3.8/site-packages (from catboost) (1.5.2)\n", 52 | "Requirement already satisfied: plotly in /usr/local/lib/python3.8/site-packages (from catboost) (5.1.0)\n", 53 | "Requirement already satisfied: pandas>=0.24.0 in /usr/local/lib/python3.8/site-packages (from catboost) (1.1.2)\n", 54 | "Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.8/site-packages (from pandas>=0.24.0->catboost) (2020.1)\n", 55 | "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/site-packages (from pandas>=0.24.0->catboost) (2.8.1)\n", 56 | "Requirement already satisfied: Shapely>=1.6.4.post2 in /usr/local/lib/python3.8/site-packages (from keplergl) (1.7.1)\n", 57 | "Requirement already satisfied: ipywidgets<8,>=7.0.0 in /usr/local/lib/python3.8/site-packages (from keplergl) (7.5.1)\n", 58 | "Requirement already satisfied: geopandas>=0.5.0 in /usr/local/lib/python3.8/site-packages (from keplergl) (0.8.1)\n", 59 | "Requirement already satisfied: traittypes>=0.2.1 in /usr/local/lib/python3.8/site-packages (from keplergl) (0.2.1)\n", 60 | "Requirement already satisfied: fiona in /usr/local/lib/python3.8/site-packages (from geopandas>=0.5.0->keplergl) (1.8.17)\n", 61 | "Requirement already satisfied: pyproj>=2.2.0 in /usr/local/lib/python3.8/site-packages (from geopandas>=0.5.0->keplergl) (2.6.1.post1)\n", 62 | "Requirement already satisfied: nbformat>=4.2.0 in /usr/local/lib/python3.8/site-packages (from ipywidgets<8,>=7.0.0->keplergl) (5.0.7)\n", 63 | "Requirement already satisfied: traitlets>=4.3.1 in /usr/local/lib/python3.8/site-packages (from ipywidgets<8,>=7.0.0->keplergl) (5.0.4)\n", 64 | "Requirement already satisfied: ipykernel>=4.5.1 in /usr/local/lib/python3.8/site-packages (from ipywidgets<8,>=7.0.0->keplergl) (5.3.4)\n", 65 | "Requirement already satisfied: widgetsnbextension~=3.5.0 in /usr/local/lib/python3.8/site-packages (from ipywidgets<8,>=7.0.0->keplergl) (3.5.1)\n", 66 | "Requirement already satisfied: ipython>=4.0.0 in /usr/local/lib/python3.8/site-packages (from ipywidgets<8,>=7.0.0->keplergl) (7.18.1)\n", 67 | "Requirement already satisfied: appnope in /usr/local/lib/python3.8/site-packages (from ipykernel>=4.5.1->ipywidgets<8,>=7.0.0->keplergl) (0.1.0)\n", 68 | "Requirement already satisfied: jupyter-client in /usr/local/lib/python3.8/site-packages (from ipykernel>=4.5.1->ipywidgets<8,>=7.0.0->keplergl) (6.1.7)\n", 69 | "Requirement already satisfied: tornado>=4.2 in /usr/local/lib/python3.8/site-packages (from ipykernel>=4.5.1->ipywidgets<8,>=7.0.0->keplergl) (6.0.4)\n", 70 | "Requirement already satisfied: setuptools>=18.5 in /usr/local/lib/python3.8/site-packages (from ipython>=4.0.0->ipywidgets<8,>=7.0.0->keplergl) (53.0.0)\n", 71 | "Requirement already satisfied: jedi>=0.10 in /usr/local/lib/python3.8/site-packages (from ipython>=4.0.0->ipywidgets<8,>=7.0.0->keplergl) (0.17.2)\n", 72 | "Requirement already satisfied: pickleshare in /usr/local/lib/python3.8/site-packages (from ipython>=4.0.0->ipywidgets<8,>=7.0.0->keplergl) (0.7.5)\n", 73 | "Requirement already satisfied: pexpect>4.3 in /usr/local/lib/python3.8/site-packages (from ipython>=4.0.0->ipywidgets<8,>=7.0.0->keplergl) (4.8.0)\n", 74 | "Requirement already satisfied: prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0 in /usr/local/lib/python3.8/site-packages (from ipython>=4.0.0->ipywidgets<8,>=7.0.0->keplergl) (3.0.7)\n", 75 | "Requirement already satisfied: pygments in /usr/local/lib/python3.8/site-packages (from ipython>=4.0.0->ipywidgets<8,>=7.0.0->keplergl) (2.6.1)\n", 76 | "Requirement already satisfied: backcall in /usr/local/lib/python3.8/site-packages (from ipython>=4.0.0->ipywidgets<8,>=7.0.0->keplergl) (0.2.0)\n", 77 | "Requirement already satisfied: decorator in /usr/local/lib/python3.8/site-packages (from ipython>=4.0.0->ipywidgets<8,>=7.0.0->keplergl) (4.4.2)\n", 78 | "Requirement already satisfied: parso<0.8.0,>=0.7.0 in /usr/local/lib/python3.8/site-packages (from jedi>=0.10->ipython>=4.0.0->ipywidgets<8,>=7.0.0->keplergl) (0.7.1)\n", 79 | "Requirement already satisfied: ipython-genutils in /usr/local/lib/python3.8/site-packages (from nbformat>=4.2.0->ipywidgets<8,>=7.0.0->keplergl) (0.2.0)\n", 80 | "Requirement already satisfied: jupyter-core in /usr/local/lib/python3.8/site-packages (from nbformat>=4.2.0->ipywidgets<8,>=7.0.0->keplergl) (4.6.3)\n", 81 | "Requirement already satisfied: jsonschema!=2.5.0,>=2.4 in /usr/local/lib/python3.8/site-packages (from nbformat>=4.2.0->ipywidgets<8,>=7.0.0->keplergl) (3.2.0)\n", 82 | "Requirement already satisfied: attrs>=17.4.0 in /usr/local/lib/python3.8/site-packages (from jsonschema!=2.5.0,>=2.4->nbformat>=4.2.0->ipywidgets<8,>=7.0.0->keplergl) (20.2.0)\n", 83 | "Requirement already satisfied: pyrsistent>=0.14.0 in /usr/local/lib/python3.8/site-packages (from jsonschema!=2.5.0,>=2.4->nbformat>=4.2.0->ipywidgets<8,>=7.0.0->keplergl) (0.17.3)\n", 84 | "Requirement already satisfied: ptyprocess>=0.5 in /usr/local/lib/python3.8/site-packages (from pexpect>4.3->ipython>=4.0.0->ipywidgets<8,>=7.0.0->keplergl) (0.6.0)\n", 85 | "Requirement already satisfied: wcwidth in /usr/local/lib/python3.8/site-packages (from prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0->ipython>=4.0.0->ipywidgets<8,>=7.0.0->keplergl) (0.2.5)\n", 86 | "Requirement already satisfied: notebook>=4.4.1 in /usr/local/lib/python3.8/site-packages (from widgetsnbextension~=3.5.0->ipywidgets<8,>=7.0.0->keplergl) (6.1.4)\n", 87 | "Requirement already satisfied: terminado>=0.8.3 in /usr/local/lib/python3.8/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets<8,>=7.0.0->keplergl) (0.9.0)\n", 88 | "Requirement already satisfied: argon2-cffi in /usr/local/lib/python3.8/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets<8,>=7.0.0->keplergl) (20.1.0)\n", 89 | "Requirement already satisfied: pyzmq>=17 in /usr/local/lib/python3.8/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets<8,>=7.0.0->keplergl) (19.0.2)\n", 90 | "Requirement already satisfied: Send2Trash in /usr/local/lib/python3.8/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets<8,>=7.0.0->keplergl) (1.5.0)\n", 91 | "Requirement already satisfied: jinja2 in /usr/local/lib/python3.8/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets<8,>=7.0.0->keplergl) (2.11.2)\n", 92 | "Requirement already satisfied: prometheus-client in /usr/local/lib/python3.8/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets<8,>=7.0.0->keplergl) (0.8.0)\n", 93 | "Requirement already satisfied: nbconvert in /usr/local/lib/python3.8/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets<8,>=7.0.0->keplergl) (6.0.4)\n", 94 | "Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.8/site-packages (from matplotlib) (7.2.0)\n", 95 | "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.8/site-packages (from matplotlib) (0.10.0)\n", 96 | "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.3 in /usr/local/lib/python3.8/site-packages (from matplotlib) (2.4.7)\n", 97 | "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.8/site-packages (from matplotlib) (1.2.0)\n", 98 | "Requirement already satisfied: certifi>=2020.06.20 in /usr/local/lib/python3.8/site-packages (from matplotlib) (2020.6.20)\n", 99 | "Requirement already satisfied: cffi>=1.0.0 in /usr/local/lib/python3.8/site-packages (from argon2-cffi->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets<8,>=7.0.0->keplergl) (1.14.2)\n", 100 | "Requirement already satisfied: pycparser in /usr/local/lib/python3.8/site-packages (from cffi>=1.0.0->argon2-cffi->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets<8,>=7.0.0->keplergl) (2.20)\n", 101 | "Requirement already satisfied: click<8,>=4.0 in /usr/local/lib/python3.8/site-packages (from fiona->geopandas>=0.5.0->keplergl) (7.1.2)\n", 102 | "Requirement already satisfied: munch in /usr/local/lib/python3.8/site-packages (from fiona->geopandas>=0.5.0->keplergl) (2.5.0)\n", 103 | "Requirement already satisfied: click-plugins>=1.0 in /usr/local/lib/python3.8/site-packages (from fiona->geopandas>=0.5.0->keplergl) (1.1.1)\n", 104 | "Requirement already satisfied: cligj>=0.5 in /usr/local/lib/python3.8/site-packages (from fiona->geopandas>=0.5.0->keplergl) (0.5.0)\n" 105 | ] 106 | }, 107 | { 108 | "name": "stdout", 109 | "output_type": "stream", 110 | "text": [ 111 | "Requirement already satisfied: MarkupSafe>=0.23 in /usr/local/lib/python3.8/site-packages (from jinja2->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets<8,>=7.0.0->keplergl) (1.1.1)\n", 112 | "Requirement already satisfied: jupyterlab-pygments in /usr/local/lib/python3.8/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets<8,>=7.0.0->keplergl) (0.1.1)\n", 113 | "Requirement already satisfied: mistune<2,>=0.8.1 in /usr/local/lib/python3.8/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets<8,>=7.0.0->keplergl) (0.8.4)\n", 114 | "Requirement already satisfied: bleach in /usr/local/lib/python3.8/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets<8,>=7.0.0->keplergl) (3.2.1)\n", 115 | "Requirement already satisfied: testpath in /usr/local/lib/python3.8/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets<8,>=7.0.0->keplergl) (0.4.4)\n", 116 | "Requirement already satisfied: defusedxml in /usr/local/lib/python3.8/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets<8,>=7.0.0->keplergl) (0.6.0)\n", 117 | "Requirement already satisfied: nbclient<0.6.0,>=0.5.0 in /usr/local/lib/python3.8/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets<8,>=7.0.0->keplergl) (0.5.0)\n", 118 | "Requirement already satisfied: entrypoints>=0.2.2 in /usr/local/lib/python3.8/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets<8,>=7.0.0->keplergl) (0.3)\n", 119 | "Requirement already satisfied: pandocfilters>=1.4.1 in /usr/local/lib/python3.8/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets<8,>=7.0.0->keplergl) (1.4.2)\n", 120 | "Requirement already satisfied: async-generator in /usr/local/lib/python3.8/site-packages (from nbclient<0.6.0,>=0.5.0->nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets<8,>=7.0.0->keplergl) (1.10)\n", 121 | "Requirement already satisfied: nest-asyncio in /usr/local/lib/python3.8/site-packages (from nbclient<0.6.0,>=0.5.0->nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets<8,>=7.0.0->keplergl) (1.4.0)\n", 122 | "Requirement already satisfied: webencodings in /usr/local/lib/python3.8/site-packages (from bleach->nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets<8,>=7.0.0->keplergl) (0.5.1)\n", 123 | "Requirement already satisfied: packaging in /usr/local/lib/python3.8/site-packages (from bleach->nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets<8,>=7.0.0->keplergl) (20.4)\n", 124 | "Requirement already satisfied: tenacity>=6.2.0 in /usr/local/lib/python3.8/site-packages (from plotly->catboost) (7.0.0)\n", 125 | "Note: you may need to restart the kernel to use updated packages.\n" 126 | ] 127 | } 128 | ], 129 | "source": [ 130 | "pip install tqdm matplotlib catboost keplergl" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 2, 136 | "metadata": { 137 | "ExecuteTime": { 138 | "end_time": "2021-09-25T18:31:27.230045Z", 139 | "start_time": "2021-09-25T18:31:26.103332Z" 140 | } 141 | }, 142 | "outputs": [], 143 | "source": [ 144 | "import pandas as pd\n", 145 | "from matplotlib import pyplot as plt\n", 146 | "import numpy as np\n", 147 | "from catboost import CatBoostClassifier, CatBoostRegressor\n", 148 | "\n", 149 | "from sklearn.metrics import roc_auc_score\n", 150 | "from sklearn.base import clone\n", 151 | "from tqdm.auto import tqdm\n", 152 | "\n", 153 | "from keplergl import KeplerGl\n", 154 | "\n", 155 | "np.random.seed(42)" 156 | ] 157 | }, 158 | { 159 | "cell_type": "markdown", 160 | "metadata": {}, 161 | "source": [ 162 | "Метрика, которую предоставили организаторы" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": 3, 168 | "metadata": { 169 | "ExecuteTime": { 170 | "end_time": "2021-09-25T18:31:27.236022Z", 171 | "start_time": "2021-09-25T18:31:27.231896Z" 172 | } 173 | }, 174 | "outputs": [], 175 | "source": [ 176 | "def deviation_metric_one_sample(y_true, y_pred):\n", 177 | " deviation = (y_pred - y_true) / np.maximum(1e-8, y_true)\n", 178 | " if np.abs(deviation) <= 0.15: return 0\n", 179 | " elif deviation <= -0.6: return 9.9\n", 180 | " elif deviation < -0.15: return 1.1 * (deviation / 0.15 + 1) ** 2\n", 181 | " elif deviation < 0.6: return (deviation / 0.15 - 1) ** 2\n", 182 | " return 9\n", 183 | "\n", 184 | "def deviation_metric(y_true, y_pred):\n", 185 | " return np.array([deviation_metric_one_sample(y_true[n], y_pred[n]) for n in range(len(y_true))]).mean()" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": 4, 191 | "metadata": { 192 | "ExecuteTime": { 193 | "end_time": "2021-09-25T18:31:29.658149Z", 194 | "start_time": "2021-09-25T18:31:27.238094Z" 195 | } 196 | }, 197 | "outputs": [ 198 | { 199 | "name": "stderr", 200 | "output_type": "stream", 201 | "text": [ 202 | "/usr/local/lib/python3.8/site-packages/IPython/core/interactiveshell.py:3145: DtypeWarning: Columns (1) have mixed types.Specify dtype option on import or set low_memory=False.\n", 203 | " has_raised = await self.run_ast_nodes(code_ast.body, cell_name,\n" 204 | ] 205 | } 206 | ], 207 | "source": [ 208 | "test = pd.read_csv('./data/test.csv').rename({'per_square_meter_price': 'target'}, axis=1)\n", 209 | "test['train'] = 0\n", 210 | "test['target'] = 0\n", 211 | "\n", 212 | "train = pd.read_csv('./data/train.csv').rename({'per_square_meter_price': 'target'}, axis=1)\n", 213 | "train['train'] = 1\n", 214 | "\n", 215 | "dataset = pd.concat([train, test])" 216 | ] 217 | }, 218 | { 219 | "cell_type": "markdown", 220 | "metadata": {}, 221 | "source": [ 222 | "Выделим важные колонки" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": 5, 228 | "metadata": { 229 | "ExecuteTime": { 230 | "end_time": "2021-09-25T18:31:29.662569Z", 231 | "start_time": "2021-09-25T18:31:29.660009Z" 232 | } 233 | }, 234 | "outputs": [], 235 | "source": [ 236 | "key_cols = ['id', 'date', 'price_type', 'train', 'month', 'target']\n", 237 | "cat_cols = ['city', 'osm_city_nearest_name', 'region', 'realty_type', 'street', 'floor']" 238 | ] 239 | }, 240 | { 241 | "cell_type": "markdown", 242 | "metadata": {}, 243 | "source": [ 244 | "Для удобства объединим выборки и добавим некоторые важные колонки.\n", 245 | "\n", 246 | "Также отфильтруем датасет :)" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": 6, 252 | "metadata": { 253 | "ExecuteTime": { 254 | "end_time": "2021-09-25T18:31:30.558167Z", 255 | "start_time": "2021-09-25T18:31:29.664648Z" 256 | } 257 | }, 258 | "outputs": [], 259 | "source": [ 260 | "dataset[cat_cols] = dataset[cat_cols].astype(str).fillna('NAN')\n", 261 | "dataset['date'] = pd.to_datetime(dataset['date'])\n", 262 | "dataset['month'] = (\n", 263 | " dataset['date'].dt.floor('d') + pd.offsets.MonthEnd(0) - pd.offsets.MonthBegin(1)\n", 264 | ")\n", 265 | "\n", 266 | "dataset = dataset[dataset['price_type']==1]" 267 | ] 268 | }, 269 | { 270 | "cell_type": "markdown", 271 | "metadata": {}, 272 | "source": [ 273 | "# Небольшая визуализация" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": 7, 279 | "metadata": { 280 | "ExecuteTime": { 281 | "end_time": "2021-09-25T18:31:30.750349Z", 282 | "start_time": "2021-09-25T18:31:30.559827Z" 283 | } 284 | }, 285 | "outputs": [ 286 | { 287 | "data": { 288 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYAAAAEUCAYAAAAiMOHqAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAA4CElEQVR4nO3dd3gVZfbA8e9JI5BKGoQk9N6bFEERRQUbVmxrWV11V9R1V1dd9bfrrruuurZVbOha14Jd7AXFRlFaQsfQzA0BQkkDElLO74+ZyCWmt5vkns/z5Mm9M3PnPXPLe2bemXlfUVWMMcb4nwBfB2CMMcY3LAEYY4yfsgRgjDF+yhKAMcb4KUsAxhjjpywBGGOMn7IEYFoscQT5Og5j2ipLAKZFEZHJIvKxiPwE5AFn+zomY9oqSwCtlIhsEZEpXs+TRGSTiNzry7gaQkQmAnOAWUAPVY1Q1Vd9HJYxbZYdXrcBIhIPfA58pKo3+TqeBvgHcK2qvu/rQIzxB3YE0MqJSDTwKfA9cI3X9HYi8pCIbHP/HhKRdl7zu4uIikiB+1cqIr+pooyKy64SkWO85h8pIj+ISK77/8hq4h0gIvNFJEdEVovIaV6zxwAniMj2ijF7lV3mfeRTYd1RIvJfEckSkUwR+YeIBLrzLhWRb93HASLyivsXICLvueveV2E7n6gpZhF5TkQOusvvEZGnqzpvUWHZn8vzmt9FROa660kXkSuqeR+fc2Md4TXtXnfaFK/tvEVENorIbhF5TURivJafKCIL3O3KEJFL3emdReRTd3qBiBSLyB0V30ev9XjKvw8i0lFE3heRbBHZ6z5O9lp2vvseJHhNe82Nu3dV22uahiWA1i0c+AjnSO4yPbxjp9uAccBwYBhO5Xq71/zyzz5KVcOBb2pRXjQQAbwG3AfgVigfAA8DscADwAciElvxxSISDLyHk7ASgGuBl0Skn7tIe2AQMLRizKoa7sb5UzXxPQeUAL2BEcAJQGVJbZa7LRerapmqnuque1D5drrl/bYWMQPc675+IHAyMLWaGO/12pZhFea9CniALjjnPu4SkWOrWde68u1z4zwV2OE1/1rgdGCSu869wKPu8t1wvjuPAPE435MV7uuuB0qBRDfOOdXEUFEA8CzQDegKHMB5v71tBC5x44gD+tZh/aYRWQJo3R4HCoBkYEKFeRcCf1fVnaqaDfwNuMhrfghQpqqldSxTgEBgt/v8ZOBHVX1RVUtU9RWciunUSl47Didp3a2qB1X1C+B94HyvZaqLueqgRDoBJwHXq+o+Vd0JPAicV2G5O4HJwFmqWlyLVdcm5nKBOO/P7krm1RR/Cs5neLOqFqrqCuBp4OJqXjYXOF5E2uO8358DhV7zfwvcpqoeVS0C7gDOdo9QLgA+V9VXVLVYVXe7ZZYLoB71g7ueN1V1v6rmA//ESUDeXuDQ53ox8GJdyzGNwxJA61Ze0d4EPO1WBOW6AFu9nm91p5WLwdkj/AWvJooCEenqNWsXTsK5HrininLKy0qqZNVdgAxVLati2YM1xFzuHa9mi3tFRHD2OIOBLHdeDvAkzl57uZHAmUAc0LOS9VamppgBbnTLywAWAj/Uct0Vy9njVppVlVNRMU4SOBu4HCdheOsGvO31fqzF2bPvBKTg7IlX5n5gP5Dvvm5GbTdCRDqIyJMislVE8oCvgejypjhXNrBBRI7CSQQv1Hb9pnFZAmjd/unuLT6FU/nc6TVvG04FUK6rO61cX2BDZSstb6Jw/7ybXOJUtQMwHXjTTTgVyykvK7OSVW8DUkQkoIplf6oh5nKnq2o0zt75hcCJONtf5MYY7f5Fquogr9flAlNwmseeqVApVaWmmAHuc+OJwDmy+lMt1ltZOTEiElFNOZV5GmcHIFZVUyvMywCmeb0f0aoaqqqZ7rxela3QPfr6BueigmicJr/augHoB4xV1UjgaHe6VBL3I0C6W57xAUsAbccVwJUiMsZ9/gpwu4jEu+2sfwH+Bz83N/weeKeeZZUCUTiV3YdAXxG5QESCRORcnLbwyq7kWYyzZ3mTiAS7Jw5PxWn7rjbmKpS4/wNUNQunnf5+EYl0T4D2EhHv5oeNqpqlqrNx7jG4sRbbWlPM3koBxWlTrxNVzQAWAP8SkVARGYqzV1/d9qOq64CPgX9VMvsJ4J9uez/u+zrdnfcSMEVEZrifW6yIDHeX6w7cDFxd1+3ASYIHgBz3/NBfq1juU2AZTjOd8RFLAG2Eqm7CqTCfFZEQnEsqlwBpwEqcH9s/3MU/AeZT9x9fjogU4ByyX6Wquaq6GzgFZ89vN87e6CmququSGA/iVJ7TcJqTHsM5EbvOXeQuN86VlcTs7T0RyXeXeROnAgSnPTkEWIPTvPUGkFjFtvwGp+mmXxXzaxszOMmhANiO85u65xcrqp3zge44RwNvA39V1c9repGq/klV361k1n9wmog+dd+vRcBY9zU/4ZwzuQHYg3MCuPyk9JM45zwqNu2VO8K98scjIh6gM/C6e7XPQzgn83e55X1c2Qrck++XqeqCmrbPNB2xEcGMMQ0lIvOBS1V1i49DMXVgRwDGmMawFKfpx7QidgRgjDF+yo4AjDHGT1kCMMYYP9UiOoOLi4vT7t27+zoMY4xpVZYuXbpLVet82XG5FpEAunfvzpIlS3wdhjHGtCoiUtWlurViTUDGGOOnLAEYY4yfsgRgjDF+yhKAMcb4KUsAxhjjpywBGGOMn7IEYIwxDVBYXNdB9VoOSwDGGFNPizftZugdn7J0a6WD67V4lgCMMaaevkvfxcHSMu6Yu5qystbXsaYlAGOMqadUTy4hQQGszMzl9aUZvg6nziwBGGNMPagqaZ4cpg/rwuhuHfn3J+vJKyz2dVh1YgnAGGPqwbP3AHv3FzM0JZo7ThvE7n0HefjzH30dVp1YAjDGmHpI8+QCMCw5isFJUZw7OoXnFmwhfWeBjyOrPUsAxhhTD2meHEICA+jfORKAG0/sR/uQQO58fw2tZaRFSwDGGFMPqZ4cBiRGEBLkVKNx4e34/XF9+GpDNl+s2+nj6GrHEoAxxtRRWZmyKjOPocnRh02/5Mju9IoP487313CwpMw3wdWBJQBjjKmjTbsKKCgqYUhy1GHTgwMD+Mupg9iyez/PfrfZR9HVniUAY4ypo9SM8hPA0b+YN6lvPFMGJPDIF+nszC9s5sjqxhKAMcbU0crMXDqEBNI7IbzS+befPJCDJWXc+/H6Zo6sbiwBGGNMHaV6chjcJYrAAKl0fve4MC6b2IM3lnpYkZHTvMHVgSUAY4ypg+LSMtZsy2Nohfb/iq45tjfxEe1adD9BtUoAIvJ7EVklIqtF5Hp3WoyIfCYiP7r/O7rTRUQeFpF0EUkTkZFNGL8xxjSr9dvzKSopY2hKdLXLhbcL4uap/VmRkcPbyzObJ7g6qjEBiMhg4ApgDDAMOEVEegO3APNUtQ8wz30OMA3o4/5dCTzeBHEbY4xPlN8BPDSp+iMAgDNHJDEsJZq7P15HQVFJU4dWZ7U5AhgALFbV/apaAnwFnAlMB553l3keON19PB14QR2LgGgRSWzcsI0xxjdWZuYQ1T6YbrEdalw2IEC449SBZOcXMeuL9GaIrm5qkwBWAUeJSKyIdABOAlKATqqa5S6zHejkPk4CvPtF9bjTDiMiV4rIEhFZkp2dXe8NMMaY5pSakcvQ5ChEKj8BXNGIrh05a2Qyz3y7mS279jVxdHVTYwJQ1bXAPcCnwMfACqC0wjIK1Oksh6rOVtXRqjo6Pj6+Li81xhifKCwuZf2O/BpPAFd089R+BAcK//hgTRNFVj+1Ogmsqv9V1VGqejSwF9gA7Chv2nH/l3d+kYlzhFAu2Z1mjDGt2upteZSW6S+6gKhJQmQo1x7Xh8/X7uSrDS2nxaO2VwEluP+74rT/vwzMBS5xF7kEeNd9PBe42L0aaByQ69VUZIwxrVaaJweo/A7gmvx6Qne6x3bg7++tpri0ZfQTVNv7AN4UkTXAe8BMVc0B7gaOF5EfgSnuc4APgU1AOvAUcHWjRmyMMT6S5sklPqIdnSLb1fm17YIC+b9TBrIxex/PL9jS+MHVQ1BtFlLVoyqZths4rpLpCsxseGjGGNOypHlyGFaHE8AVHds/gUl94/nP5z9y+ogk4sLrnkgak90JbIwxtZBfWMymXfvq3P7vTUT4v1MGcqC4lPs+8X0/QZYAjDGmFlZm5qJKna8Aqqh3QjiXHtmdOUsyWOneVOYrlgCMMaYWfr4DuAFHAOWum9KH2LAQ/vbeap8OH2kJwBhjaiHNk0Nyx/bEhIU0eF2RocH86cR+LNm6l7mp2xohuvqxBGCMMbWQ5smt1+WfVTlnVApDkqL414fr2H/QN/0EWQIwxpga7C4owrP3QIPb/70FBAh3nDaQ7XmFPD5/Y6Ott04x+KRUY4xpRdIyG6/939uobjGcPrwLT369iYw9+xt13bVhCcAYY2qQlpGLCL8YBL4x3DJtAEEBwj8/WNvo666JJQBjjKlBmieHXvHhhLer1b2zddI5KpSZk3vz8ertLEjf1ejrr44lAGOMqYaqkurJrdUAMPV1+cQepMS052/vraGkGfsJsgRgjDHV2J5XyK6CokY9AVxRaHAgt500kPU78nlp8U9NVk5FlgCMMaYaqRnuCeAaxgBuqBMHdWJC71ge+GwDe/cdbNKyylkCMMaYaqR5cggKEAYmRjZpOSLCX08dREFRCfd/1jz9BFkCMMaYaqR5cunXOYLQ4MAmL6tvpwguGteNlxf/xJpteU1eniUAY4ypgqqS5slp0vb/iv4wpS9R7YObpZ8gSwDGGFOFrbv3k1dY0ug3gFUnqkMwN5zQj8Wb9/Dhyu1NWpYlAGOMqUKqOwRkcx4BAJw/pisDEiO568O1HDhY2mTlWAIwxpgqpHlyaRcUQN9OEc1abmCAcMepA8nMOcCTXzddP0GWAIwxpgppnhwGdYkkOLD5q8qxPWM5eWgiT3y1kcycA01ShiUAY4ypRElpGasy85q1/b+iW08aAMBdHzZNP0GWAIwxphLp2QUcKC5t9vZ/b0nR7fntpF58kJbFok27G339lgCMMaYSjTkEZENcdXQvkqKdfoJKyxr3slBLAMYYU4k0Tw4R7YLoGRfm0zjahwRy60kDWJuVxyvfN24/QZYAjDGmEmmeXAYnRREQIL4OhZOGdGZsjxju/3Q9ufuLG229lgCMMaaCopJS1mblMTTFd+3/3kSEO04bRO6BYh78fEOjrdcSgDHGVLAuK5/iUm3UQeAbakBiJBeM7cqLi7ayYUd+o6zTEoAxxlSQ5t4BPKQJB4GpjxuO70d4uyD+/t6aRuknyBKAMcZUkObJJSYshOSO7X0dymE6hoXwx+P78m36Lj5ds6PB67MEYIwxFaR5chmaHIWI708AV3Th2K706xTBPz5Y0+B1WQIwxhgv+w+W8OPOfJ9f/1+VoMAA/nLqQDL2NLx7CEsAxhjjZVVmHmUKw3x4B3BNJvSO46yRyQ1ejyUAY4zx8vMJ4BacAADunzGsweuwBGCMMV7SPLkkRoWSEBHq61CanCUAY4zx0txDQPpSrRKAiPxBRFaLyCoReUVEQkWkh4gsFpF0EZkjIiHusu3c5+nu/O5NugXGGNNIcvcXs2X3/hZ7Arix1ZgARCQJuA4YraqDgUDgPOAe4EFV7Q3sBS53X3I5sNed/qC7nDHGtHhpmTkALeoO4KZU2yagIKC9iAQBHYAs4FjgDXf+88Dp7uPp7nPc+cdJS7yY1hhjKijvArql3QHcVGpMAKqaCdwH/IRT8ecCS4EcVS1xF/MASe7jJCDDfW2Ju3xsxfWKyJUiskRElmRnZzd0O4wxpsFSM3LoHtuBqA7Bvg6lWdSmCagjzl59D6ALEAZMbWjBqjpbVUer6uj4+PiGrs4YYxpsZWau37T/Q+2agKYAm1U1W1WLgbeACUC02yQEkAxkuo8zgRQAd34U0PhjmRljTCPamV9IVm6h31wBBLVLAD8B40Skg9uWfxywBvgSONtd5hLgXffxXPc57vwvtDG6rTPGmCaUluG0/w9LifZtIM2oNucAFuOczF0GrHRfMxu4GfijiKTjtPH/133Jf4FYd/ofgVuaIG5jjGlUaZ4cAgQGdYn0dSjNJqjmRUBV/wr8tcLkTcCYSpYtBM5peGjGGNN8Uj259EmIoENIrarFNsHuBDbG+D1VdU8A+0/7P1gCMMYYPHsPsGffQYb6Ufs/WAIwxpifbwBryV1ANwVLAMYYv5fmySEkMID+nf3nBDBYAjDGGFI9OQxIjCAkyL+qRP/aWmOMqaCsTFmVmdfiB4BpCpYAjDF+bdOufRQUlfhVFxDlLAEYY/xa+RCQ/tIFtDdLAMYYv5bmyaVDSCC9E8J9HUqzswRgjPFrqZ4cBneJIjDA/4YtsQRgjPFbxaVlrNnmnyeAwRKAMcaPbdiRT1FJmd91AVHOEoAxxm8dugM42reB+IglAGOM30rz5BDVPphusR18HYpPWAIwxvit1AynB1BnrCv/YwnAGOOXCotLWb8j32/b/8ESgDHGT63elkdpmTIkKdrXofiMJQBjjF9aWX4HcIodARhjjF9J8+QSH9GOzpGhvg7FZywBGGP8Uqonh2F+fAIYLAEYY/xQfmExm3bt88seQL1ZAjDG+J2Vmbmo4tdXAIElAGOMHyq/A9iOAIwxxs+s9OSS3LE9MWEhvg7FpywBGGP8jnMCONrXYficJQBjjF/ZXVCEZ+8Bv2//B0sAxhg/k5Zp7f/lLAEYY/xKWkYuIjA4KdLXoficJQBjjF9ZmZlDz7gwIkKDfR2Kz1kCMMb4DVUl1ZNrJ4BdlgCMMX5je14h2flFdgLYZQnAGOM3UjPcE8Ap0b4NpIWwBGCM8RtpnhyCAoSBiXYCGCwBGGP8SJonl76dIggNDvR1KC1CjQlARPqJyAqvvzwRuV5EYkTkMxH50f3f0V1eRORhEUkXkTQRGdn0m2GMMdVTVdI8OX49AExFNSYAVV2vqsNVdTgwCtgPvA3cAsxT1T7APPc5wDSgj/t3JfB4E8RtjDF1snX3fvIKS+wGMC91bQI6DtioqluB6cDz7vTngdPdx9OBF9SxCIgWkcTGCNYYY+or1R0C0q4AOqSuCeA84BX3cSdVzXIfbwc6uY+TgAyv13jcaYcRkStFZImILMnOzq5jGMYYUzdpnlzaBQXQt1OEr0NpMWqdAEQkBDgNeL3iPFVVQOtSsKrOVtXRqjo6Pj6+Li81xpg6S/PkMLBLJMGBdu1Lubq8E9OAZaq6w32+o7xpx/2/052eCaR4vS7ZnWaMMT5RWqasysyzO4ArqEsCOJ9DzT8Ac4FL3MeXAO96Tb/YvRpoHJDr1VRkjDHNLn1nAQeKS639v4Kg2iwkImHA8cBVXpPvBl4TkcuBrcAMd/qHwElAOs4VQ79utGiNMaYeDp0AjvZpHC1NrRKAqu4DYitM241zVVDFZRWY2SjRGWNMI0jz5BDRLoiecWG+DqVFsbMhxpg2L82Ty+CkKAICxNehtCiWAIwxbVpRSSlrs/Ks/b8SlgCMMW3a+u35FJeqtf9XwhKAMaZNS/WUjwFsRwAVWQIwxrRpaRk5xISFkNyxva9DaXEsARhj2rQ0Ty5Dk6MQsRPAFVkCMMa0WfsPlvDjznxr/6+CJQBjTJu1elseZQpDk6z9vzKWAIwxbVZqRg4AQ20QmEpZAjDGtFlpnlwSo0JJiAj1dSgtkiUAY0yblebJscs/q2EJwBjTJuXuL2bL7v12ArgalgCMMW1SWmYOYDeAVccSgDGmTUorvwM4Kdq3gbRgfp0AMnMOUFZWp5EsjTGtRJonh+6xHYjqEOzrUFqsWo0H0BY9PO9HHvhsAwkR7ThxUGemDe7MmB4xBNl4oca0CWmeXI7oHuPrMFo0v0wA5ZX/iYM6ESDCG0s9vLhoKzFhIRw/oBNTh3RmQq84QoIsGRjTGu3MLyQrt9Da/2vgdwmgvPI/c2QS/z57GIEBwoGDpXy1YScfrdrOByuzmLMkg4jQIKYM6MTUwZ2Z1Dee0OBAX4dujKmltAyn/X9YSrRvA2nh/CoBVFb5A7QPCWTq4ESmDk6kqKSU79J38dHK7Xy2dgdvL8+kQ0ggk/slMHVwZyb3TyC8nV+9bca0OmmeHAIEBnWJ9HUoLZrf1GRVVf4VtQsK5Nj+nTi2fyeKS8tYvGkPH63K4pPVO/hgZRYhQQEc3SeOqYMTOX5AJzvBZEwLlJaZS5+ECDqE+E0VVy9+8e7UtvKvKDgwgIl94pjYJ46/Tx/M0q17nWSwajufr91JUIAwvlcs0wYncsKgTsSFt2viLTHG1ERVSfPkclz/BF+H0uK1+QRQ38q/osAAYUyPGMb0iOEvpwwkzZPLR6u28/GqLG59eyW3v7OSI7rHMG1wZ6YOTqRzlPU9YowvePYeYM++gwy19v8atekE0FiVf0UiwrCUaIalRHPz1H6s257/czK447013PHeGkZ0jWba4M5MG5xISkyHRinXGFOz8hvAhtkVQDVqswmgqSr/ikSEAYmRDEiM5I/H92VjdgEfr9rOR6uyuOvDddz14ToGdYn8+cigd0J4k8RhjHGkeXIIDhT6dY7wdSgtnqj6/k7Y0aNH65IlSxptfc1V+dckY8/+n5PBsp9yAOiTEM60wZ25YGw3ayYypgmcP3sR+w6WMPeaib4OpcmJyFJVHV3f17e5I4CWUvkDpMR04Iqje3LF0T3ZnlvIJ6udZDDry3Q+X7uTD66baOOUGtOIysqUVZm5TB/RxdehtApt6lbXllT5V9Q5KpRLjuzOq1eO556zhrImK4/567N9HZYxjWZfUQn//mQd89buoKS0zCcxbNq1j/yiEusCupbazBFAS678Kzp9RBIPff4js75M55h+8XYUYNqEO99fw6s/ZADQOTKUGaOTmXFECskdm+8iiDRPDgDDLAHUSps4AmhNlT849xdcNaknS7fu5fvNe3wdjjEN9unq7bz6QwZXHd2TJ341iv6JETzyZTpH3fslFz/zPR+tzOJgSdMfFaR5cmkfHGgXW9RSqz8CaG2Vf7kZo1N4eN6PPDZ/I2N7xvo6HGPqbWd+Ibe8tZLBSZHccEI/QoICmDq4M5k5B3jthwxeX5LB715aRlx4CGeNSua8I7rSIy6sSWJJ9eQwOCmy1dQDvtaqjwAeaaWVP0BocCCXTezBVxuyWZWZ6+twjKkXVeWmN9LYV1TCQ+cOP6wH3aTo9vzh+L58c/OxPHvpEYzs2pGnv9nM5Pvmc+6TC3lneSaFxaWNFktxaRlrtuVZ+38dtNoE8Mi8H7n/sw2cOaL1Vf7lfjWuGxGhQTw2P93XoRhTL/9btJX567O57eQB9E6o/Lr7wABhcv8EZl88moW3HMufTuxHVm4h189Zwdi75nHH3NWs357f4Fg27MinqKTMuoCug1bZBHRY5X9O66z8ASJDg7lkfHcenZ9O+s4Ca7c0rUr6znz+8cFaJvWN56Jx3Wr1moTIUGZO7s3vJvVi4abdvPL9T7y8+CeeW7CFEV2jOf+IrpwyLLFenbgdugM4us6v9Ve1OgIQkWgReUNE1onIWhEZLyIxIvKZiPzo/u/oLisi8rCIpItImoiMbMyA20rlX+7XE7rTLiiAJ77a6OtQjKm1gyVlXD9nBWHtgvj3OUPrfCVbQIAwoXccsy4YyaJbj+P2kweQd6CYm95MY8w/53Hr2ytZ6alb02iaJ4eo9sF0i7WuV2qrtk1A/wE+VtX+wDBgLXALME9V+wDz3OcA04A+7t+VwOONFWxbq/wBYsPbcf6YrryzPBPP3v2+DseYWnno8w2syszjX2cOISGiYXe0x4SF8JujevL5Hyfx+m/Hc8KgTry1zMOps77l5Ie/4cVFW8krLK5xPakZuQxNjrLLquugxgQgIlHA0cB/AVT1oKrmANOB593FngdOdx9PB15QxyIgWkQSGxpoW6z8y11xVE9E4KmvN/k6FGNq9P3mPTz+1UbOOyKFEwd1brT1ighHdI/hgRnDWXzrFO6cPghV+L93VjH2n/O48fVUlm7dQ2Xd1xQWl7JhRz5Dkqz9vy5q09DWA8gGnhWRYcBS4PdAJ1XNcpfZDnRyHycBGV6v97jTsqintlz5A3SJbs+ZI5J59YcMrjm2D/ERNq6AaZnyCov5w5wVdI3pwP+dMrDJyolqH8xF47vzq3HdWJmZyyvfZzB3RSZvLPXQJyGc88Z05cwRSXQMCwFgTVYeJWVqVwDVUW2agIKAkcDjqjoC2Meh5h4A1EnJdepVTkSuFJElIrIkO7vqLhHaeuVf7rfH9KK4tIxnvtvs61CMqdId765me14hD547nLBmGBpVRBiaHM2/zhzC97dN4Z6zhhDWLog731/D2Lvmce0ry1mQvovUjBwAhqXYEUBd1OYT9AAeVV3sPn8DJwHsEJFEVc1ym3h2uvMzgRSv1ye70w6jqrOB2eD0BlpZwf5S+QP0iAvjpCGJvLhwK7+d1Iuo9jbUpGlZ3kvdxlvLM7l+Sh9Gdu3Y7OWHtQvi3CO6cu4RXVmblcecHzJ4a5mH91K3ERQgxEe0o3Ok9bBbFzUeAajqdiBDRPq5k44D1gBzgUvcaZcA77qP5wIXu1cDjQNyvZqKas2fKv9yVx/Tm4KiEl5cuMXXoRhzmKzcA9z29kqGp0RzzeTevg6HAYmR3HHaIL6/bQoPnjuMMT1iOHd0ip0ArqPaHsNdC7wkIiHAJuDXOMnjNRG5HNgKzHCX/RA4CUgH9rvL1ok/Vv4AA7tEcmz/BJ75bguXTexhA1qbFqGsTLnhtVRKypSHzh1OUGDLuX80NDiQM0Ykc8aIZF+H0irVqoZR1RVAZYMOHFfJsgrMrG9A/lr5l5s5uRdnPb6QV7/P4LKJPXwdjjE8891mFmzczT1nDaF7E/XhY3yj5aRyrPIHGNUthrE9Ypj99aZm6T3RmOqs257HvR+v54SBnZgxOqXmF5hWpcUkAKv8D5k5uTfb8wp5e7nH16EYP1ZYXMr1r64gsn0w/zpziLWvt0EtIgHszC+yyt/LUX3iGJIUxePzN1Ja5vsxm41/uu+T9azbns+/zxlKbLjdm9IWtYgEsCOv0Cp/LyLCzMm92LJ7Px+urPf9c8bU23fpu3j6281cPL4bk/sl+Doc00RaRAKI7hBslX8FJwzsTK/4MB79Mr3SW9+NaSo5+w9yw2up9IoP48/TBvg6HNOEWkQCSOnYwSr/CgIChKuP6c267fl8uX5nzS8wphGoKre9vYpdBUX857wRtA8J9HVIpgnZheYt2GnDu/DAZxuY9UU6k/sltNqTcKVlyr8/WU9W7gF6xoXTKyGMXvHh9IgLIzTYKpiW5O3lmXywMoubpvZjsHWs1uZZAmjBygeP/8u7q1m8eQ/jWunYwbO+SOeJrzbSOTKUd1ds+3m6iDNsYK/4cHrGO0nB+QsjPqJdq014rVXGnv385d3VjOkew1VH9/J1OKYZWAJo4coHj3/0y/RWmQAWpO/ioXnOFV73zxhGYXEZm3ftY2N2AZuynf8bswv4fvMeDniNDxvRLoieCeH0igujV4KTFHrGh9MttgPtguyoobGVlil/fG0FAtw/w87H+QtLAC1caHAgl0/syT0fryPNk9OqurvdmVfIda+uoFd8OP84YzAiQvuQQAZ2iWRgl8jDli0rU7bnFf4iMSzctJu3lh/qSzBAoGtMB3q6RwrO0YPzOCYsxI4a6umJrzbyw5a9PHjuMFJibEQtf2EJoBX41biuPDY/nce+3MgTF43ydTi1UlJaxnWvLmdfUQkvXzG2xn6NAgKELtHt6RLdnqP6xB82r6CohM3Z+9i0q4CNOwvY6CaI79J3UeR1t3R0h2B6xh2eFHolhNM1pgPBLaj/mpZmpSeXBz/bwClDEzl9eJKvwzHNyBJAKxARGsylR3bnkS/S+XFHPn06Rfg6pBr9Z96PLNq0h/vOGUbfBsYb3i6IIclRDEk+/KRkaZmyLeeAe7RQ3qxUwPwN2by+9NBd1HHhITx83giO7B3XoDjaogMHS/n9nOXER7Tjn6fb3b7+xhJAK/HrCT14+pvNPP7VRh6YMdzX4VTrqw3ZzPoynRmjkzl7VNP10hgYIKTEdCAlpgPH9Dt8Xl5hMZuy95G+s4AnvtrIRc98z60nDeCyCd2tkvNy14dr2ZS9j5d/M5aoDjYGhb+x4+JWIiYshPPHdOXdFdvI2NNyB4/Pyj3AH+asoG9CBH87bbDP4ogMDWZ4SjRnj0rmnZkTOK5/Ane+v4YbXkul0Otksz/7ct1OXly0ld9M7GFHR37KEkArcsXRPQgQeOqbljl4fElpGde9spzC4lIevXBki7mJKLxdEE/8ahR/PL4vby3P5JwnFpKZc8DXYfnU7oIi/vRGGv07R3Djif1qfoFpkywBtCKJUe05a6QzePzO/EJfh/ML9326gR+27OVfZw6hd0K4r8M5TECAcN1xfXjq4tFs3rWP0x75lkWbdvs6LJ9QVW55ayV5B4p56LzhdjOeH7ME0MpcNakXJaVlPPPtFl+Hcph5a3fwxFcbuWBsV6a34CtJjh/YiXdmTiCqQzC/enoxzy/Y4nd9Lc35IYPP1uzgpqn96N85suYXmDbLEkAr0yMujJOHduF/i7aSu7/Y1+EAkJlzgBteT2VgYiR/OWWgr8OpUe+EcN6ZOYFJfeP569zV3PRGmt+cF9i8ax9/e28NE3rHctkEG3HO31kCaIWuPqYXBUUlvNACBo8/WFLGNS8vo6RUeezCka2mOSEyNJinLh7Ndcf14fWlHs6dvYis3LZ9XqC4tIzr56wgJCiA+84ZRoDd7ev3LAG0QgMSIzmufwLPfLeZ/QdLfBrLPR+vY/lPOdxz1tBWN15sQIDwx+P78sSvRpG+I59TH/mOH7bs8XVYTWbWF+mkZuRw1xlDSIxq7+twTAtgCaCVunpyb/buL+aV7zN8FsMnq7fz3283c8n4bpw8NNFncTTU1MGdeWfmBMLbBXL+7EX8b9HWNndeYOnWvcz6Mp0zRya16s/KNC5LAK3UqG4dGdczhqe+3kRRSfO3X2fs2c+Nr6cyNDmKW09u/YOG9OkUwbvXTGRinzhuf2cVt7690ifva1MoKCrhj6+tIDEqlL+dNsjX4ZgWxBJAK/bz4PHLMmteuBEVlZQy8+VlADx6wcg20ztnVPtg/nvJEcyc3ItXvs/g/NmL2JHX8i63ras731tDxp79PDBjOBGhdrevOcQSQCs2sXccQ5OjePyrjZSUltX8gkZy1wdrSfPkct85ba/nyMAA4U8n9ufRC0ayNiufUx/5lqVb9/o6rHr7eNV25izJ4HfH9GJMjxhfh2NaGEsArZiIM2zk1t37+XDV9mYp84O0LJ5fuJXLJ/bgxEGdm6VMXzh5aCJvzzyS0OBAzpu9kFe//8nXIdXZzrxC/vxWGkOSovj9cX19HY5pgSwBtHInDOxE74RwHmuGweO37NrHzW+mMaJrNDdP7d+kZbUE/TtHMveaCYzrGcstb63ktrdXcrCk+Y60GkJVufGNNA4Ul/LgucMJCbKfuvkl+1a0cgEBwu8m9WLd9ny+WNd0g8cXFpdy9UvLCAoUZl0w0m8qlOgOITz36zFcNaknLy3+iQueWtQiu+Go6IWFW/l6Qza3nTSgxXXLYVoO//gVt3GnDe9CUnR7ZjXhUcDf31/Dmqw8HpgxjKRo/7qGPDBA+PO0ATx8/ghWbcvltEe+Y0VGjq/DqpSqsmTLHu76cC3H9IvnV+O6+Tok04JZAmgDggMD+O2kniz/KYdFmxr/RqZ3V2Ty8uKfuGpST47t36nR199anDasC2/+7kiCAoUZTy7ktSW+uwejnKqSvrOAFxdtZeZLyxj9j885+4mFhLcL4t6zh9rYB6Za0hJueBk9erQuWbLE12G0aoXFpUy850sGJEbw4uVjG2296TsLOG3WtwzqEsnLV4yzoRWBPfsOcs3Ly1iwcTeXjO/G7acMbLb3RVXZuns/CzftZuHG3SzatJud+UUAdI4MZXyvWMb3jGVy/wTiI9o1S0zGd0RkqaqOru/rbUSwNiI0OJDfHNWDuz9aR2pGDsNSohu8zgMHS5n50jJCgwN55PyRVvm7YsJCeOGyMdz90Tqe/nYza7fn89iFI4kLb5oKN2PP/p8r+4WbdpOV65yDiI9ox/iesT9X+t1iO9gev6kTOwJoQ/ILi5lw9xeM7xXLkxfVe6fgZze9kcrrSz089+sxTOobX/ML/NDbyz3c8uZKYsNCePKi0b8Yt7g+tuUcYOHG3T/v5ZcPXhMbFsK4nrGMcyv8XvFhVuH7OTsCMD8rHzz+4UYYPP6NpR5eW+Lh2mN7W+VfjTNGJNMnIYIrX1jC2U8s4F9nDuHMkXUbB3lnXuHPlf3CTbvZutsZ8jO6QzBje8RwxVE9GN8rjr6dwq3CN43KjgDamD37DjLh7i+YNrgzD5w7vF7r2LAjn9NmfcvwlGhe+s04Aq3b4BrtKihi5kvLWLx5D5dN6MGtJ/UnqIoms+z8IhZvPlThb8reB0BEaBBjexxq0unfOcK6bDbVapYjABHZAuQDpUCJqo4WkRhgDtAd2ALMUNW94uyi/Ac4CdgPXKqqy+oboKmbmLAQLhjblecWbOEPx/etc1cN+4pKuPqlZYS3C+bh80ZY5V9LceHt+N9vxvLPD9byzHebWbc9j1kXjCQmLIQ9+w6yeNOhJp0fdxYAzljFR3TvyHlHpDC+ZxwDu0Ta+22aVV2agCar6i6v57cA81T1bhG5xX1+MzAN6OP+jQUed/+bZnLFUT15YeEWnvx6I/84fUitX6eq3P7OKjZmF/DS5WNJiAxtwijbnuDAAO44bRCDukRy2zurOOXhb4hsH8y67fkAtA8OZHT3jpwxMonxPWMZkhRV5VGCMc2hIecApgPHuI+fB+bjJIDpwAvqtC0tEpFoEUlU1ayGBGpqr3NUKGePSua1JR6uO7ZPrSvyOT9k8PbyTP4wpS9H9o5r4ijbrnNGp9C3UwS3v7OKiNAgbji+L+N7xTI0Odpv7qA2rUNtE4ACn4qIAk+q6mygk1elvh0ov0MoCfC+Q8bjTrME0IyuOroXc37I4L/fbubPJ9XcX//arDz+Onc1E3vHcc2xvZshwrZtWEo071070ddhGFOt2u6OTFTVkTjNOzNF5Gjvme7efp3OJovIlSKyRESWZGdn1+Wlpha6x4Vxijt4fM7+g9Uum19YzNUvLSOqfTAPnTfc2qGN8RO1SgCqmun+3wm8DYwBdohIIoD7v7wnskwgxevlye60iuucraqjVXV0fLxdZtgUfndML/YdLOX5BVurXEZV+fNbK9m6ex+PnD+iyW5mMsa0PDUmABEJE5GI8sfACcAqYC5wibvYJcC77uO5wMXiGAfkWvu/bwxIjGTKgASeXbCZfUWVDx7/v8U/8X5aFjec0I+xPWObOUJjjC/V5gigE/CtiKQC3wMfqOrHwN3A8SLyIzDFfQ7wIbAJSAeeAq5u9KhNrV09uTc5+4t5pZIBTVZl5nLne2s4pl88v5vUywfRGWN8qcaTwKq6CRhWyfTdwHGVTFdgZqNEZxpsZNeOjO8Zy1PfbOKi8d1+Hr83z233jw0P4YEZw+2GI2P8kF2T5gdmTu7Njrwi3nIHj1dVbno9jW05B5h1wQhiwkJ8HKExxhcsAfiBCb1jGZocxRPu4PHPLdjCx6u3c9PUfozqZgOFG+OvLAH4Ae/B4+/+aB13fbiWKQMSuOKonr4OzRjjQ5YA/ET54PFPf7uZhIhQ7jtnmPUsaYyfswTgJwIChBtP6EdMWAiPXjiS6A7W7m+Mv7PxAPzI1MGdOWFgJ7vixxgD2BGA37HK3xhTzhKAMcb4KUsAxhjjpywBGGOMn7IEYIwxfsoSgDHG+ClLAMYY46csARhjjJ8Sp/dmHwchkg+s91HxUUCuldumy7Zt9o+y/a1cgD6qGlXvV6uqz/+AJT4se7aV27bLtm32j7L9rdzGKNuagOA9K7fNl23b7B9l+1u5DS67pTQBLVHV0b6Owxhj/ElLOQKY7esAjDHG37SIIwBjjDHNr6UcATQZESkVkRVef92rWXa+iDRKU5SIqIj8z+t5kIhki8j7jbH+WpR/uhtD/2Yqz6fb65ZZ0Fxl1af8Rv5+NevnW6Hs20RktYikub+psc1YdrKIvCsiP4rIRhH5j4hUObiFiFwvIh0aWKaKyP1ez28UkTsass5allted60WkVQRuUFEGrXObvMJADigqsO9/rY0U7n7gMEi0t59fjyQWZcViEhDxms4H/jW/V+XMgPrWV6Dt9fUSb0+34YSkfHAKcBIVR0KTAEymqlsAd4C3lHVPkBfIBz4ZzUvux5oUAIAioAzRSSugeupq/K6axDO72ka8NfGLKBZE4Cv99DKicgoEflKRJaKyCcikug1+yI3664SkTENLOpD4GT38fnAK14xjBGRhSKyXEQWiEg/d/qlIjJXRL4A5tWnUBEJByYClwPnudOOEZGvReQDEVkvIk+U702ISIGI3C8iqcD4+m4s9dver0VkuNdy34rIsPoG4G7n+17PZ4nIpe7jLSLyNxFZJiIrm2LvubryG7GMqj7fqrb7JBFZ537fH27gUVkisEtViwBUdZeqbqvqN+Ue9fynkX5TxwKFqvqsW3Yp8AfgMhEJE5H73DLSRORaEbkO6AJ8KSJfNqDcEpzzlH+oOENEuovIF26Z80Skq4hEichWr99XmIhkiEhwfQNQ1Z3AlcA14ggUkX+LyA9u2Vd5xXSz+/1OFZG7q1uvPxwBtJdDzT9vux/CI8DZqjoKeIbD9yA6qOpw4Gp3XkO8CpwnIqHAUGCx17x1wFGqOgL4C3CX17yRbnyT6lnudOBjVd0A7BaRUe70McC1wECgF3CmOz0MWKyqw1T123qWCfXb3v8ClwKISF8gVFVTGxBDTXap6kjgceDGJiynKVX1+f6C+1k8CUxzv+/xDSz7UyBFRDaIyGMiMqkZf1ODgKXeE1Q1D/gJ+A3QHRjuHpm8pKoPA9uAyao6uQHlAjwKXCgiFW+6egR4vrxM4GFVzQVWAOW/31OAT1S1uCEBqOomIBBIwEn+uap6BHAEcIWI9BCRaTjfj7GqOgy4t7p1NnsCEJFwN1OW74VNd6d3F5G1IvKUOG1en8qh5oSG8G4COgPoBwwGPhORFcDtQLLX8q8AqOrXQKSIRNe3YFVNw/lSno+zd+wtCnhdRFYBD+J8uct9pqp76luuW96r7uNXOdRM8L2qbnL3nF7B2YsEKAXebEB5QL2393XgFLcSuQx4rqFx1OAt9/9SN9bWqKrPtzL9gU2qutl9/ko1y9ZIVQuAUTh7o9nAHOAqmuk3VY1jgCdVtcQtqyG/n19wE80LwHUVZo0HXnYfv8ih39Qc4Fz38Xnu88Z0AnCx+34vBmKBPjhNcs+q6n437mrfB1+MCVwInKGqeeK0qS0SkbnuvD7A+ap6hYi8BpwF/K+qFdWTAKtVtaqmjoqXRTX0Mqm5wH04X9BYr+l3Al+q6hninJie7zVvX30LE5EYnEPlISKiOHsMCnxA1dtW6CaFxlCn7VXV/SLyGc5eywycyqUhSjh8xya0wvwi938pTfP9r6n8Bqnm8323Kcv15n5X5gPzRWQlMJPm+U2tAc72niAikUBXYEs911kXDwHLgGdrsexc4C738xoFfNHQwkWkJ873didOPXatqn5SYZkT67JOXzQBCc4bkwZ8DiQBndx5m1V1hfu4qfbQ1gPx4pzMQkSCRcR77/tcd/pEnEOshvbx8QzwN1VdWWF6FIdOkl7awDK8nQ28qKrdVLW7qqYAm4GjgDHuYWIAznY2pLmnKvXZ3qeBh4EfVHVvA8vfCgwUkXbunuZxDVxfSyu/qs83oIpy1wM95dDVb+dWXGFdiEg/EenjNWk4sJbm+U3NAzqIyMXu+gKB+3GOGj8BrhL3wgm34gXIByLqWd5h3L3p13CaX8otwD0PA1wIfOMuWwD8APwHeL+hO1giEg88AcxS59r9T4DflZ9XEJG+IhIGfAb8Wtwrn7zeh0r54gjgQpx2yFGqWiwiWzi0t1LktVwp0BhNQIdR1YMicjbwsNueF4ST2Ve7ixSKyHKgvEmioeV5cCq3iu4FnheR23H2zhvL+cA9Faa9CfwO5ws5C+gNfAm83YjlAvXbXlVdKiJ51G7PqlLuD79IVTPco8dVOBXj8vqus4WWX9Xnex5O5XRYuap6QESuBj4WkX0434GGCAcecZNMCZCO0xw0myb+TamqisgZwGMi8n84Se9D4Fac+qIvkCYixcBTON/12Tjbvq0RzgOAk3Cu8Xp+LfCsiPwJp0ns117z5uA0cR5Tz7Lau008wTjv9YvAA+68p3F2kJeJiLhln66qH4tzUcUSETnIofenUs16I5g4VwHdBvRW1WtFZDLOoVEPd5H3VXWwu+yNQLiq3tFsAbZhInIMcKOqnuLjUH5BRLrgNCn0V9Wyeq5jGPCUqjb0yq168XX51RGRcFUtcCuKR4EfVfXBZip7Ps73bklzlGfqptmagMr3kHDOlI922w4vxrk6xPgp93B+MXBbAyr/3+KcaLy9MWNrLeXXwhXunuRqnKa4J30bjmkpmu0IoCXvIRljjD9qliOAVrCHZIwxfsc6gzPGGD/VJEcAIpIiIl+KyBr3pq7fu9NjROQzcTpy+kxEOrrTLxTnduaV4nQTMMxrXVPF6bogXURuaYp4jTHGHzXJEYA4/YAkquoyEYnAuab/dJzrv/eo6t1uZd5RVW8WkSOBtaq6V5xbme9Q1bHudb4bcDpC8uBcwna+qq5p9KCNMcbPNMkRgKpmqeoy93E+zo0iSTh3ez7vLvY8TlJAVRd43QC0iEO3kY8B0t3uCw7i3PY+vSliNsYYf9PkJ4HdOxBH4Fzq10lVs9xZ2zl0B7C3y4GP3MdJHN7VrMedZowxpoGa9E5gcbqtfRO43u375+d57l19WmH5yTgJYCLGGGOaVJMdAbh9VLyJ0y1reQ+MO+RQP+GJOJ0alS8/FOf25umqutudnAmkeK02GRtkxBhjGkVTXQUkOP28r1XVB7xmzQUucR9fgtODISLSFaeb3ovcPs7L/QD0cTswC8Hp72QuxhhjGqyprgKaiNMr3kqg/Pb+W3HOA7yG033rVmCGqu4Rkadxun7e6i5boqqj3XWdhNOxVCDwjKpWN/ybMcaYWrIbwYwxxk/5w5CQxhhjKmEJwBhj/JQlAGOM8VOWAIwxxk9ZAjDGGD9lCcAYY/yUJQBjjPFTlgCMMcZP/T87CNt3LW/SCAAAAABJRU5ErkJggg==\n", 289 | "text/plain": [ 290 | "
" 291 | ] 292 | }, 293 | "metadata": { 294 | "needs_background": "light" 295 | }, 296 | "output_type": "display_data" 297 | } 298 | ], 299 | "source": [ 300 | "dataset['month'].value_counts().sort_index().plot(title='Кол-во объектов по месяцам');" 301 | ] 302 | }, 303 | { 304 | "cell_type": "markdown", 305 | "metadata": {}, 306 | "source": [ 307 | "Интересно, что в последние два месяца, объект гораздо больше, чем в другие месяцы...\n", 308 | "\n", 309 | "Посмотрим, как географически распределены train и test:" 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "execution_count": 8, 315 | "metadata": { 316 | "ExecuteTime": { 317 | "end_time": "2021-09-25T18:31:30.864935Z", 318 | "start_time": "2021-09-25T18:31:30.753719Z" 319 | } 320 | }, 321 | "outputs": [ 322 | { 323 | "name": "stdout", 324 | "output_type": "stream", 325 | "text": [ 326 | "User Guide: https://docs.kepler.gl/docs/keplergl-jupyter\n" 327 | ] 328 | }, 329 | { 330 | "data": { 331 | "application/vnd.jupyter.widget-view+json": { 332 | "model_id": "e0a8fa94f39249308ae9041d235222f0", 333 | "version_major": 2, 334 | "version_minor": 0 335 | }, 336 | "text/plain": [ 337 | "KeplerGl(data={'test': lat lng\n", 338 | "0 51.709255 36.147908\n", 339 | "1 61.233240 73.462509\n", 340 | "2 …" 341 | ] 342 | }, 343 | "metadata": {}, 344 | "output_type": "display_data" 345 | } 346 | ], 347 | "source": [ 348 | "kepler_data = {\n", 349 | " \"test\": dataset[dataset['train']==0][['lat', 'lng']],\n", 350 | " \"train\": dataset[dataset['train']==1][['lat', 'lng']],\n", 351 | "}\n", 352 | "\n", 353 | "map1 = KeplerGl(height=400, data=kepler_data)\n", 354 | "map1" 355 | ] 356 | }, 357 | { 358 | "cell_type": "markdown", 359 | "metadata": {}, 360 | "source": [ 361 | "Выглядит достаточно равномерно" 362 | ] 363 | }, 364 | { 365 | "cell_type": "markdown", 366 | "metadata": {}, 367 | "source": [ 368 | "# Adversarial validation\n", 369 | "\n", 370 | "В данном разделе проверим, насколько сильно наша обучающая выборка отличается от тестовой.\n", 371 | "\n", 372 | "\n", 373 | "Для этого назовем `единичками` объекты из обучающей выбокри, `ноликами` - объекты из тестовой выборки.\n", 374 | "И на исходных признаках построим модель машинного обучения, которая будет пытаться отличить обучающую выборку от тестовой на основе признаков.\n", 375 | "\n", 376 | "Так как данные распределены по времени, то чтобы оценить качество нашей модели оставим по последнему месяцу из train и test.\n", 377 | "\n", 378 | "Если выборки не отличаются, то качество такой модели должно быть случайной.\n", 379 | "\n", 380 | "Для оценки качества модели будем использовать `ROC AUC`. Как мы знаем, `ROC AUC` случайного алгоритма должен быть в районе `0.5`." 381 | ] 382 | }, 383 | { 384 | "cell_type": "code", 385 | "execution_count": 9, 386 | "metadata": { 387 | "ExecuteTime": { 388 | "end_time": "2021-09-25T18:31:39.888229Z", 389 | "start_time": "2021-09-25T18:31:30.866752Z" 390 | } 391 | }, 392 | "outputs": [ 393 | { 394 | "name": "stdout", 395 | "output_type": "stream", 396 | "text": [ 397 | "Learning rate set to 0.022342\n", 398 | "0:\tlearn: 0.6909881\ttotal: 58.8ms\tremaining: 58.7s\n", 399 | "100:\tlearn: 0.6161894\ttotal: 1.11s\tremaining: 9.9s\n", 400 | "200:\tlearn: 0.5937461\ttotal: 1.97s\tremaining: 7.82s\n", 401 | "300:\tlearn: 0.5767069\ttotal: 2.77s\tremaining: 6.43s\n", 402 | "400:\tlearn: 0.5588018\ttotal: 3.56s\tremaining: 5.32s\n", 403 | "500:\tlearn: 0.5380880\ttotal: 4.49s\tremaining: 4.47s\n", 404 | "600:\tlearn: 0.5179787\ttotal: 5.39s\tremaining: 3.58s\n", 405 | "700:\tlearn: 0.4999933\ttotal: 6.25s\tremaining: 2.67s\n", 406 | "800:\tlearn: 0.4835562\ttotal: 7.14s\tremaining: 1.77s\n", 407 | "900:\tlearn: 0.4679176\ttotal: 7.97s\tremaining: 876ms\n", 408 | "999:\tlearn: 0.4541192\ttotal: 8.81s\tremaining: 0us\n", 409 | "0.48765809873521015\n" 410 | ] 411 | } 412 | ], 413 | "source": [ 414 | "oot_train_month_adv = '2020-08-01'\n", 415 | "oot_val_month_adv = '2020-12-01'\n", 416 | "\n", 417 | "Xy_train_adv = dataset[~dataset['month'].isin([oot_train_month_adv, oot_val_month_adv])].reset_index(drop=True)\n", 418 | "Xy_test_adv = dataset[dataset['month'].isin([oot_train_month_adv, oot_val_month_adv])].reset_index(drop=True)\n", 419 | "\n", 420 | "\n", 421 | "adv_model = CatBoostClassifier(verbose=100)\n", 422 | "adv_model.fit(\n", 423 | " Xy_train_adv.drop(key_cols, axis=1), \n", 424 | " Xy_train_adv['train'],\n", 425 | " cat_features=cat_cols\n", 426 | ")\n", 427 | "\n", 428 | "\n", 429 | "predict_adv = adv_model.predict(Xy_test_adv.drop(key_cols, axis=1))\n", 430 | "\n", 431 | "print(roc_auc_score(Xy_test_adv['train'], predict_adv))" 432 | ] 433 | }, 434 | { 435 | "cell_type": "markdown", 436 | "metadata": {}, 437 | "source": [ 438 | "Качество нашей модели получилось околослучайное, а значит все в порядке" 439 | ] 440 | }, 441 | { 442 | "cell_type": "markdown", 443 | "metadata": {}, 444 | "source": [ 445 | "# Модель" 446 | ] 447 | }, 448 | { 449 | "cell_type": "markdown", 450 | "metadata": {}, 451 | "source": [ 452 | "Прежде, чем строить модель, давайте разберемся, как будем валидироваться.\n", 453 | "\n", 454 | "Как говорилось выше, данные распределены по времени, поэтому валидироваться не обходимо также по времени:\n", 455 | "\n", 456 | "![alt text](https://miro.medium.com/max/558/1*AXRu72CV1hdjLfODFGbMWQ.png \"Title\")\n", 457 | "\n" 458 | ] 459 | }, 460 | { 461 | "cell_type": "code", 462 | "execution_count": 10, 463 | "metadata": { 464 | "ExecuteTime": { 465 | "end_time": "2021-09-25T18:31:39.896342Z", 466 | "start_time": "2021-09-25T18:31:39.889996Z" 467 | } 468 | }, 469 | "outputs": [], 470 | "source": [ 471 | "# месяц с которого начнем валидироваться\n", 472 | "start_month = '2020-04-01'\n", 473 | "\n", 474 | "# месяцы для валидации\n", 475 | "val_months = (\n", 476 | " dataset[(dataset['train']==1) & (dataset['month'] > start_month)]['month']\n", 477 | " .drop_duplicates()\n", 478 | " .sort_values()\n", 479 | " .tolist()\n", 480 | ")" 481 | ] 482 | }, 483 | { 484 | "cell_type": "code", 485 | "execution_count": 11, 486 | "metadata": { 487 | "ExecuteTime": { 488 | "end_time": "2021-09-25T18:32:06.507988Z", 489 | "start_time": "2021-09-25T18:31:39.898647Z" 490 | } 491 | }, 492 | "outputs": [ 493 | { 494 | "data": { 495 | "application/vnd.jupyter.widget-view+json": { 496 | "model_id": "f91d64541e1548d1b3575e83280da1b2", 497 | "version_major": 2, 498 | "version_minor": 0 499 | }, 500 | "text/plain": [ 501 | "HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=4.0), HTML(value='')))" 502 | ] 503 | }, 504 | "metadata": {}, 505 | "output_type": "display_data" 506 | }, 507 | { 508 | "name": "stdout", 509 | "output_type": "stream", 510 | "text": [ 511 | "\n", 512 | "Средняя метрика по бинам: 1.594\n", 513 | "Отклонение метрики по бинам: 0.160\n" 514 | ] 515 | } 516 | ], 517 | "source": [ 518 | "main_model = CatBoostRegressor(loss_function='MAE', verbose=0)\n", 519 | "\n", 520 | "scores = []\n", 521 | "\n", 522 | "for month in tqdm(val_months):\n", 523 | " Xy_train = dataset[(dataset['month'] < month)].reset_index(drop=True)\n", 524 | " Xy_val = dataset[(dataset['month'] == month)].reset_index(drop=True)\n", 525 | " \n", 526 | " model = clone(main_model)\n", 527 | " model = CatBoostRegressor(loss_function='MAE', verbose=0)\n", 528 | " model = model.fit(Xy_train.drop(key_cols, axis=1), Xy_train['target'], cat_features=cat_cols)\n", 529 | "\n", 530 | " metric = deviation_metric(Xy_val['target'], model.predict(Xy_val.drop(key_cols, axis=1)))\n", 531 | " scores.append(metric)\n", 532 | " \n", 533 | "print(f'Средняя метрика по бинам: {np.mean(scores):.3f}')\n", 534 | "print(f'Отклонение метрики по бинам: {np.std(scores):.3f}')" 535 | ] 536 | }, 537 | { 538 | "cell_type": "code", 539 | "execution_count": 12, 540 | "metadata": { 541 | "ExecuteTime": { 542 | "end_time": "2021-09-25T18:32:06.536279Z", 543 | "start_time": "2021-09-25T18:32:06.509814Z" 544 | } 545 | }, 546 | "outputs": [ 547 | { 548 | "data": { 549 | "text/html": [ 550 | "
\n", 551 | "\n", 564 | "\n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | "
Feature IdImportances
0city8.071964
1total_square7.342278
2floor4.817922
3realty_type4.656558
4osm_train_stop_closest_dist3.711538
.........
68osm_building_points_in_0.010.200341
69osm_healthcare_points_in_0.0050.177565
70osm_leisure_points_in_0.00750.161400
71osm_building_points_in_0.0010.131856
72osm_train_stop_points_in_0.00750.000000
\n", 630 | "

73 rows × 2 columns

\n", 631 | "
" 632 | ], 633 | "text/plain": [ 634 | " Feature Id Importances\n", 635 | "0 city 8.071964\n", 636 | "1 total_square 7.342278\n", 637 | "2 floor 4.817922\n", 638 | "3 realty_type 4.656558\n", 639 | "4 osm_train_stop_closest_dist 3.711538\n", 640 | ".. ... ...\n", 641 | "68 osm_building_points_in_0.01 0.200341\n", 642 | "69 osm_healthcare_points_in_0.005 0.177565\n", 643 | "70 osm_leisure_points_in_0.0075 0.161400\n", 644 | "71 osm_building_points_in_0.001 0.131856\n", 645 | "72 osm_train_stop_points_in_0.0075 0.000000\n", 646 | "\n", 647 | "[73 rows x 2 columns]" 648 | ] 649 | }, 650 | "execution_count": 12, 651 | "metadata": {}, 652 | "output_type": "execute_result" 653 | } 654 | ], 655 | "source": [ 656 | "model.get_feature_importance(prettified=True)" 657 | ] 658 | }, 659 | { 660 | "cell_type": "markdown", 661 | "metadata": {}, 662 | "source": [ 663 | "# Финальная модель\n", 664 | "\n", 665 | "Обучим финальную модель и сделаем предикт" 666 | ] 667 | }, 668 | { 669 | "cell_type": "code", 670 | "execution_count": 13, 671 | "metadata": { 672 | "ExecuteTime": { 673 | "end_time": "2021-09-25T18:32:14.105074Z", 674 | "start_time": "2021-09-25T18:32:06.538334Z" 675 | } 676 | }, 677 | "outputs": [], 678 | "source": [ 679 | "Xy_train = dataset[(dataset['train'] == 1)].reset_index(drop=True)\n", 680 | "Xy_test = dataset[(dataset['train'] == 0)].reset_index(drop=True)\n", 681 | "\n", 682 | "model = clone(main_model)\n", 683 | "model = CatBoostRegressor(loss_function='MAE', verbose=0)\n", 684 | "model = model.fit(Xy_train.drop(key_cols, axis=1), Xy_train['target'], cat_features=cat_cols)" 685 | ] 686 | }, 687 | { 688 | "cell_type": "code", 689 | "execution_count": 14, 690 | "metadata": { 691 | "ExecuteTime": { 692 | "end_time": "2021-09-25T18:32:14.133472Z", 693 | "start_time": "2021-09-25T18:32:14.106470Z" 694 | } 695 | }, 696 | "outputs": [], 697 | "source": [ 698 | "Xy_test['per_square_meter_price'] = model.predict(Xy_test.drop(key_cols, axis=1))\n", 699 | "Xy_test[['id', 'per_square_meter_price']].to_csv('sub1.csv', index=False)" 700 | ] 701 | }, 702 | { 703 | "cell_type": "markdown", 704 | "metadata": {}, 705 | "source": [ 706 | "# Дальнейшие идеи\n", 707 | "\n", 708 | "* Покрутить признаки\n", 709 | "* потюнить модель\n", 710 | "* Подумать, что можно сделать с отфильтрованной частью датасета\n", 711 | "* Поиспользовать внешние данные :)" 712 | ] 713 | } 714 | ], 715 | "metadata": { 716 | "kernelspec": { 717 | "display_name": "Python 3", 718 | "language": "python", 719 | "name": "python3" 720 | }, 721 | "language_info": { 722 | "codemirror_mode": { 723 | "name": "ipython", 724 | "version": 3 725 | }, 726 | "file_extension": ".py", 727 | "mimetype": "text/x-python", 728 | "name": "python", 729 | "nbconvert_exporter": "python", 730 | "pygments_lexer": "ipython3", 731 | "version": "3.8.8" 732 | }, 733 | "toc": { 734 | "base_numbering": 1, 735 | "nav_menu": {}, 736 | "number_sections": true, 737 | "sideBar": true, 738 | "skip_h1_title": false, 739 | "title_cell": "Table of Contents", 740 | "title_sidebar": "Contents", 741 | "toc_cell": false, 742 | "toc_position": {}, 743 | "toc_section_display": true, 744 | "toc_window_display": false 745 | } 746 | }, 747 | "nbformat": 4, 748 | "nbformat_minor": 4 749 | } 750 | --------------------------------------------------------------------------------