├── README.md ├── Challenge#1 -African Snake Antivenom Binding ├── Readme.md └── UmojaHack_Challenge_1_Top_1_Notebook.ipynb └── Challenge#3 - Faulty Air Quality Sensor ├── Readme.md └── UmojaHack_Challenge_3_Top_3_Notebook.ipynb /README.md: -------------------------------------------------------------------------------- 1 | # UmojaHack-Africa-2022 -------------------------------------------------------------------------------- /Challenge#1 -African Snake Antivenom Binding/Readme.md: -------------------------------------------------------------------------------- 1 | # UmojaHack Africa 2022 #1: African Snake Antivenom Binding 2 | 3 | ## Brief Description 4 | 5 | The objective of this challenge is to create a machine learning model to predict how strongly a given string of amino acids from a snake venom toxin protein binds to eight different commercial antivenom antibodies. Have a look on [Zindi](https://zindi.africa/competitions/umojahack-africa-2022-advanced-challenge). 6 | 7 | ## About this code 8 | 9 | ``` 10 | # this code is my 1st place Solution for the Advanced Hackathon ! 11 | ``` 12 | 13 | ## [On the Leaderboard](https://zindi.africa/competitions/umojahack-africa-2022-advanced-challenge/leaderboard) 14 | 15 | Rank : 1/112 16 | ## Authors 17 | 18 | <div align="center">
19 | 20 | | Name | Zindi ID | Github ID | 21 | |----------------|--------------------------------------------------|------------------------------------------| 22 | |Azer KSOURI |[@ASSAZZIN](https://zindi.africa/users/ASSAZZIN) |[@Az-Ks](https://github.com/ASSAZZIN-01) | 23 | 24 | 25 |
26 | -------------------------------------------------------------------------------- /Challenge#3 - Faulty Air Quality Sensor/Readme.md: -------------------------------------------------------------------------------- 1 | # UmojaHack Africa 2022 #3: Faulty Air Quality Sensor 2 | 3 | ## Brief Description 4 | 5 | The objective of this challenge is to create a classification model to identify whether a device has an offset fault or not, regardless of the device. The model can be used by AirQo to automatically flag a device that is returning faulty data. Have a look on [Zindi](https://zindi.africa/competitions/umojahack-africa-2022-beginner-challenge). 6 | 7 | ## About this code 8 | 9 | ``` 10 | # this code is not My Final Solution - But it's a code that will give you 3rd place ! 11 | ``` 12 | 13 | 14 | ## [On the Leaderboard](https://zindi.africa/competitions/umojahack-africa-2022-beginner-challenge/leaderboard) 15 | 16 | Rank : 1/493 17 | ## Authors 18 | 19 | <div align="center">
20 | 21 | | Name | Zindi ID | Github ID | 22 | |----------------|--------------------------------------------------|------------------------------------------| 23 | |Azer KSOURI |[@ASSAZZIN](https://zindi.africa/users/ASSAZZIN) |[@Az-Ks](https://github.com/ASSAZZIN-01) | 24 | 25 | 26 |
27 | -------------------------------------------------------------------------------- /Challenge#3 - Faulty Air Quality Sensor/UmojaHack_Challenge_3_Top_3_Notebook.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Challenge#3 - Faulty Air Quality Sensor", 7 | "provenance": [] 8 | }, 9 | "kernelspec": { 10 | "name": "python3", 11 | "display_name": "Python 3" 12 | }, 13 | "language_info": { 14 | "name": "python" 15 | } 16 | }, 17 | "cells": [ 18 | { 19 | "cell_type": "markdown", 20 | "source": [ 21 | "# IMPORTS" 22 | ], 23 | "metadata": { 24 | "id": "rnazxbDHUu8i" 25 | } 26 | }, 27 | { 28 | "cell_type": "code", 29 | "metadata": { 30 | "id": "5aRGOfmQ-OtI" 31 | }, 32 | "source": [ 33 | "import os\n", 34 | "import gc\n", 35 | "import random\n", 36 | "import pandas as pd\n", 37 | "import numpy as np\n", 38 | "import lightgbm as lgb\n", 39 | "\n", 40 | "from tqdm.notebook import tqdm\n", 41 | "from tqdm import tqdm_notebook\n", 42 | "\n", 43 | "from sklearn.model_selection import GroupKFold\n", 44 | "from sklearn.metrics import accuracy_score\n", 45 | "from sklearn.preprocessing import LabelEncoder\n", 46 | "\n", 47 | "import warnings\n", 48 | "warnings.simplefilter('ignore')" 49 | ], 50 | "execution_count": 15, 51 | "outputs": [] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": { 56 | "id": "XpQ6PCnNtP_4" 57 | }, 58 | "source": [ 59 | "# **Load - Process**" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "metadata": { 65 | "id": "FHCfc_HVtULf" 66 | }, 67 | "source": [ 68 | "class Process :\n", 69 | "\n", 70 | " def LAG(self,data,LagFeature,shift=1,NewFeatures=[]) :\n", 71 | " data[NewFeatures[0]] = data[LagFeature] - data[LagFeature].shift(shift)\n", 72 | " data[NewFeatures[1]] = data[LagFeature].shift(shift)\n", 73 | "\n", 74 | " def LE(self,data,LE_cols = []) :\n", 75 | " LE = LabelEncoder()\n", 76 | " for le_col in 
LE_cols :\n", 77 | " data[le_col] = LE.fit_transform(data[le_col])\n", 78 | " \n", 79 | " def process(self,train,test) :\n", 80 | " data = pd.concat([train,test]).reset_index(drop=True)\n", 81 | " \n", 82 | " # time features \n", 83 | " data['created_at'] = pd.to_datetime(data['Datetime'])\n", 84 | " data['year'] = data['created_at'].dt.year\n", 85 | " data['year'] = data['year'].astype(float)\n", 86 | " data['month'] = data['created_at'].dt.month\n", 87 | " data['day'] = data['created_at'].dt.day\n", 88 | " data['weekday'] = data['created_at'].dt.weekday\n", 89 | " data['weekofyear'] = data['created_at'].dt.weekofyear\n", 90 | " data['hour'] = data['created_at'].dt.hour\n", 91 | "\n", 92 | " # combination between time features\n", 93 | " data['day_hour'] = data['day'].astype(str) + '-' + data['hour'].astype(str)\n", 94 | " data['month_day'] = data['month'].astype(str) + '-' + data['day'].astype(str)\n", 95 | "\n", 96 | " # Label Encoder\n", 97 | " self.LE(data,LE_cols = ['day_hour','month_day'])\n", 98 | "\n", 99 | " # FE\n", 100 | " data = data.sort_values('Datetime').reset_index(drop=True)\n", 101 | " self.LAG(data,LagFeature='Sensor1_PM2.5',shift=1,NewFeatures=['sensor1_diff_next','sensor1_next'])\n", 102 | " self.LAG(data,LagFeature='Sensor1_PM2.5',shift=-1,NewFeatures=['sensor1_diff_before','sensor1_before'])\n", 103 | " self.LAG(data,LagFeature='Sensor2_PM2.5',shift=1,NewFeatures=['sensor2_diff_next','sensor2_next'])\n", 104 | " self.LAG(data,LagFeature='Sensor2_PM2.5',shift=-1,NewFeatures=['sensor2_diff_before','sensor2_before'])\n", 105 | "\n", 106 | " # Get our New Train,Test\n", 107 | " data['SplitBy'] = data['year'].astype(int).astype(str) + '-' + data['month'].astype(str) + '-' + data['day'].astype(str)\n", 108 | " data = data.sort_values('SplitBy').reset_index(drop=True)\n", 109 | " train = data[data['ID'].isin(train['ID'].values)].reset_index(drop=True)\n", 110 | " train['Offset_fault'] = train['Offset_fault'].astype('int')\n", 111 | " test = 
data[~data['ID'].isin(train['ID'].values)].reset_index(drop=True)\n", 112 | "\n", 113 | " return train, test" 114 | ], 115 | "execution_count": 10, 116 | "outputs": [] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "source": [ 121 | "train = pd.read_csv('train.csv')\n", 122 | "test = pd.read_csv('test.csv')" 123 | ], 124 | "metadata": { 125 | "id": "loJYF5mb9lGM" 126 | }, 127 | "execution_count": 11, 128 | "outputs": [] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "source": [ 133 | "processor = Process()\n", 134 | "train, test= processor.process(train,test)" 135 | ], 136 | "metadata": { 137 | "id": "4gBBFMokXIA0" 138 | }, 139 | "execution_count": 12, 140 | "outputs": [] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "source": [ 145 | "print('unique days',train.SplitBy.nunique() , test.SplitBy.nunique())\n", 146 | "print('shape',train.shape , test.shape)" 147 | ], 148 | "metadata": { 149 | "colab": { 150 | "base_uri": "https://localhost:8080/" 151 | }, 152 | "id": "5bzG__eQJYPy", 153 | "outputId": "d4f381e2-8dd0-485d-fd75-de675df1a935" 154 | }, 155 | "execution_count": 13, 156 | "outputs": [ 157 | { 158 | "output_type": "stream", 159 | "name": "stdout", 160 | "text": [ 161 | "unique days 99 52\n", 162 | "shape (297177, 25) (127361, 25)\n" 163 | ] 164 | } 165 | ] 166 | }, 167 | { 168 | "cell_type": "markdown", 169 | "metadata": { 170 | "id": "dlPfPBZz-xdo" 171 | }, 172 | "source": [ 173 | "# **Modeling**" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "source": [ 179 | "class CFG :\n", 180 | " SEED = 42\n", 181 | " n_splits = 5\n", 182 | "\n", 183 | " lgb_params = {'boosting_type': 'gbdt','objective': 'binary','metric': 'auc',\n", 184 | " 'n_estimators': 2500,'reg_lambda' :50,'num_leaves' : 81,\n", 185 | " 'seed': SEED,'silent':True,'early_stopping_rounds': 100,\n", 186 | " }\n", 187 | " remove_features = ['ID', 'Datetime',\"created_at\",'SplitBy' , 'folds', 'Offset_fault',]\n", 188 | " TARGET_COL = 'Offset_fault'" 189 | ], 190 | "metadata": { 
191 | "id": "T3hOkrFNYxHm" 192 | }, 193 | "execution_count": 26, 194 | "outputs": [] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "source": [ 199 | "class CostumSplit :\n", 200 | " def __init__(self,) :\n", 201 | " self.n_splits = CFG.n_splits\n", 202 | "\n", 203 | " def Split(self,Train_) :\n", 204 | " kf = GroupKFold(n_splits=self.n_splits)\n", 205 | "\n", 206 | " Train = Train_.copy()\n", 207 | " Train = Train.drop_duplicates('SplitBy').reset_index(drop=True)\n", 208 | " \n", 209 | " groups = Train['SplitBy']\n", 210 | " Train[\"folds\"]=-1 \n", 211 | " for fold, (_, val_index) in enumerate(kf.split(Train,Train['Offset_fault'],groups)):\n", 212 | " Train.loc[val_index, \"folds\"] = fold\n", 213 | " return Train\n", 214 | "\n", 215 | " def apply(self,train) :\n", 216 | " mapper = dict(zip(self.Split(train)['SplitBy'].tolist(),\n", 217 | " self.Split(train)['folds'].tolist()))\n", 218 | "\n", 219 | " train['folds'] = train['SplitBy'].map(mapper)\n", 220 | " return train" 221 | ], 222 | "metadata": { 223 | "id": "6pJr-HO2BcWO" 224 | }, 225 | "execution_count": 27, 226 | "outputs": [] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "source": [ 231 | "split = CostumSplit() \n", 232 | "\n", 233 | "train = split.apply(train)" 234 | ], 235 | "metadata": { 236 | "id": "3gNuMgtTYYK8" 237 | }, 238 | "execution_count": 28, 239 | "outputs": [] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "source": [ 244 | "features_columns = [col for col in train.columns if col not in CFG.remove_features]\n", 245 | "len(features_columns)" 246 | ], 247 | "metadata": { 248 | "colab": { 249 | "base_uri": "https://localhost:8080/" 250 | }, 251 | "id": "hm0AQWmpYq0r", 252 | "outputId": "99cca32f-3e33-4449-ff45-8163e9838224" 253 | }, 254 | "execution_count": 29, 255 | "outputs": [ 256 | { 257 | "output_type": "execute_result", 258 | "data": { 259 | "text/plain": [ 260 | "20" 261 | ] 262 | }, 263 | "metadata": {}, 264 | "execution_count": 29 265 | } 266 | ] 267 | }, 268 | { 269 | 
"cell_type": "code", 270 | "source": [ 271 | "oof_lgb = np.zeros((train.shape[0],))\n", 272 | "test[CFG.TARGET_COL] = 0\n", 273 | "lgb_preds = []\n", 274 | "\n", 275 | "for fold in range(CFG.n_splits) :\n", 276 | " print(50*'-')\n", 277 | " print(f'Fold {fold+1} / {CFG.n_splits}' )\n", 278 | "\n", 279 | " tr_x, tr_y = train[train['folds']!=fold][features_columns] , train[train['folds']!=fold][CFG.TARGET_COL] \n", 280 | " vl_x, vl_y = train[train['folds']==fold][features_columns] , train[train['folds']==fold][CFG.TARGET_COL] \n", 281 | " val_idx = vl_x.index.tolist()\n", 282 | "\n", 283 | " train_data = lgb.Dataset(tr_x, label=tr_y)\n", 284 | " valid_data = lgb.Dataset(vl_x, label=vl_y)\n", 285 | "\n", 286 | " estimator = lgb.train(CFG.lgb_params,train_data,valid_sets = [train_data,valid_data],verbose_eval = 0)\n", 287 | " \n", 288 | " y_pred_val = estimator.predict(vl_x,num_iteration=estimator.best_iteration)\n", 289 | " oof_lgb[val_idx] = y_pred_val\n", 290 | " print(f'FOLD-{fold} accuracy score :',accuracy_score(vl_y, (y_pred_val>0.5)*1))\n", 291 | "\n", 292 | " y_pred_test = estimator.predict(test[features_columns],num_iteration=estimator.best_iteration)\n", 293 | " lgb_preds.append(y_pred_test)\n", 294 | " print(50*'-')\n", 295 | "\n", 296 | "print('OOF score :',accuracy_score(train[CFG.TARGET_COL], (oof_lgb>0.5)*1))" 297 | ], 298 | "metadata": { 299 | "colab": { 300 | "base_uri": "https://localhost:8080/" 301 | }, 302 | "id": "a0q2CU_nYUJB", 303 | "outputId": "b5fafe6d-6b81-4b65-85ff-8aa5681ceb65" 304 | }, 305 | "execution_count": 33, 306 | "outputs": [ 307 | { 308 | "output_type": "stream", 309 | "name": "stdout", 310 | "text": [ 311 | "--------------------------------------------------\n", 312 | "Fold 1 / 5\n", 313 | "FOLD-0 accuracy score : 0.9912406059281427\n", 314 | "--------------------------------------------------\n", 315 | "--------------------------------------------------\n", 316 | "Fold 2 / 5\n", 317 | "FOLD-1 accuracy score : 
0.9918887601390498\n", 318 | "--------------------------------------------------\n", 319 | "--------------------------------------------------\n", 320 | "Fold 3 / 5\n", 321 | "FOLD-2 accuracy score : 0.9920260595161345\n", 322 | "--------------------------------------------------\n", 323 | "--------------------------------------------------\n", 324 | "Fold 4 / 5\n", 325 | "FOLD-3 accuracy score : 0.9960781662046115\n", 326 | "--------------------------------------------------\n", 327 | "--------------------------------------------------\n", 328 | "Fold 5 / 5\n", 329 | "FOLD-4 accuracy score : 0.9951137552077083\n", 330 | "--------------------------------------------------\n", 331 | "OOF score : 0.9932531790818266\n" 332 | ] 333 | } 334 | ] 335 | }, 336 | { 337 | "cell_type": "markdown", 338 | "source": [ 339 | "# **SUBMISSION**" 340 | ], 341 | "metadata": { 342 | "id": "gxWmLDChbAEq" 343 | } 344 | }, 345 | { 346 | "cell_type": "code", 347 | "source": [ 348 | "SUB_FILE_NAME = 'WinningSolution.csv' ;sub_df = test[['ID']].copy() ; sub_df['Offset_fault'] = (np.mean(lgb_preds,axis=0)>0.5)*1\n", 349 | "sub_df.to_csv(SUB_FILE_NAME, index=False)\n", 350 | "sub_df.head(10)" 351 | ], 352 | "metadata": { 353 | "colab": { 354 | "base_uri": "https://localhost:8080/", 355 | "height": 363 356 | }, 357 | "id": "u47maAY2YUGQ", 358 | "outputId": "32426f63-0f9a-4180-d59b-214a8565320b" 359 | }, 360 | "execution_count": 32, 361 | "outputs": [ 362 | { 363 | "output_type": "execute_result", 364 | "data": { 365 | "text/plain": [ 366 | " ID Offset_fault\n", 367 | "0 ID_VJTCP5667QNH 0\n", 368 | "1 ID_Z4FVLMBG5SI8 0\n", 369 | "2 ID_1AKWB2POZX8Q 0\n", 370 | "3 ID_MD0HNZQZT1FQ 1\n", 371 | "4 ID_HJ7XVHB2GBFK 0\n", 372 | "5 ID_8GT0DMK2ZO33 0\n", 373 | "6 ID_M5Z3J91KLW8A 1\n", 374 | "7 ID_I4C5C9NCPXZY 0\n", 375 | "8 ID_R8WE3U29LXY4 1\n", 376 | "9 ID_98KEGPPXVOQU 0" 377 | ], 378 | "text/html": [ 379 | "\n", 380 | "
\n", 381 | "
\n", 382 | "
\n", 383 | "\n", 396 | "\n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | "
IDOffset_fault
0ID_VJTCP5667QNH0
1ID_Z4FVLMBG5SI80
2ID_1AKWB2POZX8Q0
3ID_MD0HNZQZT1FQ1
4ID_HJ7XVHB2GBFK0
5ID_8GT0DMK2ZO330
6ID_M5Z3J91KLW8A1
7ID_I4C5C9NCPXZY0
8ID_R8WE3U29LXY41
9ID_98KEGPPXVOQU0
\n", 457 | "
\n", 458 | " \n", 468 | " \n", 469 | " \n", 506 | "\n", 507 | " \n", 531 | "
\n", 532 | "
\n", 533 | " " 534 | ] 535 | }, 536 | "metadata": {}, 537 | "execution_count": 32 538 | } 539 | ] 540 | } 541 | ] 542 | } 543 | -------------------------------------------------------------------------------- /Challenge#1 -African Snake Antivenom Binding/UmojaHack_Challenge_1_Top_1_Notebook.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Challenge#1 - African Snake Antivenom Binding", 7 | "provenance": [], 8 | "collapsed_sections": [] 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | }, 14 | "language_info": { 15 | "name": "python" 16 | }, 17 | "accelerator": "GPU" 18 | }, 19 | "cells": [ 20 | { 21 | "cell_type": "markdown", 22 | "source": [ 23 | "# SETUP" 24 | ], 25 | "metadata": { 26 | "id": "tA6WnOxLhys7" 27 | } 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": { 33 | "id": "zXHQEtzWhrSS" 34 | }, 35 | "outputs": [], 36 | "source": [ 37 | "!nvidia-smi" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "source": [ 43 | "# IMPORTS" 44 | ], 45 | "metadata": { 46 | "id": "096OX259iWmc" 47 | } 48 | }, 49 | { 50 | "cell_type": "code", 51 | "source": [ 52 | "import random ,os\n", 53 | "import numpy as np \n", 54 | "import pandas as pd\n", 55 | "import gc\n", 56 | "import time\n", 57 | "\n", 58 | "from sklearn.metrics import mean_squared_error\n", 59 | "from tqdm import tqdm\n", 60 | "from sklearn.model_selection import GroupKFold\n", 61 | "\n", 62 | "# torch\n", 63 | "import torch\n", 64 | "from torch import nn \n", 65 | "from torch.utils.data import DataLoader, Dataset\n", 66 | "from torch.utils.tensorboard import SummaryWriter\n", 67 | "from torch import nn\n", 68 | "from torch.nn import functional as F\n", 69 | "\n", 70 | "import warnings\n", 71 | "warnings.simplefilter('ignore')" 72 | ], 73 | "metadata": { 74 | "id": "W47qxZSFiP8D" 75 | }, 76 | 
"execution_count": null, 77 | "outputs": [] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "source": [ 82 | "#Load Data" 83 | ], 84 | "metadata": { 85 | "id": "M1vXVtGQidCI" 86 | } 87 | }, 88 | { 89 | "cell_type": "code", 90 | "source": [ 91 | "train_df = pd.read_csv(\"https://storage.googleapis.com/umojahack2022/train.csv\")\n", 92 | "test_df = pd.read_csv(\"https://storage.googleapis.com/umojahack2022/test.csv\")" 93 | ], 94 | "metadata": { 95 | "id": "Cqr6bjLjibur" 96 | }, 97 | "execution_count": null, 98 | "outputs": [] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "source": [ 103 | "print(train_df.shape , test_df.shape)\n", 104 | "print('-----------')\n", 105 | "print(train_df.Toxin_UniprotID.nunique() , test_df.Toxin_UniprotID.nunique())" 106 | ], 107 | "metadata": { 108 | "id": "WmPo4VXQieuy" 109 | }, 110 | "execution_count": null, 111 | "outputs": [] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "source": [ 116 | "# UTILS" 117 | ], 118 | "metadata": { 119 | "id": "7ueaRC5ji8be" 120 | } 121 | }, 122 | { 123 | "cell_type": "code", 124 | "source": [ 125 | "def seed_all(SEED_VAL=1):\n", 126 | " random.seed(SEED_VAL)\n", 127 | " np.random.seed(SEED_VAL)\n", 128 | " torch.manual_seed(SEED_VAL)\n", 129 | " torch.cuda.manual_seed_all(SEED_VAL)\n", 130 | " os.environ['PYTHONHASHSEED'] = str(SEED_VAL)\n", 131 | " torch.backends.cudnn.deterministic = True\n", 132 | " torch.backends.cudnn.benchmark = False" 133 | ], 134 | "metadata": { 135 | "id": "ok2pqKkeofGA" 136 | }, 137 | "execution_count": null, 138 | "outputs": [] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "source": [ 143 | "def free_memory(sleep_time=0.1):\n", 144 | " \"\"\" Black magic function to free torch memory and some jupyter whims \"\"\"\n", 145 | " gc.collect()\n", 146 | " torch.cuda.synchronize()\n", 147 | " gc.collect()\n", 148 | " torch.cuda.empty_cache()\n", 149 | " time.sleep(sleep_time)" 150 | ], 151 | "metadata": { 152 | "id": "Crw-7RjUdUz4" 153 | }, 154 | "execution_count": 
null, 155 | "outputs": [] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "source": [ 160 | "def get_seq_column_map(train, test, col):\n", 161 | " sequences = []\n", 162 | " for seq in train[col]:\n", 163 | " sequences.extend(list(seq))\n", 164 | " for seq in test[col]:\n", 165 | " sequences.extend(list(seq))\n", 166 | " unique = np.unique(sequences)\n", 167 | " return {k: v for k, v in zip(unique, range(len(unique)))}\n", 168 | "\n", 169 | "def get_column_map(train, test, col):\n", 170 | " sequences = []\n", 171 | " unique_values = pd.concat([train[col], test[col]]).unique().tolist()\n", 172 | " return {k: v for k, v in zip(unique_values, range(len(unique_values)))}" 173 | ], 174 | "metadata": { 175 | "id": "GyEThHRCi4gk" 176 | }, 177 | "execution_count": null, 178 | "outputs": [] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "source": [ 183 | "class AntivenomChallengeDataSet(Dataset):\n", 184 | " def __init__(\n", 185 | " self,\n", 186 | " amino_acid_map,\n", 187 | " antivenom_map,\n", 188 | " data,\n", 189 | " is_train,\n", 190 | " label_name=None,\n", 191 | " ):\n", 192 | " self.amino_acid_map = amino_acid_map\n", 193 | " self.antivenom_map = antivenom_map\n", 194 | " self.data = data\n", 195 | " self.is_train = is_train\n", 196 | " self.label_name = label_name\n", 197 | "\n", 198 | " def __len__(self):\n", 199 | " return len(self.data) \n", 200 | "\n", 201 | " def __getitem__(self,idx):\n", 202 | " row = self.data.iloc[idx]\n", 203 | " kmer_seq = torch.as_tensor([self.amino_acid_map[e] for e in list(row[\"Toxin_Kmer\"])])\n", 204 | " antivenom = torch.as_tensor(self.antivenom_map[row[\"Antivenom\"]])\n", 205 | " position_start = torch.as_tensor(row[\"Kmer_Position_start\"])\n", 206 | " position_end = torch.as_tensor(row[\"Kmer_Position_end\"])\n", 207 | " \n", 208 | " inputs = {\n", 209 | " \"K_mer\": kmer_seq,\n", 210 | " \"antivenom\": antivenom,\n", 211 | " \"position_start\": position_start,\n", 212 | " \"position_end\": position_end,\n", 213 | " 
}\n", 214 | "\n", 215 | " if self.is_train: \n", 216 | " return inputs, torch.as_tensor([row[self.label_name]])\n", 217 | " return inputs" 218 | ], 219 | "metadata": { 220 | "id": "9V8_CoFXi-V7" 221 | }, 222 | "execution_count": null, 223 | "outputs": [] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "source": [ 228 | "class ResidualLSTM(nn.Module):\n", 229 | "\n", 230 | " def __init__(self, d_model):\n", 231 | " super(ResidualLSTM, self).__init__()\n", 232 | " self.LSTM=nn.LSTM(d_model, d_model, num_layers=1, bidirectional=True,batch_first=True)\n", 233 | " self.linear1=nn.Linear(d_model*2, d_model*4)\n", 234 | " self.linear2=nn.Linear(d_model*4, d_model)\n", 235 | "\n", 236 | " def forward(self, x):\n", 237 | " res=x\n", 238 | " x, _ = self.LSTM(x)\n", 239 | " x=F.relu(self.linear1(x))\n", 240 | " x=self.linear2(x)\n", 241 | " x=res+x\n", 242 | " return x\n", 243 | "\n", 244 | "class SimpleSeqModel(nn.Module):\n", 245 | " \"\"\"\n", 246 | " Credits : INstadeepStartNotebook & https://www.kaggle.com/code/shujun717/1-solution-lstm-cnn-transformer-1-fold\n", 247 | " \"\"\"\n", 248 | " def __init__(\n", 249 | " self,\n", 250 | " K_mer_emb_size,\n", 251 | " K_mer_nunique,\n", 252 | " antivenom_emb_size,\n", 253 | " antivenom_unique,\n", 254 | " max_Position_start,\n", 255 | " Position_start_emb_size,\n", 256 | " ): \n", 257 | " super().__init__()\n", 258 | " self.K_mer_emb_size = K_mer_emb_size \n", 259 | " self.K_mer_nunique = K_mer_nunique \n", 260 | " self.antivenom_emb_size = antivenom_emb_size \n", 261 | " self.antivenom_unique = antivenom_unique \n", 262 | " self.rnnlayers = 3\n", 263 | " self.max_seq=None\n", 264 | " self.nlayers=3\n", 265 | " self.dropout=0\n", 266 | " self.nheads=16\n", 267 | "\n", 268 | " self.Kmer_emb_layer = nn.Embedding( num_embeddings=self.K_mer_nunique,embedding_dim=self.K_mer_emb_size,)\n", 269 | " \n", 270 | " embed_dim =self.K_mer_emb_size\n", 271 | " self.pos_encoder = nn.ModuleList([ResidualLSTM(self.K_mer_emb_size) for i in 
range(self.rnnlayers)])\n", 272 | " self.pos_encoder_dropout = nn.Dropout(self.dropout)\n", 273 | " self.layer_normal = nn.LayerNorm(embed_dim)\n", 274 | " encoder_layers = [nn.TransformerEncoderLayer(embed_dim, self.nheads, embed_dim*4, self.dropout) for i in range(self.nlayers)]\n", 275 | " conv_layers = [nn.Conv1d(embed_dim,embed_dim,(self.nlayers-i)*2-1,stride=1,padding=0) for i in range(self.nlayers)]\n", 276 | " deconv_layers = [nn.ConvTranspose1d(embed_dim,embed_dim,(self.nlayers-i)*2-1,stride=1,padding=0) for i in range(self.nlayers)]\n", 277 | " layer_norm_layers = [nn.LayerNorm(embed_dim) for i in range(self.nlayers)]\n", 278 | " layer_norm_layers2 = [nn.LayerNorm(embed_dim) for i in range(self.nlayers)]\n", 279 | " self.transformer_encoder = nn.ModuleList(encoder_layers)\n", 280 | " self.conv_layers = nn.ModuleList(conv_layers)\n", 281 | " self.layer_norm_layers = nn.ModuleList(layer_norm_layers)\n", 282 | " self.layer_norm_layers2 = nn.ModuleList(layer_norm_layers2)\n", 283 | " self.deconv_layers = nn.ModuleList(deconv_layers)\n", 284 | " self.pred = nn.Linear(embed_dim, 1)\n", 285 | " self.downsample = nn.Linear(embed_dim*2,embed_dim)\n", 286 | "\n", 287 | " self.Antivenom_emb = nn.Embedding(num_embeddings=self.antivenom_unique,embedding_dim=self.antivenom_emb_size,)\n", 288 | " self.Position_start_emb = nn.Embedding(num_embeddings=max_Position_start,embedding_dim=Position_start_emb_size,)\n", 289 | " self.Features = nn.Linear(in_features=self.antivenom_emb_size + Position_start_emb_size,out_features=128,)\n", 290 | " self.Linear_1 = nn.Linear(in_features=1152,out_features=512,)\n", 291 | " self.relu_1 = nn.ReLU()\n", 292 | " self.Output = nn.Linear(in_features=self.Linear_1.out_features, out_features=1,)\n", 293 | " \n", 294 | " \n", 295 | "\n", 296 | " def forward(self, inputs):\n", 297 | " kmer_emb = self.Kmer_emb_layer(inputs[\"K_mer\"])\n", 298 | " for lstm in self.pos_encoder:\n", 299 | " kmer_emb=lstm(kmer_emb)\n", 300 | " kmer_emb = 
torch.squeeze(kmer_emb)\n", 301 | " kmer_emb = self.pos_encoder_dropout(kmer_emb)\n", 302 | " kmer_emb = self.layer_normal(kmer_emb)\n", 303 | "\n", 304 | " for conv, transformer_layer, layer_norm1, layer_norm2, deconv in zip(self.conv_layers,self.transformer_encoder,self.layer_norm_layers,\n", 305 | " self.layer_norm_layers2,self.deconv_layers):\n", 306 | " #LXBXC to BXCXL\n", 307 | " res=kmer_emb\n", 308 | " kmer_emb=F.relu(conv(kmer_emb.permute(1,2,0)).permute(2,0,1))\n", 309 | " kmer_emb=layer_norm1(kmer_emb)\n", 310 | " kmer_emb=transformer_layer(kmer_emb)\n", 311 | " kmer_emb=F.relu(deconv(kmer_emb.permute(1,2,0)).permute(2,0,1))\n", 312 | " kmer_emb=layer_norm2(kmer_emb)\n", 313 | " kmer_emb=res+kmer_emb\n", 314 | " \n", 315 | " antivenom_emb = self.Antivenom_emb(inputs[\"antivenom\"])\n", 316 | " position_start_emb = self.Position_start_emb(inputs[\"position_start\"])\n", 317 | " emb_features = torch.cat((antivenom_emb, position_start_emb), axis=1)\n", 318 | " features = self.Features(emb_features)\n", 319 | "\n", 320 | " emb = torch.cat((torch.squeeze(kmer_emb[:,1,:], 1), features), axis=1)\n", 321 | " linear_1 = self.relu_1(self.Linear_1(emb))\n", 322 | " output = self.Output(linear_1)\n", 323 | "\n", 324 | " return output" 325 | ], 326 | "metadata": { 327 | "id": "vtpVbYMSjBBS" 328 | }, 329 | "execution_count": null, 330 | "outputs": [] 331 | }, 332 | { 333 | "cell_type": "code", 334 | "source": [ 335 | "def train_func(fold,train_data_loader,val_data_loader,model,loss_fn,optimizer,num_epochs,device,writer,early_stopping=5,): \n", 336 | " def get_score(y_true, y_pred):\n", 337 | " return mean_squared_error(y_true, y_pred,squared=False)\n", 338 | " \n", 339 | " seed_all()\n", 340 | " total_batches = len(train_data_loader)\n", 341 | " total_batches_val = len(val_data_loader)\n", 342 | " train_loss = []\n", 343 | " \n", 344 | " n_iter = 0\n", 345 | " best_outputs = []\n", 346 | " for epoch in range(num_epochs): \n", 347 | " tqdm_bar = tqdm(train_data_loader, 
desc=f\"epoch {epoch}\", position=0) \n", 348 | " old_val_loss = np.inf\n", 349 | " wating = 0\n", 350 | " model.train()\n", 351 | " for batch_number, (X, y) in enumerate(tqdm_bar):\n", 352 | " y = y.type(torch.FloatTensor).to(device)\n", 353 | " X = {k: X[k].to(device) for k in X}\n", 354 | " \n", 355 | " optimizer.zero_grad()\n", 356 | " pred = model(X)\n", 357 | " loss = loss_fn(pred, y)\n", 358 | " loss.backward()\n", 359 | " \n", 360 | " torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)\n", 361 | " optimizer.step()\n", 362 | " \n", 363 | " loss = loss.item()\n", 364 | " train_loss.append(loss)\n", 365 | "\n", 366 | " writer.add_scalar(\"loss/train\", loss, n_iter)\n", 367 | " n_iter += 1\n", 368 | "\n", 369 | " if batch_number % 25 == 0: \n", 370 | " tqdm_bar.set_postfix({\"train\": f\"{batch_number}/{total_batches} loss: {loss:.3} epoch loss: {np.mean(train_loss):.3}\",},)\n", 371 | "\n", 372 | " ############## validation ############## \n", 373 | " val_tqdm_bar = tqdm(val_data_loader, desc=f\"epoch {epoch}\", position=0, leave=True,) \n", 374 | " final_outputs = []\n", 375 | " final_targets = [] \n", 376 | " val_loss = []\n", 377 | " model.eval()\n", 378 | " with torch.no_grad(): \n", 379 | " for batch_number, (X, y) in enumerate(val_tqdm_bar):\n", 380 | " y = y.type(torch.FloatTensor).to(device)\n", 381 | " X = {k: X[k].to(device) for k in X}\n", 382 | " \n", 383 | " pred = model(X)\n", 384 | " final_outputs.append(pred.cpu().detach().numpy())\n", 385 | " final_targets.append(y.cpu().numpy())\n", 386 | " val_loss.append(loss_fn(pred, y).item())\n", 387 | "\n", 388 | " writer.add_scalar(\"loss/validation\", np.random.random(), n_iter)\n", 389 | " if batch_number % 25 == 0: \n", 390 | " val_tqdm_bar.set_postfix({\"valid\": f\"{batch_number}/{total_batches_val} val loss: {np.mean(val_loss):.3}\"},)\n", 391 | " \n", 392 | " new_val_loss = np.mean(val_loss)\n", 393 | " final_targets = np.concatenate(final_targets)\n", 394 | " final_outputs = 
class Trainer:
    """K-fold training / inference driver for the antivenom-binding model.

    Orchestrates a grouped 10-fold CV over the training set (grouping by
    toxin so no toxin leaks across folds), trains one model per fold via
    the module-level ``train_func``, and averages per-fold checkpoints at
    inference time.

    NOTE(review): relies on helpers defined in earlier notebook cells
    (``AntivenomChallengeDataSet``, ``SimpleSeqModel``, ``train_func``,
    ``get_seq_column_map``, ``get_column_map``) being in scope when
    ``TrainKfold`` / ``INFERENCE`` run.
    """

    def __init__(self, train_df, test_df, seed=42):
        # Raw competition frames; TrainKfold adds a "folds" column to train_df.
        self.train_df = train_df
        self.test_df = test_df
        self.n_splits = 10

        # DataLoader params
        self.batch_size = 512
        self.num_workers = 0
        self.shuffle = True
        self.drop_last = False

        # Model / optimisation params
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.num_epochs = 25
        self.early_stopping = 5
        self.lr = 1e-4

        # FIX: SEED_VAL was read by seed_all() but never assigned anywhere
        # in the class, so seed_all() raised AttributeError. Exposed as an
        # optional constructor argument (default keeps old intent).
        self.SEED_VAL = seed

    def seed_all(self):
        """Seed every RNG source (python, numpy, torch, hash) for reproducibility."""
        random.seed(self.SEED_VAL)
        np.random.seed(self.SEED_VAL)
        torch.manual_seed(self.SEED_VAL)
        torch.cuda.manual_seed_all(self.SEED_VAL)
        os.environ['PYTHONHASHSEED'] = str(self.SEED_VAL)
        # Deterministic cuDNN kernels; benchmark off so kernel choice is stable.
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    def get_score(self, y_true, y_pred):
        """Competition metric: root-mean-squared error."""
        return mean_squared_error(y_true, y_pred, squared=False)

    def Split(self, Train):
        """Assign a grouped fold id (new 'folds' column) to *Train*.

        Groups by Toxin_UniprotID so k-mers from one toxin never appear in
        both the training and validation split of the same fold.
        """
        X = Train[["Toxin_UniprotID"]]
        y = Train['Signal']
        groups = Train["Toxin_UniprotID"]

        kf = GroupKFold(n_splits=self.n_splits)
        Train["folds"] = -1
        for fold, (_, val_index) in enumerate(kf.split(X, y, groups=groups)):
            Train.loc[val_index, "folds"] = fold
        return Train

    def TrainKfold(self):
        """Train one model per fold; return out-of-fold predictions (n_rows, 1)."""
        # FIX: was a bare seed_all() call that silently relied on a global
        # helper of the same name; use the bound method.
        self.seed_all()
        amino_acid_map = get_seq_column_map(self.train_df, self.test_df, "Toxin_Kmer")
        antivenom_map = get_column_map(self.train_df, self.test_df, "Antivenom")
        # +1 so the max observed start position is a valid embedding index.
        max_Position_start = pd.concat(
            [self.train_df[["Kmer_Position_start"]],
             self.test_df[["Kmer_Position_start"]]]
        ).Kmer_Position_start.max() + 1

        self.train_df = self.Split(self.train_df)
        oof = np.zeros((self.train_df.shape[0], 1))

        for fold in range(self.n_splits):
            train_split_df = self.train_df[self.train_df.folds != fold]
            val_split_df = self.train_df[self.train_df.folds == fold]
            val_split_df_index = val_split_df.index.tolist()

            train_dataset = AntivenomChallengeDataSet(
                amino_acid_map=amino_acid_map, antivenom_map=antivenom_map,
                data=train_split_df, is_train=True, label_name="Signal",
            )
            val_dataset = AntivenomChallengeDataSet(
                amino_acid_map=amino_acid_map, antivenom_map=antivenom_map,
                data=val_split_df, is_train=True, label_name="Signal",
            )
            # FIX: the original also built a test dataset/loader here from the
            # *global* test_df (not self.test_df) and never used it during
            # training — removed; INFERENCE builds its own loader.

            train_data_loader = DataLoader(
                dataset=train_dataset, batch_size=self.batch_size,
                shuffle=self.shuffle, num_workers=self.num_workers,
                drop_last=self.drop_last,
            )
            val_data_loader = DataLoader(
                dataset=val_dataset, batch_size=self.batch_size,
                shuffle=False, num_workers=self.num_workers, drop_last=False,
            )

            model = SimpleSeqModel(
                K_mer_emb_size=1024, K_mer_nunique=len(amino_acid_map),
                antivenom_emb_size=128, antivenom_unique=len(antivenom_map),
                max_Position_start=max_Position_start, Position_start_emb_size=64,
            )

            loss_fn = nn.MSELoss()
            model = model.to(self.device)
            optimizer = torch.optim.Adam(model.parameters(), lr=self.lr)
            writer = SummaryWriter()
            # Log the model graph once per fold using a real batch.
            writer.add_graph(model, {k: v.to(self.device)
                                     for k, v in next(iter(train_data_loader))[0].items()})

            oof_fold = train_func(
                fold, train_data_loader=train_data_loader,
                val_data_loader=val_data_loader,
                model=model, loss_fn=loss_fn, optimizer=optimizer,
                num_epochs=self.num_epochs, device=self.device,
                writer=writer, early_stopping=self.early_stopping,
            )

            oof[val_split_df_index] = oof_fold

        return oof

    def INFERENCE(self):
        """Average test-set predictions over all fold checkpoints (1-D array)."""
        self.seed_all()  # FIX: was a bare seed_all() relying on a global
        amino_acid_map = get_seq_column_map(self.train_df, self.test_df, "Toxin_Kmer")
        antivenom_map = get_column_map(self.train_df, self.test_df, "Antivenom")
        # NOTE: the original also computed max_Position_start here but never
        # used it — dropped.
        test_dataset = AntivenomChallengeDataSet(
            amino_acid_map=amino_acid_map, antivenom_map=antivenom_map,
            data=self.test_df, is_train=False,
        )
        test_data_loader = DataLoader(
            dataset=test_dataset, batch_size=self.batch_size,
            shuffle=False, num_workers=self.num_workers, drop_last=False,
        )

        final_preds = []
        for fold in range(self.n_splits):
            path = f"model_fold{fold}.pth"
            model = torch.load(path).to(self.device)
            # FIX: switch to eval mode so dropout/batch-norm (if any) behave
            # deterministically at inference time.
            model.eval()
            tqdm_bar = tqdm(test_data_loader, desc=f"Inference-Fold{fold}",
                            position=0, leave=True)

            preds = []
            with torch.no_grad():
                for X in tqdm_bar:
                    X = {k: X[k].to(self.device) for k in X}
                    pred = model(X)
                    preds.append(pred.cpu().numpy())

            final_preds.append(np.concatenate(preds).reshape(-1))
        return np.mean(final_preds, 0)
"execution_count": null, 599 | "outputs": [] 600 | }, 601 | { 602 | "cell_type": "code", 603 | "source": [ 604 | "sample_submission=test_df[[\"ID\"]]\n", 605 | "sample_submission[\"Signal\"] = np.clip(test_pred,train_df['Signal'].min(),train_df['Signal'].max())\n", 606 | "sample_submission.to_csv(\"AssazzinGoodBaseline_Complex_v3.csv\",index=False)" 607 | ], 608 | "metadata": { 609 | "id": "uGsLY7X54lFN" 610 | }, 611 | "execution_count": null, 612 | "outputs": [] 613 | }, 614 | { 615 | "cell_type": "code", 616 | "source": [ 617 | "sample_submission[\"Signal\"].hist()" 618 | ], 619 | "metadata": { 620 | "id": "34eEOKd0Gm50", 621 | "colab": { 622 | "base_uri": "https://localhost:8080/", 623 | "height": 282 624 | }, 625 | "outputId": "2d0e9c6a-44f1-4617-a494-5454aca9d8d5" 626 | }, 627 | "execution_count": null, 628 | "outputs": [ 629 | { 630 | "output_type": "execute_result", 631 | "data": { 632 | "text/plain": [ 633 | "" 634 | ] 635 | }, 636 | "metadata": {}, 637 | "execution_count": 87 638 | }, 639 | { 640 | "output_type": "display_data", 641 | "data": { 642 | "text/plain": [ 643 | "
" 644 | ], 645 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAD4CAYAAAAAczaOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAUSklEQVR4nO3dYYxl5X3f8e8vYMeIJAZsd4R2URcpK0ekK2M6AixX1dTUywKRl1a2hUXN2qLaviARllZKoW9Q7FgiL4hjo8bqymyzTqkJcmItsq3QLebKqlQwYBMwYIuNs4hdAdt4AWdsxdE4/76YZ+Mx7DB3Zu7MvTPP9yNd3XOe85xzn//und89c+45Z1JVSJL68EvjHoAkaf0Y+pLUEUNfkjpi6EtSRwx9SerImeMewBt5+9vfXtu2bVvx+j/+8Y85++yzRzegCWN9G5v1bWyTXN9jjz32t1X1jtMtm+jQ37ZtG48++uiK1x8MBszMzIxuQBPG+jY269vYJrm+JM8ttszDO5LUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1JGJviJXy7Ptlq+N7bWP3n7N2F5b0vDc05ekjhj6ktQRQ1+SOmLoS1JHlgz9JO9M8viCx4+SfCLJeUkOJ3m2PZ/b+ifJ55IcSfJEkksWbGtP6/9skj1rWZgk6fWWDP2q+n5VXVxVFwP/EvgJ8BXgFuCBqtoOPNDmAa4CtrfHXuDzAEnOA24DLgMuBW479UEhSVofyz28cwXw11X1HLAbONjaDwLXtundwBdr3kPAOUnOB64EDlfVyap6GTgM7Fp1BZKkoS33PP3rgC+16amqeqFNvwhMtektwPML1jnW2hZr/wVJ9jL/GwJTU1MMBoNlDvHnZmdnV7X+pHttfft2zI1tLGvx79zb/99mY32TaejQT/Jm4APAra9dVlWVpEYxoKraD+wHmJ6ertX8ObJJ/nNmo/Da+j42zouzrp9Zss9y9fb/t9lY32Razp7+VcC3q+qlNv9SkvOr6oV2+OZEaz8OXLBgva2t7Tgw85r2wUoGrcmzFlcD79sxt+QHmVcCS8uznGP6H+Hnh3YA7gNOnYGzBzi0oP2GdhbP5cCr7TDQ/cDOJOe2L3B3tjZJ0joZak8/ydnA+4H/tKD5duDeJDcCzwEfbu1fB64GjjB/ps/HAarqZJJPAY+0fp+sqpOrrkCSNLShQr+qfgy87TVtP2T+bJ7X9i3gpkW2cwA4sPxhSpJGwStyJakjhr4kdWRT30//yeOvjuU0Rs8okTSp3NOXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjm/o8/XFZiztOns4wd6GUpIXc05ekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1ZKjQT3JOki8n+V6SZ5K8J8l5SQ4nebY9n9v6JsnnkhxJ8kSSSxZsZ0/r/2ySPWtVlCTp9Ibd0/8s8JdV9RvAu4BngFuAB6pqO/BAmwe4CtjeHnuBzwMkOQ+4DbgMuBS47dQHhSRpfSwZ+kneCvxr4C6AqvqHqnoF2A0cbN0OAte26d3AF2veQ8A5Sc4HrgQOV9XJqnoZOAzsGmk1kqQ3NMy9dy4E/h/w35O8C3gMuBmYqqoXWp8Xgak2vQV4fsH6x1rbYu2/IMle5n9DYGpqisFgMGwtrzN11vz9aTYr62NV749xm52d3dDjX4r1TaZhQv9M4BLgd6rq4SSf5eeHcgCoqkpSoxhQVe0H9gNMT0/XzMzMird1592HuOPJzXtPuX075rqv7+j1M+szmDUwGAxYzft70lnfZBrmmP4x4FhVPdzmv8z8h8BL7bAN7flEW34cuGDB+ltb22LtkqR1smToV9WLwPNJ3tmargCeBu4DTp2Bswc41KbvA25oZ/FcDrzaDgPdD+xMcm77Andna5MkrZNhj
w38DnB3kjcDPwA+zvwHxr1JbgSeAz7c+n4duBo4Avyk9aWqTib5FPBI6/fJqjo5kiokSUMZKvSr6nFg+jSLrjhN3wJuWmQ7B4ADyxmgJGl0vCJXkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdGSr0kxxN8mSSx5M82trOS3I4ybPt+dzWniSfS3IkyRNJLlmwnT2t/7NJ9qxNSZKkxSxnT//fVNXFVTXd5m8BHqiq7cADbR7gKmB7e+wFPg/zHxLAbcBlwKXAbac+KCRJ62M1h3d2Awfb9EHg2gXtX6x5DwHnJDkfuBI4XFUnq+pl4DCwaxWvL0lapjOH7FfA/0pSwH+rqv3AVFW90Ja/CEy16S3A8wvWPdbaFmv/BUn2Mv8bAlNTUwwGgyGH+HpTZ8G+HXMrXn/SWR+ren+M2+zs7IYe/1KsbzING/r/qqqOJ/lnwOEk31u4sKqqfSCsWvtA2Q8wPT1dMzMzK97WnXcf4o4nhy1x49m3Y677+o5eP7M+g1kDg8GA1by/J531TaahDu9U1fH2fAL4CvPH5F9qh21ozyda9+PABQtW39raFmuXJK2TJUM/ydlJfvXUNLAT+C5wH3DqDJw9wKE2fR9wQzuL53Lg1XYY6H5gZ5Jz2xe4O1ubJGmdDHNsYAr4SpJT/f9nVf1lkkeAe5PcCDwHfLj1/zpwNXAE+AnwcYCqOpnkU8Ajrd8nq+rkyCqRJC1pydCvqh8A7zpN+w+BK07TXsBNi2zrAHBg+cOUJI2CV+RKUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6sjmvS+vurDtlq+N7bWP3n7N2F5bWin39CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdGTr0k5yR5DtJvtrmL0zycJIjSf4syZtb+y+3+SNt+bYF27i1tX8/yZWjLkaS9MaWs6d/M/DMgvk/AD5TVb8OvAzc2NpvBF5u7Z9p/UhyEXAd8JvALuCPk5yxuuFLkpZjqNBPshW4BvhCmw/wPuDLrctB4No2vbvN05Zf0frvBu6pqp9W1d8AR4BLR1GEJGk4w95754+A3wV+tc2/DXilquba/DFgS5veAjwPUFVzSV5t/bcADy3Y5sJ1/kmSvcBegKmpKQaDwbC1vM7UWbBvx9zSHTco6xuv1bw3AWZnZ1e9jUlmfZNpydBP8lvAiap6LMnMWg+oqvYD+wGmp6drZmblL3nn3Ye448nNe0+5fTvmrG+Mjl4/s6r1B4MBq3l/Tzrrm0zD/ES9F/hAkquBtwC/BnwWOCfJmW1vfytwvPU/DlwAHEtyJvBW4IcL2k9ZuI4kaR0seUy/qm6tqq1VtY35L2K/UVXXAw8CH2zd9gCH2vR9bZ62/BtVVa39unZ2z4XAduBbI6tEkrSk1fzu/J+Be5L8PvAd4K7Wfhfwp0mOACeZ/6Cgqp5Kci/wNDAH3FRVP1vF60uSlmlZoV9VA2DQpn/Aac6+qaq/Bz60yPqfBj693EFKkkbDK3IlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0JekjiwZ+knekuRbSf4qyVNJfq+1X5jk4SRHkvxZkje39l9u80fa8m0LtnVra/9+kivXqihJ0ukNs6f/U+B9VfUu4GJgV5LLgT8APlNVvw68DNzY+t8IvNzaP9P6keQi4DrgN4FdwB8nOWOUxUiS3tiSoV/zZtvsm9qjgPcBX27tB4Fr2/TuNk9bfkWStPZ7quqnVfU3wBHg0pFUIUkaylDH9JOckeRx4ARwGPhr4JWqmmtdjgFb2vQW4HmAtvxV4G0L20+zj
iRpHZw5TKeq+hlwcZJzgK8Av7FWA0qyF9gLMDU1xWAwWPG2ps6CfTvmlu64QVnfeK3mvQkwOzu76m1MMuubTEOF/ilV9UqSB4H3AOckObPtzW8Fjrdux4ELgGNJzgTeCvxwQfspC9dZ+Br7gf0A09PTNTMzs6yCFrrz7kPc8eSyStxQ9u2Ys74xOnr9zKrWHwwGrOb9PemsbzINc/bOO9oePknOAt4PPAM8CHywddsDHGrT97V52vJvVFW19uva2T0XAtuBb42qEEnS0obZjTofONjOtPkl4N6q+mqSp4F7kvw+8B3grtb/LuBPkxwBTjJ/xg5V9VSSe4GngTngpnbYSJK0TpYM/ap6Anj3adp/wGnOvqmqvwc+tMi2Pg18evnDlCSNglfkSlJHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHZnc+9ZKE27bLV9b1fr7dszxsRVs4+jt16zqddU39/QlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHVky9JNckOTBJE8neSrJza39vCSHkzzbns9t7UnyuSRHkjyR5JIF29rT+j+bZM/alSVJOp1h9vTngH1VdRFwOXBTkouAW4AHqmo78ECbB7gK2N4ee4HPw/yHBHAbcBlwKXDbqQ8KSdL6WDL0q+qFqvp2m/474BlgC7AbONi6HQSubdO7gS/WvIeAc5KcD1wJHK6qk1X1MnAY2DXSaiRJb2hZ995Jsg14N/AwMFVVL7RFLwJTbXoL8PyC1Y61tsXaX/sae5n/DYGpqSkGg8FyhvgLps6av7/JZmV9G9tK61vNz8R6mp2d3TBjXYmNWt/QoZ/kV4A/Bz5RVT9K8k/LqqqS1CgGVFX7gf0A09PTNTMzs+Jt3Xn3Ie54cvPeU27fjjnr28BWWt/R62dGP5g1MBgMWM3P76TbqPUNdfZOkjcxH/h3V9VftOaX2mEb2vOJ1n4cuGDB6ltb22LtkqR1MszZOwHuAp6pqj9csOg+4NQZOHuAQwvab2hn8VwOvNoOA90P7ExybvsCd2drkyStk2F+t3wv8FHgySSPt7b/AtwO3JvkRuA54MNt2deBq4EjwE+AjwNU1ckknwIeaf0+WVUnR1KFJGkoS4Z+Vf0fIIssvuI0/Qu4aZFtHQAOLGeAkqTR8YpcSeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0JekjmzeP0skbVLbbvna2F776O3XjO21NRru6UtSRwx9SeqIoS9JHTH0Jakjhr4kdWTJ0E9yIMmJJN9d0HZeksNJnm3P57b2JPlckiNJnkhyyYJ19rT+zybZszblSJLeyDB7+n8C7HpN2y3AA1W1HXigzQNcBWxvj73A52H+QwK4DbgMuBS47dQHhSRp/SwZ+lX1TeDka5p3Awfb9EHg2gXtX6x5DwHnJDkfuBI4XFUnq+pl4DCv/yCRJK2xlV6cNVVVL7TpF4GpNr0FeH5Bv2OtbbH210myl/nfEpiammIwGKxwiDB1FuzbMbfi9Sed9W1sG7G+5fw8zs7Orurnd9Jt1PpWfUVuVVWSGsVg2vb2A/sBpqena2ZmZsXbuvPuQ9zx5Oa96Hjfjjnr28A2Yn1Hr58Zuu9gMGA1P7+TbqPWt9Kzd15qh21ozyda+3HgggX9tra2xdolSetopaF/H3DqDJw9wKEF7Te0s3guB15th4HuB3YmObd9gbuztUmS1tGSv1sm+RIwA7w9yTHmz8K5Hbg3yY3Ac8CHW/evA1cDR4CfAB8HqKqTST4FPNL6fbKqXvvlsCRpjS0Z+lX1kUUWXXGavgXctMh2DgAHljU6SdJIeUWuJHXE0Jekjhj6ktQRQ1+SOmLoS1JHNtblgJLGajl/n3ffjjk+NqK/5+vf5h0d9/QlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqI5
+lLmnjLuT5glDbj9QHu6UtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOrPspm0l2AZ8FzgC+UFW3r/cYJGkYb3Sq6ChvHX06a3W66Lru6Sc5A/ivwFXARcBHkly0nmOQpJ6t9+GdS4EjVfWDqvoH4B5g9zqPQZK6lapavxdLPgjsqqr/2OY/ClxWVb+9oM9eYG+bfSfw/VW85NuBv13F+pPO+jY269vYJrm+f15V7zjdgom7DUNV7Qf2j2JbSR6tqulRbGsSWd/GZn0b20atb70P7xwHLlgwv7W1SZLWwXqH/iPA9iQXJnkzcB1w3zqPQZK6ta6Hd6pqLslvA/czf8rmgap6ag1fciSHiSaY9W1s1rexbcj61vWLXEnSeHlFriR1xNCXpI5s6tBP8qEkTyX5xyQb7tSqxSTZleT7SY4kuWXc4xmlJAeSnEjy3XGPZS0kuSDJg0mebu/Nm8c9plFK8pYk30ryV62+3xv3mNZCkjOSfCfJV8c9luXa1KEPfBf498A3xz2QUengVhZ/Auwa9yDW0Bywr6ouAi4Hbtpk/38/Bd5XVe8CLgZ2Jbl8zGNaCzcDz4x7ECuxqUO/qp6pqtVc0TuJNvWtLKrqm8DJcY9jrVTVC1X17Tb9d8wHx5bxjmp0at5sm31Te2yqs0WSbAWuAb4w7rGsxKYO/U1qC/D8gvljbKLQ6EmSbcC7gYfHO5LRaoc+HgdOAIeralPVB/wR8LvAP457ICux4UM/yf9O8t3TPDbN3q82nyS/Avw58Imq+tG4xzNKVfWzqrqY+SvuL03yL8Y9plFJ8lvAiap6bNxjWamJu/fOclXVvx33GNaZt7LY4JK8ifnAv7uq/mLc41krVfVKkgeZ/45ms3wx/17gA0muBt4C/FqS/1FV/2HM4xraht/T75C3stjAkgS4C3imqv5w3OMZtSTvSHJOmz4LeD/wvfGOanSq6taq2lpV25j/2fvGRgp82OShn+TfJTkGvAf4WpL7xz2m1aqqOeDUrSyeAe5d41tZrKskXwL+L/DOJMeS3DjuMY3Ye4GPAu9L8nh7XD3uQY3Q+cCDSZ5gfgflcFVtuNMaNzNvwyBJHdnUe/qSpF9k6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SO/H+UMHnFIe/gmwAAAABJRU5ErkJggg==\n" 646 | }, 647 | "metadata": { 648 | "needs_background": "light" 649 | } 650 | } 651 | ] 652 | } 653 | ] 654 | } --------------------------------------------------------------------------------