├── README.md
├── Challenge#1 -African Snake Antivenom Binding
│   ├── Readme.md
│   └── UmojaHack_Challenge_1_Top_1_Notebook.ipynb
└── Challenge#3 - Faulty Air Quality Sensor
    ├── Readme.md
    └── UmojaHack_Challenge_3_Top_3_Notebook.ipynb
/README.md:
--------------------------------------------------------------------------------
1 | # UmojaHack-Africa-2022
--------------------------------------------------------------------------------
/Challenge#1 -African Snake Antivenom Binding/Readme.md:
--------------------------------------------------------------------------------
1 | # UmojaHack Africa 2022 #1: African Snake Antivenom Binding
2 |
3 | ## Brief Description
4 |
5 | The objective of this challenge is to create a machine learning model that predicts how strongly a given string of amino acids from a snake venom toxin protein binds to eight different commercial antivenom antibodies. For more details, have a look at the competition page on [Zindi](https://zindi.africa/competitions/umojahack-africa-2022-advanced-challenge).
6 |
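For orientation, here is a minimal sketch of the setup. It only assumes the public data URL and the column names used in the notebook in this folder (`Toxin_Kmer`, `Antivenom` and the `Signal` target) plus the challenge metric (RMSE); it is just a constant baseline, not the winning pipeline.

```python
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error

# Public challenge data, same URL as in the notebook.
train_df = pd.read_csv("https://storage.googleapis.com/umojahack2022/train.csv")

# Each row pairs a toxin k-mer (a short amino-acid string) with one antivenom;
# "Signal" is the binding strength to predict.
print(train_df[["Toxin_Kmer", "Antivenom", "Signal"]].head())

# Constant-mean baseline scored with the challenge metric (RMSE).
baseline = np.full(len(train_df), train_df["Signal"].mean())
print("Baseline RMSE:", np.sqrt(mean_squared_error(train_df["Signal"], baseline)))
```
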
7 | ## About this code
8 |
9 | ```
10 | # This code is my 1st place solution for the Advanced Challenge!
11 | ```
12 |
13 | ## [On the Leaderboard](https://zindi.africa/competitions/umojahack-africa-2022-advanced-challenge/leaderboard)
14 |
15 | Rank: 1/112
16 | ## Authors
17 |
18 |
19 |
20 | | Name | Zindi ID | Github ID |
21 | |----------------|--------------------------------------------------|------------------------------------------|
22 | |Azer KSOURI |[@ASSAZZIN](https://zindi.africa/users/ASSAZZIN) |[@Az-Ks](https://github.com/ASSAZZIN-01) |
23 |
24 |
25 |
26 |
--------------------------------------------------------------------------------
/Challenge#3 - Faulty Air Quality Sensor/Readme.md:
--------------------------------------------------------------------------------
1 | # UmojaHack Africa 2022 #3: Faulty Air Quality Sensor
2 |
3 | ## Brief Description
4 |
5 | The objective of this challenge is to create a classification model that identifies whether a device has an offset fault or not, regardless of the specific device. The model can be used by AirQo to automatically flag a device that is returning faulty data. For more details, have a look at the competition page on [Zindi](https://zindi.africa/competitions/umojahack-africa-2022-beginner-challenge).
6 |
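As a quick sketch of the setup, assuming `train.csv` from the competition page and the column names used in the notebook in this folder (`Sensor1_PM2.5`, `Sensor2_PM2.5`, `Offset_fault`); the `sensor_gap` feature is purely illustrative and this is a simple sanity check, not the competition pipeline.

```python
import pandas as pd
from sklearn.metrics import accuracy_score

train = pd.read_csv("train.csv")

# One intuition: an offset fault should show up as a systematic disagreement
# between the two co-located PM2.5 sensors of a device.
train["sensor_gap"] = (train["Sensor1_PM2.5"] - train["Sensor2_PM2.5"]).abs()
print(train.groupby("Offset_fault")["sensor_gap"].describe())

# Majority-class baseline, scored with accuracy as in the notebook's validation.
majority = train["Offset_fault"].mode()[0]
print("Majority-class accuracy:", accuracy_score(train["Offset_fault"], [majority] * len(train)))
```
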
7 | ## About this code
8 |
9 | ```
10 | # This code is not my final solution, but it will still get you 3rd place!
11 | ```
12 |
13 |
14 | ## [On the Leaderboard](https://zindi.africa/competitions/umojahack-africa-2022-beginner-challenge/leaderboard)
15 |
16 | Rank: 1/493
17 | ## Authors
18 |
19 |
20 |
21 | | Name | Zindi ID | Github ID |
22 | |----------------|--------------------------------------------------|------------------------------------------|
23 | |Azer KSOURI |[@ASSAZZIN](https://zindi.africa/users/ASSAZZIN) |[@Az-Ks](https://github.com/ASSAZZIN-01) |
24 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
/Challenge#3 - Faulty Air Quality Sensor/UmojaHack_Challenge_3_Top_3_Notebook.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "Challenge#3 - Faulty Air Quality Sensor",
7 | "provenance": []
8 | },
9 | "kernelspec": {
10 | "name": "python3",
11 | "display_name": "Python 3"
12 | },
13 | "language_info": {
14 | "name": "python"
15 | }
16 | },
17 | "cells": [
18 | {
19 | "cell_type": "markdown",
20 | "source": [
21 | "# IMPORTS"
22 | ],
23 | "metadata": {
24 | "id": "rnazxbDHUu8i"
25 | }
26 | },
27 | {
28 | "cell_type": "code",
29 | "metadata": {
30 | "id": "5aRGOfmQ-OtI"
31 | },
32 | "source": [
33 | "import os\n",
34 | "import gc\n",
35 | "import random\n",
36 | "import pandas as pd\n",
37 | "import numpy as np\n",
38 | "import lightgbm as lgb\n",
39 | "\n",
40 | "from tqdm.notebook import tqdm\n",
41 | "from tqdm import tqdm_notebook\n",
42 | "\n",
43 | "from sklearn.model_selection import GroupKFold\n",
44 | "from sklearn.metrics import accuracy_score\n",
45 | "from sklearn.preprocessing import LabelEncoder\n",
46 | "\n",
47 | "import warnings\n",
48 | "warnings.simplefilter('ignore')"
49 | ],
50 | "execution_count": 15,
51 | "outputs": []
52 | },
53 | {
54 | "cell_type": "markdown",
55 | "metadata": {
56 | "id": "XpQ6PCnNtP_4"
57 | },
58 | "source": [
59 | "# **Load - Process**"
60 | ]
61 | },
62 | {
63 | "cell_type": "code",
64 | "metadata": {
65 | "id": "FHCfc_HVtULf"
66 | },
67 | "source": [
68 | "class Process :\n",
69 | "\n",
70 | " def LAG(self,data,LagFeature,shift=1,NewFeatures=[]) :\n",
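"        # shift=+1 pulls the previous row's reading, shift=-1 the following one; NewFeatures = [difference, shifted value]\n",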
71 | " data[NewFeatures[0]] = data[LagFeature] - data[LagFeature].shift(shift)\n",
72 | " data[NewFeatures[1]] = data[LagFeature].shift(shift)\n",
73 | "\n",
74 | " def LE(self,data,LE_cols = []) :\n",
75 | " LE = LabelEncoder()\n",
76 | " for le_col in LE_cols :\n",
77 | " data[le_col] = LE.fit_transform(data[le_col])\n",
78 | " \n",
79 | " def process(self,train,test) :\n",
80 | " data = pd.concat([train,test]).reset_index(drop=True)\n",
81 | " \n",
82 | " # time features \n",
83 | " data['created_at'] = pd.to_datetime(data['Datetime'])\n",
84 | " data['year'] = data['created_at'].dt.year\n",
85 | " data['year'] = data['year'].astype(float)\n",
86 | " data['month'] = data['created_at'].dt.month\n",
87 | " data['day'] = data['created_at'].dt.day\n",
88 | " data['weekday'] = data['created_at'].dt.weekday\n",
89 | " data['weekofyear'] = data['created_at'].dt.weekofyear\n",
90 | " data['hour'] = data['created_at'].dt.hour\n",
91 | "\n",
92 | " # combination between time features\n",
93 | " data['day_hour'] = data['day'].astype(str) + '-' + data['hour'].astype(str)\n",
94 | " data['month_day'] = data['month'].astype(str) + '-' + data['day'].astype(str)\n",
95 | "\n",
96 | " # Label Encoder\n",
97 | " self.LE(data,LE_cols = ['day_hour','month_day'])\n",
98 | "\n",
99 | " # FE\n",
100 | " data = data.sort_values('Datetime').reset_index(drop=True)\n",
101 | " self.LAG(data,LagFeature='Sensor1_PM2.5',shift=1,NewFeatures=['sensor1_diff_next','sensor1_next'])\n",
102 | " self.LAG(data,LagFeature='Sensor1_PM2.5',shift=-1,NewFeatures=['sensor1_diff_before','sensor1_before'])\n",
103 | " self.LAG(data,LagFeature='Sensor2_PM2.5',shift=1,NewFeatures=['sensor2_diff_next','sensor2_next'])\n",
104 | " self.LAG(data,LagFeature='Sensor2_PM2.5',shift=-1,NewFeatures=['sensor2_diff_before','sensor2_before'])\n",
105 | "\n",
106 | " # Get our New Train,Test\n",
107 | " data['SplitBy'] = data['year'].astype(int).astype(str) + '-' + data['month'].astype(str) + '-' + data['day'].astype(str)\n",
108 | " data = data.sort_values('SplitBy').reset_index(drop=True)\n",
109 | " train = data[data['ID'].isin(train['ID'].values)].reset_index(drop=True)\n",
110 | " train['Offset_fault'] = train['Offset_fault'].astype('int')\n",
111 | " test = data[~data['ID'].isin(train['ID'].values)].reset_index(drop=True)\n",
112 | "\n",
113 | " return train, test"
114 | ],
115 | "execution_count": 10,
116 | "outputs": []
117 | },
118 | {
119 | "cell_type": "code",
120 | "source": [
121 | "train = pd.read_csv('train.csv')\n",
122 | "test = pd.read_csv('test.csv')"
123 | ],
124 | "metadata": {
125 | "id": "loJYF5mb9lGM"
126 | },
127 | "execution_count": 11,
128 | "outputs": []
129 | },
130 | {
131 | "cell_type": "code",
132 | "source": [
133 | "processor = Process()\n",
134 | "train, test= processor.process(train,test)"
135 | ],
136 | "metadata": {
137 | "id": "4gBBFMokXIA0"
138 | },
139 | "execution_count": 12,
140 | "outputs": []
141 | },
142 | {
143 | "cell_type": "code",
144 | "source": [
145 | "print('unique days',train.SplitBy.nunique() , test.SplitBy.nunique())\n",
146 | "print('shape',train.shape , test.shape)"
147 | ],
148 | "metadata": {
149 | "colab": {
150 | "base_uri": "https://localhost:8080/"
151 | },
152 | "id": "5bzG__eQJYPy",
153 | "outputId": "d4f381e2-8dd0-485d-fd75-de675df1a935"
154 | },
155 | "execution_count": 13,
156 | "outputs": [
157 | {
158 | "output_type": "stream",
159 | "name": "stdout",
160 | "text": [
161 | "unique days 99 52\n",
162 | "shape (297177, 25) (127361, 25)\n"
163 | ]
164 | }
165 | ]
166 | },
167 | {
168 | "cell_type": "markdown",
169 | "metadata": {
170 | "id": "dlPfPBZz-xdo"
171 | },
172 | "source": [
173 | "# **Modeling**"
174 | ]
175 | },
176 | {
177 | "cell_type": "code",
178 | "source": [
179 | "class CFG :\n",
180 | " SEED = 42\n",
181 | " n_splits = 5\n",
182 | "\n",
183 | " lgb_params = {'boosting_type': 'gbdt','objective': 'binary','metric': 'auc',\n",
184 | " 'n_estimators': 2500,'reg_lambda' :50,'num_leaves' : 81,\n",
185 | " 'seed': SEED,'silent':True,'early_stopping_rounds': 100,\n",
186 | " }\n",
187 | " remove_features = ['ID', 'Datetime',\"created_at\",'SplitBy' , 'folds', 'Offset_fault',]\n",
188 | " TARGET_COL = 'Offset_fault'"
189 | ],
190 | "metadata": {
191 | "id": "T3hOkrFNYxHm"
192 | },
193 | "execution_count": 26,
194 | "outputs": []
195 | },
196 | {
197 | "cell_type": "code",
198 | "source": [
199 | "class CostumSplit :\n",
200 | " def __init__(self,) :\n",
201 | " self.n_splits = CFG.n_splits\n",
202 | "\n",
203 | " def Split(self,Train_) :\n",
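"        # fold assignment per unique day: deduplicate on SplitBy, split the days with GroupKFold, then apply() maps each day's fold back to all of its rows\n",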
204 | " kf = GroupKFold(n_splits=self.n_splits)\n",
205 | "\n",
206 | " Train = Train_.copy()\n",
207 | " Train = Train.drop_duplicates('SplitBy').reset_index(drop=True)\n",
208 | " \n",
209 | " groups = Train['SplitBy']\n",
210 | " Train[\"folds\"]=-1 \n",
211 | " for fold, (_, val_index) in enumerate(kf.split(Train,Train['Offset_fault'],groups)):\n",
212 | " Train.loc[val_index, \"folds\"] = fold\n",
213 | " return Train\n",
214 | "\n",
215 | " def apply(self,train) :\n",
216 | " mapper = dict(zip(self.Split(train)['SplitBy'].tolist(),\n",
217 | " self.Split(train)['folds'].tolist()))\n",
218 | "\n",
219 | " train['folds'] = train['SplitBy'].map(mapper)\n",
220 | " return train"
221 | ],
222 | "metadata": {
223 | "id": "6pJr-HO2BcWO"
224 | },
225 | "execution_count": 27,
226 | "outputs": []
227 | },
228 | {
229 | "cell_type": "code",
230 | "source": [
231 | "split = CostumSplit() \n",
232 | "\n",
233 | "train = split.apply(train)"
234 | ],
235 | "metadata": {
236 | "id": "3gNuMgtTYYK8"
237 | },
238 | "execution_count": 28,
239 | "outputs": []
240 | },
241 | {
242 | "cell_type": "code",
243 | "source": [
244 | "features_columns = [col for col in train.columns if col not in CFG.remove_features]\n",
245 | "len(features_columns)"
246 | ],
247 | "metadata": {
248 | "colab": {
249 | "base_uri": "https://localhost:8080/"
250 | },
251 | "id": "hm0AQWmpYq0r",
252 | "outputId": "99cca32f-3e33-4449-ff45-8163e9838224"
253 | },
254 | "execution_count": 29,
255 | "outputs": [
256 | {
257 | "output_type": "execute_result",
258 | "data": {
259 | "text/plain": [
260 | "20"
261 | ]
262 | },
263 | "metadata": {},
264 | "execution_count": 29
265 | }
266 | ]
267 | },
268 | {
269 | "cell_type": "code",
270 | "source": [
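"# GroupKFold training over day-level folds: out-of-fold predictions for validation, per-fold test predictions averaged for the submission\n",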
271 | "oof_lgb = np.zeros((train.shape[0],))\n",
272 | "test[CFG.TARGET_COL] = 0\n",
273 | "lgb_preds = []\n",
274 | "\n",
275 | "for fold in range(CFG.n_splits) :\n",
276 | " print(50*'-')\n",
277 | " print(f'Fold {fold+1} / {CFG.n_splits}' )\n",
278 | "\n",
279 | " tr_x, tr_y = train[train['folds']!=fold][features_columns] , train[train['folds']!=fold][CFG.TARGET_COL] \n",
280 | " vl_x, vl_y = train[train['folds']==fold][features_columns] , train[train['folds']==fold][CFG.TARGET_COL] \n",
281 | " val_idx = vl_x.index.tolist()\n",
282 | "\n",
283 | " train_data = lgb.Dataset(tr_x, label=tr_y)\n",
284 | " valid_data = lgb.Dataset(vl_x, label=vl_y)\n",
285 | "\n",
286 | " estimator = lgb.train(CFG.lgb_params,train_data,valid_sets = [train_data,valid_data],verbose_eval = 0)\n",
287 | " \n",
288 | " y_pred_val = estimator.predict(vl_x,num_iteration=estimator.best_iteration)\n",
289 | " oof_lgb[val_idx] = y_pred_val\n",
290 | " print(f'FOLD-{fold} accuracy score :',accuracy_score(vl_y, (y_pred_val>0.5)*1))\n",
291 | "\n",
292 | " y_pred_test = estimator.predict(test[features_columns],num_iteration=estimator.best_iteration)\n",
293 | " lgb_preds.append(y_pred_test)\n",
294 | " print(50*'-')\n",
295 | "\n",
296 | "print('OOF score :',accuracy_score(train[CFG.TARGET_COL], (oof_lgb>0.5)*1))"
297 | ],
298 | "metadata": {
299 | "colab": {
300 | "base_uri": "https://localhost:8080/"
301 | },
302 | "id": "a0q2CU_nYUJB",
303 | "outputId": "b5fafe6d-6b81-4b65-85ff-8aa5681ceb65"
304 | },
305 | "execution_count": 33,
306 | "outputs": [
307 | {
308 | "output_type": "stream",
309 | "name": "stdout",
310 | "text": [
311 | "--------------------------------------------------\n",
312 | "Fold 1 / 5\n",
313 | "FOLD-0 accuracy score : 0.9912406059281427\n",
314 | "--------------------------------------------------\n",
315 | "--------------------------------------------------\n",
316 | "Fold 2 / 5\n",
317 | "FOLD-1 accuracy score : 0.9918887601390498\n",
318 | "--------------------------------------------------\n",
319 | "--------------------------------------------------\n",
320 | "Fold 3 / 5\n",
321 | "FOLD-2 accuracy score : 0.9920260595161345\n",
322 | "--------------------------------------------------\n",
323 | "--------------------------------------------------\n",
324 | "Fold 4 / 5\n",
325 | "FOLD-3 accuracy score : 0.9960781662046115\n",
326 | "--------------------------------------------------\n",
327 | "--------------------------------------------------\n",
328 | "Fold 5 / 5\n",
329 | "FOLD-4 accuracy score : 0.9951137552077083\n",
330 | "--------------------------------------------------\n",
331 | "OOF score : 0.9932531790818266\n"
332 | ]
333 | }
334 | ]
335 | },
336 | {
337 | "cell_type": "markdown",
338 | "source": [
339 | "# **SUBMISSION**"
340 | ],
341 | "metadata": {
342 | "id": "gxWmLDChbAEq"
343 | }
344 | },
345 | {
346 | "cell_type": "code",
347 | "source": [
348 | "SUB_FILE_NAME = 'WinningSolution.csv'\n",
349 | "sub_df = test[['ID']].copy()\n",
350 | "sub_df['Offset_fault'] = (np.mean(lgb_preds, axis=0) > 0.5) * 1\n",
351 | "sub_df.to_csv(SUB_FILE_NAME, index=False)\n",
352 | "sub_df.head(10)"
351 | ],
352 | "metadata": {
353 | "colab": {
354 | "base_uri": "https://localhost:8080/",
355 | "height": 363
356 | },
357 | "id": "u47maAY2YUGQ",
358 | "outputId": "32426f63-0f9a-4180-d59b-214a8565320b"
359 | },
360 | "execution_count": 32,
361 | "outputs": [
362 | {
363 | "output_type": "execute_result",
364 | "data": {
365 | "text/plain": [
366 | " ID Offset_fault\n",
367 | "0 ID_VJTCP5667QNH 0\n",
368 | "1 ID_Z4FVLMBG5SI8 0\n",
369 | "2 ID_1AKWB2POZX8Q 0\n",
370 | "3 ID_MD0HNZQZT1FQ 1\n",
371 | "4 ID_HJ7XVHB2GBFK 0\n",
372 | "5 ID_8GT0DMK2ZO33 0\n",
373 | "6 ID_M5Z3J91KLW8A 1\n",
374 | "7 ID_I4C5C9NCPXZY 0\n",
375 | "8 ID_R8WE3U29LXY4 1\n",
376 | "9 ID_98KEGPPXVOQU 0"
377 | ]
535 | },
536 | "metadata": {},
537 | "execution_count": 32
538 | }
539 | ]
540 | }
541 | ]
542 | }
543 |
--------------------------------------------------------------------------------
/Challenge#1 -African Snake Antivenom Binding/UmojaHack_Challenge_1_Top_1_Notebook.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "Challenge#1 - African Snake Antivenom Binding",
7 | "provenance": [],
8 | "collapsed_sections": []
9 | },
10 | "kernelspec": {
11 | "name": "python3",
12 | "display_name": "Python 3"
13 | },
14 | "language_info": {
15 | "name": "python"
16 | },
17 | "accelerator": "GPU"
18 | },
19 | "cells": [
20 | {
21 | "cell_type": "markdown",
22 | "source": [
23 | "# SETUP"
24 | ],
25 | "metadata": {
26 | "id": "tA6WnOxLhys7"
27 | }
28 | },
29 | {
30 | "cell_type": "code",
31 | "execution_count": null,
32 | "metadata": {
33 | "id": "zXHQEtzWhrSS"
34 | },
35 | "outputs": [],
36 | "source": [
37 | "!nvidia-smi"
38 | ]
39 | },
40 | {
41 | "cell_type": "markdown",
42 | "source": [
43 | "# IMPORTS"
44 | ],
45 | "metadata": {
46 | "id": "096OX259iWmc"
47 | }
48 | },
49 | {
50 | "cell_type": "code",
51 | "source": [
52 | "import random ,os\n",
53 | "import numpy as np \n",
54 | "import pandas as pd\n",
55 | "import gc\n",
56 | "import time\n",
57 | "\n",
58 | "from sklearn.metrics import mean_squared_error\n",
59 | "from tqdm import tqdm\n",
60 | "from sklearn.model_selection import GroupKFold\n",
61 | "\n",
62 | "# torch\n",
63 | "import torch\n",
64 | "from torch import nn \n",
65 | "from torch.utils.data import DataLoader, Dataset\n",
66 | "from torch.utils.tensorboard import SummaryWriter\n",
68 | "from torch.nn import functional as F\n",
69 | "\n",
70 | "import warnings\n",
71 | "warnings.simplefilter('ignore')"
72 | ],
73 | "metadata": {
74 | "id": "W47qxZSFiP8D"
75 | },
76 | "execution_count": null,
77 | "outputs": []
78 | },
79 | {
80 | "cell_type": "markdown",
81 | "source": [
82 | "# Load Data"
83 | ],
84 | "metadata": {
85 | "id": "M1vXVtGQidCI"
86 | }
87 | },
88 | {
89 | "cell_type": "code",
90 | "source": [
91 | "train_df = pd.read_csv(\"https://storage.googleapis.com/umojahack2022/train.csv\")\n",
92 | "test_df = pd.read_csv(\"https://storage.googleapis.com/umojahack2022/test.csv\")"
93 | ],
94 | "metadata": {
95 | "id": "Cqr6bjLjibur"
96 | },
97 | "execution_count": null,
98 | "outputs": []
99 | },
100 | {
101 | "cell_type": "code",
102 | "source": [
103 | "print(train_df.shape , test_df.shape)\n",
104 | "print('-----------')\n",
105 | "print(train_df.Toxin_UniprotID.nunique() , test_df.Toxin_UniprotID.nunique())"
106 | ],
107 | "metadata": {
108 | "id": "WmPo4VXQieuy"
109 | },
110 | "execution_count": null,
111 | "outputs": []
112 | },
113 | {
114 | "cell_type": "markdown",
115 | "source": [
116 | "# UTILS"
117 | ],
118 | "metadata": {
119 | "id": "7ueaRC5ji8be"
120 | }
121 | },
122 | {
123 | "cell_type": "code",
124 | "source": [
125 | "def seed_all(SEED_VAL=1):\n",
126 | " random.seed(SEED_VAL)\n",
127 | " np.random.seed(SEED_VAL)\n",
128 | " torch.manual_seed(SEED_VAL)\n",
129 | " torch.cuda.manual_seed_all(SEED_VAL)\n",
130 | " os.environ['PYTHONHASHSEED'] = str(SEED_VAL)\n",
131 | " torch.backends.cudnn.deterministic = True\n",
132 | " torch.backends.cudnn.benchmark = False"
133 | ],
134 | "metadata": {
135 | "id": "ok2pqKkeofGA"
136 | },
137 | "execution_count": null,
138 | "outputs": []
139 | },
140 | {
141 | "cell_type": "code",
142 | "source": [
143 | "def free_memory(sleep_time=0.1):\n",
144 | " \"\"\" Black magic function to free torch memory and some jupyter whims \"\"\"\n",
145 | " gc.collect()\n",
146 | " torch.cuda.synchronize()\n",
147 | " gc.collect()\n",
148 | " torch.cuda.empty_cache()\n",
149 | " time.sleep(sleep_time)"
150 | ],
151 | "metadata": {
152 | "id": "Crw-7RjUdUz4"
153 | },
154 | "execution_count": null,
155 | "outputs": []
156 | },
157 | {
158 | "cell_type": "code",
159 | "source": [
160 | "def get_seq_column_map(train, test, col):\n",
161 | " sequences = []\n",
162 | " for seq in train[col]:\n",
163 | " sequences.extend(list(seq))\n",
164 | " for seq in test[col]:\n",
165 | " sequences.extend(list(seq))\n",
166 | " unique = np.unique(sequences)\n",
167 | " return {k: v for k, v in zip(unique, range(len(unique)))}\n",
168 | "\n",
169 | "def get_column_map(train, test, col):\n",
170 | " sequences = []\n",
171 | " unique_values = pd.concat([train[col], test[col]]).unique().tolist()\n",
172 | " return {k: v for k, v in zip(unique_values, range(len(unique_values)))}"
173 | ],
174 | "metadata": {
175 | "id": "GyEThHRCi4gk"
176 | },
177 | "execution_count": null,
178 | "outputs": []
179 | },
180 | {
181 | "cell_type": "code",
182 | "source": [
183 | "class AntivenomChallengeDataSet(Dataset):\n",
184 | " def __init__(\n",
185 | " self,\n",
186 | " amino_acid_map,\n",
187 | " antivenom_map,\n",
188 | " data,\n",
189 | " is_train,\n",
190 | " label_name=None,\n",
191 | " ):\n",
192 | " self.amino_acid_map = amino_acid_map\n",
193 | " self.antivenom_map = antivenom_map\n",
194 | " self.data = data\n",
195 | " self.is_train = is_train\n",
196 | " self.label_name = label_name\n",
197 | "\n",
198 | " def __len__(self):\n",
199 | " return len(self.data) \n",
200 | "\n",
201 | " def __getitem__(self,idx):\n",
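"        # encode the k-mer string as integer ids and bundle the antivenom / position features for the model\n",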
202 | " row = self.data.iloc[idx]\n",
203 | " kmer_seq = torch.as_tensor([self.amino_acid_map[e] for e in list(row[\"Toxin_Kmer\"])])\n",
204 | " antivenom = torch.as_tensor(self.antivenom_map[row[\"Antivenom\"]])\n",
205 | " position_start = torch.as_tensor(row[\"Kmer_Position_start\"])\n",
206 | " position_end = torch.as_tensor(row[\"Kmer_Position_end\"])\n",
207 | " \n",
208 | " inputs = {\n",
209 | " \"K_mer\": kmer_seq,\n",
210 | " \"antivenom\": antivenom,\n",
211 | " \"position_start\": position_start,\n",
212 | " \"position_end\": position_end,\n",
213 | " }\n",
214 | "\n",
215 | " if self.is_train: \n",
216 | " return inputs, torch.as_tensor([row[self.label_name]])\n",
217 | " return inputs"
218 | ],
219 | "metadata": {
220 | "id": "9V8_CoFXi-V7"
221 | },
222 | "execution_count": null,
223 | "outputs": []
224 | },
225 | {
226 | "cell_type": "code",
227 | "source": [
228 | "class ResidualLSTM(nn.Module):\n",
229 | "\n",
230 | " def __init__(self, d_model):\n",
231 | " super(ResidualLSTM, self).__init__()\n",
232 | " self.LSTM=nn.LSTM(d_model, d_model, num_layers=1, bidirectional=True,batch_first=True)\n",
233 | " self.linear1=nn.Linear(d_model*2, d_model*4)\n",
234 | " self.linear2=nn.Linear(d_model*4, d_model)\n",
235 | "\n",
236 | " def forward(self, x):\n",
237 | " res=x\n",
238 | " x, _ = self.LSTM(x)\n",
239 | " x=F.relu(self.linear1(x))\n",
240 | " x=self.linear2(x)\n",
241 | " x=res+x\n",
242 | " return x\n",
243 | "\n",
244 | "class SimpleSeqModel(nn.Module):\n",
245 | " \"\"\"\n",
246 | "    Credits: InstaDeep starter notebook & https://www.kaggle.com/code/shujun717/1-solution-lstm-cnn-transformer-1-fold\n",
247 | " \"\"\"\n",
248 | " def __init__(\n",
249 | " self,\n",
250 | " K_mer_emb_size,\n",
251 | " K_mer_nunique,\n",
252 | " antivenom_emb_size,\n",
253 | " antivenom_unique,\n",
254 | " max_Position_start,\n",
255 | " Position_start_emb_size,\n",
256 | " ): \n",
257 | " super().__init__()\n",
258 | " self.K_mer_emb_size = K_mer_emb_size \n",
259 | " self.K_mer_nunique = K_mer_nunique \n",
260 | " self.antivenom_emb_size = antivenom_emb_size \n",
261 | " self.antivenom_unique = antivenom_unique \n",
262 | " self.rnnlayers = 3\n",
263 | " self.max_seq=None\n",
264 | " self.nlayers=3\n",
265 | " self.dropout=0\n",
266 | " self.nheads=16\n",
267 | "\n",
268 | " self.Kmer_emb_layer = nn.Embedding( num_embeddings=self.K_mer_nunique,embedding_dim=self.K_mer_emb_size,)\n",
269 | " \n",
270 | " embed_dim =self.K_mer_emb_size\n",
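"        # self.pos_encoder: residual bi-LSTM blocks applied to the k-mer embeddings before the transformer stack\n",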
271 | " self.pos_encoder = nn.ModuleList([ResidualLSTM(self.K_mer_emb_size) for i in range(self.rnnlayers)])\n",
272 | " self.pos_encoder_dropout = nn.Dropout(self.dropout)\n",
273 | " self.layer_normal = nn.LayerNorm(embed_dim)\n",
274 | " encoder_layers = [nn.TransformerEncoderLayer(embed_dim, self.nheads, embed_dim*4, self.dropout) for i in range(self.nlayers)]\n",
275 | " conv_layers = [nn.Conv1d(embed_dim,embed_dim,(self.nlayers-i)*2-1,stride=1,padding=0) for i in range(self.nlayers)]\n",
276 | " deconv_layers = [nn.ConvTranspose1d(embed_dim,embed_dim,(self.nlayers-i)*2-1,stride=1,padding=0) for i in range(self.nlayers)]\n",
277 | " layer_norm_layers = [nn.LayerNorm(embed_dim) for i in range(self.nlayers)]\n",
278 | " layer_norm_layers2 = [nn.LayerNorm(embed_dim) for i in range(self.nlayers)]\n",
279 | " self.transformer_encoder = nn.ModuleList(encoder_layers)\n",
280 | " self.conv_layers = nn.ModuleList(conv_layers)\n",
281 | " self.layer_norm_layers = nn.ModuleList(layer_norm_layers)\n",
282 | " self.layer_norm_layers2 = nn.ModuleList(layer_norm_layers2)\n",
283 | " self.deconv_layers = nn.ModuleList(deconv_layers)\n",
284 | " self.pred = nn.Linear(embed_dim, 1)\n",
285 | " self.downsample = nn.Linear(embed_dim*2,embed_dim)\n",
286 | "\n",
287 | " self.Antivenom_emb = nn.Embedding(num_embeddings=self.antivenom_unique,embedding_dim=self.antivenom_emb_size,)\n",
288 | " self.Position_start_emb = nn.Embedding(num_embeddings=max_Position_start,embedding_dim=Position_start_emb_size,)\n",
289 | " self.Features = nn.Linear(in_features=self.antivenom_emb_size + Position_start_emb_size,out_features=128,)\n",
290 | " self.Linear_1 = nn.Linear(in_features=1152,out_features=512,)\n",
291 | " self.relu_1 = nn.ReLU()\n",
292 | " self.Output = nn.Linear(in_features=self.Linear_1.out_features, out_features=1,)\n",
293 | " \n",
294 | " \n",
295 | "\n",
296 | " def forward(self, inputs):\n",
297 | " kmer_emb = self.Kmer_emb_layer(inputs[\"K_mer\"])\n",
298 | " for lstm in self.pos_encoder:\n",
299 | " kmer_emb=lstm(kmer_emb)\n",
300 | " kmer_emb = torch.squeeze(kmer_emb)\n",
301 | " kmer_emb = self.pos_encoder_dropout(kmer_emb)\n",
302 | " kmer_emb = self.layer_normal(kmer_emb)\n",
303 | "\n",
304 | " for conv, transformer_layer, layer_norm1, layer_norm2, deconv in zip(self.conv_layers,self.transformer_encoder,self.layer_norm_layers,\n",
305 | " self.layer_norm_layers2,self.deconv_layers):\n",
306 | " #LXBXC to BXCXL\n",
307 | " res=kmer_emb\n",
308 | " kmer_emb=F.relu(conv(kmer_emb.permute(1,2,0)).permute(2,0,1))\n",
309 | " kmer_emb=layer_norm1(kmer_emb)\n",
310 | " kmer_emb=transformer_layer(kmer_emb)\n",
311 | " kmer_emb=F.relu(deconv(kmer_emb.permute(1,2,0)).permute(2,0,1))\n",
312 | " kmer_emb=layer_norm2(kmer_emb)\n",
313 | " kmer_emb=res+kmer_emb\n",
314 | " \n",
315 | " antivenom_emb = self.Antivenom_emb(inputs[\"antivenom\"])\n",
316 | " position_start_emb = self.Position_start_emb(inputs[\"position_start\"])\n",
317 | " emb_features = torch.cat((antivenom_emb, position_start_emb), axis=1)\n",
318 | " features = self.Features(emb_features)\n",
319 | "\n",
320 | " emb = torch.cat((torch.squeeze(kmer_emb[:,1,:], 1), features), axis=1)\n",
321 | " linear_1 = self.relu_1(self.Linear_1(emb))\n",
322 | " output = self.Output(linear_1)\n",
323 | "\n",
324 | " return output"
325 | ],
326 | "metadata": {
327 | "id": "vtpVbYMSjBBS"
328 | },
329 | "execution_count": null,
330 | "outputs": []
331 | },
332 | {
333 | "cell_type": "code",
334 | "source": [
335 | "def train_func(fold,train_data_loader,val_data_loader,model,loss_fn,optimizer,num_epochs,device,writer,early_stopping=5,): \n",
336 | " def get_score(y_true, y_pred):\n",
337 | " return mean_squared_error(y_true, y_pred,squared=False)\n",
338 | " \n",
339 | " seed_all()\n",
340 | " total_batches = len(train_data_loader)\n",
341 | " total_batches_val = len(val_data_loader)\n",
342 | " train_loss = []\n",
343 | " \n",
344 | "    n_iter = 0\n",
345 | "    best_outputs = []\n",
346 | "    old_val_loss = np.inf\n",
347 | "    waiting = 0\n",
348 | "    for epoch in range(num_epochs): \n",
349 | "        tqdm_bar = tqdm(train_data_loader, desc=f\"epoch {epoch}\", position=0) \n",
350 | "        model.train()\n",
351 | " for batch_number, (X, y) in enumerate(tqdm_bar):\n",
352 | " y = y.type(torch.FloatTensor).to(device)\n",
353 | " X = {k: X[k].to(device) for k in X}\n",
354 | " \n",
355 | " optimizer.zero_grad()\n",
356 | " pred = model(X)\n",
357 | " loss = loss_fn(pred, y)\n",
358 | " loss.backward()\n",
359 | " \n",
360 | " torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)\n",
361 | " optimizer.step()\n",
362 | " \n",
363 | " loss = loss.item()\n",
364 | " train_loss.append(loss)\n",
365 | "\n",
366 | " writer.add_scalar(\"loss/train\", loss, n_iter)\n",
367 | " n_iter += 1\n",
368 | "\n",
369 | " if batch_number % 25 == 0: \n",
370 | " tqdm_bar.set_postfix({\"train\": f\"{batch_number}/{total_batches} loss: {loss:.3} epoch loss: {np.mean(train_loss):.3}\",},)\n",
371 | "\n",
372 | " ############## validation ############## \n",
373 | " val_tqdm_bar = tqdm(val_data_loader, desc=f\"epoch {epoch}\", position=0, leave=True,) \n",
374 | " final_outputs = []\n",
375 | " final_targets = [] \n",
376 | " val_loss = []\n",
377 | " model.eval()\n",
378 | " with torch.no_grad(): \n",
379 | " for batch_number, (X, y) in enumerate(val_tqdm_bar):\n",
380 | " y = y.type(torch.FloatTensor).to(device)\n",
381 | " X = {k: X[k].to(device) for k in X}\n",
382 | " \n",
383 | " pred = model(X)\n",
384 | " final_outputs.append(pred.cpu().detach().numpy())\n",
385 | " final_targets.append(y.cpu().numpy())\n",
386 | " val_loss.append(loss_fn(pred, y).item())\n",
387 | "\n",
388 | "                writer.add_scalar(\"loss/validation\", np.mean(val_loss), n_iter)\n",
389 | " if batch_number % 25 == 0: \n",
390 | " val_tqdm_bar.set_postfix({\"valid\": f\"{batch_number}/{total_batches_val} val loss: {np.mean(val_loss):.3}\"},)\n",
391 | " \n",
392 | " new_val_loss = np.mean(val_loss)\n",
393 | " final_targets = np.concatenate(final_targets)\n",
394 | " final_outputs = np.concatenate(final_outputs)\n",
395 | " \n",
396 | " scoree = get_score(final_targets,final_outputs)\n",
397 | " print('Validation RMSE for this epoch',scoree)\n",
398 | " print()\n",
399 | "        if new_val_loss > old_val_loss:\n",
400 | "            waiting += 1\n",
401 | "        else:\n",
402 | "            old_val_loss = new_val_loss\n",
403 | "            waiting = 0\n",
404 | "            best_outputs = final_outputs\n",
405 | "            torch.save(model, f\"model_fold{fold}.pth\")\n",
406 | "        if waiting > early_stopping:\n",
407 | "            break\n",
407 | " return best_outputs"
408 | ],
409 | "metadata": {
410 | "id": "6E7hoiNt1-TJ"
411 | },
412 | "execution_count": null,
413 | "outputs": []
414 | },
415 | {
416 | "cell_type": "code",
417 | "source": [
418 | "class Trainer :\n",
419 | " def __init__(self,train_df,test_df) :\n",
420 | " self.train_df = train_df\n",
421 | " self.test_df = test_df\n",
422 | " self.n_splits = 10\n",
423 | "\n",
424 | " #Data loader params\n",
425 | " self.batch_size = 512\n",
426 | " self.num_workers = 0\n",
427 | " self.shuffle = True\n",
428 | " self.drop_last = False\n",
429 | "\n",
430 | " # model params\n",
431 | " self.device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
432 | " self.num_epochs = 25\n",
433 | " self.early_stopping = 5\n",
434 | " self.lr = 1e-4\n",
435 | "\n",
436 | " def seed_all(self):\n",
437 | " random.seed(self.SEED_VAL)\n",
438 | " np.random.seed(self.SEED_VAL)\n",
439 | " torch.manual_seed(self.SEED_VAL)\n",
440 | " torch.cuda.manual_seed_all(self.SEED_VAL)\n",
441 | " os.environ['PYTHONHASHSEED'] = str(self.SEED_VAL)\n",
442 | " torch.backends.cudnn.deterministic = True\n",
443 | " torch.backends.cudnn.benchmark = False\n",
444 | "\n",
445 | " def get_score(self,y_true, y_pred):\n",
446 | " return mean_squared_error(y_true, y_pred,squared=False)\n",
447 | "\n",
448 | " def Split(self,Train) :\n",
449 | " X = Train[[\"Toxin_UniprotID\"]]\n",
450 | " y = Train['Signal']\n",
451 | " groups = Train[\"Toxin_UniprotID\"]\n",
452 | " \n",
453 | " kf = GroupKFold(n_splits=self.n_splits)\n",
454 | " Train[\"folds\"]=-1 \n",
455 | " for fold, (_, val_index) in enumerate(kf.split(X, y,groups=groups)):\n",
456 | " Train.loc[val_index, \"folds\"] = fold\n",
457 | " return Train\n",
458 | "\n",
459 | " def TrainKfold(self,) :\n",
460 | " seed_all()\n",
461 | " amino_acid_map = get_seq_column_map(self.train_df, self.test_df, \"Toxin_Kmer\")\n",
462 | " antivenom_map = get_column_map(self.train_df, self.test_df, \"Antivenom\")\n",
463 | " max_Position_start = pd.concat([self.train_df[[\"Kmer_Position_start\"]], self.test_df[[\"Kmer_Position_start\"]]]).Kmer_Position_start.max()+1\n",
464 | "\n",
465 | " self.train_df = self.Split(self.train_df)\n",
466 | " oof = np.zeros((self.train_df.shape[0],1))\n",
467 | "\n",
468 | " for fold in range(self.n_splits):\n",
469 | " train_split_df = self.train_df[self.train_df.folds != fold]\n",
470 | " val_split_df = self.train_df[self.train_df.folds == fold]\n",
471 | " val_split_df_index = self.train_df[self.train_df.folds == fold].index.tolist()\n",
472 | "\n",
473 | " train_dataset = AntivenomChallengeDataSet(amino_acid_map=amino_acid_map,antivenom_map=antivenom_map,\n",
474 | " data=train_split_df,is_train=True,label_name=\"Signal\",)\n",
475 | " val_dataset = AntivenomChallengeDataSet(amino_acid_map=amino_acid_map,antivenom_map=antivenom_map,\n",
476 | " data=val_split_df,is_train=True,label_name=\"Signal\",)\n",
477 | " test_dataset = AntivenomChallengeDataSet(amino_acid_map=amino_acid_map,antivenom_map=antivenom_map,data=test_df,is_train=False,)\n",
478 | "\n",
479 | " train_data_loader = DataLoader(dataset=train_dataset,batch_size=self.batch_size,shuffle=self.shuffle, num_workers=self.num_workers,drop_last=self.drop_last, )\n",
480 | " val_data_loader = DataLoader(dataset=val_dataset,batch_size=self.batch_size,shuffle=False,num_workers=self.num_workers,drop_last=False, )\n",
481 | " test_data_loader= DataLoader(dataset=test_dataset,batch_size=self.batch_size,shuffle=False,num_workers=self.num_workers,drop_last=False,)\n",
482 | "\n",
483 | " \n",
484 | "\n",
485 | " model = SimpleSeqModel(K_mer_emb_size=1024,K_mer_nunique=len(amino_acid_map),\n",
486 | " antivenom_emb_size=128,antivenom_unique=len(antivenom_map),\n",
487 | " max_Position_start=max_Position_start,Position_start_emb_size=64,)\n",
488 | "\n",
489 | " loss_fn = nn.MSELoss()\n",
490 | " model = model.to(self.device)\n",
491 | " optimizer = torch.optim.Adam(model.parameters(), lr=self.lr)\n",
492 | " writer = SummaryWriter()\n",
493 | " writer.add_graph(model, {k: v.to(self.device) for k, v in next(iter(train_data_loader))[0].items()})\n",
494 | "\n",
495 | " \n",
496 | "\n",
497 | " oof_fold = train_func(fold,train_data_loader=train_data_loader,val_data_loader=val_data_loader,\n",
498 | " model=model,loss_fn=loss_fn,optimizer=optimizer,\n",
499 | " num_epochs=self.num_epochs,device=self.device,writer = writer , early_stopping=self.early_stopping,)\n",
500 | "\n",
501 | " oof[val_split_df_index] = oof_fold\n",
502 | "\n",
503 | " return oof\n",
504 | "\n",
505 | " def INFERENCE(self,) :\n",
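"        # load each fold's saved model, predict on the test set, and average the fold predictions\n",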
506 | " seed_all()\n",
507 | " amino_acid_map = get_seq_column_map(self.train_df, self.test_df, \"Toxin_Kmer\")\n",
508 | " antivenom_map = get_column_map(self.train_df, self.test_df, \"Antivenom\")\n",
509 | " max_Position_start = pd.concat([self.train_df[[\"Kmer_Position_start\"]], self.test_df[[\"Kmer_Position_start\"]]]).Kmer_Position_start.max()+1\n",
510 | " test_dataset = AntivenomChallengeDataSet(amino_acid_map=amino_acid_map,antivenom_map=antivenom_map,data=self.test_df,is_train=False,)\n",
511 | " test_data_loader= DataLoader(dataset=test_dataset,batch_size=self.batch_size,shuffle=False,num_workers=self.num_workers,drop_last=False,)\n",
512 | " final_preds = []\n",
513 | " for fold in range(self.n_splits):\n",
514 | " path= f\"model_fold{fold}.pth\"\n",
515 | " model = torch.load(path).to(self.device)\n",
516 | " tqdm_bar = tqdm(test_data_loader, desc=f\"Inference-Fold{fold}\", position=0, leave=True) \n",
517 | " total_batches = len(tqdm_bar)\n",
518 | "\n",
519 | " preds = []\n",
520 | " with torch.no_grad():\n",
521 | " for batch_number, X in enumerate(tqdm_bar):\n",
522 | " X= {k: X[k].to(self.device) for k in X}\n",
523 | " pred = model(X)\n",
524 | " preds.append(pred.cpu().numpy())\n",
525 | "\n",
526 | " preds = np.concatenate(preds).reshape((-1))\n",
527 | " final_preds.append(preds)\n",
528 | " return np.mean(final_preds,0)"
529 | ],
530 | "metadata": {
531 | "id": "_ZDfuECwl2Lv"
532 | },
533 | "execution_count": null,
534 | "outputs": []
535 | },
536 | {
537 | "cell_type": "code",
538 | "source": [
539 | "AssazzinTrainer = Trainer(train_df,test_df)"
540 | ],
541 | "metadata": {
542 | "id": "ubqossgn4j1o"
543 | },
544 | "execution_count": null,
545 | "outputs": []
546 | },
547 | {
548 | "cell_type": "code",
549 | "source": [
550 | "free_memory(sleep_time=0.1)\n",
551 | "import gc ; gc.collect()"
552 | ],
553 | "metadata": {
554 | "id": "IVY-yUcj4jzZ"
555 | },
556 | "execution_count": null,
557 | "outputs": []
558 | },
559 | {
560 | "cell_type": "code",
561 | "source": [
562 | "OOF = AssazzinTrainer.TrainKfold()"
563 | ],
564 | "metadata": {
565 | "id": "HRkHazfZ4jxI"
566 | },
567 | "execution_count": null,
568 | "outputs": []
569 | },
570 | {
571 | "cell_type": "code",
572 | "source": [
573 | "print('OOF RMSE :',AssazzinTrainer.get_score(train_df['Signal'],OOF))"
574 | ],
575 | "metadata": {
576 | "id": "o_Nl0DIB6XVL"
577 | },
578 | "execution_count": null,
579 | "outputs": []
580 | },
581 | {
582 | "cell_type": "markdown",
583 | "source": [
584 | "# INFERENCE & SUBMISSION"
585 | ],
586 | "metadata": {
587 | "id": "lTLtPTwunA-x"
588 | }
589 | },
590 | {
591 | "cell_type": "code",
592 | "source": [
593 | "test_pred = AssazzinTrainer.INFERENCE()"
594 | ],
595 | "metadata": {
596 | "id": "lG_QMOoJF9Pp"
597 | },
598 | "execution_count": null,
599 | "outputs": []
600 | },
601 | {
602 | "cell_type": "code",
603 | "source": [
604 | "sample_submission = test_df[[\"ID\"]].copy()\n",
605 | "sample_submission[\"Signal\"] = np.clip(test_pred,train_df['Signal'].min(),train_df['Signal'].max())\n",
606 | "sample_submission.to_csv(\"AssazzinGoodBaseline_Complex_v3.csv\",index=False)"
607 | ],
608 | "metadata": {
609 | "id": "uGsLY7X54lFN"
610 | },
611 | "execution_count": null,
612 | "outputs": []
613 | },
614 | {
615 | "cell_type": "code",
616 | "source": [
617 | "sample_submission[\"Signal\"].hist()"
618 | ],
619 | "metadata": {
620 | "id": "34eEOKd0Gm50",
621 | "colab": {
622 | "base_uri": "https://localhost:8080/",
623 | "height": 282
624 | },
625 | "outputId": "2d0e9c6a-44f1-4617-a494-5454aca9d8d5"
626 | },
627 | "execution_count": null,
628 | "outputs": [
629 | {
630 | "output_type": "execute_result",
631 | "data": {
632 | "text/plain": [
633 | ""
634 | ]
635 | },
636 | "metadata": {},
637 | "execution_count": 87
638 | },
639 | {
640 | "output_type": "display_data",
641 | "data": {
642 | "text/plain": [
643 | ""
644 | ]
646 | },
647 | "metadata": {
648 | "needs_background": "light"
649 | }
650 | }
651 | ]
652 | }
653 | ]
654 | }
--------------------------------------------------------------------------------