├── Boosted Late-Fusion.ipynb
├── LICENSE
├── README.md
├── SEResnext50_train_predict.ipynb
├── camembert_train_predict.ipynb
├── flaubert_train_predict.ipynb
├── multi-modal_concatenate_fusion.ipynb
└── multi_modal_addition_fusion.ipynb


/Boosted Late-Fusion.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {
  7 |     "_cell_guid": "79c7e3d0-c299-4dcb-8224-4455121ee9b0",
  8 |     "_uuid": "d629ff2d2480ee46fbb7e2d37f6b5fab8052498a"
  9 |    },
 10 |    "outputs": [],
 11 |    "source": [
 12 |     "import pandas as pd\n",
 13 |     "import numpy as np\n",
 14 |     "from tqdm import tqdm\n",
 15 |     "tqdm.pandas()\n",
 16 |     "\n",
 17 |     "import os, time, datetime\n",
 18 |     "from sklearn.model_selection import train_test_split\n",
 19 |     "from sklearn.metrics import roc_auc_score, f1_score, roc_curve, auc\n",
 20 |     "import lightgbm as lgb\n",
 21 |     "import xgboost as xgb"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "code",
 26 |    "execution_count": null,
 27 |    "metadata": {},
 28 |    "outputs": [],
 29 |    "source": [
 30 |     "def format_time(elapsed):\n",
 31 |     "    '''\n",
 32 |     "    Takes a time in seconds and returns a string hh:mm:ss\n",
 33 |     "    '''\n",
 34 |     "    # Round to the nearest second.\n",
 35 |     "    elapsed_rounded = int(round((elapsed)))\n",
 36 |     "    \n",
 37 |     "    # Format as hh:mm:ss\n",
 38 |     "    return str(datetime.timedelta(seconds=elapsed_rounded))\n",
 39 |     "\n",
 40 |     "class SigirPreprocess():\n",
 41 |     "    \n",
 42 |     "    def __init__(self, text_data_path):\n",
 43 |     "        self.text_data_path = text_data_path\n",
 44 |     "        self.train = None\n",
 45 |     "        self.dict_code_to_id = {}\n",
 46 |     "        self.dict_id_to_code = {}\n",
 47 |     "        self.list_tags = {}\n",
 48 |     "        self.sentences = []\n",
 49 |     "        self.labels = []\n",
 50 |     "        self.text_col = None\n",
 51 |     "        self.X_test = None\n",
 52 |     "        \n",
 53 |     "    def prepare_data(self ):\n",
 54 |     "        catalog_eng= pd.read_csv(self.text_data_path+\"data/catalog_english_taxonomy.tsv\",sep=\"\\t\")\n",
 55 |     "        X_train= pd.read_csv(self.text_data_path+\"data/X_train.tsv\",sep=\"\\t\")\n",
 56 |     "        Y_train= pd.read_csv(self.text_data_path+\"data/Y_train.tsv\",sep=\"\\t\")\n",
 57 |     "        \n",
 58 |     "        self.list_tags = list(Y_train['Prdtypecode'].unique())\n",
 59 |     "        for i,tag in enumerate(self.list_tags):\n",
 60 |     "            self.dict_code_to_id[tag] = i \n",
 61 |     "            self.dict_id_to_code[i]=tag\n",
 62 |     "        print(self.dict_code_to_id)\n",
 63 |     "            \n",
 64 |     "        Y_train['labels']=Y_train['Prdtypecode'].map(self.dict_code_to_id)\n",
 65 |     "        train=pd.merge(left=X_train,right=Y_train,\n",
 66 |     "               how='left',left_on=['Integer_id','Image_id','Product_id'],\n",
 67 |     "               right_on=['Integer_id','Image_id','Product_id'])\n",
 68 |     "        prod_map=pd.Series(catalog_eng['Top level category'].values,\n",
 69 |     "                           index=catalog_eng['Prdtypecode']).to_dict()\n",
 70 |     "\n",
 71 |     "        train['product'] = train['Prdtypecode'].map(prod_map)\n",
 72 |     "        train['title_len']=train['Title'].progress_apply(lambda x : len(x.split()) if pd.notna(x) else 0)\n",
 73 |     "        train['desc_len']=train['Description'].progress_apply(lambda x : len(x.split()) if pd.notna(x) else 0)\n",
 74 |     "        train['title_desc_len']=train['title_len'] + train['desc_len']\n",
 75 |     "        train.loc[train['Description'].isnull(), 'Description'] = \" \"\n",
 76 |     "        train['title_desc'] = train['Title'] + \" \" + train['Description']\n",
 77 |     "        \n",
 78 |     "        self.train = train\n",
 79 |     "        \n",
 80 |     "    def get_sentences(self, text_col, remove_null_rows=False):\n",
 81 |     "        self.text_col = text_col\n",
 82 |     "        if remove_null_rows==True:\n",
 83 |     "            new_train = self.train[self.train[text_col].notnull()]\n",
 84 |     "\n",
 85 |     "        else:\n",
 86 |     "            new_train = self.train.copy()\n",
 87 |     "            \n",
 88 |     "        self.sentences = new_train[text_col].values\n",
 89 |     "        self.labels = new_train['labels'].values\n",
 90 |     "    \n",
 91 |     "    def prepare_test(self, text_col, test_data_path, phase=1):\n",
 92 |     "        X_test=pd.read_csv(test_data_path+f\"data/x_test_task1_phase{phase}.tsv\",sep=\"\\t\")\n",
 93 |     "        X_test.loc[X_test['Description'].isnull(), 'Description'] = \" \"\n",
 94 |     "        X_test['title_desc'] = X_test['Title'] + \" \" + X_test['Description']\n",
 95 |     "        self.X_test = X_test\n",
 96 |     "        self.test_sentences = X_test[text_col].values\n",
 97 |     " "
 98 |    ]
 99 |   },
100 |   {
101 |    "cell_type": "code",
102 |    "execution_count": null,
103 |    "metadata": {},
104 |    "outputs": [],
105 |    "source": [
106 |     "text_col = 'title_desc'\n",
107 |     "val_size = 0.1\n",
108 |     "random_state=2020\n",
109 |     "num_class = 27\n",
110 |     "do_gridsearch = False"
111 |    ]
112 |   },
113 |   {
114 |    "cell_type": "code",
115 |    "execution_count": null,
116 |    "metadata": {},
117 |    "outputs": [],
118 |    "source": [
119 |     "kwargs = {'add_logits':['cam', 'fla']}\n",
120 |     "\n",
121 |     "\n",
122 |     "cam_path = '/../input/camembert-vec-256m768-10ep/'\n",
123 |     "flau_path = '/../input/flaubertlogits2107/' \n",
124 |     "res_path = '/../input/resnextfinal/'\n",
125 |     "cms_path = '/../input/crossmodal-v0/'\n",
126 |     "vca_path = '/../input/vec-concat-9093/'\n",
127 |     "vca_path_phase2 = '/../input/predictions-test-phase2-vec-fusion/'\n",
128 |     "aem_path = '/../input/addition-ensemble-latest/'\n",
129 |     "\n",
130 |     "\n",
131 |     "val_logits_path = {'cam':cam_path + 'validation_set_softmax_logits.npy',\n",
132 |     "              'fla':flau_path + 'validation_set_softmax_logits.npy',\n",
133 |     "              'res':res_path + 'Valid_resnext50_32x4d_phase1_softmax_logits.npy',\n",
134 |     "                'vca':vca_path + 'softmax_logits_val_9093.npy',\n",
135 |     "                  'aem':aem_path + 'softmax_logits_val_add.npy'}\n",
136 |     "\n",
137 |     "test_logits_path_phase1 = {'cam':cam_path+f'X_test_phase1_softmax_logits.npy',\n",
138 |     "              'fla':flau_path + f'X_test_phase1_softmax_logits.npy', \n",
139 |     "              'res':res_path + f'Test_resnext50_32x4d_phase1_softmax_logits.npy',\n",
140 |     "                'vca':vca_path + f'softmax_logits_test_9093.npy'}\n",
141 |     "\n",
142 |     "test_logits_path_phase2 = {'cam':cam_path+f'X_test_phase2_softmax_logits.npy',\n",
143 |     "                  'fla':flau_path + f'X_test_phase2_softmax_logits.npy', \n",
144 |     "                  'res':res_path + f'Test_resnext50_32x4d_phase2_softmax_logits.npy',\n",
145 |     "                    'vca':vca_path_phase2 + f'softmax_logits_test_phase2_9093.npy'}\n",
146 |     "                           \n",
147 |     "\n"
148 |    ]
149 |   },
150 |   {
151 |    "cell_type": "code",
152 |    "execution_count": null,
153 |    "metadata": {},
154 |    "outputs": [],
155 |    "source": [
156 |     "## Get validation dataset from original train dataset\n",
157 |     "Preprocess = SigirPreprocess(\"/../input/textphase1/\")\n",
158 |     "Preprocess.prepare_data()\n",
159 |     "Preprocess.get_sentences(text_col, True)\n",
160 |     "\n",
161 |     "full_data = Preprocess.train\n",
162 |     "labels = Preprocess.labels\n",
163 |     "index = full_data.Integer_id\n",
164 |     "\n",
165 |     "\n",
166 |     "tr_index, val_index, tr_labels, val_labels = train_test_split(index, labels,\n",
167 |     "                                                    stratify=labels,\n",
168 |     "                                                    random_state=random_state, \n",
169 |     "                                                    test_size=val_size)\n",
170 |     "\n",
171 |     "train_data = full_data.loc[tr_index, :]\n",
172 |     "train_data.reset_index(inplace=True, drop=True)\n",
173 |     "val_data = full_data.loc[val_index, :]\n",
174 |     "val_data.reset_index(inplace=True, drop=True)\n",
175 |     "\n",
176 |     "full_data.loc[val_index, 'sample'] = 'val'\n",
177 |     "full_data['sample'].fillna('train', inplace=True)"
178 |    ]
179 |   },
180 |   {
181 |    "cell_type": "code",
182 |    "execution_count": null,
183 |    "metadata": {},
184 |    "outputs": [],
185 |    "source": [
186 |     "def preparelogits_df(logit_paths, df=None, val_labels=None, **kwargs):\n",
187 |     "    '''Load per-model softmax outputs and append them to df as columns.\n",
188 |     "\n",
189 |     "    logit_paths maps a model key to a .npy file; each loaded array becomes\n",
190 |     "    27 columns named key_1 .. key_27. If kwargs['add_logits'] lists model\n",
191 |     "    keys, their element-wise mean is added under the '_'-joined key.\n",
192 |     "    When val_labels is an ndarray, the macro F1 of each argmax is printed.\n",
193 |     "    Returns df concatenated column-wise with every logit frame.\n",
194 |     "    '''\n",
195 |     "    logits_dict = {}\n",
196 |     "    dfs_dict = {}\n",
197 |     "    for key, logit_path in logit_paths.items():\n",
198 |     "        logits_dict[key] = np.load(logit_path)\n",
199 |     "        dfs_dict[key] = pd.DataFrame(logits_dict[key],\n",
200 |     "                                     columns=[key + \"_\" + str(i) for i in range(1,28)])\n",
201 |     "        print(\"Shape of logit arrays: {}\".format(logits_dict[key].shape))\n",
202 |     "\n",
203 |     "    add_keys = kwargs.get('add_logits')\n",
204 |     "    if add_keys:\n",
205 |     "        add_str = '_'.join(add_keys)\n",
206 |     "        # Average into a new array; an in-place += here would also overwrite the\n",
207 |     "        # first model's logits (and any DataFrame still viewing that array).\n",
208 |     "        logits_dict[add_str] = sum(logits_dict[k] for k in add_keys) / len(add_keys)\n",
209 |     "        dfs_dict[add_str] = pd.DataFrame(logits_dict[add_str],\n",
210 |     "                                     columns=[add_str + \"_\" + str(i) for i in range(1,28)])\n",
211 |     "        print(\"Shape of logit arrays: {}\".format(logits_dict[add_str].shape))\n",
212 |     "\n",
213 |     "    if isinstance(val_labels, np.ndarray):\n",
214 |     "        for key, logits in logits_dict.items():\n",
215 |     "            print(\"\"\"Validation F1 scores for {} logits: {} \"\"\".format(key,\n",
216 |     "                f1_score(val_labels, np.argmax(logits, axis=1), average='macro')))\n",
217 |     "\n",
218 |     "    df = pd.concat([df] + list(dfs_dict.values()), axis=1)\n",
219 |     "    return df"
220 |    ]
221 |   },
222 |   {
223 |    "cell_type": "code",
224 |    "execution_count": null,
225 |    "metadata": {},
226 |    "outputs": [],
227 |    "source": [
228 |     "val_data = preparelogits_df(val_logits_path, df=val_data, \n",
229 |     "                            val_labels=val_labels, **kwargs)"
230 |    ]
231 |   },
232 |   {
233 |    "cell_type": "markdown",
234 |    "metadata": {},
235 |    "source": [
236 |     "# Model Data Prep"
237 |    ]
238 |   },
239 |   {
240 |    "cell_type": "code",
241 |    "execution_count": null,
242 |    "metadata": {},
243 |    "outputs": [],
244 |    "source": [
245 |     "df_log = val_data.copy()\n",
246 |     "\n",
247 |     "probas_cols = [\"fla_\" + str(i) for i in range(1,28)] + [\"cam_\" + str(i) for i in range(1,28)] +\\\n",
248 |     "[\"res_\" + str(i) for i in range(1,28)] \\\n",
249 |     "+ [\"vca_\" + str(i) for i in range(1,28)] \\\n",
250 |     "\n",
251 |     "X = df_log[probas_cols]\n",
252 |     "y = df_log['labels'].values\n",
253 |     "X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.2, random_state=random_state)\n"
254 |    ]
255 |   },
256 |   {
257 |    "cell_type": "code",
258 |    "execution_count": null,
259 |    "metadata": {},
260 |    "outputs": [],
261 |    "source": [
262 |     "from scipy.stats import randint as sp_randint\n",
263 |     "from scipy.stats import uniform as sp_uniform\n",
264 |     "\n",
265 |     "from sklearn.model_selection import RandomizedSearchCV, GridSearchCV\n",
266 |     "n_HP_points_to_test = 100\n",
267 |     "\n",
268 |     "\n",
269 |     "param_test ={'num_leaves': sp_randint(6, 50), \n",
270 |     "             'min_child_samples': sp_randint(100, 500), \n",
271 |     "             'min_child_weight': [1e-5, 1e-3, 1e-2, 1e-1, 1, 1e1, 1e2, 1e3, 1e4],\n",
272 |     "             'subsample': sp_uniform(loc=0.2, scale=0.8), \n",
273 |     "             'colsample_bytree': sp_uniform(loc=0.4, scale=0.6),\n",
274 |     "             'reg_alpha': [0, 1e-1, 1, 2, 5, 7, 10, 50, 100],\n",
275 |     "             'reg_lambda': [0, 1e-1, 1, 5, 10, 20, 50, 100],\n",
276 |     "#              \"bagging_fraction\" : [0.5, 0.6, 0.7, 0.8, 0.9],\n",
277 |     "#              \"feature_fraction\":[0.5, 0.6, 0.7, 0.8, 0.9]\n",
278 |     "            }\n",
279 |     "\n",
280 |     "\n",
281 |     "\n",
282 |     "\n",
283 |     "fit_params={\n",
284 |     "            \"early_stopping_rounds\":100, \n",
285 |     "            \"eval_metric\" : 'multi_logloss', \n",
286 |     "            \"eval_set\" : [(X_test,y_test)],\n",
287 |     "            'eval_names': ['valid'],\n",
288 |     "            #'callbacks': [lgb.reset_parameter(learning_rate=learning_rate_010_decay_power_099)],\n",
289 |     "            'verbose': 100,\n",
290 |     "            'categorical_feature': 'auto'}\n",
291 |     "\n",
292 |     "\n",
293 |     "clf = lgb.LGBMClassifier(num_iteration=1000, max_depth=-1, random_state=314, silent=True,\n",
294 |     "                         metric='multi_logloss', n_jobs=4, early_stopping_rounds=100,\n",
295 |     "                         num_class=num_class, objective= \"multiclass\")\n",
296 |     "gs = RandomizedSearchCV(\n",
297 |     "    estimator=clf, param_distributions=param_test, \n",
298 |     "    n_iter=n_HP_points_to_test,\n",
299 |     "    cv=3,\n",
300 |     "    refit=True,\n",
301 |     "    random_state=314,\n",
302 |     "    verbose=True)\n",
303 |     "\n",
304 |     "if do_gridsearch==True:\n",
305 |     "    gs.fit(X_train, y_train, **fit_params)\n",
306 |     "    print('Best score reached: {} with params: {} '.format(gs.best_score_, gs.best_params_))"
307 |    ]
308 |   },
309 |   {
310 |    "cell_type": "code",
311 |    "execution_count": null,
312 |    "metadata": {},
313 |    "outputs": [],
314 |    "source": [
315 |     "# opt_parameters = gs.best_params_\n",
316 |     "opt_parameters = {'colsample_bytree': 0.5284213741879101, 'min_child_samples': 125, \n",
317 |     "         'min_child_weight': 10.0, 'num_leaves': 22, \n",
318 |     "         'reg_alpha': 0.1, 'reg_lambda': 20, 'subsample': 0.3080033455431848} \n"
319 |    ]
320 |   },
321 |   {
322 |    "cell_type": "markdown",
323 |    "metadata": {},
324 |    "source": [
325 |     "# Model Training"
326 |    ]
327 |   },
328 |   {
329 |    "cell_type": "code",
330 |    "execution_count": null,
331 |    "metadata": {},
332 |    "outputs": [],
333 |    "source": [
334 |     "### Run lightgbm to get weights for different class logits\n",
335 |     "\n",
336 |     "t0 = time.time()\n",
337 |     "\n",
338 |     "model_met = 'fit' #'xgb'#'train' #fit\n",
339 |     "\n",
340 |     "params = {\n",
341 |     "          \"objective\" : \"multiclass\",\n",
342 |     "          \"num_class\" : num_class,\n",
343 |     "          \"num_leaves\" : 60,\n",
344 |     "          \"max_depth\": -1,\n",
345 |     "          \"learning_rate\" : 0.01,\n",
346 |     "          \"bagging_fraction\" : 0.9,  # subsample\n",
347 |     "          \"feature_fraction\" : 0.9,  # colsample_bytree\n",
348 |     "          \"bagging_freq\" : 5,        # subsample_freq\n",
349 |     "          \"bagging_seed\" : 2018,\n",
350 |     "          \"verbosity\" : -1 }\n",
351 |     "\n",
352 |     "lgtrain, lgval = lgb.Dataset(X_train, y_train), lgb.Dataset(X_test, y_test)\n",
353 |     "\n",
354 |     "if model_met == 'train':\n",
355 |     "    params.update(opt_parameters)\n",
356 |     "    params.update(fit_params)\n",
357 |     "    \n",
358 |     "    lgbmodel = lgb.train(params, lgtrain, valid_sets=[lgtrain, lgval], \n",
359 |     "                         num_iterations = 1000, metric= 'multi_logloss')\n",
360 |     "    train_logits = lgbmodel.predict(X_train) \n",
361 |     "    test_logits = lgbmodel.predict(X_test)\n",
362 |     "\n",
363 |     "    train_pred = np.argmax(train_logits, axis=1) \n",
364 |     "    test_pred = np.argmax(test_logits, axis=1) \n",
365 |     "elif model_met == 'xgb':\n",
366 |     "    dtrain = xgb.DMatrix(X_train, label=y_train)\n",
367 |     "    dtrain.save_binary('xgb_train.buffer')\n",
368 |     "    dtest = xgb.DMatrix(X_test, label=y_test)\n",
369 |     "    \n",
370 |     "    num_round = 200\n",
371 |     "    xgb_param = {'max_depth': 5, 'eta': 0.1, 'seed':2020, 'verbosity':1,\n",
372 |     "                 'objective': 'multi:softmax', 'num_class':num_class}\n",
373 |     "    xgb_param['nthread'] = 4\n",
374 |     "    xgb_param['eval_metric'] = 'mlogloss'\n",
375 |     "    evallist = [(dtest, 'eval'), (dtrain, 'train')]\n",
376 |     "    bst = xgb.train(xgb_param, dtrain, num_round, evallist\n",
377 |     "                    , early_stopping_rounds=10\n",
378 |     "                   )\n",
379 |     "    \n",
380 |     "    train_logits = bst.predict(xgb.DMatrix(X_train), ntree_limit=bst.best_ntree_limit) \n",
381 |     "    test_logits = bst.predict(xgb.DMatrix(X_test), ntree_limit=bst.best_ntree_limit)\n",
382 |     "\n",
383 |     "    train_pred = train_logits \n",
384 |     "    test_pred = test_logits \n",
385 |     "    \n",
386 |     "else:\n",
387 |     "\n",
388 |     "    lgbmodel = lgb.LGBMClassifier(**clf.get_params())\n",
389 |     "    #set optimal parameters\n",
390 |     "    lgbmodel.set_params(**opt_parameters)\n",
391 |     "    lgbmodel.fit(X_train, y_train, **fit_params)\n",
392 |     "    \n",
393 |     "    train_logits = lgbmodel.predict(X_train) \n",
394 |     "    test_logits = lgbmodel.predict(X_test)\n",
395 |     "\n",
396 |     "    train_pred = train_logits \n",
397 |     "    test_pred = test_logits \n",
398 |     "    \n",
399 |     "print(\"Validation F1: {} and Training F1: {} \".format(\n",
400 |     "    f1_score(y_test, test_pred, average='macro'), \n",
401 |     "    f1_score(y_train, train_pred, average='macro')))\n",
402 |     "\n",
403 |     "if model_met == 'train':\n",
404 |     "    feat_imp = pd.DataFrame({'feature':probas_cols, \n",
405 |     "                             'logit_kind': [i.split('_')[0] for i in probas_cols],\n",
406 |     "                             'imp':lgbmodel.feature_importance()/sum(lgbmodel.feature_importance())})\n",
407 |     "\n",
408 |     "\n",
409 |     "    lgbmodel.save_model('lgb_classifier_81feats.txt', num_iteration=lgbmodel.best_iteration) \n",
410 |     "    print(\"\"\"Feature Importances by logits group: \n",
411 |     "          \"\"\", feat_imp.groupby(['logit_kind'])['imp'].sum())\n",
412 |     "else:\n",
413 |     "    feat_imp = pd.DataFrame({'feature':probas_cols, \n",
414 |     "                             'logit_kind': [i.split('_')[0] for i in probas_cols],\n",
415 |     "                             'imp':lgbmodel.feature_importances_/sum(lgbmodel.feature_importances_)})\n",
416 |     "\n",
417 |     "    print(\"\"\"Feature Importances by logits group: \n",
418 |     "          \"\"\", feat_imp.groupby(['logit_kind'])['imp'].sum())\n",
419 |     "    \n",
420 |     "import shap\n",
421 |     "explainer = shap.TreeExplainer(lgbmodel)\n",
422 |     "shap_values = explainer.shap_values(X)\n",
423 |     "print(\"Time Elapsed: {:}.\".format(format_time(time.time() - t0)))"
424 |    ]
425 |   },
426 |   {
427 |    "cell_type": "code",
428 |    "execution_count": null,
429 |    "metadata": {},
430 |    "outputs": [],
431 |    "source": [
432 |     "for n, path in enumerate(['/kaggle/input/textphase1/', \n",
433 |     "                          '/kaggle/input/testphase2/']):\n",
434 |     "    phase = n+1\n",
435 |     "    if phase==1:\n",
436 |     "        test_logits_path = test_logits_path_phase1\n",
437 |     "    else:\n",
438 |     "        test_logits_path = test_logits_path_phase2\n",
439 |     "    Preprocess.prepare_test(text_col, path, phase)\n",
440 |     "    X_test_phase1= Preprocess.X_test\n",
441 |     "\n",
442 |     "    test_phase1 = preparelogits_df(test_logits_path,\n",
443 |     "                                df=X_test_phase1, val_labels=None, **kwargs)\n",
444 |     "    \n",
445 |     "    phase1_logits = lgbmodel.predict(test_phase1[probas_cols].values) \n",
446 |     "    if model_met == 'train':\n",
447 |     "        predictions = np.argmax(phase1_logits, axis=1) \n",
448 |     "    elif model_met == 'xgb':\n",
449 |     "        phase1_logits = bst.predict(xgb.DMatrix(test_phase1[probas_cols]), \n",
450 |     "                                    ntree_limit=bst.best_ntree_limit) \n",
451 |     "        predictions = phase1_logits\n",
452 |     "    else:\n",
453 |     "        predictions = phase1_logits\n",
454 |     "    X_test_phase1['prediction_model']= predictions\n",
455 |     "    X_test_phase1['Prdtypecode']=X_test_phase1['prediction_model'].map(Preprocess.dict_id_to_code)\n",
456 |     "    print(X_test_phase1['Prdtypecode'].value_counts())\n",
457 |     "    X_test_phase1=X_test_phase1.drop(['prediction_model','Title','Description'],axis=1)\n",
458 |     "    X_test_phase1.to_csv(f'y_test_task1_phase{phase}_pred_.tsv',sep='\\t',index=False)"
459 |    ]
460 |   }
461 |  ],
462 |  "metadata": {
463 |   "kernelspec": {
464 |    "display_name": "Python 3",
465 |    "language": "python",
466 |    "name": "python3"
467 |   },
468 |   "language_info": {
469 |    "codemirror_mode": {
470 |     "name": "ipython",
471 |     "version": 3
472 |    },
473 |    "file_extension": ".py",
474 |    "mimetype": "text/x-python",
475 |    "name": "python",
476 |    "nbconvert_exporter": "python",
477 |    "pygments_lexer": "ipython3",
478 |    "version": "3.7.7"
479 |   },
480 |   "toc": {
481 |    "base_numbering": 1,
482 |    "nav_menu": {},
483 |    "number_sections": true,
484 |    "sideBar": true,
485 |    "skip_h1_title": false,
486 |    "title_cell": "Table of Contents",
487 |    "title_sidebar": "Contents",
488 |    "toc_cell": false,
489 |    "toc_position": {},
490 |    "toc_section_display": true,
491 |    "toc_window_display": false
492 |   }
493 |  },
494 |  "nbformat": 4,
495 |  "nbformat_minor": 4
496 | }
497 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 depshad
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Deep Learning Framework for Multi-modal Product Classification
 2 | Code repository for Rakuten Data Challenge : Multimodal Product Classification and Retrieval. 
 3 | 
 4 | Team Transformer's solution : Deep Multi-level Boosted Fusion Learning Framework for Multi-modal Product Classification 
 5 |  
 6 | Paper Link : https://sigir-ecom.github.io/ecom20DCPapers/SIGIR_eCom20_DC_paper_8.pdf
 7 | 
 8 | 
 9 | Data challenge link : https://sigir-ecom.github.io/data-task.html
10 | 
11 | ## Abstract
12 | 
13 | In this paper, we present our approach for the ‘Multimodal Product 
14 | Classification’ task as a part of the 2020 SIGIR Workshop On eCommerce (ECOM20). The specific objective of this task is to build and
15 | submit systems that classify previously unseen products into their
16 | corresponding product type codes. We propose a deep Multi-Modal
17 | Multi-level Boosted Fusion Learning Framework used to categorize
18 | large-scale multi-modal (text and image) product data into product
19 | type codes. Our proposed final methodology achieved a macro
20 | F1-score of 91.94 on the phase 1 test dataset which is the top-scoring
21 | submission and third position on the scoreboard for phase 2 test
22 | dataset with macro F1-score of 90.53.
23 | 
24 | ## Code Usage
25 | 
26 | ### Unimodal Model Training and Prediction Scripts
27 | 
28 | 1. SEResnext50_train_predict.ipynb : Fine tune the pre-trained SEResnext50 model on Rakuten images
29 | 
30 | 2. camembert_train_predict.ipynb : Fine tune the pre-trained CamemBERT model on French text; Custom CamemBERT model with vector output (used later for feature fusion)
31 | 
32 | 3. flaubert_train_predict.ipynb : Fine tune the pre-trained Flaubert model on French text; Custom Flaubert model with vector output (used later for feature fusion)
33 | 
34 | ### Multimodal Feature Level Fusion
35 | 1. multi-modal_concatenate_fusion.ipynb : Concatenate the features extracted and train NN module on top
36 | 
37 | ### Probability Level Fusion
38 | 1. Boosted Late-Fusion.ipynb : Train LightGBM model with class probability as input
39 | 
40 | 
41 | 
42 | <p align="center"> Multi-modal Joint Representation Learning </p> 
43 | 
44 | <p align="center">
45 |   <img src="https://user-images.githubusercontent.com/56831322/89715638-a5ff2280-d9c4-11ea-9ca1-be884c8b9c26.png" />
46 | </p>
47 | 
48 | 
49 | 
50 | <p align="center"> Late Fusion Model </p> 
51 | 
52 | <p align="center">
53 |   <img src="https://user-images.githubusercontent.com/56831322/89715668-f1193580-d9c4-11ea-8fcd-042e909ee30d.png" />
54 | </p>
55 | 
56 | 
57 | 
58 | 


--------------------------------------------------------------------------------
/camembert_train_predict.ipynb:
--------------------------------------------------------------------------------
   1 | {
   2 |  "cells": [
   3 |   {
   4 |    "cell_type": "code",
   5 |    "execution_count": null,
   6 |    "metadata": {
   7 |     "_cell_guid": "79c7e3d0-c299-4dcb-8224-4455121ee9b0",
   8 |     "_uuid": "d629ff2d2480ee46fbb7e2d37f6b5fab8052498a"
   9 |    },
  10 |    "outputs": [],
  11 |    "source": [
  12 |     "import os, time, datetime\n",
  13 |     "import numpy as np\n",
  14 |     "import pandas as pd\n",
  15 |     "from tqdm import tqdm\n",
  16 |     "import random\n",
  17 |     "import logging\n",
  18 |     "tqdm.pandas()\n",
  19 |     "import seaborn as sns\n",
  20 |     "from sklearn.model_selection import train_test_split\n",
  21 |     "\n",
  22 |     "#NN Packages\n",
  23 |     "import torch\n",
  24 |     "import torch.nn as nn\n",
  25 |     "from torch.utils.data import TensorDataset, random_split,DataLoader, RandomSampler, SequentialSampler\n",
  26 |     "\n",
  27 |     "logger = logging.getLogger(__name__)\n",
  28 |     "\n",
  29 |     "\n",
  30 |     "if torch.cuda.is_available():    \n",
  31 |     "\n",
  32 |     "    # Tell PyTorch to use the GPU.    \n",
  33 |     "    device = torch.device(\"cuda\")\n",
  34 |     "\n",
  35 |     "    print('There are %d GPU(s) available.' % torch.cuda.device_count())\n",
  36 |     "\n",
  37 |     "    print('We will use the GPU:', torch.cuda.get_device_name(0))\n",
  38 |     "\n",
  39 |     "# If not...\n",
  40 |     "else:\n",
  41 |     "    print('No GPU available, using the CPU instead.')\n",
  42 |     "    device = torch.device(\"cpu\")"
  43 |    ]
  44 |   },
  45 |   {
  46 |    "cell_type": "code",
  47 |    "execution_count": null,
  48 |    "metadata": {},
  49 |    "outputs": [],
  50 |    "source": [
  51 |     "def format_time(elapsed):\n",
  52 |     "    '''\n",
  53 |     "    Takes a time in seconds and returns a string hh:mm:ss\n",
  54 |     "    '''\n",
  55 |     "    # Round to the nearest second.\n",
  56 |     "    elapsed_rounded = int(round((elapsed)))\n",
  57 |     "    \n",
  58 |     "    # Format as hh:mm:ss\n",
  59 |     "    return str(datetime.timedelta(seconds=elapsed_rounded))\n",
  60 |     "\n",
  61 |     "class SigirPreprocess():\n",
  62 |     "    \n",
  63 |     "    def __init__(self, text_data_path):\n",
  64 |     "        self.text_data_path = text_data_path\n",
  65 |     "        self.train = None\n",
  66 |     "        self.dict_code_to_id = {}\n",
  67 |     "        self.dict_id_to_code = {}\n",
  68 |     "        self.list_tags = {}\n",
  69 |     "        self.sentences = []\n",
  70 |     "        self.labels = []\n",
  71 |     "        self.text_col = None\n",
  72 |     "        self.X_test = None\n",
  73 |     "    def prepare_data(self ):\n",
  74 |     "        catalog_eng= pd.read_csv(self.text_data_path+\"data/catalog_english_taxonomy.tsv\",sep=\"\\t\")\n",
  75 |     "        X_train= pd.read_csv(self.text_data_path+\"data/X_train.tsv\",sep=\"\\t\")\n",
  76 |     "        Y_train= pd.read_csv(self.text_data_path+\"data/Y_train.tsv\",sep=\"\\t\")\n",
  77 |     "        \n",
  78 |     "        self.list_tags = list(Y_train['Prdtypecode'].unique())\n",
  79 |     "        for i,tag in enumerate(self.list_tags):\n",
  80 |     "            self.dict_code_to_id[tag] = i \n",
  81 |     "            self.dict_id_to_code[i]=tag\n",
  82 |     "        print(self.dict_code_to_id)\n",
  83 |     "            \n",
  84 |     "        Y_train['labels']=Y_train['Prdtypecode'].map(self.dict_code_to_id)\n",
  85 |     "        train=pd.merge(left=X_train,right=Y_train,\n",
  86 |     "               how='left',left_on=['Integer_id','Image_id','Product_id'],\n",
  87 |     "               right_on=['Integer_id','Image_id','Product_id'])\n",
  88 |     "        prod_map=pd.Series(catalog_eng['Top level category'].values,\n",
  89 |     "                           index=catalog_eng['Prdtypecode']).to_dict()\n",
  90 |     "\n",
  91 |     "        train['product'] = train['Prdtypecode'].map(prod_map)\n",
  92 |     "        train['title_len']=train['Title'].progress_apply(lambda x : len(x.split()) if pd.notna(x) else 0)\n",
  93 |     "        train['desc_len']=train['Description'].progress_apply(lambda x : len(x.split()) if pd.notna(x) else 0)\n",
  94 |     "        train['title_desc_len']=train['title_len'] + train['desc_len']\n",
  95 |     "        train.loc[train['Description'].isnull(), 'Description'] = \" \"\n",
  96 |     "        train['title_desc'] = train['Title'] + \" \" + train['Description']\n",
  97 |     "        \n",
  98 |     "        self.train = train\n",
  99 |     "        \n",
 100 |     "    def get_sentences(self, text_col, remove_null_rows=False):\n",
 101 |     "        self.text_col = text_col\n",
 102 |     "        if remove_null_rows==True:\n",
 103 |     "            new_train = self.train[self.train[text_col].notnull()]\n",
 104 |     "\n",
 105 |     "        else:\n",
 106 |     "            new_train = self.train.copy()\n",
 107 |     "            \n",
 108 |     "        self.sentences = new_train[text_col].values\n",
 109 |     "        self.labels = new_train['labels'].values\n",
 110 |     "    \n",
 111 |     "    def prepare_test(self, text_col):\n",
 112 |     "        X_test=pd.read_csv(self.text_data_path+\"data/x_test_task1_phase1.tsv\",sep=\"\\t\")\n",
 113 |     "        X_test.loc[X_test['Description'].isnull(), 'Description'] = \" \"\n",
 114 |     "        X_test['title_desc'] = X_test['Title'] + \" \" + X_test['Description']\n",
 115 |     "        self.X_test = X_test\n",
 116 |     "        self.test_sentences = X_test[text_col].values\n",
 117 |     "        "
 118 |    ]
 119 |   },
 120 |   {
 121 |    "cell_type": "code",
 122 |    "execution_count": null,
 123 |    "metadata": {},
 124 |    "outputs": [],
 125 |    "source": [
 126 |     "text_col = 'title_desc'\n",
 127 |     "max_len = 256\n",
 128 |     "val_size = 0.1\n",
 129 |     "\n"
 130 |    ]
 131 |   },
 132 |   {
 133 |    "cell_type": "code",
 134 |    "execution_count": null,
 135 |    "metadata": {},
 136 |    "outputs": [],
 137 |    "source": [
 138 |     "Preprocess = SigirPreprocess(\"/kaggle/input/textphase1/\")\n",
 139 |     "Preprocess.prepare_data()\n",
 140 |     "Preprocess.get_sentences(text_col, True)"
 141 |    ]
 142 |   },
 143 |   {
 144 |    "cell_type": "code",
 145 |    "execution_count": null,
 146 |    "metadata": {},
 147 |    "outputs": [],
 148 |    "source": [
 149 |     "sentences = Preprocess.sentences\n",
 150 |     "labels = Preprocess.labels\n",
 151 |     "print(\"Total number of sentences:{}, labels:{}\".format(len(sentences), len(labels)))"
 152 |    ]
 153 |   },
 154 |   {
 155 |    "cell_type": "code",
 156 |    "execution_count": null,
 157 |    "metadata": {},
 158 |    "outputs": [],
 159 |    "source": [
 160 |     "from transformers import CamembertConfig, CamembertTokenizer, CamembertModel, CamembertForSequenceClassification, AdamW\n",
 161 |     "from transformers.modeling_roberta import RobertaClassificationHead\n",
 162 |     "print('Using Camembert')\n",
 163 |     "modelname = 'camembert-base'\n",
 164 |     "tokenizer = CamembertTokenizer.from_pretrained(modelname, do_lowercase=False)\n"
 165 |    ]
 166 |   },
 167 |   {
 168 |    "cell_type": "code",
 169 |    "execution_count": null,
 170 |    "metadata": {},
 171 |    "outputs": [],
 172 |    "source": [
 173 |     "#function to prepare input for model training\n",
 174 |     "def prep_input(sentences,labels, max_len):\n",
 175 |     "    input_ids = []\n",
 176 |     "    attention_masks = []\n",
 177 |     "\n",
 178 |     "    # For every sentence...\n",
 179 |     "    for sent in tqdm(sentences):\n",
 180 |     "        # `encode_plus` will:\n",
 181 |     "        #   (1) Tokenize the sentence.\n",
 182 |     "        #   (2) Prepend the `[CLS]` token to the start.\n",
 183 |     "        #   (3) Append the `[SEP]` token to the end.\n",
 184 |     "        #   (4) Map tokens to their IDs.\n",
 185 |     "        #   (5) Pad or truncate the sentence to `max_length`\n",
 186 |     "        #   (6) Create attention masks for [PAD] tokens.\n",
 187 |     "        encoded_dict = tokenizer.encode_plus(\n",
 188 |     "                            sent,                      # Sentence to encode.\n",
 189 |     "                            add_special_tokens = True, # Add '[CLS]' and '[SEP]'\n",
 190 |     "                            max_length = max_len,           # Pad & truncate all sentences.\n",
 191 |     "                            pad_to_max_length = True,\n",
 192 |     "                            return_attention_mask = True,   # Construct attn. masks.\n",
 193 |     "                            return_tensors = 'pt',     # Return pytorch tensors.\n",
 194 |     "                       )\n",
 195 |     "\n",
 196 |     "        # Add the encoded sentence to the list.    \n",
 197 |     "        input_ids.append(encoded_dict['input_ids'])\n",
 198 |     "\n",
 199 |     "        # And its attention mask (simply differentiates padding from non-padding).\n",
 200 |     "        attention_masks.append(encoded_dict['attention_mask'])\n",
 201 |     "\n",
 202 |     "    # Convert the lists into tensors.\n",
 203 |     "    input_ids = torch.cat(input_ids, dim=0)\n",
 204 |     "    attention_masks = torch.cat(attention_masks, dim=0)\n",
 205 |     "    if labels is not None:\n",
 206 |     "        labels = torch.tensor(labels)\n",
 207 |     "        return input_ids,attention_masks,labels\n",
 208 |     "    else:\n",
 209 |     "        return input_ids,attention_masks\n",
 210 |     "    "
 211 |    ]
 212 |   },
 213 |   {
 214 |    "cell_type": "code",
 215 |    "execution_count": null,
 216 |    "metadata": {},
 217 |    "outputs": [],
 218 |    "source": [
 219 |     "input_ids,attention_masks,labels=prep_input(sentences,labels, max_len=max_len)\n",
 220 |     "print('Original: ', sentences[0])\n",
 221 |     "print('Token IDs:', input_ids[0]) "
 222 |    ]
 223 |   },
 224 |   {
 225 |    "cell_type": "markdown",
 226 |    "metadata": {},
 227 |    "source": [
 228 |     "### Camembert Model with Vector Output"
 229 |    ]
 230 |   },
 231 |   {
 232 |    "cell_type": "code",
 233 |    "execution_count": null,
 234 |    "metadata": {},
 235 |    "outputs": [],
 236 |    "source": [
 237 |     "# class RobertaClassificationHead(nn.Module):\n",
 238 |     "#     \"\"\"Head for sentence-level classification tasks.\"\"\"\n",
 239 |     "\n",
 240 |     "#     def __init__(self, config):\n",
 241 |     "#         super().__init__()\n",
 242 |     "#         self.dense = nn.Linear(config.hidden_size, config.hidden_size)\n",
 243 |     "#         self.dropout = nn.Dropout(config.hidden_dropout_prob)\n",
 244 |     "#         self.out_proj = nn.Linear(config.hidden_size, config.num_labels)\n",
 245 |     "\n",
 246 |     "#     def forward(self, features, **kwargs):\n",
 247 |     "#         x = features[:, 0, :]  # take <s> token (equiv. to [CLS])\n",
 248 |     "#         x = self.dropout(x)\n",
 249 |     "#         x = self.dense(x)\n",
 250 |     "#         x = torch.tanh(x)\n",
 251 |     "#         feat = self.dropout(x)\n",
 252 |     "#         x = self.out_proj(feat)\n",
 253 |     "#         return x,feat"
 254 |    ]
 255 |   },
 256 |   {
 257 |    "cell_type": "code",
 258 |    "execution_count": null,
 259 |    "metadata": {},
 260 |    "outputs": [],
 261 |    "source": [
 262 |     "class vec_output_CamembertForSequenceClassification(CamembertModel):\n",
 263 |     "    config_class = CamembertConfig\n",
 264 |     "\n",
 265 |     "    def __init__(self, config):\n",
 266 |     "        super().__init__(config)\n",
 267 |     "        self.num_labels = config.num_labels\n",
 268 |     "\n",
 269 |     "        self.roberta = CamembertModel(config)\n",
 270 |     "        self.dense = nn.Linear(256*config.hidden_size, config.hidden_size)\n",
 271 |     "        self.dropout = nn.Dropout(0.1)\n",
 272 |     "        self.out_proj = nn.Linear(config.hidden_size, config.num_labels)\n",
 273 |     "        self.init_weights()\n",
 274 |     "\n",
 275 |     "\n",
 276 |     "    def forward(\n",
 277 |     "        self,\n",
 278 |     "        input_ids=None,\n",
 279 |     "        attention_mask=None,\n",
 280 |     "        token_type_ids=None,\n",
 281 |     "        position_ids=None,\n",
 282 |     "        head_mask=None,\n",
 283 |     "        inputs_embeds=None,\n",
 284 |     "        labels=None,\n",
 285 |     "        output_attentions=None,\n",
 286 |     "        output_hidden_states=None,\n",
 287 |     "    ):\n",
 288 |     "        outputs = self.roberta(\n",
 289 |     "            input_ids,\n",
 290 |     "            attention_mask=attention_mask,\n",
 291 |     "            token_type_ids=token_type_ids,\n",
 292 |     "            position_ids=position_ids,\n",
 293 |     "            head_mask=head_mask,\n",
 294 |     "            inputs_embeds=inputs_embeds,\n",
 295 |     "#             output_attentions=output_attentions,\n",
 296 |     "#             output_hidden_states=output_hidden_states,\n",
 297 |     "        )\n",
 298 |     "        sequence_output = outputs[0] #(B,256,768)\n",
 299 |     "        x = sequence_output.view(sequence_output.shape[0], 256*768)\n",
 300 |     "#         x = sequence_output[:, 0, :]  # take <s> token (equiv. to [CLS])-> #(B,768) Image -> (B,2048)\n",
 301 |     "        x = self.dense(x)  # 768 -> 768\n",
 302 |     "        feat= torch.tanh(x) \n",
 303 |     "        logits = self.out_proj(feat) # 768 -> 27\n",
 304 |     "        outputs = (logits,) + outputs[2:]\n",
 305 |     "\n",
 306 |     "        return outputs,feat  # ((logits, (hidden_states), (attentions)), feat) -- no loss is computed here"
 307 |    ]
 308 |   },
 309 |   {
 310 |    "cell_type": "code",
 311 |    "execution_count": null,
 312 |    "metadata": {},
 313 |    "outputs": [],
 314 |    "source": [
 315 |     "\n",
 316 |     "model = vec_output_CamembertForSequenceClassification.from_pretrained(\n",
 317 |     "    modelname, # Pretrained CamemBERT checkpoint name ('camembert-base', set above).\n",
 318 |     "    num_labels = len(Preprocess.dict_code_to_id), # The number of output labels--one per distinct Prdtypecode.\n",
 319 |     "                    # This is a multi-class task, so the count comes from the label map.\n",
 320 |     "    output_attentions = False, # Whether the model returns attentions weights.\n",
 321 |     "    output_hidden_states = False, # Whether the model returns all hidden-states.\n",
 322 |     ")\n",
 323 |     "model.cuda()"
 324 |    ]
 325 |   },
 326 |   {
 327 |    "cell_type": "code",
 328 |    "execution_count": null,
 329 |    "metadata": {},
 330 |    "outputs": [],
 331 |    "source": [
 332 |     "tr_inputs, val_inputs, tr_labels, val_labels = train_test_split(input_ids, labels,stratify=labels,\n",
 333 |     "                                                            random_state=2020, test_size=val_size)\n",
 334 |     "tr_masks, val_masks, u,v =   train_test_split(attention_masks, labels,stratify=labels,\n",
 335 |     "                                             random_state=2020, test_size=val_size)\n",
 336 |     "\n",
 337 |     "\n",
 338 |     "train_dataset=TensorDataset(tr_inputs, tr_masks, tr_labels)\n",
 339 |     "val_dataset=TensorDataset(val_inputs, val_masks, val_labels)\n",
 340 |     "train_sampler = RandomSampler(train_dataset) \n",
 341 |     "valid_sampler = SequentialSampler(val_dataset)\n",
 342 |     "from torch.utils.data import DataLoader, RandomSampler, SequentialSampler\n",
 343 |     "\n",
 344 |     "# The DataLoader needs to know our batch size for training, so we specify it \n",
 345 |     "# here. For fine-tuning BERT on a specific task, the authors recommend a batch \n",
 346 |     "# size of 16 or 32.\n",
 347 |     "batch_size = 32\n",
 348 |     "\n",
 349 |     "# Create the DataLoaders for our training and validation sets.\n",
 350 |     "# We'll take training samples in random order. \n",
 351 |     "train_dataloader = DataLoader(\n",
 352 |     "            train_dataset,  # The training samples.\n",
 353 |     "            sampler = train_sampler, # Select batches randomly\n",
 354 |     "            batch_size = batch_size # Trains with this batch size.\n",
 355 |     "        )\n",
 356 |     "\n",
 357 |     "# For validation the order doesn't matter, so we'll just read them sequentially.\n",
 358 |     "validation_dataloader = DataLoader(\n",
 359 |     "            val_dataset, # The validation samples.\n",
 360 |     "            sampler = valid_sampler, # Pull out batches sequentially.\n",
 361 |     "            batch_size = batch_size # Evaluate with this batch size.\n",
 362 |     "        )"
 363 |    ]
 364 |   },
 365 |   {
 366 |    "cell_type": "code",
 367 |    "execution_count": null,
 368 |    "metadata": {},
 369 |    "outputs": [],
 370 |    "source": [
 371 |     "optimizer = AdamW(model.parameters(),\n",
 372 |     "                  lr = 2e-5, # args.learning_rate - default is 5e-5, our notebook had 2e-5\n",
 373 |     "                  eps = 1e-8 # args.adam_epsilon  - default is 1e-8.\n",
 374 |     "                )\n"
 375 |    ]
 376 |   },
 377 |   {
 378 |    "cell_type": "code",
 379 |    "execution_count": null,
 380 |    "metadata": {},
 381 |    "outputs": [],
 382 |    "source": [
 383 |     "from transformers import get_linear_schedule_with_warmup\n",
 384 |     "\n",
 385 |     "# Number of training epochs. The BERT authors recommend between 2 and 4. \n",
 386 |     "# We run for 10 here, which is more than that recommendation, so watch the\n",
 387 |     "# validation metrics for signs of over-fitting the training data.\n",
 388 |     "epochs = 10\n",
 389 |     "\n",
 390 |     "# Total number of training steps is [number of batches] x [number of epochs]. \n",
 391 |     "# (Note that this is not the same as the number of training samples).\n",
 392 |     "total_steps = len(train_dataloader) * epochs\n",
 393 |     "\n",
 394 |     "# Create the learning rate scheduler.\n",
 395 |     "scheduler = get_linear_schedule_with_warmup(optimizer, \n",
 396 |     "                                            num_warmup_steps = 0, # Default value in run_glue.py\n",
 397 |     "                                            num_training_steps = total_steps)"
 398 |    ]
 399 |   },
 400 |   {
 401 |    "cell_type": "code",
 402 |    "execution_count": null,
 403 |    "metadata": {},
 404 |    "outputs": [],
 405 |    "source": [
 406 |     "# Function to calculate the accuracy of our predictions vs labels\n",
 407 |     "def flat_accuracy(preds, labels):\n",
 408 |     "    pred_flat = np.argmax(preds, axis=1).flatten()\n",
 409 |     "    labels_flat = labels.flatten()\n",
 410 |     "    return np.sum(pred_flat == labels_flat) / len(labels_flat)"
 411 |    ]
 412 |   },
 413 |   {
 414 |    "cell_type": "code",
 415 |    "execution_count": null,
 416 |    "metadata": {},
 417 |    "outputs": [],
 418 |    "source": [
 419 |     "import torch.nn as nn\n",
 420 |     "loss_criterion = nn.CrossEntropyLoss()"
 421 |    ]
 422 |   },
 423 |   {
 424 |    "cell_type": "code",
 425 |    "execution_count": null,
 426 |    "metadata": {},
 427 |    "outputs": [],
 428 |    "source": [
 429 |     "from sklearn.metrics import f1_score\n",
 430 |     "# This training code is based on the `run_glue.py` script here:\n",
 431 |     "# https://github.com/huggingface/transformers/blob/5bfcd0485ece086ebcbed2d008813037968a9e58/examples/run_glue.py#L128\n",
 432 |     "\n",
 433 |     "# Set the seed value all over the place to make this reproducible.\n",
 434 |     "seed_val = 42\n",
 435 |     "\n",
 436 |     "random.seed(seed_val)\n",
 437 |     "np.random.seed(seed_val)\n",
 438 |     "torch.manual_seed(seed_val)\n",
 439 |     "torch.cuda.manual_seed_all(seed_val)\n",
 440 |     "\n",
 441 |     "# We'll store a number of quantities such as training and validation loss, \n",
 442 |     "# validation accuracy, and timings.\n",
 443 |     "training_stats = []\n",
 444 |     "\n",
 445 |     "# Measure the total training time for the whole run.\n",
 446 |     "total_t0 = time.time()\n",
 447 |     "\n",
 448 |     "\n",
 449 |     "# For each epoch...\n",
 450 |     "for epoch_i in range(0, epochs):\n",
 451 |     "    \n",
 452 |     "    # ========================================\n",
 453 |     "    #               Training\n",
 454 |     "    # ========================================\n",
 455 |     "    \n",
 456 |     "    # Perform one full pass over the training set.\n",
 457 |     "\n",
 458 |     "    print(\"\")\n",
 459 |     "    print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))\n",
 460 |     "    print('Training...')\n",
 461 |     "    \n",
 462 |     "    #tr and val\n",
 463 |     "    vec_output_tr = []\n",
 464 |     "    vec_output_val =[]\n",
 465 |     "\n",
 466 |     "    # Measure how long the training epoch takes.\n",
 467 |     "    t0 = time.time()\n",
 468 |     "\n",
 469 |     "    # Reset the total loss for this epoch.\n",
 470 |     "    total_train_loss = 0\n",
 471 |     "\n",
 472 |     "    # Put the model into training mode. Don't be misled--the call to \n",
 473 |     "    # `train` just changes the *mode*, it doesn't *perform* the training.\n",
 474 |     "    # `dropout` and `batchnorm` layers behave differently during training\n",
 475 |     "    # vs. test (source: https://stackoverflow.com/questions/51433378/what-does-model-train-do-in-pytorch)\n",
 476 |     "    best_f1 = 0\n",
 477 |     "    model.train()\n",
 478 |     "\n",
 479 |     "    # For each batch of training data...\n",
 480 |     "    for step, batch in tqdm(enumerate(train_dataloader)):\n",
 481 |     "        \n",
 482 |     "        # Unpack this training batch from our dataloader. \n",
 483 |     "        #\n",
 484 |     "        \n",
 485 |     "        # As we unpack the batch, we'll also copy each tensor to the GPU using the \n",
 486 |     "        # `to` method.\n",
 487 |     "        #\n",
 488 |     "        # `batch` contains three pytorch tensors:\n",
 489 |     "        #   [0]: input ids \n",
 490 |     "        #   [1]: attention masks\n",
 491 |     "        #   [2]: labels \n",
 492 |     "        b_input_ids = batch[0].to(device)\n",
 493 |     "        b_input_mask = batch[1].to(device)\n",
 494 |     "        b_labels = batch[2].to(device)\n",
 495 |     "\n",
 496 |     "        \n",
 497 |     "        model.zero_grad()        \n",
 498 |     "\n",
 499 |     "        \n",
 500 |     "        logits,vec = model(b_input_ids, \n",
 501 |     "                     token_type_ids=None, \n",
 502 |     "                     attention_mask=b_input_mask\n",
 503 |     "                    )\n",
 504 |     "        #new\n",
 505 |     "        logits = logits[0]\n",
 506 |     "        \n",
 507 |     "        #Defining the loss\n",
 508 |     "        loss = loss_criterion(logits, b_labels)\n",
 509 |     "        \n",
 510 |     "        #saving the features_tr\n",
 511 |     "        vec = vec.detach().cpu().numpy()\n",
 512 |     "        vec_output_tr.extend(vec)\n",
 513 |     "        \n",
 514 |     "        # Accumulate the training loss over all of the batches so that we can\n",
 515 |     "        # calculate the average loss at the end. `loss` is a Tensor containing a\n",
 516 |     "        # single value; the `.item()` function just returns the Python value \n",
 517 |     "        # from the tensor.\n",
 518 |     "        total_train_loss += loss.item()\n",
 519 |     "\n",
 520 |     "        # Perform a backward pass to calculate the gradients.\n",
 521 |     "        loss.backward()\n",
 522 |     "\n",
 523 |     "        # Clip the norm of the gradients to 1.0.\n",
 524 |     "        # This is to help prevent the \"exploding gradients\" problem.\n",
 525 |     "        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)\n",
 526 |     "\n",
 527 |     "        # Update parameters and take a step using the computed gradient.\n",
 528 |     "        # The optimizer dictates the \"update rule\"--how the parameters are\n",
 529 |     "        # modified based on their gradients, the learning rate, etc.\n",
 530 |     "        optimizer.step()\n",
 531 |     "\n",
 532 |     "        # Update the learning rate.\n",
 533 |     "        scheduler.step()\n",
 534 |     "        \n",
 535 |     "        \n",
 536 |     "        \n",
 537 |     "\n",
 538 |     "    # Calculate the average loss over all of the batches.\n",
 539 |     "    avg_train_loss = total_train_loss / len(train_dataloader)            \n",
 540 |     "    \n",
 541 |     "    # Measure how long this epoch took.\n",
 542 |     "    training_time = format_time(time.time() - t0)\n",
 543 |     "\n",
 544 |     "    print(\"\")\n",
 545 |     "    print(\"  Average training loss: {0:.2f} \".format(avg_train_loss))\n",
 546 |     "    print(\"  Training epcoh took: {:} \".format(training_time))\n",
 547 |     "        \n",
 548 |     "    # ========================================\n",
 549 |     "    #               Validation\n",
 550 |     "    # ========================================\n",
 551 |     "    # After the completion of each training epoch, measure our performance on\n",
 552 |     "    # our validation set.\n",
 553 |     "\n",
 554 |     "    print(\"\")\n",
 555 |     "    print(\"Running Validation...\")\n",
 556 |     "\n",
 557 |     "    t0 = time.time()\n",
 558 |     "\n",
 559 |     "    # Put the model in evaluation mode--the dropout layers behave differently\n",
 560 |     "    # during evaluation.\n",
 561 |     "    model.eval()\n",
 562 |     "\n",
 563 |     "    # Tracking variables \n",
 564 |     "    total_eval_accuracy = 0\n",
 565 |     "    total_eval_loss = 0\n",
 566 |     "    nb_eval_steps = 0\n",
 567 |     "    predictions=[]\n",
 568 |     "    true_labels=[]\n",
 569 |     "    \n",
 570 |     "\n",
 571 |     "    # Evaluate data for one epoch\n",
 572 |     "    for batch in tqdm(validation_dataloader):\n",
 573 |     "        \n",
 574 |     "        # Unpack this validation batch from our dataloader. \n",
 575 |     "        #\n",
 576 |     "        # As we unpack the batch, we'll also copy each tensor to the GPU using \n",
 577 |     "        # the `to` method.\n",
 578 |     "        #\n",
 579 |     "        # `batch` contains three pytorch tensors:\n",
 580 |     "        #   [0]: input ids \n",
 581 |     "        #   [1]: attention masks\n",
 582 |     "        #   [2]: labels \n",
 583 |     "        b_input_ids = batch[0].to(device)\n",
 584 |     "        b_input_mask = batch[1].to(device)\n",
 585 |     "        b_labels = batch[2].to(device)\n",
 586 |     "        \n",
 587 |     "        # Tell pytorch not to bother with constructing the compute graph during\n",
 588 |     "        # the forward pass, since this is only needed for backprop (training).\n",
 589 |     "        with torch.no_grad():        \n",
 590 |     "\n",
 591 |     "            # Forward pass, calculate logit predictions.\n",
 592 |     "            # token_type_ids is the same as the \"segment ids\", which \n",
 593 |     "            # differentiates sentence 1 and 2 in 2-sentence tasks.\n",
 594 |     "            # The documentation for this `model` function is here: \n",
 595 |     "            # https://huggingface.co/transformers/v2.2.0/model_doc/bert.html#transformers.BertForSequenceClassification\n",
 596 |     "            # Get the \"logits\" output by the model. The \"logits\" are the output\n",
 597 |     "            # values prior to applying an activation function like the softmax.\n",
 598 |     "            logits,vec = model(b_input_ids, \n",
 599 |     "                           token_type_ids=None, \n",
 600 |     "                           attention_mask=b_input_mask\n",
 601 |     "                           )\n",
 602 |     "            \n",
 603 |     "        #new\n",
 604 |     "        logits = logits[0]\n",
 605 |     "        \n",
 606 |     "        #defining the val loss\n",
 607 |     "        loss = loss_criterion(logits, b_labels)\n",
 608 |     "        \n",
 609 |     "        \n",
 610 |     "        # Accumulate the validation loss.\n",
 611 |     "        total_eval_loss += loss.item()\n",
 612 |     "\n",
 613 |     "        # Move logits and labels to CPU\n",
 614 |     "        logits = logits.detach().cpu().numpy()\n",
 615 |     "\n",
 616 |     "        # Turn logits into predicted class ids and collect them with the true labels\n",
 617 |     "        predicted_labels=np.argmax(logits,axis=1)\n",
 618 |     "        predictions.extend(predicted_labels)\n",
 619 |     "        label_ids = b_labels.to('cpu').numpy()\n",
 620 |     "        true_labels.extend(label_ids)\n",
 621 |     "        \n",
 622 |     "        #saving the features_val\n",
 623 |     "        vec = vec.detach().cpu().numpy()\n",
 624 |     "        vec_output_val.extend(vec)\n",
 625 |     "        \n",
 626 |     "\n",
 627 |     "        # Calculate the accuracy for this batch of validation sentences, and\n",
 628 |     "        # accumulate it over all batches.\n",
 629 |     "        total_eval_accuracy += flat_accuracy(logits, label_ids)\n",
 630 |     "        \n",
 631 |     "\n",
 632 |     "    # Report the final accuracy for this validation run.\n",
 633 |     "    avg_val_accuracy = total_eval_accuracy / len(validation_dataloader)\n",
 634 |     "    print(\"  Accuracy: {0:.2f}\".format(avg_val_accuracy))\n",
 635 |     "\n",
 636 |     "    # Calculate the average loss over all of the batches.\n",
 637 |     "    avg_val_loss = total_eval_loss / len(validation_dataloader)\n",
 638 |     "    \n",
 639 |     "    # Measure how long the validation run took.\n",
 640 |     "    validation_time = format_time(time.time() - t0)\n",
 641 |     "    \n",
 642 |     "    print(\"  Validation Loss: {0:.2f}\".format(avg_val_loss))\n",
 643 |     "    print(\"  Validation took: {:}\".format(validation_time))\n",
 644 |     "    print(\"Validation F1-Score: {}\".format(f1_score(true_labels,predictions,average='macro')))\n",
 645 |     "    curr_f1=f1_score(true_labels,predictions,average='macro')\n",
 646 |     "    if curr_f1 > best_f1:\n",
 647 |     "        best_f1=curr_f1\n",
 648 |     "        torch.save(model.state_dict(), 'best_model.pt')\n",
 649 |     "        np.save('best_vec_train_model_train.npy',vec_output_tr)\n",
 650 |     "        np.save('best_vec_val.npy',vec_output_val)\n",
 651 |     "        \n",
 652 |     "    # Record all statistics from this epoch.\n",
 653 |     "#     training_stats.append(\n",
 654 |     "#         {\n",
 655 |     "#             'epoch': epoch_i + 1,\n",
 656 |     "#             'Training Loss': avg_train_loss,\n",
 657 |     "#             'Valid. Loss': avg_val_loss,\n",
 658 |     "#             'Valid. Accur.': avg_val_accuracy,\n",
 659 |     "#             'Training Time': training_time,\n",
 660 |     "#             'Validation Time': validation_time\n",
 661 |     "#         }\n",
 662 |     "#     )\n",
 663 |     "\n",
 664 |     "print(\"\")\n",
 665 |     "print(\"Training complete!\")\n",
 666 |     "\n",
 667 |     "print(\"Total training took {:} (h:mm:ss)\".format(format_time(time.time()-total_t0)))\n"
 668 |    ]
 669 |   },
 670 |   {
 671 |    "cell_type": "markdown",
 672 |    "metadata": {},
 673 |    "source": [
 674 |     "## Predictions"
 675 |    ]
 676 |   },
 677 |   {
 678 |    "cell_type": "code",
 679 |    "execution_count": null,
 680 |    "metadata": {},
 681 |    "outputs": [],
 682 |    "source": [
 683 |     "model_path = '/kaggle/working/best_model.pt'\n",
 684 |     "checkpoint = torch.load(model_path)\n",
 685 |     "# model = checkpoint['model']\n",
 686 |     "model.load_state_dict(checkpoint)"
 687 |    ]
 688 |   },
 689 |   {
 690 |    "cell_type": "code",
 691 |    "execution_count": null,
 692 |    "metadata": {},
 693 |    "outputs": [],
 694 |    "source": [
 695 |     "def predict_pyt(model, prediction_dataloader):\n",
 696 |     "    \"\"\"\n",
 697 |     "    model: pytorch model\n",
 698 |     "    prediction_dataloader: DataLoader object for which the predictions have to be made.\n",
 699 |     "    return:\n",
 700 |     "        predictions:- Direct predicted labels\n",
 701 |     "        softmax_logits:- logits normalized with softmax; vec_outputs:- feature vectors returned by the model\"\"\"\n",
 702 |     "    \n",
 703 |     "    # Put model in evaluation mode\n",
 704 |     "    model.eval()\n",
 705 |     "\n",
 706 |     "    # Tracking variables \n",
 707 |     "    predictions = []\n",
 708 |     "    softmax_logits=[]\n",
 709 |     "    vec_outputs = []\n",
 710 |     "    \n",
 711 |     "    # Predict \n",
 712 |     "    for batch in tqdm(prediction_dataloader):\n",
 713 |     "        \n",
 714 |     "        # Add batch to GPU\n",
 715 |     "        batch = tuple(t.to(device) for t in batch)\n",
 716 |     "        # Unpack the inputs from our dataloader\n",
 717 |     "        try:\n",
 718 |     "            b_input_ids, b_input_mask = batch\n",
 719 |     "        except ValueError:\n",
 720 |     "            b_input_ids, b_input_mask, _ = batch\n",
 721 |     "        # Telling the model not to compute or store gradients, saving memory and \n",
 722 |     "        # speeding up prediction\n",
 723 |     "        with torch.no_grad():\n",
 724 |     "          # Forward pass, calculate logit predictions\n",
 725 |     "            logits,vec = model(b_input_ids, token_type_ids=None, \n",
 726 |     "                          attention_mask=b_input_mask)\n",
 727 |     "            \n",
 728 |     "            logits = logits[0]\n",
 729 |     "\n",
 730 |     "        \n",
 731 |     "    #----- Add softmax---     \n",
 732 |     "        m = nn.Softmax(dim=1)\n",
 733 |     "    # #     input = torch.randn(2, 3)\n",
 734 |     "        output = m(logits)\n",
 735 |     "    #-------#------\n",
 736 |     "        \n",
 737 |     "        # Move logits and labels to CPU\n",
 738 |     "        logits = logits.detach().cpu().numpy()\n",
 739 |     "        predicted_labels=np.argmax(logits,axis=1)\n",
 740 |     "        predictions.extend(predicted_labels)\n",
 741 |     "        softmax_logits.extend(output)\n",
 742 |     "        \n",
 743 |     "        #vec_outputs saving\n",
 744 |     "        vec = vec.detach().cpu().numpy()\n",
 745 |     "        vec_outputs.extend(vec)\n",
 746 |     "\n",
 747 |     "    print('DONE')\n",
 748 |     "    return predictions, softmax_logits , vec_outputs\n",
 749 |     "\n",
 750 |     "def predict_wrapper(model, sentences, max_len=max_len, batch_size = batch_size ):\n",
 751 |     "    \"\"\"\n",
 752 |     "    Wrapper to create DataLoader object and predict, \n",
 753 |     "    this is if model and sentences are passed\"\"\"\n",
 754 |     "    input_ids,attention_masks=prep_input(sentences,labels=None, max_len=max_len)\n",
 755 |     "    prediction_data = TensorDataset(input_ids, attention_masks)\n",
 756 |     "    prediction_sampler = SequentialSampler(prediction_data)\n",
 757 |     "    prediction_dataloader = DataLoader(prediction_data, sampler=prediction_sampler, batch_size=batch_size)\n",
 758 |     "    return predict_pyt(model, prediction_dataloader)\n",
 759 |     "\n",
 760 |     "\n",
 761 |     "\n"
 762 |    ]
 763 |   },
 764 |   {
 765 |    "cell_type": "code",
 766 |    "execution_count": null,
 767 |    "metadata": {},
 768 |    "outputs": [],
 769 |    "source": [
 770 |     "## Prepare the test dataset\n",
 771 |     "batch_size = 32  \n",
 772 |     "\n",
 773 |     "Preprocess.prepare_test(text_col)\n",
 774 |     "test_sentences = Preprocess.test_sentences\n",
 775 |     "X_test_phase1= Preprocess.X_test"
 776 |    ]
 777 |   },
 778 |   {
 779 |    "cell_type": "code",
 780 |    "execution_count": null,
 781 |    "metadata": {},
 782 |    "outputs": [],
 783 |    "source": [
 784 |     "## Predictions of train dataset \n",
 785 |     "# model_path = '../input/camembertvectoroutput/best_model.pt'\n",
 786 |     "# checkpoint = torch.load(model_path)\n",
 787 |     "# model = checkpoint['model']\n",
 788 |     "# model.load_state_dict(checkpoint)\n",
 789 |     "start = time.time()\n",
 790 |     "predictions, softmax_logits , vec_outputs = predict_pyt(model, train_dataloader)\n",
 791 |     "\n",
 792 |     "#saving\n",
 793 |     "np.save('best_vec_train_model_eval.npy',vec_outputs)\n",
 794 |     "softmax_logits = np.array([ten.detach().cpu().numpy() for ten in softmax_logits])\n",
 795 |     "np.save('train_set_softmax_logits.npy',softmax_logits)\n",
 796 |     "print('length of predictions {}'.format(len(predictions)))\n",
 797 |     "print('Time Taken Predict for train set: {:}'.format(format_time(time.time() - start) ))"
 798 |    ]
 799 |   },
 800 |   {
 801 |    "cell_type": "code",
 802 |    "execution_count": null,
 803 |    "metadata": {},
 804 |    "outputs": [],
 805 |    "source": [
 806 |     "# Predictions on the validation set, which was randomly split off from the train dataset\n",
 807 |     "start = time.time()\n",
 808 |     "predictions, val_softmax_logits , vec_outputs= predict_pyt(model, validation_dataloader)\n",
 809 |     "np.save('best_vec_val_model_eval.npy',vec_outputs)\n",
 810 |     "val_softmax_logits = np.array([ten.detach().cpu().numpy() for ten in val_softmax_logits])\n",
 811 |     "np.save('validation_set_softmax_logits.npy',val_softmax_logits)\n",
 812 |     "print('Time Taken Predict for val set: {:}'.format(format_time(time.time() - start)))"
 813 |    ]
 814 |   },
 815 |   {
 816 |    "cell_type": "code",
 817 |    "execution_count": null,
 818 |    "metadata": {},
 819 |    "outputs": [],
 820 |    "source": [
 821 |     "## Predictions on the test dataset\n",
 822 |     "\n",
 823 |     "start = time.time()\n",
 824 |     "predictions, softmax_logits , vec_outputs = predict_wrapper(model, test_sentences)\n",
 825 |     "\n",
 826 |     "# Save the vector outputs and softmax logits to .npy files\n",
 827 |     "np.save('best_vec_test.npy',vec_outputs)\n",
 828 |     "softmax_logits = np.array([ten.detach().cpu().numpy() for ten in softmax_logits])\n",
 829 |     "np.save('X_test_phase1_softmax_logits.npy',softmax_logits)\n",
 830 |     "print('length of predictions {}'.format(len(predictions)))\n",
 831 |     "print('Time Taken Predict for test set: {:}'.format(format_time(time.time() - start) ))"
 832 |    ]
 833 |   },
 834 |   {
 835 |    "cell_type": "code",
 836 |    "execution_count": null,
 837 |    "metadata": {},
 838 |    "outputs": [],
 839 |    "source": [
 840 |     "X_test_phase1['prediction_model']= predictions\n",
 841 |     "X_test_phase1['Prdtypecode']=X_test_phase1['prediction_model'].map(Preprocess.dict_id_to_code)\n",
 842 |     "print(X_test_phase1['Prdtypecode'].value_counts())\n",
 843 |     "X_test_phase1=X_test_phase1.drop(['prediction_model','Title','Description'],axis=1)"
 844 |    ]
 845 |   },
 846 |   {
 847 |    "cell_type": "code",
 848 |    "execution_count": null,
 849 |    "metadata": {},
 850 |    "outputs": [],
 851 |    "source": [
 852 |     "X_test_phase1.to_csv('y_test_task1_phase1_pred.tsv',sep='\\t',index=False)\n"
 853 |    ]
 854 |   }
 855 |  ],
 856 |  "metadata": {
 857 |   "kernelspec": {
 858 |    "display_name": "Python 3",
 859 |    "language": "python",
 860 |    "name": "python3"
 861 |   },
 862 |   "language_info": {
 863 |    "codemirror_mode": {
 864 |     "name": "ipython",
 865 |     "version": 3
 866 |    },
 867 |    "file_extension": ".py",
 868 |    "mimetype": "text/x-python",
 869 |    "name": "python",
 870 |    "nbconvert_exporter": "python",
 871 |    "pygments_lexer": "ipython3",
 872 |    "version": "3.7.6"
 873 |   },
 874 |   "toc": {
 875 |    "base_numbering": 1,
 876 |    "nav_menu": {},
 877 |    "number_sections": true,
 878 |    "sideBar": true,
 879 |    "skip_h1_title": false,
 880 |    "title_cell": "Table of Contents",
 881 |    "title_sidebar": "Contents",
 882 |    "toc_cell": false,
 883 |    "toc_position": {},
 884 |    "toc_section_display": true,
 885 |    "toc_window_display": false
 886 |   },
 887 |   "widgets": {
 888 |    "application/vnd.jupyter.widget-state+json": {
 889 |     "state": {
 890 |      "04e2caaecb124a14945c845ca6e62aad": {
 891 |       "model_module": "@jupyter-widgets/controls",
 892 |       "model_module_version": "1.5.0",
 893 |       "model_name": "ProgressStyleModel",
 894 |       "state": {
 895 |        "_model_module": "@jupyter-widgets/controls",
 896 |        "_model_module_version": "1.5.0",
 897 |        "_model_name": "ProgressStyleModel",
 898 |        "_view_count": null,
 899 |        "_view_module": "@jupyter-widgets/base",
 900 |        "_view_module_version": "1.2.0",
 901 |        "_view_name": "StyleView",
 902 |        "bar_color": null,
 903 |        "description_width": "initial"
 904 |       }
 905 |      },
 906 |      "0d47bfa702554bd68f25bb94db8a8811": {
 907 |       "model_module": "@jupyter-widgets/controls",
 908 |       "model_module_version": "1.5.0",
 909 |       "model_name": "DescriptionStyleModel",
 910 |       "state": {
 911 |        "_model_module": "@jupyter-widgets/controls",
 912 |        "_model_module_version": "1.5.0",
 913 |        "_model_name": "DescriptionStyleModel",
 914 |        "_view_count": null,
 915 |        "_view_module": "@jupyter-widgets/base",
 916 |        "_view_module_version": "1.2.0",
 917 |        "_view_name": "StyleView",
 918 |        "description_width": ""
 919 |       }
 920 |      },
 921 |      "1e4ce92ff6a44d89b65e7917319266eb": {
 922 |       "model_module": "@jupyter-widgets/controls",
 923 |       "model_module_version": "1.5.0",
 924 |       "model_name": "DescriptionStyleModel",
 925 |       "state": {
 926 |        "_model_module": "@jupyter-widgets/controls",
 927 |        "_model_module_version": "1.5.0",
 928 |        "_model_name": "DescriptionStyleModel",
 929 |        "_view_count": null,
 930 |        "_view_module": "@jupyter-widgets/base",
 931 |        "_view_module_version": "1.2.0",
 932 |        "_view_name": "StyleView",
 933 |        "description_width": ""
 934 |       }
 935 |      },
 936 |      "212f4750f35d4bc2b4272e6d070fce89": {
 937 |       "model_module": "@jupyter-widgets/controls",
 938 |       "model_module_version": "1.5.0",
 939 |       "model_name": "HBoxModel",
 940 |       "state": {
 941 |        "_dom_classes": [],
 942 |        "_model_module": "@jupyter-widgets/controls",
 943 |        "_model_module_version": "1.5.0",
 944 |        "_model_name": "HBoxModel",
 945 |        "_view_count": null,
 946 |        "_view_module": "@jupyter-widgets/controls",
 947 |        "_view_module_version": "1.5.0",
 948 |        "_view_name": "HBoxView",
 949 |        "box_style": "",
 950 |        "children": [
 951 |         "IPY_MODEL_85588758dedc4b8bbc6ee33178593140",
 952 |         "IPY_MODEL_a39feb5a6e374ea2ab65be2fe8b75b00"
 953 |        ],
 954 |        "layout": "IPY_MODEL_5a054222842941dab063a8db8ede0ff2"
 955 |       }
 956 |      },
 957 |      "4bc03bf5ab334fc590007e48be4dd318": {
 958 |       "model_module": "@jupyter-widgets/base",
 959 |       "model_module_version": "1.2.0",
 960 |       "model_name": "LayoutModel",
 961 |       "state": {
 962 |        "_model_module": "@jupyter-widgets/base",
 963 |        "_model_module_version": "1.2.0",
 964 |        "_model_name": "LayoutModel",
 965 |        "_view_count": null,
 966 |        "_view_module": "@jupyter-widgets/base",
 967 |        "_view_module_version": "1.2.0",
 968 |        "_view_name": "LayoutView",
 969 |        "align_content": null,
 970 |        "align_items": null,
 971 |        "align_self": null,
 972 |        "border": null,
 973 |        "bottom": null,
 974 |        "display": null,
 975 |        "flex": null,
 976 |        "flex_flow": null,
 977 |        "grid_area": null,
 978 |        "grid_auto_columns": null,
 979 |        "grid_auto_flow": null,
 980 |        "grid_auto_rows": null,
 981 |        "grid_column": null,
 982 |        "grid_gap": null,
 983 |        "grid_row": null,
 984 |        "grid_template_areas": null,
 985 |        "grid_template_columns": null,
 986 |        "grid_template_rows": null,
 987 |        "height": null,
 988 |        "justify_content": null,
 989 |        "justify_items": null,
 990 |        "left": null,
 991 |        "margin": null,
 992 |        "max_height": null,
 993 |        "max_width": null,
 994 |        "min_height": null,
 995 |        "min_width": null,
 996 |        "object_fit": null,
 997 |        "object_position": null,
 998 |        "order": null,
 999 |        "overflow": null,
1000 |        "overflow_x": null,
1001 |        "overflow_y": null,
1002 |        "padding": null,
1003 |        "right": null,
1004 |        "top": null,
1005 |        "visibility": null,
1006 |        "width": null
1007 |       }
1008 |      },
1009 |      "4ec5441ef13241dcb0af2d40a4036e6e": {
1010 |       "model_module": "@jupyter-widgets/controls",
1011 |       "model_module_version": "1.5.0",
1012 |       "model_name": "HBoxModel",
1013 |       "state": {
1014 |        "_dom_classes": [],
1015 |        "_model_module": "@jupyter-widgets/controls",
1016 |        "_model_module_version": "1.5.0",
1017 |        "_model_name": "HBoxModel",
1018 |        "_view_count": null,
1019 |        "_view_module": "@jupyter-widgets/controls",
1020 |        "_view_module_version": "1.5.0",
1021 |        "_view_name": "HBoxView",
1022 |        "box_style": "",
1023 |        "children": [
1024 |         "IPY_MODEL_8cd310281c3e4133b3776f69196bef32",
1025 |         "IPY_MODEL_702441bdd088466d8e1d264071baca75"
1026 |        ],
1027 |        "layout": "IPY_MODEL_dcc661b801f940139925b83564e8f282"
1028 |       }
1029 |      },
1030 |      "4f92279308be48e2a1b543fdb441246c": {
1031 |       "model_module": "@jupyter-widgets/base",
1032 |       "model_module_version": "1.2.0",
1033 |       "model_name": "LayoutModel",
1034 |       "state": {
1035 |        "_model_module": "@jupyter-widgets/base",
1036 |        "_model_module_version": "1.2.0",
1037 |        "_model_name": "LayoutModel",
1038 |        "_view_count": null,
1039 |        "_view_module": "@jupyter-widgets/base",
1040 |        "_view_module_version": "1.2.0",
1041 |        "_view_name": "LayoutView",
1042 |        "align_content": null,
1043 |        "align_items": null,
1044 |        "align_self": null,
1045 |        "border": null,
1046 |        "bottom": null,
1047 |        "display": null,
1048 |        "flex": null,
1049 |        "flex_flow": null,
1050 |        "grid_area": null,
1051 |        "grid_auto_columns": null,
1052 |        "grid_auto_flow": null,
1053 |        "grid_auto_rows": null,
1054 |        "grid_column": null,
1055 |        "grid_gap": null,
1056 |        "grid_row": null,
1057 |        "grid_template_areas": null,
1058 |        "grid_template_columns": null,
1059 |        "grid_template_rows": null,
1060 |        "height": null,
1061 |        "justify_content": null,
1062 |        "justify_items": null,
1063 |        "left": null,
1064 |        "margin": null,
1065 |        "max_height": null,
1066 |        "max_width": null,
1067 |        "min_height": null,
1068 |        "min_width": null,
1069 |        "object_fit": null,
1070 |        "object_position": null,
1071 |        "order": null,
1072 |        "overflow": null,
1073 |        "overflow_x": null,
1074 |        "overflow_y": null,
1075 |        "padding": null,
1076 |        "right": null,
1077 |        "top": null,
1078 |        "visibility": null,
1079 |        "width": null
1080 |       }
1081 |      },
1082 |      "57be58d12ce0415590cd75f529dc8a06": {
1083 |       "model_module": "@jupyter-widgets/base",
1084 |       "model_module_version": "1.2.0",
1085 |       "model_name": "LayoutModel",
1086 |       "state": {
1087 |        "_model_module": "@jupyter-widgets/base",
1088 |        "_model_module_version": "1.2.0",
1089 |        "_model_name": "LayoutModel",
1090 |        "_view_count": null,
1091 |        "_view_module": "@jupyter-widgets/base",
1092 |        "_view_module_version": "1.2.0",
1093 |        "_view_name": "LayoutView",
1094 |        "align_content": null,
1095 |        "align_items": null,
1096 |        "align_self": null,
1097 |        "border": null,
1098 |        "bottom": null,
1099 |        "display": null,
1100 |        "flex": null,
1101 |        "flex_flow": null,
1102 |        "grid_area": null,
1103 |        "grid_auto_columns": null,
1104 |        "grid_auto_flow": null,
1105 |        "grid_auto_rows": null,
1106 |        "grid_column": null,
1107 |        "grid_gap": null,
1108 |        "grid_row": null,
1109 |        "grid_template_areas": null,
1110 |        "grid_template_columns": null,
1111 |        "grid_template_rows": null,
1112 |        "height": null,
1113 |        "justify_content": null,
1114 |        "justify_items": null,
1115 |        "left": null,
1116 |        "margin": null,
1117 |        "max_height": null,
1118 |        "max_width": null,
1119 |        "min_height": null,
1120 |        "min_width": null,
1121 |        "object_fit": null,
1122 |        "object_position": null,
1123 |        "order": null,
1124 |        "overflow": null,
1125 |        "overflow_x": null,
1126 |        "overflow_y": null,
1127 |        "padding": null,
1128 |        "right": null,
1129 |        "top": null,
1130 |        "visibility": null,
1131 |        "width": null
1132 |       }
1133 |      },
1134 |      "5a054222842941dab063a8db8ede0ff2": {
1135 |       "model_module": "@jupyter-widgets/base",
1136 |       "model_module_version": "1.2.0",
1137 |       "model_name": "LayoutModel",
1138 |       "state": {
1139 |        "_model_module": "@jupyter-widgets/base",
1140 |        "_model_module_version": "1.2.0",
1141 |        "_model_name": "LayoutModel",
1142 |        "_view_count": null,
1143 |        "_view_module": "@jupyter-widgets/base",
1144 |        "_view_module_version": "1.2.0",
1145 |        "_view_name": "LayoutView",
1146 |        "align_content": null,
1147 |        "align_items": null,
1148 |        "align_self": null,
1149 |        "border": null,
1150 |        "bottom": null,
1151 |        "display": null,
1152 |        "flex": null,
1153 |        "flex_flow": null,
1154 |        "grid_area": null,
1155 |        "grid_auto_columns": null,
1156 |        "grid_auto_flow": null,
1157 |        "grid_auto_rows": null,
1158 |        "grid_column": null,
1159 |        "grid_gap": null,
1160 |        "grid_row": null,
1161 |        "grid_template_areas": null,
1162 |        "grid_template_columns": null,
1163 |        "grid_template_rows": null,
1164 |        "height": null,
1165 |        "justify_content": null,
1166 |        "justify_items": null,
1167 |        "left": null,
1168 |        "margin": null,
1169 |        "max_height": null,
1170 |        "max_width": null,
1171 |        "min_height": null,
1172 |        "min_width": null,
1173 |        "object_fit": null,
1174 |        "object_position": null,
1175 |        "order": null,
1176 |        "overflow": null,
1177 |        "overflow_x": null,
1178 |        "overflow_y": null,
1179 |        "padding": null,
1180 |        "right": null,
1181 |        "top": null,
1182 |        "visibility": null,
1183 |        "width": null
1184 |       }
1185 |      },
1186 |      "6a940c5ee47e4a0fa6bd17899077b04c": {
1187 |       "model_module": "@jupyter-widgets/base",
1188 |       "model_module_version": "1.2.0",
1189 |       "model_name": "LayoutModel",
1190 |       "state": {
1191 |        "_model_module": "@jupyter-widgets/base",
1192 |        "_model_module_version": "1.2.0",
1193 |        "_model_name": "LayoutModel",
1194 |        "_view_count": null,
1195 |        "_view_module": "@jupyter-widgets/base",
1196 |        "_view_module_version": "1.2.0",
1197 |        "_view_name": "LayoutView",
1198 |        "align_content": null,
1199 |        "align_items": null,
1200 |        "align_self": null,
1201 |        "border": null,
1202 |        "bottom": null,
1203 |        "display": null,
1204 |        "flex": null,
1205 |        "flex_flow": null,
1206 |        "grid_area": null,
1207 |        "grid_auto_columns": null,
1208 |        "grid_auto_flow": null,
1209 |        "grid_auto_rows": null,
1210 |        "grid_column": null,
1211 |        "grid_gap": null,
1212 |        "grid_row": null,
1213 |        "grid_template_areas": null,
1214 |        "grid_template_columns": null,
1215 |        "grid_template_rows": null,
1216 |        "height": null,
1217 |        "justify_content": null,
1218 |        "justify_items": null,
1219 |        "left": null,
1220 |        "margin": null,
1221 |        "max_height": null,
1222 |        "max_width": null,
1223 |        "min_height": null,
1224 |        "min_width": null,
1225 |        "object_fit": null,
1226 |        "object_position": null,
1227 |        "order": null,
1228 |        "overflow": null,
1229 |        "overflow_x": null,
1230 |        "overflow_y": null,
1231 |        "padding": null,
1232 |        "right": null,
1233 |        "top": null,
1234 |        "visibility": null,
1235 |        "width": null
1236 |       }
1237 |      },
1238 |      "702441bdd088466d8e1d264071baca75": {
1239 |       "model_module": "@jupyter-widgets/controls",
1240 |       "model_module_version": "1.5.0",
1241 |       "model_name": "HTMLModel",
1242 |       "state": {
1243 |        "_dom_classes": [],
1244 |        "_model_module": "@jupyter-widgets/controls",
1245 |        "_model_module_version": "1.5.0",
1246 |        "_model_name": "HTMLModel",
1247 |        "_view_count": null,
1248 |        "_view_module": "@jupyter-widgets/controls",
1249 |        "_view_module_version": "1.5.0",
1250 |        "_view_name": "HTMLView",
1251 |        "description": "",
1252 |        "description_tooltip": null,
1253 |        "layout": "IPY_MODEL_94ed9026bc664a81a39ea16f09293c7c",
1254 |        "placeholder": "​",
1255 |        "style": "IPY_MODEL_0d47bfa702554bd68f25bb94db8a8811",
1256 |        "value": " 811k/811k [00:01&lt;00:00, 648kB/s]"
1257 |       }
1258 |      },
1259 |      "82320d113b0b40e1b038d3cf321b3433": {
1260 |       "model_module": "@jupyter-widgets/controls",
1261 |       "model_module_version": "1.5.0",
1262 |       "model_name": "HTMLModel",
1263 |       "state": {
1264 |        "_dom_classes": [],
1265 |        "_model_module": "@jupyter-widgets/controls",
1266 |        "_model_module_version": "1.5.0",
1267 |        "_model_name": "HTMLModel",
1268 |        "_view_count": null,
1269 |        "_view_module": "@jupyter-widgets/controls",
1270 |        "_view_module_version": "1.5.0",
1271 |        "_view_name": "HTMLView",
1272 |        "description": "",
1273 |        "description_tooltip": null,
1274 |        "layout": "IPY_MODEL_4bc03bf5ab334fc590007e48be4dd318",
1275 |        "placeholder": "​",
1276 |        "style": "IPY_MODEL_895e9b60a3974711883bcd1d827de8a6",
1277 |        "value": " 508/508 [00:00&lt;00:00, 1.38kB/s]"
1278 |       }
1279 |      },
1280 |      "85588758dedc4b8bbc6ee33178593140": {
1281 |       "model_module": "@jupyter-widgets/controls",
1282 |       "model_module_version": "1.5.0",
1283 |       "model_name": "FloatProgressModel",
1284 |       "state": {
1285 |        "_dom_classes": [],
1286 |        "_model_module": "@jupyter-widgets/controls",
1287 |        "_model_module_version": "1.5.0",
1288 |        "_model_name": "FloatProgressModel",
1289 |        "_view_count": null,
1290 |        "_view_module": "@jupyter-widgets/controls",
1291 |        "_view_module_version": "1.5.0",
1292 |        "_view_name": "ProgressView",
1293 |        "bar_style": "success",
1294 |        "description": "Downloading: 100%",
1295 |        "description_tooltip": null,
1296 |        "layout": "IPY_MODEL_4f92279308be48e2a1b543fdb441246c",
1297 |        "max": 445032417,
1298 |        "min": 0,
1299 |        "orientation": "horizontal",
1300 |        "style": "IPY_MODEL_04e2caaecb124a14945c845ca6e62aad",
1301 |        "value": 445032417
1302 |       }
1303 |      },
1304 |      "895e9b60a3974711883bcd1d827de8a6": {
1305 |       "model_module": "@jupyter-widgets/controls",
1306 |       "model_module_version": "1.5.0",
1307 |       "model_name": "DescriptionStyleModel",
1308 |       "state": {
1309 |        "_model_module": "@jupyter-widgets/controls",
1310 |        "_model_module_version": "1.5.0",
1311 |        "_model_name": "DescriptionStyleModel",
1312 |        "_view_count": null,
1313 |        "_view_module": "@jupyter-widgets/base",
1314 |        "_view_module_version": "1.2.0",
1315 |        "_view_name": "StyleView",
1316 |        "description_width": ""
1317 |       }
1318 |      },
1319 |      "8cd310281c3e4133b3776f69196bef32": {
1320 |       "model_module": "@jupyter-widgets/controls",
1321 |       "model_module_version": "1.5.0",
1322 |       "model_name": "FloatProgressModel",
1323 |       "state": {
1324 |        "_dom_classes": [],
1325 |        "_model_module": "@jupyter-widgets/controls",
1326 |        "_model_module_version": "1.5.0",
1327 |        "_model_name": "FloatProgressModel",
1328 |        "_view_count": null,
1329 |        "_view_module": "@jupyter-widgets/controls",
1330 |        "_view_module_version": "1.5.0",
1331 |        "_view_name": "ProgressView",
1332 |        "bar_style": "success",
1333 |        "description": "Downloading: 100%",
1334 |        "description_tooltip": null,
1335 |        "layout": "IPY_MODEL_fb5ba4132e1e455ea0b38556501346c8",
1336 |        "max": 810912,
1337 |        "min": 0,
1338 |        "orientation": "horizontal",
1339 |        "style": "IPY_MODEL_a717d5b6e71341408ed3a51d679f1ed6",
1340 |        "value": 810912
1341 |       }
1342 |      },
1343 |      "94ed9026bc664a81a39ea16f09293c7c": {
1344 |       "model_module": "@jupyter-widgets/base",
1345 |       "model_module_version": "1.2.0",
1346 |       "model_name": "LayoutModel",
1347 |       "state": {
1348 |        "_model_module": "@jupyter-widgets/base",
1349 |        "_model_module_version": "1.2.0",
1350 |        "_model_name": "LayoutModel",
1351 |        "_view_count": null,
1352 |        "_view_module": "@jupyter-widgets/base",
1353 |        "_view_module_version": "1.2.0",
1354 |        "_view_name": "LayoutView",
1355 |        "align_content": null,
1356 |        "align_items": null,
1357 |        "align_self": null,
1358 |        "border": null,
1359 |        "bottom": null,
1360 |        "display": null,
1361 |        "flex": null,
1362 |        "flex_flow": null,
1363 |        "grid_area": null,
1364 |        "grid_auto_columns": null,
1365 |        "grid_auto_flow": null,
1366 |        "grid_auto_rows": null,
1367 |        "grid_column": null,
1368 |        "grid_gap": null,
1369 |        "grid_row": null,
1370 |        "grid_template_areas": null,
1371 |        "grid_template_columns": null,
1372 |        "grid_template_rows": null,
1373 |        "height": null,
1374 |        "justify_content": null,
1375 |        "justify_items": null,
1376 |        "left": null,
1377 |        "margin": null,
1378 |        "max_height": null,
1379 |        "max_width": null,
1380 |        "min_height": null,
1381 |        "min_width": null,
1382 |        "object_fit": null,
1383 |        "object_position": null,
1384 |        "order": null,
1385 |        "overflow": null,
1386 |        "overflow_x": null,
1387 |        "overflow_y": null,
1388 |        "padding": null,
1389 |        "right": null,
1390 |        "top": null,
1391 |        "visibility": null,
1392 |        "width": null
1393 |       }
1394 |      },
1395 |      "a39feb5a6e374ea2ab65be2fe8b75b00": {
1396 |       "model_module": "@jupyter-widgets/controls",
1397 |       "model_module_version": "1.5.0",
1398 |       "model_name": "HTMLModel",
1399 |       "state": {
1400 |        "_dom_classes": [],
1401 |        "_model_module": "@jupyter-widgets/controls",
1402 |        "_model_module_version": "1.5.0",
1403 |        "_model_name": "HTMLModel",
1404 |        "_view_count": null,
1405 |        "_view_module": "@jupyter-widgets/controls",
1406 |        "_view_module_version": "1.5.0",
1407 |        "_view_name": "HTMLView",
1408 |        "description": "",
1409 |        "description_tooltip": null,
1410 |        "layout": "IPY_MODEL_57be58d12ce0415590cd75f529dc8a06",
1411 |        "placeholder": "​",
1412 |        "style": "IPY_MODEL_1e4ce92ff6a44d89b65e7917319266eb",
1413 |        "value": " 445M/445M [00:12&lt;00:00, 35.8MB/s]"
1414 |       }
1415 |      },
1416 |      "a717d5b6e71341408ed3a51d679f1ed6": {
1417 |       "model_module": "@jupyter-widgets/controls",
1418 |       "model_module_version": "1.5.0",
1419 |       "model_name": "ProgressStyleModel",
1420 |       "state": {
1421 |        "_model_module": "@jupyter-widgets/controls",
1422 |        "_model_module_version": "1.5.0",
1423 |        "_model_name": "ProgressStyleModel",
1424 |        "_view_count": null,
1425 |        "_view_module": "@jupyter-widgets/base",
1426 |        "_view_module_version": "1.2.0",
1427 |        "_view_name": "StyleView",
1428 |        "bar_color": null,
1429 |        "description_width": "initial"
1430 |       }
1431 |      },
1432 |      "ba22ce2585f54900b21f7f31ed15e78a": {
1433 |       "model_module": "@jupyter-widgets/controls",
1434 |       "model_module_version": "1.5.0",
1435 |       "model_name": "FloatProgressModel",
1436 |       "state": {
1437 |        "_dom_classes": [],
1438 |        "_model_module": "@jupyter-widgets/controls",
1439 |        "_model_module_version": "1.5.0",
1440 |        "_model_name": "FloatProgressModel",
1441 |        "_view_count": null,
1442 |        "_view_module": "@jupyter-widgets/controls",
1443 |        "_view_module_version": "1.5.0",
1444 |        "_view_name": "ProgressView",
1445 |        "bar_style": "success",
1446 |        "description": "Downloading: 100%",
1447 |        "description_tooltip": null,
1448 |        "layout": "IPY_MODEL_e8db38407d4f4525ba87dafb35c67a7d",
1449 |        "max": 508,
1450 |        "min": 0,
1451 |        "orientation": "horizontal",
1452 |        "style": "IPY_MODEL_ff8421ceeeb84863a79a95137d57e3a7",
1453 |        "value": 508
1454 |       }
1455 |      },
1456 |      "dcc661b801f940139925b83564e8f282": {
1457 |       "model_module": "@jupyter-widgets/base",
1458 |       "model_module_version": "1.2.0",
1459 |       "model_name": "LayoutModel",
1460 |       "state": {
1461 |        "_model_module": "@jupyter-widgets/base",
1462 |        "_model_module_version": "1.2.0",
1463 |        "_model_name": "LayoutModel",
1464 |        "_view_count": null,
1465 |        "_view_module": "@jupyter-widgets/base",
1466 |        "_view_module_version": "1.2.0",
1467 |        "_view_name": "LayoutView",
1468 |        "align_content": null,
1469 |        "align_items": null,
1470 |        "align_self": null,
1471 |        "border": null,
1472 |        "bottom": null,
1473 |        "display": null,
1474 |        "flex": null,
1475 |        "flex_flow": null,
1476 |        "grid_area": null,
1477 |        "grid_auto_columns": null,
1478 |        "grid_auto_flow": null,
1479 |        "grid_auto_rows": null,
1480 |        "grid_column": null,
1481 |        "grid_gap": null,
1482 |        "grid_row": null,
1483 |        "grid_template_areas": null,
1484 |        "grid_template_columns": null,
1485 |        "grid_template_rows": null,
1486 |        "height": null,
1487 |        "justify_content": null,
1488 |        "justify_items": null,
1489 |        "left": null,
1490 |        "margin": null,
1491 |        "max_height": null,
1492 |        "max_width": null,
1493 |        "min_height": null,
1494 |        "min_width": null,
1495 |        "object_fit": null,
1496 |        "object_position": null,
1497 |        "order": null,
1498 |        "overflow": null,
1499 |        "overflow_x": null,
1500 |        "overflow_y": null,
1501 |        "padding": null,
1502 |        "right": null,
1503 |        "top": null,
1504 |        "visibility": null,
1505 |        "width": null
1506 |       }
1507 |      },
1508 |      "dd232800d1994d96816b47b1eb042df7": {
1509 |       "model_module": "@jupyter-widgets/controls",
1510 |       "model_module_version": "1.5.0",
1511 |       "model_name": "HBoxModel",
1512 |       "state": {
1513 |        "_dom_classes": [],
1514 |        "_model_module": "@jupyter-widgets/controls",
1515 |        "_model_module_version": "1.5.0",
1516 |        "_model_name": "HBoxModel",
1517 |        "_view_count": null,
1518 |        "_view_module": "@jupyter-widgets/controls",
1519 |        "_view_module_version": "1.5.0",
1520 |        "_view_name": "HBoxView",
1521 |        "box_style": "",
1522 |        "children": [
1523 |         "IPY_MODEL_ba22ce2585f54900b21f7f31ed15e78a",
1524 |         "IPY_MODEL_82320d113b0b40e1b038d3cf321b3433"
1525 |        ],
1526 |        "layout": "IPY_MODEL_6a940c5ee47e4a0fa6bd17899077b04c"
1527 |       }
1528 |      },
1529 |      "e8db38407d4f4525ba87dafb35c67a7d": {
1530 |       "model_module": "@jupyter-widgets/base",
1531 |       "model_module_version": "1.2.0",
1532 |       "model_name": "LayoutModel",
1533 |       "state": {
1534 |        "_model_module": "@jupyter-widgets/base",
1535 |        "_model_module_version": "1.2.0",
1536 |        "_model_name": "LayoutModel",
1537 |        "_view_count": null,
1538 |        "_view_module": "@jupyter-widgets/base",
1539 |        "_view_module_version": "1.2.0",
1540 |        "_view_name": "LayoutView",
1541 |        "align_content": null,
1542 |        "align_items": null,
1543 |        "align_self": null,
1544 |        "border": null,
1545 |        "bottom": null,
1546 |        "display": null,
1547 |        "flex": null,
1548 |        "flex_flow": null,
1549 |        "grid_area": null,
1550 |        "grid_auto_columns": null,
1551 |        "grid_auto_flow": null,
1552 |        "grid_auto_rows": null,
1553 |        "grid_column": null,
1554 |        "grid_gap": null,
1555 |        "grid_row": null,
1556 |        "grid_template_areas": null,
1557 |        "grid_template_columns": null,
1558 |        "grid_template_rows": null,
1559 |        "height": null,
1560 |        "justify_content": null,
1561 |        "justify_items": null,
1562 |        "left": null,
1563 |        "margin": null,
1564 |        "max_height": null,
1565 |        "max_width": null,
1566 |        "min_height": null,
1567 |        "min_width": null,
1568 |        "object_fit": null,
1569 |        "object_position": null,
1570 |        "order": null,
1571 |        "overflow": null,
1572 |        "overflow_x": null,
1573 |        "overflow_y": null,
1574 |        "padding": null,
1575 |        "right": null,
1576 |        "top": null,
1577 |        "visibility": null,
1578 |        "width": null
1579 |       }
1580 |      },
1581 |      "fb5ba4132e1e455ea0b38556501346c8": {
1582 |       "model_module": "@jupyter-widgets/base",
1583 |       "model_module_version": "1.2.0",
1584 |       "model_name": "LayoutModel",
1585 |       "state": {
1586 |        "_model_module": "@jupyter-widgets/base",
1587 |        "_model_module_version": "1.2.0",
1588 |        "_model_name": "LayoutModel",
1589 |        "_view_count": null,
1590 |        "_view_module": "@jupyter-widgets/base",
1591 |        "_view_module_version": "1.2.0",
1592 |        "_view_name": "LayoutView",
1593 |        "align_content": null,
1594 |        "align_items": null,
1595 |        "align_self": null,
1596 |        "border": null,
1597 |        "bottom": null,
1598 |        "display": null,
1599 |        "flex": null,
1600 |        "flex_flow": null,
1601 |        "grid_area": null,
1602 |        "grid_auto_columns": null,
1603 |        "grid_auto_flow": null,
1604 |        "grid_auto_rows": null,
1605 |        "grid_column": null,
1606 |        "grid_gap": null,
1607 |        "grid_row": null,
1608 |        "grid_template_areas": null,
1609 |        "grid_template_columns": null,
1610 |        "grid_template_rows": null,
1611 |        "height": null,
1612 |        "justify_content": null,
1613 |        "justify_items": null,
1614 |        "left": null,
1615 |        "margin": null,
1616 |        "max_height": null,
1617 |        "max_width": null,
1618 |        "min_height": null,
1619 |        "min_width": null,
1620 |        "object_fit": null,
1621 |        "object_position": null,
1622 |        "order": null,
1623 |        "overflow": null,
1624 |        "overflow_x": null,
1625 |        "overflow_y": null,
1626 |        "padding": null,
1627 |        "right": null,
1628 |        "top": null,
1629 |        "visibility": null,
1630 |        "width": null
1631 |       }
1632 |      },
1633 |      "ff8421ceeeb84863a79a95137d57e3a7": {
1634 |       "model_module": "@jupyter-widgets/controls",
1635 |       "model_module_version": "1.5.0",
1636 |       "model_name": "ProgressStyleModel",
1637 |       "state": {
1638 |        "_model_module": "@jupyter-widgets/controls",
1639 |        "_model_module_version": "1.5.0",
1640 |        "_model_name": "ProgressStyleModel",
1641 |        "_view_count": null,
1642 |        "_view_module": "@jupyter-widgets/base",
1643 |        "_view_module_version": "1.2.0",
1644 |        "_view_name": "StyleView",
1645 |        "bar_color": null,
1646 |        "description_width": "initial"
1647 |       }
1648 |      }
1649 |     },
1650 |     "version_major": 2,
1651 |     "version_minor": 0
1652 |    }
1653 |   }
1654 |  },
1655 |  "nbformat": 4,
1656 |  "nbformat_minor": 4
1657 | }
1658 | 


--------------------------------------------------------------------------------
/flaubert_train_predict.ipynb:
--------------------------------------------------------------------------------
   1 | {
   2 |  "cells": [
   3 |   {
   4 |    "cell_type": "code",
   5 |    "execution_count": null,
   6 |    "metadata": {
   7 |     "_cell_guid": "79c7e3d0-c299-4dcb-8224-4455121ee9b0",
   8 |     "_uuid": "d629ff2d2480ee46fbb7e2d37f6b5fab8052498a"
   9 |    },
  10 |    "outputs": [],
  11 |    "source": [
  12 |     "import os, time, datetime\n",
  13 |     "import numpy as np\n",
  14 |     "import pandas as pd\n",
  15 |     "from tqdm import tqdm\n",
  16 |     "import random\n",
  17 |     "import logging\n",
  18 |     "tqdm.pandas()\n",
  19 |     "import seaborn as sns\n",
  20 |     "from sklearn.model_selection import train_test_split\n",
  21 |     "\n",
  22 |     "#NN Packages\n",
  23 |     "import torch\n",
  24 |     "import torch.nn as nn\n",
  25 |     "from torch.utils.data import TensorDataset, random_split,DataLoader, RandomSampler, SequentialSampler\n",
  26 |     "\n",
  27 |     "logger = logging.getLogger(__name__)\n",
  28 |     "\n",
  29 |     "\n",
  30 |     "if torch.cuda.is_available():    \n",
  31 |     "\n",
  32 |     "    # Tell PyTorch to use the GPU.    \n",
  33 |     "    device = torch.device(\"cuda\")\n",
  34 |     "\n",
  35 |     "    print('There are %d GPU(s) available.' % torch.cuda.device_count())\n",
  36 |     "\n",
  37 |     "    print('We will use the GPU:', torch.cuda.get_device_name(0))\n",
  38 |     "\n",
  39 |     "# If not...\n",
  40 |     "else:\n",
  41 |     "    print('No GPU available, using the CPU instead.')\n",
  42 |     "    device = torch.device(\"cpu\")"
  43 |    ]
  44 |   },
  45 |   {
  46 |    "cell_type": "markdown",
  47 |    "metadata": {},
  48 |    "source": [
  49 |     "# Processing text data"
  50 |    ]
  51 |   },
  52 |   {
  53 |    "cell_type": "code",
  54 |    "execution_count": null,
  55 |    "metadata": {},
  56 |    "outputs": [],
  57 |    "source": [
  58 |     "def format_time(elapsed):\n",
  59 |     "    '''\n",
  60 |     "    Takes a time in seconds and returns a string hh:mm:ss\n",
  61 |     "    '''\n",
  62 |     "    # Round to the nearest second.\n",
  63 |     "    elapsed_rounded = int(round((elapsed)))\n",
  64 |     "    \n",
  65 |     "    # Format as hh:mm:ss\n",
  66 |     "    return str(datetime.timedelta(seconds=elapsed_rounded))\n",
  67 |     "\n",
  68 |     "class SigirPreprocess():\n",
  69 |     "    \n",
  70 |     "    \n",
  71 |     "    def __init__(self, text_data_path):\n",
  72 |     "        self.text_data_path = text_data_path\n",
  73 |     "        self.train = None\n",
  74 |     "        self.dict_code_to_id = {}\n",
  75 |     "        self.dict_id_to_code = {}\n",
  76 |     "        self.list_tags = {}\n",
  77 |     "        self.sentences = []\n",
  78 |     "        self.labels = []\n",
  79 |     "        self.text_col = None\n",
  80 |     "        self.X_test = None\n",
  81 |     "    \n",
  82 |     "    \n",
  83 |     "    def prepare_data(self ):\n",
  84 |     "        \n",
  85 |     "        #loading the train data and test data\n",
  86 |     "        catalog_eng = pd.read_csv(self.text_data_path+\"data/catalog_english_taxonomy.tsv\",sep=\"\\t\")\n",
  87 |     "        X_train= pd.read_csv(self.text_data_path+\"data/X_train.tsv\",sep=\"\\t\")\n",
  88 |     "        Y_train= pd.read_csv(self.text_data_path+\"data/Y_train.tsv\",sep=\"\\t\")\n",
  89 |     "        self.list_tags = list(Y_train['Prdtypecode'].unique())\n",
  90 |     "        \n",
  91 |     "        for i,tag in enumerate(self.list_tags):\n",
  92 |     "            self.dict_code_to_id[tag] = i \n",
  93 |     "            self.dict_id_to_code[i]=tag\n",
  94 |     "        \n",
  95 |     "        #map \n",
  96 |     "        Y_train['labels']=Y_train['Prdtypecode'].map(self.dict_code_to_id)\n",
  97 |     "        \n",
  98 |     "        #merge the train\n",
  99 |     "        train=pd.merge(left=X_train,right=Y_train,\n",
 100 |     "               how='left',left_on=['Integer_id','Image_id','Product_id'],\n",
 101 |     "               right_on=['Integer_id','Image_id','Product_id'])\n",
 102 |     "        prod_map=pd.Series(catalog_eng['Top level category'].values,\n",
 103 |     "                           index=catalog_eng['Prdtypecode']).to_dict()\n",
 104 |     "        \n",
 105 |     "        #creating the mapping\n",
 106 |     "        train['product'] = train['Prdtypecode'].map(prod_map)\n",
 107 |     "        train['title_len']=train['Title'].progress_apply(lambda x : len(x.split()) if pd.notna(x) else 0)\n",
 108 |     "        train['desc_len']=train['Description'].progress_apply(lambda x : len(x.split()) if pd.notna(x) else 0)\n",
 109 |     "        train['title_desc_len']=train['title_len'] + train['desc_len']\n",
 110 |     "        train.loc[train['Description'].isnull(), 'Description'] = \" \"\n",
 111 |     "        train['title_desc'] = train['Title'] + \" \" + train['Description']\n",
 112 |     "        \n",
 113 |     "        self.train = train\n",
 114 |     "        \n",
 115 |     "    def get_sentences(self, text_col, remove_null_rows=False):\n",
 116 |     "        self.text_col = text_col\n",
 117 |     "        if remove_null_rows==True:\n",
 118 |     "            new_train = self.train[self.train[text_col].notnull()]\n",
 119 |     "\n",
 120 |     "        else:\n",
 121 |     "            new_train = self.train.copy()\n",
 122 |     "            \n",
 123 |     "        self.sentences = new_train[text_col].values\n",
 124 |     "        self.labels = new_train['labels'].values\n",
 125 |     "    \n",
 126 |     "    def prepare_test(self, text_col):\n",
 127 |     "        X_test=pd.read_csv(self.text_data_path+\"data/x_test_task1_phase1.tsv\",sep=\"\\t\")\n",
 128 |     "        X_test.loc[X_test['Description'].isnull(), 'Description'] = \" \"\n",
 129 |     "        X_test['title_desc'] = X_test['Title'] + \" \" + X_test['Description']\n",
 130 |     "        self.X_test = X_test\n",
 131 |     "        self.test_sentences = X_test[text_col].values\n",
 132 |     "        "
 133 |    ]
 134 |   },
 135 |   {
 136 |    "cell_type": "code",
 137 |    "execution_count": null,
 138 |    "metadata": {},
 139 |    "outputs": [],
 140 |    "source": [
 141 |     "text_col = 'title_desc'\n",
 142 |     "max_len = 256\n",
 143 |     "val_size = 0.1\n",
 144 |     "\n",
 145 |     "# model_str_dict = {'c':'camembert',\n",
 146 |     "#                  'f':'flaubert'}\n",
 147 |     "# # 'f' for flaubert & 'c' for camembert\n",
 148 |     "# case='f' \n",
 149 |     "# model_str = model_str_dict[case]"
 150 |    ]
 151 |   },
 152 |   {
 153 |    "cell_type": "code",
 154 |    "execution_count": null,
 155 |    "metadata": {},
 156 |    "outputs": [],
 157 |    "source": [
 158 |     "Preprocess = SigirPreprocess(\"/../input/textphase1/\")\n",
 159 |     "Preprocess.prepare_data()\n",
 160 |     "Preprocess.get_sentences(text_col, True)"
 161 |    ]
 162 |   },
 163 |   {
 164 |    "cell_type": "code",
 165 |    "execution_count": null,
 166 |    "metadata": {},
 167 |    "outputs": [],
 168 |    "source": [
 169 |     "sentences = Preprocess.sentences\n",
 170 |     "labels = Preprocess.labels\n",
 171 |     "print(\"Total number of sentences:{}, labels:{}\".format(len(sentences), len(labels)))"
 172 |    ]
 173 |   },
 174 |   {
 175 |    "cell_type": "code",
 176 |    "execution_count": null,
 177 |    "metadata": {},
 178 |    "outputs": [],
 179 |    "source": [
 180 |     "# sns.countplot(x='product', data=self.train)\n",
 181 |     "# sns.countplot(x='Prdtypecode', data=self.train)\n",
 182 |     "# sns.distplot(Preprocess.train['title_len'])\n",
 183 |     "# sns.distplot(Preprocess.train['title_desc_len'])\n",
 184 |     "# np.percentile(Preprocess.train['title_desc_len'], 99)"
 185 |    ]
 186 |   },
 187 |   {
 188 |    "cell_type": "code",
 189 |    "execution_count": null,
 190 |    "metadata": {},
 191 |    "outputs": [],
 192 |    "source": [
 193 |     "len(Preprocess.dict_code_to_id)"
 194 |    ]
 195 |   },
 196 |   {
 197 |    "cell_type": "code",
 198 |    "execution_count": null,
 199 |    "metadata": {},
 200 |    "outputs": [],
 201 |    "source": [
 202 |     "from transformers import XLMForSequenceClassification\n",
 203 |     "from transformers import FlaubertModel, FlaubertTokenizer,FlaubertForSequenceClassification,AdamW, FlaubertConfig \n",
 204 |     "from torch.nn import Dropout,Conv1d, Linear\n",
 205 |     "from transformers.modeling_utils import SequenceSummary"
 206 |    ]
 207 |   },
 208 |   {
 209 |    "cell_type": "code",
 210 |    "execution_count": null,
 211 |    "metadata": {},
 212 |    "outputs": [],
 213 |    "source": [
 214 |     "# a1 = sentences[0]\n",
 215 |     "# max_len = 40\n",
 216 |     "# modelname = 'flaubert-base-cased'\n",
 217 |     "# tokenizer = FlaubertTokenizer.from_pretrained(modelname, do_lowercase=False)\n",
 218 |     "\n",
 219 |     "# encoded_dict = tokenizer.encode_plus(\n",
 220 |     "#                             a1,                      # Sentence to encode.\n",
 221 |     "#                             add_special_tokens = True, # Add '[CLS]' and '[SEP]'\n",
 222 |     "#                             max_length = max_len,           # Pad & truncate all sentences.\n",
 223 |     "#                             pad_to_max_length = True,\n",
 224 |     "#                             return_attention_mask = True,   # Construct attn. masks.\n",
 225 |     "#                             return_tensors = 'pt',     # Return pytorch tensors.\n",
 226 |     "#                        )\n",
 227 |     "\n",
 228 |     "\n",
 229 |     "# iid = encoded_dict['input_ids']\n",
 230 |     "# mask = encoded_dict['attention_mask']\n",
 231 |     "\n",
 232 |     "# iid,mask\n",
 233 |     "\n",
 234 |     "# # modelname = 'flaubert-base-cased'\n",
 235 |     "\n",
 236 |     "# model = CustFlaubertForSequenceClassification.from_pretrained(\n",
 237 |     "#         modelname, # Use the 12-layer BERT model, with an uncased vocab.\n",
 238 |     "#         # num_labels = len(Preprocess.dict_code_to_id), # The number of output labels--2 for binary classification.\n",
 239 |     "#         # You can increase this for multi-class tasks.   \n",
 240 |     "#         output_attentions = False, # Whether the model returns attentions weights.\n",
 241 |     "#         output_hidden_states = False, # Whether the model returns all hidden-states.\n",
 242 |     "# )\n",
 243 |     "\n",
 244 |     "# outputs, embed1 = model(iid, token_type_ids=None, attention_mask=mask, \n",
 245 |     "# )"
 246 |    ]
 247 |   },
 248 |   {
 249 |    "cell_type": "code",
 250 |    "execution_count": null,
 251 |    "metadata": {},
 252 |    "outputs": [],
 253 |    "source": [
 254 |     "# #max length after tokenization\n",
 255 |     "# _max_len = 0\n",
 256 |     "# # For every sentence...\n",
 257 |     "# for sent in tqdm(sentences):\n",
 258 |     "\n",
 259 |     "#     # Tokenize the text and add `[CLS]` and `[SEP]` tokens.\n",
 260 |     "#     input_ids = tokenizer.encode(sent, add_special_tokens=True)\n",
 261 |     "\n",
 262 |     "#     # Update the maximum sentence length.\n",
 263 |     "#     _max_len = max(_max_len, len(input_ids))\n",
 264 |     "\n",
 265 |     "# print('Max sentence length: ', _max_len)"
 266 |    ]
 267 |   },
 268 |   {
 269 |    "cell_type": "code",
 270 |    "execution_count": null,
 271 |    "metadata": {},
 272 |    "outputs": [],
 273 |    "source": [
 274 |     "modelname = 'flaubert-base-cased'\n",
 275 |     "tokenizer = FlaubertTokenizer.from_pretrained(modelname, do_lowercase=False)"
 276 |    ]
 277 |   },
 278 |   {
 279 |    "cell_type": "code",
 280 |    "execution_count": null,
 281 |    "metadata": {},
 282 |    "outputs": [],
 283 |    "source": [
 284 |     "#function to prepare input for model training\n",
 285 |     "def prep_input(sentences,labels, max_len):\n",
 286 |     "    input_ids = []\n",
 287 |     "    attention_masks = []\n",
 288 |     "\n",
 289 |     "    # For every sentence...\n",
 290 |     "    for sent in tqdm(sentences):\n",
 291 |     "        # `encode_plus` will:\n",
 292 |     "        #   (1) Tokenize the sentence.\n",
 293 |     "        #   (2) Prepend the `[CLS]` token to the start.\n",
 294 |     "        #   (3) Append the `[SEP]` token to the end.\n",
 295 |     "        #   (4) Map tokens to their IDs.\n",
 296 |     "        #   (5) Pad or truncate the sentence to `max_length`\n",
 297 |     "        #   (6) Create attention masks for [PAD] tokens.\n",
 298 |     "        encoded_dict = tokenizer.encode_plus(\n",
 299 |     "                            sent,                      # Sentence to encode.\n",
 300 |     "                            add_special_tokens = True, # Add '[CLS]' and '[SEP]'\n",
 301 |     "                            max_length = max_len,           # Pad & truncate all sentences.\n",
 302 |     "                            pad_to_max_length = True,\n",
 303 |     "                            return_attention_mask = True,   # Construct attn. masks.\n",
 304 |     "                            return_tensors = 'pt',     # Return pytorch tensors.\n",
 305 |     "                       )\n",
 306 |     "\n",
 307 |     "        # Add the encoded sentence to the list.    \n",
 308 |     "        input_ids.append(encoded_dict['input_ids'])\n",
 309 |     "\n",
 310 |     "        # And its attention mask (simply differentiates padding from non-padding).\n",
 311 |     "        attention_masks.append(encoded_dict['attention_mask'])\n",
 312 |     "\n",
 313 |     "    # Convert the lists into tensors.\n",
 314 |     "    input_ids = torch.cat(input_ids, dim=0)\n",
 315 |     "    attention_masks = torch.cat(attention_masks, dim=0)\n",
 316 |     "    if labels is not None:\n",
 317 |     "        labels = torch.tensor(labels)\n",
 318 |     "        return input_ids,attention_masks,labels\n",
 319 |     "    else:\n",
 320 |     "        return input_ids,attention_masks\n",
 321 |     "    "
 322 |    ]
 323 |   },
 324 |   {
 325 |    "cell_type": "code",
 326 |    "execution_count": null,
 327 |    "metadata": {},
 328 |    "outputs": [],
 329 |    "source": [
 330 |     "input_ids,attention_masks,labels=prep_input(sentences,labels, max_len=max_len)\n",
 331 |     "# print('Original: ', sentences[0])\n",
 332 |     "# print('Token IDs:', input_ids[0])"
 333 |    ]
 334 |   },
 335 |   {
 336 |    "cell_type": "code",
 337 |    "execution_count": null,
 338 |    "metadata": {},
 339 |    "outputs": [],
 340 |    "source": [
 341 |     "from torch.utils.data import DataLoader, RandomSampler, SequentialSampler\n",
 342 |     "\n",
 343 |     "\n",
 344 |     "\n",
 345 |     "#Validation split\n",
 346 |     "tr_inputs, val_inputs, tr_labels, val_labels = train_test_split(input_ids, labels,stratify=labels,\n",
 347 |     "                                                            random_state=2020, test_size=val_size)\n",
 348 |     "\n",
 349 |     "\n",
 350 |     "tr_masks, val_masks, u,v =   train_test_split(attention_masks, labels,stratify=labels,\n",
 351 |     "                                             random_state=2020, test_size=val_size)\n",
 352 |     "\n",
 353 |     "\n",
 354 |     "train_dataset=TensorDataset(tr_inputs, tr_masks, tr_labels)\n",
 355 |     "val_dataset=TensorDataset(val_inputs, val_masks, val_labels)\n",
 356 |     "train_sampler = RandomSampler(train_dataset) \n",
 357 |     "valid_sampler = SequentialSampler(val_dataset)\n",
 358 |     "\n",
 359 |     "\n",
 360 |     "# The DataLoader needs to know our batch size for training, so we specify it \n",
 361 |     "# here. For fine-tuning BERT on a specific task, the authors recommend a batch \n",
 362 |     "# size of 16 or 32.\n",
 363 |     "batch_size = 32\n",
 364 |     "\n",
 365 |     "# Create the DataLoaders for our training and validation sets.\n",
 366 |     "# We'll take training samples in random order. \n",
 367 |     "train_dataloader = DataLoader(\n",
 368 |     "            train_dataset,  # The training samples.\n",
 369 |     "            sampler = train_sampler, # Select batches randomly\n",
 370 |     "            batch_size = batch_size # Trains with this batch size.\n",
 371 |     "        )\n",
 372 |     "\n",
 373 |     "# For validation the order doesn't matter, so we'll just read them sequentially.\n",
 374 |     "validation_dataloader = DataLoader(\n",
 375 |     "            val_dataset, # The validation samples.\n",
 376 |     "            sampler = valid_sampler, # Pull out batches sequentially.\n",
 377 |     "            batch_size = batch_size # Evaluate with this batch size.\n",
 378 |     "        )"
 379 |    ]
 380 |   },
 381 |   {
 382 |    "cell_type": "code",
 383 |    "execution_count": null,
 384 |    "metadata": {},
 385 |    "outputs": [],
 386 |    "source": [
 387 |     "# Function to calculate the accuracy of our predictions vs labels\n",
 388 |     "def flat_accuracy(preds, labels):\n",
 389 |     "    pred_flat = np.argmax(preds, axis=1).flatten()\n",
 390 |     "    labels_flat = labels.flatten()\n",
 391 |     "    return np.sum(pred_flat == labels_flat) / len(labels_flat)"
 392 |    ]
 393 |   },
 394 |   {
 395 |    "cell_type": "code",
 396 |    "execution_count": null,
 397 |    "metadata": {},
 398 |    "outputs": [],
 399 |    "source": [
 400 |     "num_classes = 27"
 401 |    ]
 402 |   },
 403 |   {
 404 |    "cell_type": "code",
 405 |    "execution_count": null,
 406 |    "metadata": {},
 407 |    "outputs": [],
 408 |    "source": [
 409 |     "class vec_output_FlaubertForSequenceClassification(FlaubertModel):\n",
 410 |     "    \n",
 411 |     "    config_class = FlaubertConfig\n",
 412 |     "    \n",
 413 |     "\n",
 414 |     "    def __init__(self, config):\n",
 415 |     "        super().__init__(config)\n",
 416 |     "        self.transformer = FlaubertModel(config)\n",
 417 |     "        self.sequence_summary = SequenceSummary(config)\n",
 418 |     "        self.init_weights()\n",
 419 |     "        self.dropout =  torch.nn.Dropout(0.1)\n",
 420 |     "        self.classifier = torch.nn.Linear(config.hidden_size, num_classes)\n",
 421 |     "\n",
 422 |     "\n",
 423 |     "    def forward(\n",
 424 |     "        self,\n",
 425 |     "        input_ids=None,\n",
 426 |     "        attention_mask=None,\n",
 427 |     "        langs=None,\n",
 428 |     "        token_type_ids=None,\n",
 429 |     "        position_ids=None,\n",
 430 |     "        lengths=None,\n",
 431 |     "        cache=None,\n",
 432 |     "        head_mask=None,\n",
 433 |     "        inputs_embeds=None,\n",
 434 |     "        labels=None,\n",
 435 |     "    ):\n",
 436 |     "        \n",
 437 |     "        \n",
 438 |     "        transformer_outputs = self.transformer(\n",
 439 |     "            input_ids,\n",
 440 |     "            attention_mask=attention_mask,\n",
 441 |     "            langs=langs,\n",
 442 |     "            token_type_ids=token_type_ids,\n",
 443 |     "            position_ids=position_ids,\n",
 444 |     "            lengths=lengths,\n",
 445 |     "            cache=cache,\n",
 446 |     "            head_mask=head_mask,\n",
 447 |     "            inputs_embeds=inputs_embeds,\n",
 448 |     "        )\n",
 449 |     "\n",
 450 |     "        #output = self.dropout(output)\n",
 451 |     "        output = transformer_outputs[0]\n",
 452 |     "        vec = output[:,0]\n",
 453 |     "        \n",
 454 |     "        \n",
 455 |     "        #dropout on the first-token vector\n",
 456 |     "        dense = self.dropout(vec)\n",
 457 |     "        \n",
 458 |     "        #classifier\n",
 459 |     "        logits = self.classifier(dense)\n",
 460 |     "        \n",
 461 |     "        outputs = (logits,) + transformer_outputs[1:]  # Keep new_mems and attention/hidden states if they are here\n",
 462 |     "       \n",
 463 |     "        \n",
 464 |     "        return outputs,dense"
 465 |    ]
 466 |   },
 467 |   {
 468 |    "cell_type": "code",
 469 |    "execution_count": null,
 470 |    "metadata": {},
 471 |    "outputs": [],
 472 |    "source": [
 473 |     "len(Preprocess.dict_code_to_id)"
 474 |    ]
 475 |   },
 476 |   {
 477 |    "cell_type": "code",
 478 |    "execution_count": null,
 479 |    "metadata": {},
 480 |    "outputs": [],
 481 |    "source": [
 482 |     "modelname = 'flaubert-base-cased'\n",
 483 |     "\n",
 484 |     "model = vec_output_FlaubertForSequenceClassification.from_pretrained(\n",
 485 |     "        modelname, # Pretrained FlauBERT checkpoint name ('flaubert-base-cased').\n",
 486 |     "        num_labels = len(Preprocess.dict_code_to_id), # The number of output labels--2 for binary classification.\n",
 487 |     "        # You can increase this for multi-class tasks.   \n",
 488 |     "        output_attentions = False, # Whether the model returns attentions weights.\n",
 489 |     "        output_hidden_states = False, # Whether the model returns all hidden-states.\n",
 490 |     ")\n",
 491 |     "\n",
 492 |     "model.cuda()"
 493 |    ]
 494 |   },
 495 |   {
 496 |    "cell_type": "code",
 497 |    "execution_count": null,
 498 |    "metadata": {},
 499 |    "outputs": [],
 500 |    "source": [
 501 |     "optimizer = AdamW(model.parameters(),\n",
 502 |     "                  lr = 2e-5, # args.learning_rate - default is 5e-5, our notebook had 2e-5\n",
 503 |     "                  eps = 1e-8 # args.adam_epsilon  - default is 1e-8.\n",
 504 |     "                )\n"
 505 |    ]
 506 |   },
 507 |   {
 508 |    "cell_type": "code",
 509 |    "execution_count": null,
 510 |    "metadata": {},
 511 |    "outputs": [],
 512 |    "source": [
 513 |     "from transformers import get_linear_schedule_with_warmup\n",
 514 |     "# Number of training epochs. The BERT authors recommend between 2 and 4. \n",
 515 |     "# We run for 12 here, well beyond that range, so watch the validation\n",
 516 |     "# metrics for signs of over-fitting the training data.\n",
 517 |     "epochs = 12\n",
 518 |     "\n",
 519 |     "# Total number of training steps is [number of batches] x [number of epochs]. \n",
 520 |     "# (Note that this is not the same as the number of training samples).\n",
 521 |     "total_steps = len(train_dataloader) * epochs\n",
 522 |     "\n",
 523 |     "# Create the learning rate scheduler.\n",
 524 |     "scheduler = get_linear_schedule_with_warmup(optimizer, \n",
 525 |     "                                            num_warmup_steps = 0, # Default value in run_glue.py\n",
 526 |     "                                            num_training_steps = total_steps)"
 527 |    ]
 528 |   },
 529 |   {
 530 |    "cell_type": "code",
 531 |    "execution_count": null,
 532 |    "metadata": {},
 533 |    "outputs": [],
 534 |    "source": [
 535 |     "import torch.nn as nn\n",
 536 |     "loss_criterion = nn.CrossEntropyLoss()\n"
 537 |    ]
 538 |   },
 539 |   {
 540 |    "cell_type": "code",
 541 |    "execution_count": null,
 542 |    "metadata": {},
 543 |    "outputs": [],
 544 |    "source": [
 545 |     "from sklearn.metrics import f1_score\n",
 546 |     "# This training code is based on the `run_glue.py` script here:\n",
 547 |     "# https://github.com/huggingface/transformers/blob/5bfcd0485ece086ebcbed2d008813037968a9e58/examples/run_glue.py#L128\n",
 548 |     "\n",
 549 |     "# Set the seed value all over the place to make this reproducible.\n",
 550 |     "seed_val = 42\n",
 551 |     "\n",
 552 |     "random.seed(seed_val)\n",
 553 |     "np.random.seed(seed_val)\n",
 554 |     "torch.manual_seed(seed_val)\n",
 555 |     "torch.cuda.manual_seed_all(seed_val)\n",
 556 |     "\n",
 557 |     "# We'll store a number of quantities such as training and validation loss, \n",
 558 |     "# validation accuracy, and timings.\n",
 559 |     "training_stats = []\n",
 560 |     "\n",
 561 |     "# Measure the total training time for the whole run.\n",
 562 |     "total_t0 = time.time()\n",
 563 |     "\n",
 564 |     "\n",
 565 |     "# For each epoch...\n",
 566 |     "for epoch_i in range(0, epochs):\n",
 567 |     "    \n",
 568 |     "    # ========================================\n",
 569 |     "    #               Training\n",
 570 |     "    # ========================================\n",
 571 |     "    \n",
 572 |     "    # Perform one full pass over the training set.\n",
 573 |     "\n",
 574 |     "    print(\"\")\n",
 575 |     "    print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))\n",
 576 |     "    print('Training...')\n",
 577 |     "    \n",
 578 |     "    #tr and val\n",
 579 |     "    vec_output_tr = []\n",
 580 |     "    vec_output_val =[]\n",
 581 |     "\n",
 582 |     "    # Measure how long the training epoch takes.\n",
 583 |     "    t0 = time.time()\n",
 584 |     "\n",
 585 |     "    # Reset the total loss for this epoch.\n",
 586 |     "    total_train_loss = 0\n",
 587 |     "\n",
 588 |     "    # Put the model into training mode. Don't be misled--the call to \n",
 589 |     "    # `train` just changes the *mode*, it doesn't *perform* the training.\n",
 590 |     "    # `dropout` and `batchnorm` layers behave differently during training\n",
 591 |     "    # vs. test (source: https://stackoverflow.com/questions/51433378/what-does-model-train-do-in-pytorch)\n",
 592 |     "    best_f1 = 0\n",
 593 |     "    model.train()\n",
 594 |     "\n",
 595 |     "    # For each batch of training data...\n",
 596 |     "    for step, batch in enumerate(train_dataloader):\n",
 597 |     "\n",
 598 |     "        # Progress update every 40 batches.\n",
 599 |     "        if step % 40 == 0 and not step == 0:\n",
 600 |     "            # Format the elapsed time as hh:mm:ss.\n",
 601 |     "            elapsed = format_time(time.time() - t0)\n",
 602 |     "            \n",
 603 |     "            # Report progress.\n",
 604 |     "            print('  Batch {:>5,}  of  {:>5,}.    Elapsed: {:}.'.format(step, len(train_dataloader), elapsed))\n",
 605 |     "\n",
 606 |     "        # Unpack this training batch from our dataloader. \n",
 607 |     "        #\n",
 608 |     "        # As we unpack the batch, we'll also copy each tensor to the GPU using the \n",
 609 |     "        # `to` method.\n",
 610 |     "        #\n",
 611 |     "        # `batch` contains three pytorch tensors:\n",
 612 |     "        #   [0]: input ids \n",
 613 |     "        #   [1]: attention masks\n",
 614 |     "        #   [2]: labels \n",
 615 |     "        b_input_ids = batch[0].to(device)\n",
 616 |     "        b_input_mask = batch[1].to(device)\n",
 617 |     "        b_labels = batch[2].to(device)\n",
 618 |     "\n",
 619 |     "        # Always clear any previously calculated gradients before performing a\n",
 620 |     "        # backward pass. PyTorch doesn't do this automatically because \n",
 621 |     "        # accumulating the gradients is \"convenient while training RNNs\". \n",
 622 |     "        # (source: https://stackoverflow.com/questions/48001598/why-do-we-need-to-call-zero-grad-in-pytorch)\n",
 623 |     "        model.zero_grad()        \n",
 624 |     "\n",
 625 |     "        # Perform a forward pass (evaluate the model on this training batch).\n",
 626 |     "        # `model` is the custom FlauBERT classifier defined above, not a stock\n",
 627 |     "        # Hugging Face sequence-classification head. No labels are passed in,\n",
 628 |     "        # and the call returns a pair `(outputs, dense)`: `outputs[0]` holds the\n",
 629 |     "        # \"logits\" (the model outputs prior to activation) and `dense` is the\n",
 630 |     "        # first-token vector after dropout. The loss is computed separately\n",
 631 |     "        # below with `loss_criterion`.\n",
 632 |     "        logits,vec = model(b_input_ids, \n",
 633 |     "                     token_type_ids=None, \n",
 634 |     "                     attention_mask=b_input_mask\n",
 635 |     "                    )\n",
 636 |     "        #new\n",
 637 |     "        logits = logits[0]\n",
 638 |     "        \n",
 639 |     "        #Defining the loss\n",
 640 |     "        loss = loss_criterion(logits, b_labels)\n",
 641 |     "        \n",
 642 |     "        #saving the features_tr\n",
 643 |     "        vec = vec.detach().cpu().numpy()\n",
 644 |     "        vec_output_tr.extend(vec)\n",
 645 |     "        \n",
 646 |     "        # Accumulate the training loss over all of the batches so that we can\n",
 647 |     "        # calculate the average loss at the end. `loss` is a Tensor containing a\n",
 648 |     "        # single value; the `.item()` function just returns the Python value \n",
 649 |     "        # from the tensor.\n",
 650 |     "        total_train_loss += loss.item()\n",
 651 |     "\n",
 652 |     "        # Perform a backward pass to calculate the gradients.\n",
 653 |     "        loss.backward()\n",
 654 |     "\n",
 655 |     "        # Clip the norm of the gradients to 1.0.\n",
 656 |     "        # This is to help prevent the \"exploding gradients\" problem.\n",
 657 |     "        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)\n",
 658 |     "\n",
 659 |     "        # Update parameters and take a step using the computed gradient.\n",
 660 |     "        # The optimizer dictates the \"update rule\"--how the parameters are\n",
 661 |     "        # modified based on their gradients, the learning rate, etc.\n",
 662 |     "        optimizer.step()\n",
 663 |     "\n",
 664 |     "        # Update the learning rate.\n",
 665 |     "        scheduler.step()\n",
 666 |     "        \n",
 667 |     "        \n",
 668 |     "        \n",
 669 |     "\n",
 670 |     "    # Calculate the average loss over all of the batches.\n",
 671 |     "    avg_train_loss = total_train_loss / len(train_dataloader)            \n",
 672 |     "    \n",
 673 |     "    # Measure how long this epoch took.\n",
 674 |     "    training_time = format_time(time.time() - t0)\n",
 675 |     "\n",
 676 |     "    print(\"\")\n",
 677 |     "    print(\"  Average training loss: {0:.2f} \".format(avg_train_loss))\n",
 678 |     "    print(\"  Training epcoh took: {:} \".format(training_time))\n",
 679 |     "        \n",
 680 |     "    # ========================================\n",
 681 |     "    #               Validation\n",
 682 |     "    # ========================================\n",
 683 |     "    # After the completion of each training epoch, measure our performance on\n",
 684 |     "    # our validation set.\n",
 685 |     "\n",
 686 |     "    print(\"\")\n",
 687 |     "    print(\"Running Validation...\")\n",
 688 |     "\n",
 689 |     "    t0 = time.time()\n",
 690 |     "\n",
 691 |     "    # Put the model in evaluation mode--the dropout layers behave differently\n",
 692 |     "    # during evaluation.\n",
 693 |     "    model.eval()\n",
 694 |     "\n",
 695 |     "    # Tracking variables \n",
 696 |     "    total_eval_accuracy = 0\n",
 697 |     "    total_eval_loss = 0\n",
 698 |     "    nb_eval_steps = 0\n",
 699 |     "    predictions=[]\n",
 700 |     "    true_labels=[]\n",
 701 |     "    \n",
 702 |     "\n",
 703 |     "    # Evaluate data for one epoch\n",
 704 |     "    for batch in validation_dataloader:\n",
 705 |     "        \n",
 706 |     "        # Unpack this validation batch from our dataloader. \n",
 707 |     "        #\n",
 708 |     "        # As we unpack the batch, we'll also copy each tensor to the GPU using \n",
 709 |     "        # the `to` method.\n",
 710 |     "        #\n",
 711 |     "        # `batch` contains three pytorch tensors:\n",
 712 |     "        #   [0]: input ids \n",
 713 |     "        #   [1]: attention masks\n",
 714 |     "        #   [2]: labels \n",
 715 |     "        b_input_ids = batch[0].to(device)\n",
 716 |     "        b_input_mask = batch[1].to(device)\n",
 717 |     "        b_labels = batch[2].to(device)\n",
 718 |     "        \n",
 719 |     "        # Tell pytorch not to bother with constructing the compute graph during\n",
 720 |     "        # the forward pass, since this is only needed for backprop (training).\n",
 721 |     "        with torch.no_grad():        \n",
 722 |     "\n",
 723 |     "            # Forward pass, calculate logit predictions.\n",
 724 |     "            # token_type_ids is the same as the \"segment ids\", which \n",
 725 |     "            # differentiates sentence 1 and 2 in 2-sentence tasks.\n",
 726 |     "            # The documentation for this `model` function is here: \n",
 727 |     "            # https://huggingface.co/transformers/v2.2.0/model_doc/bert.html#transformers.BertForSequenceClassification\n",
 728 |     "            # Get the \"logits\" output by the model. The \"logits\" are the output\n",
 729 |     "            # values prior to applying an activation function like the softmax.\n",
 730 |     "            logits,vec = model(b_input_ids, \n",
 731 |     "                           token_type_ids=None, \n",
 732 |     "                           attention_mask=b_input_mask\n",
 733 |     "                           )\n",
 734 |     "            \n",
 735 |     "        #new\n",
 736 |     "        logits = logits[0]\n",
 737 |     "        \n",
 738 |     "        #defining the val loss\n",
 739 |     "        loss = loss_criterion(logits, b_labels)\n",
 740 |     "        \n",
 741 |     "        \n",
 742 |     "        # Accumulate the validation loss.\n",
 743 |     "        total_eval_loss += loss.item()\n",
 744 |     "\n",
 745 |     "        # Move logits and labels to CPU\n",
 746 |     "        logits = logits.detach().cpu().numpy()\n",
 747 |     "\n",
 748 |     "        # Turn the logits into predicted class ids and move the true labels to CPU\n",
 749 |     "        predicted_labels=np.argmax(logits,axis=1)\n",
 750 |     "        predictions.extend(predicted_labels)\n",
 751 |     "        label_ids = b_labels.to('cpu').numpy()\n",
 752 |     "        true_labels.extend(label_ids)\n",
 753 |     "        \n",
 754 |     "        #saving the validation feature vectors\n",
 755 |     "        vec = vec.detach().cpu().numpy()\n",
 756 |     "        vec_output_val.extend(vec)\n",
 757 |     "        \n",
 758 |     "\n",
 759 |     "        # Calculate the accuracy for this batch of test sentences, and\n",
 760 |     "        # accumulate it over all batches.\n",
 761 |     "        total_eval_accuracy += flat_accuracy(logits, label_ids)\n",
 762 |     "        \n",
 763 |     "\n",
 764 |     "    # Report the final accuracy for this validation run.\n",
 765 |     "    avg_val_accuracy = total_eval_accuracy / len(validation_dataloader)\n",
 766 |     "    print(\"  Accuracy: {0:.2f}\".format(avg_val_accuracy))\n",
 767 |     "\n",
 768 |     "    # Calculate the average loss over all of the batches.\n",
 769 |     "    avg_val_loss = total_eval_loss / len(validation_dataloader)\n",
 770 |     "    \n",
 771 |     "    # Measure how long the validation run took.\n",
 772 |     "    validation_time = format_time(time.time() - t0)\n",
 773 |     "    \n",
 774 |     "    print(\"  Validation Loss: {0:.2f}\".format(avg_val_loss))\n",
 775 |     "    print(\"  Validation took: {:}\".format(validation_time))\n",
 776 |     "    print(\"Validation F1-Score: {}\".format(f1_score(true_labels,predictions,average='macro')))\n",
 777 |     "    curr_f1=f1_score(true_labels,predictions,average='macro')\n",
 778 |     "    if curr_f1 > best_f1:\n",
 779 |     "        best_f1=curr_f1\n",
 780 |     "        torch.save(model.state_dict(), 'best_model.pt')\n",
 781 |     "        np.save('best_vec_train.npy',vec_output_tr)\n",
 782 |     "        np.save('best_vec_val.npy',vec_output_val)\n",
 783 |     "    # Record all statistics from this epoch.\n",
 784 |     "#     training_stats.append(\n",
 785 |     "#         {\n",
 786 |     "#             'epoch': epoch_i + 1,\n",
 787 |     "#             'Training Loss': avg_train_loss,\n",
 788 |     "#             'Valid. Loss': avg_val_loss,\n",
 789 |     "#             'Valid. Accur.': avg_val_accuracy,\n",
 790 |     "#             'Training Time': training_time,\n",
 791 |     "#             'Validation Time': validation_time\n",
 792 |     "#         }\n",
 793 |     "#     )\n",
 794 |     "\n",
 795 |     "print(\"\")\n",
 796 |     "print(\"Training complete!\")\n",
 797 |     "\n",
 798 |     "print(\"Total training took {:} (h:mm:ss)\".format(format_time(time.time()-total_t0)))"
 799 |    ]
 800 |   },
 801 |   {
 802 |    "cell_type": "code",
 803 |    "execution_count": null,
 804 |    "metadata": {},
 805 |    "outputs": [],
 806 |    "source": [
 807 |     "# Save model\n",
 808 |     "# try:\n",
 809 |     "#     model_state = {'model': model,\n",
 810 |     "#               'state_dict': model.state_dict(),\n",
 811 |     "#               'optimizer' : optimizer.state_dict()}\n",
 812 |     "\n",
 813 |     "#     torch.save(model_state, 'saved_model.pth')\n",
 814 |     "# except:\n",
 815 |     "#     print('Error in saving model')"
 816 |    ]
 817 |   },
 818 |   {
 819 |    "cell_type": "markdown",
 820 |    "metadata": {},
 821 |    "source": [
 822 |     "**Test model on unseen data**"
 823 |    ]
 824 |   },
 825 |   {
 826 |    "cell_type": "markdown",
 827 |    "metadata": {},
 828 |    "source": [
 829 |     "# PREDICTIONS"
 830 |    ]
 831 |   },
 832 |   {
 833 |    "cell_type": "markdown",
 834 |    "metadata": {},
 835 |    "source": [
 836 |     "**UNCOMMENT THE BELOW CELL IF TRAINING IS NOT PERFORMED IN THIS RUN**"
 837 |    ]
 838 |   },
 839 |   {
 840 |    "cell_type": "code",
 841 |    "execution_count": null,
 842 |    "metadata": {},
 843 |    "outputs": [],
 844 |    "source": [
 845 |     "model_path = '/../working/best_model.pt'  # NOTE(review): training saves 'best_model.pt' relative to the working directory; confirm this absolute path points to the same file"
 846 |    ]
 847 |   },
 848 |   {
 849 |    "cell_type": "code",
 850 |    "execution_count": null,
 851 |    "metadata": {},
 852 |    "outputs": [],
 853 |    "source": [
 854 |     "## Change the **model path** accordingly\n",
 855 |     "# model_str = 'flaubert'\n",
 856 |     "# model_path_dict = {'camembert':'/../input/camembertvinodh/saved_model.pth',\n",
 857 |     "#                   'flaubert':'/../input/flaubertekansh/saved_model.pth'}\n",
 858 |     "\n",
 859 |     "# model_path = model_path_dict[model_str]\n",
 860 |     "checkpoint = torch.load(model_path, map_location=device)  # remap saved tensors onto `device` so a GPU-written checkpoint also loads on a CPU-only run\n",
 861 |     "# model = checkpoint['model']\n",
 862 |     "model.load_state_dict(checkpoint)"
 863 |    ]
 864 |   },
 865 |   {
 866 |    "cell_type": "code",
 867 |    "execution_count": null,
 868 |    "metadata": {},
 869 |    "outputs": [],
 870 |    "source": [
 871 |     "def predict_pyt(model, prediction_dataloader):\n",
 872 |     "    \"\"\"\n",
 873 |     "    Run `model` in evaluation mode over every batch of `prediction_dataloader`.\n",
 874 |     "\n",
 875 |     "    model: pytorch model whose forward pass returns a pair `(out, vec)`, with the logits at `out[0]`\n",
 876 |     "    prediction_dataloader: DataLoader object for which the predictions have to be made; each batch\n",
 877 |     "        starts with (input ids, attention masks), any further tensors (e.g. labels) are ignored.\n",
 878 |     "    return:\n",
 879 |     "        predictions:- Direct predicted labels (argmax over the logits)\n",
 880 |     "        softmax_logits:- per-sample CPU tensors, the logits normalized with softmax\n",
 881 |     "        vec_outputs:- rows of the second model output, converted to numpy\n",
 882 |     "    \"\"\"\n",
 883 |     "\n",
 884 |     "    # Put model in evaluation mode\n",
 885 |     "    model.eval()\n",
 886 |     "\n",
 887 |     "    # Tracking variables \n",
 888 |     "    predictions = []\n",
 889 |     "    softmax_logits = []\n",
 890 |     "    vec_outputs = []\n",
 891 |     "\n",
 892 |     "    # The softmax module holds no state, so build it once instead of once per batch.\n",
 893 |     "    softmax = nn.Softmax(dim=1)\n",
 894 |     "\n",
 895 |     "    # Predict \n",
 896 |     "    for batch in prediction_dataloader:\n",
 897 |     "        # Move every tensor of the batch to the compute device\n",
 898 |     "        batch = tuple(t.to(device) for t in batch)\n",
 899 |     "        # Only the first two tensors are model inputs; unpacking still raises\n",
 900 |     "        # ValueError when the batch holds fewer than two tensors.\n",
 901 |     "        b_input_ids, b_input_mask = batch[:2]\n",
 902 |     "\n",
 903 |     "        # Telling the model not to compute or store gradients, saving memory and \n",
 904 |     "        # speeding up prediction\n",
 905 |     "        with torch.no_grad():\n",
 906 |     "            # Forward pass, calculate logit predictions\n",
 907 |     "            logits, vec = model(b_input_ids, token_type_ids=None,\n",
 908 |     "                                attention_mask=b_input_mask)\n",
 909 |     "            logits = logits[0]\n",
 910 |     "            probs = softmax(logits)\n",
 911 |     "\n",
 912 |     "        # Predicted label = index of the largest logit of each row\n",
 913 |     "        predictions.extend(np.argmax(logits.detach().cpu().numpy(), axis=1))\n",
 914 |     "\n",
 915 |     "        # Keep the probabilities on the CPU so finished batches are not held in device\n",
 916 |     "        # memory. Rows stay torch tensors because the callers convert each one with\n",
 917 |     "        # `.detach().cpu().numpy()`.\n",
 918 |     "        softmax_logits.extend(probs.detach().cpu())\n",
 919 |     "\n",
 920 |     "        #vec_outputs saving\n",
 921 |     "        vec_outputs.extend(vec.detach().cpu().numpy())\n",
 922 |     "\n",
 923 |     "    print('DONE')\n",
 924 |     "    return predictions, softmax_logits, vec_outputs\n",
 925 |     "\n",
 926 |     "def predict_wrapper(model, sentences, max_len=max_len, batch_size=batch_size):\n",
 927 |     "    \"\"\"\n",
 928 |     "    Prepare `sentences` with `prep_input`, wrap them in a sequential DataLoader and return `predict_pyt`'s result.\n",
 929 |     "    NOTE: the `max_len` / `batch_size` defaults are bound when this cell runs, not when the function is called.\"\"\"\n",
 930 |     "    ids, masks = prep_input(sentences, labels=None, max_len=max_len)\n",
 931 |     "    dataset = TensorDataset(ids, masks)\n",
 932 |     "    # Sequential sampling iterates the dataset in index order (no shuffling).\n",
 933 |     "    loader = DataLoader(dataset, sampler=SequentialSampler(dataset), batch_size=batch_size)\n",
 934 |     "    return predict_pyt(model, loader)"
 935 |    ]
 936 |   },
 937 |   {
 938 |    "cell_type": "code",
 939 |    "execution_count": null,
 940 |    "metadata": {},
 941 |    "outputs": [],
 942 |    "source": [
 943 |     "## Prepare the test dataset\n",
 944 |     "batch_size = 32  # NOTE: predict_wrapper captured its default batch_size when it was defined, so this rebinding does not change that default\n",
 945 |     "\n",
 946 |     "Preprocess.prepare_test(text_col)\n",
 947 |     "test_sentences = Preprocess.test_sentences\n",
 948 |     "X_test_phase1= Preprocess.X_test"
 949 |    ]
 950 |   },
 951 |   {
 952 |    "cell_type": "code",
 953 |    "execution_count": null,
 954 |    "metadata": {},
 955 |    "outputs": [],
 956 |    "source": [
 957 |     "# Predictions of the validation set (randomly separated from the train dataset); the softmax outputs are saved to disk\n",
 958 |     "start = time.time()\n",
 959 |     "predictions, val_softmax_logits , vec_outputs= predict_pyt(model, validation_dataloader)\n",
 960 |     "val_softmax_logits = np.array([ten.detach().cpu().numpy() for ten in val_softmax_logits])  # list of per-sample tensors -> one numpy array\n",
 961 |     "np.save('validation_set_softmax_logits.npy',val_softmax_logits)\n",
 962 |     "print('Time Taken Predict for val set: {:}'.format(format_time(time.time() - start)))"
 963 |    ]
 964 |   },
 965 |   {
 966 |    "cell_type": "code",
 967 |    "execution_count": null,
 968 |    "metadata": {},
 969 |    "outputs": [],
 970 |    "source": [
 971 |     "## Predictions of test dataset \n",
 972 |     "\n",
 973 |     "start = time.time()\n",
 974 |     "predictions, softmax_logits , vec_outputs = predict_wrapper(model, test_sentences)\n",
 975 |     "\n",
 976 |     "# Save the feature vectors and the softmax outputs of the test set\n",
 977 |     "np.save('best_vec_test.npy',vec_outputs)\n",
 978 |     "softmax_logits = np.array([ten.detach().cpu().numpy() for ten in softmax_logits])\n",
 979 |     "np.save('X_test_phase1_softmax_logits.npy',softmax_logits)\n",
 980 |     "print('length of predictions {}'.format(len(predictions)))\n",
 981 |     "print('Time Taken Predict for test set: {:}'.format(format_time(time.time() - start) ))"
 982 |    ]
 983 |   },
 984 |   {
 985 |    "cell_type": "code",
 986 |    "execution_count": null,
 987 |    "metadata": {},
 988 |    "outputs": [],
 989 |    "source": [
 990 |     "X_test_phase1['prediction_model']= predictions  # predicted class ids returned for the test sentences\n",
 991 |     "X_test_phase1['Prdtypecode']=X_test_phase1['prediction_model'].map(Preprocess.dict_id_to_code)  # translate ids via Preprocess.dict_id_to_code\n",
 992 |     "print(X_test_phase1['Prdtypecode'].value_counts())\n",
 993 |     "X_test_phase1=X_test_phase1.drop(['prediction_model','Title','Description'],axis=1)  # drop the helper and text columns before export"
 994 |    ]
 995 |   },
 996 |   {
 997 |    "cell_type": "code",
 998 |    "execution_count": null,
 999 |    "metadata": {},
1000 |    "outputs": [],
1001 |    "source": [
1002 |     "X_test_phase1.to_csv('y_test_task1_phase1_pred.tsv',sep='\\t',index=False)"
1003 |    ]
1004 |   }
1005 |  ],
1006 |  "metadata": {
1007 |   "kernelspec": {
1008 |    "display_name": "Python 3",
1009 |    "language": "python",
1010 |    "name": "python3"
1011 |   },
1012 |   "language_info": {
1013 |    "codemirror_mode": {
1014 |     "name": "ipython",
1015 |     "version": 3
1016 |    },
1017 |    "file_extension": ".py",
1018 |    "mimetype": "text/x-python",
1019 |    "name": "python",
1020 |    "nbconvert_exporter": "python",
1021 |    "pygments_lexer": "ipython3",
1022 |    "version": "3.7.7"
1023 |   },
1024 |   "toc": {
1025 |    "base_numbering": 1,
1026 |    "nav_menu": {},
1027 |    "number_sections": true,
1028 |    "sideBar": true,
1029 |    "skip_h1_title": false,
1030 |    "title_cell": "Table of Contents",
1031 |    "title_sidebar": "Contents",
1032 |    "toc_cell": false,
1033 |    "toc_position": {},
1034 |    "toc_section_display": true,
1035 |    "toc_window_display": false
1036 |   },
1037 |   "widgets": {
1038 |    "application/vnd.jupyter.widget-state+json": {
1039 |     "state": {
1040 |      "06d2301a3d34440eb19a887fb51d562c": {
1041 |       "model_module": "@jupyter-widgets/controls",
1042 |       "model_module_version": "1.5.0",
1043 |       "model_name": "FloatProgressModel",
1044 |       "state": {
1045 |        "_dom_classes": [],
1046 |        "_model_module": "@jupyter-widgets/controls",
1047 |        "_model_module_version": "1.5.0",
1048 |        "_model_name": "FloatProgressModel",
1049 |        "_view_count": null,
1050 |        "_view_module": "@jupyter-widgets/controls",
1051 |        "_view_module_version": "1.5.0",
1052 |        "_view_name": "ProgressView",
1053 |        "bar_style": "success",
1054 |        "description": "Downloading: 100%",
1055 |        "description_tooltip": null,
1056 |        "layout": "IPY_MODEL_18fe6f38ca234379a17e44cd1fad50d4",
1057 |        "max": 553238687,
1058 |        "min": 0,
1059 |        "orientation": "horizontal",
1060 |        "style": "IPY_MODEL_54ca2b8f204b4760bccb27572ff7b74a",
1061 |        "value": 553238687
1062 |       }
1063 |      },
1064 |      "0f7e8f4e75fc4889a2dd464588c0516d": {
1065 |       "model_module": "@jupyter-widgets/controls",
1066 |       "model_module_version": "1.5.0",
1067 |       "model_name": "HBoxModel",
1068 |       "state": {
1069 |        "_dom_classes": [],
1070 |        "_model_module": "@jupyter-widgets/controls",
1071 |        "_model_module_version": "1.5.0",
1072 |        "_model_name": "HBoxModel",
1073 |        "_view_count": null,
1074 |        "_view_module": "@jupyter-widgets/controls",
1075 |        "_view_module_version": "1.5.0",
1076 |        "_view_name": "HBoxView",
1077 |        "box_style": "",
1078 |        "children": [
1079 |         "IPY_MODEL_33dc04e218864811a4fe0c62ca737d83",
1080 |         "IPY_MODEL_274606aec69a461f8c4259316b86c4af"
1081 |        ],
1082 |        "layout": "IPY_MODEL_f5c9c24c01114e168e90ed555fb9f05a"
1083 |       }
1084 |      },
1085 |      "12a4548fdad44ea181868776c7616455": {
1086 |       "model_module": "@jupyter-widgets/base",
1087 |       "model_module_version": "1.2.0",
1088 |       "model_name": "LayoutModel",
1089 |       "state": {
1090 |        "_model_module": "@jupyter-widgets/base",
1091 |        "_model_module_version": "1.2.0",
1092 |        "_model_name": "LayoutModel",
1093 |        "_view_count": null,
1094 |        "_view_module": "@jupyter-widgets/base",
1095 |        "_view_module_version": "1.2.0",
1096 |        "_view_name": "LayoutView",
1097 |        "align_content": null,
1098 |        "align_items": null,
1099 |        "align_self": null,
1100 |        "border": null,
1101 |        "bottom": null,
1102 |        "display": null,
1103 |        "flex": null,
1104 |        "flex_flow": null,
1105 |        "grid_area": null,
1106 |        "grid_auto_columns": null,
1107 |        "grid_auto_flow": null,
1108 |        "grid_auto_rows": null,
1109 |        "grid_column": null,
1110 |        "grid_gap": null,
1111 |        "grid_row": null,
1112 |        "grid_template_areas": null,
1113 |        "grid_template_columns": null,
1114 |        "grid_template_rows": null,
1115 |        "height": null,
1116 |        "justify_content": null,
1117 |        "justify_items": null,
1118 |        "left": null,
1119 |        "margin": null,
1120 |        "max_height": null,
1121 |        "max_width": null,
1122 |        "min_height": null,
1123 |        "min_width": null,
1124 |        "object_fit": null,
1125 |        "object_position": null,
1126 |        "order": null,
1127 |        "overflow": null,
1128 |        "overflow_x": null,
1129 |        "overflow_y": null,
1130 |        "padding": null,
1131 |        "right": null,
1132 |        "top": null,
1133 |        "visibility": null,
1134 |        "width": null
1135 |       }
1136 |      },
1137 |      "18fe6f38ca234379a17e44cd1fad50d4": {
1138 |       "model_module": "@jupyter-widgets/base",
1139 |       "model_module_version": "1.2.0",
1140 |       "model_name": "LayoutModel",
1141 |       "state": {
1142 |        "_model_module": "@jupyter-widgets/base",
1143 |        "_model_module_version": "1.2.0",
1144 |        "_model_name": "LayoutModel",
1145 |        "_view_count": null,
1146 |        "_view_module": "@jupyter-widgets/base",
1147 |        "_view_module_version": "1.2.0",
1148 |        "_view_name": "LayoutView",
1149 |        "align_content": null,
1150 |        "align_items": null,
1151 |        "align_self": null,
1152 |        "border": null,
1153 |        "bottom": null,
1154 |        "display": null,
1155 |        "flex": null,
1156 |        "flex_flow": null,
1157 |        "grid_area": null,
1158 |        "grid_auto_columns": null,
1159 |        "grid_auto_flow": null,
1160 |        "grid_auto_rows": null,
1161 |        "grid_column": null,
1162 |        "grid_gap": null,
1163 |        "grid_row": null,
1164 |        "grid_template_areas": null,
1165 |        "grid_template_columns": null,
1166 |        "grid_template_rows": null,
1167 |        "height": null,
1168 |        "justify_content": null,
1169 |        "justify_items": null,
1170 |        "left": null,
1171 |        "margin": null,
1172 |        "max_height": null,
1173 |        "max_width": null,
1174 |        "min_height": null,
1175 |        "min_width": null,
1176 |        "object_fit": null,
1177 |        "object_position": null,
1178 |        "order": null,
1179 |        "overflow": null,
1180 |        "overflow_x": null,
1181 |        "overflow_y": null,
1182 |        "padding": null,
1183 |        "right": null,
1184 |        "top": null,
1185 |        "visibility": null,
1186 |        "width": null
1187 |       }
1188 |      },
1189 |      "26cc77465c0e4f30b086bf93a81f9386": {
1190 |       "model_module": "@jupyter-widgets/controls",
1191 |       "model_module_version": "1.5.0",
1192 |       "model_name": "DescriptionStyleModel",
1193 |       "state": {
1194 |        "_model_module": "@jupyter-widgets/controls",
1195 |        "_model_module_version": "1.5.0",
1196 |        "_model_name": "DescriptionStyleModel",
1197 |        "_view_count": null,
1198 |        "_view_module": "@jupyter-widgets/base",
1199 |        "_view_module_version": "1.2.0",
1200 |        "_view_name": "StyleView",
1201 |        "description_width": ""
1202 |       }
1203 |      },
1204 |      "274606aec69a461f8c4259316b86c4af": {
1205 |       "model_module": "@jupyter-widgets/controls",
1206 |       "model_module_version": "1.5.0",
1207 |       "model_name": "HTMLModel",
1208 |       "state": {
1209 |        "_dom_classes": [],
1210 |        "_model_module": "@jupyter-widgets/controls",
1211 |        "_model_module_version": "1.5.0",
1212 |        "_model_name": "HTMLModel",
1213 |        "_view_count": null,
1214 |        "_view_module": "@jupyter-widgets/controls",
1215 |        "_view_module_version": "1.5.0",
1216 |        "_view_name": "HTMLView",
1217 |        "description": "",
1218 |        "description_tooltip": null,
1219 |        "layout": "IPY_MODEL_ed03788fd9b14684b1d339664f56bfd5",
1220 |        "placeholder": "​",
1221 |        "style": "IPY_MODEL_5fb92f13f2a5410b84cc9a7573e7da0a",
1222 |        "value": " 896k/896k [00:01&lt;00:00, 770kB/s]"
1223 |       }
1224 |      },
1225 |      "2a6633db6b2946d6a6c8a66065e394cd": {
1226 |       "model_module": "@jupyter-widgets/base",
1227 |       "model_module_version": "1.2.0",
1228 |       "model_name": "LayoutModel",
1229 |       "state": {
1230 |        "_model_module": "@jupyter-widgets/base",
1231 |        "_model_module_version": "1.2.0",
1232 |        "_model_name": "LayoutModel",
1233 |        "_view_count": null,
1234 |        "_view_module": "@jupyter-widgets/base",
1235 |        "_view_module_version": "1.2.0",
1236 |        "_view_name": "LayoutView",
1237 |        "align_content": null,
1238 |        "align_items": null,
1239 |        "align_self": null,
1240 |        "border": null,
1241 |        "bottom": null,
1242 |        "display": null,
1243 |        "flex": null,
1244 |        "flex_flow": null,
1245 |        "grid_area": null,
1246 |        "grid_auto_columns": null,
1247 |        "grid_auto_flow": null,
1248 |        "grid_auto_rows": null,
1249 |        "grid_column": null,
1250 |        "grid_gap": null,
1251 |        "grid_row": null,
1252 |        "grid_template_areas": null,
1253 |        "grid_template_columns": null,
1254 |        "grid_template_rows": null,
1255 |        "height": null,
1256 |        "justify_content": null,
1257 |        "justify_items": null,
1258 |        "left": null,
1259 |        "margin": null,
1260 |        "max_height": null,
1261 |        "max_width": null,
1262 |        "min_height": null,
1263 |        "min_width": null,
1264 |        "object_fit": null,
1265 |        "object_position": null,
1266 |        "order": null,
1267 |        "overflow": null,
1268 |        "overflow_x": null,
1269 |        "overflow_y": null,
1270 |        "padding": null,
1271 |        "right": null,
1272 |        "top": null,
1273 |        "visibility": null,
1274 |        "width": null
1275 |       }
1276 |      },
1277 |      "33dc04e218864811a4fe0c62ca737d83": {
1278 |       "model_module": "@jupyter-widgets/controls",
1279 |       "model_module_version": "1.5.0",
1280 |       "model_name": "FloatProgressModel",
1281 |       "state": {
1282 |        "_dom_classes": [],
1283 |        "_model_module": "@jupyter-widgets/controls",
1284 |        "_model_module_version": "1.5.0",
1285 |        "_model_name": "FloatProgressModel",
1286 |        "_view_count": null,
1287 |        "_view_module": "@jupyter-widgets/controls",
1288 |        "_view_module_version": "1.5.0",
1289 |        "_view_name": "ProgressView",
1290 |        "bar_style": "success",
1291 |        "description": "Downloading: 100%",
1292 |        "description_tooltip": null,
1293 |        "layout": "IPY_MODEL_703da9466c0241519229161cb6ec5d87",
1294 |        "max": 895731,
1295 |        "min": 0,
1296 |        "orientation": "horizontal",
1297 |        "style": "IPY_MODEL_55d928f692d04a008a85a77abf0e46a0",
1298 |        "value": 895731
1299 |       }
1300 |      },
1301 |      "432e4a857a5d4151a4d1b5bc7b6bb4fb": {
1302 |       "model_module": "@jupyter-widgets/controls",
1303 |       "model_module_version": "1.5.0",
1304 |       "model_name": "DescriptionStyleModel",
1305 |       "state": {
1306 |        "_model_module": "@jupyter-widgets/controls",
1307 |        "_model_module_version": "1.5.0",
1308 |        "_model_name": "DescriptionStyleModel",
1309 |        "_view_count": null,
1310 |        "_view_module": "@jupyter-widgets/base",
1311 |        "_view_module_version": "1.2.0",
1312 |        "_view_name": "StyleView",
1313 |        "description_width": ""
1314 |       }
1315 |      },
1316 |      "4c498d5d31d543ad9d4f63df61ce9332": {
1317 |       "model_module": "@jupyter-widgets/controls",
1318 |       "model_module_version": "1.5.0",
1319 |       "model_name": "HBoxModel",
1320 |       "state": {
1321 |        "_dom_classes": [],
1322 |        "_model_module": "@jupyter-widgets/controls",
1323 |        "_model_module_version": "1.5.0",
1324 |        "_model_name": "HBoxModel",
1325 |        "_view_count": null,
1326 |        "_view_module": "@jupyter-widgets/controls",
1327 |        "_view_module_version": "1.5.0",
1328 |        "_view_name": "HBoxView",
1329 |        "box_style": "",
1330 |        "children": [
1331 |         "IPY_MODEL_7be53379597948ed83e132d9014abf53",
1332 |         "IPY_MODEL_ddb8a6c765504d6ea10daf2da84a5c83"
1333 |        ],
1334 |        "layout": "IPY_MODEL_d8565e4e46924bbfbabe7b3eb8df8b79"
1335 |       }
1336 |      },
1337 |      "54ca2b8f204b4760bccb27572ff7b74a": {
1338 |       "model_module": "@jupyter-widgets/controls",
1339 |       "model_module_version": "1.5.0",
1340 |       "model_name": "ProgressStyleModel",
1341 |       "state": {
1342 |        "_model_module": "@jupyter-widgets/controls",
1343 |        "_model_module_version": "1.5.0",
1344 |        "_model_name": "ProgressStyleModel",
1345 |        "_view_count": null,
1346 |        "_view_module": "@jupyter-widgets/base",
1347 |        "_view_module_version": "1.2.0",
1348 |        "_view_name": "StyleView",
1349 |        "bar_color": null,
1350 |        "description_width": "initial"
1351 |       }
1352 |      },
1353 |      "55d928f692d04a008a85a77abf0e46a0": {
1354 |       "model_module": "@jupyter-widgets/controls",
1355 |       "model_module_version": "1.5.0",
1356 |       "model_name": "ProgressStyleModel",
1357 |       "state": {
1358 |        "_model_module": "@jupyter-widgets/controls",
1359 |        "_model_module_version": "1.5.0",
1360 |        "_model_name": "ProgressStyleModel",
1361 |        "_view_count": null,
1362 |        "_view_module": "@jupyter-widgets/base",
1363 |        "_view_module_version": "1.2.0",
1364 |        "_view_name": "StyleView",
1365 |        "bar_color": null,
1366 |        "description_width": "initial"
1367 |       }
1368 |      },
1369 |      "5bc9b6ada49a4642a1cba622c93f8b62": {
1370 |       "model_module": "@jupyter-widgets/base",
1371 |       "model_module_version": "1.2.0",
1372 |       "model_name": "LayoutModel",
1373 |       "state": {
1374 |        "_model_module": "@jupyter-widgets/base",
1375 |        "_model_module_version": "1.2.0",
1376 |        "_model_name": "LayoutModel",
1377 |        "_view_count": null,
1378 |        "_view_module": "@jupyter-widgets/base",
1379 |        "_view_module_version": "1.2.0",
1380 |        "_view_name": "LayoutView",
1381 |        "align_content": null,
1382 |        "align_items": null,
1383 |        "align_self": null,
1384 |        "border": null,
1385 |        "bottom": null,
1386 |        "display": null,
1387 |        "flex": null,
1388 |        "flex_flow": null,
1389 |        "grid_area": null,
1390 |        "grid_auto_columns": null,
1391 |        "grid_auto_flow": null,
1392 |        "grid_auto_rows": null,
1393 |        "grid_column": null,
1394 |        "grid_gap": null,
1395 |        "grid_row": null,
1396 |        "grid_template_areas": null,
1397 |        "grid_template_columns": null,
1398 |        "grid_template_rows": null,
1399 |        "height": null,
1400 |        "justify_content": null,
1401 |        "justify_items": null,
1402 |        "left": null,
1403 |        "margin": null,
1404 |        "max_height": null,
1405 |        "max_width": null,
1406 |        "min_height": null,
1407 |        "min_width": null,
1408 |        "object_fit": null,
1409 |        "object_position": null,
1410 |        "order": null,
1411 |        "overflow": null,
1412 |        "overflow_x": null,
1413 |        "overflow_y": null,
1414 |        "padding": null,
1415 |        "right": null,
1416 |        "top": null,
1417 |        "visibility": null,
1418 |        "width": null
1419 |       }
1420 |      },
1421 |      "5fb92f13f2a5410b84cc9a7573e7da0a": {
1422 |       "model_module": "@jupyter-widgets/controls",
1423 |       "model_module_version": "1.5.0",
1424 |       "model_name": "DescriptionStyleModel",
1425 |       "state": {
1426 |        "_model_module": "@jupyter-widgets/controls",
1427 |        "_model_module_version": "1.5.0",
1428 |        "_model_name": "DescriptionStyleModel",
1429 |        "_view_count": null,
1430 |        "_view_module": "@jupyter-widgets/base",
1431 |        "_view_module_version": "1.2.0",
1432 |        "_view_name": "StyleView",
1433 |        "description_width": ""
1434 |       }
1435 |      },
1436 |      "67f9de54d6e5434190bd07b7151d23b7": {
1437 |       "model_module": "@jupyter-widgets/controls",
1438 |       "model_module_version": "1.5.0",
1439 |       "model_name": "FloatProgressModel",
1440 |       "state": {
1441 |        "_dom_classes": [],
1442 |        "_model_module": "@jupyter-widgets/controls",
1443 |        "_model_module_version": "1.5.0",
1444 |        "_model_name": "FloatProgressModel",
1445 |        "_view_count": null,
1446 |        "_view_module": "@jupyter-widgets/controls",
1447 |        "_view_module_version": "1.5.0",
1448 |        "_view_name": "ProgressView",
1449 |        "bar_style": "success",
1450 |        "description": "Downloading: 100%",
1451 |        "description_tooltip": null,
1452 |        "layout": "IPY_MODEL_edcc338999ac45feaab03a86e2af75a9",
1453 |        "max": 1496,
1454 |        "min": 0,
1455 |        "orientation": "horizontal",
1456 |        "style": "IPY_MODEL_f575006dc6624157bfb408cced4e6ae6",
1457 |        "value": 1496
1458 |       }
1459 |      },
1460 |      "68cf808ab7e1428fa9acf6a9fd435b49": {
1461 |       "model_module": "@jupyter-widgets/base",
1462 |       "model_module_version": "1.2.0",
1463 |       "model_name": "LayoutModel",
1464 |       "state": {
1465 |        "_model_module": "@jupyter-widgets/base",
1466 |        "_model_module_version": "1.2.0",
1467 |        "_model_name": "LayoutModel",
1468 |        "_view_count": null,
1469 |        "_view_module": "@jupyter-widgets/base",
1470 |        "_view_module_version": "1.2.0",
1471 |        "_view_name": "LayoutView",
1472 |        "align_content": null,
1473 |        "align_items": null,
1474 |        "align_self": null,
1475 |        "border": null,
1476 |        "bottom": null,
1477 |        "display": null,
1478 |        "flex": null,
1479 |        "flex_flow": null,
1480 |        "grid_area": null,
1481 |        "grid_auto_columns": null,
1482 |        "grid_auto_flow": null,
1483 |        "grid_auto_rows": null,
1484 |        "grid_column": null,
1485 |        "grid_gap": null,
1486 |        "grid_row": null,
1487 |        "grid_template_areas": null,
1488 |        "grid_template_columns": null,
1489 |        "grid_template_rows": null,
1490 |        "height": null,
1491 |        "justify_content": null,
1492 |        "justify_items": null,
1493 |        "left": null,
1494 |        "margin": null,
1495 |        "max_height": null,
1496 |        "max_width": null,
1497 |        "min_height": null,
1498 |        "min_width": null,
1499 |        "object_fit": null,
1500 |        "object_position": null,
1501 |        "order": null,
1502 |        "overflow": null,
1503 |        "overflow_x": null,
1504 |        "overflow_y": null,
1505 |        "padding": null,
1506 |        "right": null,
1507 |        "top": null,
1508 |        "visibility": null,
1509 |        "width": null
1510 |       }
1511 |      },
1512 |      "703da9466c0241519229161cb6ec5d87": {
1513 |       "model_module": "@jupyter-widgets/base",
1514 |       "model_module_version": "1.2.0",
1515 |       "model_name": "LayoutModel",
1516 |       "state": {
1517 |        "_model_module": "@jupyter-widgets/base",
1518 |        "_model_module_version": "1.2.0",
1519 |        "_model_name": "LayoutModel",
1520 |        "_view_count": null,
1521 |        "_view_module": "@jupyter-widgets/base",
1522 |        "_view_module_version": "1.2.0",
1523 |        "_view_name": "LayoutView",
1524 |        "align_content": null,
1525 |        "align_items": null,
1526 |        "align_self": null,
1527 |        "border": null,
1528 |        "bottom": null,
1529 |        "display": null,
1530 |        "flex": null,
1531 |        "flex_flow": null,
1532 |        "grid_area": null,
1533 |        "grid_auto_columns": null,
1534 |        "grid_auto_flow": null,
1535 |        "grid_auto_rows": null,
1536 |        "grid_column": null,
1537 |        "grid_gap": null,
1538 |        "grid_row": null,
1539 |        "grid_template_areas": null,
1540 |        "grid_template_columns": null,
1541 |        "grid_template_rows": null,
1542 |        "height": null,
1543 |        "justify_content": null,
1544 |        "justify_items": null,
1545 |        "left": null,
1546 |        "margin": null,
1547 |        "max_height": null,
1548 |        "max_width": null,
1549 |        "min_height": null,
1550 |        "min_width": null,
1551 |        "object_fit": null,
1552 |        "object_position": null,
1553 |        "order": null,
1554 |        "overflow": null,
1555 |        "overflow_x": null,
1556 |        "overflow_y": null,
1557 |        "padding": null,
1558 |        "right": null,
1559 |        "top": null,
1560 |        "visibility": null,
1561 |        "width": null
1562 |       }
1563 |      },
1564 |      "7be53379597948ed83e132d9014abf53": {
1565 |       "model_module": "@jupyter-widgets/controls",
1566 |       "model_module_version": "1.5.0",
1567 |       "model_name": "FloatProgressModel",
1568 |       "state": {
1569 |        "_dom_classes": [],
1570 |        "_model_module": "@jupyter-widgets/controls",
1571 |        "_model_module_version": "1.5.0",
1572 |        "_model_name": "FloatProgressModel",
1573 |        "_view_count": null,
1574 |        "_view_module": "@jupyter-widgets/controls",
1575 |        "_view_module_version": "1.5.0",
1576 |        "_view_name": "ProgressView",
1577 |        "bar_style": "success",
1578 |        "description": "Downloading: 100%",
1579 |        "description_tooltip": null,
1580 |        "layout": "IPY_MODEL_91e5603df33e4f6fad3bc0b8fe67cb47",
1581 |        "max": 1561415,
1582 |        "min": 0,
1583 |        "orientation": "horizontal",
1584 |        "style": "IPY_MODEL_b4807859ba084927b1c01ad25559e790",
1585 |        "value": 1561415
1586 |       }
1587 |      },
1588 |      "91e35ac677a045c49b3f7dbf243b2e6a": {
1589 |       "model_module": "@jupyter-widgets/base",
1590 |       "model_module_version": "1.2.0",
1591 |       "model_name": "LayoutModel",
1592 |       "state": {
1593 |        "_model_module": "@jupyter-widgets/base",
1594 |        "_model_module_version": "1.2.0",
1595 |        "_model_name": "LayoutModel",
1596 |        "_view_count": null,
1597 |        "_view_module": "@jupyter-widgets/base",
1598 |        "_view_module_version": "1.2.0",
1599 |        "_view_name": "LayoutView",
1600 |        "align_content": null,
1601 |        "align_items": null,
1602 |        "align_self": null,
1603 |        "border": null,
1604 |        "bottom": null,
1605 |        "display": null,
1606 |        "flex": null,
1607 |        "flex_flow": null,
1608 |        "grid_area": null,
1609 |        "grid_auto_columns": null,
1610 |        "grid_auto_flow": null,
1611 |        "grid_auto_rows": null,
1612 |        "grid_column": null,
1613 |        "grid_gap": null,
1614 |        "grid_row": null,
1615 |        "grid_template_areas": null,
1616 |        "grid_template_columns": null,
1617 |        "grid_template_rows": null,
1618 |        "height": null,
1619 |        "justify_content": null,
1620 |        "justify_items": null,
1621 |        "left": null,
1622 |        "margin": null,
1623 |        "max_height": null,
1624 |        "max_width": null,
1625 |        "min_height": null,
1626 |        "min_width": null,
1627 |        "object_fit": null,
1628 |        "object_position": null,
1629 |        "order": null,
1630 |        "overflow": null,
1631 |        "overflow_x": null,
1632 |        "overflow_y": null,
1633 |        "padding": null,
1634 |        "right": null,
1635 |        "top": null,
1636 |        "visibility": null,
1637 |        "width": null
1638 |       }
1639 |      },
1640 |      "91e5603df33e4f6fad3bc0b8fe67cb47": {
1641 |       "model_module": "@jupyter-widgets/base",
1642 |       "model_module_version": "1.2.0",
1643 |       "model_name": "LayoutModel",
1644 |       "state": {
1645 |        "_model_module": "@jupyter-widgets/base",
1646 |        "_model_module_version": "1.2.0",
1647 |        "_model_name": "LayoutModel",
1648 |        "_view_count": null,
1649 |        "_view_module": "@jupyter-widgets/base",
1650 |        "_view_module_version": "1.2.0",
1651 |        "_view_name": "LayoutView",
1652 |        "align_content": null,
1653 |        "align_items": null,
1654 |        "align_self": null,
1655 |        "border": null,
1656 |        "bottom": null,
1657 |        "display": null,
1658 |        "flex": null,
1659 |        "flex_flow": null,
1660 |        "grid_area": null,
1661 |        "grid_auto_columns": null,
1662 |        "grid_auto_flow": null,
1663 |        "grid_auto_rows": null,
1664 |        "grid_column": null,
1665 |        "grid_gap": null,
1666 |        "grid_row": null,
1667 |        "grid_template_areas": null,
1668 |        "grid_template_columns": null,
1669 |        "grid_template_rows": null,
1670 |        "height": null,
1671 |        "justify_content": null,
1672 |        "justify_items": null,
1673 |        "left": null,
1674 |        "margin": null,
1675 |        "max_height": null,
1676 |        "max_width": null,
1677 |        "min_height": null,
1678 |        "min_width": null,
1679 |        "object_fit": null,
1680 |        "object_position": null,
1681 |        "order": null,
1682 |        "overflow": null,
1683 |        "overflow_x": null,
1684 |        "overflow_y": null,
1685 |        "padding": null,
1686 |        "right": null,
1687 |        "top": null,
1688 |        "visibility": null,
1689 |        "width": null
1690 |       }
1691 |      },
1692 |      "94ef6e8f88bb498783522af9621bf811": {
1693 |       "model_module": "@jupyter-widgets/controls",
1694 |       "model_module_version": "1.5.0",
1695 |       "model_name": "HTMLModel",
1696 |       "state": {
1697 |        "_dom_classes": [],
1698 |        "_model_module": "@jupyter-widgets/controls",
1699 |        "_model_module_version": "1.5.0",
1700 |        "_model_name": "HTMLModel",
1701 |        "_view_count": null,
1702 |        "_view_module": "@jupyter-widgets/controls",
1703 |        "_view_module_version": "1.5.0",
1704 |        "_view_name": "HTMLView",
1705 |        "description": "",
1706 |        "description_tooltip": null,
1707 |        "layout": "IPY_MODEL_91e35ac677a045c49b3f7dbf243b2e6a",
1708 |        "placeholder": "​",
1709 |        "style": "IPY_MODEL_432e4a857a5d4151a4d1b5bc7b6bb4fb",
1710 |        "value": " 1.50k/1.50k [00:01&lt;00:00, 1.23kB/s]"
1711 |       }
1712 |      },
1713 |      "9f4e9ae9b7fc4e89ac6aa81af567a678": {
1714 |       "model_module": "@jupyter-widgets/controls",
1715 |       "model_module_version": "1.5.0",
1716 |       "model_name": "HBoxModel",
1717 |       "state": {
1718 |        "_dom_classes": [],
1719 |        "_model_module": "@jupyter-widgets/controls",
1720 |        "_model_module_version": "1.5.0",
1721 |        "_model_name": "HBoxModel",
1722 |        "_view_count": null,
1723 |        "_view_module": "@jupyter-widgets/controls",
1724 |        "_view_module_version": "1.5.0",
1725 |        "_view_name": "HBoxView",
1726 |        "box_style": "",
1727 |        "children": [
1728 |         "IPY_MODEL_06d2301a3d34440eb19a887fb51d562c",
1729 |         "IPY_MODEL_b31585a6d0574a0cb973bb3679a8168a"
1730 |        ],
1731 |        "layout": "IPY_MODEL_68cf808ab7e1428fa9acf6a9fd435b49"
1732 |       }
1733 |      },
1734 |      "b31585a6d0574a0cb973bb3679a8168a": {
1735 |       "model_module": "@jupyter-widgets/controls",
1736 |       "model_module_version": "1.5.0",
1737 |       "model_name": "HTMLModel",
1738 |       "state": {
1739 |        "_dom_classes": [],
1740 |        "_model_module": "@jupyter-widgets/controls",
1741 |        "_model_module_version": "1.5.0",
1742 |        "_model_name": "HTMLModel",
1743 |        "_view_count": null,
1744 |        "_view_module": "@jupyter-widgets/controls",
1745 |        "_view_module_version": "1.5.0",
1746 |        "_view_name": "HTMLView",
1747 |        "description": "",
1748 |        "description_tooltip": null,
1749 |        "layout": "IPY_MODEL_12a4548fdad44ea181868776c7616455",
1750 |        "placeholder": "​",
1751 |        "style": "IPY_MODEL_d422f60f5607443da23dba147889e3b7",
1752 |        "value": " 553M/553M [00:16&lt;00:00, 34.0MB/s]"
1753 |       }
1754 |      },
1755 |      "b4807859ba084927b1c01ad25559e790": {
1756 |       "model_module": "@jupyter-widgets/controls",
1757 |       "model_module_version": "1.5.0",
1758 |       "model_name": "ProgressStyleModel",
1759 |       "state": {
1760 |        "_model_module": "@jupyter-widgets/controls",
1761 |        "_model_module_version": "1.5.0",
1762 |        "_model_name": "ProgressStyleModel",
1763 |        "_view_count": null,
1764 |        "_view_module": "@jupyter-widgets/base",
1765 |        "_view_module_version": "1.2.0",
1766 |        "_view_name": "StyleView",
1767 |        "bar_color": null,
1768 |        "description_width": "initial"
1769 |       }
1770 |      },
1771 |      "d422f60f5607443da23dba147889e3b7": {
1772 |       "model_module": "@jupyter-widgets/controls",
1773 |       "model_module_version": "1.5.0",
1774 |       "model_name": "DescriptionStyleModel",
1775 |       "state": {
1776 |        "_model_module": "@jupyter-widgets/controls",
1777 |        "_model_module_version": "1.5.0",
1778 |        "_model_name": "DescriptionStyleModel",
1779 |        "_view_count": null,
1780 |        "_view_module": "@jupyter-widgets/base",
1781 |        "_view_module_version": "1.2.0",
1782 |        "_view_name": "StyleView",
1783 |        "description_width": ""
1784 |       }
1785 |      },
1786 |      "d8565e4e46924bbfbabe7b3eb8df8b79": {
1787 |       "model_module": "@jupyter-widgets/base",
1788 |       "model_module_version": "1.2.0",
1789 |       "model_name": "LayoutModel",
1790 |       "state": {
1791 |        "_model_module": "@jupyter-widgets/base",
1792 |        "_model_module_version": "1.2.0",
1793 |        "_model_name": "LayoutModel",
1794 |        "_view_count": null,
1795 |        "_view_module": "@jupyter-widgets/base",
1796 |        "_view_module_version": "1.2.0",
1797 |        "_view_name": "LayoutView",
1798 |        "align_content": null,
1799 |        "align_items": null,
1800 |        "align_self": null,
1801 |        "border": null,
1802 |        "bottom": null,
1803 |        "display": null,
1804 |        "flex": null,
1805 |        "flex_flow": null,
1806 |        "grid_area": null,
1807 |        "grid_auto_columns": null,
1808 |        "grid_auto_flow": null,
1809 |        "grid_auto_rows": null,
1810 |        "grid_column": null,
1811 |        "grid_gap": null,
1812 |        "grid_row": null,
1813 |        "grid_template_areas": null,
1814 |        "grid_template_columns": null,
1815 |        "grid_template_rows": null,
1816 |        "height": null,
1817 |        "justify_content": null,
1818 |        "justify_items": null,
1819 |        "left": null,
1820 |        "margin": null,
1821 |        "max_height": null,
1822 |        "max_width": null,
1823 |        "min_height": null,
1824 |        "min_width": null,
1825 |        "object_fit": null,
1826 |        "object_position": null,
1827 |        "order": null,
1828 |        "overflow": null,
1829 |        "overflow_x": null,
1830 |        "overflow_y": null,
1831 |        "padding": null,
1832 |        "right": null,
1833 |        "top": null,
1834 |        "visibility": null,
1835 |        "width": null
1836 |       }
1837 |      },
1838 |      "ddb8a6c765504d6ea10daf2da84a5c83": {
1839 |       "model_module": "@jupyter-widgets/controls",
1840 |       "model_module_version": "1.5.0",
1841 |       "model_name": "HTMLModel",
1842 |       "state": {
1843 |        "_dom_classes": [],
1844 |        "_model_module": "@jupyter-widgets/controls",
1845 |        "_model_module_version": "1.5.0",
1846 |        "_model_name": "HTMLModel",
1847 |        "_view_count": null,
1848 |        "_view_module": "@jupyter-widgets/controls",
1849 |        "_view_module_version": "1.5.0",
1850 |        "_view_name": "HTMLView",
1851 |        "description": "",
1852 |        "description_tooltip": null,
1853 |        "layout": "IPY_MODEL_5bc9b6ada49a4642a1cba622c93f8b62",
1854 |        "placeholder": "​",
1855 |        "style": "IPY_MODEL_26cc77465c0e4f30b086bf93a81f9386",
1856 |        "value": " 1.56M/1.56M [00:02&lt;00:00, 629kB/s]"
1857 |       }
1858 |      },
1859 |      "ed03788fd9b14684b1d339664f56bfd5": {
1860 |       "model_module": "@jupyter-widgets/base",
1861 |       "model_module_version": "1.2.0",
1862 |       "model_name": "LayoutModel",
1863 |       "state": {
1864 |        "_model_module": "@jupyter-widgets/base",
1865 |        "_model_module_version": "1.2.0",
1866 |        "_model_name": "LayoutModel",
1867 |        "_view_count": null,
1868 |        "_view_module": "@jupyter-widgets/base",
1869 |        "_view_module_version": "1.2.0",
1870 |        "_view_name": "LayoutView",
1871 |        "align_content": null,
1872 |        "align_items": null,
1873 |        "align_self": null,
1874 |        "border": null,
1875 |        "bottom": null,
1876 |        "display": null,
1877 |        "flex": null,
1878 |        "flex_flow": null,
1879 |        "grid_area": null,
1880 |        "grid_auto_columns": null,
1881 |        "grid_auto_flow": null,
1882 |        "grid_auto_rows": null,
1883 |        "grid_column": null,
1884 |        "grid_gap": null,
1885 |        "grid_row": null,
1886 |        "grid_template_areas": null,
1887 |        "grid_template_columns": null,
1888 |        "grid_template_rows": null,
1889 |        "height": null,
1890 |        "justify_content": null,
1891 |        "justify_items": null,
1892 |        "left": null,
1893 |        "margin": null,
1894 |        "max_height": null,
1895 |        "max_width": null,
1896 |        "min_height": null,
1897 |        "min_width": null,
1898 |        "object_fit": null,
1899 |        "object_position": null,
1900 |        "order": null,
1901 |        "overflow": null,
1902 |        "overflow_x": null,
1903 |        "overflow_y": null,
1904 |        "padding": null,
1905 |        "right": null,
1906 |        "top": null,
1907 |        "visibility": null,
1908 |        "width": null
1909 |       }
1910 |      },
1911 |      "edcc338999ac45feaab03a86e2af75a9": {
1912 |       "model_module": "@jupyter-widgets/base",
1913 |       "model_module_version": "1.2.0",
1914 |       "model_name": "LayoutModel",
1915 |       "state": {
1916 |        "_model_module": "@jupyter-widgets/base",
1917 |        "_model_module_version": "1.2.0",
1918 |        "_model_name": "LayoutModel",
1919 |        "_view_count": null,
1920 |        "_view_module": "@jupyter-widgets/base",
1921 |        "_view_module_version": "1.2.0",
1922 |        "_view_name": "LayoutView",
1923 |        "align_content": null,
1924 |        "align_items": null,
1925 |        "align_self": null,
1926 |        "border": null,
1927 |        "bottom": null,
1928 |        "display": null,
1929 |        "flex": null,
1930 |        "flex_flow": null,
1931 |        "grid_area": null,
1932 |        "grid_auto_columns": null,
1933 |        "grid_auto_flow": null,
1934 |        "grid_auto_rows": null,
1935 |        "grid_column": null,
1936 |        "grid_gap": null,
1937 |        "grid_row": null,
1938 |        "grid_template_areas": null,
1939 |        "grid_template_columns": null,
1940 |        "grid_template_rows": null,
1941 |        "height": null,
1942 |        "justify_content": null,
1943 |        "justify_items": null,
1944 |        "left": null,
1945 |        "margin": null,
1946 |        "max_height": null,
1947 |        "max_width": null,
1948 |        "min_height": null,
1949 |        "min_width": null,
1950 |        "object_fit": null,
1951 |        "object_position": null,
1952 |        "order": null,
1953 |        "overflow": null,
1954 |        "overflow_x": null,
1955 |        "overflow_y": null,
1956 |        "padding": null,
1957 |        "right": null,
1958 |        "top": null,
1959 |        "visibility": null,
1960 |        "width": null
1961 |       }
1962 |      },
1963 |      "f575006dc6624157bfb408cced4e6ae6": {
1964 |       "model_module": "@jupyter-widgets/controls",
1965 |       "model_module_version": "1.5.0",
1966 |       "model_name": "ProgressStyleModel",
1967 |       "state": {
1968 |        "_model_module": "@jupyter-widgets/controls",
1969 |        "_model_module_version": "1.5.0",
1970 |        "_model_name": "ProgressStyleModel",
1971 |        "_view_count": null,
1972 |        "_view_module": "@jupyter-widgets/base",
1973 |        "_view_module_version": "1.2.0",
1974 |        "_view_name": "StyleView",
1975 |        "bar_color": null,
1976 |        "description_width": "initial"
1977 |       }
1978 |      },
1979 |      "f5c9c24c01114e168e90ed555fb9f05a": {
1980 |       "model_module": "@jupyter-widgets/base",
1981 |       "model_module_version": "1.2.0",
1982 |       "model_name": "LayoutModel",
1983 |       "state": {
1984 |        "_model_module": "@jupyter-widgets/base",
1985 |        "_model_module_version": "1.2.0",
1986 |        "_model_name": "LayoutModel",
1987 |        "_view_count": null,
1988 |        "_view_module": "@jupyter-widgets/base",
1989 |        "_view_module_version": "1.2.0",
1990 |        "_view_name": "LayoutView",
1991 |        "align_content": null,
1992 |        "align_items": null,
1993 |        "align_self": null,
1994 |        "border": null,
1995 |        "bottom": null,
1996 |        "display": null,
1997 |        "flex": null,
1998 |        "flex_flow": null,
1999 |        "grid_area": null,
2000 |        "grid_auto_columns": null,
2001 |        "grid_auto_flow": null,
2002 |        "grid_auto_rows": null,
2003 |        "grid_column": null,
2004 |        "grid_gap": null,
2005 |        "grid_row": null,
2006 |        "grid_template_areas": null,
2007 |        "grid_template_columns": null,
2008 |        "grid_template_rows": null,
2009 |        "height": null,
2010 |        "justify_content": null,
2011 |        "justify_items": null,
2012 |        "left": null,
2013 |        "margin": null,
2014 |        "max_height": null,
2015 |        "max_width": null,
2016 |        "min_height": null,
2017 |        "min_width": null,
2018 |        "object_fit": null,
2019 |        "object_position": null,
2020 |        "order": null,
2021 |        "overflow": null,
2022 |        "overflow_x": null,
2023 |        "overflow_y": null,
2024 |        "padding": null,
2025 |        "right": null,
2026 |        "top": null,
2027 |        "visibility": null,
2028 |        "width": null
2029 |       }
2030 |      },
2031 |      "f5d5b2c07f5745538d04f3968c244002": {
2032 |       "model_module": "@jupyter-widgets/controls",
2033 |       "model_module_version": "1.5.0",
2034 |       "model_name": "HBoxModel",
2035 |       "state": {
2036 |        "_dom_classes": [],
2037 |        "_model_module": "@jupyter-widgets/controls",
2038 |        "_model_module_version": "1.5.0",
2039 |        "_model_name": "HBoxModel",
2040 |        "_view_count": null,
2041 |        "_view_module": "@jupyter-widgets/controls",
2042 |        "_view_module_version": "1.5.0",
2043 |        "_view_name": "HBoxView",
2044 |        "box_style": "",
2045 |        "children": [
2046 |         "IPY_MODEL_67f9de54d6e5434190bd07b7151d23b7",
2047 |         "IPY_MODEL_94ef6e8f88bb498783522af9621bf811"
2048 |        ],
2049 |        "layout": "IPY_MODEL_2a6633db6b2946d6a6c8a66065e394cd"
2050 |       }
2051 |      }
2052 |     },
2053 |     "version_major": 2,
2054 |     "version_minor": 0
2055 |    }
2056 |   }
2057 |  },
2058 |  "nbformat": 4,
2059 |  "nbformat_minor": 4
2060 | }
2061 | 


--------------------------------------------------------------------------------
/multi_modal_addition_fusion.ipynb:
--------------------------------------------------------------------------------
1 | {"cells":[{"metadata":{"_cell_guid":"79c7e3d0-c299-4dcb-8224-4455121ee9b0","_uuid":"d629ff2d2480ee46fbb7e2d37f6b5fab8052498a","trusted":true},"cell_type":"code","source":"import os, time, datetime\nimport numpy as np\nimport pandas as pd\nfrom tqdm.notebook import tqdm\nimport random\nimport logging\ntqdm.pandas()\nimport seaborn as sns\nfrom sklearn.model_selection import train_test_split\n\n#NN Packages\nimport torch\nimport torch.nn as nn\nfrom torch.utils.data import TensorDataset, random_split,DataLoader, RandomSampler, SequentialSampler\n\nlogger = logging.getLogger(__name__)\n\n\nif torch.cuda.is_available():    \n\n    # Tell PyTorch to use the GPU.    \n    device = torch.device(\"cuda\")\n\n    print('There are %d GPU(s) available.' % torch.cuda.device_count())\n\n    print('We will use the GPU:', torch.cuda.get_device_name(0))\n\n# If not...\nelse:\n    print('No GPU available, using the CPU instead.')\n    device = torch.device(\"cpu\")\n","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"def format_time(elapsed):\n    '''\n    Takes a time in seconds and returns a string hh:mm:ss\n    '''\n    # Round to the nearest second.\n    elapsed_rounded = int(round((elapsed)))\n    \n    # Format as hh:mm:ss\n    return str(datetime.timedelta(seconds=elapsed_rounded))\n\nclass SigirPreprocess():\n    \n    def __init__(self, text_data_path):\n        self.text_data_path = text_data_path\n        self.train = None\n        self.dict_code_to_id = {}\n        self.dict_id_to_code = {}\n        self.list_tags = {}\n        self.sentences = []\n        self.labels = []\n        self.text_col = None\n        self.X_test = None\n    def prepare_data(self ):\n        catalog_eng= pd.read_csv(self.text_data_path+\"data/catalog_english_taxonomy.tsv\",sep=\"\\t\")\n        X_train= pd.read_csv(self.text_data_path+\"data/X_train.tsv\",sep=\"\\t\")\n        Y_train= 
pd.read_csv(self.text_data_path+\"data/Y_train.tsv\",sep=\"\\t\")\n        \n        self.list_tags = list(Y_train['Prdtypecode'].unique())\n        for i,tag in enumerate(self.list_tags):\n            self.dict_code_to_id[tag] = i \n            self.dict_id_to_code[i]=tag\n        print(self.dict_code_to_id)\n            \n        Y_train['labels']=Y_train['Prdtypecode'].map(self.dict_code_to_id)\n        train=pd.merge(left=X_train,right=Y_train,\n               how='left',left_on=['Integer_id','Image_id','Product_id'],\n               right_on=['Integer_id','Image_id','Product_id'])\n        prod_map=pd.Series(catalog_eng['Top level category'].values,\n                           index=catalog_eng['Prdtypecode']).to_dict()\n\n        train['product'] = train['Prdtypecode'].map(prod_map)\n        train['title_len']=train['Title'].progress_apply(lambda x : len(x.split()) if pd.notna(x) else 0)\n        train['desc_len']=train['Description'].progress_apply(lambda x : len(x.split()) if pd.notna(x) else 0)\n        train['title_desc_len']=train['title_len'] + train['desc_len']\n        train.loc[train['Description'].isnull(), 'Description'] = \" \"\n        train['title_desc'] = train['Title'] + \" \" + train['Description']\n        \n        self.train = train\n        \n    def get_sentences(self, text_col, remove_null_rows=False):\n        self.text_col = text_col\n        if remove_null_rows==True:\n            new_train = self.train[self.train[text_col].notnull()]\n\n        else:\n            new_train = self.train.copy()\n            \n        self.sentences = new_train[text_col].values\n        self.labels = new_train['labels'].values\n    \n    def prepare_test(self, text_col):\n        X_test=pd.read_csv(self.text_data_path+\"data/x_test_task1_phase1.tsv\",sep=\"\\t\")\n        X_test.loc[X_test['Description'].isnull(), 'Description'] = \" \"\n        X_test['title_desc'] = X_test['Title'] + \" \" + X_test['Description']\n        self.X_test = X_test\n       
 self.test_sentences = X_test[text_col].values\n        ","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"text_col = 'title_desc'\nmax_len = 256\nval_size = 0.1","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"Preprocess = SigirPreprocess(\"/kaggle/input/textphase1/\")\nPreprocess.prepare_data()\nPreprocess.get_sentences(text_col, True)","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"sentences = Preprocess.sentences\nlabels = Preprocess.labels\nprint(\"Total number of sentences:{}, labels:{}\".format(len(sentences), len(labels)))","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"\n# print('Using Camembert')\n# tokenizer_cam = CamembertTokenizer.from_pretrained('camembert-base', do_lowercase=False)\n# print('Using Flaubert')\n# tokenizer_flau = FlaubertTokenizer.from_pretrained('flaubert/flaubert_base_cased', do_lowercase=False)\n","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"#function to prepare input for model training\ndef prep_input(sentences,labels, max_len,tokenizer):\n    input_ids = []\n    attention_masks = []\n\n    # For every sentence...\n    for sent in tqdm(sentences):\n        # `encode_plus` will:\n        #   (1) Tokenize the sentence.\n        #   (2) Prepend the `[CLS]` token to the start.\n        #   (3) Append the `[SEP]` token to the end.\n        #   (4) Map tokens to their IDs.\n        #   (5) Pad or truncate the sentence to `max_length`\n        #   (6) Create attention masks for [PAD] tokens.\n        encoded_dict = tokenizer.encode_plus(\n                            sent,                      # Sentence to encode.\n                            add_special_tokens = True, # Add '[CLS]' and '[SEP]'\n                            max_length = max_len,           # Pad & truncate all sentences.\n                  
          pad_to_max_length = True,\n                            return_attention_mask = True,   # Construct attn. masks.\n                            return_tensors = 'pt',     # Return pytorch tensors.\n                       )\n\n        # Add the encoded sentence to the list.    \n        input_ids.append(encoded_dict['input_ids'])\n\n        # And its attention mask (simply differentiates padding from non-padding).\n        attention_masks.append(encoded_dict['attention_mask'])\n\n    # Convert the lists into tensors.\n    input_ids = torch.cat(input_ids, dim=0)\n    attention_masks = torch.cat(attention_masks, dim=0)\n    if labels is not None:\n        labels = torch.tensor(labels)\n        return input_ids,attention_masks,labels\n    else:\n        return input_ids,attention_masks\n    ","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"# input_ids_cam,attention_masks_cam,labels_cam=prep_input(sentences,labels, max_len,tokenizer_cam)\n# # print('Original: ', sentences[0])\n# # print('Token IDs:', input_ids[0]) ","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"# input_ids_flau,attention_masks_flau,labels_flau=prep_input(sentences,labels, max_len,tokenizer_flau)\n","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"# tr_inputs_cam, val_inputs_cam, _,_ = train_test_split(input_ids_cam, labels_cam,stratify=labels_cam,\n#                                                             random_state=2020, test_size=val_size)\n# tr_masks_cam, val_masks_cam, _,_ =   train_test_split(attention_masks_cam, labels,stratify=labels,\n#                                              random_state=2020, test_size=val_size)\n\n# tr_inputs_flau, val_inputs_flau, _,_ = train_test_split(input_ids_flau, labels,stratify=labels,\n#                                                             random_state=2020, test_size=val_size)\n# tr_masks_flau, 
val_masks_flau, _,_ =   train_test_split(attention_masks_flau, labels,stratify=labels_flau,\n#                                              random_state=2020, test_size=val_size)\n","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"# torch.save(tr_inputs_cam, \"tr_inputs_cam.pt\")\n# torch.save(val_inputs_cam, \"val_inputs_cam.pt\")\n# torch.save(tr_masks_cam, \"tr_masks_cam.pt\")\n# torch.save(val_masks_cam, \"val_masks_cam.pt\")\n\n# torch.save(tr_inputs_flau, \"tr_inputs_flau.pt\")\n# torch.save(val_inputs_flau, \"val_inputs_flau.pt\")\n# torch.save(tr_masks_flau, \"tr_masks_flau.pt\")\n# torch.save(val_masks_flau, \"val_masks_flau.pt\")\n\n","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"# !mkdir -p /root/.kaggle/\n# !cp ../input/myjson/kaggle.json /root/.kaggle/\n# !chmod 600 /root/.kaggle/kaggle.json","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"# data = '''{\n#   \"title\": \"Multi_modal_input_text\",\n#   \"id\": \"deepbugger/Multi-modal-input-text\",\n#   \"licenses\": [\n#     {\n#       \"name\": \"CC0-1.0\"\n#     }\n#   ]\n# }\n# '''\n# text_file = open(\"/kaggle/working/dataset-metadata.json\", 'w+')\n# n = text_file.write(data)\n# text_file.close()","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"# !kaggle datasets create -p /kaggle/working\n","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"text_input='../input/multi-modal-input-text/'\ntr_inputs_cam=torch.load(text_input+\"tr_inputs_cam.pt\")\nval_inputs_cam=torch.load(text_input+\"val_inputs_cam.pt\")\ntr_masks_cam=torch.load( text_input+\"tr_masks_cam.pt\")\nval_masks_cam=torch.load( 
text_input+\"val_masks_cam.pt\")\n\ntr_inputs_flau=torch.load(text_input+\"tr_inputs_flau.pt\")\nval_inputs_flau=torch.load(text_input+\"val_inputs_flau.pt\")\ntr_masks_flau=torch.load(text_input+\"tr_masks_flau.pt\")\nval_masks_flau=torch.load(text_input+\"val_masks_flau.pt\")","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"!pip install pretrainedmodels","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"from transformers import CamembertConfig, CamembertTokenizer, CamembertModel, CamembertForSequenceClassification, AdamW\nfrom transformers import FlaubertModel, FlaubertTokenizer,FlaubertForSequenceClassification,AdamW, FlaubertConfig \nfrom transformers.modeling_roberta import RobertaClassificationHead\nfrom transformers.modeling_utils import SequenceSummary","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"from torch.nn import functional as F\nimport torch.nn as nn\nimport pretrainedmodels\n\nclass SEResnext50_32x4d(nn.Module):\n    def __init__(self, pretrained='imagenet'):\n        super(SEResnext50_32x4d, self).__init__()\n        \n        self.base_model = pretrainedmodels.__dict__[\"se_resnext50_32x4d\"](pretrained=None)\n        if pretrained is not None:\n            self.base_model.load_state_dict(\n                torch.load(\"../input/pretrained-model-weights-pytorch/se_resnext50_32x4d-a260b3a4.pth\"\n                )\n            )\n        self.l0 = nn.Linear(2048, 27)\n    \n    def forward(self, image):\n        batch_size, _, _, _ = image.shape\n        \n        x = self.base_model.features(image)\n        x = F.adaptive_avg_pool2d(x, 1).reshape(batch_size, -1)\n        \n        out = self.l0(x)\n\n        return out","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"class Identity(nn.Module):\n    def __init__(self):\n        super(Identity, self).__init__()\n    
    \n    def forward(self, x):\n        return x","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"# num_classes=27\n# img_model = SEResnext50_32x4d(pretrained=None)\n# img_model.load_state_dict(torch.load('../input/seresnext2048/best_model.pt'))\n# img_model.cuda()","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"# img_model.l0=Identity()","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"# img_model","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"# for params in img_model.parameters():\n#     params.requires_grad=False\n    ","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"class vec_output_CamembertForSequenceClassification(CamembertModel):\n    config_class = CamembertConfig\n\n    def __init__(self, config):\n        super().__init__(config)\n        self.num_labels = config.num_labels\n\n        self.roberta = CamembertModel(config)\n        self.dense = nn.Linear(256*config.hidden_size, config.hidden_size)\n        self.dropout = nn.Dropout(0.1)\n        self.out_proj = nn.Linear(config.hidden_size, config.num_labels)\n        self.init_weights()\n\n\n    def forward(\n        self,\n        input_ids=None,\n        attention_mask=None,\n        token_type_ids=None,\n        position_ids=None,\n        head_mask=None,\n        inputs_embeds=None,\n        labels=None,\n        output_attentions=None,\n        output_hidden_states=None,\n    ):\n        outputs = self.roberta(\n            input_ids,\n            attention_mask=attention_mask,\n            token_type_ids=token_type_ids,\n            position_ids=position_ids,\n            head_mask=head_mask,\n            inputs_embeds=inputs_embeds,\n#             output_attentions=output_attentions,\n#             output_hidden_states=output_hidden_states,\n        )\n        
sequence_output = outputs[0] #(B,256,768)\n        x = sequence_output.view(sequence_output.shape[0], 256*768)\n        x = self.dense(x)  # 256*768 -> 768\n        feat= torch.tanh(x) \n        logits = self.out_proj(feat) # 768 -> 27\n        outputs = (logits,) + outputs[2:]\n        \n        return outputs  # (loss), logits, (hidden_states), (attentions)","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"num_classes = 27\n\nclass vec_output_FlaubertForSequenceClassification(FlaubertModel):\n    \n    config_class = FlaubertConfig\n    \n\n    def __init__(self, config):\n        super().__init__(config)\n        self.transformer = FlaubertModel(config)\n        self.sequence_summary = SequenceSummary(config)\n        self.init_weights()\n        self.dropout =  torch.nn.Dropout(0.1)\n        self.classifier = torch.nn.Linear(config.hidden_size, num_classes)\n\n\n    def forward(\n        self,\n        input_ids=None,\n        attention_mask=None,\n        langs=None,\n        token_type_ids=None,\n        position_ids=None,\n        lengths=None,\n        cache=None,\n        head_mask=None,\n        inputs_embeds=None,\n        labels=None,\n    ):\n        \n        \n        transformer_outputs = self.transformer(\n            input_ids,\n            attention_mask=attention_mask,\n            langs=langs,\n            token_type_ids=token_type_ids,\n            position_ids=position_ids,\n            lengths=lengths,\n            cache=cache,\n            head_mask=head_mask,\n            inputs_embeds=inputs_embeds,\n        )\n\n        #output = self.dropout(output)\n        output = transformer_outputs[0]\n        vec = output[:,0]\n        \n        \n        #logits\n        dense = self.dropout(vec)\n        \n        #classifier\n        logits = self.classifier(dense)\n        \n        outputs = (logits,) + transformer_outputs[1:]  # Keep new_mems and attention/hidden states if they are here\n       
\n        \n        return outputs\n","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"# model = vec_output_CamembertForSequenceClassification.from_pretrained(\n#     modelname, # Use the 12-layer BERT model, with an uncased vocab.\n#     num_labels = len(Preprocess.dict_code_to_id), # The number of output labels--2 for binary classification.\n#                     # You can increase this for multi-class tasks.   \n#     output_attentions = False, # Whether the model returns attentions weights.\n#     output_hidden_states = False, # Whether the model returns all hidden-states.\n# )","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"# model_path = '../input/camembert-vec-256m768-10ep/best_model.pt'\n# checkpoint = torch.load(model_path)\n# # model = checkpoint['model']\n# model.load_state_dict(checkpoint)","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"# model.cuda()","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"# for param in model.parameters():\n#     param.requires_grad=False","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"# model.out_proj=Identity()","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"### Image data prep"},{"metadata":{"trusted":true},"cell_type":"code","source":"catalog_eng= pd.read_csv(\"/kaggle/input/textphase1/data/catalog_english_taxonomy.tsv\",sep=\"\\t\")\nX_train= pd.read_csv(\"/kaggle/input/textphase1/data/X_train.tsv\",sep=\"\\t\")\nY_train= pd.read_csv(\"/kaggle/input/textphase1/data/Y_train.tsv\",sep=\"\\t\")\nX_test=pd.read_csv(\"/kaggle/input/textphase1/data/x_test_task1_phase1.tsv\",sep=\"\\t\")\ndict_code_to_id = {}\ndict_id_to_code={}\nlist_tags = list(Y_train['Prdtypecode'].unique())\n\nfor i,tag in enumerate(list_tags):\n    dict_code_to_id[tag] = i \n    
dict_id_to_code[i]=tag\nY_train['labels']=Y_train['Prdtypecode'].map(dict_code_to_id)\ntrain=pd.merge(left=X_train,right=Y_train,\n               how='left',left_on=['Integer_id','Image_id','Product_id'],\n               right_on=['Integer_id','Image_id','Product_id'])\nprod_map=pd.Series(catalog_eng['Top level category'].values,index=catalog_eng['Prdtypecode']).to_dict()\ntrain['product']=train['Prdtypecode'].map(prod_map)\n\ndef get_img_path(img_id,prd_id,path):\n    \n    pattern = 'image'+'_'+str(img_id)+'_'+'product'+'_'+str(prd_id)+'.jpg'\n    return path + pattern\ntrain_img = train[['Image_id','Product_id','labels','product']]\n\ntrain_img['image_path']=train_img.progress_apply(lambda x: get_img_path(x['Image_id'],x['Product_id'],\n                                                                path = '/kaggle/input/imagetrain/image_training/'),axis=1)\nX_test['image_path']=X_test.progress_apply(lambda x: get_img_path(x['Image_id'],x['Product_id'],\n                                                    path='/kaggle/input/imagetest/image_test/image_test_task1_phase1/'),axis=1)\ntrain_df, val_df, _, _ = train_test_split(train_img, train_img['labels'],random_state=2020, test_size = 0.1, stratify=train_img['labels'])","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"input_size = 224 # for Resnt\n# Applying Transforms to the Data\nfrom torchvision import datasets, models, transforms\n\nimage_transforms = { \n    'train': transforms.Compose([\n        transforms.RandomResizedCrop(size=256, scale=(0.8, 1.0)),\n        transforms.RandomRotation(degrees=15),\n        transforms.RandomHorizontalFlip(),\n        transforms.Resize(size=256),\n        transforms.CenterCrop(size=input_size),\n        transforms.ToTensor(),\n        transforms.Normalize([0.485, 0.456, 0.406],\n                             [0.229, 0.224, 0.225])\n    ]),\n    'valid': transforms.Compose([\n        transforms.Resize(size=256),\n        
transforms.CenterCrop(size=input_size),\n        transforms.ToTensor(),\n        transforms.Normalize([0.485, 0.456, 0.406],\n                             [0.229, 0.224, 0.225])\n    ]),\n    'test': transforms.Compose([\n        transforms.Resize(size=256),\n        transforms.CenterCrop(size=input_size),\n        transforms.ToTensor(),\n        transforms.Normalize([0.485, 0.456, 0.406],\n                             [0.229, 0.224, 0.225])\n    ])\n}","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"from torch.utils.data import Dataset, DataLoader, Subset\nimport cv2\nfrom PIL import Image\n\nclass FusionDataset(Dataset):\n    \n    def __init__(self,df,inputs_cam,masks_cam,inputs_flau,masks_flau,transform=None,mode='train'):\n        self.df = df\n        self.transform=transform\n        self.mode=mode\n        self.inputs_cam=inputs_cam\n        self.masks_cam=masks_cam\n        self.inputs_flau=inputs_flau\n        self.masks_flau=masks_flau\n         \n    def __len__(self):\n        return len(self.df)\n    \n    def __getitem__(self,idx):\n        \n        im_path = self.df.iloc[idx]['image_path']\n        img = cv2.imread(im_path)\n        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)\n        img=Image.fromarray(img)\n        if self.transform is not None:\n            img = self.transform(img)\n        img=img.cuda()\n        input_id_cam=self.inputs_cam[idx].cuda()\n        input_mask_cam=self.masks_cam[idx].cuda()\n        input_id_flau=self.inputs_flau[idx].cuda()\n        input_mask_flau=self.masks_flau[idx].cuda()\n        \n        if self.mode=='test':\n            return img,input_id_cam,input_mask_cam,input_id_flau,input_mask_flau\n        else:\n#             labels = torch.tensor(self.df.iloc[idx]['labels'])\n            labels = torch.tensor(self.df.iloc[idx]['labels']).cuda()             \n\n            return 
img,input_id_cam,input_mask_cam,input_id_flau,input_mask_flau,labels","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"batch_size = 32  \nPreprocess.prepare_test(text_col)\ntest_sentences = Preprocess.test_sentences\nX_test_phase1= Preprocess.X_test","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"# print('Using Camembert')\ntokenizer_cam = CamembertTokenizer.from_pretrained('camembert-base', do_lowercase=False)\n# print('Using Flaubert')\ntokenizer_flau = FlaubertTokenizer.from_pretrained('flaubert/flaubert_base_cased', do_lowercase=False)\n\ninput_ids_test_flau,attention_masks_test_flau=prep_input(test_sentences,labels=None, max_len=max_len,tokenizer = tokenizer_flau)\ninput_ids_test_cam,attention_masks_test_cam=prep_input(test_sentences,labels=None, max_len=max_len,tokenizer = tokenizer_cam)","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"class vector_fusion(nn.Module):\n    \n    def __init__(self):\n        super(vector_fusion, self).__init__()\n        self.img_model = SEResnext50_32x4d(pretrained=None)\n        self.img_model.load_state_dict(torch.load('../input/seresnext2048/best_model.pt'))\n        self.img_model.l0=Identity()\n        for params in self.img_model.parameters():\n            params.requires_grad=False\n\n        self.cam_model= vec_output_CamembertForSequenceClassification.from_pretrained(\n    'camembert-base', # Use the 12-layer BERT model, with an uncased vocab.\n    num_labels = len(Preprocess.dict_code_to_id), # The number of output labels--2 for binary classification.\n                    # You can increase this for multi-class tasks.   
\n    output_attentions = False, # Whether the model returns attentions weights.\n    output_hidden_states = False,) # Whether the model returns all hidden-states.\n        \n        \n        cam_model_path = '../input/camembert-vec-256m768-10ep/best_model.pt'\n        checkpoint = torch.load(cam_model_path)\n        # model = checkpoint['model']\n        self.cam_model.load_state_dict(checkpoint)\n        for param in self.cam_model.parameters():\n            param.requires_grad=False\n        self.cam_model.out_proj=Identity()\n        \n        self.flau_model=vec_output_FlaubertForSequenceClassification.from_pretrained(\n        'flaubert/flaubert_base_cased', \n        num_labels = len(Preprocess.dict_code_to_id), \n        output_attentions = False,\n        output_hidden_states = False,)\n        flau_model_path='../input/flaubert-8933/best_model.pt'\n        checkpoint = torch.load(flau_model_path)\n        self.flau_model.load_state_dict(checkpoint)\n        for param in self.flau_model.parameters():\n            param.requires_grad=False\n        self.flau_model.classifier=Identity()\n        \n        \n        #reducing the dimensionality\n        self.reduce_dim=nn.Conv1d(in_channels = 2048 , out_channels = 768 , kernel_size= 1)\n        \n        #output\n        self.out=nn.Linear(768, 27)\n        \n\n        \n        \n    def forward(self,img,input_id_cam,input_mask_cam,input_id_flau,input_mask_flau):\n        \n        cam_emb =self.cam_model(input_id_cam, \n                     token_type_ids=None,               ###### bs * 768  \n                     attention_mask=input_mask_cam)\n        \n        #alignment\n        #cam_emb1 = cam_emb[0]\n        \n        \n        flau_emb =self.flau_model(input_id_flau,  \n                     token_type_ids=None,               ###### bs * 768 \n                     attention_mask=input_mask_flau)\n        \n        #alignment\n        #flau_emb1 = flau_emb[0]\n        \n        #Projecting the image 
embedding to lower dimension\n        img_emb=self.img_model(img)\n        img_emb=img_emb.view(img_emb.shape[0],img_emb.shape[1],1) \n        img_emb=self.reduce_dim(img_emb)                         \n        img_emb=img_emb.view(img_emb.shape[0],img_emb.shape[1]) ###### bs * 768 \n        \n        #adding\n        fuse= img_emb + cam_emb[0] + flau_emb[0]\n        \n        logits=self.out(fuse)\n        return logits","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"model = vector_fusion()","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"model.cuda()","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"train_dataset=FusionDataset(train_df,tr_inputs_cam,tr_masks_cam,tr_inputs_flau,tr_masks_flau,\n                            transform=image_transforms['test'])\nval_dataset=FusionDataset(val_df,val_inputs_cam,val_masks_cam,val_inputs_flau,val_masks_flau,\n                          transform=image_transforms['test'])\ntest_dataset=FusionDataset(X_test,input_ids_test_cam,attention_masks_test_cam,input_ids_test_flau,attention_masks_test_flau\n                           ,transform=image_transforms['test'],mode = 'test')","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"batch_size=64\ntrain_dataloader=DataLoader(train_dataset,batch_size=batch_size,shuffle=True)\nvalidation_dataloader=DataLoader(val_dataset,batch_size=batch_size,shuffle=False)\ntest_dataloader=DataLoader(test_dataset,batch_size=batch_size,shuffle=False)","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"# tr_inputs, val_inputs, tr_labels, val_labels = train_test_split(input_ids, labels,stratify=labels,\n#                                                             random_state=2020, test_size=val_size)\n# tr_masks, val_masks, u,v =   train_test_split(attention_masks, 
labels,stratify=labels,\n#                                              random_state=2020, test_size=val_size)\n\n\n# train_dataset=TensorDataset(tr_inputs, tr_masks, tr_labels)\n# val_dataset=TensorDataset(val_inputs, val_masks, val_labels)\n# train_sampler = RandomSampler(train_dataset) \n# valid_sampler = SequentialSampler(val_dataset)\n# from torch.utils.data import DataLoader, RandomSampler, SequentialSampler\n\n# # The DataLoader needs to know our batch size for training, so we specify it \n# # here. For fine-tuning BERT on a specific task, the authors recommend a batch \n# # size of 16 or 32.\n# batch_size = 32\n\n# # Create the DataLoaders for our training and validation sets.\n# # We'll take training samples in random order. \n# train_dataloader = DataLoader(\n#             train_dataset,  # The training samples.\n#             sampler = train_sampler, # Select batches randomly\n#             batch_size = batch_size # Trains with this batch size.\n#         )\n\n# # For validation the order doesn't matter, so we'll just read them sequentially.\n# validation_dataloader = DataLoader(\n#             val_dataset, # The validation samples.\n#             sampler = valid_sampler, # Pull out batches sequentially.\n#             batch_size = batch_size # Evaluate with this batch size.\n#         )","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"optimizer = AdamW(model.parameters(),\n                  lr = 2e-5, # args.learning_rate - default is 5e-5, our notebook had 2e-5\n                  eps = 1e-8 # args.adam_epsilon  - default is 1e-8.\n                )","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"def count_parameters(model):\n    return sum(p.numel() for p in model.parameters() if p.requires_grad)\ncount_parameters(model)","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"from transformers import 
get_linear_schedule_with_warmup\n\n# Number of training epochs. The BERT authors recommend between 2 and 4. \n# We chose to run for 4, but we'll see later that this may be over-fitting the\n# training data.\nepochs = 6\n\n# Total number of training steps is [number of batches] x [number of epochs]. \n# (Note that this is not the same as the number of training samples).\ntotal_steps = len(train_dataloader) * epochs\n\n# Create the learning rate scheduler.\nscheduler = get_linear_schedule_with_warmup(optimizer, \n                                            num_warmup_steps = 0, # Default value in run_glue.py\n                                            num_training_steps = total_steps)","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"import torch.nn as nn\nloss_criterion = nn.CrossEntropyLoss()","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"def flat_accuracy(preds, labels):\n    pred_flat = np.argmax(preds, axis=1).flatten()\n    labels_flat = labels.flatten()\n    return np.sum(pred_flat == labels_flat) / len(labels_flat)","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"from sklearn.metrics import f1_score\n\nseed_val = 42\n\nrandom.seed(seed_val)\nnp.random.seed(seed_val)\ntorch.manual_seed(seed_val)\ntorch.cuda.manual_seed_all(seed_val)\n\n# We'll store a number of quantities such as training and validation loss, \n# validation accuracy, and timings.\ntraining_stats = []\n\n# Measure the total training time for the whole run.\ntotal_t0 = time.time()\n\n\n# For each epoch...\nfor epoch_i in range(0, epochs):\n    \n    # ========================================\n    #               Training\n    # ========================================\n    \n    # Perform one full pass over the training set.\n\n    print(\"\")\n    print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))\n    print('Training...')\n    \n    
#tr and val\n#     vec_output_tr = []\n#     vec_output_val =[]\n\n    # Measure how long the training epoch takes.\n    t0 = time.time()\n\n    # Reset the total loss for this epoch.\n    total_train_loss = 0\n\n    # Put the model into training mode. Don't be mislead--the call to \n    # `train` just changes the *mode*, it doesn't *perform* the training.\n    # `dropout` and `batchnorm` layers behave differently during training\n    # vs. test (source: https://stackoverflow.com/questions/51433378/what-does-model-train-do-in-pytorch)\n    best_f1 = 0\n    model.train()\n\n    # For each batch of training data...\n    for step, batch in tqdm(enumerate(train_dataloader)):\n        \n        # Unpack this training batch from our dataloader. \n        #\n        \n        # As we unpack the batch, we'll also copy each tensor to the GPU using the \n        # `to` method.\n        #\n        # `batch` contains three pytorch tensors:\n        #   [0]: input ids \n        #   [1]: attention masks\n        #   [2]: labels \n#         return img,input_id_cam,input_mask_cam,input_id_flau,input_mask_flau\n\n        b_img=batch[0].to(device)\n\n        b_input_id_cam = batch[1].to(device)\n        b_input_mask_cam = batch[2].to(device)\n        b_input_id_flau = batch[3].to(device)\n        b_input_mask_flau = batch[4].to(device)\n\n        b_labels = batch[5].to(device)\n        \n        \n        model.zero_grad()        \n\n        \n        logits = model(b_img,b_input_id_cam ,b_input_mask_cam,b_input_id_flau,b_input_mask_flau)\n                            \n        #Defining the loss\n        loss = loss_criterion(logits, b_labels)\n        \n        #saving the features_tr\n#         vec = vec.detach().cpu().numpy()\n#         vec_output_tr.extend(vec)\n        \n        # Accumulate the training loss over all of the batches so that we can\n        # calculate the average loss at the end. 
`loss` is a Tensor containing a\n        # single value; the `.item()` function just returns the Python value \n        # from the tensor.\n        total_train_loss += loss.item()\n\n        # Perform a backward pass to calculate the gradients.\n        loss.backward()\n\n        # Clip the norm of the gradients to 1.0.\n        # This is to help prevent the \"exploding gradients\" problem.\n        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)\n\n        # Update parameters and take a step using the computed gradient.\n        # The optimizer dictates the \"update rule\"--how the parameters are\n        # modified based on their gradients, the learning rate, etc.\n        optimizer.step()\n\n        # Update the learning rate.\n        scheduler.step()\n        \n        \n        \n\n    # Calculate the average loss over all of the batches.\n    avg_train_loss = total_train_loss / len(train_dataloader)            \n    \n    # Measure how long this epoch took.\n    training_time = format_time(time.time() - t0)\n\n    print(\"\")\n    print(\"  Average training loss: {0:.2f} \".format(avg_train_loss))\n    print(\"  Training epcoh took: {:} \".format(training_time))\n        \n    # ========================================\n    #               Validation\n    # ========================================\n    # After the completion of each training epoch, measure our performance on\n    # our validation set.\n\n    print(\"\")\n    print(\"Running Validation...\")\n\n    t0 = time.time()\n\n    # Put the model in evaluation mode--the dropout layers behave differently\n    # during evaluation.\n    model.eval()\n\n    # Tracking variables \n    total_eval_accuracy = 0\n    total_eval_loss = 0\n    nb_eval_steps = 0\n    predictions=[]\n    true_labels=[]\n    \n\n    # Evaluate data for one epoch\n    for batch in tqdm(validation_dataloader):\n        \n        # Unpack this training batch from our dataloader. 
\n        #\n        # As we unpack the batch, we'll also copy each tensor to the GPU using \n        # the `to` method.\n        #\n        # `batch` contains three pytorch tensors:\n        #   [0]: input ids \n        #   [1]: attention masks\n        #   [2]: labels \n        b_img=batch[0].to(device)\n\n        b_input_id_cam = batch[1].to(device)\n        b_input_mask_cam = batch[2].to(device)\n        b_input_id_flau = batch[3].to(device)\n        b_input_mask_flau = batch[4].to(device)\n\n        b_labels = batch[5].to(device)\n        \n        \n        # Tell pytorch not to bother with constructing the compute graph during\n        # the forward pass, since this is only needed for backprop (training).\n        with torch.no_grad():       \n        \n\n            # Forward pass, calculate logit predictions.\n            # token_type_ids is the same as the \"segment ids\", which \n            # differentiates sentence 1 and 2 in 2-sentence tasks.\n            # The documentation for this `model` function is here: \n            # https://huggingface.co/transformers/v2.2.0/model_doc/bert.html#transformers.BertForSequenceClassification\n            # Get the \"logits\" output by the model. 
The \"logits\" are the output\n            # values prior to applying an activation function like the softmax.\n            logits = model(b_img,b_input_id_cam ,b_input_mask_cam,b_input_id_flau,b_input_mask_flau)\n            \n        #new\n        \n        #defining the val loss\n        loss = loss_criterion(logits, b_labels)\n        \n        \n        # Accumulate the validation loss.\n        total_eval_loss += loss.item()\n\n        # Move logits and labels to CPU\n        logits = logits.detach().cpu().numpy()\n\n        # Move logits and labels to CPU\n        predicted_labels=np.argmax(logits,axis=1)\n        predictions.extend(predicted_labels)\n        label_ids = b_labels.to('cpu').numpy()\n        true_labels.extend(label_ids)\n        \n        #saving the features_tr\n#         vec = vec.detach().cpu().numpy()\n#         vec_output_val.extend(vec)\n        \n\n        # Calculate the accuracy for this batch of test sentences, and\n        # accumulate it over all batches.\n        total_eval_accuracy += flat_accuracy(logits, label_ids)\n        \n\n    # Report the final accuracy for this validation run.\n    avg_val_accuracy = total_eval_accuracy / len(validation_dataloader)\n    print(\"  Accuracy: {0:.2f}\".format(avg_val_accuracy))\n\n    # Calculate the average loss over all of the batches.\n    avg_val_loss = total_eval_loss / len(validation_dataloader)\n    \n    # Measure how long the validation run took.\n    validation_time = format_time(time.time() - t0)\n    \n    print(\"  Validation Loss: {0:.2f}\".format(avg_val_loss))\n    print(\"  Validation took: {:}\".format(validation_time))\n    print(\"Validation F1-Score: {}\".format(f1_score(true_labels,predictions,average='macro')))\n    curr_f1=f1_score(true_labels,predictions,average='macro')\n    if curr_f1 > best_f1:\n        best_f1=curr_f1\n        torch.save(model.state_dict(), 'best_model.pt')\n#         np.save('best_vec_train_model_train.npy',vec_output_tr)\n#         
np.save('best_vec_val.npy',vec_output_val)\n        \n    # Record all statistics from this epoch.\n#     training_stats.append(\n#         {\n#             'epoch': epoch_i + 1,\n#             'Training Loss': avg_train_loss,\n#             'Valid. Loss': avg_val_loss,\n#             'Valid. Accur.': avg_val_accuracy,\n#             'Training Time': training_time,\n#             'Validation Time': validation_time\n#         }\n#     )\n\nprint(\"\")\nprint(\"Training complete!\")\n\nprint(\"Total training took {:} (h:mm:ss)\".format(format_time(time.time()-total_t0)))","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"ckpt = '../input/vec-fusion-9093/best_model.pt'\nmodel.load_state_dict(torch.load(ckpt))","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"def predict_pyt(model, prediction_dataloader):\n    \"\"\"\n    model: pytorch model\n    prediction_dataloader: DataLoader object for which the predictions has to be made.\n    return:\n        predictions:- Direct predicted labels\n        softmax_logits:- logits which are normalized with softmax on output\"\"\"\n    # Put model in evaluation mode\n    model.eval()\n    # Tracking variables \n    predictions = []\n    softmax_logits=[]\n    # Predict \n    \n    for batch in tqdm(prediction_dataloader):\n        \n        # Add batch to GPU\n        b_img=batch[0].to(device)\n        b_input_id_cam = batch[1].to(device)\n        b_input_mask_cam = batch[2].to(device)\n        b_input_id_flau = batch[3].to(device)\n        b_input_mask_flau = batch[4].to(device)\n        \n        \n        # Telling the model not to compute or store gradients, saving memory and \n        # speeding up prediction\n        with torch.no_grad():\n            # Forward pass, calculate logit predictions\n            logits = model(b_img,b_input_id_cam ,b_input_mask_cam,b_input_id_flau,b_input_mask_flau)\n        \n        \n        #find 
logits\n    #----- Add softmax---     \n        m = nn.Softmax(dim=1)\n    # #     input = torch.randn(2, 3)\n        output = m(logits)\n    #-------#------\n        # Move logits and labels to CPU\n        logits = logits.detach().cpu().numpy()\n        predicted_labels=np.argmax(logits,axis=1)\n        predictions.extend(predicted_labels)\n        softmax_logits.extend(output)\n    print('DONE')\n    return predictions, softmax_logits\n","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"#validation predictions\npredictions_val, softmax_logits_val = predict_pyt(model, validation_dataloader)","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"softmax_logits_val = np.array([ten.detach().cpu().numpy() for ten in softmax_logits_val])","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"np.save('predictions_val_add.npy',np.array(predictions_val))\nnp.save('softmax_logits_val_add.npy',softmax_logits_val)","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"#test_predictions\n#predictions_test, softmax_logits_test = predict_pyt(model, test_dataloader)","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"#softmax_logits_test = np.array([ten.detach().cpu().numpy() for ten in softmax_logits_test])","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"# np.save('predictions_test_9093.npy',np.array(predictions_test))\n# np.save('softmax_logits_test_9093.npy',softmax_logits_test)","execution_count":null,"outputs":[]}],"metadata":{"kernelspec":{"language":"python","display_name":"Python 
3","name":"python3"},"language_info":{"pygments_lexer":"ipython3","nbconvert_exporter":"python","version":"3.6.4","file_extension":".py","codemirror_mode":{"name":"ipython","version":3},"name":"python","mimetype":"text/x-python"}},"nbformat":4,"nbformat_minor":4}


--------------------------------------------------------------------------------