├── Finetune_AnyLLM.ipynb
├── Finetune_Mixtral_lora.ipynb
├── LICENSE.txt
├── README.md
└── inference.ipynb


/Finetune_Mixtral_lora.ipynb:
--------------------------------------------------------------------------------
   1 | {
   2 |   "cells": [
   3 |     {
   4 |       "cell_type": "markdown",
   5 |       "metadata": {},
   6 |       "source": [
   7 |         "# License\n",
   8 |         "This notebook is licensed under the MIT License - see the LICENSE file in [this repository](https://github.com/PrakharSaxena24/RepoForLLMs/) for details.\n"
   9 |       ]
  10 |     },
  11 |     {
  12 |       "cell_type": "markdown",
  13 |       "metadata": {
  14 |         "id": "-MlkIQ0pLSrY"
  15 |       },
  16 |       "source": [
  17 |         "# Fine-tune Mixtral 8x7B with LoRA\n",
  18 |         "This notebook was run on an A100 (40GB) GPU.\n"
  19 |       ]
  20 |     },
  21 |     {
  22 |       "cell_type": "code",
  23 |       "execution_count": null,
  24 |       "metadata": {
  25 |         "id": "S1CJwtl7J2Eg"
  26 |       },
  27 |       "outputs": [],
  28 |       "source": [
  29 |         "!pip install -q -U bitsandbytes transformers peft accelerate datasets scipy\n"
  30 |       ]
  31 |     },
  32 |     {
  33 |       "cell_type": "code",
  34 |       "execution_count": null,
  35 |       "metadata": {
  36 |         "id": "NBPD8rQUkmTg"
  37 |       },
  38 |       "outputs": [],
  39 |       "source": [
  40 |         "import torch\n",
  41 |         "import transformers\n",
  42 |         "from datasets import load_dataset\n",
  43 |         "from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments\n",
  44 |         "from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model, PeftModel"
  45 |       ]
  46 |     },
  47 |     {
  48 |       "cell_type": "code",
  49 |       "execution_count": null,
  50 |       "metadata": {
  51 |         "colab": {
  52 |           "base_uri": "https://localhost:8080/",
  53 |           "height": 49,
  54 |           "referenced_widgets": [
  55 |             "442bd77df1b4493794f8a2e49fecd5d1",
  56 |             "ce646a2be8d84eefb8910a2b62d0e831",
  57 |             "9283af3600a24f5fbb630ecf529fe045",
  58 |             "046c8d8c2822419eafe2ecc5e015d7b2",
  59 |             "7fb8fec91f84454bbcd5dcba9001d731",
  60 |             "87ecefbbf8be4e5fbf1719aed14af9ad",
  61 |             "a54b7839a9454671a9f3d275fb338e6c",
  62 |             "a275e5f970094dfa9408445452b12b3e",
  63 |             "a30e1efe178743108b95fb5bfe32b0f0",
  64 |             "87e2d5e8da95478b92a7a725c9b100a6",
  65 |             "326790700c434c6098c386101cc9e6c4"
  66 |           ]
  67 |         },
  68 |         "id": "E8JNln8MknxJ",
  69 |         "outputId": "ea33aedd-e100-4e21-fff8-40d8b22dcdf9"
  70 |       },
  71 |       "outputs": [
  72 |         {
  73 |           "data": {
  74 |             "application/vnd.jupyter.widget-view+json": {
  75 |               "model_id": "442bd77df1b4493794f8a2e49fecd5d1",
  76 |               "version_major": 2,
  77 |               "version_minor": 0
  78 |             },
  79 |             "text/plain": [
  80 |               "Loading checkpoint shards:   0%|          | 0/19 [00:00<?, ?it/s]"
  81 |             ]
  82 |           },
  83 |           "metadata": {},
  84 |           "output_type": "display_data"
  85 |         }
  86 |       ],
  87 |       "source": [
  88 |         "tokenizer = AutoTokenizer.from_pretrained(\"mistralai/Mixtral-8x7B-Instruct-v0.1\")\n",
  89 |         "model = AutoModelForCausalLM.from_pretrained(\"mistralai/Mixtral-8x7B-Instruct-v0.1\",\n",
  90 |         "                                             load_in_4bit=True,\n",
  91 |         "                                             torch_dtype=torch.float16,\n",
  92 |         "                                             device_map=\"auto\",\n",
  93 |         "                                            # attn_implementation=\"flash_attention_2\",   #Optional: uncomment to use FlashAttention-2 (requires the flash-attn package and a supported GPU)\n",
  94 |         "                                             )"
  95 |       ]
  96 |     },
  97 |     {
  98 |       "cell_type": "code",
  99 |       "execution_count": null,
 100 |       "metadata": {
 101 |         "id": "IeRJMgM3ko6B"
 102 |       },
 103 |       "outputs": [],
 104 |       "source": [
 105 |         "tokenizer.pad_token = \"!\" #Not EOS: the LM data collator masks pad tokens out of the loss, so padding with EOS would also mask the real EOS."
 106 |       ]
 107 |     },
 108 |     {
 109 |       "cell_type": "code",
 110 |       "execution_count": null,
 111 |       "metadata": {
 112 |         "id": "u5AHv0KfnQs2"
 113 |       },
 114 |       "outputs": [],
 115 |       "source": [
 116 |         "CUTOFF_LEN = 256  #Our dataset has short text\n",
 117 |         "LORA_R = 8\n",
 118 |         "LORA_ALPHA = 2 * LORA_R\n",
 119 |         "LORA_DROPOUT = 0.1"
 120 |       ]
 121 |     },
 122 |     {
 123 |       "cell_type": "code",
 124 |       "execution_count": null,
 125 |       "metadata": {
 126 |         "id": "rcX9e6_8mojQ"
 127 |       },
 128 |       "outputs": [],
 129 |       "source": [
 130 |         "config = LoraConfig(\n",
 131 |         "    r=LORA_R,\n",
 132 |         "    lora_alpha=LORA_ALPHA,\n",
 133 |         "    target_modules=[ \"w1\", \"w2\", \"w3\"],  #Only Training the \"expert\" layers\n",
 134 |         "    lora_dropout=LORA_DROPOUT,\n",
 135 |         "    bias=\"none\",\n",
 136 |         "    task_type=\"CAUSAL_LM\"\n",
 137 |         ")\n",
 138 |         "\n",
 139 |         "model = get_peft_model(model, config)"
 140 |       ]
 141 |     },
 142 |     {
 143 |       "cell_type": "code",
 144 |       "execution_count": null,
 145 |       "metadata": {
 146 |         "colab": {
 147 |           "base_uri": "https://localhost:8080/"
 148 |         },
 149 |         "id": "fKRjCSQPod8Q",
 150 |         "outputId": "7ea3d8b0-ccab-49e6-ed6e-cc47d09af527"
 151 |       },
 152 |       "outputs": [
 153 |         {
 154 |           "name": "stdout",
 155 |           "output_type": "stream",
 156 |           "text": [
 157 |             "trainable params: 113246208 || all params: 23595847680 || trainable%: 0.4799412571898752\n"
 158 |           ]
 159 |         }
 160 |       ],
 161 |       "source": [
 162 |         "def print_trainable_parameters(m):\n",
 163 |         "    trainable_params = sum(p.numel() for p in m.parameters() if p.requires_grad)\n",
 164 |         "    all_params = sum(p.numel() for p in m.parameters())\n",
 165 |         "    print(f\"trainable params: {trainable_params} || all params: {all_params} || trainable%: {100 * trainable_params / all_params}\")\n",
 166 |         "\n",
 167 |         "print_trainable_parameters(model)"
 168 |       ]
 169 |     },
 170 |     {
 171 |       "cell_type": "code",
 172 |       "execution_count": null,
 173 |       "metadata": {
 174 |         "colab": {
 175 |           "base_uri": "https://localhost:8080/"
 176 |         },
 177 |         "id": "8Zagq7JrohqK",
 178 |         "outputId": "00ad6477-0e89-4027-8bc4-62846236cacc"
 179 |       },
 180 |       "outputs": [
 181 |         {
 182 |           "name": "stdout",
 183 |           "output_type": "stream",
 184 |           "text": [
 185 |             "dataset DatasetDict({\n",
 186 |             "    train: Dataset({\n",
 187 |             "        features: ['modern', 'shakespearean'],\n",
 188 |             "        num_rows: 274\n",
 189 |             "    })\n",
 190 |             "})\n"
 191 |           ]
 192 |         }
 193 |       ],
 194 |       "source": [
 195 |         "dataset = load_dataset(\"harpreetsahota/modern-to-shakesperean-translation\") #Found a good small dataset for a quick test run!\n",
 196 |         "print(\"dataset\", dataset)\n",
 197 |         "train_data = dataset[\"train\"] # Not using evaluation data"
 198 |       ]
 199 |     },
 200 |     {
 201 |       "cell_type": "code",
 202 |       "execution_count": null,
 203 |       "metadata": {
 204 |         "id": "qjQhqAn4o_Of"
 205 |       },
 206 |       "outputs": [],
 207 |       "source": [
 208 |         "def generate_prompt(user_query,  sep=\"\\n\\n### \"):  #The prompt format is taken from the official Mixtral huggingface page\n",
 209 |         "    sys_msg= \"Translate the given text to Shakespearean style.\"\n",
 210 |         "    p =  \"<s> [INST]\" + sys_msg +\"\\n\"+ user_query[\"modern\"] + \"[/INST]\" +  user_query[\"shakespearean\"] + \"</s>\"\n",
 211 |         "    return p"
 212 |       ]
 213 |     },
 214 |     {
 215 |       "cell_type": "code",
 216 |       "execution_count": null,
 217 |       "metadata": {
 218 |         "id": "z5BN_OW-qTyr"
 219 |       },
 220 |       "outputs": [],
 221 |       "source": [
 222 |         "def tokenize(prompt):\n",
 223 |         "    return tokenizer(\n",
 224 |         "        prompt + tokenizer.eos_token,\n",
 225 |         "        truncation=True,\n",
 226 |         "        max_length=CUTOFF_LEN ,\n",
 227 |         "        padding=\"max_length\"\n",
 228 |         "    )"
 229 |       ]
 230 |     },
 231 |     {
 232 |       "cell_type": "code",
 233 |       "execution_count": null,
 234 |       "metadata": {
 235 |         "colab": {
 236 |           "base_uri": "https://localhost:8080/",
 237 |           "height": 49,
 238 |           "referenced_widgets": [
 239 |             "82023d1657324e7dbd604a02ed9a8565",
 240 |             "c2465ae3c2df4af1b80def5cd6e3daa3",
 241 |             "a00e1e7c8dc74e928a42ec4fcd7c110f",
 242 |             "49024a3b2fc746d7b89ec2297077badb",
 243 |             "0bdfbbbf29c7478298e7006189e40d3c",
 244 |             "a0c07ee1d65d4d70b983fb33ae78dd36",
 245 |             "56145d6b220e4228ba37fe38b1d1a08a",
 246 |             "f8bb1f285c0f4fdda057fb64a3379262",
 247 |             "b1610a3654634afabcaa455e12911c9c",
 248 |             "1733e70e3664473985e530372f856ca7",
 249 |             "e7c559612c0f4ffcb7467e01e4d33b2d"
 250 |           ]
 251 |         },
 252 |         "id": "ov8PNhNPqtlq",
 253 |         "outputId": "de260c66-4a80-4d11-937f-bf53590ca678"
 254 |       },
 255 |       "outputs": [
 256 |         {
 257 |           "data": {
 258 |             "application/vnd.jupyter.widget-view+json": {
 259 |               "model_id": "82023d1657324e7dbd604a02ed9a8565",
 260 |               "version_major": 2,
 261 |               "version_minor": 0
 262 |             },
 263 |             "text/plain": [
 264 |               "Map:   0%|          | 0/274 [00:00<?, ? examples/s]"
 265 |             ]
 266 |           },
 267 |           "metadata": {},
 268 |           "output_type": "display_data"
 269 |         }
 270 |       ],
 271 |       "source": [
 272 |         "train_data = train_data.shuffle().map(lambda x: tokenize(generate_prompt(x)), remove_columns=[\"modern\" , \"shakespearean\"])"
 273 |       ]
 274 |     },
 275 |     {
 276 |       "cell_type": "code",
 277 |       "execution_count": null,
 278 |       "metadata": {
 279 |         "id": "2TeVtAVbq04C"
 280 |       },
 281 |       "outputs": [],
 282 |       "source": [
 283 |         "trainer = Trainer(\n",
 284 |         "    model=model,\n",
 285 |         "    train_dataset=train_data,\n",
 286 |         "    args=TrainingArguments(\n",
 287 |         "        per_device_train_batch_size=1,\n",
 288 |         "        gradient_accumulation_steps=4,\n",
 289 |         "        num_train_epochs=6,\n",
 290 |         "        learning_rate=1e-4,\n",
 291 |         "        logging_steps=2,\n",
 292 |         "        optim=\"adamw_torch\",\n",
 293 |         "        save_strategy=\"epoch\",\n",
 294 |         "        output_dir=\"mixtral-moe-lora-instruct-shapeskeare\"\n",
 295 |         "    ),\n",
 296 |         "    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)\n",
 297 |         ")\n",
 298 |         "model.config.use_cache = False\n"
 299 |       ]
 300 |     },
 301 |     {
 302 |       "cell_type": "code",
 303 |       "execution_count": null,
 304 |       "metadata": {
 305 |         "colab": {
 306 |           "base_uri": "https://localhost:8080/",
 307 |           "height": 964
 308 |         },
 309 |         "id": "L683Bua0rCjN",
 310 |         "outputId": "9d7d7879-2270-4a3b-ccbd-df3baa5bc7c3"
 311 |       },
 312 |       "outputs": [
 313 |         {
 314 |           "name": "stderr",
 315 |           "output_type": "stream",
 316 |           "text": [
 317 |             "You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n",
 318 |             "/usr/local/lib/python3.10/dist-packages/bitsandbytes/nn/modules.py:226: UserWarning: Input type into Linear4bit is torch.float16, but bnb_4bit_compute_type=torch.float32 (default). This will lead to slow inference or training speed.\n",
 319 |             "  warnings.warn(f'Input type into Linear4bit is torch.float16, but bnb_4bit_compute_type=torch.float32 (default). This will lead to slow inference or training speed.')\n"
 320 |           ]
 321 |         },
 322 |         {
 323 |           "data": {
 324 |             "text/html": [
 325 |               "\n",
 326 |               "    <div>\n",
 327 |               "      \n",
 328 |               "      <progress value='55' max='408' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
 329 |               "      [ 55/408 07:57 < 53:00, 0.11 it/s, Epoch 0.79/6]\n",
 330 |               "    </div>\n",
 331 |               "    <table border=\"1\" class=\"dataframe\">\n",
 332 |               "  <thead>\n",
 333 |               " <tr style=\"text-align: left;\">\n",
 334 |               "      <th>Step</th>\n",
 335 |               "      <th>Training Loss</th>\n",
 336 |               "    </tr>\n",
 337 |               "  </thead>\n",
 338 |               "  <tbody>\n",
 339 |               "    <tr>\n",
 340 |               "      <td>2</td>\n",
 341 |               "      <td>6.575100</td>\n",
 342 |               "    </tr>\n",
 343 |               "    <tr>\n",
 344 |               "      <td>4</td>\n",
 345 |               "      <td>5.346300</td>\n",
 346 |               "    </tr>\n",
 347 |               "    <tr>\n",
 348 |               "      <td>6</td>\n",
 349 |               "      <td>3.909500</td>\n",
 350 |               "    </tr>\n",
 351 |               "    <tr>\n",
 352 |               "      <td>8</td>\n",
 353 |               "      <td>3.360000</td>\n",
 354 |               "    </tr>\n",
 355 |               "    <tr>\n",
 356 |               "      <td>10</td>\n",
 357 |               "      <td>2.603800</td>\n",
 358 |               "    </tr>\n",
 359 |               "    <tr>\n",
 360 |               "      <td>12</td>\n",
 361 |               "      <td>2.199500</td>\n",
 362 |               "    </tr>\n",
 363 |               "    <tr>\n",
 364 |               "      <td>14</td>\n",
 365 |               "      <td>2.069200</td>\n",
 366 |               "    </tr>\n",
 367 |               "    <tr>\n",
 368 |               "      <td>16</td>\n",
 369 |               "      <td>1.869600</td>\n",
 370 |               "    </tr>\n",
 371 |               "    <tr>\n",
 372 |               "      <td>18</td>\n",
 373 |               "      <td>1.914100</td>\n",
 374 |               "    </tr>\n",
 375 |               "    <tr>\n",
 376 |               "      <td>20</td>\n",
 377 |               "      <td>1.813700</td>\n",
 378 |               "    </tr>\n",
 379 |               "    <tr>\n",
 380 |               "      <td>22</td>\n",
 381 |               "      <td>1.680800</td>\n",
 382 |               "    </tr>\n",
 383 |               "    <tr>\n",
 384 |               "      <td>24</td>\n",
 385 |               "      <td>1.706800</td>\n",
 386 |               "    </tr>\n",
 387 |               "    <tr>\n",
 388 |               "      <td>26</td>\n",
 389 |               "      <td>1.474800</td>\n",
 390 |               "    </tr>\n",
 391 |               "    <tr>\n",
 392 |               "      <td>28</td>\n",
 393 |               "      <td>1.683900</td>\n",
 394 |               "    </tr>\n",
 395 |               "    <tr>\n",
 396 |               "      <td>30</td>\n",
 397 |               "      <td>1.678800</td>\n",
 398 |               "    </tr>\n",
 399 |               "    <tr>\n",
 400 |               "      <td>32</td>\n",
 401 |               "      <td>1.478400</td>\n",
 402 |               "    </tr>\n",
 403 |               "    <tr>\n",
 404 |               "      <td>34</td>\n",
 405 |               "      <td>1.557600</td>\n",
 406 |               "    </tr>\n",
 407 |               "    <tr>\n",
 408 |               "      <td>36</td>\n",
 409 |               "      <td>1.424500</td>\n",
 410 |               "    </tr>\n",
 411 |               "    <tr>\n",
 412 |               "      <td>38</td>\n",
 413 |               "      <td>1.405000</td>\n",
 414 |               "    </tr>\n",
 415 |               "    <tr>\n",
 416 |               "      <td>40</td>\n",
 417 |               "      <td>1.498200</td>\n",
 418 |               "    </tr>\n",
 419 |               "    <tr>\n",
 420 |               "      <td>42</td>\n",
 421 |               "      <td>1.407700</td>\n",
 422 |               "    </tr>\n",
 423 |               "    <tr>\n",
 424 |               "      <td>44</td>\n",
 425 |               "      <td>1.307800</td>\n",
 426 |               "    </tr>\n",
 427 |               "    <tr>\n",
 428 |               "      <td>46</td>\n",
 429 |               "      <td>1.285800</td>\n",
 430 |               "    </tr>\n",
 431 |               "    <tr>\n",
 432 |               "      <td>48</td>\n",
 433 |               "      <td>1.359700</td>\n",
 434 |               "    </tr>\n",
 435 |               "    <tr>\n",
 436 |               "      <td>50</td>\n",
 437 |               "      <td>1.449500</td>\n",
 438 |               "    </tr>\n",
 439 |               "    <tr>\n",
 440 |               "      <td>52</td>\n",
 441 |               "      <td>1.409000</td>\n",
 442 |               "    </tr>\n",
 443 |               "  </tbody>\n",
 444 |               "</table><p>"
 445 |             ],
 446 |             "text/plain": [
 447 |               "<IPython.core.display.HTML object>"
 448 |             ]
 449 |           },
 450 |           "metadata": {},
 451 |           "output_type": "display_data"
 452 |         }
 453 |       ],
 454 |       "source": [
 455 |         "trainer.train()"
 456 |       ]
 457 |     },
 458 |     {
 459 |       "cell_type": "code",
 460 |       "execution_count": null,
 461 |       "metadata": {
 462 |         "id": "agczdazarIdk"
 463 |       },
 464 |       "outputs": [],
 465 |       "source": []
 466 |     }
 467 |   ],
 468 |   "metadata": {
 469 |     "accelerator": "GPU",
 470 |     "colab": {
 471 |       "gpuType": "A100",
 472 |       "machine_shape": "hm",
 473 |       "provenance": []
 474 |     },
 475 |     "kernelspec": {
 476 |       "display_name": "Python 3",
 477 |       "name": "python3"
 478 |     },
 479 |     "language_info": {
 480 |       "name": "python"
 481 |     },
 482 |     "widgets": {
 483 |       "application/vnd.jupyter.widget-state+json": {
 484 |         "046c8d8c2822419eafe2ecc5e015d7b2": {
 485 |           "model_module": "@jupyter-widgets/controls",
 486 |           "model_module_version": "1.5.0",
 487 |           "model_name": "HTMLModel",
 488 |           "state": {
 489 |             "_dom_classes": [],
 490 |             "_model_module": "@jupyter-widgets/controls",
 491 |             "_model_module_version": "1.5.0",
 492 |             "_model_name": "HTMLModel",
 493 |             "_view_count": null,
 494 |             "_view_module": "@jupyter-widgets/controls",
 495 |             "_view_module_version": "1.5.0",
 496 |             "_view_name": "HTMLView",
 497 |             "description": "",
 498 |             "description_tooltip": null,
 499 |             "layout": "IPY_MODEL_87e2d5e8da95478b92a7a725c9b100a6",
 500 |             "placeholder": "​",
 501 |             "style": "IPY_MODEL_326790700c434c6098c386101cc9e6c4",
 502 |             "value": " 19/19 [06:15&lt;00:00, 18.74s/it]"
 503 |           }
 504 |         },
 505 |         "0bdfbbbf29c7478298e7006189e40d3c": {
 506 |           "model_module": "@jupyter-widgets/base",
 507 |           "model_module_version": "1.2.0",
 508 |           "model_name": "LayoutModel",
 509 |           "state": {
 510 |             "_model_module": "@jupyter-widgets/base",
 511 |             "_model_module_version": "1.2.0",
 512 |             "_model_name": "LayoutModel",
 513 |             "_view_count": null,
 514 |             "_view_module": "@jupyter-widgets/base",
 515 |             "_view_module_version": "1.2.0",
 516 |             "_view_name": "LayoutView",
 517 |             "align_content": null,
 518 |             "align_items": null,
 519 |             "align_self": null,
 520 |             "border": null,
 521 |             "bottom": null,
 522 |             "display": null,
 523 |             "flex": null,
 524 |             "flex_flow": null,
 525 |             "grid_area": null,
 526 |             "grid_auto_columns": null,
 527 |             "grid_auto_flow": null,
 528 |             "grid_auto_rows": null,
 529 |             "grid_column": null,
 530 |             "grid_gap": null,
 531 |             "grid_row": null,
 532 |             "grid_template_areas": null,
 533 |             "grid_template_columns": null,
 534 |             "grid_template_rows": null,
 535 |             "height": null,
 536 |             "justify_content": null,
 537 |             "justify_items": null,
 538 |             "left": null,
 539 |             "margin": null,
 540 |             "max_height": null,
 541 |             "max_width": null,
 542 |             "min_height": null,
 543 |             "min_width": null,
 544 |             "object_fit": null,
 545 |             "object_position": null,
 546 |             "order": null,
 547 |             "overflow": null,
 548 |             "overflow_x": null,
 549 |             "overflow_y": null,
 550 |             "padding": null,
 551 |             "right": null,
 552 |             "top": null,
 553 |             "visibility": null,
 554 |             "width": null
 555 |           }
 556 |         },
 557 |         "1733e70e3664473985e530372f856ca7": {
 558 |           "model_module": "@jupyter-widgets/base",
 559 |           "model_module_version": "1.2.0",
 560 |           "model_name": "LayoutModel",
 561 |           "state": {
 562 |             "_model_module": "@jupyter-widgets/base",
 563 |             "_model_module_version": "1.2.0",
 564 |             "_model_name": "LayoutModel",
 565 |             "_view_count": null,
 566 |             "_view_module": "@jupyter-widgets/base",
 567 |             "_view_module_version": "1.2.0",
 568 |             "_view_name": "LayoutView",
 569 |             "align_content": null,
 570 |             "align_items": null,
 571 |             "align_self": null,
 572 |             "border": null,
 573 |             "bottom": null,
 574 |             "display": null,
 575 |             "flex": null,
 576 |             "flex_flow": null,
 577 |             "grid_area": null,
 578 |             "grid_auto_columns": null,
 579 |             "grid_auto_flow": null,
 580 |             "grid_auto_rows": null,
 581 |             "grid_column": null,
 582 |             "grid_gap": null,
 583 |             "grid_row": null,
 584 |             "grid_template_areas": null,
 585 |             "grid_template_columns": null,
 586 |             "grid_template_rows": null,
 587 |             "height": null,
 588 |             "justify_content": null,
 589 |             "justify_items": null,
 590 |             "left": null,
 591 |             "margin": null,
 592 |             "max_height": null,
 593 |             "max_width": null,
 594 |             "min_height": null,
 595 |             "min_width": null,
 596 |             "object_fit": null,
 597 |             "object_position": null,
 598 |             "order": null,
 599 |             "overflow": null,
 600 |             "overflow_x": null,
 601 |             "overflow_y": null,
 602 |             "padding": null,
 603 |             "right": null,
 604 |             "top": null,
 605 |             "visibility": null,
 606 |             "width": null
 607 |           }
 608 |         },
 609 |         "326790700c434c6098c386101cc9e6c4": {
 610 |           "model_module": "@jupyter-widgets/controls",
 611 |           "model_module_version": "1.5.0",
 612 |           "model_name": "DescriptionStyleModel",
 613 |           "state": {
 614 |             "_model_module": "@jupyter-widgets/controls",
 615 |             "_model_module_version": "1.5.0",
 616 |             "_model_name": "DescriptionStyleModel",
 617 |             "_view_count": null,
 618 |             "_view_module": "@jupyter-widgets/base",
 619 |             "_view_module_version": "1.2.0",
 620 |             "_view_name": "StyleView",
 621 |             "description_width": ""
 622 |           }
 623 |         },
 624 |         "442bd77df1b4493794f8a2e49fecd5d1": {
 625 |           "model_module": "@jupyter-widgets/controls",
 626 |           "model_module_version": "1.5.0",
 627 |           "model_name": "HBoxModel",
 628 |           "state": {
 629 |             "_dom_classes": [],
 630 |             "_model_module": "@jupyter-widgets/controls",
 631 |             "_model_module_version": "1.5.0",
 632 |             "_model_name": "HBoxModel",
 633 |             "_view_count": null,
 634 |             "_view_module": "@jupyter-widgets/controls",
 635 |             "_view_module_version": "1.5.0",
 636 |             "_view_name": "HBoxView",
 637 |             "box_style": "",
 638 |             "children": [
 639 |               "IPY_MODEL_ce646a2be8d84eefb8910a2b62d0e831",
 640 |               "IPY_MODEL_9283af3600a24f5fbb630ecf529fe045",
 641 |               "IPY_MODEL_046c8d8c2822419eafe2ecc5e015d7b2"
 642 |             ],
 643 |             "layout": "IPY_MODEL_7fb8fec91f84454bbcd5dcba9001d731"
 644 |           }
 645 |         },
 646 |         "49024a3b2fc746d7b89ec2297077badb": {
 647 |           "model_module": "@jupyter-widgets/controls",
 648 |           "model_module_version": "1.5.0",
 649 |           "model_name": "HTMLModel",
 650 |           "state": {
 651 |             "_dom_classes": [],
 652 |             "_model_module": "@jupyter-widgets/controls",
 653 |             "_model_module_version": "1.5.0",
 654 |             "_model_name": "HTMLModel",
 655 |             "_view_count": null,
 656 |             "_view_module": "@jupyter-widgets/controls",
 657 |             "_view_module_version": "1.5.0",
 658 |             "_view_name": "HTMLView",
 659 |             "description": "",
 660 |             "description_tooltip": null,
 661 |             "layout": "IPY_MODEL_1733e70e3664473985e530372f856ca7",
 662 |             "placeholder": "​",
 663 |             "style": "IPY_MODEL_e7c559612c0f4ffcb7467e01e4d33b2d",
 664 |             "value": " 274/274 [00:00&lt;00:00, 2056.92 examples/s]"
 665 |           }
 666 |         },
 667 |         "56145d6b220e4228ba37fe38b1d1a08a": {
 668 |           "model_module": "@jupyter-widgets/controls",
 669 |           "model_module_version": "1.5.0",
 670 |           "model_name": "DescriptionStyleModel",
 671 |           "state": {
 672 |             "_model_module": "@jupyter-widgets/controls",
 673 |             "_model_module_version": "1.5.0",
 674 |             "_model_name": "DescriptionStyleModel",
 675 |             "_view_count": null,
 676 |             "_view_module": "@jupyter-widgets/base",
 677 |             "_view_module_version": "1.2.0",
 678 |             "_view_name": "StyleView",
 679 |             "description_width": ""
 680 |           }
 681 |         },
 682 |         "7fb8fec91f84454bbcd5dcba9001d731": {
 683 |           "model_module": "@jupyter-widgets/base",
 684 |           "model_module_version": "1.2.0",
 685 |           "model_name": "LayoutModel",
 686 |           "state": {
 687 |             "_model_module": "@jupyter-widgets/base",
 688 |             "_model_module_version": "1.2.0",
 689 |             "_model_name": "LayoutModel",
 690 |             "_view_count": null,
 691 |             "_view_module": "@jupyter-widgets/base",
 692 |             "_view_module_version": "1.2.0",
 693 |             "_view_name": "LayoutView",
 694 |             "align_content": null,
 695 |             "align_items": null,
 696 |             "align_self": null,
 697 |             "border": null,
 698 |             "bottom": null,
 699 |             "display": null,
 700 |             "flex": null,
 701 |             "flex_flow": null,
 702 |             "grid_area": null,
 703 |             "grid_auto_columns": null,
 704 |             "grid_auto_flow": null,
 705 |             "grid_auto_rows": null,
 706 |             "grid_column": null,
 707 |             "grid_gap": null,
 708 |             "grid_row": null,
 709 |             "grid_template_areas": null,
 710 |             "grid_template_columns": null,
 711 |             "grid_template_rows": null,
 712 |             "height": null,
 713 |             "justify_content": null,
 714 |             "justify_items": null,
 715 |             "left": null,
 716 |             "margin": null,
 717 |             "max_height": null,
 718 |             "max_width": null,
 719 |             "min_height": null,
 720 |             "min_width": null,
 721 |             "object_fit": null,
 722 |             "object_position": null,
 723 |             "order": null,
 724 |             "overflow": null,
 725 |             "overflow_x": null,
 726 |             "overflow_y": null,
 727 |             "padding": null,
 728 |             "right": null,
 729 |             "top": null,
 730 |             "visibility": null,
 731 |             "width": null
 732 |           }
 733 |         },
 734 |         "82023d1657324e7dbd604a02ed9a8565": {
 735 |           "model_module": "@jupyter-widgets/controls",
 736 |           "model_module_version": "1.5.0",
 737 |           "model_name": "HBoxModel",
 738 |           "state": {
 739 |             "_dom_classes": [],
 740 |             "_model_module": "@jupyter-widgets/controls",
 741 |             "_model_module_version": "1.5.0",
 742 |             "_model_name": "HBoxModel",
 743 |             "_view_count": null,
 744 |             "_view_module": "@jupyter-widgets/controls",
 745 |             "_view_module_version": "1.5.0",
 746 |             "_view_name": "HBoxView",
 747 |             "box_style": "",
 748 |             "children": [
 749 |               "IPY_MODEL_c2465ae3c2df4af1b80def5cd6e3daa3",
 750 |               "IPY_MODEL_a00e1e7c8dc74e928a42ec4fcd7c110f",
 751 |               "IPY_MODEL_49024a3b2fc746d7b89ec2297077badb"
 752 |             ],
 753 |             "layout": "IPY_MODEL_0bdfbbbf29c7478298e7006189e40d3c"
 754 |           }
 755 |         },
 756 |         "87e2d5e8da95478b92a7a725c9b100a6": {
 757 |           "model_module": "@jupyter-widgets/base",
 758 |           "model_module_version": "1.2.0",
 759 |           "model_name": "LayoutModel",
 760 |           "state": {
 761 |             "_model_module": "@jupyter-widgets/base",
 762 |             "_model_module_version": "1.2.0",
 763 |             "_model_name": "LayoutModel",
 764 |             "_view_count": null,
 765 |             "_view_module": "@jupyter-widgets/base",
 766 |             "_view_module_version": "1.2.0",
 767 |             "_view_name": "LayoutView",
 768 |             "align_content": null,
 769 |             "align_items": null,
 770 |             "align_self": null,
 771 |             "border": null,
 772 |             "bottom": null,
 773 |             "display": null,
 774 |             "flex": null,
 775 |             "flex_flow": null,
 776 |             "grid_area": null,
 777 |             "grid_auto_columns": null,
 778 |             "grid_auto_flow": null,
 779 |             "grid_auto_rows": null,
 780 |             "grid_column": null,
 781 |             "grid_gap": null,
 782 |             "grid_row": null,
 783 |             "grid_template_areas": null,
 784 |             "grid_template_columns": null,
 785 |             "grid_template_rows": null,
 786 |             "height": null,
 787 |             "justify_content": null,
 788 |             "justify_items": null,
 789 |             "left": null,
 790 |             "margin": null,
 791 |             "max_height": null,
 792 |             "max_width": null,
 793 |             "min_height": null,
 794 |             "min_width": null,
 795 |             "object_fit": null,
 796 |             "object_position": null,
 797 |             "order": null,
 798 |             "overflow": null,
 799 |             "overflow_x": null,
 800 |             "overflow_y": null,
 801 |             "padding": null,
 802 |             "right": null,
 803 |             "top": null,
 804 |             "visibility": null,
 805 |             "width": null
 806 |           }
 807 |         },
 808 |         "87ecefbbf8be4e5fbf1719aed14af9ad": {
 809 |           "model_module": "@jupyter-widgets/base",
 810 |           "model_module_version": "1.2.0",
 811 |           "model_name": "LayoutModel",
 812 |           "state": {
 813 |             "_model_module": "@jupyter-widgets/base",
 814 |             "_model_module_version": "1.2.0",
 815 |             "_model_name": "LayoutModel",
 816 |             "_view_count": null,
 817 |             "_view_module": "@jupyter-widgets/base",
 818 |             "_view_module_version": "1.2.0",
 819 |             "_view_name": "LayoutView",
 820 |             "align_content": null,
 821 |             "align_items": null,
 822 |             "align_self": null,
 823 |             "border": null,
 824 |             "bottom": null,
 825 |             "display": null,
 826 |             "flex": null,
 827 |             "flex_flow": null,
 828 |             "grid_area": null,
 829 |             "grid_auto_columns": null,
 830 |             "grid_auto_flow": null,
 831 |             "grid_auto_rows": null,
 832 |             "grid_column": null,
 833 |             "grid_gap": null,
 834 |             "grid_row": null,
 835 |             "grid_template_areas": null,
 836 |             "grid_template_columns": null,
 837 |             "grid_template_rows": null,
 838 |             "height": null,
 839 |             "justify_content": null,
 840 |             "justify_items": null,
 841 |             "left": null,
 842 |             "margin": null,
 843 |             "max_height": null,
 844 |             "max_width": null,
 845 |             "min_height": null,
 846 |             "min_width": null,
 847 |             "object_fit": null,
 848 |             "object_position": null,
 849 |             "order": null,
 850 |             "overflow": null,
 851 |             "overflow_x": null,
 852 |             "overflow_y": null,
 853 |             "padding": null,
 854 |             "right": null,
 855 |             "top": null,
 856 |             "visibility": null,
 857 |             "width": null
 858 |           }
 859 |         },
 860 |         "9283af3600a24f5fbb630ecf529fe045": {
 861 |           "model_module": "@jupyter-widgets/controls",
 862 |           "model_module_version": "1.5.0",
 863 |           "model_name": "FloatProgressModel",
 864 |           "state": {
 865 |             "_dom_classes": [],
 866 |             "_model_module": "@jupyter-widgets/controls",
 867 |             "_model_module_version": "1.5.0",
 868 |             "_model_name": "FloatProgressModel",
 869 |             "_view_count": null,
 870 |             "_view_module": "@jupyter-widgets/controls",
 871 |             "_view_module_version": "1.5.0",
 872 |             "_view_name": "ProgressView",
 873 |             "bar_style": "success",
 874 |             "description": "",
 875 |             "description_tooltip": null,
 876 |             "layout": "IPY_MODEL_a275e5f970094dfa9408445452b12b3e",
 877 |             "max": 19,
 878 |             "min": 0,
 879 |             "orientation": "horizontal",
 880 |             "style": "IPY_MODEL_a30e1efe178743108b95fb5bfe32b0f0",
 881 |             "value": 19
 882 |           }
 883 |         },
 884 |         "a00e1e7c8dc74e928a42ec4fcd7c110f": {
 885 |           "model_module": "@jupyter-widgets/controls",
 886 |           "model_module_version": "1.5.0",
 887 |           "model_name": "FloatProgressModel",
 888 |           "state": {
 889 |             "_dom_classes": [],
 890 |             "_model_module": "@jupyter-widgets/controls",
 891 |             "_model_module_version": "1.5.0",
 892 |             "_model_name": "FloatProgressModel",
 893 |             "_view_count": null,
 894 |             "_view_module": "@jupyter-widgets/controls",
 895 |             "_view_module_version": "1.5.0",
 896 |             "_view_name": "ProgressView",
 897 |             "bar_style": "success",
 898 |             "description": "",
 899 |             "description_tooltip": null,
 900 |             "layout": "IPY_MODEL_f8bb1f285c0f4fdda057fb64a3379262",
 901 |             "max": 274,
 902 |             "min": 0,
 903 |             "orientation": "horizontal",
 904 |             "style": "IPY_MODEL_b1610a3654634afabcaa455e12911c9c",
 905 |             "value": 274
 906 |           }
 907 |         },
 908 |         "a0c07ee1d65d4d70b983fb33ae78dd36": {
 909 |           "model_module": "@jupyter-widgets/base",
 910 |           "model_module_version": "1.2.0",
 911 |           "model_name": "LayoutModel",
 912 |           "state": {
 913 |             "_model_module": "@jupyter-widgets/base",
 914 |             "_model_module_version": "1.2.0",
 915 |             "_model_name": "LayoutModel",
 916 |             "_view_count": null,
 917 |             "_view_module": "@jupyter-widgets/base",
 918 |             "_view_module_version": "1.2.0",
 919 |             "_view_name": "LayoutView",
 920 |             "align_content": null,
 921 |             "align_items": null,
 922 |             "align_self": null,
 923 |             "border": null,
 924 |             "bottom": null,
 925 |             "display": null,
 926 |             "flex": null,
 927 |             "flex_flow": null,
 928 |             "grid_area": null,
 929 |             "grid_auto_columns": null,
 930 |             "grid_auto_flow": null,
 931 |             "grid_auto_rows": null,
 932 |             "grid_column": null,
 933 |             "grid_gap": null,
 934 |             "grid_row": null,
 935 |             "grid_template_areas": null,
 936 |             "grid_template_columns": null,
 937 |             "grid_template_rows": null,
 938 |             "height": null,
 939 |             "justify_content": null,
 940 |             "justify_items": null,
 941 |             "left": null,
 942 |             "margin": null,
 943 |             "max_height": null,
 944 |             "max_width": null,
 945 |             "min_height": null,
 946 |             "min_width": null,
 947 |             "object_fit": null,
 948 |             "object_position": null,
 949 |             "order": null,
 950 |             "overflow": null,
 951 |             "overflow_x": null,
 952 |             "overflow_y": null,
 953 |             "padding": null,
 954 |             "right": null,
 955 |             "top": null,
 956 |             "visibility": null,
 957 |             "width": null
 958 |           }
 959 |         },
 960 |         "a275e5f970094dfa9408445452b12b3e": {
 961 |           "model_module": "@jupyter-widgets/base",
 962 |           "model_module_version": "1.2.0",
 963 |           "model_name": "LayoutModel",
 964 |           "state": {
 965 |             "_model_module": "@jupyter-widgets/base",
 966 |             "_model_module_version": "1.2.0",
 967 |             "_model_name": "LayoutModel",
 968 |             "_view_count": null,
 969 |             "_view_module": "@jupyter-widgets/base",
 970 |             "_view_module_version": "1.2.0",
 971 |             "_view_name": "LayoutView",
 972 |             "align_content": null,
 973 |             "align_items": null,
 974 |             "align_self": null,
 975 |             "border": null,
 976 |             "bottom": null,
 977 |             "display": null,
 978 |             "flex": null,
 979 |             "flex_flow": null,
 980 |             "grid_area": null,
 981 |             "grid_auto_columns": null,
 982 |             "grid_auto_flow": null,
 983 |             "grid_auto_rows": null,
 984 |             "grid_column": null,
 985 |             "grid_gap": null,
 986 |             "grid_row": null,
 987 |             "grid_template_areas": null,
 988 |             "grid_template_columns": null,
 989 |             "grid_template_rows": null,
 990 |             "height": null,
 991 |             "justify_content": null,
 992 |             "justify_items": null,
 993 |             "left": null,
 994 |             "margin": null,
 995 |             "max_height": null,
 996 |             "max_width": null,
 997 |             "min_height": null,
 998 |             "min_width": null,
 999 |             "object_fit": null,
1000 |             "object_position": null,
1001 |             "order": null,
1002 |             "overflow": null,
1003 |             "overflow_x": null,
1004 |             "overflow_y": null,
1005 |             "padding": null,
1006 |             "right": null,
1007 |             "top": null,
1008 |             "visibility": null,
1009 |             "width": null
1010 |           }
1011 |         },
1012 |         "a30e1efe178743108b95fb5bfe32b0f0": {
1013 |           "model_module": "@jupyter-widgets/controls",
1014 |           "model_module_version": "1.5.0",
1015 |           "model_name": "ProgressStyleModel",
1016 |           "state": {
1017 |             "_model_module": "@jupyter-widgets/controls",
1018 |             "_model_module_version": "1.5.0",
1019 |             "_model_name": "ProgressStyleModel",
1020 |             "_view_count": null,
1021 |             "_view_module": "@jupyter-widgets/base",
1022 |             "_view_module_version": "1.2.0",
1023 |             "_view_name": "StyleView",
1024 |             "bar_color": null,
1025 |             "description_width": ""
1026 |           }
1027 |         },
1028 |         "a54b7839a9454671a9f3d275fb338e6c": {
1029 |           "model_module": "@jupyter-widgets/controls",
1030 |           "model_module_version": "1.5.0",
1031 |           "model_name": "DescriptionStyleModel",
1032 |           "state": {
1033 |             "_model_module": "@jupyter-widgets/controls",
1034 |             "_model_module_version": "1.5.0",
1035 |             "_model_name": "DescriptionStyleModel",
1036 |             "_view_count": null,
1037 |             "_view_module": "@jupyter-widgets/base",
1038 |             "_view_module_version": "1.2.0",
1039 |             "_view_name": "StyleView",
1040 |             "description_width": ""
1041 |           }
1042 |         },
1043 |         "b1610a3654634afabcaa455e12911c9c": {
1044 |           "model_module": "@jupyter-widgets/controls",
1045 |           "model_module_version": "1.5.0",
1046 |           "model_name": "ProgressStyleModel",
1047 |           "state": {
1048 |             "_model_module": "@jupyter-widgets/controls",
1049 |             "_model_module_version": "1.5.0",
1050 |             "_model_name": "ProgressStyleModel",
1051 |             "_view_count": null,
1052 |             "_view_module": "@jupyter-widgets/base",
1053 |             "_view_module_version": "1.2.0",
1054 |             "_view_name": "StyleView",
1055 |             "bar_color": null,
1056 |             "description_width": ""
1057 |           }
1058 |         },
1059 |         "c2465ae3c2df4af1b80def5cd6e3daa3": {
1060 |           "model_module": "@jupyter-widgets/controls",
1061 |           "model_module_version": "1.5.0",
1062 |           "model_name": "HTMLModel",
1063 |           "state": {
1064 |             "_dom_classes": [],
1065 |             "_model_module": "@jupyter-widgets/controls",
1066 |             "_model_module_version": "1.5.0",
1067 |             "_model_name": "HTMLModel",
1068 |             "_view_count": null,
1069 |             "_view_module": "@jupyter-widgets/controls",
1070 |             "_view_module_version": "1.5.0",
1071 |             "_view_name": "HTMLView",
1072 |             "description": "",
1073 |             "description_tooltip": null,
1074 |             "layout": "IPY_MODEL_a0c07ee1d65d4d70b983fb33ae78dd36",
1075 |             "placeholder": "​",
1076 |             "style": "IPY_MODEL_56145d6b220e4228ba37fe38b1d1a08a",
1077 |             "value": "Map: 100%"
1078 |           }
1079 |         },
1080 |         "ce646a2be8d84eefb8910a2b62d0e831": {
1081 |           "model_module": "@jupyter-widgets/controls",
1082 |           "model_module_version": "1.5.0",
1083 |           "model_name": "HTMLModel",
1084 |           "state": {
1085 |             "_dom_classes": [],
1086 |             "_model_module": "@jupyter-widgets/controls",
1087 |             "_model_module_version": "1.5.0",
1088 |             "_model_name": "HTMLModel",
1089 |             "_view_count": null,
1090 |             "_view_module": "@jupyter-widgets/controls",
1091 |             "_view_module_version": "1.5.0",
1092 |             "_view_name": "HTMLView",
1093 |             "description": "",
1094 |             "description_tooltip": null,
1095 |             "layout": "IPY_MODEL_87ecefbbf8be4e5fbf1719aed14af9ad",
1096 |             "placeholder": "​",
1097 |             "style": "IPY_MODEL_a54b7839a9454671a9f3d275fb338e6c",
1098 |             "value": "Loading checkpoint shards: 100%"
1099 |           }
1100 |         },
1101 |         "e7c559612c0f4ffcb7467e01e4d33b2d": {
1102 |           "model_module": "@jupyter-widgets/controls",
1103 |           "model_module_version": "1.5.0",
1104 |           "model_name": "DescriptionStyleModel",
1105 |           "state": {
1106 |             "_model_module": "@jupyter-widgets/controls",
1107 |             "_model_module_version": "1.5.0",
1108 |             "_model_name": "DescriptionStyleModel",
1109 |             "_view_count": null,
1110 |             "_view_module": "@jupyter-widgets/base",
1111 |             "_view_module_version": "1.2.0",
1112 |             "_view_name": "StyleView",
1113 |             "description_width": ""
1114 |           }
1115 |         },
1116 |         "f8bb1f285c0f4fdda057fb64a3379262": {
1117 |           "model_module": "@jupyter-widgets/base",
1118 |           "model_module_version": "1.2.0",
1119 |           "model_name": "LayoutModel",
1120 |           "state": {
1121 |             "_model_module": "@jupyter-widgets/base",
1122 |             "_model_module_version": "1.2.0",
1123 |             "_model_name": "LayoutModel",
1124 |             "_view_count": null,
1125 |             "_view_module": "@jupyter-widgets/base",
1126 |             "_view_module_version": "1.2.0",
1127 |             "_view_name": "LayoutView",
1128 |             "align_content": null,
1129 |             "align_items": null,
1130 |             "align_self": null,
1131 |             "border": null,
1132 |             "bottom": null,
1133 |             "display": null,
1134 |             "flex": null,
1135 |             "flex_flow": null,
1136 |             "grid_area": null,
1137 |             "grid_auto_columns": null,
1138 |             "grid_auto_flow": null,
1139 |             "grid_auto_rows": null,
1140 |             "grid_column": null,
1141 |             "grid_gap": null,
1142 |             "grid_row": null,
1143 |             "grid_template_areas": null,
1144 |             "grid_template_columns": null,
1145 |             "grid_template_rows": null,
1146 |             "height": null,
1147 |             "justify_content": null,
1148 |             "justify_items": null,
1149 |             "left": null,
1150 |             "margin": null,
1151 |             "max_height": null,
1152 |             "max_width": null,
1153 |             "min_height": null,
1154 |             "min_width": null,
1155 |             "object_fit": null,
1156 |             "object_position": null,
1157 |             "order": null,
1158 |             "overflow": null,
1159 |             "overflow_x": null,
1160 |             "overflow_y": null,
1161 |             "padding": null,
1162 |             "right": null,
1163 |             "top": null,
1164 |             "visibility": null,
1165 |             "width": null
1166 |           }
1167 |         }
1168 |       }
1169 |     }
1170 |   },
1171 |   "nbformat": 4,
1172 |   "nbformat_minor": 0
1173 | }
1174 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024 Prakhar Saxena
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining
 6 | a copy of this software and associated documentation files (the
 7 | "Software"), to deal in the Software without restriction, including
 8 | without limitation the rights to use, copy, modify, merge, publish,
 9 | distribute, sublicense, and/or sell copies of the Software, and to
10 | permit persons to whom the Software is furnished to do so, subject to
11 | the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be
14 | included in all copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # About LLMs Repository
 2 | 
 3 | ## Overview
 4 | Easy tutorials on LLMs. This repository is dedicated to sharing insights and knowledge about LLMs, including less-talked-about topics like tokenizers, data collators, etc.
 5 | 
 6 | ## Features
 7 | - Insights on various aspects of LLMs.
 8 | - Regular updates with new content.
 9 | 
10 | Stay tuned for more!
11 | 
12 | ## License
13 | This project is licensed under the MIT License - see the [LICENSE](LICENSE.txt) file for details.
14 | 


--------------------------------------------------------------------------------
/inference.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import os\n",
 10 |     "os.environ[\"CUDA_VISIBLE_DEVICES\"]=\"0\" #use this according to your GPU setup\n",
 11 |     "import os\n",
 12 |     "import torch\n",
 13 |     "import transformers\n",
 14 |     "from transformers import AutoTokenizer, AutoModelForCausalLM\n",
 15 |     "from peft import  LoraConfig, get_peft_model, PeftModel\n"
 16 |    ]
 17 |   },
 18 |   {
 19 |    "cell_type": "code",
 20 |    "execution_count": null,
 21 |    "metadata": {},
 22 |    "outputs": [],
 23 |    "source": [
 24 |     "seed = 42\n",
 25 |     "torch.manual_seed(seed)\n",
 26 |     "torch.cuda.manual_seed_all(seed)\n",
 27 |     "\n",
 28 |     "# Use the base model which you trained below, can be llama, mixtral etc\n",
 29 |     "model_name = \"mistralai/Mistral-7B-Instruct-v0.2\"\n",
 30 |     "\n",
 31 |     "tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
 32 |     "model = AutoModelForCausalLM.from_pretrained(\n",
 33 |     "   model_name,\n",
 34 |     "    load_in_8bit=True,\n",
 35 |     "    device_map=\"auto\",\n",
 36 |     "    torch_dtype=torch.float16\n",
 37 |     ")\n",
 38 |     "print(model)\n",
 39 |     "\n",
 40 |     " # path to the checkpoint folder, check your path as the checkpoint number might be different\n",
 41 |     "lora =  \"./mistral-lora-instruct-shapeskeare/checkpoint-32/\"\n",
 42 |     "\n",
 43 |     "model = PeftModel.from_pretrained(model, lora)\n",
 44 |     "\n"
 45 |    ]
 46 |   },
 47 |   {
 48 |    "cell_type": "code",
 49 |    "execution_count": null,
 50 |    "metadata": {},
 51 |    "outputs": [],
 52 |    "source": [
 53 |     "def generate_prompt(user_query):  #The prompt format is taken from the official Mistral huggingface page, format for each model might differ\n",
 54 |     "    return \"[INST]\" + user_query + \"[/INST]\" \n",
 55 |     "\n",
 56 |     "def evaluate(question):\n",
 57 |     "\n",
 58 |     "    prompt= generate_prompt(question)\n",
 59 |     "    inputs = tokenizer(prompt, add_special_tokens=False, return_tensors=\"pt\")\n",
 60 |     "    input_ids = inputs[\"input_ids\"].cuda()\n",
 61 |     "\n",
 62 |     "    with torch.no_grad():\n",
 63 |     "        gen_tokens = model.generate(\n",
 64 |     "             input_ids=input_ids,\n",
 65 |     "            attention_mask=inputs[\"attention_mask\"].cuda(),\n",
 66 |     "           max_new_tokens=512,\n",
 67 |     "            do_sample=True,\n",
 68 |     "            temperature=0.8,\n",
 69 |     "            top_p=0.95,\n",
 70 |     "            eos_token_id=2,\n",
 71 |     "                )\n",
 72 |     "\n",
 73 |     "    out=tokenizer.decode(gen_tokens[0],\n",
 74 |     "        skip_special_tokens=False)\n",
 75 |     "\n",
 76 |     "    return out\n",
 77 |     "\n"
 78 |    ]
 79 |   },
 80 |   {
 81 |    "cell_type": "code",
 82 |    "execution_count": null,
 83 |    "metadata": {},
 84 |    "outputs": [],
 85 |    "source": [
 86 |     "questions_eval = [\n",
 87 |     "    \"What is your favorite color?\",\n",
 88 |     "    \"How many continents are there in the world?\",\n",
 89 |     "    \"What year was the first iPhone released?\",\n",
 90 |     "    \"What is the capital of France?\",\n",
 91 |     "    \"Who wrote 'To Kill a Mockingbird'?\",\n",
 92 |     "    \"What gas do plants breathe in that humans and animals breathe out?\",\n",
 93 |     "    \"How many planets are in our solar system?\",\n",
 94 |     "    \"What is the boiling point of water?\",\n",
 95 |     "    \"What is the largest ocean on Earth?\",\n",
 96 |     "    \"Who is the current president of the United States?\"\n",
 97 |     "]\n",
 98 |     "\n",
 99 |     "\n",
100 |     "for question in  questions_eval:\n",
101 |     "    a=evaluate(question)\n",
102 |     "    print(a)\n",
103 |     "    print()\n",
104 |     "    print(\"-\"*50)"
105 |    ]
106 |   },
107 |   {
108 |    "cell_type": "code",
109 |    "execution_count": null,
110 |    "metadata": {},
111 |    "outputs": [],
112 |    "source": []
113 |   }
114 |  ],
115 |  "metadata": {
116 |   "kernelspec": {
117 |    "display_name": "repe",
118 |    "language": "python",
119 |    "name": "python3"
120 |   },
121 |   "language_info": {
122 |    "codemirror_mode": {
123 |     "name": "ipython",
124 |     "version": 3
125 |    },
126 |    "file_extension": ".py",
127 |    "mimetype": "text/x-python",
128 |    "name": "python",
129 |    "nbconvert_exporter": "python",
130 |    "pygments_lexer": "ipython3",
131 |    "version": "3.10.14"
132 |   }
133 |  },
134 |  "nbformat": 4,
135 |  "nbformat_minor": 2
136 | }
137 | 


--------------------------------------------------------------------------------