├── Finetune_AnyLLM.ipynb ├── Finetune_Mixtral_lora.ipynb ├── LICENSE.txt ├── README.md └── inference.ipynb /Finetune_Mixtral_lora.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# License\n", 8 | "This notebook is licensed under the MIT License - see the LICENSE file in [this repository](https://github.com/PrakharSaxena24/RepoForLLMs/) for details.\n" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "metadata": { 14 | "id": "-MlkIQ0pLSrY" 15 | }, 16 | "source": [ 17 | "# Finetune Mixtral8x7B.\n", 18 | "This is being run on A100 (40GB).\n" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": { 25 | "id": "S1CJwtl7J2Eg" 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "!pip install -q -U bitsandbytes transformers peft accelerate datasets scipy\n" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": null, 35 | "metadata": { 36 | "id": "NBPD8rQUkmTg" 37 | }, 38 | "outputs": [], 39 | "source": [ 40 | "import torch\n", 41 | "import transformers\n", 42 | "from datasets import load_dataset\n", 43 | "from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments\n", 44 | "from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model, PeftModel" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": { 51 | "colab": { 52 | "base_uri": "https://localhost:8080/", 53 | "height": 49, 54 | "referenced_widgets": [ 55 | "442bd77df1b4493794f8a2e49fecd5d1", 56 | "ce646a2be8d84eefb8910a2b62d0e831", 57 | "9283af3600a24f5fbb630ecf529fe045", 58 | "046c8d8c2822419eafe2ecc5e015d7b2", 59 | "7fb8fec91f84454bbcd5dcba9001d731", 60 | "87ecefbbf8be4e5fbf1719aed14af9ad", 61 | "a54b7839a9454671a9f3d275fb338e6c", 62 | "a275e5f970094dfa9408445452b12b3e", 63 | "a30e1efe178743108b95fb5bfe32b0f0", 64 | 
"87e2d5e8da95478b92a7a725c9b100a6", 65 | "326790700c434c6098c386101cc9e6c4" 66 | ] 67 | }, 68 | "id": "E8JNln8MknxJ", 69 | "outputId": "ea33aedd-e100-4e21-fff8-40d8b22dcdf9" 70 | }, 71 | "outputs": [ 72 | { 73 | "data": { 74 | "application/vnd.jupyter.widget-view+json": { 75 | "model_id": "442bd77df1b4493794f8a2e49fecd5d1", 76 | "version_major": 2, 77 | "version_minor": 0 78 | }, 79 | "text/plain": [ 80 | "Loading checkpoint shards: 0%| | 0/19 [00:00 [INST]\" + sys_msg +\"\\n\"+ user_query[\"modern\"] + \"[/INST]\" + user_query[\"shakespearean\"] + \"\"\n", 211 | " return p" 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": null, 217 | "metadata": { 218 | "id": "z5BN_OW-qTyr" 219 | }, 220 | "outputs": [], 221 | "source": [ 222 | "def tokenize(prompt):\n", 223 | " return tokenizer(\n", 224 | " prompt + tokenizer.eos_token,\n", 225 | " truncation=True,\n", 226 | " max_length=CUTOFF_LEN ,\n", 227 | " padding=\"max_length\"\n", 228 | " )" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": null, 234 | "metadata": { 235 | "colab": { 236 | "base_uri": "https://localhost:8080/", 237 | "height": 49, 238 | "referenced_widgets": [ 239 | "82023d1657324e7dbd604a02ed9a8565", 240 | "c2465ae3c2df4af1b80def5cd6e3daa3", 241 | "a00e1e7c8dc74e928a42ec4fcd7c110f", 242 | "49024a3b2fc746d7b89ec2297077badb", 243 | "0bdfbbbf29c7478298e7006189e40d3c", 244 | "a0c07ee1d65d4d70b983fb33ae78dd36", 245 | "56145d6b220e4228ba37fe38b1d1a08a", 246 | "f8bb1f285c0f4fdda057fb64a3379262", 247 | "b1610a3654634afabcaa455e12911c9c", 248 | "1733e70e3664473985e530372f856ca7", 249 | "e7c559612c0f4ffcb7467e01e4d33b2d" 250 | ] 251 | }, 252 | "id": "ov8PNhNPqtlq", 253 | "outputId": "de260c66-4a80-4d11-937f-bf53590ca678" 254 | }, 255 | "outputs": [ 256 | { 257 | "data": { 258 | "application/vnd.jupyter.widget-view+json": { 259 | "model_id": "82023d1657324e7dbd604a02ed9a8565", 260 | "version_major": 2, 261 | "version_minor": 0 262 | }, 263 | "text/plain": [ 264 | 
"Map: 0%| | 0/274 [00:00\n", 327 | " \n", 328 | " \n", 329 | " [ 55/408 07:57 < 53:00, 0.11 it/s, Epoch 0.79/6]\n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | "
StepTraining Loss
26.575100
45.346300
63.909500
83.360000
102.603800
122.199500
142.069200
161.869600
181.914100
201.813700
221.680800
241.706800
261.474800
281.683900
301.678800
321.478400
341.557600
361.424500
381.405000
401.498200
421.407700
441.307800
461.285800
481.359700
501.449500
521.409000

" 445 | ], 446 | "text/plain": [ 447 | "" 448 | ] 449 | }, 450 | "metadata": {}, 451 | "output_type": "display_data" 452 | } 453 | ], 454 | "source": [ 455 | "trainer.train()" 456 | ] 457 | }, 458 | { 459 | "cell_type": "code", 460 | "execution_count": null, 461 | "metadata": { 462 | "id": "agczdazarIdk" 463 | }, 464 | "outputs": [], 465 | "source": [] 466 | } 467 | ], 468 | "metadata": { 469 | "accelerator": "GPU", 470 | "colab": { 471 | "gpuType": "A100", 472 | "machine_shape": "hm", 473 | "provenance": [] 474 | }, 475 | "kernelspec": { 476 | "display_name": "Python 3", 477 | "name": "python3" 478 | }, 479 | "language_info": { 480 | "name": "python" 481 | }, 482 | "widgets": { 483 | "application/vnd.jupyter.widget-state+json": { 484 | "046c8d8c2822419eafe2ecc5e015d7b2": { 485 | "model_module": "@jupyter-widgets/controls", 486 | "model_module_version": "1.5.0", 487 | "model_name": "HTMLModel", 488 | "state": { 489 | "_dom_classes": [], 490 | "_model_module": "@jupyter-widgets/controls", 491 | "_model_module_version": "1.5.0", 492 | "_model_name": "HTMLModel", 493 | "_view_count": null, 494 | "_view_module": "@jupyter-widgets/controls", 495 | "_view_module_version": "1.5.0", 496 | "_view_name": "HTMLView", 497 | "description": "", 498 | "description_tooltip": null, 499 | "layout": "IPY_MODEL_87e2d5e8da95478b92a7a725c9b100a6", 500 | "placeholder": "​", 501 | "style": "IPY_MODEL_326790700c434c6098c386101cc9e6c4", 502 | "value": " 19/19 [06:15<00:00, 18.74s/it]" 503 | } 504 | }, 505 | "0bdfbbbf29c7478298e7006189e40d3c": { 506 | "model_module": "@jupyter-widgets/base", 507 | "model_module_version": "1.2.0", 508 | "model_name": "LayoutModel", 509 | "state": { 510 | "_model_module": "@jupyter-widgets/base", 511 | "_model_module_version": "1.2.0", 512 | "_model_name": "LayoutModel", 513 | "_view_count": null, 514 | "_view_module": "@jupyter-widgets/base", 515 | "_view_module_version": "1.2.0", 516 | "_view_name": "LayoutView", 517 | "align_content": null, 518 | 
"align_items": null, 519 | "align_self": null, 520 | "border": null, 521 | "bottom": null, 522 | "display": null, 523 | "flex": null, 524 | "flex_flow": null, 525 | "grid_area": null, 526 | "grid_auto_columns": null, 527 | "grid_auto_flow": null, 528 | "grid_auto_rows": null, 529 | "grid_column": null, 530 | "grid_gap": null, 531 | "grid_row": null, 532 | "grid_template_areas": null, 533 | "grid_template_columns": null, 534 | "grid_template_rows": null, 535 | "height": null, 536 | "justify_content": null, 537 | "justify_items": null, 538 | "left": null, 539 | "margin": null, 540 | "max_height": null, 541 | "max_width": null, 542 | "min_height": null, 543 | "min_width": null, 544 | "object_fit": null, 545 | "object_position": null, 546 | "order": null, 547 | "overflow": null, 548 | "overflow_x": null, 549 | "overflow_y": null, 550 | "padding": null, 551 | "right": null, 552 | "top": null, 553 | "visibility": null, 554 | "width": null 555 | } 556 | }, 557 | "1733e70e3664473985e530372f856ca7": { 558 | "model_module": "@jupyter-widgets/base", 559 | "model_module_version": "1.2.0", 560 | "model_name": "LayoutModel", 561 | "state": { 562 | "_model_module": "@jupyter-widgets/base", 563 | "_model_module_version": "1.2.0", 564 | "_model_name": "LayoutModel", 565 | "_view_count": null, 566 | "_view_module": "@jupyter-widgets/base", 567 | "_view_module_version": "1.2.0", 568 | "_view_name": "LayoutView", 569 | "align_content": null, 570 | "align_items": null, 571 | "align_self": null, 572 | "border": null, 573 | "bottom": null, 574 | "display": null, 575 | "flex": null, 576 | "flex_flow": null, 577 | "grid_area": null, 578 | "grid_auto_columns": null, 579 | "grid_auto_flow": null, 580 | "grid_auto_rows": null, 581 | "grid_column": null, 582 | "grid_gap": null, 583 | "grid_row": null, 584 | "grid_template_areas": null, 585 | "grid_template_columns": null, 586 | "grid_template_rows": null, 587 | "height": null, 588 | "justify_content": null, 589 | "justify_items": null, 590 | 
"left": null, 591 | "margin": null, 592 | "max_height": null, 593 | "max_width": null, 594 | "min_height": null, 595 | "min_width": null, 596 | "object_fit": null, 597 | "object_position": null, 598 | "order": null, 599 | "overflow": null, 600 | "overflow_x": null, 601 | "overflow_y": null, 602 | "padding": null, 603 | "right": null, 604 | "top": null, 605 | "visibility": null, 606 | "width": null 607 | } 608 | }, 609 | "326790700c434c6098c386101cc9e6c4": { 610 | "model_module": "@jupyter-widgets/controls", 611 | "model_module_version": "1.5.0", 612 | "model_name": "DescriptionStyleModel", 613 | "state": { 614 | "_model_module": "@jupyter-widgets/controls", 615 | "_model_module_version": "1.5.0", 616 | "_model_name": "DescriptionStyleModel", 617 | "_view_count": null, 618 | "_view_module": "@jupyter-widgets/base", 619 | "_view_module_version": "1.2.0", 620 | "_view_name": "StyleView", 621 | "description_width": "" 622 | } 623 | }, 624 | "442bd77df1b4493794f8a2e49fecd5d1": { 625 | "model_module": "@jupyter-widgets/controls", 626 | "model_module_version": "1.5.0", 627 | "model_name": "HBoxModel", 628 | "state": { 629 | "_dom_classes": [], 630 | "_model_module": "@jupyter-widgets/controls", 631 | "_model_module_version": "1.5.0", 632 | "_model_name": "HBoxModel", 633 | "_view_count": null, 634 | "_view_module": "@jupyter-widgets/controls", 635 | "_view_module_version": "1.5.0", 636 | "_view_name": "HBoxView", 637 | "box_style": "", 638 | "children": [ 639 | "IPY_MODEL_ce646a2be8d84eefb8910a2b62d0e831", 640 | "IPY_MODEL_9283af3600a24f5fbb630ecf529fe045", 641 | "IPY_MODEL_046c8d8c2822419eafe2ecc5e015d7b2" 642 | ], 643 | "layout": "IPY_MODEL_7fb8fec91f84454bbcd5dcba9001d731" 644 | } 645 | }, 646 | "49024a3b2fc746d7b89ec2297077badb": { 647 | "model_module": "@jupyter-widgets/controls", 648 | "model_module_version": "1.5.0", 649 | "model_name": "HTMLModel", 650 | "state": { 651 | "_dom_classes": [], 652 | "_model_module": "@jupyter-widgets/controls", 653 | 
"_model_module_version": "1.5.0", 654 | "_model_name": "HTMLModel", 655 | "_view_count": null, 656 | "_view_module": "@jupyter-widgets/controls", 657 | "_view_module_version": "1.5.0", 658 | "_view_name": "HTMLView", 659 | "description": "", 660 | "description_tooltip": null, 661 | "layout": "IPY_MODEL_1733e70e3664473985e530372f856ca7", 662 | "placeholder": "​", 663 | "style": "IPY_MODEL_e7c559612c0f4ffcb7467e01e4d33b2d", 664 | "value": " 274/274 [00:00<00:00, 2056.92 examples/s]" 665 | } 666 | }, 667 | "56145d6b220e4228ba37fe38b1d1a08a": { 668 | "model_module": "@jupyter-widgets/controls", 669 | "model_module_version": "1.5.0", 670 | "model_name": "DescriptionStyleModel", 671 | "state": { 672 | "_model_module": "@jupyter-widgets/controls", 673 | "_model_module_version": "1.5.0", 674 | "_model_name": "DescriptionStyleModel", 675 | "_view_count": null, 676 | "_view_module": "@jupyter-widgets/base", 677 | "_view_module_version": "1.2.0", 678 | "_view_name": "StyleView", 679 | "description_width": "" 680 | } 681 | }, 682 | "7fb8fec91f84454bbcd5dcba9001d731": { 683 | "model_module": "@jupyter-widgets/base", 684 | "model_module_version": "1.2.0", 685 | "model_name": "LayoutModel", 686 | "state": { 687 | "_model_module": "@jupyter-widgets/base", 688 | "_model_module_version": "1.2.0", 689 | "_model_name": "LayoutModel", 690 | "_view_count": null, 691 | "_view_module": "@jupyter-widgets/base", 692 | "_view_module_version": "1.2.0", 693 | "_view_name": "LayoutView", 694 | "align_content": null, 695 | "align_items": null, 696 | "align_self": null, 697 | "border": null, 698 | "bottom": null, 699 | "display": null, 700 | "flex": null, 701 | "flex_flow": null, 702 | "grid_area": null, 703 | "grid_auto_columns": null, 704 | "grid_auto_flow": null, 705 | "grid_auto_rows": null, 706 | "grid_column": null, 707 | "grid_gap": null, 708 | "grid_row": null, 709 | "grid_template_areas": null, 710 | "grid_template_columns": null, 711 | "grid_template_rows": null, 712 | "height": null, 
713 | "justify_content": null, 714 | "justify_items": null, 715 | "left": null, 716 | "margin": null, 717 | "max_height": null, 718 | "max_width": null, 719 | "min_height": null, 720 | "min_width": null, 721 | "object_fit": null, 722 | "object_position": null, 723 | "order": null, 724 | "overflow": null, 725 | "overflow_x": null, 726 | "overflow_y": null, 727 | "padding": null, 728 | "right": null, 729 | "top": null, 730 | "visibility": null, 731 | "width": null 732 | } 733 | }, 734 | "82023d1657324e7dbd604a02ed9a8565": { 735 | "model_module": "@jupyter-widgets/controls", 736 | "model_module_version": "1.5.0", 737 | "model_name": "HBoxModel", 738 | "state": { 739 | "_dom_classes": [], 740 | "_model_module": "@jupyter-widgets/controls", 741 | "_model_module_version": "1.5.0", 742 | "_model_name": "HBoxModel", 743 | "_view_count": null, 744 | "_view_module": "@jupyter-widgets/controls", 745 | "_view_module_version": "1.5.0", 746 | "_view_name": "HBoxView", 747 | "box_style": "", 748 | "children": [ 749 | "IPY_MODEL_c2465ae3c2df4af1b80def5cd6e3daa3", 750 | "IPY_MODEL_a00e1e7c8dc74e928a42ec4fcd7c110f", 751 | "IPY_MODEL_49024a3b2fc746d7b89ec2297077badb" 752 | ], 753 | "layout": "IPY_MODEL_0bdfbbbf29c7478298e7006189e40d3c" 754 | } 755 | }, 756 | "87e2d5e8da95478b92a7a725c9b100a6": { 757 | "model_module": "@jupyter-widgets/base", 758 | "model_module_version": "1.2.0", 759 | "model_name": "LayoutModel", 760 | "state": { 761 | "_model_module": "@jupyter-widgets/base", 762 | "_model_module_version": "1.2.0", 763 | "_model_name": "LayoutModel", 764 | "_view_count": null, 765 | "_view_module": "@jupyter-widgets/base", 766 | "_view_module_version": "1.2.0", 767 | "_view_name": "LayoutView", 768 | "align_content": null, 769 | "align_items": null, 770 | "align_self": null, 771 | "border": null, 772 | "bottom": null, 773 | "display": null, 774 | "flex": null, 775 | "flex_flow": null, 776 | "grid_area": null, 777 | "grid_auto_columns": null, 778 | "grid_auto_flow": null, 779 | 
"grid_auto_rows": null, 780 | "grid_column": null, 781 | "grid_gap": null, 782 | "grid_row": null, 783 | "grid_template_areas": null, 784 | "grid_template_columns": null, 785 | "grid_template_rows": null, 786 | "height": null, 787 | "justify_content": null, 788 | "justify_items": null, 789 | "left": null, 790 | "margin": null, 791 | "max_height": null, 792 | "max_width": null, 793 | "min_height": null, 794 | "min_width": null, 795 | "object_fit": null, 796 | "object_position": null, 797 | "order": null, 798 | "overflow": null, 799 | "overflow_x": null, 800 | "overflow_y": null, 801 | "padding": null, 802 | "right": null, 803 | "top": null, 804 | "visibility": null, 805 | "width": null 806 | } 807 | }, 808 | "87ecefbbf8be4e5fbf1719aed14af9ad": { 809 | "model_module": "@jupyter-widgets/base", 810 | "model_module_version": "1.2.0", 811 | "model_name": "LayoutModel", 812 | "state": { 813 | "_model_module": "@jupyter-widgets/base", 814 | "_model_module_version": "1.2.0", 815 | "_model_name": "LayoutModel", 816 | "_view_count": null, 817 | "_view_module": "@jupyter-widgets/base", 818 | "_view_module_version": "1.2.0", 819 | "_view_name": "LayoutView", 820 | "align_content": null, 821 | "align_items": null, 822 | "align_self": null, 823 | "border": null, 824 | "bottom": null, 825 | "display": null, 826 | "flex": null, 827 | "flex_flow": null, 828 | "grid_area": null, 829 | "grid_auto_columns": null, 830 | "grid_auto_flow": null, 831 | "grid_auto_rows": null, 832 | "grid_column": null, 833 | "grid_gap": null, 834 | "grid_row": null, 835 | "grid_template_areas": null, 836 | "grid_template_columns": null, 837 | "grid_template_rows": null, 838 | "height": null, 839 | "justify_content": null, 840 | "justify_items": null, 841 | "left": null, 842 | "margin": null, 843 | "max_height": null, 844 | "max_width": null, 845 | "min_height": null, 846 | "min_width": null, 847 | "object_fit": null, 848 | "object_position": null, 849 | "order": null, 850 | "overflow": null, 851 | 
"overflow_x": null, 852 | "overflow_y": null, 853 | "padding": null, 854 | "right": null, 855 | "top": null, 856 | "visibility": null, 857 | "width": null 858 | } 859 | }, 860 | "9283af3600a24f5fbb630ecf529fe045": { 861 | "model_module": "@jupyter-widgets/controls", 862 | "model_module_version": "1.5.0", 863 | "model_name": "FloatProgressModel", 864 | "state": { 865 | "_dom_classes": [], 866 | "_model_module": "@jupyter-widgets/controls", 867 | "_model_module_version": "1.5.0", 868 | "_model_name": "FloatProgressModel", 869 | "_view_count": null, 870 | "_view_module": "@jupyter-widgets/controls", 871 | "_view_module_version": "1.5.0", 872 | "_view_name": "ProgressView", 873 | "bar_style": "success", 874 | "description": "", 875 | "description_tooltip": null, 876 | "layout": "IPY_MODEL_a275e5f970094dfa9408445452b12b3e", 877 | "max": 19, 878 | "min": 0, 879 | "orientation": "horizontal", 880 | "style": "IPY_MODEL_a30e1efe178743108b95fb5bfe32b0f0", 881 | "value": 19 882 | } 883 | }, 884 | "a00e1e7c8dc74e928a42ec4fcd7c110f": { 885 | "model_module": "@jupyter-widgets/controls", 886 | "model_module_version": "1.5.0", 887 | "model_name": "FloatProgressModel", 888 | "state": { 889 | "_dom_classes": [], 890 | "_model_module": "@jupyter-widgets/controls", 891 | "_model_module_version": "1.5.0", 892 | "_model_name": "FloatProgressModel", 893 | "_view_count": null, 894 | "_view_module": "@jupyter-widgets/controls", 895 | "_view_module_version": "1.5.0", 896 | "_view_name": "ProgressView", 897 | "bar_style": "success", 898 | "description": "", 899 | "description_tooltip": null, 900 | "layout": "IPY_MODEL_f8bb1f285c0f4fdda057fb64a3379262", 901 | "max": 274, 902 | "min": 0, 903 | "orientation": "horizontal", 904 | "style": "IPY_MODEL_b1610a3654634afabcaa455e12911c9c", 905 | "value": 274 906 | } 907 | }, 908 | "a0c07ee1d65d4d70b983fb33ae78dd36": { 909 | "model_module": "@jupyter-widgets/base", 910 | "model_module_version": "1.2.0", 911 | "model_name": "LayoutModel", 912 | "state": 
{ 913 | "_model_module": "@jupyter-widgets/base", 914 | "_model_module_version": "1.2.0", 915 | "_model_name": "LayoutModel", 916 | "_view_count": null, 917 | "_view_module": "@jupyter-widgets/base", 918 | "_view_module_version": "1.2.0", 919 | "_view_name": "LayoutView", 920 | "align_content": null, 921 | "align_items": null, 922 | "align_self": null, 923 | "border": null, 924 | "bottom": null, 925 | "display": null, 926 | "flex": null, 927 | "flex_flow": null, 928 | "grid_area": null, 929 | "grid_auto_columns": null, 930 | "grid_auto_flow": null, 931 | "grid_auto_rows": null, 932 | "grid_column": null, 933 | "grid_gap": null, 934 | "grid_row": null, 935 | "grid_template_areas": null, 936 | "grid_template_columns": null, 937 | "grid_template_rows": null, 938 | "height": null, 939 | "justify_content": null, 940 | "justify_items": null, 941 | "left": null, 942 | "margin": null, 943 | "max_height": null, 944 | "max_width": null, 945 | "min_height": null, 946 | "min_width": null, 947 | "object_fit": null, 948 | "object_position": null, 949 | "order": null, 950 | "overflow": null, 951 | "overflow_x": null, 952 | "overflow_y": null, 953 | "padding": null, 954 | "right": null, 955 | "top": null, 956 | "visibility": null, 957 | "width": null 958 | } 959 | }, 960 | "a275e5f970094dfa9408445452b12b3e": { 961 | "model_module": "@jupyter-widgets/base", 962 | "model_module_version": "1.2.0", 963 | "model_name": "LayoutModel", 964 | "state": { 965 | "_model_module": "@jupyter-widgets/base", 966 | "_model_module_version": "1.2.0", 967 | "_model_name": "LayoutModel", 968 | "_view_count": null, 969 | "_view_module": "@jupyter-widgets/base", 970 | "_view_module_version": "1.2.0", 971 | "_view_name": "LayoutView", 972 | "align_content": null, 973 | "align_items": null, 974 | "align_self": null, 975 | "border": null, 976 | "bottom": null, 977 | "display": null, 978 | "flex": null, 979 | "flex_flow": null, 980 | "grid_area": null, 981 | "grid_auto_columns": null, 982 | 
"grid_auto_flow": null, 983 | "grid_auto_rows": null, 984 | "grid_column": null, 985 | "grid_gap": null, 986 | "grid_row": null, 987 | "grid_template_areas": null, 988 | "grid_template_columns": null, 989 | "grid_template_rows": null, 990 | "height": null, 991 | "justify_content": null, 992 | "justify_items": null, 993 | "left": null, 994 | "margin": null, 995 | "max_height": null, 996 | "max_width": null, 997 | "min_height": null, 998 | "min_width": null, 999 | "object_fit": null, 1000 | "object_position": null, 1001 | "order": null, 1002 | "overflow": null, 1003 | "overflow_x": null, 1004 | "overflow_y": null, 1005 | "padding": null, 1006 | "right": null, 1007 | "top": null, 1008 | "visibility": null, 1009 | "width": null 1010 | } 1011 | }, 1012 | "a30e1efe178743108b95fb5bfe32b0f0": { 1013 | "model_module": "@jupyter-widgets/controls", 1014 | "model_module_version": "1.5.0", 1015 | "model_name": "ProgressStyleModel", 1016 | "state": { 1017 | "_model_module": "@jupyter-widgets/controls", 1018 | "_model_module_version": "1.5.0", 1019 | "_model_name": "ProgressStyleModel", 1020 | "_view_count": null, 1021 | "_view_module": "@jupyter-widgets/base", 1022 | "_view_module_version": "1.2.0", 1023 | "_view_name": "StyleView", 1024 | "bar_color": null, 1025 | "description_width": "" 1026 | } 1027 | }, 1028 | "a54b7839a9454671a9f3d275fb338e6c": { 1029 | "model_module": "@jupyter-widgets/controls", 1030 | "model_module_version": "1.5.0", 1031 | "model_name": "DescriptionStyleModel", 1032 | "state": { 1033 | "_model_module": "@jupyter-widgets/controls", 1034 | "_model_module_version": "1.5.0", 1035 | "_model_name": "DescriptionStyleModel", 1036 | "_view_count": null, 1037 | "_view_module": "@jupyter-widgets/base", 1038 | "_view_module_version": "1.2.0", 1039 | "_view_name": "StyleView", 1040 | "description_width": "" 1041 | } 1042 | }, 1043 | "b1610a3654634afabcaa455e12911c9c": { 1044 | "model_module": "@jupyter-widgets/controls", 1045 | "model_module_version": "1.5.0", 1046 
| "model_name": "ProgressStyleModel", 1047 | "state": { 1048 | "_model_module": "@jupyter-widgets/controls", 1049 | "_model_module_version": "1.5.0", 1050 | "_model_name": "ProgressStyleModel", 1051 | "_view_count": null, 1052 | "_view_module": "@jupyter-widgets/base", 1053 | "_view_module_version": "1.2.0", 1054 | "_view_name": "StyleView", 1055 | "bar_color": null, 1056 | "description_width": "" 1057 | } 1058 | }, 1059 | "c2465ae3c2df4af1b80def5cd6e3daa3": { 1060 | "model_module": "@jupyter-widgets/controls", 1061 | "model_module_version": "1.5.0", 1062 | "model_name": "HTMLModel", 1063 | "state": { 1064 | "_dom_classes": [], 1065 | "_model_module": "@jupyter-widgets/controls", 1066 | "_model_module_version": "1.5.0", 1067 | "_model_name": "HTMLModel", 1068 | "_view_count": null, 1069 | "_view_module": "@jupyter-widgets/controls", 1070 | "_view_module_version": "1.5.0", 1071 | "_view_name": "HTMLView", 1072 | "description": "", 1073 | "description_tooltip": null, 1074 | "layout": "IPY_MODEL_a0c07ee1d65d4d70b983fb33ae78dd36", 1075 | "placeholder": "​", 1076 | "style": "IPY_MODEL_56145d6b220e4228ba37fe38b1d1a08a", 1077 | "value": "Map: 100%" 1078 | } 1079 | }, 1080 | "ce646a2be8d84eefb8910a2b62d0e831": { 1081 | "model_module": "@jupyter-widgets/controls", 1082 | "model_module_version": "1.5.0", 1083 | "model_name": "HTMLModel", 1084 | "state": { 1085 | "_dom_classes": [], 1086 | "_model_module": "@jupyter-widgets/controls", 1087 | "_model_module_version": "1.5.0", 1088 | "_model_name": "HTMLModel", 1089 | "_view_count": null, 1090 | "_view_module": "@jupyter-widgets/controls", 1091 | "_view_module_version": "1.5.0", 1092 | "_view_name": "HTMLView", 1093 | "description": "", 1094 | "description_tooltip": null, 1095 | "layout": "IPY_MODEL_87ecefbbf8be4e5fbf1719aed14af9ad", 1096 | "placeholder": "​", 1097 | "style": "IPY_MODEL_a54b7839a9454671a9f3d275fb338e6c", 1098 | "value": "Loading checkpoint shards: 100%" 1099 | } 1100 | }, 1101 | 
"e7c559612c0f4ffcb7467e01e4d33b2d": { 1102 | "model_module": "@jupyter-widgets/controls", 1103 | "model_module_version": "1.5.0", 1104 | "model_name": "DescriptionStyleModel", 1105 | "state": { 1106 | "_model_module": "@jupyter-widgets/controls", 1107 | "_model_module_version": "1.5.0", 1108 | "_model_name": "DescriptionStyleModel", 1109 | "_view_count": null, 1110 | "_view_module": "@jupyter-widgets/base", 1111 | "_view_module_version": "1.2.0", 1112 | "_view_name": "StyleView", 1113 | "description_width": "" 1114 | } 1115 | }, 1116 | "f8bb1f285c0f4fdda057fb64a3379262": { 1117 | "model_module": "@jupyter-widgets/base", 1118 | "model_module_version": "1.2.0", 1119 | "model_name": "LayoutModel", 1120 | "state": { 1121 | "_model_module": "@jupyter-widgets/base", 1122 | "_model_module_version": "1.2.0", 1123 | "_model_name": "LayoutModel", 1124 | "_view_count": null, 1125 | "_view_module": "@jupyter-widgets/base", 1126 | "_view_module_version": "1.2.0", 1127 | "_view_name": "LayoutView", 1128 | "align_content": null, 1129 | "align_items": null, 1130 | "align_self": null, 1131 | "border": null, 1132 | "bottom": null, 1133 | "display": null, 1134 | "flex": null, 1135 | "flex_flow": null, 1136 | "grid_area": null, 1137 | "grid_auto_columns": null, 1138 | "grid_auto_flow": null, 1139 | "grid_auto_rows": null, 1140 | "grid_column": null, 1141 | "grid_gap": null, 1142 | "grid_row": null, 1143 | "grid_template_areas": null, 1144 | "grid_template_columns": null, 1145 | "grid_template_rows": null, 1146 | "height": null, 1147 | "justify_content": null, 1148 | "justify_items": null, 1149 | "left": null, 1150 | "margin": null, 1151 | "max_height": null, 1152 | "max_width": null, 1153 | "min_height": null, 1154 | "min_width": null, 1155 | "object_fit": null, 1156 | "object_position": null, 1157 | "order": null, 1158 | "overflow": null, 1159 | "overflow_x": null, 1160 | "overflow_y": null, 1161 | "padding": null, 1162 | "right": null, 1163 | "top": null, 1164 | "visibility": null, 
1165 | "width": null 1166 | } 1167 | } 1168 | } 1169 | } 1170 | }, 1171 | "nbformat": 4, 1172 | "nbformat_minor": 0 1173 | } 1174 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Prakhar Saxena 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the 7 | "Software"), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject to 11 | the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be 14 | included in all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 20 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 22 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # About LLMs Repository 2 | 3 | ## Overview 4 | Easy tutorials on LLMs. This repository is dedicated to sharing insights and knowledge about LLMs, including less talked about topics like tokenizers, data collators etc. 5 | 6 | ## Features 7 | - Insights on various aspects of LLMs. 8 | - Regular updates with new content. 
9 | 10 | Stay tuned for more! 11 | 12 | ## License 13 | This project is licensed under the MIT License - see the [LICENSE](LICENSE.txt) file for details. 14 | -------------------------------------------------------------------------------- /inference.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "os.environ[\"CUDA_VISIBLE_DEVICES\"]=\"0\" #use this according to your GPU setup\n", 11 | "import os\n", 12 | "import torch\n", 13 | "import transformers\n", 14 | "from transformers import AutoTokenizer, AutoModelForCausalLM\n", 15 | "from peft import LoraConfig, get_peft_model, PeftModel\n" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "seed = 42\n", 25 | "torch.manual_seed(seed)\n", 26 | "torch.cuda.manual_seed_all(seed)\n", 27 | "\n", 28 | "# Use the base model which you trained below, can be llama, mixtral etc\n", 29 | "model_name = \"mistralai/Mistral-7B-Instruct-v0.2\"\n", 30 | "\n", 31 | "tokenizer = AutoTokenizer.from_pretrained(model_name)\n", 32 | "model = AutoModelForCausalLM.from_pretrained(\n", 33 | " model_name,\n", 34 | " load_in_8bit=True,\n", 35 | " device_map=\"auto\",\n", 36 | " torch_dtype=torch.float16\n", 37 | ")\n", 38 | "print(model)\n", 39 | "\n", 40 | " # path to the checkpoint folder, check your path as the checkpoint number might be different\n", 41 | "lora = \"./mistral-lora-instruct-shapeskeare/checkpoint-32/\"\n", 42 | "\n", 43 | "model = PeftModel.from_pretrained(model, lora)\n", 44 | "\n" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "def generate_prompt(user_query): #The prompt format is taken from the official Mistral huggingface page, format for each 
model might differ\n", 54 | " return \"[INST]\" + user_query + \"[/INST]\" \n", 55 | "\n", 56 | "def evaluate(question):\n", 57 | "\n", 58 | " prompt= generate_prompt(question)\n", 59 | " inputs = tokenizer(prompt, add_special_tokens=False, return_tensors=\"pt\")\n", 60 | " input_ids = inputs[\"input_ids\"].cuda()\n", 61 | "\n", 62 | " with torch.no_grad():\n", 63 | " gen_tokens = model.generate(\n", 64 | " input_ids=input_ids,\n", 65 | " attention_mask=inputs[\"attention_mask\"].cuda(),\n", 66 | " max_new_tokens=512,\n", 67 | " do_sample=True,\n", 68 | " temperature=0.8,\n", 69 | " top_p=0.95,\n", 70 | " eos_token_id=2,\n", 71 | " )\n", 72 | "\n", 73 | " out=tokenizer.decode(gen_tokens[0],\n", 74 | " skip_special_tokens=False)\n", 75 | "\n", 76 | " return out\n", 77 | "\n" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "questions_eval = [\n", 87 | " \"What is your favorite color?\",\n", 88 | " \"How many continents are there in the world?\",\n", 89 | " \"What year was the first iPhone released?\",\n", 90 | " \"What is the capital of France?\",\n", 91 | " \"Who wrote 'To Kill a Mockingbird'?\",\n", 92 | " \"What gas do plants breathe in that humans and animals breathe out?\",\n", 93 | " \"How many planets are in our solar system?\",\n", 94 | " \"What is the boiling point of water?\",\n", 95 | " \"What is the largest ocean on Earth?\",\n", 96 | " \"Who is the current president of the United States?\"\n", 97 | "]\n", 98 | "\n", 99 | "\n", 100 | "for question in questions_eval:\n", 101 | " a=evaluate(question)\n", 102 | " print(a)\n", 103 | " print()\n", 104 | " print(\"-\"*50)" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "metadata": {}, 111 | "outputs": [], 112 | "source": [] 113 | } 114 | ], 115 | "metadata": { 116 | "kernelspec": { 117 | "display_name": "repe", 118 | "language": "python", 119 | "name": "python3" 120 | }, 121 | 
"language_info": { 122 | "codemirror_mode": { 123 | "name": "ipython", 124 | "version": 3 125 | }, 126 | "file_extension": ".py", 127 | "mimetype": "text/x-python", 128 | "name": "python", 129 | "nbconvert_exporter": "python", 130 | "pygments_lexer": "ipython3", 131 | "version": "3.10.14" 132 | } 133 | }, 134 | "nbformat": 4, 135 | "nbformat_minor": 2 136 | } 137 | --------------------------------------------------------------------------------