├── LICENSE
├── README.md
├── instruction_finetuning
    ├── dataset_generation
    │   └── Ad_Copy_Dataset.ipynb
    └── training
    │   ├── __init__.py
    │   ├── requirements.txt
    │   ├── run_peft.sh
    │   ├── train.py
    │   └── utils.py
├── multimodal_instruction_finetuning
    └── IDEFICS_Finetuning_demo.ipynb
└── personal_copilot
    ├── dataset_generation
        ├── README.md
        ├── clone_hf_repos.py
        ├── prepare_dataset.py
        └── requirements.txt
    └── training
        ├── fim.py
        ├── llama_flash_attn_monkey_patch.py
        ├── requirements.txt
        ├── run_peft.sh
        └── train.py


/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # peft-pytorch-conference
2 | Code for the examples presented in the talk "Training a Llama in your backyard: fine-tuning very large models on consumer hardware" given at PyTorch Conference 2023
3 | 


--------------------------------------------------------------------------------
/instruction_finetuning/dataset_generation/Ad_Copy_Dataset.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 5,
  6 |    "id": "7a78203d",
  7 |    "metadata": {},
  8 |    "outputs": [],
  9 |    "source": [
 10 |     "from transformers import AutoTokenizer\n",
 11 |     "\n",
 12 |     "tokenizer = AutoTokenizer.from_pretrained(\"meta-llama/Llama-2-7b-chat-hf\", use_auth_token=True)\n"
 13 |    ]
 14 |   },
 15 |   {
 16 |    "cell_type": "code",
 17 |    "execution_count": 6,
 18 |    "id": "84a5c544",
 19 |    "metadata": {},
 20 |    "outputs": [
 21 |     {
 22 |      "name": "stdout",
 23 |      "output_type": "stream",
 24 |      "text": [
 25 |       "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif true == true and not '<<SYS>>' in messages[0]['content'] %}{% set loop_messages = messages %}{% set system_message = 'You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\\n\\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don\\'t know the answer to a question, please don\\'t share false information.' %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<<SYS>>\\n' + system_message + '\\n<</SYS>>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'system' %}{{ '<<SYS>>\\n' + content.strip() + '\\n<</SYS>>\\n\\n' }}{% elif message['role'] == 'assistant' %}{{ ' '  + content.strip() + ' ' + eos_token }}{% endif %}{% endfor %}\n"
 26 |      ]
 27 |     }
 28 |    ],
 29 |    "source": [
 30 |     "print(tokenizer.default_chat_template)"
 31 |    ]
 32 |   },
 33 |   {
 34 |    "cell_type": "code",
 35 |    "execution_count": 19,
 36 |    "id": "f785dd97",
 37 |    "metadata": {},
 38 |    "outputs": [
 39 |     {
 40 |      "name": "stdout",
 41 |      "output_type": "stream",
 42 |      "text": [
 43 |       "DatasetDict({\n",
 44 |       "    train: Dataset({\n",
 45 |       "        features: ['content'],\n",
 46 |       "        num_rows: 1000\n",
 47 |       "    })\n",
 48 |       "    test: Dataset({\n",
 49 |       "        features: ['content'],\n",
 50 |       "        num_rows: 141\n",
 51 |       "    })\n",
 52 |       "})\n",
 53 |       "{'content': '<s>[INST] <<SYS>>\\nCreate a text ad given the following product and description.\\n<</SYS>>\\n\\nProduct: Fitness Magazine\\nDescription: Fitness magazine for staying active and achieving your fitness goals. [/INST] Ad: Stay active with a Fitness Magazine! 💪📖 Experience fitness tips and motivating stories. Perfect for fitness enthusiasts and reaching your health and wellness goals. Limited stock - achieve fitness with a touch of motivation! 🌟🌟🏋️\\u200d♀️ </s>'}\n"
 54 |      ]
 55 |     }
 56 |    ],
 57 |    "source": [
 58 |     "from datasets import load_dataset\n",
 59 |     "\n",
 60 |     "system_prompt = \"\"\"Create a text ad given the following product and description.\"\"\"\n",
 61 |     "\n",
 62 |     "def preprocess(samples):\n",
 63 |     "    batch = []\n",
 64 |     "    for product, desc, ad_copy in zip(samples[\"product\"],samples[\"description\"],samples[\"ad\"]):\n",
 65 |     "        conversation = [\n",
 66 |     "            {\"role\": \"system\", \"content\": system_prompt},\n",
 67 |     "            {\"role\": \"user\", \"content\": f\"\"\"Product: {product}\\nDescription: {desc}\\n\"\"\"},\n",
 68 |     "            {\"role\": \"assistant\", \"content\": f\"\"\"Ad: {ad_copy}\\n\"\"\"},\n",
 69 |     "        ]\n",
 70 |     "        batch.append(tokenizer.apply_chat_template(conversation, tokenize=False))\n",
 71 |     "    return {\"content\": batch}\n",
 72 |     "            \n",
 73 |     "    \n",
 74 |     "\n",
 75 |     "\n",
 76 |     "dataset = load_dataset(\"jaykin01/advertisement-copy\")\n",
 77 |     "dataset\n",
 78 |     "dataset = dataset.map(\n",
 79 |     "    preprocess,\n",
 80 |     "    batched=True,\n",
 81 |     "    remove_columns=dataset[\"train\"].column_names\n",
 82 |     ")\n",
 83 |     "\n",
 84 |     "dataset[\"train\"] = dataset[\"train\"].shuffle(100)\n",
 85 |     "dataset_subsets = dataset[\"train\"].train_test_split(141)\n",
 86 |     "dataset[\"train\"] = dataset_subsets[\"train\"]\n",
 87 |     "dataset[\"test\"] = dataset_subsets[\"test\"]\n",
 88 |     "print(dataset)\n",
 89 |     "print(dataset[\"train\"][0])"
 90 |    ]
 91 |   },
 92 |   {
 93 |    "cell_type": "code",
 94 |    "execution_count": 20,
 95 |    "id": "8cbcdfa1",
 96 |    "metadata": {},
 97 |    "outputs": [
 98 |     {
 99 |      "data": {
100 |       "application/vnd.jupyter.widget-view+json": {
101 |        "model_id": "a7dfe18f4a4d418da38237e6e98d1d91",
102 |        "version_major": 2,
103 |        "version_minor": 0
104 |       },
105 |       "text/plain": [
106 |        "Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]"
107 |       ]
108 |      },
109 |      "metadata": {},
110 |      "output_type": "display_data"
111 |     },
112 |     {
113 |      "data": {
114 |       "application/vnd.jupyter.widget-view+json": {
115 |        "model_id": "f930a96c5f154bed9fa57313abc98f3c",
116 |        "version_major": 2,
117 |        "version_minor": 0
118 |       },
119 |       "text/plain": [
120 |        "Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]"
121 |       ]
122 |      },
123 |      "metadata": {},
124 |      "output_type": "display_data"
125 |     },
126 |     {
127 |      "data": {
128 |       "application/vnd.jupyter.widget-view+json": {
129 |        "model_id": "096881bbab2442699979197f2357fc0b",
130 |        "version_major": 2,
131 |        "version_minor": 0
132 |       },
133 |       "text/plain": [
134 |        "Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]"
135 |       ]
136 |      },
137 |      "metadata": {},
138 |      "output_type": "display_data"
139 |     },
140 |     {
141 |      "data": {
142 |       "application/vnd.jupyter.widget-view+json": {
143 |        "model_id": "040a89d92d9d4b0ab7817599579a1c57",
144 |        "version_major": 2,
145 |        "version_minor": 0
146 |       },
147 |       "text/plain": [
148 |        "Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]"
149 |       ]
150 |      },
151 |      "metadata": {},
152 |      "output_type": "display_data"
153 |     }
154 |    ],
155 |    "source": [
156 |     "dataset.push_to_hub(\"ad-copy-generation\", private=False)"
157 |    ]
158 |   },
159 |   {
160 |    "cell_type": "code",
161 |    "execution_count": null,
162 |    "id": "3eeb6577",
163 |    "metadata": {},
164 |    "outputs": [],
165 |    "source": []
166 |   }
167 |  ],
168 |  "metadata": {
169 |   "kernelspec": {
170 |    "display_name": "Python 3 (ipykernel)",
171 |    "language": "python",
172 |    "name": "python3"
173 |   },
174 |   "language_info": {
175 |    "codemirror_mode": {
176 |     "name": "ipython",
177 |     "version": 3
178 |    },
179 |    "file_extension": ".py",
180 |    "mimetype": "text/x-python",
181 |    "name": "python",
182 |    "nbconvert_exporter": "python",
183 |    "pygments_lexer": "ipython3",
184 |    "version": "3.10.11"
185 |   }
186 |  },
187 |  "nbformat": 4,
188 |  "nbformat_minor": 5
189 | }
190 | 


--------------------------------------------------------------------------------
/instruction_finetuning/training/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huggingface/peft-pytorch-conference/57116ac4e78ae5623436cc72f478151195c195aa/instruction_finetuning/training/__init__.py


--------------------------------------------------------------------------------
/instruction_finetuning/training/requirements.txt:
--------------------------------------------------------------------------------
 1 | git+https://github.com/huggingface/transformers
 2 | git+https://github.com/huggingface/accelerate
 3 | git+https://github.com/huggingface/peft
 4 | trl
 5 | huggingface-hub
 6 | bitsandbytes
 7 | evaluate
 8 | datasets
 9 | einops
10 | wandb
11 | tiktoken


--------------------------------------------------------------------------------
/instruction_finetuning/training/run_peft.sh:
--------------------------------------------------------------------------------
 1 | python train.py \
 2 | --model_name "meta-llama/Llama-2-7b-chat-hf" \
 3 | --dataset_name "smangrul/ad-copy-generation" \
 4 | --max_seq_len 512 \
 5 | --max_steps 100 \
 6 | --logging_steps 5 \
 7 | --eval_steps 25 \
 8 | --save_steps 25 \
 9 | --push_to_hub \
10 | --fp16 True \
11 | --packing True \
12 | --output_dir "llama-ad-gen" \
13 | --per_device_train_batch_size 8 \
14 | --gradient_accumulation_steps 1 \
15 | --dataset_text_field "content" \
16 | --use_peft_lora True \
17 | --lora_r 8 \
18 | --lora_alpha 32 \
19 | --lora_target_modules "q_proj,k_proj,v_proj,o_proj,down_proj,up_proj,gate_proj" \
20 | --use_4bit_qunatization True \
21 | --use_nested_quant True \
22 | --bnb_4bit_compute_dtype "float16" \
23 | --use_flash_attn False \
24 | --use_gradient_checkpointing


--------------------------------------------------------------------------------
/instruction_finetuning/training/train.py:
--------------------------------------------------------------------------------
  1 | # coding=utf-8
  2 | # Copyright 2023 The HuggingFace Inc. team. All rights reserved.
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | #     http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | from dataclasses import dataclass, field
 16 | import os
 17 | import subprocess
 18 | from typing import Optional
 19 | 
 20 | from transformers import HfArgumentParser, TrainingArguments, Trainer
 21 | from utils import *
 22 | 
 23 | ########################################################################
 24 | # This is a fully working simple example to use trl's RewardTrainer.
 25 | #
 26 | # This example fine-tunes any causal language model (GPT-2, GPT-Neo, etc.)
 27 | # by using the RewardTrainer from trl, we will leverage PEFT library to finetune
 28 | # adapters on the model.
 29 | #
 30 | ########################################################################
 31 | 
 32 | 
 33 | # Define and parse arguments.
 34 | @dataclass
 35 | class ScriptArguments:
 36 |     """
 37 |     These arguments vary depending on how many GPUs you have, what their capacity and features are, and what size model you want to train.
 38 |     """
 39 | 
 40 |     local_rank: Optional[int] = field(default=-1, metadata={"help": "Used for multi-gpu"})
 41 | 
 42 |     per_device_train_batch_size: Optional[int] = field(default=4)
 43 |     per_device_eval_batch_size: Optional[int] = field(default=1)
 44 |     gradient_accumulation_steps: Optional[int] = field(default=4)
 45 |     learning_rate: Optional[float] = field(default=2e-4)
 46 |     max_grad_norm: Optional[float] = field(default=0.3)
 47 |     weight_decay: Optional[float] = field(default=0.001)
 48 |     lora_alpha: Optional[int] = field(default=16)
 49 |     lora_dropout: Optional[float] = field(default=0.1)
 50 |     lora_r: Optional[int] = field(default=64)
 51 |     lora_target_modules: Optional[str] = field(
 52 |         default="q_proj,k_proj,v_proj,o_proj,down_proj,up_proj,gate_proj",
 53 |         metadata={"help": "comma separated list of target modules to apply LoRA layers to"},
 54 |     )
 55 |     max_seq_length: Optional[int] = field(default=512)
 56 |     model_name: Optional[str] = field(
 57 |         default="meta-llama/Llama-2-7b-chat-hf",
 58 |         metadata={
 59 |             "help": "The model that you want to train from the Hugging Face hub. E.g. gpt2, gpt2-xl, bert, etc."
 60 |         },
 61 |     )
 62 |     dataset_name: Optional[str] = field(
 63 |         default="timdettmers/openassistant-guanaco",
 64 |         metadata={"help": "The preference dataset to use."},
 65 |     )
 66 |     use_4bit: Optional[bool] = field(
 67 |         default=True,
 68 |         metadata={"help": "Activate 4bit precision base model loading"},
 69 |     )
 70 |     use_nested_quant: Optional[bool] = field(
 71 |         default=False,
 72 |         metadata={"help": "Activate nested quantization for 4bit base models"},
 73 |     )
 74 |     bnb_4bit_compute_dtype: Optional[str] = field(
 75 |         default="float16",
 76 |         metadata={"help": "Compute dtype for 4bit base models"},
 77 |     )
 78 |     bnb_4bit_quant_type: Optional[str] = field(
 79 |         default="nf4",
 80 |         metadata={"help": "Quantization type fp4 or nf4"},
 81 |     )
 82 |     num_train_epochs: Optional[int] = field(
 83 |         default=1,
 84 |         metadata={"help": "The number of training epochs for the reward model."},
 85 |     )
 86 |     fp16: Optional[bool] = field(
 87 |         default=False,
 88 |         metadata={"help": "Enables fp16 training."},
 89 |     )
 90 |     bf16: Optional[bool] = field(
 91 |         default=False,
 92 |         metadata={"help": "Enables bf16 training."},
 93 |     )
 94 |     packing: Optional[bool] = field(
 95 |         default=False,
 96 |         metadata={"help": "Use packing dataset creating."},
 97 |     )
 98 |     gradient_checkpointing: Optional[bool] = field(
 99 |         default=True,
100 |         metadata={"help": "Enables gradient checkpointing."},
101 |     )
102 |     optim: Optional[str] = field(
103 |         default="paged_adamw_32bit",
104 |         metadata={"help": "The optimizer to use."},
105 |     )
106 |     lr_scheduler_type: str = field(
107 |         default="constant",
108 |         metadata={"help": "Learning rate schedule. Constant a bit better than cosine, and has advantage for analysis"},
109 |     )
110 |     max_steps: int = field(default=10000, metadata={"help": "How many optimizer update steps to take"})
111 |     warmup_ratio: float = field(default=0.03, metadata={"help": "Fraction of steps to do a warmup for"})
112 |     save_steps: int = field(default=10, metadata={"help": "Save checkpoint every X updates steps."})
113 |     eval_steps: int = field(default=10, metadata={"help": "Eval model every X steps."})
114 |     logging_steps: int = field(default=10, metadata={"help": "Log every X updates steps."})
115 |     output_dir: str = field(default="results", metadata={"help": "Where to store the final model."})
116 |     use_flash_attn: Optional[bool] = field(
117 |         default=False,
118 |         metadata={"help": "Enables Flash attention for training."},
119 |     )
120 |     use_peft_lora: Optional[bool] = field(
121 |         default=False,
122 |         metadata={"help": "Enables PEFT LoRA for training."},
123 |     )
124 |     use_8bit_qunatization: Optional[bool] = field(
125 |         default=False,
126 |         metadata={"help": "Enables loading model in 8bit."},
127 |     )
128 |     use_4bit_qunatization: Optional[bool] = field(
129 |         default=False,
130 |         metadata={"help": "Enables loading model in 4bit."},
131 |     )
132 |     use_gradient_checkpointing: Optional[bool] = field(
133 |         default=False,
134 |         metadata={"help": "Enables Gradient Checkpointing."},
135 |     )
136 |     dataset_text_field: str = field(default="text", metadata={"help": "Dataset field to use as input text."})
137 |     push_to_hub: Optional[bool] = field(
138 |         default=False,
139 |         metadata={"help": "If True, pushes the model to the HF Hub"},
140 |     )
141 |     num_workers: int = field(default=4, metadata={"help": "Number of dataset workers to use."})
142 |     debug: Optional[bool] = field(
143 |         default=False,
144 |         metadata={"help": "If True, tests things like proper saving/loading/logging of model"},
145 |     )
146 | 
147 | 
148 | def main(args):
149 |     # training arguments
150 |     training_arguments = TrainingArguments(
151 |         output_dir=args.output_dir,
152 |         per_device_train_batch_size=args.per_device_train_batch_size,
153 |         gradient_accumulation_steps=args.gradient_accumulation_steps,
154 |         optim=args.optim,
155 |         learning_rate=args.learning_rate,
156 |         fp16=args.fp16,
157 |         bf16=args.bf16,
158 |         max_grad_norm=args.max_grad_norm,
159 |         warmup_ratio=args.warmup_ratio,
160 |         lr_scheduler_type=args.lr_scheduler_type,
161 |         num_train_epochs=args.num_train_epochs,
162 |         evaluation_strategy="steps",
163 |         save_strategy="steps",
164 |         max_steps=args.max_steps,
165 |         eval_steps=args.eval_steps,
166 |         save_steps=args.save_steps,
167 |         logging_steps=args.logging_steps,
168 |         push_to_hub=args.push_to_hub,
169 |         gradient_checkpointing=args.use_gradient_checkpointing,
170 |     )
171 | 
172 |     # model
173 |     model, peft_config, tokenizer = create_and_prepare_model(args)
174 |     model.config.use_cache = False
175 | 
176 |     # datasets
177 |     train_dataset, eval_dataset = create_datasets(tokenizer, args)
178 | 
179 |     # trainer
180 |     trainer = Trainer(model=model, args=training_arguments, train_dataset=train_dataset, eval_dataset=eval_dataset)
181 |     trainer.accelerator.print(f"{trainer.model}")
182 |     if args.use_peft_lora:
183 |         trainer.model.print_trainable_parameters()
184 | 
185 |     if args.use_peft_lora:
186 |         peft_module_casting_to_bf16(trainer.model, args)
187 | 
188 |     # train
189 |     trainer.train()
190 | 
191 |     if args.push_to_hub:
192 |         trainer.push_to_hub()
193 |         if args.use_peft_lora:
194 |             trainer.model.push_to_hub(args.output_dir)
195 |     else:
196 |         trainer.save_model(args.output_dir)
197 | 
198 | 
199 | if __name__ == "__main__":
200 |     parser = HfArgumentParser(ScriptArguments)
201 |     args = parser.parse_args_into_dataclasses()[0]
202 |     main(args)
203 | 


--------------------------------------------------------------------------------
/instruction_finetuning/training/utils.py:
--------------------------------------------------------------------------------
  1 | import random
  2 | import torch
  3 | from transformers import TrainerCallback, TrainingArguments, TrainerState, TrainerControl
  4 | from torch.utils.data import IterableDataset
  5 | from datasets import load_dataset
  6 | from tqdm import tqdm
  7 | import warnings
  8 | from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
  9 | from peft.tuners.lora import LoraLayer
 10 | from transformers import (
 11 |     AutoModelForCausalLM,
 12 |     AutoTokenizer,
 13 |     BitsAndBytesConfig,
 14 |     AutoTokenizer,
 15 |     TrainingArguments,
 16 | )
 17 | 
 18 | 
 19 | class SaveDeepSpeedPeftModelCallback(TrainerCallback):
 20 |     def __init__(self, trainer, save_steps=500):
 21 |         self.trainer = trainer
 22 |         self.save_steps = save_steps
 23 | 
 24 |     def on_step_end(
 25 |         self,
 26 |         args: TrainingArguments,
 27 |         state: TrainerState,
 28 |         control: TrainerControl,
 29 |         **kwargs,
 30 |     ):
 31 |         if (state.global_step + 1) % self.save_steps == 0:
 32 |             self.trainer.accelerator.wait_for_everyone()
 33 |             state_dict = self.trainer.accelerator.get_state_dict(self.trainer.deepspeed)
 34 |             unwrapped_model = self.trainer.accelerator.unwrap_model(self.trainer.deepspeed)
 35 |             if self.trainer.accelerator.is_main_process:
 36 |                 unwrapped_model.save_pretrained(args.output_dir, state_dict=state_dict)
 37 |             self.trainer.accelerator.wait_for_everyone()
 38 |         return control
 39 | 
 40 | 
 41 | class ConstantLengthDataset(IterableDataset):
 42 |     """
 43 |     Iterable dataset that returns constant length chunks of tokens from stream of text files.
 44 |         Args:
 45 |             tokenizer (Tokenizer): The processor used for proccessing the data.
 46 |             dataset (dataset.Dataset): Dataset with text files.
 47 |             infinite (bool): If True the iterator is reset after dataset reaches end else stops.
 48 |             seq_length (int): Length of token sequences to return.
 49 |             num_of_sequences (int): Number of token sequences to keep in buffer.
 50 |             chars_per_token (int): Number of characters per token used to estimate number of tokens in text buffer.
 51 |             shuffle (bool): If true, the samples in each buffer are suffled. Default is `True`.
 52 |             add_eos_token (bool): If true, each buffer is delimited with eos token. Default is `True`.
 53 |     """
 54 | 
 55 |     def __init__(
 56 |         self,
 57 |         tokenizer,
 58 |         dataset,
 59 |         infinite=False,
 60 |         seq_length=1024,
 61 |         num_of_sequences=1024,
 62 |         chars_per_token=3.6,
 63 |         content_field="content",
 64 |         shuffle=True,
 65 |         add_eos_token=True,
 66 |     ):
 67 |         self.tokenizer = tokenizer
 68 |         self.concat_token_id = tokenizer.eos_token_id
 69 |         self.dataset = dataset
 70 |         self.seq_length = seq_length
 71 |         self.infinite = infinite
 72 |         self.current_size = 0
 73 |         self.max_buffer_size = seq_length * chars_per_token * num_of_sequences
 74 |         self.content_field = content_field
 75 |         self.shuffle = shuffle
 76 |         self.add_eos_token = add_eos_token
 77 | 
 78 |     def __iter__(self):
 79 |         iterator = iter(self.dataset)
 80 |         more_examples = True
 81 |         while more_examples:
 82 |             buffer, buffer_len = [], 0
 83 |             while True:
 84 |                 if buffer_len >= self.max_buffer_size:
 85 |                     break
 86 |                 try:
 87 |                     buffer.append(next(iterator)[self.content_field])
 88 |                     buffer_len += len(buffer[-1])
 89 |                 except StopIteration:
 90 |                     if self.infinite:
 91 |                         iterator = iter(self.dataset)
 92 |                     else:
 93 |                         more_examples = False
 94 |                         break
 95 |             tokenized_inputs = self.tokenizer(buffer, truncation=False, add_special_tokens=False)["input_ids"]
 96 |             all_token_ids = []
 97 |             for tokenized_input in tokenized_inputs:
 98 |                 if self.add_eos_token:
 99 |                     tokenized_input = tokenized_input + [self.concat_token_id]
100 |                 all_token_ids.extend(tokenized_input)
101 |             examples = []
102 |             for i in range(0, len(all_token_ids), self.seq_length):
103 |                 input_ids = all_token_ids[i : i + self.seq_length]
104 |                 if len(input_ids) == self.seq_length:
105 |                     examples.append(input_ids)
106 |             if self.shuffle:
107 |                 random.shuffle(examples)
108 |             for example in examples:
109 |                 self.current_size += 1
110 |                 yield {
111 |                     "input_ids": torch.LongTensor(example),
112 |                     "labels": torch.LongTensor(example),
113 |                 }
114 | 
115 | 
116 | def chars_token_ratio(dataset, tokenizer, data_column, nb_examples=400):
117 |     """
118 |     Estimate the average number of characters per token in the dataset.
119 |     """
120 |     total_characters, total_tokens = 0, 0
121 |     for _, example in tqdm(zip(range(nb_examples), iter(dataset)), total=nb_examples):
122 |         total_characters += len(example[data_column])
123 |         total_tokens += len(tokenizer(example[data_column]).tokens())
124 | 
125 |     return total_characters / total_tokens
126 | 
127 | 
128 | def create_datasets(tokenizer, args):
129 |     dataset = load_dataset(args.dataset_name, use_auth_token=True, num_proc=args.num_workers)
130 |     train_data = dataset["train"]
131 |     valid_data = dataset["test"]
132 |     print(f"Size of the train set: {len(train_data)}. Size of the validation set: {len(valid_data)}")
133 |     chars_per_token = chars_token_ratio(train_data, tokenizer, args.dataset_text_field)
134 |     print(f"The character to token ratio of the dataset is: {chars_per_token:.2f}")
135 |     train_dataset = ConstantLengthDataset(
136 |         tokenizer,
137 |         train_data,
138 |         infinite=True,
139 |         seq_length=args.max_seq_length,
140 |         chars_per_token=chars_per_token,
141 |         content_field=args.dataset_text_field,
142 |         shuffle=True,
143 |         add_eos_token=False,
144 |     )
145 |     valid_dataset = ConstantLengthDataset(
146 |         tokenizer,
147 |         valid_data,
148 |         infinite=False,
149 |         seq_length=args.max_seq_length,
150 |         chars_per_token=chars_per_token,
151 |         content_field=args.dataset_text_field,
152 |         shuffle=False,
153 |         add_eos_token=False,
154 |     )
155 | 
156 |     return train_dataset, valid_dataset
157 | 
158 | 
159 | def create_and_prepare_model(args):
160 |     device_map = None
161 |     bnb_config = None
162 |     load_in_8bit = args.use_8bit_qunatization
163 | 
164 |     if args.use_4bit_qunatization:
165 |         compute_dtype = getattr(torch, args.bnb_4bit_compute_dtype)
166 | 
167 |         bnb_config = BitsAndBytesConfig(
168 |             load_in_4bit=args.use_4bit_qunatization,
169 |             bnb_4bit_quant_type=args.bnb_4bit_quant_type,
170 |             bnb_4bit_compute_dtype=compute_dtype,
171 |             bnb_4bit_use_double_quant=args.use_nested_quant,
172 |         )
173 | 
174 |         if compute_dtype == torch.float16 and args.use_4bit_qunatization:
175 |             major, _ = torch.cuda.get_device_capability()
176 |             if major >= 8:
177 |                 print("=" * 80)
178 |                 print("Your GPU supports bfloat16, you can accelerate training with the argument --bf16")
179 |                 print("=" * 80)
180 | 
181 |     if args.use_4bit_qunatization or args.use_8bit_qunatization:
182 |         device_map = "auto"  # {"": 0}
183 | 
184 |     model = AutoModelForCausalLM.from_pretrained(
185 |         args.model_name,
186 |         torch_dtype=compute_dtype,
187 |         load_in_8bit=load_in_8bit,
188 |         quantization_config=bnb_config,
189 |         device_map=device_map,
190 |         use_cache=not args.use_gradient_checkpointing,
191 |         trust_remote_code=True,
192 |         use_flash_attention_2=args.use_flash_attn,
193 |     )
194 | 
195 |     peft_config = None
196 |     if args.use_peft_lora:
197 |         peft_config = LoraConfig(
198 |             lora_alpha=args.lora_alpha,
199 |             lora_dropout=args.lora_dropout,
200 |             r=args.lora_r,
201 |             bias="none",
202 |             task_type="CAUSAL_LM",
203 |             target_modules=args.lora_target_modules.split(","),
204 |         )
205 | 
206 |         if (args.use_4bit_qunatization or args.use_8bit_qunatization) and args.use_peft_lora:
207 |             model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=args.use_gradient_checkpointing)
208 | 
209 |         if args.use_gradient_checkpointing:
210 |             model.gradient_checkpointing_enable()
211 | 
212 |         model = get_peft_model(model, peft_config)
213 |         model.print_trainable_parameters()
214 | 
215 |     tokenizer = AutoTokenizer.from_pretrained(args.model_name, trust_remote_code=True)
216 |     tokenizer.pad_token = tokenizer.eos_token
217 | 
218 |     return model, peft_config, tokenizer
219 | 
220 | 
221 | def peft_module_casting_to_bf16(model, args):
222 |     for name, module in model.named_modules():
223 |         if isinstance(module, LoraLayer):
224 |             if args.bf16:
225 |                 module = module.to(torch.bfloat16)
226 |         if "norm" in name:
227 |             module = module.to(torch.float32)
228 |         if any(x in name for x in ["lm_head", "embed_tokens", "wte", "wpe"]):
229 |             if hasattr(module, "weight"):
230 |                 if args.bf16 and module.weight.dtype == torch.float32:
231 |                     module = module.to(torch.bfloat16)
232 | 


--------------------------------------------------------------------------------
/multimodal_instruction_finetuning/IDEFICS_Finetuning_demo.ipynb:
--------------------------------------------------------------------------------
   1 | {
   2 |   "cells": [
   3 |     {
   4 |       "cell_type": "markdown",
   5 |       "metadata": {
   6 |         "id": "UNCNPVi8iAgw"
   7 |       },
   8 |       "source": [
   9 |         "# IDEFICS: A Flamingo-based model, trained at scale for the community\n",
  10 |         "# Finetuning Demo Notebook:\n",
  11 |         "\n",
  12 |         "<div style=\"text-align: center;\">\n",
  13 |         "</div>\n",
  14 |         "<div style=\"display: flex; justify-content: center;\">\n",
  15 |         "    <img src=\"https://huggingface.co/HuggingFaceM4/idefics-80b/resolve/main/assets/Idefics_colab.png\" alt=\"Idefics image\" >\n",
  16 |         "</div>\n",
  17 |         "\n",
  18 |         "Credit: [Flamingo blog](https://www.deepmind.com/blog/tackling-multiple-tasks-with-a-single-visual-language-model)\n",
  19 |         "\n",
  20 |         "This google colab notebook shows how to run predictions with the 4-bit quantized 🤗 [Idefics-9B model](https://huggingface.co/HuggingFaceM4/idefics-9b) and finetune it on a specific dataset.\n",
  21 |         "\n",
  22 |         "[IDEFICS](https://huggingface.co/HuggingFaceM4/idefics-80b) is a multi-modal model based on the [Flamingo](https://arxiv.org/abs/2204.14198) architecture. It can take images and texts as input and return text outputs but it does not support image generation. \\\\\n",
  23 |         "IDEFICS is built on top of two unimodal open-access pre-trained models to connect the two modalities. Newly initialized parameters in the form of Transformer blocks bridge the gap between the vision encoder and the language model. The model is trained on a mixture of image/text pairs and unstrucutred multimodal web documents. \\\\\n",
  24 |         "The [finetuned versions](https://huggingface.co/HuggingFaceM4/idefics-80b-instruct) of IDEFICS behave like LLM chatbots while also understanding visual input. \\\\\n",
  25 |         "You can play with the [demo here](https://huggingface.co/spaces/HuggingFaceM4/idefics_playground)\n",
  26 |         "\n",
  27 |         "The code for this notebook was contributed to by *Léo Tronchon, Younes Belkada, and Stas Bekman*, the IDEFICS model has been contributed to by: *Lucile Saulnier, Léo Tronchon, Hugo Laurençon, Stas Bekman, Amanpreet Singh, Siddharth Karamcheti, and Victor Sanh*"
  28 |       ]
  29 |     },
  30 |     {
  31 |       "cell_type": "markdown",
  32 |       "source": [
  33 |         "# Install and import necessary libraries"
  34 |       ],
  35 |       "metadata": {
  36 |         "id": "7m9zw1wcCC8e"
  37 |       }
  38 |     },
  39 |     {
  40 |       "cell_type": "code",
  41 |       "source": [
  42 |         "!pip install -q datasets\n",
  43 |         "!pip install -q git+https://github.com/huggingface/transformers.git@add-model-idefics\n",
  44 |         "!pip install -q bitsandbytes sentencepiece accelerate loralib\n",
  45 |         "!pip install -q -U git+https://github.com/huggingface/peft.git"
  46 |       ],
  47 |       "metadata": {
  48 |         "colab": {
  49 |           "base_uri": "https://localhost:8080/"
  50 |         },
  51 |         "id": "prXRsUiXCII9",
  52 |         "outputId": "3b9da6dd-365b-484d-9d37-a723eee947de"
  53 |       },
  54 |       "execution_count": null,
  55 |       "outputs": [
  56 |         {
  57 |           "output_type": "stream",
  58 |           "name": "stdout",
  59 |           "text": [
  60 |             "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m519.3/519.3 kB\u001b[0m \u001b[31m6.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
  61 |             "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.3/115.3 kB\u001b[0m \u001b[31m6.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
  62 |             "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m9.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
  63 |             "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m8.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
  64 |             "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m268.8/268.8 kB\u001b[0m \u001b[31m10.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
  65 |             "\u001b[?25h  Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
  66 |             "  Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
  67 |             "  Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
  68 |             "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m17.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
  69 |             "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m31.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
  70 |             "\u001b[?25h  Building wheel for transformers (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
  71 |             "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m92.6/92.6 MB\u001b[0m \u001b[31m11.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
  72 |             "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m67.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
  73 |             "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m244.2/244.2 kB\u001b[0m \u001b[31m25.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
  74 |             "\u001b[?25h  Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
  75 |             "  Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
  76 |             "  Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
  77 |             "  Building wheel for peft (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n"
  78 |           ]
  79 |         }
  80 |       ]
  81 |     },
  82 |     {
  83 |       "cell_type": "code",
  84 |       "execution_count": null,
  85 |       "metadata": {
  86 |         "id": "MxoHmx-HfAgf"
  87 |       },
  88 |       "outputs": [],
  89 |       "source": [
  90 |         "import torch\n",
  91 |         "from datasets import load_dataset\n",
  92 |         "from peft import LoraConfig, get_peft_model\n",
  93 |         "from PIL import Image\n",
  94 |         "from transformers import IdeficsForVisionText2Text, AutoProcessor, Trainer, TrainingArguments, BitsAndBytesConfig\n",
  95 |         "import torchvision.transforms as transforms"
  96 |       ]
  97 |     },
  98 |     {
  99 |       "cell_type": "markdown",
 100 |       "metadata": {
 101 |         "id": "DP_ilre6jI6l"
 102 |       },
 103 |       "source": [
 104 |         "# Load quantized model\n",
 105 |         "First get the quantized version of the model. This will allow us to use the 9B version of Idefics with a single 16GB gpu\n",
 106 |         "\n"
 107 |       ]
 108 |     },
 109 |     {
 110 |       "cell_type": "code",
 111 |       "execution_count": null,
 112 |       "metadata": {
 113 |         "colab": {
 114 |           "base_uri": "https://localhost:8080/",
 115 |           "height": 84,
 116 |           "referenced_widgets": [
 117 |             "cf454254fbc74724a6909e60d82f86a3",
 118 |             "561b1b43dbc1484784ea2abed7278c08",
 119 |             "996e2ae7de594ccc968ce83382786365",
 120 |             "7e72c1fdf039470f8b14859034c7942f",
 121 |             "f34958207dca46fd9aa044912ec9fddb",
 122 |             "0fa55920c3a54b30aca74aa7247fe2ea",
 123 |             "119ec52a3ce54b0d9565a0d44e731850",
 124 |             "27e2b5c562174873bb966f1408727058",
 125 |             "008e6d4c958149819fd7e64e30f79e39",
 126 |             "9302d5fbae224b999a0c3fcb3f34beb3",
 127 |             "8c82d2f9f97047478d8399b2aee3389f"
 128 |           ]
 129 |         },
 130 |         "id": "IRiT0q0Ck-3Y",
 131 |         "outputId": "52bc69ec-32ec-45d7-b1a2-1a7af0539506"
 132 |       },
 133 |       "outputs": [
 134 |         {
 135 |           "output_type": "stream",
 136 |           "name": "stderr",
 137 |           "text": [
 138 |             "/usr/local/lib/python3.10/dist-packages/transformers/models/auto/processing_auto.py:203: FutureWarning: The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers.\n",
 139 |             "  warnings.warn(\n"
 140 |           ]
 141 |         },
 142 |         {
 143 |           "output_type": "display_data",
 144 |           "data": {
 145 |             "text/plain": [
 146 |               "Loading checkpoint shards:   0%|          | 0/19 [00:00<?, ?it/s]"
 147 |             ],
 148 |             "application/vnd.jupyter.widget-view+json": {
 149 |               "version_major": 2,
 150 |               "version_minor": 0,
 151 |               "model_id": "cf454254fbc74724a6909e60d82f86a3"
 152 |             }
 153 |           },
 154 |           "metadata": {}
 155 |         }
 156 |       ],
 157 |       "source": [
 158 |         "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
 159 |         "\n",
 160 |         "# checkpoint = \"HuggingFaceM4/tiny-random-idefics\"\n",
 161 |         "checkpoint = \"HuggingFaceM4/idefics-9b\"\n",
 162 |         "\n",
 163 |         "# Here we skip some special modules that can't be quantized properly\n",
 164 |         "bnb_config = BitsAndBytesConfig(\n",
 165 |         "    load_in_4bit=True,\n",
 166 |         "    bnb_4bit_use_double_quant=True,\n",
 167 |         "    bnb_4bit_quant_type=\"nf4\",\n",
 168 |         "    bnb_4bit_compute_dtype=torch.float16,\n",
 169 |         "    llm_int8_skip_modules=[\"lm_head\", \"embed_tokens\"],\n",
 170 |         ")\n",
 171 |         "\n",
 172 |         "processor = AutoProcessor.from_pretrained(checkpoint, use_auth_token=True)\n",
 173 |         "# Simply take-off the quantization_config arg if you want to load the original model\n",
 174 |         "model = IdeficsForVisionText2Text.from_pretrained(checkpoint, quantization_config=bnb_config, device_map=\"auto\")"
 175 |       ]
 176 |     },
 177 |     {
 178 |       "cell_type": "markdown",
 179 |       "source": [
 180 |         "If you print the model, you will see that all `nn.Linear` layers are in fact replaced by `bnb.nn.Linear4bit` layers."
 181 |       ],
 182 |       "metadata": {
 183 |         "id": "PloVmAaM75kJ"
 184 |       }
 185 |     },
 186 |     {
 187 |       "cell_type": "code",
 188 |       "source": [
 189 |         "print(model)"
 190 |       ],
 191 |       "metadata": {
 192 |         "colab": {
 193 |           "base_uri": "https://localhost:8080/"
 194 |         },
 195 |         "id": "4gaDzRK174Ur",
 196 |         "outputId": "5ab94bef-eb66-43af-d2e7-33bf9ec9acf9"
 197 |       },
 198 |       "execution_count": null,
 199 |       "outputs": [
 200 |         {
 201 |           "output_type": "stream",
 202 |           "name": "stdout",
 203 |           "text": [
 204 |             "IdeficsForVisionText2Text(\n",
 205 |             "  (model): IdeficsModel(\n",
 206 |             "    (embed_tokens): IdeficsDecoupledEmbedding(\n",
 207 |             "      num_embeddings=32000, num_additional_embeddings=2, embedding_dim=4096, partially_freeze=False\n",
 208 |             "      (additional_embedding): Embedding(2, 4096)\n",
 209 |             "    )\n",
 210 |             "    (vision_model): IdeficsVisionTransformer(\n",
 211 |             "      (embeddings): IdeficsVisionEmbeddings(\n",
 212 |             "        (patch_embedding): Conv2d(3, 1280, kernel_size=(14, 14), stride=(14, 14), bias=False)\n",
 213 |             "        (position_embedding): Embedding(257, 1280)\n",
 214 |             "      )\n",
 215 |             "      (pre_layrnorm): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
 216 |             "      (encoder): IdeficsVisionEncoder(\n",
 217 |             "        (layers): ModuleList(\n",
 218 |             "          (0-31): 32 x IdeficsVisionEncoderLayer(\n",
 219 |             "            (self_attn): IdeficsVisionAttention(\n",
 220 |             "              (k_proj): Linear4bit(in_features=1280, out_features=1280, bias=True)\n",
 221 |             "              (v_proj): Linear4bit(in_features=1280, out_features=1280, bias=True)\n",
 222 |             "              (q_proj): Linear4bit(in_features=1280, out_features=1280, bias=True)\n",
 223 |             "              (out_proj): Linear4bit(in_features=1280, out_features=1280, bias=True)\n",
 224 |             "            )\n",
 225 |             "            (layer_norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
 226 |             "            (mlp): IdeficsVisionMLP(\n",
 227 |             "              (activation_fn): QuickGELUActivation()\n",
 228 |             "              (fc1): Linear4bit(in_features=1280, out_features=5120, bias=True)\n",
 229 |             "              (fc2): Linear4bit(in_features=5120, out_features=1280, bias=True)\n",
 230 |             "            )\n",
 231 |             "            (layer_norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
 232 |             "          )\n",
 233 |             "        )\n",
 234 |             "      )\n",
 235 |             "      (post_layernorm): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
 236 |             "    )\n",
 237 |             "    (perceiver_resampler): IdeficsPerceiverResampler(\n",
 238 |             "      (blocks): ModuleList(\n",
 239 |             "        (0-5): 6 x ModuleList(\n",
 240 |             "          (0): IdeficsPerceiverAttention(\n",
 241 |             "            (context_layer_norm): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
 242 |             "            (latents_layer_norm): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
 243 |             "            (q_layer_norm): LayerNorm((96,), eps=1e-05, elementwise_affine=True)\n",
 244 |             "            (k_layer_norm): LayerNorm((96,), eps=1e-05, elementwise_affine=True)\n",
 245 |             "            (q_proj): Linear4bit(in_features=1280, out_features=1536, bias=False)\n",
 246 |             "            (k_proj): Linear4bit(in_features=1280, out_features=1536, bias=False)\n",
 247 |             "            (v_proj): Linear4bit(in_features=1280, out_features=1536, bias=False)\n",
 248 |             "            (output_proj): Linear4bit(in_features=1536, out_features=1280, bias=False)\n",
 249 |             "          )\n",
 250 |             "          (1): IdeficsMLP(\n",
 251 |             "            (ln): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
 252 |             "            (fc): Linear4bit(in_features=1280, out_features=5120, bias=False)\n",
 253 |             "            (act): ReLU()\n",
 254 |             "            (c_proj): Linear4bit(in_features=5120, out_features=1280, bias=False)\n",
 255 |             "          )\n",
 256 |             "        )\n",
 257 |             "      )\n",
 258 |             "      (layer_norm): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
 259 |             "    )\n",
 260 |             "    (layers): ModuleList(\n",
 261 |             "      (0-31): 32 x IdeficsDecoderLayer(\n",
 262 |             "        (self_attn): IdeficsAttention(\n",
 263 |             "          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)\n",
 264 |             "          (k_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)\n",
 265 |             "          (v_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)\n",
 266 |             "          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)\n",
 267 |             "          (rotary_emb): IdeficsEmbedding()\n",
 268 |             "        )\n",
 269 |             "        (mlp): IdeficsMLP(\n",
 270 |             "          (gate_proj): Linear4bit(in_features=4096, out_features=11008, bias=False)\n",
 271 |             "          (down_proj): Linear4bit(in_features=11008, out_features=4096, bias=False)\n",
 272 |             "          (up_proj): Linear4bit(in_features=4096, out_features=11008, bias=False)\n",
 273 |             "          (act_fn): SiLUActivation()\n",
 274 |             "        )\n",
 275 |             "        (input_layernorm): IdeficsRMSNorm()\n",
 276 |             "        (post_attention_layernorm): IdeficsRMSNorm()\n",
 277 |             "      )\n",
 278 |             "    )\n",
 279 |             "    (gated_cross_attn_layers): ModuleList(\n",
 280 |             "      (0-7): 8 x IdeficsGatedCrossAttentionLayer(\n",
 281 |             "        (cross_attn): IdeficsAttention(\n",
 282 |             "          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)\n",
 283 |             "          (k_proj): Linear4bit(in_features=1280, out_features=4096, bias=False)\n",
 284 |             "          (v_proj): Linear4bit(in_features=1280, out_features=4096, bias=False)\n",
 285 |             "          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)\n",
 286 |             "          (rotary_emb): IdeficsEmbedding()\n",
 287 |             "          (q_layer_norm): IdeficsRMSNorm()\n",
 288 |             "          (k_layer_norm): IdeficsRMSNorm()\n",
 289 |             "        )\n",
 290 |             "        (mlp): IdeficsMLP(\n",
 291 |             "          (gate_proj): Linear4bit(in_features=4096, out_features=11008, bias=False)\n",
 292 |             "          (down_proj): Linear4bit(in_features=11008, out_features=4096, bias=False)\n",
 293 |             "          (up_proj): Linear4bit(in_features=4096, out_features=11008, bias=False)\n",
 294 |             "          (act_fn): SiLUActivation()\n",
 295 |             "        )\n",
 296 |             "        (input_layernorm): IdeficsRMSNorm()\n",
 297 |             "        (post_attention_layernorm): IdeficsRMSNorm()\n",
 298 |             "        (act_cross_attn): Tanh()\n",
 299 |             "        (act_dense): Tanh()\n",
 300 |             "      )\n",
 301 |             "    )\n",
 302 |             "    (norm): IdeficsRMSNorm()\n",
 303 |             "  )\n",
 304 |             "  (lm_head): IdeficsDecoupledLinear(\n",
 305 |             "    in_features=4096, out_features=32000, out_additional_features=2, bias=False, partially_freeze=False\n",
 306 |             "    (additional_fc): Linear(in_features=4096, out_features=2, bias=False)\n",
 307 |             "  )\n",
 308 |             ")\n"
 309 |           ]
 310 |         }
 311 |       ]
 312 |     },
 313 |     {
 314 |       "cell_type": "markdown",
 315 |       "source": [
 316 |         "# Inference\n",
 317 |         "Let's make a simple method to test the model's inference"
 318 |       ],
 319 |       "metadata": {
 320 |         "id": "8EIvXR6fPG6d"
 321 |       }
 322 |     },
 323 |     {
 324 |       "cell_type": "code",
 325 |       "source": [
 326 |         "def check_inference(model, processor, prompts, max_new_tokens=50):\n",
 327 |         "    tokenizer = processor.tokenizer\n",
 328 |         "    bad_words = [\"<image>\", \"<fake_token_around_image>\"]\n",
 329 |         "    if len(bad_words) > 0:\n",
 330 |         "        bad_words_ids = tokenizer(bad_words, add_special_tokens=False).input_ids\n",
 331 |         "\n",
 332 |         "    eos_token = \"</s>\"\n",
 333 |         "    eos_token_id = tokenizer.convert_tokens_to_ids(eos_token)\n",
 334 |         "\n",
 335 |         "    inputs = processor(prompts, return_tensors=\"pt\").to(device)\n",
 336 |         "    generated_ids = model.generate(**inputs, eos_token_id=[eos_token_id], bad_words_ids=bad_words_ids, max_new_tokens=max_new_tokens, early_stopping=True)\n",
 337 |         "    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]\n",
 338 |         "    print(generated_text)"
 339 |       ],
 340 |       "metadata": {
 341 |         "id": "J5MSZ3xdPF4f"
 342 |       },
 343 |       "execution_count": null,
 344 |       "outputs": []
 345 |     },
 346 |     {
 347 |       "cell_type": "markdown",
 348 |       "metadata": {
 349 |         "id": "RYA2HKGC0n9d"
 350 |       },
 351 |       "source": [
 352 |         "\n",
 353 |         "Let's run prediction with the quantized model for the image below which pictures two kittens. \\\\\n",
 354 |         "<img src=\"https://hips.hearstapps.com/hmg-prod/images/cute-photos-of-cats-in-grass-1593184777.jpg\" width=\"400\"/>"
 355 |       ]
 356 |     },
 357 |     {
 358 |       "cell_type": "code",
 359 |       "execution_count": null,
 360 |       "metadata": {
 361 |         "id": "6I_iDtQN03jE",
 362 |         "colab": {
 363 |           "base_uri": "https://localhost:8080/"
 364 |         },
 365 |         "outputId": "a4a77c65-186a-45e0-f819-3ea3d9d319c0"
 366 |       },
 367 |       "outputs": [
 368 |         {
 369 |           "output_type": "stream",
 370 |           "name": "stdout",
 371 |           "text": [
 372 |             "Question: What's on the picture? Answer: Two kittens.\n"
 373 |           ]
 374 |         }
 375 |       ],
 376 |       "source": [
 377 |         "url = \"https://hips.hearstapps.com/hmg-prod/images/cute-photos-of-cats-in-grass-1593184777.jpg\"\n",
 378 |         "prompts = [\n",
 379 |         "    # \"Instruction: provide an answer to the question. Use the image to answer.\\n\",\n",
 380 |         "    url,\n",
 381 |         "    \"Question: What's on the picture? Answer:\",\n",
 382 |         "]\n",
 383 |         "check_inference(model, processor, prompts, max_new_tokens=5)\n"
 384 |       ]
 385 |     },
 386 |     {
 387 |       "cell_type": "markdown",
 388 |       "source": [
 389 |         "Now let's see how the model fares on pokemon knowledge before we try to finetune it further. \\\\\n",
 390 |         "<img src=\"https://images.pokemontcg.io/pop6/2_hires.png\" width=\"194\"/>\n"
 391 |       ],
 392 |       "metadata": {
 393 |         "id": "DLiwPnGBxiJf"
 394 |       }
 395 |     },
 396 |     {
 397 |       "cell_type": "code",
 398 |       "source": [
 399 |         "# check generation before finetuning\n",
 400 |         "\n",
 401 |         "url = \"https://images.pokemontcg.io/pop6/2_hires.png\"\n",
 402 |         "prompts = [\n",
 403 |         "    url,\n",
 404 |         "    \"Question: What's on the picture? Answer:\",\n",
 405 |         "]\n",
 406 |         "check_inference(model, processor, prompts, max_new_tokens=100)\n",
 407 |         "# It looks like the model is already aware of pokemon - but it could be more specific, and less repetitive"
 408 |       ],
 409 |       "metadata": {
 410 |         "colab": {
 411 |           "base_uri": "https://localhost:8080/"
 412 |         },
 413 |         "id": "lDVDUE1ew7tZ",
 414 |         "outputId": "37ba5c61-c607-4282-e57b-25cada593391"
 415 |       },
 416 |       "execution_count": null,
 417 |       "outputs": [
 418 |         {
 419 |           "output_type": "stream",
 420 |           "name": "stdout",
 421 |           "text": [
 422 |             "Question: What's on the picture? Answer: Lucario\n",
 423 |             "\n",
 424 |             "Lucario is a Pokémon that is a combination of a bear and a lion. It is a Pokémon that is a combination of a bear and a lion. It is a Pokémon that is a combination of a bear and a lion. It is a Pokémon that is a combination of a bear and a lion. It is a Pokémon that is a combination of a bear and a lion. It is a Pok\n"
 425 |           ]
 426 |         }
 427 |       ]
 428 |     },
 429 |     {
 430 |       "cell_type": "markdown",
 431 |       "source": [
 432 |         "# Finetuning dataset\n",
 433 |         "Prepare the dataset that will be used for finetuning\n"
 434 |       ],
 435 |       "metadata": {
 436 |         "id": "ydBhQT6SQiWy"
 437 |       }
 438 |     },
 439 |     {
 440 |       "cell_type": "code",
 441 |       "execution_count": null,
 442 |       "metadata": {
 443 |         "colab": {
 444 |           "base_uri": "https://localhost:8080/",
 445 |           "height": 177,
 446 |           "referenced_widgets": [
 447 |             "eac0761e22a84275aaee5d7ec7929da6",
 448 |             "ba24eb82f1194ecab3514466eca8a2b8",
 449 |             "52997c23e16a4f8aa220909e99b5452e",
 450 |             "6b7767dc6c5b45a89f7becfe5fcf81d7",
 451 |             "050b365a82b0412b83918f9f9603bf2f",
 452 |             "39c0d7023e574db9a55eb7e82913d4ed",
 453 |             "9bdbd4871dcd49a5bbfaa86b813e9a36",
 454 |             "7bfee1d1c4134316af5b82cd354457ba",
 455 |             "594fd06a2b07443a9ce27200468d5fe3",
 456 |             "55de5af50af247cd93da17057661fd6c",
 457 |             "450f2b15f9df4f72b23c4f916bc18f3b",
 458 |             "22df8e4fce3b470b94fdce6e7b77a9ea",
 459 |             "cd89e195d2bb4537889ec8cc9e7a815e",
 460 |             "196951cc2fdd43d4a153de2666067cd0",
 461 |             "aaf9e7678c174fe8820c5c0bebb6bb1e",
 462 |             "699f568cedd846f590efa2500dd8b3a9",
 463 |             "a0f3836eb674483295fbb147065b74fc",
 464 |             "da922717666a496da59a4cf8840e6554",
 465 |             "cc9ddc6c56324dd59cfb8bc9649fea28",
 466 |             "830ec2345d9a4be88b486ad24bfc3b10",
 467 |             "05b63fe3c99c417fb6bcdb450081bff8",
 468 |             "e2872821e4e84271b32b8c8c8c093bfc",
 469 |             "782d656769144ef9b48a3a37de81abb5",
 470 |             "eb2f4bcb78534f4d9f9e2ccb52e738b7",
 471 |             "a2bcf8164d904dcbada2196189b332be",
 472 |             "99b5b2cd3f104c72b5ee880fe1d0e9b9",
 473 |             "3197f87aadd5422cbb9804b0843ffc48",
 474 |             "5cdf7a7b08cc46f5a4b2da143ba39bb6",
 475 |             "8f335a7d85574c11b183fb700aeac5c3",
 476 |             "6b96186a1ccb4e24b491b5849ac90c50",
 477 |             "3a845c0efe954da1a47e77740f8623ff",
 478 |             "4c8f47c325a54f52abab545362f36c43",
 479 |             "7c1dc629e6dc4048b1b88a224c9a352d",
 480 |             "da84172eaff34e61ac902681dbd364ca",
 481 |             "2796bada5f6748b6af59f6b14b0957af",
 482 |             "400b852ef365473cad76663421954c86",
 483 |             "fd58bb90108a4486967a217eb3bc4389",
 484 |             "b96a2d9afc324a4eb52f7a04caab630a",
 485 |             "7c20b8d8e3b14504bba903e68d043e79",
 486 |             "c8bc395e18e14492ae40ec6ff21a18d1",
 487 |             "6c85b036e1be434faa2d515bed62e228",
 488 |             "da15ec7761a847678dc696b214c67ada",
 489 |             "03d2d213eb2a4c819bbcf8457e11904b",
 490 |             "f651fffdc274473a85ed701097afaa1f",
 491 |             "3fbc282a30cc49b99f335216df028cd6",
 492 |             "651249802d0249479eb1700e600f9a5a",
 493 |             "31e2d7d5057a4dfa96a65888697e9923",
 494 |             "cbaf9ba59da24341a933c3c7473a3b7d",
 495 |             "ee26c8314e6742a88cd59429f3d5b745",
 496 |             "dd9e81eb4e3d45cca5c6e2b1e6cf335d",
 497 |             "a7c9efe8c49a43d0ba6929bada9f78c2",
 498 |             "15d3af1073fe4447847d0e6f3543f953",
 499 |             "e4daf9a3e9e14e93ab55b91da59ecc9b",
 500 |             "3557bb8fc4064fdf99ca2a1ec5469cff",
 501 |             "b2ccec96efa1415fa4623ec8fa0f2c21"
 502 |           ]
 503 |         },
 504 |         "id": "5iZAz655m8Q9",
 505 |         "outputId": "6524cedf-f0f1-43fa-d5dc-2b4f2d8f6eb1"
 506 |       },
 507 |       "outputs": [
 508 |         {
 509 |           "output_type": "display_data",
 510 |           "data": {
 511 |             "text/plain": [
 512 |               "Downloading readme:   0%|          | 0.00/2.77k [00:00<?, ?B/s]"
 513 |             ],
 514 |             "application/vnd.jupyter.widget-view+json": {
 515 |               "version_major": 2,
 516 |               "version_minor": 0,
 517 |               "model_id": "eac0761e22a84275aaee5d7ec7929da6"
 518 |             }
 519 |           },
 520 |           "metadata": {}
 521 |         },
 522 |         {
 523 |           "output_type": "display_data",
 524 |           "data": {
 525 |             "text/plain": [
 526 |               "Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]"
 527 |             ],
 528 |             "application/vnd.jupyter.widget-view+json": {
 529 |               "version_major": 2,
 530 |               "version_minor": 0,
 531 |               "model_id": "22df8e4fce3b470b94fdce6e7b77a9ea"
 532 |             }
 533 |           },
 534 |           "metadata": {}
 535 |         },
 536 |         {
 537 |           "output_type": "display_data",
 538 |           "data": {
 539 |             "text/plain": [
 540 |               "Downloading data:   0%|          | 0.00/9.28M [00:00<?, ?B/s]"
 541 |             ],
 542 |             "application/vnd.jupyter.widget-view+json": {
 543 |               "version_major": 2,
 544 |               "version_minor": 0,
 545 |               "model_id": "782d656769144ef9b48a3a37de81abb5"
 546 |             }
 547 |           },
 548 |           "metadata": {}
 549 |         },
 550 |         {
 551 |           "output_type": "display_data",
 552 |           "data": {
 553 |             "text/plain": [
 554 |               "Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]"
 555 |             ],
 556 |             "application/vnd.jupyter.widget-view+json": {
 557 |               "version_major": 2,
 558 |               "version_minor": 0,
 559 |               "model_id": "da84172eaff34e61ac902681dbd364ca"
 560 |             }
 561 |           },
 562 |           "metadata": {}
 563 |         },
 564 |         {
 565 |           "output_type": "display_data",
 566 |           "data": {
 567 |             "text/plain": [
 568 |               "Generating train split: 0 examples [00:00, ? examples/s]"
 569 |             ],
 570 |             "application/vnd.jupyter.widget-view+json": {
 571 |               "version_major": 2,
 572 |               "version_minor": 0,
 573 |               "model_id": "3fbc282a30cc49b99f335216df028cd6"
 574 |             }
 575 |           },
 576 |           "metadata": {}
 577 |         }
 578 |       ],
 579 |       "source": [
 580 |         "def convert_to_rgb(image):\n",
 581 |         "    # `image.convert(\"RGB\")` would only work for .jpg images, as it creates a wrong background\n",
 582 |         "    # for transparent images. The call to `alpha_composite` handles this case\n",
 583 |         "    if image.mode == \"RGB\":\n",
 584 |         "        return image\n",
 585 |         "\n",
 586 |         "    image_rgba = image.convert(\"RGBA\")\n",
 587 |         "    background = Image.new(\"RGBA\", image_rgba.size, (255, 255, 255))\n",
 588 |         "    alpha_composite = Image.alpha_composite(background, image_rgba)\n",
 589 |         "    alpha_composite = alpha_composite.convert(\"RGB\")\n",
 590 |         "    return alpha_composite\n",
 591 |         "\n",
 592 |         "def ds_transforms(example_batch):\n",
 593 |         "    image_size = processor.image_processor.image_size\n",
 594 |         "    image_mean = processor.image_processor.image_mean\n",
 595 |         "    image_std = processor.image_processor.image_std\n",
 596 |         "\n",
 597 |         "    image_transform = transforms.Compose([\n",
 598 |         "        convert_to_rgb,\n",
 599 |         "        transforms.RandomResizedCrop((image_size, image_size), scale=(0.9, 1.0), interpolation=transforms.InterpolationMode.BICUBIC),\n",
 600 |         "        transforms.ToTensor(),\n",
 601 |         "        transforms.Normalize(mean=image_mean, std=image_std),\n",
 602 |         "    ])\n",
 603 |         "\n",
 604 |         "    prompts = []\n",
 605 |         "    for i in range(len(example_batch['caption'])):\n",
 606 |         "        # We split the captions to avoid having very long examples, which would require more GPU ram during training\n",
 607 |         "        caption = example_batch['caption'][i].split(\".\")[0]\n",
 608 |         "        prompts.append(\n",
 609 |         "            [\n",
 610 |         "                example_batch['image_url'][i],\n",
 611 |         "                f\"Question: What's on the picture? Answer: This is {example_batch['name'][i]}. {caption}</s>\",\n",
 612 |         "            ],\n",
 613 |         "        )\n",
 614 |         "\n",
 615 |         "    inputs = processor(prompts, transform=image_transform, return_tensors=\"pt\").to(device)\n",
 616 |         "\n",
 617 |         "    inputs[\"labels\"] = inputs[\"input_ids\"]\n",
 618 |         "\n",
 619 |         "    return inputs\n",
 620 |         "\n",
 621 |         "\n",
 622 |         "# load and prepare dataset\n",
 623 |         "ds = load_dataset(\"TheFusion21/PokemonCards\")\n",
 624 |         "ds = ds[\"train\"].train_test_split(test_size=0.002)\n",
 625 |         "train_ds = ds[\"train\"]\n",
 626 |         "eval_ds = ds[\"test\"]\n",
 627 |         "train_ds.set_transform(ds_transforms)\n",
 628 |         "eval_ds.set_transform(ds_transforms)"
 629 |       ]
 630 |     },
 631 |     {
 632 |       "cell_type": "markdown",
 633 |       "source": [
 634 |         "# LoRA\n",
 635 |         "After specifying the low-rank adapters (LoRA) config, we load the PeftModel using the get_peft_model utility function"
 636 |       ],
 637 |       "metadata": {
 638 |         "id": "Kui4EkCmOQzd"
 639 |       }
 640 |     },
 641 |     {
 642 |       "cell_type": "code",
 643 |       "execution_count": null,
 644 |       "metadata": {
 645 |         "id": "jKa5oTorp_A-"
 646 |       },
 647 |       "outputs": [],
 648 |       "source": [
 649 |         "model_name = checkpoint.split(\"/\")[1]\n",
 650 |         "config = LoraConfig(\n",
 651 |         "    r=16,\n",
 652 |         "    lora_alpha=32,\n",
 653 |         "    target_modules=[\"q_proj\", \"k_proj\", \"v_proj\"],\n",
 654 |         "    lora_dropout=0.05,\n",
 655 |         "    bias=\"none\",\n",
 656 |         ")\n",
 657 |         "model = get_peft_model(model, config)"
 658 |       ]
 659 |     },
 660 |     {
 661 |       "cell_type": "code",
 662 |       "execution_count": null,
 663 |       "metadata": {
 664 |         "colab": {
 665 |           "base_uri": "https://localhost:8080/"
 666 |         },
 667 |         "id": "ShuZJ5K2pYoL",
 668 |         "outputId": "6c22299b-5584-4994-c906-e9d031b40ad1"
 669 |       },
 670 |       "outputs": [
 671 |         {
 672 |           "output_type": "stream",
 673 |           "name": "stdout",
 674 |           "text": [
 675 |             "trainable params: 19,750,912 || all params: 8,949,430,544 || trainable%: 0.2206946230030432\n"
 676 |           ]
 677 |         }
 678 |       ],
 679 |       "source": [
 680 |         "model.print_trainable_parameters()"
 681 |       ]
 682 |     },
 683 |     {
 684 |       "cell_type": "markdown",
 685 |       "source": [
 686 |         "# Training\n",
 687 |         "Finally, using the Hugging Face Trainer, we can finetune the model! \\\\\n",
 688 |         "For the sake of the demo, we have set the max_steps at 40. That's about 0.05 epoch on this dataset, so feel free to tune further!"
 689 |       ],
 690 |       "metadata": {
 691 |         "id": "0Ok1sOZKQ29s"
 692 |       }
 693 |     },
 694 |     {
 695 |       "cell_type": "code",
 696 |       "execution_count": null,
 697 |       "metadata": {
 698 |         "colab": {
 699 |           "base_uri": "https://localhost:8080/",
 700 |           "height": 155
 701 |         },
 702 |         "id": "9cD3OuygpR5l",
 703 |         "outputId": "a8238139-59c3-49cb-c654-4aacb010dd7a"
 704 |       },
 705 |       "outputs": [
 706 |         {
 707 |           "output_type": "display_data",
 708 |           "data": {
 709 |             "text/plain": [
 710 |               "<IPython.core.display.HTML object>"
 711 |             ],
 712 |             "text/html": [
 713 |               "\n",
 714 |               "    <div>\n",
 715 |               "      \n",
 716 |               "      <progress value='40' max='40' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
 717 |               "      [40/40 06:32, Epoch 0/1]\n",
 718 |               "    </div>\n",
 719 |               "    <table border=\"1\" class=\"dataframe\">\n",
 720 |               "  <thead>\n",
 721 |               " <tr style=\"text-align: left;\">\n",
 722 |               "      <th>Step</th>\n",
 723 |               "      <th>Training Loss</th>\n",
 724 |               "      <th>Validation Loss</th>\n",
 725 |               "    </tr>\n",
 726 |               "  </thead>\n",
 727 |               "  <tbody>\n",
 728 |               "    <tr>\n",
 729 |               "      <td>20</td>\n",
 730 |               "      <td>1.450000</td>\n",
 731 |               "      <td>0.880157</td>\n",
 732 |               "    </tr>\n",
 733 |               "    <tr>\n",
 734 |               "      <td>40</td>\n",
 735 |               "      <td>0.702000</td>\n",
 736 |               "      <td>0.675355</td>\n",
 737 |               "    </tr>\n",
 738 |               "  </tbody>\n",
 739 |               "</table><p>"
 740 |             ]
 741 |           },
 742 |           "metadata": {}
 743 |         },
 744 |         {
 745 |           "output_type": "execute_result",
 746 |           "data": {
 747 |             "text/plain": [
 748 |               "TrainOutput(global_step=40, training_loss=1.0759869813919067, metrics={'train_runtime': 403.1999, 'train_samples_per_second': 1.587, 'train_steps_per_second': 0.099, 'total_flos': 1445219210656320.0, 'train_loss': 1.0759869813919067, 'epoch': 0.05})"
 749 |             ]
 750 |           },
 751 |           "metadata": {},
 752 |           "execution_count": 23
 753 |         }
 754 |       ],
 755 |       "source": [
 756 |         "training_args = TrainingArguments(\n",
 757 |         "    output_dir=f\"{model_name}-pokemon\",\n",
 758 |         "    learning_rate=2e-4,\n",
 759 |         "    fp16=True,\n",
 760 |         "    per_device_train_batch_size=2,\n",
 761 |         "    per_device_eval_batch_size=2,\n",
 762 |         "    gradient_accumulation_steps=8,\n",
 763 |         "    dataloader_pin_memory=False,\n",
 764 |         "    save_total_limit=3,\n",
 765 |         "    evaluation_strategy=\"steps\",\n",
 766 |         "    save_strategy=\"steps\",\n",
 767 |         "    save_steps=40,\n",
 768 |         "    eval_steps=20,\n",
 769 |         "    logging_steps=20,\n",
 770 |         "    max_steps=40,\n",
 771 |         "    remove_unused_columns=False,\n",
 772 |         "    push_to_hub=False,\n",
 773 |         "    label_names=[\"labels\"],\n",
 774 |         "    load_best_model_at_end=True,\n",
 775 |         "    report_to=None,\n",
 776 |         "    optim=\"paged_adamw_8bit\",\n",
 777 |         ")\n",
 778 |         "\n",
 779 |         "trainer = Trainer(\n",
 780 |         "    model=model,\n",
 781 |         "    args=training_args,\n",
 782 |         "    train_dataset=train_ds,\n",
 783 |         "    eval_dataset=eval_ds,\n",
 784 |         ")\n",
 785 |         "\n",
 786 |         "trainer.train()"
 787 |       ]
 788 |     },
 789 |     {
 790 |       "cell_type": "code",
 791 |       "source": [
 792 |         "# check generation again after finetuning\n",
 793 |         "check_inference(model, processor, prompts, max_new_tokens=100)"
 794 |       ],
 795 |       "metadata": {
 796 |         "colab": {
 797 |           "base_uri": "https://localhost:8080/"
 798 |         },
 799 |         "id": "v6NZ47vYTr-z",
 800 |         "outputId": "8807a1dc-e37e-4c36-da02-507029a546ab"
 801 |       },
 802 |       "execution_count": null,
 803 |       "outputs": [
 804 |         {
 805 |           "output_type": "stream",
 806 |           "name": "stdout",
 807 |           "text": [
 808 |             "Question: What's on the picture? Answer: This is Lucario. A Stage 2 Pokemon Card of type Fighting with the title Lucario and 90 HP of rarity Rare evolved from Pikachu from the set Neo Destiny and the flavor text: It can use its tail as a whip\n"
 809 |           ]
 810 |         }
 811 |       ]
 812 |     },
 813 |     {
 814 |       "cell_type": "markdown",
 815 |       "source": [
 816 |         "# Push your new model to the hub!\n"
 817 |       ],
 818 |       "metadata": {
 819 |         "id": "zgqonle8AdPs"
 820 |       }
 821 |     },
 822 |     {
 823 |       "cell_type": "code",
 824 |       "source": [
 825 |         "# Insert your \"write\" token. You should find it in the settings of your HF profile\n",
 826 |         "!huggingface-cli login"
 827 |       ],
 828 |       "metadata": {
 829 |         "colab": {
 830 |           "base_uri": "https://localhost:8080/"
 831 |         },
 832 |         "id": "KrnB4kFxAjIA",
 833 |         "outputId": "8370ee48-9b3d-446b-b69a-c3cec93f61fd"
 834 |       },
 835 |       "execution_count": null,
 836 |       "outputs": [
 837 |         {
 838 |           "output_type": "stream",
 839 |           "name": "stdout",
 840 |           "text": [
 841 |             "\n",
 842 |             "    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|\n",
 843 |             "    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|\n",
 844 |             "    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|\n",
 845 |             "    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|\n",
 846 |             "    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|\n",
 847 |             "    \n",
 848 |             "    A token is already saved on your machine. Run `huggingface-cli whoami` to get more information or `huggingface-cli logout` if you want to log out.\n",
 849 |             "    Setting a new token will erase the existing one.\n",
 850 |             "    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .\n",
 851 |             "Token: \n",
 852 |             "Add token as git credential? (Y/n) Y\n",
 853 |             "Token is valid (permission: write).\n",
 854 |             "\u001b[1m\u001b[31mCannot authenticate through git-credential as no helper is defined on your machine.\n",
 855 |             "You might have to re-authenticate when pushing to the Hugging Face Hub.\n",
 856 |             "Run the following command in your terminal in case you want to set the 'store' credential helper as default.\n",
 857 |             "\n",
 858 |             "git config --global credential.helper store\n",
 859 |             "\n",
 860 |             "Read https://git-scm.com/book/en/v2/Git-Tools-Credential-Storage for more details.\u001b[0m\n",
 861 |             "Token has not been saved to git credential helper.\n",
 862 |             "Your token has been saved to /root/.cache/huggingface/token\n",
 863 |             "Login successful\n"
 864 |           ]
 865 |         }
 866 |       ]
 867 |     },
 868 |     {
 869 |       "cell_type": "code",
 870 |       "source": [
 871 |         "model.push_to_hub(f\"{model_name}-pokemon\", private=False)"
 872 |       ],
 873 |       "metadata": {
 874 |         "colab": {
 875 |           "base_uri": "https://localhost:8080/",
 876 |           "height": 66,
 877 |           "referenced_widgets": [
 878 |             "73bdfdf8980d4c358c90d574eb91bef5",
 879 |             "3f49f9009fa14fe3b87bb123491a4b0f",
 880 |             "548fc33764964fe9a0498194df85b768",
 881 |             "e60ce018bf3a4b15941062300143e2a3",
 882 |             "d0a78497d9694dc6b7e903392daf6a26",
 883 |             "5b585e82891a40b0826679a79583ee7c",
 884 |             "db9f5a1c1a0a49b3b58a30f0a74c3329",
 885 |             "641cf05e799e4ae89ec84fdf8c225b93",
 886 |             "cf937013fade482f90bd599eced8bfb4",
 887 |             "ee49f8d2b11f43e2bb30d27407744ed3",
 888 |             "ef2f2655d7b9432f983ae508f6dd4e0b"
 889 |           ]
 890 |         },
 891 |         "id": "_jFKg3iP172d",
 892 |         "outputId": "2b58ecb2-fe97-4a6c-bd2c-7fdaaf03e99a"
 893 |       },
 894 |       "execution_count": null,
 895 |       "outputs": [
 896 |         {
 897 |           "output_type": "display_data",
 898 |           "data": {
 899 |             "text/plain": [
 900 |               "adapter_model.bin:   0%|          | 0.00/79.2M [00:00<?, ?B/s]"
 901 |             ],
 902 |             "application/vnd.jupyter.widget-view+json": {
 903 |               "version_major": 2,
 904 |               "version_minor": 0,
 905 |               "model_id": "73bdfdf8980d4c358c90d574eb91bef5"
 906 |             }
 907 |           },
 908 |           "metadata": {}
 909 |         },
 910 |         {
 911 |           "output_type": "execute_result",
 912 |           "data": {
 913 |             "text/plain": [
 914 |               "CommitInfo(commit_url='https://huggingface.co/Leyo/idefics-9b-pokemon/commit/6e08354af8529c0c286d16a42a674ca28ce7f3ed', commit_message='Upload model', commit_description='', oid='6e08354af8529c0c286d16a42a674ca28ce7f3ed', pr_url=None, pr_revision=None, pr_num=None)"
 915 |             ]
 916 |           },
 917 |           "metadata": {},
 918 |           "execution_count": 29
 919 |         }
 920 |       ]
 921 |     },
 922 |     {
 923 |       "cell_type": "code",
 924 |       "source": [],
 925 |       "metadata": {
 926 |         "id": "oOEcg7O9NFP8"
 927 |       },
 928 |       "execution_count": null,
 929 |       "outputs": []
 930 |     }
 931 |   ],
 932 |   "metadata": {
 933 |     "accelerator": "GPU",
 934 |     "colab": {
 935 |       "provenance": [],
 936 |       "machine_shape": "hm",
 937 |       "gpuType": "V100"
 938 |     },
 939 |     "kernelspec": {
 940 |       "display_name": "Python 3",
 941 |       "name": "python3"
 942 |     },
 943 |     "language_info": {
 944 |       "name": "python"
 945 |     },
 946 |     "widgets": {
 947 |       "application/vnd.jupyter.widget-state+json": {
 948 |         "cf454254fbc74724a6909e60d82f86a3": {
 949 |           "model_module": "@jupyter-widgets/controls",
 950 |           "model_name": "HBoxModel",
 951 |           "model_module_version": "1.5.0",
 952 |           "state": {
 953 |             "_dom_classes": [],
 954 |             "_model_module": "@jupyter-widgets/controls",
 955 |             "_model_module_version": "1.5.0",
 956 |             "_model_name": "HBoxModel",
 957 |             "_view_count": null,
 958 |             "_view_module": "@jupyter-widgets/controls",
 959 |             "_view_module_version": "1.5.0",
 960 |             "_view_name": "HBoxView",
 961 |             "box_style": "",
 962 |             "children": [
 963 |               "IPY_MODEL_561b1b43dbc1484784ea2abed7278c08",
 964 |               "IPY_MODEL_996e2ae7de594ccc968ce83382786365",
 965 |               "IPY_MODEL_7e72c1fdf039470f8b14859034c7942f"
 966 |             ],
 967 |             "layout": "IPY_MODEL_f34958207dca46fd9aa044912ec9fddb"
 968 |           }
 969 |         },
 970 |         "561b1b43dbc1484784ea2abed7278c08": {
 971 |           "model_module": "@jupyter-widgets/controls",
 972 |           "model_name": "HTMLModel",
 973 |           "model_module_version": "1.5.0",
 974 |           "state": {
 975 |             "_dom_classes": [],
 976 |             "_model_module": "@jupyter-widgets/controls",
 977 |             "_model_module_version": "1.5.0",
 978 |             "_model_name": "HTMLModel",
 979 |             "_view_count": null,
 980 |             "_view_module": "@jupyter-widgets/controls",
 981 |             "_view_module_version": "1.5.0",
 982 |             "_view_name": "HTMLView",
 983 |             "description": "",
 984 |             "description_tooltip": null,
 985 |             "layout": "IPY_MODEL_0fa55920c3a54b30aca74aa7247fe2ea",
 986 |             "placeholder": "​",
 987 |             "style": "IPY_MODEL_119ec52a3ce54b0d9565a0d44e731850",
 988 |             "value": "Loading checkpoint shards: 100%"
 989 |           }
 990 |         },
 991 |         "996e2ae7de594ccc968ce83382786365": {
 992 |           "model_module": "@jupyter-widgets/controls",
 993 |           "model_name": "FloatProgressModel",
 994 |           "model_module_version": "1.5.0",
 995 |           "state": {
 996 |             "_dom_classes": [],
 997 |             "_model_module": "@jupyter-widgets/controls",
 998 |             "_model_module_version": "1.5.0",
 999 |             "_model_name": "FloatProgressModel",
1000 |             "_view_count": null,
1001 |             "_view_module": "@jupyter-widgets/controls",
1002 |             "_view_module_version": "1.5.0",
1003 |             "_view_name": "ProgressView",
1004 |             "bar_style": "success",
1005 |             "description": "",
1006 |             "description_tooltip": null,
1007 |             "layout": "IPY_MODEL_27e2b5c562174873bb966f1408727058",
1008 |             "max": 19,
1009 |             "min": 0,
1010 |             "orientation": "horizontal",
1011 |             "style": "IPY_MODEL_008e6d4c958149819fd7e64e30f79e39",
1012 |             "value": 19
1013 |           }
1014 |         },
1015 |         "7e72c1fdf039470f8b14859034c7942f": {
1016 |           "model_module": "@jupyter-widgets/controls",
1017 |           "model_name": "HTMLModel",
1018 |           "model_module_version": "1.5.0",
1019 |           "state": {
1020 |             "_dom_classes": [],
1021 |             "_model_module": "@jupyter-widgets/controls",
1022 |             "_model_module_version": "1.5.0",
1023 |             "_model_name": "HTMLModel",
1024 |             "_view_count": null,
1025 |             "_view_module": "@jupyter-widgets/controls",
1026 |             "_view_module_version": "1.5.0",
1027 |             "_view_name": "HTMLView",
1028 |             "description": "",
1029 |             "description_tooltip": null,
1030 |             "layout": "IPY_MODEL_9302d5fbae224b999a0c3fcb3f34beb3",
1031 |             "placeholder": "​",
1032 |             "style": "IPY_MODEL_8c82d2f9f97047478d8399b2aee3389f",
1033 |             "value": " 19/19 [02:57&lt;00:00,  7.17s/it]"
1034 |           }
1035 |         },
1036 |         "f34958207dca46fd9aa044912ec9fddb": {
1037 |           "model_module": "@jupyter-widgets/base",
1038 |           "model_name": "LayoutModel",
1039 |           "model_module_version": "1.2.0",
1040 |           "state": {
1041 |             "_model_module": "@jupyter-widgets/base",
1042 |             "_model_module_version": "1.2.0",
1043 |             "_model_name": "LayoutModel",
1044 |             "_view_count": null,
1045 |             "_view_module": "@jupyter-widgets/base",
1046 |             "_view_module_version": "1.2.0",
1047 |             "_view_name": "LayoutView",
1048 |             "align_content": null,
1049 |             "align_items": null,
1050 |             "align_self": null,
1051 |             "border": null,
1052 |             "bottom": null,
1053 |             "display": null,
1054 |             "flex": null,
1055 |             "flex_flow": null,
1056 |             "grid_area": null,
1057 |             "grid_auto_columns": null,
1058 |             "grid_auto_flow": null,
1059 |             "grid_auto_rows": null,
1060 |             "grid_column": null,
1061 |             "grid_gap": null,
1062 |             "grid_row": null,
1063 |             "grid_template_areas": null,
1064 |             "grid_template_columns": null,
1065 |             "grid_template_rows": null,
1066 |             "height": null,
1067 |             "justify_content": null,
1068 |             "justify_items": null,
1069 |             "left": null,
1070 |             "margin": null,
1071 |             "max_height": null,
1072 |             "max_width": null,
1073 |             "min_height": null,
1074 |             "min_width": null,
1075 |             "object_fit": null,
1076 |             "object_position": null,
1077 |             "order": null,
1078 |             "overflow": null,
1079 |             "overflow_x": null,
1080 |             "overflow_y": null,
1081 |             "padding": null,
1082 |             "right": null,
1083 |             "top": null,
1084 |             "visibility": null,
1085 |             "width": null
1086 |           }
1087 |         },
1088 |         "0fa55920c3a54b30aca74aa7247fe2ea": {
1089 |           "model_module": "@jupyter-widgets/base",
1090 |           "model_name": "LayoutModel",
1091 |           "model_module_version": "1.2.0",
1092 |           "state": {
1093 |             "_model_module": "@jupyter-widgets/base",
1094 |             "_model_module_version": "1.2.0",
1095 |             "_model_name": "LayoutModel",
1096 |             "_view_count": null,
1097 |             "_view_module": "@jupyter-widgets/base",
1098 |             "_view_module_version": "1.2.0",
1099 |             "_view_name": "LayoutView",
1100 |             "align_content": null,
1101 |             "align_items": null,
1102 |             "align_self": null,
1103 |             "border": null,
1104 |             "bottom": null,
1105 |             "display": null,
1106 |             "flex": null,
1107 |             "flex_flow": null,
1108 |             "grid_area": null,
1109 |             "grid_auto_columns": null,
1110 |             "grid_auto_flow": null,
1111 |             "grid_auto_rows": null,
1112 |             "grid_column": null,
1113 |             "grid_gap": null,
1114 |             "grid_row": null,
1115 |             "grid_template_areas": null,
1116 |             "grid_template_columns": null,
1117 |             "grid_template_rows": null,
1118 |             "height": null,
1119 |             "justify_content": null,
1120 |             "justify_items": null,
1121 |             "left": null,
1122 |             "margin": null,
1123 |             "max_height": null,
1124 |             "max_width": null,
1125 |             "min_height": null,
1126 |             "min_width": null,
1127 |             "object_fit": null,
1128 |             "object_position": null,
1129 |             "order": null,
1130 |             "overflow": null,
1131 |             "overflow_x": null,
1132 |             "overflow_y": null,
1133 |             "padding": null,
1134 |             "right": null,
1135 |             "top": null,
1136 |             "visibility": null,
1137 |             "width": null
1138 |           }
1139 |         },
1140 |         "119ec52a3ce54b0d9565a0d44e731850": {
1141 |           "model_module": "@jupyter-widgets/controls",
1142 |           "model_name": "DescriptionStyleModel",
1143 |           "model_module_version": "1.5.0",
1144 |           "state": {
1145 |             "_model_module": "@jupyter-widgets/controls",
1146 |             "_model_module_version": "1.5.0",
1147 |             "_model_name": "DescriptionStyleModel",
1148 |             "_view_count": null,
1149 |             "_view_module": "@jupyter-widgets/base",
1150 |             "_view_module_version": "1.2.0",
1151 |             "_view_name": "StyleView",
1152 |             "description_width": ""
1153 |           }
1154 |         },
1155 |         "27e2b5c562174873bb966f1408727058": {
1156 |           "model_module": "@jupyter-widgets/base",
1157 |           "model_name": "LayoutModel",
1158 |           "model_module_version": "1.2.0",
1159 |           "state": {
1160 |             "_model_module": "@jupyter-widgets/base",
1161 |             "_model_module_version": "1.2.0",
1162 |             "_model_name": "LayoutModel",
1163 |             "_view_count": null,
1164 |             "_view_module": "@jupyter-widgets/base",
1165 |             "_view_module_version": "1.2.0",
1166 |             "_view_name": "LayoutView",
1167 |             "align_content": null,
1168 |             "align_items": null,
1169 |             "align_self": null,
1170 |             "border": null,
1171 |             "bottom": null,
1172 |             "display": null,
1173 |             "flex": null,
1174 |             "flex_flow": null,
1175 |             "grid_area": null,
1176 |             "grid_auto_columns": null,
1177 |             "grid_auto_flow": null,
1178 |             "grid_auto_rows": null,
1179 |             "grid_column": null,
1180 |             "grid_gap": null,
1181 |             "grid_row": null,
1182 |             "grid_template_areas": null,
1183 |             "grid_template_columns": null,
1184 |             "grid_template_rows": null,
1185 |             "height": null,
1186 |             "justify_content": null,
1187 |             "justify_items": null,
1188 |             "left": null,
1189 |             "margin": null,
1190 |             "max_height": null,
1191 |             "max_width": null,
1192 |             "min_height": null,
1193 |             "min_width": null,
1194 |             "object_fit": null,
1195 |             "object_position": null,
1196 |             "order": null,
1197 |             "overflow": null,
1198 |             "overflow_x": null,
1199 |             "overflow_y": null,
1200 |             "padding": null,
1201 |             "right": null,
1202 |             "top": null,
1203 |             "visibility": null,
1204 |             "width": null
1205 |           }
1206 |         },
1207 |         "008e6d4c958149819fd7e64e30f79e39": {
1208 |           "model_module": "@jupyter-widgets/controls",
1209 |           "model_name": "ProgressStyleModel",
1210 |           "model_module_version": "1.5.0",
1211 |           "state": {
1212 |             "_model_module": "@jupyter-widgets/controls",
1213 |             "_model_module_version": "1.5.0",
1214 |             "_model_name": "ProgressStyleModel",
1215 |             "_view_count": null,
1216 |             "_view_module": "@jupyter-widgets/base",
1217 |             "_view_module_version": "1.2.0",
1218 |             "_view_name": "StyleView",
1219 |             "bar_color": null,
1220 |             "description_width": ""
1221 |           }
1222 |         },
1223 |         "9302d5fbae224b999a0c3fcb3f34beb3": {
1224 |           "model_module": "@jupyter-widgets/base",
1225 |           "model_name": "LayoutModel",
1226 |           "model_module_version": "1.2.0",
1227 |           "state": {
1228 |             "_model_module": "@jupyter-widgets/base",
1229 |             "_model_module_version": "1.2.0",
1230 |             "_model_name": "LayoutModel",
1231 |             "_view_count": null,
1232 |             "_view_module": "@jupyter-widgets/base",
1233 |             "_view_module_version": "1.2.0",
1234 |             "_view_name": "LayoutView",
1235 |             "align_content": null,
1236 |             "align_items": null,
1237 |             "align_self": null,
1238 |             "border": null,
1239 |             "bottom": null,
1240 |             "display": null,
1241 |             "flex": null,
1242 |             "flex_flow": null,
1243 |             "grid_area": null,
1244 |             "grid_auto_columns": null,
1245 |             "grid_auto_flow": null,
1246 |             "grid_auto_rows": null,
1247 |             "grid_column": null,
1248 |             "grid_gap": null,
1249 |             "grid_row": null,
1250 |             "grid_template_areas": null,
1251 |             "grid_template_columns": null,
1252 |             "grid_template_rows": null,
1253 |             "height": null,
1254 |             "justify_content": null,
1255 |             "justify_items": null,
1256 |             "left": null,
1257 |             "margin": null,
1258 |             "max_height": null,
1259 |             "max_width": null,
1260 |             "min_height": null,
1261 |             "min_width": null,
1262 |             "object_fit": null,
1263 |             "object_position": null,
1264 |             "order": null,
1265 |             "overflow": null,
1266 |             "overflow_x": null,
1267 |             "overflow_y": null,
1268 |             "padding": null,
1269 |             "right": null,
1270 |             "top": null,
1271 |             "visibility": null,
1272 |             "width": null
1273 |           }
1274 |         },
1275 |         "8c82d2f9f97047478d8399b2aee3389f": {
1276 |           "model_module": "@jupyter-widgets/controls",
1277 |           "model_name": "DescriptionStyleModel",
1278 |           "model_module_version": "1.5.0",
1279 |           "state": {
1280 |             "_model_module": "@jupyter-widgets/controls",
1281 |             "_model_module_version": "1.5.0",
1282 |             "_model_name": "DescriptionStyleModel",
1283 |             "_view_count": null,
1284 |             "_view_module": "@jupyter-widgets/base",
1285 |             "_view_module_version": "1.2.0",
1286 |             "_view_name": "StyleView",
1287 |             "description_width": ""
1288 |           }
1289 |         },
1290 |         "eac0761e22a84275aaee5d7ec7929da6": {
1291 |           "model_module": "@jupyter-widgets/controls",
1292 |           "model_name": "HBoxModel",
1293 |           "model_module_version": "1.5.0",
1294 |           "state": {
1295 |             "_dom_classes": [],
1296 |             "_model_module": "@jupyter-widgets/controls",
1297 |             "_model_module_version": "1.5.0",
1298 |             "_model_name": "HBoxModel",
1299 |             "_view_count": null,
1300 |             "_view_module": "@jupyter-widgets/controls",
1301 |             "_view_module_version": "1.5.0",
1302 |             "_view_name": "HBoxView",
1303 |             "box_style": "",
1304 |             "children": [
1305 |               "IPY_MODEL_ba24eb82f1194ecab3514466eca8a2b8",
1306 |               "IPY_MODEL_52997c23e16a4f8aa220909e99b5452e",
1307 |               "IPY_MODEL_6b7767dc6c5b45a89f7becfe5fcf81d7"
1308 |             ],
1309 |             "layout": "IPY_MODEL_050b365a82b0412b83918f9f9603bf2f"
1310 |           }
1311 |         },
1312 |         "ba24eb82f1194ecab3514466eca8a2b8": {
1313 |           "model_module": "@jupyter-widgets/controls",
1314 |           "model_name": "HTMLModel",
1315 |           "model_module_version": "1.5.0",
1316 |           "state": {
1317 |             "_dom_classes": [],
1318 |             "_model_module": "@jupyter-widgets/controls",
1319 |             "_model_module_version": "1.5.0",
1320 |             "_model_name": "HTMLModel",
1321 |             "_view_count": null,
1322 |             "_view_module": "@jupyter-widgets/controls",
1323 |             "_view_module_version": "1.5.0",
1324 |             "_view_name": "HTMLView",
1325 |             "description": "",
1326 |             "description_tooltip": null,
1327 |             "layout": "IPY_MODEL_39c0d7023e574db9a55eb7e82913d4ed",
1328 |             "placeholder": "​",
1329 |             "style": "IPY_MODEL_9bdbd4871dcd49a5bbfaa86b813e9a36",
1330 |             "value": "Downloading readme: 100%"
1331 |           }
1332 |         },
1333 |         "52997c23e16a4f8aa220909e99b5452e": {
1334 |           "model_module": "@jupyter-widgets/controls",
1335 |           "model_name": "FloatProgressModel",
1336 |           "model_module_version": "1.5.0",
1337 |           "state": {
1338 |             "_dom_classes": [],
1339 |             "_model_module": "@jupyter-widgets/controls",
1340 |             "_model_module_version": "1.5.0",
1341 |             "_model_name": "FloatProgressModel",
1342 |             "_view_count": null,
1343 |             "_view_module": "@jupyter-widgets/controls",
1344 |             "_view_module_version": "1.5.0",
1345 |             "_view_name": "ProgressView",
1346 |             "bar_style": "success",
1347 |             "description": "",
1348 |             "description_tooltip": null,
1349 |             "layout": "IPY_MODEL_7bfee1d1c4134316af5b82cd354457ba",
1350 |             "max": 2765,
1351 |             "min": 0,
1352 |             "orientation": "horizontal",
1353 |             "style": "IPY_MODEL_594fd06a2b07443a9ce27200468d5fe3",
1354 |             "value": 2765
1355 |           }
1356 |         },
1357 |         "6b7767dc6c5b45a89f7becfe5fcf81d7": {
1358 |           "model_module": "@jupyter-widgets/controls",
1359 |           "model_name": "HTMLModel",
1360 |           "model_module_version": "1.5.0",
1361 |           "state": {
1362 |             "_dom_classes": [],
1363 |             "_model_module": "@jupyter-widgets/controls",
1364 |             "_model_module_version": "1.5.0",
1365 |             "_model_name": "HTMLModel",
1366 |             "_view_count": null,
1367 |             "_view_module": "@jupyter-widgets/controls",
1368 |             "_view_module_version": "1.5.0",
1369 |             "_view_name": "HTMLView",
1370 |             "description": "",
1371 |             "description_tooltip": null,
1372 |             "layout": "IPY_MODEL_55de5af50af247cd93da17057661fd6c",
1373 |             "placeholder": "​",
1374 |             "style": "IPY_MODEL_450f2b15f9df4f72b23c4f916bc18f3b",
1375 |             "value": " 2.77k/2.77k [00:00&lt;00:00, 177kB/s]"
1376 |           }
1377 |         },
1378 |         "050b365a82b0412b83918f9f9603bf2f": {
1379 |           "model_module": "@jupyter-widgets/base",
1380 |           "model_name": "LayoutModel",
1381 |           "model_module_version": "1.2.0",
1382 |           "state": {
1383 |             "_model_module": "@jupyter-widgets/base",
1384 |             "_model_module_version": "1.2.0",
1385 |             "_model_name": "LayoutModel",
1386 |             "_view_count": null,
1387 |             "_view_module": "@jupyter-widgets/base",
1388 |             "_view_module_version": "1.2.0",
1389 |             "_view_name": "LayoutView",
1390 |             "align_content": null,
1391 |             "align_items": null,
1392 |             "align_self": null,
1393 |             "border": null,
1394 |             "bottom": null,
1395 |             "display": null,
1396 |             "flex": null,
1397 |             "flex_flow": null,
1398 |             "grid_area": null,
1399 |             "grid_auto_columns": null,
1400 |             "grid_auto_flow": null,
1401 |             "grid_auto_rows": null,
1402 |             "grid_column": null,
1403 |             "grid_gap": null,
1404 |             "grid_row": null,
1405 |             "grid_template_areas": null,
1406 |             "grid_template_columns": null,
1407 |             "grid_template_rows": null,
1408 |             "height": null,
1409 |             "justify_content": null,
1410 |             "justify_items": null,
1411 |             "left": null,
1412 |             "margin": null,
1413 |             "max_height": null,
1414 |             "max_width": null,
1415 |             "min_height": null,
1416 |             "min_width": null,
1417 |             "object_fit": null,
1418 |             "object_position": null,
1419 |             "order": null,
1420 |             "overflow": null,
1421 |             "overflow_x": null,
1422 |             "overflow_y": null,
1423 |             "padding": null,
1424 |             "right": null,
1425 |             "top": null,
1426 |             "visibility": null,
1427 |             "width": null
1428 |           }
1429 |         },
1430 |         "39c0d7023e574db9a55eb7e82913d4ed": {
1431 |           "model_module": "@jupyter-widgets/base",
1432 |           "model_name": "LayoutModel",
1433 |           "model_module_version": "1.2.0",
1434 |           "state": {
1435 |             "_model_module": "@jupyter-widgets/base",
1436 |             "_model_module_version": "1.2.0",
1437 |             "_model_name": "LayoutModel",
1438 |             "_view_count": null,
1439 |             "_view_module": "@jupyter-widgets/base",
1440 |             "_view_module_version": "1.2.0",
1441 |             "_view_name": "LayoutView",
1442 |             "align_content": null,
1443 |             "align_items": null,
1444 |             "align_self": null,
1445 |             "border": null,
1446 |             "bottom": null,
1447 |             "display": null,
1448 |             "flex": null,
1449 |             "flex_flow": null,
1450 |             "grid_area": null,
1451 |             "grid_auto_columns": null,
1452 |             "grid_auto_flow": null,
1453 |             "grid_auto_rows": null,
1454 |             "grid_column": null,
1455 |             "grid_gap": null,
1456 |             "grid_row": null,
1457 |             "grid_template_areas": null,
1458 |             "grid_template_columns": null,
1459 |             "grid_template_rows": null,
1460 |             "height": null,
1461 |             "justify_content": null,
1462 |             "justify_items": null,
1463 |             "left": null,
1464 |             "margin": null,
1465 |             "max_height": null,
1466 |             "max_width": null,
1467 |             "min_height": null,
1468 |             "min_width": null,
1469 |             "object_fit": null,
1470 |             "object_position": null,
1471 |             "order": null,
1472 |             "overflow": null,
1473 |             "overflow_x": null,
1474 |             "overflow_y": null,
1475 |             "padding": null,
1476 |             "right": null,
1477 |             "top": null,
1478 |             "visibility": null,
1479 |             "width": null
1480 |           }
1481 |         },
1482 |         "9bdbd4871dcd49a5bbfaa86b813e9a36": {
1483 |           "model_module": "@jupyter-widgets/controls",
1484 |           "model_name": "DescriptionStyleModel",
1485 |           "model_module_version": "1.5.0",
1486 |           "state": {
1487 |             "_model_module": "@jupyter-widgets/controls",
1488 |             "_model_module_version": "1.5.0",
1489 |             "_model_name": "DescriptionStyleModel",
1490 |             "_view_count": null,
1491 |             "_view_module": "@jupyter-widgets/base",
1492 |             "_view_module_version": "1.2.0",
1493 |             "_view_name": "StyleView",
1494 |             "description_width": ""
1495 |           }
1496 |         },
1497 |         "7bfee1d1c4134316af5b82cd354457ba": {
1498 |           "model_module": "@jupyter-widgets/base",
1499 |           "model_name": "LayoutModel",
1500 |           "model_module_version": "1.2.0",
1501 |           "state": {
1502 |             "_model_module": "@jupyter-widgets/base",
1503 |             "_model_module_version": "1.2.0",
1504 |             "_model_name": "LayoutModel",
1505 |             "_view_count": null,
1506 |             "_view_module": "@jupyter-widgets/base",
1507 |             "_view_module_version": "1.2.0",
1508 |             "_view_name": "LayoutView",
1509 |             "align_content": null,
1510 |             "align_items": null,
1511 |             "align_self": null,
1512 |             "border": null,
1513 |             "bottom": null,
1514 |             "display": null,
1515 |             "flex": null,
1516 |             "flex_flow": null,
1517 |             "grid_area": null,
1518 |             "grid_auto_columns": null,
1519 |             "grid_auto_flow": null,
1520 |             "grid_auto_rows": null,
1521 |             "grid_column": null,
1522 |             "grid_gap": null,
1523 |             "grid_row": null,
1524 |             "grid_template_areas": null,
1525 |             "grid_template_columns": null,
1526 |             "grid_template_rows": null,
1527 |             "height": null,
1528 |             "justify_content": null,
1529 |             "justify_items": null,
1530 |             "left": null,
1531 |             "margin": null,
1532 |             "max_height": null,
1533 |             "max_width": null,
1534 |             "min_height": null,
1535 |             "min_width": null,
1536 |             "object_fit": null,
1537 |             "object_position": null,
1538 |             "order": null,
1539 |             "overflow": null,
1540 |             "overflow_x": null,
1541 |             "overflow_y": null,
1542 |             "padding": null,
1543 |             "right": null,
1544 |             "top": null,
1545 |             "visibility": null,
1546 |             "width": null
1547 |           }
1548 |         },
1549 |         "594fd06a2b07443a9ce27200468d5fe3": {
1550 |           "model_module": "@jupyter-widgets/controls",
1551 |           "model_name": "ProgressStyleModel",
1552 |           "model_module_version": "1.5.0",
1553 |           "state": {
1554 |             "_model_module": "@jupyter-widgets/controls",
1555 |             "_model_module_version": "1.5.0",
1556 |             "_model_name": "ProgressStyleModel",
1557 |             "_view_count": null,
1558 |             "_view_module": "@jupyter-widgets/base",
1559 |             "_view_module_version": "1.2.0",
1560 |             "_view_name": "StyleView",
1561 |             "bar_color": null,
1562 |             "description_width": ""
1563 |           }
1564 |         },
1565 |         "55de5af50af247cd93da17057661fd6c": {
1566 |           "model_module": "@jupyter-widgets/base",
1567 |           "model_name": "LayoutModel",
1568 |           "model_module_version": "1.2.0",
1569 |           "state": {
1570 |             "_model_module": "@jupyter-widgets/base",
1571 |             "_model_module_version": "1.2.0",
1572 |             "_model_name": "LayoutModel",
1573 |             "_view_count": null,
1574 |             "_view_module": "@jupyter-widgets/base",
1575 |             "_view_module_version": "1.2.0",
1576 |             "_view_name": "LayoutView",
1577 |             "align_content": null,
1578 |             "align_items": null,
1579 |             "align_self": null,
1580 |             "border": null,
1581 |             "bottom": null,
1582 |             "display": null,
1583 |             "flex": null,
1584 |             "flex_flow": null,
1585 |             "grid_area": null,
1586 |             "grid_auto_columns": null,
1587 |             "grid_auto_flow": null,
1588 |             "grid_auto_rows": null,
1589 |             "grid_column": null,
1590 |             "grid_gap": null,
1591 |             "grid_row": null,
1592 |             "grid_template_areas": null,
1593 |             "grid_template_columns": null,
1594 |             "grid_template_rows": null,
1595 |             "height": null,
1596 |             "justify_content": null,
1597 |             "justify_items": null,
1598 |             "left": null,
1599 |             "margin": null,
1600 |             "max_height": null,
1601 |             "max_width": null,
1602 |             "min_height": null,
1603 |             "min_width": null,
1604 |             "object_fit": null,
1605 |             "object_position": null,
1606 |             "order": null,
1607 |             "overflow": null,
1608 |             "overflow_x": null,
1609 |             "overflow_y": null,
1610 |             "padding": null,
1611 |             "right": null,
1612 |             "top": null,
1613 |             "visibility": null,
1614 |             "width": null
1615 |           }
1616 |         },
1617 |         "450f2b15f9df4f72b23c4f916bc18f3b": {
1618 |           "model_module": "@jupyter-widgets/controls",
1619 |           "model_name": "DescriptionStyleModel",
1620 |           "model_module_version": "1.5.0",
1621 |           "state": {
1622 |             "_model_module": "@jupyter-widgets/controls",
1623 |             "_model_module_version": "1.5.0",
1624 |             "_model_name": "DescriptionStyleModel",
1625 |             "_view_count": null,
1626 |             "_view_module": "@jupyter-widgets/base",
1627 |             "_view_module_version": "1.2.0",
1628 |             "_view_name": "StyleView",
1629 |             "description_width": ""
1630 |           }
1631 |         },
1632 |         "22df8e4fce3b470b94fdce6e7b77a9ea": {
1633 |           "model_module": "@jupyter-widgets/controls",
1634 |           "model_name": "HBoxModel",
1635 |           "model_module_version": "1.5.0",
1636 |           "state": {
1637 |             "_dom_classes": [],
1638 |             "_model_module": "@jupyter-widgets/controls",
1639 |             "_model_module_version": "1.5.0",
1640 |             "_model_name": "HBoxModel",
1641 |             "_view_count": null,
1642 |             "_view_module": "@jupyter-widgets/controls",
1643 |             "_view_module_version": "1.5.0",
1644 |             "_view_name": "HBoxView",
1645 |             "box_style": "",
1646 |             "children": [
1647 |               "IPY_MODEL_cd89e195d2bb4537889ec8cc9e7a815e",
1648 |               "IPY_MODEL_196951cc2fdd43d4a153de2666067cd0",
1649 |               "IPY_MODEL_aaf9e7678c174fe8820c5c0bebb6bb1e"
1650 |             ],
1651 |             "layout": "IPY_MODEL_699f568cedd846f590efa2500dd8b3a9"
1652 |           }
1653 |         },
1654 |         "cd89e195d2bb4537889ec8cc9e7a815e": {
1655 |           "model_module": "@jupyter-widgets/controls",
1656 |           "model_name": "HTMLModel",
1657 |           "model_module_version": "1.5.0",
1658 |           "state": {
1659 |             "_dom_classes": [],
1660 |             "_model_module": "@jupyter-widgets/controls",
1661 |             "_model_module_version": "1.5.0",
1662 |             "_model_name": "HTMLModel",
1663 |             "_view_count": null,
1664 |             "_view_module": "@jupyter-widgets/controls",
1665 |             "_view_module_version": "1.5.0",
1666 |             "_view_name": "HTMLView",
1667 |             "description": "",
1668 |             "description_tooltip": null,
1669 |             "layout": "IPY_MODEL_a0f3836eb674483295fbb147065b74fc",
1670 |             "placeholder": "​",
1671 |             "style": "IPY_MODEL_da922717666a496da59a4cf8840e6554",
1672 |             "value": "Downloading data files: 100%"
1673 |           }
1674 |         },
1675 |         "196951cc2fdd43d4a153de2666067cd0": {
1676 |           "model_module": "@jupyter-widgets/controls",
1677 |           "model_name": "FloatProgressModel",
1678 |           "model_module_version": "1.5.0",
1679 |           "state": {
1680 |             "_dom_classes": [],
1681 |             "_model_module": "@jupyter-widgets/controls",
1682 |             "_model_module_version": "1.5.0",
1683 |             "_model_name": "FloatProgressModel",
1684 |             "_view_count": null,
1685 |             "_view_module": "@jupyter-widgets/controls",
1686 |             "_view_module_version": "1.5.0",
1687 |             "_view_name": "ProgressView",
1688 |             "bar_style": "success",
1689 |             "description": "",
1690 |             "description_tooltip": null,
1691 |             "layout": "IPY_MODEL_cc9ddc6c56324dd59cfb8bc9649fea28",
1692 |             "max": 1,
1693 |             "min": 0,
1694 |             "orientation": "horizontal",
1695 |             "style": "IPY_MODEL_830ec2345d9a4be88b486ad24bfc3b10",
1696 |             "value": 1
1697 |           }
1698 |         },
1699 |         "aaf9e7678c174fe8820c5c0bebb6bb1e": {
1700 |           "model_module": "@jupyter-widgets/controls",
1701 |           "model_name": "HTMLModel",
1702 |           "model_module_version": "1.5.0",
1703 |           "state": {
1704 |             "_dom_classes": [],
1705 |             "_model_module": "@jupyter-widgets/controls",
1706 |             "_model_module_version": "1.5.0",
1707 |             "_model_name": "HTMLModel",
1708 |             "_view_count": null,
1709 |             "_view_module": "@jupyter-widgets/controls",
1710 |             "_view_module_version": "1.5.0",
1711 |             "_view_name": "HTMLView",
1712 |             "description": "",
1713 |             "description_tooltip": null,
1714 |             "layout": "IPY_MODEL_05b63fe3c99c417fb6bcdb450081bff8",
1715 |             "placeholder": "​",
1716 |             "style": "IPY_MODEL_e2872821e4e84271b32b8c8c8c093bfc",
1717 |             "value": " 1/1 [00:00&lt;00:00,  2.20it/s]"
1718 |           }
1719 |         },
1720 |         "699f568cedd846f590efa2500dd8b3a9": {
1721 |           "model_module": "@jupyter-widgets/base",
1722 |           "model_name": "LayoutModel",
1723 |           "model_module_version": "1.2.0",
1724 |           "state": {
1725 |             "_model_module": "@jupyter-widgets/base",
1726 |             "_model_module_version": "1.2.0",
1727 |             "_model_name": "LayoutModel",
1728 |             "_view_count": null,
1729 |             "_view_module": "@jupyter-widgets/base",
1730 |             "_view_module_version": "1.2.0",
1731 |             "_view_name": "LayoutView",
1732 |             "align_content": null,
1733 |             "align_items": null,
1734 |             "align_self": null,
1735 |             "border": null,
1736 |             "bottom": null,
1737 |             "display": null,
1738 |             "flex": null,
1739 |             "flex_flow": null,
1740 |             "grid_area": null,
1741 |             "grid_auto_columns": null,
1742 |             "grid_auto_flow": null,
1743 |             "grid_auto_rows": null,
1744 |             "grid_column": null,
1745 |             "grid_gap": null,
1746 |             "grid_row": null,
1747 |             "grid_template_areas": null,
1748 |             "grid_template_columns": null,
1749 |             "grid_template_rows": null,
1750 |             "height": null,
1751 |             "justify_content": null,
1752 |             "justify_items": null,
1753 |             "left": null,
1754 |             "margin": null,
1755 |             "max_height": null,
1756 |             "max_width": null,
1757 |             "min_height": null,
1758 |             "min_width": null,
1759 |             "object_fit": null,
1760 |             "object_position": null,
1761 |             "order": null,
1762 |             "overflow": null,
1763 |             "overflow_x": null,
1764 |             "overflow_y": null,
1765 |             "padding": null,
1766 |             "right": null,
1767 |             "top": null,
1768 |             "visibility": null,
1769 |             "width": null
1770 |           }
1771 |         },
1772 |         "a0f3836eb674483295fbb147065b74fc": {
1773 |           "model_module": "@jupyter-widgets/base",
1774 |           "model_name": "LayoutModel",
1775 |           "model_module_version": "1.2.0",
1776 |           "state": {
1777 |             "_model_module": "@jupyter-widgets/base",
1778 |             "_model_module_version": "1.2.0",
1779 |             "_model_name": "LayoutModel",
1780 |             "_view_count": null,
1781 |             "_view_module": "@jupyter-widgets/base",
1782 |             "_view_module_version": "1.2.0",
1783 |             "_view_name": "LayoutView",
1784 |             "align_content": null,
1785 |             "align_items": null,
1786 |             "align_self": null,
1787 |             "border": null,
1788 |             "bottom": null,
1789 |             "display": null,
1790 |             "flex": null,
1791 |             "flex_flow": null,
1792 |             "grid_area": null,
1793 |             "grid_auto_columns": null,
1794 |             "grid_auto_flow": null,
1795 |             "grid_auto_rows": null,
1796 |             "grid_column": null,
1797 |             "grid_gap": null,
1798 |             "grid_row": null,
1799 |             "grid_template_areas": null,
1800 |             "grid_template_columns": null,
1801 |             "grid_template_rows": null,
1802 |             "height": null,
1803 |             "justify_content": null,
1804 |             "justify_items": null,
1805 |             "left": null,
1806 |             "margin": null,
1807 |             "max_height": null,
1808 |             "max_width": null,
1809 |             "min_height": null,
1810 |             "min_width": null,
1811 |             "object_fit": null,
1812 |             "object_position": null,
1813 |             "order": null,
1814 |             "overflow": null,
1815 |             "overflow_x": null,
1816 |             "overflow_y": null,
1817 |             "padding": null,
1818 |             "right": null,
1819 |             "top": null,
1820 |             "visibility": null,
1821 |             "width": null
1822 |           }
1823 |         },
1824 |         "da922717666a496da59a4cf8840e6554": {
1825 |           "model_module": "@jupyter-widgets/controls",
1826 |           "model_name": "DescriptionStyleModel",
1827 |           "model_module_version": "1.5.0",
1828 |           "state": {
1829 |             "_model_module": "@jupyter-widgets/controls",
1830 |             "_model_module_version": "1.5.0",
1831 |             "_model_name": "DescriptionStyleModel",
1832 |             "_view_count": null,
1833 |             "_view_module": "@jupyter-widgets/base",
1834 |             "_view_module_version": "1.2.0",
1835 |             "_view_name": "StyleView",
1836 |             "description_width": ""
1837 |           }
1838 |         },
1839 |         "cc9ddc6c56324dd59cfb8bc9649fea28": {
1840 |           "model_module": "@jupyter-widgets/base",
1841 |           "model_name": "LayoutModel",
1842 |           "model_module_version": "1.2.0",
1843 |           "state": {
1844 |             "_model_module": "@jupyter-widgets/base",
1845 |             "_model_module_version": "1.2.0",
1846 |             "_model_name": "LayoutModel",
1847 |             "_view_count": null,
1848 |             "_view_module": "@jupyter-widgets/base",
1849 |             "_view_module_version": "1.2.0",
1850 |             "_view_name": "LayoutView",
1851 |             "align_content": null,
1852 |             "align_items": null,
1853 |             "align_self": null,
1854 |             "border": null,
1855 |             "bottom": null,
1856 |             "display": null,
1857 |             "flex": null,
1858 |             "flex_flow": null,
1859 |             "grid_area": null,
1860 |             "grid_auto_columns": null,
1861 |             "grid_auto_flow": null,
1862 |             "grid_auto_rows": null,
1863 |             "grid_column": null,
1864 |             "grid_gap": null,
1865 |             "grid_row": null,
1866 |             "grid_template_areas": null,
1867 |             "grid_template_columns": null,
1868 |             "grid_template_rows": null,
1869 |             "height": null,
1870 |             "justify_content": null,
1871 |             "justify_items": null,
1872 |             "left": null,
1873 |             "margin": null,
1874 |             "max_height": null,
1875 |             "max_width": null,
1876 |             "min_height": null,
1877 |             "min_width": null,
1878 |             "object_fit": null,
1879 |             "object_position": null,
1880 |             "order": null,
1881 |             "overflow": null,
1882 |             "overflow_x": null,
1883 |             "overflow_y": null,
1884 |             "padding": null,
1885 |             "right": null,
1886 |             "top": null,
1887 |             "visibility": null,
1888 |             "width": null
1889 |           }
1890 |         },
1891 |         "830ec2345d9a4be88b486ad24bfc3b10": {
1892 |           "model_module": "@jupyter-widgets/controls",
1893 |           "model_name": "ProgressStyleModel",
1894 |           "model_module_version": "1.5.0",
1895 |           "state": {
1896 |             "_model_module": "@jupyter-widgets/controls",
1897 |             "_model_module_version": "1.5.0",
1898 |             "_model_name": "ProgressStyleModel",
1899 |             "_view_count": null,
1900 |             "_view_module": "@jupyter-widgets/base",
1901 |             "_view_module_version": "1.2.0",
1902 |             "_view_name": "StyleView",
1903 |             "bar_color": null,
1904 |             "description_width": ""
1905 |           }
1906 |         },
1907 |         "05b63fe3c99c417fb6bcdb450081bff8": {
1908 |           "model_module": "@jupyter-widgets/base",
1909 |           "model_name": "LayoutModel",
1910 |           "model_module_version": "1.2.0",
1911 |           "state": {
1912 |             "_model_module": "@jupyter-widgets/base",
1913 |             "_model_module_version": "1.2.0",
1914 |             "_model_name": "LayoutModel",
1915 |             "_view_count": null,
1916 |             "_view_module": "@jupyter-widgets/base",
1917 |             "_view_module_version": "1.2.0",
1918 |             "_view_name": "LayoutView",
1919 |             "align_content": null,
1920 |             "align_items": null,
1921 |             "align_self": null,
1922 |             "border": null,
1923 |             "bottom": null,
1924 |             "display": null,
1925 |             "flex": null,
1926 |             "flex_flow": null,
1927 |             "grid_area": null,
1928 |             "grid_auto_columns": null,
1929 |             "grid_auto_flow": null,
1930 |             "grid_auto_rows": null,
1931 |             "grid_column": null,
1932 |             "grid_gap": null,
1933 |             "grid_row": null,
1934 |             "grid_template_areas": null,
1935 |             "grid_template_columns": null,
1936 |             "grid_template_rows": null,
1937 |             "height": null,
1938 |             "justify_content": null,
1939 |             "justify_items": null,
1940 |             "left": null,
1941 |             "margin": null,
1942 |             "max_height": null,
1943 |             "max_width": null,
1944 |             "min_height": null,
1945 |             "min_width": null,
1946 |             "object_fit": null,
1947 |             "object_position": null,
1948 |             "order": null,
1949 |             "overflow": null,
1950 |             "overflow_x": null,
1951 |             "overflow_y": null,
1952 |             "padding": null,
1953 |             "right": null,
1954 |             "top": null,
1955 |             "visibility": null,
1956 |             "width": null
1957 |           }
1958 |         },
1959 |         "e2872821e4e84271b32b8c8c8c093bfc": {
1960 |           "model_module": "@jupyter-widgets/controls",
1961 |           "model_name": "DescriptionStyleModel",
1962 |           "model_module_version": "1.5.0",
1963 |           "state": {
1964 |             "_model_module": "@jupyter-widgets/controls",
1965 |             "_model_module_version": "1.5.0",
1966 |             "_model_name": "DescriptionStyleModel",
1967 |             "_view_count": null,
1968 |             "_view_module": "@jupyter-widgets/base",
1969 |             "_view_module_version": "1.2.0",
1970 |             "_view_name": "StyleView",
1971 |             "description_width": ""
1972 |           }
1973 |         },
1974 |         "782d656769144ef9b48a3a37de81abb5": {
1975 |           "model_module": "@jupyter-widgets/controls",
1976 |           "model_name": "HBoxModel",
1977 |           "model_module_version": "1.5.0",
1978 |           "state": {
1979 |             "_dom_classes": [],
1980 |             "_model_module": "@jupyter-widgets/controls",
1981 |             "_model_module_version": "1.5.0",
1982 |             "_model_name": "HBoxModel",
1983 |             "_view_count": null,
1984 |             "_view_module": "@jupyter-widgets/controls",
1985 |             "_view_module_version": "1.5.0",
1986 |             "_view_name": "HBoxView",
1987 |             "box_style": "",
1988 |             "children": [
1989 |               "IPY_MODEL_eb2f4bcb78534f4d9f9e2ccb52e738b7",
1990 |               "IPY_MODEL_a2bcf8164d904dcbada2196189b332be",
1991 |               "IPY_MODEL_99b5b2cd3f104c72b5ee880fe1d0e9b9"
1992 |             ],
1993 |             "layout": "IPY_MODEL_3197f87aadd5422cbb9804b0843ffc48"
1994 |           }
1995 |         },
1996 |         "eb2f4bcb78534f4d9f9e2ccb52e738b7": {
1997 |           "model_module": "@jupyter-widgets/controls",
1998 |           "model_name": "HTMLModel",
1999 |           "model_module_version": "1.5.0",
2000 |           "state": {
2001 |             "_dom_classes": [],
2002 |             "_model_module": "@jupyter-widgets/controls",
2003 |             "_model_module_version": "1.5.0",
2004 |             "_model_name": "HTMLModel",
2005 |             "_view_count": null,
2006 |             "_view_module": "@jupyter-widgets/controls",
2007 |             "_view_module_version": "1.5.0",
2008 |             "_view_name": "HTMLView",
2009 |             "description": "",
2010 |             "description_tooltip": null,
2011 |             "layout": "IPY_MODEL_5cdf7a7b08cc46f5a4b2da143ba39bb6",
2012 |             "placeholder": "​",
2013 |             "style": "IPY_MODEL_8f335a7d85574c11b183fb700aeac5c3",
2014 |             "value": "Downloading data: 100%"
2015 |           }
2016 |         },
2017 |         "a2bcf8164d904dcbada2196189b332be": {
2018 |           "model_module": "@jupyter-widgets/controls",
2019 |           "model_name": "FloatProgressModel",
2020 |           "model_module_version": "1.5.0",
2021 |           "state": {
2022 |             "_dom_classes": [],
2023 |             "_model_module": "@jupyter-widgets/controls",
2024 |             "_model_module_version": "1.5.0",
2025 |             "_model_name": "FloatProgressModel",
2026 |             "_view_count": null,
2027 |             "_view_module": "@jupyter-widgets/controls",
2028 |             "_view_module_version": "1.5.0",
2029 |             "_view_name": "ProgressView",
2030 |             "bar_style": "success",
2031 |             "description": "",
2032 |             "description_tooltip": null,
2033 |             "layout": "IPY_MODEL_6b96186a1ccb4e24b491b5849ac90c50",
2034 |             "max": 9280790,
2035 |             "min": 0,
2036 |             "orientation": "horizontal",
2037 |             "style": "IPY_MODEL_3a845c0efe954da1a47e77740f8623ff",
2038 |             "value": 9280790
2039 |           }
2040 |         },
2041 |         "99b5b2cd3f104c72b5ee880fe1d0e9b9": {
2042 |           "model_module": "@jupyter-widgets/controls",
2043 |           "model_name": "HTMLModel",
2044 |           "model_module_version": "1.5.0",
2045 |           "state": {
2046 |             "_dom_classes": [],
2047 |             "_model_module": "@jupyter-widgets/controls",
2048 |             "_model_module_version": "1.5.0",
2049 |             "_model_name": "HTMLModel",
2050 |             "_view_count": null,
2051 |             "_view_module": "@jupyter-widgets/controls",
2052 |             "_view_module_version": "1.5.0",
2053 |             "_view_name": "HTMLView",
2054 |             "description": "",
2055 |             "description_tooltip": null,
2056 |             "layout": "IPY_MODEL_4c8f47c325a54f52abab545362f36c43",
2057 |             "placeholder": "​",
2058 |             "style": "IPY_MODEL_7c1dc629e6dc4048b1b88a224c9a352d",
2059 |             "value": " 9.28M/9.28M [00:00&lt;00:00, 9.96MB/s]"
2060 |           }
2061 |         },
2062 |         "3197f87aadd5422cbb9804b0843ffc48": {
2063 |           "model_module": "@jupyter-widgets/base",
2064 |           "model_name": "LayoutModel",
2065 |           "model_module_version": "1.2.0",
2066 |           "state": {
2067 |             "_model_module": "@jupyter-widgets/base",
2068 |             "_model_module_version": "1.2.0",
2069 |             "_model_name": "LayoutModel",
2070 |             "_view_count": null,
2071 |             "_view_module": "@jupyter-widgets/base",
2072 |             "_view_module_version": "1.2.0",
2073 |             "_view_name": "LayoutView",
2074 |             "align_content": null,
2075 |             "align_items": null,
2076 |             "align_self": null,
2077 |             "border": null,
2078 |             "bottom": null,
2079 |             "display": null,
2080 |             "flex": null,
2081 |             "flex_flow": null,
2082 |             "grid_area": null,
2083 |             "grid_auto_columns": null,
2084 |             "grid_auto_flow": null,
2085 |             "grid_auto_rows": null,
2086 |             "grid_column": null,
2087 |             "grid_gap": null,
2088 |             "grid_row": null,
2089 |             "grid_template_areas": null,
2090 |             "grid_template_columns": null,
2091 |             "grid_template_rows": null,
2092 |             "height": null,
2093 |             "justify_content": null,
2094 |             "justify_items": null,
2095 |             "left": null,
2096 |             "margin": null,
2097 |             "max_height": null,
2098 |             "max_width": null,
2099 |             "min_height": null,
2100 |             "min_width": null,
2101 |             "object_fit": null,
2102 |             "object_position": null,
2103 |             "order": null,
2104 |             "overflow": null,
2105 |             "overflow_x": null,
2106 |             "overflow_y": null,
2107 |             "padding": null,
2108 |             "right": null,
2109 |             "top": null,
2110 |             "visibility": null,
2111 |             "width": null
2112 |           }
2113 |         },
2114 |         "5cdf7a7b08cc46f5a4b2da143ba39bb6": {
2115 |           "model_module": "@jupyter-widgets/base",
2116 |           "model_name": "LayoutModel",
2117 |           "model_module_version": "1.2.0",
2118 |           "state": {
2119 |             "_model_module": "@jupyter-widgets/base",
2120 |             "_model_module_version": "1.2.0",
2121 |             "_model_name": "LayoutModel",
2122 |             "_view_count": null,
2123 |             "_view_module": "@jupyter-widgets/base",
2124 |             "_view_module_version": "1.2.0",
2125 |             "_view_name": "LayoutView",
2126 |             "align_content": null,
2127 |             "align_items": null,
2128 |             "align_self": null,
2129 |             "border": null,
2130 |             "bottom": null,
2131 |             "display": null,
2132 |             "flex": null,
2133 |             "flex_flow": null,
2134 |             "grid_area": null,
2135 |             "grid_auto_columns": null,
2136 |             "grid_auto_flow": null,
2137 |             "grid_auto_rows": null,
2138 |             "grid_column": null,
2139 |             "grid_gap": null,
2140 |             "grid_row": null,
2141 |             "grid_template_areas": null,
2142 |             "grid_template_columns": null,
2143 |             "grid_template_rows": null,
2144 |             "height": null,
2145 |             "justify_content": null,
2146 |             "justify_items": null,
2147 |             "left": null,
2148 |             "margin": null,
2149 |             "max_height": null,
2150 |             "max_width": null,
2151 |             "min_height": null,
2152 |             "min_width": null,
2153 |             "object_fit": null,
2154 |             "object_position": null,
2155 |             "order": null,
2156 |             "overflow": null,
2157 |             "overflow_x": null,
2158 |             "overflow_y": null,
2159 |             "padding": null,
2160 |             "right": null,
2161 |             "top": null,
2162 |             "visibility": null,
2163 |             "width": null
2164 |           }
2165 |         },
2166 |         "8f335a7d85574c11b183fb700aeac5c3": {
2167 |           "model_module": "@jupyter-widgets/controls",
2168 |           "model_name": "DescriptionStyleModel",
2169 |           "model_module_version": "1.5.0",
2170 |           "state": {
2171 |             "_model_module": "@jupyter-widgets/controls",
2172 |             "_model_module_version": "1.5.0",
2173 |             "_model_name": "DescriptionStyleModel",
2174 |             "_view_count": null,
2175 |             "_view_module": "@jupyter-widgets/base",
2176 |             "_view_module_version": "1.2.0",
2177 |             "_view_name": "StyleView",
2178 |             "description_width": ""
2179 |           }
2180 |         },
2181 |         "6b96186a1ccb4e24b491b5849ac90c50": {
2182 |           "model_module": "@jupyter-widgets/base",
2183 |           "model_name": "LayoutModel",
2184 |           "model_module_version": "1.2.0",
2185 |           "state": {
2186 |             "_model_module": "@jupyter-widgets/base",
2187 |             "_model_module_version": "1.2.0",
2188 |             "_model_name": "LayoutModel",
2189 |             "_view_count": null,
2190 |             "_view_module": "@jupyter-widgets/base",
2191 |             "_view_module_version": "1.2.0",
2192 |             "_view_name": "LayoutView",
2193 |             "align_content": null,
2194 |             "align_items": null,
2195 |             "align_self": null,
2196 |             "border": null,
2197 |             "bottom": null,
2198 |             "display": null,
2199 |             "flex": null,
2200 |             "flex_flow": null,
2201 |             "grid_area": null,
2202 |             "grid_auto_columns": null,
2203 |             "grid_auto_flow": null,
2204 |             "grid_auto_rows": null,
2205 |             "grid_column": null,
2206 |             "grid_gap": null,
2207 |             "grid_row": null,
2208 |             "grid_template_areas": null,
2209 |             "grid_template_columns": null,
2210 |             "grid_template_rows": null,
2211 |             "height": null,
2212 |             "justify_content": null,
2213 |             "justify_items": null,
2214 |             "left": null,
2215 |             "margin": null,
2216 |             "max_height": null,
2217 |             "max_width": null,
2218 |             "min_height": null,
2219 |             "min_width": null,
2220 |             "object_fit": null,
2221 |             "object_position": null,
2222 |             "order": null,
2223 |             "overflow": null,
2224 |             "overflow_x": null,
2225 |             "overflow_y": null,
2226 |             "padding": null,
2227 |             "right": null,
2228 |             "top": null,
2229 |             "visibility": null,
2230 |             "width": null
2231 |           }
2232 |         },
2233 |         "3a845c0efe954da1a47e77740f8623ff": {
2234 |           "model_module": "@jupyter-widgets/controls",
2235 |           "model_name": "ProgressStyleModel",
2236 |           "model_module_version": "1.5.0",
2237 |           "state": {
2238 |             "_model_module": "@jupyter-widgets/controls",
2239 |             "_model_module_version": "1.5.0",
2240 |             "_model_name": "ProgressStyleModel",
2241 |             "_view_count": null,
2242 |             "_view_module": "@jupyter-widgets/base",
2243 |             "_view_module_version": "1.2.0",
2244 |             "_view_name": "StyleView",
2245 |             "bar_color": null,
2246 |             "description_width": ""
2247 |           }
2248 |         },
2249 |         "4c8f47c325a54f52abab545362f36c43": {
2250 |           "model_module": "@jupyter-widgets/base",
2251 |           "model_name": "LayoutModel",
2252 |           "model_module_version": "1.2.0",
2253 |           "state": {
2254 |             "_model_module": "@jupyter-widgets/base",
2255 |             "_model_module_version": "1.2.0",
2256 |             "_model_name": "LayoutModel",
2257 |             "_view_count": null,
2258 |             "_view_module": "@jupyter-widgets/base",
2259 |             "_view_module_version": "1.2.0",
2260 |             "_view_name": "LayoutView",
2261 |             "align_content": null,
2262 |             "align_items": null,
2263 |             "align_self": null,
2264 |             "border": null,
2265 |             "bottom": null,
2266 |             "display": null,
2267 |             "flex": null,
2268 |             "flex_flow": null,
2269 |             "grid_area": null,
2270 |             "grid_auto_columns": null,
2271 |             "grid_auto_flow": null,
2272 |             "grid_auto_rows": null,
2273 |             "grid_column": null,
2274 |             "grid_gap": null,
2275 |             "grid_row": null,
2276 |             "grid_template_areas": null,
2277 |             "grid_template_columns": null,
2278 |             "grid_template_rows": null,
2279 |             "height": null,
2280 |             "justify_content": null,
2281 |             "justify_items": null,
2282 |             "left": null,
2283 |             "margin": null,
2284 |             "max_height": null,
2285 |             "max_width": null,
2286 |             "min_height": null,
2287 |             "min_width": null,
2288 |             "object_fit": null,
2289 |             "object_position": null,
2290 |             "order": null,
2291 |             "overflow": null,
2292 |             "overflow_x": null,
2293 |             "overflow_y": null,
2294 |             "padding": null,
2295 |             "right": null,
2296 |             "top": null,
2297 |             "visibility": null,
2298 |             "width": null
2299 |           }
2300 |         },
2301 |         "7c1dc629e6dc4048b1b88a224c9a352d": {
2302 |           "model_module": "@jupyter-widgets/controls",
2303 |           "model_name": "DescriptionStyleModel",
2304 |           "model_module_version": "1.5.0",
2305 |           "state": {
2306 |             "_model_module": "@jupyter-widgets/controls",
2307 |             "_model_module_version": "1.5.0",
2308 |             "_model_name": "DescriptionStyleModel",
2309 |             "_view_count": null,
2310 |             "_view_module": "@jupyter-widgets/base",
2311 |             "_view_module_version": "1.2.0",
2312 |             "_view_name": "StyleView",
2313 |             "description_width": ""
2314 |           }
2315 |         },
2316 |         "da84172eaff34e61ac902681dbd364ca": {
2317 |           "model_module": "@jupyter-widgets/controls",
2318 |           "model_name": "HBoxModel",
2319 |           "model_module_version": "1.5.0",
2320 |           "state": {
2321 |             "_dom_classes": [],
2322 |             "_model_module": "@jupyter-widgets/controls",
2323 |             "_model_module_version": "1.5.0",
2324 |             "_model_name": "HBoxModel",
2325 |             "_view_count": null,
2326 |             "_view_module": "@jupyter-widgets/controls",
2327 |             "_view_module_version": "1.5.0",
2328 |             "_view_name": "HBoxView",
2329 |             "box_style": "",
2330 |             "children": [
2331 |               "IPY_MODEL_2796bada5f6748b6af59f6b14b0957af",
2332 |               "IPY_MODEL_400b852ef365473cad76663421954c86",
2333 |               "IPY_MODEL_fd58bb90108a4486967a217eb3bc4389"
2334 |             ],
2335 |             "layout": "IPY_MODEL_b96a2d9afc324a4eb52f7a04caab630a"
2336 |           }
2337 |         },
2338 |         "2796bada5f6748b6af59f6b14b0957af": {
2339 |           "model_module": "@jupyter-widgets/controls",
2340 |           "model_name": "HTMLModel",
2341 |           "model_module_version": "1.5.0",
2342 |           "state": {
2343 |             "_dom_classes": [],
2344 |             "_model_module": "@jupyter-widgets/controls",
2345 |             "_model_module_version": "1.5.0",
2346 |             "_model_name": "HTMLModel",
2347 |             "_view_count": null,
2348 |             "_view_module": "@jupyter-widgets/controls",
2349 |             "_view_module_version": "1.5.0",
2350 |             "_view_name": "HTMLView",
2351 |             "description": "",
2352 |             "description_tooltip": null,
2353 |             "layout": "IPY_MODEL_7c20b8d8e3b14504bba903e68d043e79",
2354 |             "placeholder": "​",
2355 |             "style": "IPY_MODEL_c8bc395e18e14492ae40ec6ff21a18d1",
2356 |             "value": "Extracting data files: 100%"
2357 |           }
2358 |         },
2359 |         "400b852ef365473cad76663421954c86": {
2360 |           "model_module": "@jupyter-widgets/controls",
2361 |           "model_name": "FloatProgressModel",
2362 |           "model_module_version": "1.5.0",
2363 |           "state": {
2364 |             "_dom_classes": [],
2365 |             "_model_module": "@jupyter-widgets/controls",
2366 |             "_model_module_version": "1.5.0",
2367 |             "_model_name": "FloatProgressModel",
2368 |             "_view_count": null,
2369 |             "_view_module": "@jupyter-widgets/controls",
2370 |             "_view_module_version": "1.5.0",
2371 |             "_view_name": "ProgressView",
2372 |             "bar_style": "success",
2373 |             "description": "",
2374 |             "description_tooltip": null,
2375 |             "layout": "IPY_MODEL_6c85b036e1be434faa2d515bed62e228",
2376 |             "max": 1,
2377 |             "min": 0,
2378 |             "orientation": "horizontal",
2379 |             "style": "IPY_MODEL_da15ec7761a847678dc696b214c67ada",
2380 |             "value": 1
2381 |           }
2382 |         },
2383 |         "fd58bb90108a4486967a217eb3bc4389": {
2384 |           "model_module": "@jupyter-widgets/controls",
2385 |           "model_name": "HTMLModel",
2386 |           "model_module_version": "1.5.0",
2387 |           "state": {
2388 |             "_dom_classes": [],
2389 |             "_model_module": "@jupyter-widgets/controls",
2390 |             "_model_module_version": "1.5.0",
2391 |             "_model_name": "HTMLModel",
2392 |             "_view_count": null,
2393 |             "_view_module": "@jupyter-widgets/controls",
2394 |             "_view_module_version": "1.5.0",
2395 |             "_view_name": "HTMLView",
2396 |             "description": "",
2397 |             "description_tooltip": null,
2398 |             "layout": "IPY_MODEL_03d2d213eb2a4c819bbcf8457e11904b",
2399 |             "placeholder": "​",
2400 |             "style": "IPY_MODEL_f651fffdc274473a85ed701097afaa1f",
2401 |             "value": " 1/1 [00:00&lt;00:00, 42.91it/s]"
2402 |           }
2403 |         },
2404 |         "b96a2d9afc324a4eb52f7a04caab630a": {
2405 |           "model_module": "@jupyter-widgets/base",
2406 |           "model_name": "LayoutModel",
2407 |           "model_module_version": "1.2.0",
2408 |           "state": {
2409 |             "_model_module": "@jupyter-widgets/base",
2410 |             "_model_module_version": "1.2.0",
2411 |             "_model_name": "LayoutModel",
2412 |             "_view_count": null,
2413 |             "_view_module": "@jupyter-widgets/base",
2414 |             "_view_module_version": "1.2.0",
2415 |             "_view_name": "LayoutView",
2416 |             "align_content": null,
2417 |             "align_items": null,
2418 |             "align_self": null,
2419 |             "border": null,
2420 |             "bottom": null,
2421 |             "display": null,
2422 |             "flex": null,
2423 |             "flex_flow": null,
2424 |             "grid_area": null,
2425 |             "grid_auto_columns": null,
2426 |             "grid_auto_flow": null,
2427 |             "grid_auto_rows": null,
2428 |             "grid_column": null,
2429 |             "grid_gap": null,
2430 |             "grid_row": null,
2431 |             "grid_template_areas": null,
2432 |             "grid_template_columns": null,
2433 |             "grid_template_rows": null,
2434 |             "height": null,
2435 |             "justify_content": null,
2436 |             "justify_items": null,
2437 |             "left": null,
2438 |             "margin": null,
2439 |             "max_height": null,
2440 |             "max_width": null,
2441 |             "min_height": null,
2442 |             "min_width": null,
2443 |             "object_fit": null,
2444 |             "object_position": null,
2445 |             "order": null,
2446 |             "overflow": null,
2447 |             "overflow_x": null,
2448 |             "overflow_y": null,
2449 |             "padding": null,
2450 |             "right": null,
2451 |             "top": null,
2452 |             "visibility": null,
2453 |             "width": null
2454 |           }
2455 |         },
2456 |         "7c20b8d8e3b14504bba903e68d043e79": {
2457 |           "model_module": "@jupyter-widgets/base",
2458 |           "model_name": "LayoutModel",
2459 |           "model_module_version": "1.2.0",
2460 |           "state": {
2461 |             "_model_module": "@jupyter-widgets/base",
2462 |             "_model_module_version": "1.2.0",
2463 |             "_model_name": "LayoutModel",
2464 |             "_view_count": null,
2465 |             "_view_module": "@jupyter-widgets/base",
2466 |             "_view_module_version": "1.2.0",
2467 |             "_view_name": "LayoutView",
2468 |             "align_content": null,
2469 |             "align_items": null,
2470 |             "align_self": null,
2471 |             "border": null,
2472 |             "bottom": null,
2473 |             "display": null,
2474 |             "flex": null,
2475 |             "flex_flow": null,
2476 |             "grid_area": null,
2477 |             "grid_auto_columns": null,
2478 |             "grid_auto_flow": null,
2479 |             "grid_auto_rows": null,
2480 |             "grid_column": null,
2481 |             "grid_gap": null,
2482 |             "grid_row": null,
2483 |             "grid_template_areas": null,
2484 |             "grid_template_columns": null,
2485 |             "grid_template_rows": null,
2486 |             "height": null,
2487 |             "justify_content": null,
2488 |             "justify_items": null,
2489 |             "left": null,
2490 |             "margin": null,
2491 |             "max_height": null,
2492 |             "max_width": null,
2493 |             "min_height": null,
2494 |             "min_width": null,
2495 |             "object_fit": null,
2496 |             "object_position": null,
2497 |             "order": null,
2498 |             "overflow": null,
2499 |             "overflow_x": null,
2500 |             "overflow_y": null,
2501 |             "padding": null,
2502 |             "right": null,
2503 |             "top": null,
2504 |             "visibility": null,
2505 |             "width": null
2506 |           }
2507 |         },
2508 |         "c8bc395e18e14492ae40ec6ff21a18d1": {
2509 |           "model_module": "@jupyter-widgets/controls",
2510 |           "model_name": "DescriptionStyleModel",
2511 |           "model_module_version": "1.5.0",
2512 |           "state": {
2513 |             "_model_module": "@jupyter-widgets/controls",
2514 |             "_model_module_version": "1.5.0",
2515 |             "_model_name": "DescriptionStyleModel",
2516 |             "_view_count": null,
2517 |             "_view_module": "@jupyter-widgets/base",
2518 |             "_view_module_version": "1.2.0",
2519 |             "_view_name": "StyleView",
2520 |             "description_width": ""
2521 |           }
2522 |         },
2523 |         "6c85b036e1be434faa2d515bed62e228": {
2524 |           "model_module": "@jupyter-widgets/base",
2525 |           "model_name": "LayoutModel",
2526 |           "model_module_version": "1.2.0",
2527 |           "state": {
2528 |             "_model_module": "@jupyter-widgets/base",
2529 |             "_model_module_version": "1.2.0",
2530 |             "_model_name": "LayoutModel",
2531 |             "_view_count": null,
2532 |             "_view_module": "@jupyter-widgets/base",
2533 |             "_view_module_version": "1.2.0",
2534 |             "_view_name": "LayoutView",
2535 |             "align_content": null,
2536 |             "align_items": null,
2537 |             "align_self": null,
2538 |             "border": null,
2539 |             "bottom": null,
2540 |             "display": null,
2541 |             "flex": null,
2542 |             "flex_flow": null,
2543 |             "grid_area": null,
2544 |             "grid_auto_columns": null,
2545 |             "grid_auto_flow": null,
2546 |             "grid_auto_rows": null,
2547 |             "grid_column": null,
2548 |             "grid_gap": null,
2549 |             "grid_row": null,
2550 |             "grid_template_areas": null,
2551 |             "grid_template_columns": null,
2552 |             "grid_template_rows": null,
2553 |             "height": null,
2554 |             "justify_content": null,
2555 |             "justify_items": null,
2556 |             "left": null,
2557 |             "margin": null,
2558 |             "max_height": null,
2559 |             "max_width": null,
2560 |             "min_height": null,
2561 |             "min_width": null,
2562 |             "object_fit": null,
2563 |             "object_position": null,
2564 |             "order": null,
2565 |             "overflow": null,
2566 |             "overflow_x": null,
2567 |             "overflow_y": null,
2568 |             "padding": null,
2569 |             "right": null,
2570 |             "top": null,
2571 |             "visibility": null,
2572 |             "width": null
2573 |           }
2574 |         },
2575 |         "da15ec7761a847678dc696b214c67ada": {
2576 |           "model_module": "@jupyter-widgets/controls",
2577 |           "model_name": "ProgressStyleModel",
2578 |           "model_module_version": "1.5.0",
2579 |           "state": {
2580 |             "_model_module": "@jupyter-widgets/controls",
2581 |             "_model_module_version": "1.5.0",
2582 |             "_model_name": "ProgressStyleModel",
2583 |             "_view_count": null,
2584 |             "_view_module": "@jupyter-widgets/base",
2585 |             "_view_module_version": "1.2.0",
2586 |             "_view_name": "StyleView",
2587 |             "bar_color": null,
2588 |             "description_width": ""
2589 |           }
2590 |         },
2591 |         "03d2d213eb2a4c819bbcf8457e11904b": {
2592 |           "model_module": "@jupyter-widgets/base",
2593 |           "model_name": "LayoutModel",
2594 |           "model_module_version": "1.2.0",
2595 |           "state": {
2596 |             "_model_module": "@jupyter-widgets/base",
2597 |             "_model_module_version": "1.2.0",
2598 |             "_model_name": "LayoutModel",
2599 |             "_view_count": null,
2600 |             "_view_module": "@jupyter-widgets/base",
2601 |             "_view_module_version": "1.2.0",
2602 |             "_view_name": "LayoutView",
2603 |             "align_content": null,
2604 |             "align_items": null,
2605 |             "align_self": null,
2606 |             "border": null,
2607 |             "bottom": null,
2608 |             "display": null,
2609 |             "flex": null,
2610 |             "flex_flow": null,
2611 |             "grid_area": null,
2612 |             "grid_auto_columns": null,
2613 |             "grid_auto_flow": null,
2614 |             "grid_auto_rows": null,
2615 |             "grid_column": null,
2616 |             "grid_gap": null,
2617 |             "grid_row": null,
2618 |             "grid_template_areas": null,
2619 |             "grid_template_columns": null,
2620 |             "grid_template_rows": null,
2621 |             "height": null,
2622 |             "justify_content": null,
2623 |             "justify_items": null,
2624 |             "left": null,
2625 |             "margin": null,
2626 |             "max_height": null,
2627 |             "max_width": null,
2628 |             "min_height": null,
2629 |             "min_width": null,
2630 |             "object_fit": null,
2631 |             "object_position": null,
2632 |             "order": null,
2633 |             "overflow": null,
2634 |             "overflow_x": null,
2635 |             "overflow_y": null,
2636 |             "padding": null,
2637 |             "right": null,
2638 |             "top": null,
2639 |             "visibility": null,
2640 |             "width": null
2641 |           }
2642 |         },
2643 |         "f651fffdc274473a85ed701097afaa1f": {
2644 |           "model_module": "@jupyter-widgets/controls",
2645 |           "model_name": "DescriptionStyleModel",
2646 |           "model_module_version": "1.5.0",
2647 |           "state": {
2648 |             "_model_module": "@jupyter-widgets/controls",
2649 |             "_model_module_version": "1.5.0",
2650 |             "_model_name": "DescriptionStyleModel",
2651 |             "_view_count": null,
2652 |             "_view_module": "@jupyter-widgets/base",
2653 |             "_view_module_version": "1.2.0",
2654 |             "_view_name": "StyleView",
2655 |             "description_width": ""
2656 |           }
2657 |         },
2658 |         "3fbc282a30cc49b99f335216df028cd6": {
2659 |           "model_module": "@jupyter-widgets/controls",
2660 |           "model_name": "HBoxModel",
2661 |           "model_module_version": "1.5.0",
2662 |           "state": {
2663 |             "_dom_classes": [],
2664 |             "_model_module": "@jupyter-widgets/controls",
2665 |             "_model_module_version": "1.5.0",
2666 |             "_model_name": "HBoxModel",
2667 |             "_view_count": null,
2668 |             "_view_module": "@jupyter-widgets/controls",
2669 |             "_view_module_version": "1.5.0",
2670 |             "_view_name": "HBoxView",
2671 |             "box_style": "",
2672 |             "children": [
2673 |               "IPY_MODEL_651249802d0249479eb1700e600f9a5a",
2674 |               "IPY_MODEL_31e2d7d5057a4dfa96a65888697e9923",
2675 |               "IPY_MODEL_cbaf9ba59da24341a933c3c7473a3b7d"
2676 |             ],
2677 |             "layout": "IPY_MODEL_ee26c8314e6742a88cd59429f3d5b745"
2678 |           }
2679 |         },
2680 |         "651249802d0249479eb1700e600f9a5a": {
2681 |           "model_module": "@jupyter-widgets/controls",
2682 |           "model_name": "HTMLModel",
2683 |           "model_module_version": "1.5.0",
2684 |           "state": {
2685 |             "_dom_classes": [],
2686 |             "_model_module": "@jupyter-widgets/controls",
2687 |             "_model_module_version": "1.5.0",
2688 |             "_model_name": "HTMLModel",
2689 |             "_view_count": null,
2690 |             "_view_module": "@jupyter-widgets/controls",
2691 |             "_view_module_version": "1.5.0",
2692 |             "_view_name": "HTMLView",
2693 |             "description": "",
2694 |             "description_tooltip": null,
2695 |             "layout": "IPY_MODEL_dd9e81eb4e3d45cca5c6e2b1e6cf335d",
2696 |             "placeholder": "​",
2697 |             "style": "IPY_MODEL_a7c9efe8c49a43d0ba6929bada9f78c2",
2698 |             "value": "Generating train split: "
2699 |           }
2700 |         },
2701 |         "31e2d7d5057a4dfa96a65888697e9923": {
2702 |           "model_module": "@jupyter-widgets/controls",
2703 |           "model_name": "FloatProgressModel",
2704 |           "model_module_version": "1.5.0",
2705 |           "state": {
2706 |             "_dom_classes": [],
2707 |             "_model_module": "@jupyter-widgets/controls",
2708 |             "_model_module_version": "1.5.0",
2709 |             "_model_name": "FloatProgressModel",
2710 |             "_view_count": null,
2711 |             "_view_module": "@jupyter-widgets/controls",
2712 |             "_view_module_version": "1.5.0",
2713 |             "_view_name": "ProgressView",
2714 |             "bar_style": "success",
2715 |             "description": "",
2716 |             "description_tooltip": null,
2717 |             "layout": "IPY_MODEL_15d3af1073fe4447847d0e6f3543f953",
2718 |             "max": 1,
2719 |             "min": 0,
2720 |             "orientation": "horizontal",
2721 |             "style": "IPY_MODEL_e4daf9a3e9e14e93ab55b91da59ecc9b",
2722 |             "value": 1
2723 |           }
2724 |         },
2725 |         "cbaf9ba59da24341a933c3c7473a3b7d": {
2726 |           "model_module": "@jupyter-widgets/controls",
2727 |           "model_name": "HTMLModel",
2728 |           "model_module_version": "1.5.0",
2729 |           "state": {
2730 |             "_dom_classes": [],
2731 |             "_model_module": "@jupyter-widgets/controls",
2732 |             "_model_module_version": "1.5.0",
2733 |             "_model_name": "HTMLModel",
2734 |             "_view_count": null,
2735 |             "_view_module": "@jupyter-widgets/controls",
2736 |             "_view_module_version": "1.5.0",
2737 |             "_view_name": "HTMLView",
2738 |             "description": "",
2739 |             "description_tooltip": null,
2740 |             "layout": "IPY_MODEL_3557bb8fc4064fdf99ca2a1ec5469cff",
2741 |             "placeholder": "​",
2742 |             "style": "IPY_MODEL_b2ccec96efa1415fa4623ec8fa0f2c21",
2743 |             "value": " 13139/0 [00:00&lt;00:00, 36568.30 examples/s]"
2744 |           }
2745 |         },
2746 |         "ee26c8314e6742a88cd59429f3d5b745": {
2747 |           "model_module": "@jupyter-widgets/base",
2748 |           "model_name": "LayoutModel",
2749 |           "model_module_version": "1.2.0",
2750 |           "state": {
2751 |             "_model_module": "@jupyter-widgets/base",
2752 |             "_model_module_version": "1.2.0",
2753 |             "_model_name": "LayoutModel",
2754 |             "_view_count": null,
2755 |             "_view_module": "@jupyter-widgets/base",
2756 |             "_view_module_version": "1.2.0",
2757 |             "_view_name": "LayoutView",
2758 |             "align_content": null,
2759 |             "align_items": null,
2760 |             "align_self": null,
2761 |             "border": null,
2762 |             "bottom": null,
2763 |             "display": null,
2764 |             "flex": null,
2765 |             "flex_flow": null,
2766 |             "grid_area": null,
2767 |             "grid_auto_columns": null,
2768 |             "grid_auto_flow": null,
2769 |             "grid_auto_rows": null,
2770 |             "grid_column": null,
2771 |             "grid_gap": null,
2772 |             "grid_row": null,
2773 |             "grid_template_areas": null,
2774 |             "grid_template_columns": null,
2775 |             "grid_template_rows": null,
2776 |             "height": null,
2777 |             "justify_content": null,
2778 |             "justify_items": null,
2779 |             "left": null,
2780 |             "margin": null,
2781 |             "max_height": null,
2782 |             "max_width": null,
2783 |             "min_height": null,
2784 |             "min_width": null,
2785 |             "object_fit": null,
2786 |             "object_position": null,
2787 |             "order": null,
2788 |             "overflow": null,
2789 |             "overflow_x": null,
2790 |             "overflow_y": null,
2791 |             "padding": null,
2792 |             "right": null,
2793 |             "top": null,
2794 |             "visibility": null,
2795 |             "width": null
2796 |           }
2797 |         },
2798 |         "dd9e81eb4e3d45cca5c6e2b1e6cf335d": {
2799 |           "model_module": "@jupyter-widgets/base",
2800 |           "model_name": "LayoutModel",
2801 |           "model_module_version": "1.2.0",
2802 |           "state": {
2803 |             "_model_module": "@jupyter-widgets/base",
2804 |             "_model_module_version": "1.2.0",
2805 |             "_model_name": "LayoutModel",
2806 |             "_view_count": null,
2807 |             "_view_module": "@jupyter-widgets/base",
2808 |             "_view_module_version": "1.2.0",
2809 |             "_view_name": "LayoutView",
2810 |             "align_content": null,
2811 |             "align_items": null,
2812 |             "align_self": null,
2813 |             "border": null,
2814 |             "bottom": null,
2815 |             "display": null,
2816 |             "flex": null,
2817 |             "flex_flow": null,
2818 |             "grid_area": null,
2819 |             "grid_auto_columns": null,
2820 |             "grid_auto_flow": null,
2821 |             "grid_auto_rows": null,
2822 |             "grid_column": null,
2823 |             "grid_gap": null,
2824 |             "grid_row": null,
2825 |             "grid_template_areas": null,
2826 |             "grid_template_columns": null,
2827 |             "grid_template_rows": null,
2828 |             "height": null,
2829 |             "justify_content": null,
2830 |             "justify_items": null,
2831 |             "left": null,
2832 |             "margin": null,
2833 |             "max_height": null,
2834 |             "max_width": null,
2835 |             "min_height": null,
2836 |             "min_width": null,
2837 |             "object_fit": null,
2838 |             "object_position": null,
2839 |             "order": null,
2840 |             "overflow": null,
2841 |             "overflow_x": null,
2842 |             "overflow_y": null,
2843 |             "padding": null,
2844 |             "right": null,
2845 |             "top": null,
2846 |             "visibility": null,
2847 |             "width": null
2848 |           }
2849 |         },
2850 |         "a7c9efe8c49a43d0ba6929bada9f78c2": {
2851 |           "model_module": "@jupyter-widgets/controls",
2852 |           "model_name": "DescriptionStyleModel",
2853 |           "model_module_version": "1.5.0",
2854 |           "state": {
2855 |             "_model_module": "@jupyter-widgets/controls",
2856 |             "_model_module_version": "1.5.0",
2857 |             "_model_name": "DescriptionStyleModel",
2858 |             "_view_count": null,
2859 |             "_view_module": "@jupyter-widgets/base",
2860 |             "_view_module_version": "1.2.0",
2861 |             "_view_name": "StyleView",
2862 |             "description_width": ""
2863 |           }
2864 |         },
2865 |         "15d3af1073fe4447847d0e6f3543f953": {
2866 |           "model_module": "@jupyter-widgets/base",
2867 |           "model_name": "LayoutModel",
2868 |           "model_module_version": "1.2.0",
2869 |           "state": {
2870 |             "_model_module": "@jupyter-widgets/base",
2871 |             "_model_module_version": "1.2.0",
2872 |             "_model_name": "LayoutModel",
2873 |             "_view_count": null,
2874 |             "_view_module": "@jupyter-widgets/base",
2875 |             "_view_module_version": "1.2.0",
2876 |             "_view_name": "LayoutView",
2877 |             "align_content": null,
2878 |             "align_items": null,
2879 |             "align_self": null,
2880 |             "border": null,
2881 |             "bottom": null,
2882 |             "display": null,
2883 |             "flex": null,
2884 |             "flex_flow": null,
2885 |             "grid_area": null,
2886 |             "grid_auto_columns": null,
2887 |             "grid_auto_flow": null,
2888 |             "grid_auto_rows": null,
2889 |             "grid_column": null,
2890 |             "grid_gap": null,
2891 |             "grid_row": null,
2892 |             "grid_template_areas": null,
2893 |             "grid_template_columns": null,
2894 |             "grid_template_rows": null,
2895 |             "height": null,
2896 |             "justify_content": null,
2897 |             "justify_items": null,
2898 |             "left": null,
2899 |             "margin": null,
2900 |             "max_height": null,
2901 |             "max_width": null,
2902 |             "min_height": null,
2903 |             "min_width": null,
2904 |             "object_fit": null,
2905 |             "object_position": null,
2906 |             "order": null,
2907 |             "overflow": null,
2908 |             "overflow_x": null,
2909 |             "overflow_y": null,
2910 |             "padding": null,
2911 |             "right": null,
2912 |             "top": null,
2913 |             "visibility": null,
2914 |             "width": "20px"
2915 |           }
2916 |         },
2917 |         "e4daf9a3e9e14e93ab55b91da59ecc9b": {
2918 |           "model_module": "@jupyter-widgets/controls",
2919 |           "model_name": "ProgressStyleModel",
2920 |           "model_module_version": "1.5.0",
2921 |           "state": {
2922 |             "_model_module": "@jupyter-widgets/controls",
2923 |             "_model_module_version": "1.5.0",
2924 |             "_model_name": "ProgressStyleModel",
2925 |             "_view_count": null,
2926 |             "_view_module": "@jupyter-widgets/base",
2927 |             "_view_module_version": "1.2.0",
2928 |             "_view_name": "StyleView",
2929 |             "bar_color": null,
2930 |             "description_width": ""
2931 |           }
2932 |         },
2933 |         "3557bb8fc4064fdf99ca2a1ec5469cff": {
2934 |           "model_module": "@jupyter-widgets/base",
2935 |           "model_name": "LayoutModel",
2936 |           "model_module_version": "1.2.0",
2937 |           "state": {
2938 |             "_model_module": "@jupyter-widgets/base",
2939 |             "_model_module_version": "1.2.0",
2940 |             "_model_name": "LayoutModel",
2941 |             "_view_count": null,
2942 |             "_view_module": "@jupyter-widgets/base",
2943 |             "_view_module_version": "1.2.0",
2944 |             "_view_name": "LayoutView",
2945 |             "align_content": null,
2946 |             "align_items": null,
2947 |             "align_self": null,
2948 |             "border": null,
2949 |             "bottom": null,
2950 |             "display": null,
2951 |             "flex": null,
2952 |             "flex_flow": null,
2953 |             "grid_area": null,
2954 |             "grid_auto_columns": null,
2955 |             "grid_auto_flow": null,
2956 |             "grid_auto_rows": null,
2957 |             "grid_column": null,
2958 |             "grid_gap": null,
2959 |             "grid_row": null,
2960 |             "grid_template_areas": null,
2961 |             "grid_template_columns": null,
2962 |             "grid_template_rows": null,
2963 |             "height": null,
2964 |             "justify_content": null,
2965 |             "justify_items": null,
2966 |             "left": null,
2967 |             "margin": null,
2968 |             "max_height": null,
2969 |             "max_width": null,
2970 |             "min_height": null,
2971 |             "min_width": null,
2972 |             "object_fit": null,
2973 |             "object_position": null,
2974 |             "order": null,
2975 |             "overflow": null,
2976 |             "overflow_x": null,
2977 |             "overflow_y": null,
2978 |             "padding": null,
2979 |             "right": null,
2980 |             "top": null,
2981 |             "visibility": null,
2982 |             "width": null
2983 |           }
2984 |         },
2985 |         "b2ccec96efa1415fa4623ec8fa0f2c21": {
2986 |           "model_module": "@jupyter-widgets/controls",
2987 |           "model_name": "DescriptionStyleModel",
2988 |           "model_module_version": "1.5.0",
2989 |           "state": {
2990 |             "_model_module": "@jupyter-widgets/controls",
2991 |             "_model_module_version": "1.5.0",
2992 |             "_model_name": "DescriptionStyleModel",
2993 |             "_view_count": null,
2994 |             "_view_module": "@jupyter-widgets/base",
2995 |             "_view_module_version": "1.2.0",
2996 |             "_view_name": "StyleView",
2997 |             "description_width": ""
2998 |           }
2999 |         },
3000 |         "73bdfdf8980d4c358c90d574eb91bef5": {
3001 |           "model_module": "@jupyter-widgets/controls",
3002 |           "model_name": "HBoxModel",
3003 |           "model_module_version": "1.5.0",
3004 |           "state": {
3005 |             "_dom_classes": [],
3006 |             "_model_module": "@jupyter-widgets/controls",
3007 |             "_model_module_version": "1.5.0",
3008 |             "_model_name": "HBoxModel",
3009 |             "_view_count": null,
3010 |             "_view_module": "@jupyter-widgets/controls",
3011 |             "_view_module_version": "1.5.0",
3012 |             "_view_name": "HBoxView",
3013 |             "box_style": "",
3014 |             "children": [
3015 |               "IPY_MODEL_3f49f9009fa14fe3b87bb123491a4b0f",
3016 |               "IPY_MODEL_548fc33764964fe9a0498194df85b768",
3017 |               "IPY_MODEL_e60ce018bf3a4b15941062300143e2a3"
3018 |             ],
3019 |             "layout": "IPY_MODEL_d0a78497d9694dc6b7e903392daf6a26"
3020 |           }
3021 |         },
3022 |         "3f49f9009fa14fe3b87bb123491a4b0f": {
3023 |           "model_module": "@jupyter-widgets/controls",
3024 |           "model_name": "HTMLModel",
3025 |           "model_module_version": "1.5.0",
3026 |           "state": {
3027 |             "_dom_classes": [],
3028 |             "_model_module": "@jupyter-widgets/controls",
3029 |             "_model_module_version": "1.5.0",
3030 |             "_model_name": "HTMLModel",
3031 |             "_view_count": null,
3032 |             "_view_module": "@jupyter-widgets/controls",
3033 |             "_view_module_version": "1.5.0",
3034 |             "_view_name": "HTMLView",
3035 |             "description": "",
3036 |             "description_tooltip": null,
3037 |             "layout": "IPY_MODEL_5b585e82891a40b0826679a79583ee7c",
3038 |             "placeholder": "​",
3039 |             "style": "IPY_MODEL_db9f5a1c1a0a49b3b58a30f0a74c3329",
3040 |             "value": "adapter_model.bin: 100%"
3041 |           }
3042 |         },
3043 |         "548fc33764964fe9a0498194df85b768": {
3044 |           "model_module": "@jupyter-widgets/controls",
3045 |           "model_name": "FloatProgressModel",
3046 |           "model_module_version": "1.5.0",
3047 |           "state": {
3048 |             "_dom_classes": [],
3049 |             "_model_module": "@jupyter-widgets/controls",
3050 |             "_model_module_version": "1.5.0",
3051 |             "_model_name": "FloatProgressModel",
3052 |             "_view_count": null,
3053 |             "_view_module": "@jupyter-widgets/controls",
3054 |             "_view_module_version": "1.5.0",
3055 |             "_view_name": "ProgressView",
3056 |             "bar_style": "success",
3057 |             "description": "",
3058 |             "description_tooltip": null,
3059 |             "layout": "IPY_MODEL_641cf05e799e4ae89ec84fdf8c225b93",
3060 |             "max": 79177949,
3061 |             "min": 0,
3062 |             "orientation": "horizontal",
3063 |             "style": "IPY_MODEL_cf937013fade482f90bd599eced8bfb4",
3064 |             "value": 79177949
3065 |           }
3066 |         },
3067 |         "e60ce018bf3a4b15941062300143e2a3": {
3068 |           "model_module": "@jupyter-widgets/controls",
3069 |           "model_name": "HTMLModel",
3070 |           "model_module_version": "1.5.0",
3071 |           "state": {
3072 |             "_dom_classes": [],
3073 |             "_model_module": "@jupyter-widgets/controls",
3074 |             "_model_module_version": "1.5.0",
3075 |             "_model_name": "HTMLModel",
3076 |             "_view_count": null,
3077 |             "_view_module": "@jupyter-widgets/controls",
3078 |             "_view_module_version": "1.5.0",
3079 |             "_view_name": "HTMLView",
3080 |             "description": "",
3081 |             "description_tooltip": null,
3082 |             "layout": "IPY_MODEL_ee49f8d2b11f43e2bb30d27407744ed3",
3083 |             "placeholder": "​",
3084 |             "style": "IPY_MODEL_ef2f2655d7b9432f983ae508f6dd4e0b",
3085 |             "value": " 79.2M/79.2M [00:02&lt;00:00, 47.1MB/s]"
3086 |           }
3087 |         },
3088 |         "d0a78497d9694dc6b7e903392daf6a26": {
3089 |           "model_module": "@jupyter-widgets/base",
3090 |           "model_name": "LayoutModel",
3091 |           "model_module_version": "1.2.0",
3092 |           "state": {
3093 |             "_model_module": "@jupyter-widgets/base",
3094 |             "_model_module_version": "1.2.0",
3095 |             "_model_name": "LayoutModel",
3096 |             "_view_count": null,
3097 |             "_view_module": "@jupyter-widgets/base",
3098 |             "_view_module_version": "1.2.0",
3099 |             "_view_name": "LayoutView",
3100 |             "align_content": null,
3101 |             "align_items": null,
3102 |             "align_self": null,
3103 |             "border": null,
3104 |             "bottom": null,
3105 |             "display": null,
3106 |             "flex": null,
3107 |             "flex_flow": null,
3108 |             "grid_area": null,
3109 |             "grid_auto_columns": null,
3110 |             "grid_auto_flow": null,
3111 |             "grid_auto_rows": null,
3112 |             "grid_column": null,
3113 |             "grid_gap": null,
3114 |             "grid_row": null,
3115 |             "grid_template_areas": null,
3116 |             "grid_template_columns": null,
3117 |             "grid_template_rows": null,
3118 |             "height": null,
3119 |             "justify_content": null,
3120 |             "justify_items": null,
3121 |             "left": null,
3122 |             "margin": null,
3123 |             "max_height": null,
3124 |             "max_width": null,
3125 |             "min_height": null,
3126 |             "min_width": null,
3127 |             "object_fit": null,
3128 |             "object_position": null,
3129 |             "order": null,
3130 |             "overflow": null,
3131 |             "overflow_x": null,
3132 |             "overflow_y": null,
3133 |             "padding": null,
3134 |             "right": null,
3135 |             "top": null,
3136 |             "visibility": null,
3137 |             "width": null
3138 |           }
3139 |         },
3140 |         "5b585e82891a40b0826679a79583ee7c": {
3141 |           "model_module": "@jupyter-widgets/base",
3142 |           "model_name": "LayoutModel",
3143 |           "model_module_version": "1.2.0",
3144 |           "state": {
3145 |             "_model_module": "@jupyter-widgets/base",
3146 |             "_model_module_version": "1.2.0",
3147 |             "_model_name": "LayoutModel",
3148 |             "_view_count": null,
3149 |             "_view_module": "@jupyter-widgets/base",
3150 |             "_view_module_version": "1.2.0",
3151 |             "_view_name": "LayoutView",
3152 |             "align_content": null,
3153 |             "align_items": null,
3154 |             "align_self": null,
3155 |             "border": null,
3156 |             "bottom": null,
3157 |             "display": null,
3158 |             "flex": null,
3159 |             "flex_flow": null,
3160 |             "grid_area": null,
3161 |             "grid_auto_columns": null,
3162 |             "grid_auto_flow": null,
3163 |             "grid_auto_rows": null,
3164 |             "grid_column": null,
3165 |             "grid_gap": null,
3166 |             "grid_row": null,
3167 |             "grid_template_areas": null,
3168 |             "grid_template_columns": null,
3169 |             "grid_template_rows": null,
3170 |             "height": null,
3171 |             "justify_content": null,
3172 |             "justify_items": null,
3173 |             "left": null,
3174 |             "margin": null,
3175 |             "max_height": null,
3176 |             "max_width": null,
3177 |             "min_height": null,
3178 |             "min_width": null,
3179 |             "object_fit": null,
3180 |             "object_position": null,
3181 |             "order": null,
3182 |             "overflow": null,
3183 |             "overflow_x": null,
3184 |             "overflow_y": null,
3185 |             "padding": null,
3186 |             "right": null,
3187 |             "top": null,
3188 |             "visibility": null,
3189 |             "width": null
3190 |           }
3191 |         },
3192 |         "db9f5a1c1a0a49b3b58a30f0a74c3329": {
3193 |           "model_module": "@jupyter-widgets/controls",
3194 |           "model_name": "DescriptionStyleModel",
3195 |           "model_module_version": "1.5.0",
3196 |           "state": {
3197 |             "_model_module": "@jupyter-widgets/controls",
3198 |             "_model_module_version": "1.5.0",
3199 |             "_model_name": "DescriptionStyleModel",
3200 |             "_view_count": null,
3201 |             "_view_module": "@jupyter-widgets/base",
3202 |             "_view_module_version": "1.2.0",
3203 |             "_view_name": "StyleView",
3204 |             "description_width": ""
3205 |           }
3206 |         },
3207 |         "641cf05e799e4ae89ec84fdf8c225b93": {
3208 |           "model_module": "@jupyter-widgets/base",
3209 |           "model_name": "LayoutModel",
3210 |           "model_module_version": "1.2.0",
3211 |           "state": {
3212 |             "_model_module": "@jupyter-widgets/base",
3213 |             "_model_module_version": "1.2.0",
3214 |             "_model_name": "LayoutModel",
3215 |             "_view_count": null,
3216 |             "_view_module": "@jupyter-widgets/base",
3217 |             "_view_module_version": "1.2.0",
3218 |             "_view_name": "LayoutView",
3219 |             "align_content": null,
3220 |             "align_items": null,
3221 |             "align_self": null,
3222 |             "border": null,
3223 |             "bottom": null,
3224 |             "display": null,
3225 |             "flex": null,
3226 |             "flex_flow": null,
3227 |             "grid_area": null,
3228 |             "grid_auto_columns": null,
3229 |             "grid_auto_flow": null,
3230 |             "grid_auto_rows": null,
3231 |             "grid_column": null,
3232 |             "grid_gap": null,
3233 |             "grid_row": null,
3234 |             "grid_template_areas": null,
3235 |             "grid_template_columns": null,
3236 |             "grid_template_rows": null,
3237 |             "height": null,
3238 |             "justify_content": null,
3239 |             "justify_items": null,
3240 |             "left": null,
3241 |             "margin": null,
3242 |             "max_height": null,
3243 |             "max_width": null,
3244 |             "min_height": null,
3245 |             "min_width": null,
3246 |             "object_fit": null,
3247 |             "object_position": null,
3248 |             "order": null,
3249 |             "overflow": null,
3250 |             "overflow_x": null,
3251 |             "overflow_y": null,
3252 |             "padding": null,
3253 |             "right": null,
3254 |             "top": null,
3255 |             "visibility": null,
3256 |             "width": null
3257 |           }
3258 |         },
3259 |         "cf937013fade482f90bd599eced8bfb4": {
3260 |           "model_module": "@jupyter-widgets/controls",
3261 |           "model_name": "ProgressStyleModel",
3262 |           "model_module_version": "1.5.0",
3263 |           "state": {
3264 |             "_model_module": "@jupyter-widgets/controls",
3265 |             "_model_module_version": "1.5.0",
3266 |             "_model_name": "ProgressStyleModel",
3267 |             "_view_count": null,
3268 |             "_view_module": "@jupyter-widgets/base",
3269 |             "_view_module_version": "1.2.0",
3270 |             "_view_name": "StyleView",
3271 |             "bar_color": null,
3272 |             "description_width": ""
3273 |           }
3274 |         },
3275 |         "ee49f8d2b11f43e2bb30d27407744ed3": {
3276 |           "model_module": "@jupyter-widgets/base",
3277 |           "model_name": "LayoutModel",
3278 |           "model_module_version": "1.2.0",
3279 |           "state": {
3280 |             "_model_module": "@jupyter-widgets/base",
3281 |             "_model_module_version": "1.2.0",
3282 |             "_model_name": "LayoutModel",
3283 |             "_view_count": null,
3284 |             "_view_module": "@jupyter-widgets/base",
3285 |             "_view_module_version": "1.2.0",
3286 |             "_view_name": "LayoutView",
3287 |             "align_content": null,
3288 |             "align_items": null,
3289 |             "align_self": null,
3290 |             "border": null,
3291 |             "bottom": null,
3292 |             "display": null,
3293 |             "flex": null,
3294 |             "flex_flow": null,
3295 |             "grid_area": null,
3296 |             "grid_auto_columns": null,
3297 |             "grid_auto_flow": null,
3298 |             "grid_auto_rows": null,
3299 |             "grid_column": null,
3300 |             "grid_gap": null,
3301 |             "grid_row": null,
3302 |             "grid_template_areas": null,
3303 |             "grid_template_columns": null,
3304 |             "grid_template_rows": null,
3305 |             "height": null,
3306 |             "justify_content": null,
3307 |             "justify_items": null,
3308 |             "left": null,
3309 |             "margin": null,
3310 |             "max_height": null,
3311 |             "max_width": null,
3312 |             "min_height": null,
3313 |             "min_width": null,
3314 |             "object_fit": null,
3315 |             "object_position": null,
3316 |             "order": null,
3317 |             "overflow": null,
3318 |             "overflow_x": null,
3319 |             "overflow_y": null,
3320 |             "padding": null,
3321 |             "right": null,
3322 |             "top": null,
3323 |             "visibility": null,
3324 |             "width": null
3325 |           }
3326 |         },
3327 |         "ef2f2655d7b9432f983ae508f6dd4e0b": {
3328 |           "model_module": "@jupyter-widgets/controls",
3329 |           "model_name": "DescriptionStyleModel",
3330 |           "model_module_version": "1.5.0",
3331 |           "state": {
3332 |             "_model_module": "@jupyter-widgets/controls",
3333 |             "_model_module_version": "1.5.0",
3334 |             "_model_name": "DescriptionStyleModel",
3335 |             "_view_count": null,
3336 |             "_view_module": "@jupyter-widgets/base",
3337 |             "_view_module_version": "1.2.0",
3338 |             "_view_name": "StyleView",
3339 |             "description_width": ""
3340 |           }
3341 |         }
3342 |       }
3343 |     }
3344 |   },
3345 |   "nbformat": 4,
3346 |   "nbformat_minor": 0
3347 | }


--------------------------------------------------------------------------------
/personal_copilot/dataset_generation/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huggingface/peft-pytorch-conference/57116ac4e78ae5623436cc72f478151195c195aa/personal_copilot/dataset_generation/README.md


--------------------------------------------------------------------------------
/personal_copilot/dataset_generation/clone_hf_repos.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Courtesy: Sayak Paul and Chansung Park.
 3 | """
 4 | 
 5 | import os
 6 | import subprocess
 7 | from multiprocessing import Pool
 8 | from github import Github
 9 | 
10 | ORG = "huggingface"
11 | MIRROR_DIRECTORY = "hf_public_repos"
12 | TOP_K = 10
13 | 
14 | 
15 | def get_repos(username, access_token=None, include_fork=False):
16 |     """Fetches repositories for a particular GitHub user.
17 | 
18 |     Courtesy: Chansung Park.
19 |     """
20 |     g = Github(access_token)
21 |     user = g.get_user(username)
22 | 
23 |     results = []
24 |     for repo in user.get_repos():
25 |         if repo.fork is False:
26 |             results.append((repo.name, repo.stargazers_count))
27 |         else:
28 |             if include_fork is True:
29 |                 results.append((repo.name, repo.stargazers_count))
30 |     print(results)
31 |     return results
32 | 
33 | 
34 | def sort_repos_by_stars(repos):
35 |     return sorted(repos, key=lambda x: x[1], reverse=True)
36 | 
37 | 
38 | def mirror_repository(repository):
39 |     """Locally clones a repository."""
40 |     repository_url = f"https://github.com/{ORG}/{repository}.git"
41 |     repository_path = os.path.join(MIRROR_DIRECTORY, repository)
42 | 
43 |     # Clone the repository
44 |     subprocess.run(["git", "clone", repository_url, repository_path])
45 | 
46 | 
47 | def mirror_repositories():
48 |     # Create the mirror directory if it doesn't exist
49 |     if not os.path.exists(MIRROR_DIRECTORY):
50 |         os.makedirs(MIRROR_DIRECTORY)
51 | 
52 |     # Get the list of repositories in the organization
53 |     if not os.environ["GH_ACCESS_TOKEN"]:
54 |         raise ValueError("You must set `GH_ACCESS_TOKEN` as an env variable.")
55 |     repositories = get_repos(ORG, os.environ["GH_ACCESS_TOKEN"])
56 |     sorted_repos = sort_repos_by_stars(repositories)
57 |     selected_repos = [x[0] for x in sorted_repos[:TOP_K]]
58 | 
59 |     print(f"Total repositories found: {len(selected_repos)}.")
60 |     print(selected_repos)
61 |     # Mirror repositories using multiprocessing
62 |     print("Cloning repositories.")
63 |     with Pool() as pool:
64 |         pool.map(mirror_repository, selected_repos)
65 | 
66 | 
67 | if __name__ == "__main__":
68 |     mirror_repositories()
69 | 


--------------------------------------------------------------------------------
/personal_copilot/dataset_generation/prepare_dataset.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Courtesy: Sayak Paul and Chansung Park.
  3 | """
  4 | 
  5 | import os
  6 | import pandas as pd
  7 | from nbformat import reads, NO_CONVERT
  8 | from tqdm import tqdm
  9 | from datasets import Dataset
 10 | from typing import Dict
 11 | from huggingface_hub import HfApi, create_repo
 12 | import tempfile
 13 | import subprocess
 14 | 
 15 | MIRROR_DIRECTORY = "hf_public_repos"
 16 | DATASET_ID = "hf-stack-v2"
 17 | SERIALIZE_IN_CHUNKS = False
 18 | FEATHER_FORMAT = "ftr"
 19 | PARQUET_FORMAT = "parquet"
 20 | 
 21 | # Block the following formats.
 22 | IMAGE = ["png", "jpg", "jpeg", "gif"]
 23 | VIDEO = ["mp4", "jfif"]
 24 | DOC = [
 25 |     "key",
 26 |     "PDF",
 27 |     "pdf",
 28 |     "docx",
 29 |     "xlsx",
 30 |     "pptx",
 31 | ]
 32 | AUDIO = ["flac", "ogg", "mid", "webm", "wav", "mp3"]
 33 | ARCHIVE = ["jar", "aar", "gz", "zip", "bz2"]
 34 | MODEL = ["onnx", "pickle", "model", "neuron"]
 35 | OTHERS = [
 36 |     "npy",
 37 |     "index",
 38 |     "inv",
 39 |     "index",
 40 |     "DS_Store",
 41 |     "rdb",
 42 |     "pack",
 43 |     "idx",
 44 |     "glb",
 45 |     "gltf",
 46 |     "len",
 47 |     "otf",
 48 |     "unitypackage",
 49 |     "ttf",
 50 |     "xz",
 51 |     "pcm",
 52 |     "opus",
 53 | ]
 54 | ANTI_FOMATS = tuple(IMAGE + VIDEO + DOC + AUDIO + ARCHIVE + OTHERS)
 55 | 
 56 | 
 57 | def upload_to_hub(file_format: str, repo_id: str):
 58 |     """Moves all the files matching `file_format` to a folder and
 59 |     uploads the folder to the Hugging Face Hub."""
 60 |     api = HfApi()
 61 |     repo_id = create_repo(repo_id=repo_id, exist_ok=True, repo_type="dataset").repo_id
 62 | 
 63 |     with tempfile.TemporaryDirectory() as tmpdirname:
 64 |         os.makedirs(tmpdirname, exist_ok=True)
 65 |         command = f"mv *.{file_format} {tmpdirname}"
 66 |         _ = subprocess.run(command.split())
 67 |         api.upload_folder(repo_id=repo_id, folder_path=tmpdirname, repo_type="dataset")
 68 | 
 69 | 
 70 | def filter_code_cell(cell) -> bool:
 71 |     """Filters a code cell w.r.t shell commands, etc."""
 72 |     only_shell = cell["source"].startswith("!")
 73 |     only_magic = "%%capture" in cell["source"]
 74 |     if only_shell or only_magic:
 75 |         return False
 76 |     else:
 77 |         return True
 78 | 
 79 | 
 80 | def process_file(directory_name: str, file_path: str) -> Dict[str, str]:
 81 |     """Processes a single file."""
 82 |     try:
 83 |         with open(file_path, "r", encoding="utf-8") as file:
 84 |             content = file.read()
 85 |             if file_path.endswith("ipynb"):
 86 |                 # Code courtesy: Chansung Park and Sayak Paul.
 87 |                 code_cell_str = ""
 88 |                 notebook = reads(content, NO_CONVERT)
 89 | 
 90 |                 code_cells = [c for c in notebook["cells"] if c["cell_type"] == "code" if filter_code_cell(c)]
 91 | 
 92 |                 for cell in code_cells:
 93 |                     code_cell_str += cell["source"]
 94 |                 content = code_cell_str
 95 |     except Exception:
 96 |         content = ""
 97 | 
 98 |     return {
 99 |         "repo_id": directory_name,
100 |         "file_path": file_path,
101 |         "content": content,
102 |     }
103 | 
104 | 
105 | def read_repository_files(directory) -> pd.DataFrame:
106 |     """Reads the files from the locally cloned repositories."""
107 |     file_paths = []
108 |     df = pd.DataFrame(columns=["repo_id", "file_path", "content"])
109 |     chunk_flag = 0
110 | 
111 |     # Recursively find all files within the directory
112 |     for root, _, files in os.walk(directory):
113 |         for file in files:
114 |             file_path = os.path.join(root, file)
115 |             if not file_path.endswith(ANTI_FOMATS) and all(
116 |                 k not in file_path for k in [".git", "__pycache__", "xcodeproj"]
117 |             ):
118 |                 file_paths.append((os.path.dirname(root), file_path))
119 | 
120 |     # Process files sequentially.
121 |     print(f"Total file paths: {len(file_paths)}.")
122 |     print("Reading file contents...")
123 | 
124 |     for i, (directory_name, file_path) in enumerate(tqdm(file_paths)):
125 |         file_content = process_file(directory_name, file_path)
126 | 
127 |         if file_content["content"] != "":
128 |             temp_df = pd.DataFrame.from_dict([file_content])
129 |             df = pd.concat([df, temp_df])
130 | 
131 |             if SERIALIZE_IN_CHUNKS and len(df) != 0 and (len(df) % SERIALIZE_IN_CHUNKS == 0):
132 |                 df_path = f"df_chunk_{chunk_flag}_{len(df)}.{FEATHER_FORMAT}"
133 |                 print(f"Serializing dataframe to {df_path}...")
134 |                 df.reset_index().to_parquet(df_path)
135 |                 del df
136 |                 df = pd.DataFrame(columns=["repo_id", "file_path", "content"])
137 |                 chunk_flag += 1
138 | 
139 |     return df
140 | 
141 | 
142 | if __name__ == "__main__":
143 |     df = read_repository_files(MIRROR_DIRECTORY)
144 |     print("DataFrame created, creating dataset...")
145 |     upload_to_hub(file_format=PARQUET_FORMAT, repo_id=DATASET_ID)
146 |     print(f"{FEATHER_FORMAT} files uploaded to the Hub.")
147 |     if not SERIALIZE_IN_CHUNKS:
148 |         dataset = Dataset.from_pandas(df)
149 |         dataset.push_to_hub(DATASET_ID, private=True)
150 | 


--------------------------------------------------------------------------------
/personal_copilot/dataset_generation/requirements.txt:
--------------------------------------------------------------------------------
1 | PyGithub
2 | datasets
3 | nbformat
4 | pandas


--------------------------------------------------------------------------------
/personal_copilot/training/fim.py:
--------------------------------------------------------------------------------
 1 | import functools
 2 | 
 3 | import numpy as np
 4 | 
 5 | 
 6 | # this is expensive so we cache it
 7 | @functools.lru_cache(maxsize=None)
 8 | def get_fim_token_ids(tokenizer):
 9 |     return tokenizer.bos_token_id, tokenizer.suffix_id, tokenizer.prefix_id, tokenizer.middle_id, 0
10 | 
11 | 
12 | ## Adapted from https://github.com/bigcode-project/Megatron-LM/blob/6c4bf908df8fd86b4977f54bf5b8bd4b521003d1/megatron/data/gpt_dataset.py
13 | def permute(
14 |     sample,
15 |     np_rng,
16 |     bos_token_id,
17 |     suffix_tok_id,
18 |     prefix_tok_id,
19 |     middle_tok_id,
20 |     pad_tok_id,
21 |     fim_rate=0.5,
22 |     fim_spm_rate=0.5,
23 |     truncate_or_pad=False,
24 | ):
25 |     """
26 |     Take in a sample (list of tokens) and perform a FIM transformation on it with a probability of fim_rate, using two FIM modes:
27 |     PSM and SPM (with a probability of fim_spm_rate).
28 |     """
29 | 
30 |     if np_rng.binomial(1, fim_rate):
31 |         boundaries = list(np_rng.randint(low=0, high=len(sample) + 1, size=2))
32 |         boundaries.sort()
33 | 
34 |         prefix = np.array(sample[: boundaries[0]], dtype=np.int64)
35 |         middle = np.array(sample[boundaries[0] : boundaries[1]], dtype=np.int64)
36 |         suffix = np.array(sample[boundaries[1] :], dtype=np.int64)
37 | 
38 |         if truncate_or_pad:
39 |             new_length = suffix.shape[0] + prefix.shape[0] + middle.shape[0] + 3
40 |             diff = new_length - len(sample)
41 |             if diff > 0:
42 |                 if suffix.shape[0] <= diff:
43 |                     return sample, np_rng
44 |                 suffix = suffix[: suffix.shape[0] - diff]
45 |             elif diff < 0:
46 |                 suffix = np.concatenate([suffix, np.full((-1 * diff), pad_tok_id)])
47 | 
48 |         if np_rng.binomial(1, fim_spm_rate):
49 |             # SPM (variant 2 from FIM paper)
50 |             new_sample = np.concatenate(
51 |                 [
52 |                     [bos_token_id, prefix_tok_id, suffix_tok_id],
53 |                     suffix,
54 |                     [middle_tok_id],
55 |                     prefix,
56 |                     middle,
57 |                 ]
58 |             )
59 |         else:
60 |             # PSM
61 |             new_sample = np.concatenate(
62 |                 [
63 |                     [bos_token_id, prefix_tok_id],
64 |                     prefix,
65 |                     [suffix_tok_id],
66 |                     suffix,
67 |                     [middle_tok_id],
68 |                     middle,
69 |                 ]
70 |             )
71 |     else:
72 |         # don't do FIM preproc
73 |         new_sample = sample
74 | 
75 |     return list(new_sample), np_rng
76 | 


--------------------------------------------------------------------------------
/personal_copilot/training/llama_flash_attn_monkey_patch.py:
--------------------------------------------------------------------------------
  1 | # copied from https://github.com/lm-sys/FastChat/blob/main/fastchat/train/llama_flash_attn_monkey_patch.py
  2 | 
  3 | from typing import List, Optional, Tuple, Union
  4 | import logging
  5 | 
  6 | import torch
  7 | from torch import nn
  8 | 
  9 | import transformers
 10 | from transformers.models.llama.modeling_llama import apply_rotary_pos_emb
 11 | 
 12 | from einops import rearrange
 13 | from flash_attn import flash_attn_func
 14 | 
 15 | 
 16 | def forward(
 17 |     self,
 18 |     hidden_states: torch.Tensor,
 19 |     attention_mask: Optional[torch.Tensor] = None,
 20 |     position_ids: Optional[torch.LongTensor] = None,
 21 |     past_key_value: Optional[Tuple[torch.Tensor]] = None,
 22 |     output_attentions: bool = False,
 23 |     use_cache: bool = False,
 24 |     **dummy_kwargs,
 25 | ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
 26 |     bsz, q_len, _ = hidden_states.size()
 27 | 
 28 |     if self.config.pretraining_tp > 1:
 29 |         raise ValueError("pretraining_tp > 1 is not supported for flash attention")
 30 |     else:
 31 |         query_states = self.q_proj(hidden_states)
 32 |         key_states = self.k_proj(hidden_states)
 33 |         value_states = self.v_proj(hidden_states)
 34 | 
 35 |     query_states = query_states.view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
 36 |     key_states = key_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2)
 37 |     value_states = value_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2)
 38 | 
 39 |     kv_seq_len = key_states.shape[-2]
 40 | 
 41 |     if past_key_value is not None:
 42 |         kv_seq_len += past_key_value[0].shape[-2]
 43 |     cos, sin = self.rotary_emb(value_states, seq_len=kv_seq_len)
 44 |     query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin, position_ids)
 45 | 
 46 |     if past_key_value is not None:
 47 |         # reuse k, v, self_attention
 48 |         key_states = torch.cat([past_key_value[0], key_states], dim=2)
 49 |         value_states = torch.cat([past_key_value[1], value_states], dim=2)
 50 | 
 51 |     past_key_value = (key_states, value_states) if use_cache else None
 52 | 
 53 |     query_states, key_states, value_states = [
 54 |         rearrange(x, "b h s d -> b s h d") for x in [query_states, key_states, value_states]
 55 |     ]
 56 | 
 57 |     query_states, key_states, value_states = [x.to(torch.bfloat16) for x in [query_states, key_states, value_states]]
 58 |     # print(f"{query.shape=} {key.shape=} {value.shape=}")
 59 |     # below output will have shape (batch_size, seqlen, nheads, headdim)
 60 |     attn_output = flash_attn_func(query_states, key_states, value_states, causal=True)
 61 | 
 62 |     if attn_output.size() != (bsz, q_len, self.num_heads, self.head_dim):
 63 |         raise ValueError(
 64 |             f"`attn_output` should be of size {(bsz, q_len, self.num_heads, self.head_dim)}, but is"
 65 |             f" {attn_output.size()}"
 66 |         )
 67 | 
 68 |     attn_output = attn_output.reshape(bsz, q_len, self.hidden_size)
 69 |     attn_output = self.o_proj(attn_output)
 70 |     if output_attentions:
 71 |         raise NotImplementedError("`output_attentions` is not supported when `use_flash_attn` is True")
 72 |     attn_weights = None
 73 | 
 74 |     return attn_output, attn_weights, past_key_value
 75 | 
 76 | 
 77 | # def forward(
 78 | #     self,
 79 | #     hidden_states: torch.Tensor,
 80 | #     attention_mask: Optional[torch.Tensor] = None,
 81 | #     position_ids: Optional[torch.Tensor] = None,
 82 | #     past_key_value: Optional[Tuple[torch.Tensor]] = None,
 83 | #     output_attentions: bool = False,
 84 | #     use_cache: bool = False,
 85 | # ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
 86 | #     """Input shape: Batch x Time x Channel
 87 | 
 88 | #     attention_mask: [bsz, q_len]
 89 | #     """
 90 | #     bsz, q_len, _ = hidden_states.size()
 91 | 
 92 | #     query_states = self.q_proj(hidden_states).view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
 93 | #     key_states = self.k_proj(hidden_states).view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
 94 | #     value_states = self.v_proj(hidden_states).view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
 95 | #     # [bsz, q_len, nh, hd]
 96 | #     # [bsz, nh, q_len, hd]
 97 | 
 98 | #     kv_seq_len = key_states.shape[-2]
 99 | #     assert past_key_value is None, "past_key_value is not supported"
100 | 
101 | #     cos, sin = self.rotary_emb(value_states, seq_len=kv_seq_len)
102 | #     query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin, position_ids)
103 | #     # [bsz, nh, t, hd]
104 | #     assert not output_attentions, "output_attentions is not supported"
105 | #     assert not use_cache, "use_cache is not supported"
106 | 
107 | #     # Flash attention codes from
108 | #     # https://github.com/HazyResearch/flash-attention/blob/main/flash_attn/flash_attention.py
109 | 
110 | #     # transform the data into the format required by flash attention
111 | #     qkv = torch.stack([query_states, key_states, value_states], dim=2)  # [bsz, nh, 3, q_len, hd]
112 | #     qkv = qkv.transpose(1, 3)  # [bsz, q_len, 3, nh, hd]
113 | #     # We have disabled _prepare_decoder_attention_mask in LlamaModel
114 | #     # the attention_mask should be the same as the key_padding_mask
115 | #     key_padding_mask = attention_mask
116 | 
117 | #     if key_padding_mask is None:
118 | #         qkv = rearrange(qkv, "b s ... -> (b s) ...")
119 | #         max_s = q_len
120 | #         cu_q_lens = torch.arange(0, (bsz + 1) * q_len, step=q_len, dtype=torch.int32, device=qkv.device)
121 | #         output = flash_attn_unpadded_qkvpacked_func(qkv, cu_q_lens, max_s, 0.0, softmax_scale=None, causal=True)
122 | #         output = rearrange(output, "(b s) ... -> b s ...", b=bsz)
123 | #     else:
124 | #         nheads = qkv.shape[-2]
125 | #         x = rearrange(qkv, "b s three h d -> b s (three h d)")
126 | #         x_unpad, indices, cu_q_lens, max_s = unpad_input(x, key_padding_mask)
127 | #         x_unpad = rearrange(x_unpad, "nnz (three h d) -> nnz three h d", three=3, h=nheads)
128 | #         output_unpad = flash_attn_unpadded_qkvpacked_func(
129 | #             x_unpad, cu_q_lens, max_s, 0.0, softmax_scale=None, causal=True
130 | #         )
131 | #         output = rearrange(
132 | #             pad_input(rearrange(output_unpad, "nnz h d -> nnz (h d)"), indices, bsz, q_len),
133 | #             "b s (h d) -> b s h d",
134 | #             h=nheads,
135 | #         )
136 | #     return self.o_proj(rearrange(output, "b s h d -> b s (h d)")), None, None
137 | 
138 | 
139 | # Disable the transformation of the attention mask in LlamaModel as the flash attention
140 | # requires the attention mask to be the same as the key_padding_mask
141 | def _prepare_decoder_attention_mask(self, attention_mask, input_shape, inputs_embeds, past_key_values_length):
142 |     # [bsz, seq_len]
143 |     return attention_mask
144 | 
145 | 
146 | def replace_llama_attn_with_flash_attn():
147 |     cuda_major, cuda_minor = torch.cuda.get_device_capability()
148 |     if cuda_major < 8:
149 |         logging.warning(
150 |             "Flash attention is only supported on A100 or H100 GPU during training due to head dim > 64 backward."
151 |             "ref: https://github.com/HazyResearch/flash-attention/issues/190#issuecomment-1523359593"
152 |         )
153 |     transformers.models.llama.modeling_llama.LlamaModel._prepare_decoder_attention_mask = (
154 |         _prepare_decoder_attention_mask
155 |     )
156 |     transformers.models.llama.modeling_llama.LlamaAttention.forward = forward
157 | 


--------------------------------------------------------------------------------
/personal_copilot/training/requirements.txt:
--------------------------------------------------------------------------------
 1 | git+https://github.com/huggingface/transformers
 2 | git+https://github.com/huggingface/accelerate
 3 | git+https://github.com/huggingface/peft
 4 | trl
 5 | huggingface-hub
 6 | bitsandbytes
 7 | evaluate
 8 | datasets
 9 | einops
10 | wandb
11 | tiktoken
12 | deepspeed
13 | tqdm
14 | safetensors


--------------------------------------------------------------------------------
/personal_copilot/training/run_peft.sh:
--------------------------------------------------------------------------------
 1 | python train.py \
 2 |     --model_path "codellama/CodeLlama-13b-Instruct-hf" \
 3 |     --dataset_name "smangrul/hf-stack-v3" \
 4 |     --subset "data" \
 5 |     --data_column "content" \
 6 |     --split "train" \
 7 |     --seq_length 2048 \
 8 |     --max_steps 2000 \
 9 |     --batch_size 8 \
10 |     --gradient_accumulation_steps 2 \
11 |     --learning_rate 3e-4 \
12 |     --lr_scheduler_type "cosine" \
13 |     --weight_decay 0.01 \
14 |     --num_warmup_steps 30 \
15 |     --eval_freq 100 \
16 |     --save_freq 100 \
17 |     --log_freq 5 \
18 |     --push_to_hub \
19 |     --num_workers 4 \
20 |     --bf16 \
21 |     --no_fp16 \
22 |     --output_dir "codellama-13b-personal-copilot" \
23 |     --fim_rate 0.5 \
24 |     --fim_spm_rate 0.0 \
25 |     --use_peft_lora \
26 |     --lora_r 8 \
27 |     --lora_alpha 32 \
28 |     --lora_dropout 0.1 \
29 |     --lora_target_modules "q_proj,k_proj,v_proj,o_proj,down_proj,up_proj,gate_proj" \
30 |     --use_flash_attn \
31 |     --use_4bit_qunatization \
32 |     --use_nested_quant \
33 |     --bnb_4bit_compute_dtype "bfloat16" \
34 |     --seed 24


--------------------------------------------------------------------------------
/personal_copilot/training/train.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Fine-Tune StarCoder on code/text dataset
  3 | """
  4 | 
  5 | import argparse
  6 | import os
  7 | import random
  8 | import subprocess
  9 | import warnings
 10 | 
 11 | import numpy as np
 12 | import torch
 13 | from datasets import load_dataset
 14 | from torch.utils.data import IterableDataset
 15 | from torch.utils.data.dataloader import DataLoader
 16 | from tqdm import tqdm
 17 | from transformers import (
 18 |     AutoModelForCausalLM,
 19 |     AutoTokenizer,
 20 |     Trainer,
 21 |     TrainingArguments,
 22 |     logging,
 23 |     set_seed,
 24 |     BitsAndBytesConfig,
 25 | )
 26 | 
 27 | from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
 28 | from peft.tuners.lora import LoraLayer
 29 | 
 30 | import fim
 31 | 
 32 | 
 33 | def get_args():
 34 |     parser = argparse.ArgumentParser()
 35 |     parser.add_argument("--model_path", type=str, default="codellama/CodeLlama-13b-Instruct-hf")
 36 |     parser.add_argument("--dataset_name", type=str, default="smangrul/hf-stack-v2")
 37 |     parser.add_argument("--subset", type=str, default="data")
 38 |     parser.add_argument("--split", type=str, default="train")
 39 |     parser.add_argument("--size_valid_set", type=int, default=4000)
 40 |     parser.add_argument("--test_size", type=float, default=0.005)
 41 |     parser.add_argument("--streaming", action="store_true")
 42 |     parser.add_argument("--shuffle_buffer", type=int, default=5000)
 43 |     parser.add_argument("--data_column", type=str, default="content")
 44 | 
 45 |     parser.add_argument("--seq_length", type=int, default=8192)
 46 |     parser.add_argument("--max_steps", type=int, default=10000)
 47 |     parser.add_argument("--batch_size", type=int, default=2)
 48 |     parser.add_argument("--gradient_accumulation_steps", type=int, default=8)
 49 |     parser.add_argument("--eos_token_id", type=int, default=49152)
 50 | 
 51 |     parser.add_argument("--learning_rate", type=float, default=5e-5)
 52 |     parser.add_argument("--lr_scheduler_type", type=str, default="cosine")
 53 |     parser.add_argument("--num_warmup_steps", type=int, default=100)
 54 |     parser.add_argument("--weight_decay", type=float, default=0.05)
 55 | 
 56 |     parser.add_argument("--local_rank", type=int, default=0)
 57 |     parser.add_argument("--no_fp16", action="store_false")
 58 |     parser.add_argument("--bf16", action="store_true")
 59 |     parser.add_argument("--no_gradient_checkpointing", action="store_false")
 60 |     parser.add_argument("--seed", type=int, default=0)
 61 |     parser.add_argument("--num_workers", type=int, default=None)
 62 |     parser.add_argument("--output_dir", type=str, default="./checkpoints")
 63 |     parser.add_argument("--log_freq", default=1, type=int)
 64 |     parser.add_argument("--eval_freq", default=1000, type=int)
 65 |     parser.add_argument("--save_freq", default=1000, type=int)
 66 | 
 67 |     parser.add_argument("--fim_rate", type=float, default=0)
 68 |     parser.add_argument("--fim_spm_rate", type=float, default=0)
 69 | 
 70 |     parser.add_argument("--use_peft_lora", action="store_true")
 71 |     parser.add_argument("--lora_r", type=int, default=0)
 72 |     parser.add_argument("--lora_alpha", type=int, default=0)
 73 |     parser.add_argument("--lora_dropout", type=float, default=0)
 74 |     parser.add_argument("--lora_target_modules", type=str, default=None)
 75 | 
 76 |     parser.add_argument("--use_flash_attn", action="store_true")
 77 | 
 78 |     parser.add_argument("--use_4bit_qunatization", action="store_true")
 79 |     parser.add_argument("--use_nested_quant", action="store_true")
 80 |     parser.add_argument("--bnb_4bit_quant_type", type=str, default="nf4")
 81 |     parser.add_argument("--bnb_4bit_compute_dtype", type=str, default="float16")
 82 | 
 83 |     parser.add_argument("--use_8bit_qunatization", action="store_true")
 84 | 
 85 |     parser.add_argument("--push_to_hub", action="store_true")
 86 | 
 87 |     return parser.parse_args()
 88 | 
 89 | 
 90 | def chars_token_ratio(dataset, tokenizer, data_column, nb_examples=400):
 91 |     """
 92 |     Estimate the average number of characters per token in the dataset.
 93 |     """
 94 |     total_characters, total_tokens = 0, 0
 95 |     for _, example in tqdm(zip(range(nb_examples), iter(dataset)), total=nb_examples):
 96 |         total_characters += len(example[data_column])
 97 |         total_tokens += len(tokenizer(example[data_column]).tokens())
 98 | 
 99 |     return total_characters / total_tokens
100 | 
101 | 
102 | class ConstantLengthDataset(IterableDataset):
103 |     """
104 |     Iterable dataset that returns constant length chunks of tokens from stream of text files.
105 |         Args:
106 |             tokenizer (Tokenizer): The processor used for proccessing the data.
107 |             dataset (dataset.Dataset): Dataset with text files.
108 |             infinite (bool): If True the iterator is reset after dataset reaches end else stops.
109 |             seq_length (int): Length of token sequences to return.
110 |             num_of_sequences (int): Number of token sequences to keep in buffer.
111 |             chars_per_token (int): Number of characters per token used to estimate number of tokens in text buffer.
112 |             fim_rate (float): Rate (0.0 to 1.0) that sample will be permuted with FIM.
113 |             fim_spm_rate (float): Rate (0.0 to 1.0) of FIM permuations that will use SPM.
114 |             seed (int): Seed for random number generator.
115 |     """
116 | 
117 |     def __init__(
118 |         self,
119 |         tokenizer,
120 |         dataset,
121 |         infinite=False,
122 |         seq_length=1024,
123 |         num_of_sequences=1024,
124 |         chars_per_token=3.6,
125 |         content_field="content",
126 |         fim_rate=0.5,
127 |         fim_spm_rate=0.5,
128 |         seed=0,
129 |     ):
130 |         self.tokenizer = tokenizer
131 |         self.concat_token_id = tokenizer.eos_token_id
132 |         self.dataset = dataset
133 |         self.seq_length = seq_length
134 |         self.infinite = infinite
135 |         self.current_size = 0
136 |         self.max_buffer_size = seq_length * chars_per_token * num_of_sequences
137 |         self.content_field = content_field
138 |         self.fim_rate = fim_rate
139 |         self.fim_spm_rate = fim_spm_rate
140 |         self.seed = seed
141 | 
142 |         (
143 |             self.bos_token_id,
144 |             self.suffix_tok_id,
145 |             self.prefix_tok_id,
146 |             self.middle_tok_id,
147 |             self.pad_tok_id,
148 |         ) = fim.get_fim_token_ids(self.tokenizer)
149 |         if not self.suffix_tok_id and self.fim_rate > 0:
150 |             print("FIM is not supported by tokenizer, disabling FIM")
151 |             self.fim_rate = 0
152 | 
153 |     def __iter__(self):
154 |         iterator = iter(self.dataset)
155 |         more_examples = True
156 |         while more_examples:
157 |             buffer, buffer_len = [], 0
158 |             while True:
159 |                 if buffer_len >= self.max_buffer_size:
160 |                     break
161 |                 try:
162 |                     buffer.append(next(iterator)[self.content_field])
163 |                     buffer_len += len(buffer[-1])
164 |                 except StopIteration:
165 |                     if self.infinite:
166 |                         iterator = iter(self.dataset)
167 |                     else:
168 |                         more_examples = False
169 |                         break
170 |             tokenized_inputs = self.tokenizer(buffer, truncation=False, add_special_tokens=False)["input_ids"]
171 |             all_token_ids = []
172 | 
173 |             np_rng = np.random.RandomState(seed=self.seed)
174 |             for tokenized_input in tokenized_inputs:
175 |                 # optionally do FIM permutations
176 |                 if self.fim_rate > 0:
177 |                     tokenized_input, np_rng = fim.permute(
178 |                         tokenized_input,
179 |                         np_rng,
180 |                         self.bos_token_id,
181 |                         self.suffix_tok_id,
182 |                         self.prefix_tok_id,
183 |                         self.middle_tok_id,
184 |                         self.pad_tok_id,
185 |                         fim_rate=self.fim_rate,
186 |                         fim_spm_rate=self.fim_spm_rate,
187 |                         truncate_or_pad=False,
188 |                     )
189 | 
190 |                 all_token_ids.extend(tokenized_input + [self.concat_token_id])
191 |             examples = []
192 |             for i in range(0, len(all_token_ids), self.seq_length):
193 |                 input_ids = all_token_ids[i : i + self.seq_length]
194 |                 if len(input_ids) == self.seq_length:
195 |                     examples.append(input_ids)
196 |             random.shuffle(examples)
197 |             for example in examples:
198 |                 self.current_size += 1
199 |                 yield {
200 |                     "input_ids": torch.LongTensor(example),
201 |                     "labels": torch.LongTensor(example),
202 |                 }
203 | 
204 | 
205 | def create_datasets(tokenizer, args):
206 |     dataset = load_dataset(
207 |         args.dataset_name,
208 |         split=args.split,
209 |         use_auth_token=True,
210 |         num_proc=args.num_workers if not args.streaming else None,
211 |         streaming=args.streaming,
212 |     )
213 |     if args.streaming:
214 |         print("Loading the dataset in streaming mode")
215 |         valid_data = dataset.take(args.size_valid_set)
216 |         train_data = dataset.skip(args.size_valid_set)
217 |         train_data = train_data.shuffle(buffer_size=args.shuffle_buffer, seed=args.seed)
218 |     else:
219 |         dataset = dataset.train_test_split(test_size=args.test_size, seed=args.seed, shuffle=True)
220 |         train_data = dataset["train"]
221 |         valid_data = dataset["test"]
222 |         print(f"Size of the train set: {len(train_data)}. Size of the validation set: {len(valid_data)}")
223 |     chars_per_token = chars_token_ratio(train_data, tokenizer, args.data_column)
224 |     print(f"The character to token ratio of the dataset is: {chars_per_token:.2f}")
225 |     train_dataset = ConstantLengthDataset(
226 |         tokenizer,
227 |         train_data,
228 |         infinite=True,
229 |         seq_length=args.seq_length,
230 |         chars_per_token=chars_per_token,
231 |         content_field=args.data_column,
232 |         fim_rate=args.fim_rate,
233 |         fim_spm_rate=args.fim_spm_rate,
234 |         seed=args.seed,
235 |     )
236 |     valid_dataset = ConstantLengthDataset(
237 |         tokenizer,
238 |         valid_data,
239 |         infinite=False,
240 |         seq_length=args.seq_length,
241 |         chars_per_token=chars_per_token,
242 |         content_field=args.data_column,
243 |         fim_rate=args.fim_rate,
244 |         fim_spm_rate=args.fim_spm_rate,
245 |         seed=args.seed,
246 |     )
247 | 
248 |     return train_dataset, valid_dataset
249 | 
250 | 
251 | def create_and_prepare_model(args):
252 |     device_map = None
253 |     bnb_config = None
254 | 
255 |     load_in_8bit = args.use_8bit_qunatization
256 | 
257 |     if args.use_4bit_qunatization:
258 |         compute_dtype = getattr(torch, args.bnb_4bit_compute_dtype)
259 | 
260 |         bnb_config = BitsAndBytesConfig(
261 |             load_in_4bit=args.use_4bit_qunatization,
262 |             bnb_4bit_quant_type=args.bnb_4bit_quant_type,
263 |             bnb_4bit_compute_dtype=compute_dtype,
264 |             bnb_4bit_use_double_quant=args.use_nested_quant,
265 |         )
266 | 
267 |         if compute_dtype == torch.float16 and args.use_4bit_qunatization:
268 |             major, _ = torch.cuda.get_device_capability()
269 |             if major >= 8:
270 |                 print("=" * 80)
271 |                 print("Your GPU supports bfloat16, you can accelerate training with the argument --bf16")
272 |                 print("=" * 80)
273 | 
274 |     if args.use_4bit_qunatization or args.use_8bit_qunatization:
275 |         device_map = {"": 0}
276 | 
277 |     model = AutoModelForCausalLM.from_pretrained(
278 |         args.model_path,
279 |         torch_dtype=compute_dtype,
280 |         load_in_8bit=load_in_8bit,
281 |         quantization_config=bnb_config,
282 |         device_map=device_map,
283 |         use_cache=not args.no_gradient_checkpointing,
284 |         trust_remote_code=True,
285 |         # use_flash_attention_2=args.use_flash_attn,
286 |     )
287 | 
288 |     if (args.use_4bit_qunatization or args.use_8bit_qunatization) and args.use_peft_lora:
289 |         model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=args.no_gradient_checkpointing)
290 | 
291 |     if args.use_peft_lora:
292 |         peft_config = LoraConfig(
293 |             lora_alpha=args.lora_alpha,
294 |             lora_dropout=args.lora_dropout,
295 |             r=args.lora_r,
296 |             bias="none",
297 |             task_type="CAUSAL_LM",
298 |             target_modules=args.lora_target_modules.split(","),
299 |         )
300 | 
301 |         if args.no_gradient_checkpointing:
302 |             model.gradient_checkpointing_enable()
303 | 
304 |         model = get_peft_model(model, peft_config)
305 |         model.print_trainable_parameters()
306 |     return model
307 | 
308 | 
309 | def run_training(args, train_data, val_data):
310 |     train_data.start_iteration = 0
311 | 
312 |     print(f"Starting main loop")
313 |     training_args = TrainingArguments(
314 |         output_dir=args.output_dir,
315 |         dataloader_drop_last=True,
316 |         evaluation_strategy="steps",
317 |         save_strategy="steps",
318 |         max_steps=args.max_steps,
319 |         eval_steps=args.eval_freq,
320 |         save_steps=args.save_freq,
321 |         logging_steps=args.log_freq,
322 |         per_device_train_batch_size=args.batch_size,
323 |         per_device_eval_batch_size=args.batch_size,
324 |         learning_rate=args.learning_rate,
325 |         lr_scheduler_type=args.lr_scheduler_type,
326 |         warmup_steps=args.num_warmup_steps,
327 |         gradient_accumulation_steps=args.gradient_accumulation_steps,
328 |         gradient_checkpointing=args.no_gradient_checkpointing,
329 |         fp16=args.no_fp16,
330 |         bf16=args.bf16,
331 |         weight_decay=args.weight_decay,
332 |         run_name=f"codellama-copilot",
333 |         push_to_hub=args.push_to_hub,
334 |     )
335 | 
336 |     print("Loading the model")
337 |     model = create_and_prepare_model(args)
338 |     print(model)
339 |     if args.use_peft_lora:
340 |         model.print_trainable_parameters()
341 | 
342 |     trainer = Trainer(model=model, args=training_args, train_dataset=train_data, eval_dataset=val_data)
343 | 
344 |     # post process for faster training when using PEFT + INT4 Quantization
345 |     if args.use_peft_lora:
346 |         for name, module in trainer.model.named_modules():
347 |             if isinstance(module, LoraLayer):
348 |                 if args.bf16:
349 |                     module = module.to(torch.bfloat16)
350 |             if "norm" in name:
351 |                 module = module.to(torch.float32)
352 |             if any(x in name for x in ["lm_head", "embed_tokens", "wte", "wpe"]):
353 |                 if hasattr(module, "weight"):
354 |                     if args.bf16 and module.weight.dtype == torch.float32:
355 |                         module = module.to(torch.bfloat16)
356 | 
357 |     print("Training...")
358 |     trainer.train()
359 |     if args.use_peft_lora:
360 |         print("Saving last checkpoint of the model")
361 |         model.save_pretrained(os.path.join(args.output_dir, "final_checkpoint/"))
362 | 
363 |     if args.push_to_hub:
364 |         trainer.push_to_hub()
365 |     else:
366 |         trainer.save_model(args.output_dir)
367 |     trainer.accelerator.print(f"Model saved to {args.output_dir}")
368 | 
369 |     if args.use_peft_lora:
370 |         trainer.model.push_to_hub(args.output_dir)
371 | 
372 | 
373 | def main(args):
374 |     if args.use_flash_attn:
375 |         warnings.warn(
376 |             "Flash V2 support implemented here ignores padding/attention_mask/custom_mask. \n"
377 |             + "It is meant for continued pre-training with packing inputs to consume the entire sequence lengths."
378 |         )
379 |         from llama_flash_attn_monkey_patch import replace_llama_attn_with_flash_attn
380 | 
381 |         replace_llama_attn_with_flash_attn()
382 |     tokenizer = AutoTokenizer.from_pretrained(args.model_path, use_auth_token=True, trust_remote_code=True)
383 |     train_dataset, eval_dataset = create_datasets(tokenizer, args)
384 |     run_training(args, train_dataset, eval_dataset)
385 | 
386 | 
387 | if __name__ == "__main__":
388 |     args = get_args()
389 |     set_seed(args.seed)
390 |     os.makedirs(args.output_dir, exist_ok=True)
391 |     main(args)
392 | 


--------------------------------------------------------------------------------