├── FineTuning_LayoutLMv3_Trainer_HF_DocLayNet.ipynb ├── Fine_tune_KOSMOS_2_for_multimodal_grounding.ipynb ├── LayoutLMv3_Inference.ipynb ├── README.md ├── SAM_DocLayNet.ipynb ├── UDOPEncoderModel_fine_tune_DocLayNet.ipynb ├── UDOP_DocLayNet_Inference.ipynb └── test.png /Fine_tune_KOSMOS_2_for_multimodal_grounding.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "view-in-github", 7 | "colab_type": "text" 8 | }, 9 | "source": [ 10 | "\"Open" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": { 16 | "id": "PPs11Mw5p1RA" 17 | }, 18 | "source": [ 19 | "## Inference with KOSMOS-2 for multimodal grounding and referral\n", 20 | "\n", 21 | "In this notebook, we'll perform inference with Microsoft's new impressive multimodal large language model (LLM) called [KOSMOS-2](https://huggingface.co/docs/transformers/main/en/model_doc/kosmos-2).\n", 22 | "\n", 23 | "\n", 24 | "https://github.com/huggingface/transformers/blob/main/src/transformers/trainer.py#L619\n", 25 | "\n", 26 | "https://github.com/huggingface/transformers/blob/main/src/transformers/trainer.py#L2924\n", 27 | "\n", 28 | "https://discuss.huggingface.co/t/how-is-the-data-shifted-by-one-token-during-causallm-fine-tuning/36386\n", 29 | "\n", 30 | "https://github.com/huggingface/transformers/blob/b2748a6efd045dd771f8fd48e8b309cbc061c618/src/transformers/models/kosmos2/__init__.py\n", 31 | "\n", 32 | "https://github.com/microsoft/unilm/blob/master/kosmos-2/fairseq/fairseq/logging/metrics.py\n", 33 | "\n", 34 | "https://github.com/huggingface/transformers/blob/main/src/transformers/trainer_pt_utils.py#L482" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": { 40 | "id": "gUTmGeUXngno" 41 | }, 42 | "source": [ 43 | "## Set-up environment\n", 44 | "\n", 45 | "Let's start by installing 🤗 Transformers. We install from main here since the model is brand new at the time of writing. We also install Accelerate and Bitsandbytes since those will provide [4-bit inference](https://huggingface.co/blog/4bit-transformers-bitsandbytes), greatly reducing the memory requirements to load the model (without those I wouldn't be able to load the model in Google Colab)." 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": { 52 | "id": "NIZ4b6kQnewl", 53 | "colab": { 54 | "base_uri": "https://localhost:8080/" 55 | }, 56 | "outputId": "3b606ff3-e773-4008-d24c-d57a71a296aa" 57 | }, 58 | "outputs": [ 59 | { 60 | "output_type": "stream", 61 | "name": "stdout", 62 | "text": [ 63 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m8.8/8.8 MB\u001b[0m \u001b[31m29.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 64 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m290.1/290.1 kB\u001b[0m \u001b[31m30.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 65 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m102.2/102.2 MB\u001b[0m \u001b[31m8.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 66 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m43.6/43.6 kB\u001b[0m \u001b[31m5.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 67 | "\u001b[?25h Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", 68 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m84.1/84.1 kB\u001b[0m \u001b[31m11.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 69 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m510.5/510.5 kB\u001b[0m \u001b[31m41.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 70 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m14.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 71 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m20.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 72 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m16.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 73 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m23.7/23.7 MB\u001b[0m \u001b[31m42.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 74 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m823.6/823.6 kB\u001b[0m \u001b[31m64.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 75 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.1/14.1 MB\u001b[0m \u001b[31m54.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 76 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m731.7/731.7 MB\u001b[0m \u001b[31m1.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 77 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m410.6/410.6 MB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 78 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m121.6/121.6 MB\u001b[0m \u001b[31m3.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 79 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.5/56.5 MB\u001b[0m \u001b[31m4.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 80 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m124.2/124.2 MB\u001b[0m \u001b[31m3.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 81 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m196.0/196.0 MB\u001b[0m \u001b[31m1.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 82 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m166.0/166.0 MB\u001b[0m \u001b[31m2.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 83 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m99.1/99.1 kB\u001b[0m \u001b[31m4.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 84 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.1/21.1 MB\u001b[0m \u001b[31m5.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 85 | "\u001b[?25h Building wheel for seqeval (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n" 86 | ] 87 | } 88 | ], 89 | "source": [ 90 | "# install required libaries\n", 91 | "!pip install -q -U transformers accelerate bitsandbytes seqeval evaluate" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": { 98 | "id": "91psL2aw4uN_" 99 | }, 100 | "outputs": [], 101 | "source": [ 102 | "from transformers import AutoProcessor, AutoModelForVision2Seq\n", 103 | "import requests\n", 104 | "from datasets import load_dataset\n", 105 | "from datasets.features import ClassLabel\n", 106 | "import re\n", 107 | "from PIL import Image, ImageDraw, ImageFont\n", 108 | "import math\n", 109 | "import random\n", 110 | "from transformers import Kosmos2Config, Kosmos2Model" 111 | ] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "metadata": { 116 | "id": "W8E--vZukZT3" 117 | }, 118 | "source": [ 119 | "\n", 120 | "\n", 121 | "> The image resolution is set to 1280×1280 and the patch size is 10×10. We divide the width and height of the image into 256 bins, with each bin consisting of 5×5 pixels. A total of 256×256 location tokens are added to the vocabulary.\n", 122 | "\n" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "metadata": { 129 | "id": "Dku7xH2hlTRg" 130 | }, 131 | "outputs": [], 132 | "source": [ 133 | "## Config of Kosmos2 changed just to demonstrate fine tuning\n", 134 | "# configuration For to test fine tuning code\n", 135 | "configuration = Kosmos2Config(\n", 136 | " text_config = {\"layers\" : 4},\n", 137 | " vision_config = {\"num_hidden_layers\" : 4}\n", 138 | ")\n", 139 | "'''\n", 140 | "# configuration for actual fine-tuning\n", 141 | "configuration = Kosmos2Config()\n", 142 | "'''" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": null, 148 | "metadata": { 149 | "id": "73Ik8HzakeBN", 150 | "colab": { 151 | "base_uri": "https://localhost:8080/" 152 | }, 153 | "outputId": "50e53879-426d-413d-b145-93b25a80a7d0" 154 | }, 155 | "outputs": [ 156 | { 157 | "output_type": "stream", 158 | "name": "stderr", 159 | "text": [ 160 | "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:88: UserWarning: \n", 161 | "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", 162 | "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n", 163 | "You will be able to reuse this secret in all of your notebooks.\n", 164 | "Please note that authentication is recommended but still optional to access public models or datasets.\n", 165 | " warnings.warn(\n" 166 | ] 167 | } 168 | ], 169 | "source": [ 170 | "from transformers import Kosmos2ForConditionalGeneration\n", 171 | "from transformers import Kosmos2Config, Kosmos2Model, AutoProcessor\n", 172 | "\n", 173 | "model = Kosmos2ForConditionalGeneration.from_pretrained(\"microsoft/kosmos-2-patch14-224\", device_map=\"auto\", config = configuration)" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": null, 179 | "metadata": { 180 | "id": "A4TyIx7kkTrg", 181 | "colab": { 182 | "base_uri": "https://localhost:8080/" 183 | }, 184 | "outputId": "59fc80bb-ed75-4121-ac1a-0472431d0f4c" 185 | }, 186 | "outputs": [ 187 | { 188 | "output_type": "stream", 189 | "name": "stderr", 190 | "text": [ 191 | "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n" 192 | ] 193 | } 194 | ], 195 | 
"source": [ 196 | "processor = AutoProcessor.from_pretrained(\"microsoft/kosmos-2-patch14-224\", add_eos_token=True, device_map=\"auto\")" 197 | ] 198 | }, 199 | { 200 | "cell_type": "markdown", 201 | "metadata": { 202 | "id": "2LkHlPBtpUX8" 203 | }, 204 | "source": [ 205 | "## Load model\n", 206 | "\n", 207 | "Next, let's load the model along with its processor. We specify `load_in_4bit=True` to reduce the size of the weights to be able to load the model in Google Colab. This is all thanks to the magic of bitsandbytes' integration in the Transformers library (see [this blog post](https://huggingface.co/blog/4bit-transformers-bitsandbytes) for all info). We also specify to place the model on the GPU (with id=0, meaning the first GPU on our system)." 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": null, 213 | "metadata": { 214 | "colab": { 215 | "base_uri": "https://localhost:8080/" 216 | }, 217 | "id": "N8kVsop21jgE", 218 | "outputId": "2c7217eb-5237-4821-af69-8f2a49327c64" 219 | }, 220 | "outputs": [ 221 | { 222 | "output_type": "stream", 223 | "name": "stdout", 224 | "text": [ 225 | "Train dataset size: 4\n" 226 | ] 227 | } 228 | ], 229 | "source": [ 230 | "# dataset_id =\"pierreguillou/DocLayNet-small\"\n", 231 | "# This dataset is takes from DocLayNet dataset\n", 232 | "## This finetuning was done for the layout detection in the any image. Task was to find table/ header/ footer... from the any given image.\n", 233 | "dataset_id = \"Mit1208/test_dataset\"\n", 234 | "\n", 235 | "dataset = load_dataset(dataset_id, trust_remote_code=True)\n", 236 | "\n", 237 | "print(f\"Train dataset size: {len(dataset['train'])}\")\n", 238 | "# print(f\"Test dataset size: {len(dataset['test'])}\")" 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": null, 244 | "metadata": { 245 | "id": "oQkH4NpT10nF" 246 | }, 247 | "outputs": [], 248 | "source": [ 249 | "# Remove data which has no text\n", 250 | "# https://github.com/huggingface/transformers/blob/main/src/transformers/models/kosmos2/processing_kosmos2.py#L154\n", 251 | "dataset = dataset.filter(lambda example: len(example['texts']) > 0)" 252 | ] 253 | }, 254 | { 255 | "cell_type": "markdown", 256 | "source": [ 257 | "## Define variables\n", 258 | "\n", 259 | "below part is to defind id2label and label2id, some of the code is for creating visualization of layouts on the image (you can ignore color part)." 
260 | ], 261 | "metadata": { 262 | "id": "6LY4J4LBACVy" 263 | } 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": null, 268 | "metadata": { 269 | "id": "RqL5-AhlEyjd" 270 | }, 271 | "outputs": [], 272 | "source": [ 273 | "features = dataset[\"train\"].features\n", 274 | "column_names = dataset[\"train\"].column_names\n", 275 | "image_column_name = \"image\"\n", 276 | "text_column_name = \"texts\"\n", 277 | "boxes_column_name = \"bboxes_block\"\n", 278 | "label_column_name = \"categories\"\n", 279 | "\n", 280 | "# In the event the labels are not a `Sequence[ClassLabel]`, we will need to go through the dataset to get the\n", 281 | "# unique labels.\n", 282 | "def get_label_list(labels):\n", 283 | " unique_labels = set()\n", 284 | " for label in labels:\n", 285 | " unique_labels = unique_labels | set(label)\n", 286 | " label_list = list(unique_labels)\n", 287 | " label_list.sort()\n", 288 | " return label_list\n", 289 | "\n", 290 | "if isinstance(features[label_column_name].feature, ClassLabel):\n", 291 | " label_list = features[label_column_name].feature.names\n", 292 | " # No need to convert the labels since they are already ints.\n", 293 | " id2label = {k: v for k,v in enumerate(label_list)}\n", 294 | " label2id = {v: k for k,v in enumerate(label_list)}\n", 295 | "else:\n", 296 | " label_list = get_label_list(dataset[\"train\"][label_column_name])\n", 297 | " id2label = {k: v for k,v in enumerate(label_list)}\n", 298 | " label2id = {v: k for k,v in enumerate(label_list)}\n", 299 | "num_labels = len(label_list)" 300 | ] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "execution_count": null, 305 | "metadata": { 306 | "colab": { 307 | "base_uri": "https://localhost:8080/" 308 | }, 309 | "id": "O4KB67IxD8IP", 310 | "outputId": "12efe284-43c5-49a0-98b1-5b4e72660899" 311 | }, 312 | "outputs": [ 313 | { 314 | "output_type": "execute_result", 315 | "data": { 316 | "text/plain": [ 317 | "{0: 'Caption',\n", 318 | " 1: 'Footnote',\n", 319 | " 2: 'Formula',\n", 320 | " 3: 'List-item',\n", 321 | " 4: 'Page-footer',\n", 322 | " 5: 'Page-header',\n", 323 | " 6: 'Picture',\n", 324 | " 7: 'Section-header',\n", 325 | " 8: 'Table',\n", 326 | " 9: 'Text',\n", 327 | " 10: 'Title'}" 328 | ] 329 | }, 330 | "metadata": {}, 331 | "execution_count": 8 332 | } 333 | ], 334 | "source": [ 335 | "id2label" 336 | ] 337 | }, 338 | { 339 | "cell_type": "code", 340 | "execution_count": null, 341 | "metadata": { 342 | "id": "g9tB7iibOoNm" 343 | }, 344 | "outputs": [], 345 | "source": [ 346 | "# Define colors for all labels\n", 347 | "get_colors = lambda n: list(map(lambda i: \"#\" + \"%06x\" % random.randint(0, 0xFFFFFF),range(n)))\n", 348 | "colors = get_colors(len(label_list))\n", 349 | "font = ImageFont.load_default()\n", 350 | "label2color = {label: colors[idx] for idx, label in enumerate(label_list)}" 351 | ] 352 | }, 353 | { 354 | "cell_type": "code", 355 | "execution_count": null, 356 | "metadata": { 357 | "id": "cW58lqieR8sZ" 358 | }, 359 | "outputs": [], 360 | "source": [ 361 | "# Normalize box diamentions to range 0 to 1000\n", 362 | "def normalized_box(box, image_width=1025, image_height=1025):\n", 363 | " return [\n", 364 | " round(float(box[0] / image_width), 6),\n", 365 | " round(float(box[1] / image_height), 6),\n", 366 | " round(float(box[2] / image_width), 6),\n", 367 | " round(float(box[3] / image_height), 6),\n", 368 | " ]\n", 369 | "\n", 370 | "def convert_box(bbox):\n", 371 | " x, y, w, h = tuple(bbox) # Box coordinates are in (left, top, width, height) format\n", 372 | " return 
[x, y, x+w, y+h] # we need to convert it into (x1, y1, x2, y2) which is (left, top, left+widght, top+height)" 373 | ] 374 | }, 375 | { 376 | "cell_type": "code", 377 | "execution_count": null, 378 | "metadata": { 379 | "id": "wMSbhg-vSRcw" 380 | }, 381 | "outputs": [], 382 | "source": [ 383 | "example = dataset[\"train\"][0]\n", 384 | "# This function remove duplicate entries from the dataset\n", 385 | "def set_cat_box(example):\n", 386 | " list1_tuples = [tuple(inner_list) for inner_list in example['bboxes_block']]\n", 387 | "\n", 388 | " # Create unique pairs\n", 389 | " unique_pairs = set(zip(list1_tuples, example['categories']))\n", 390 | "\n", 391 | " # Separate the unique pairs back into lists\n", 392 | " result_list1, result_list2 = zip(*unique_pairs)\n", 393 | " return result_list1, result_list2\n", 394 | "\n", 395 | "# set_boxs, set_categories = set_cat_box(example)" 396 | ] 397 | }, 398 | { 399 | "cell_type": "code", 400 | "execution_count": null, 401 | "metadata": { 402 | "id": "x_34sFXnNbPH" 403 | }, 404 | "outputs": [], 405 | "source": [ 406 | "import pandas as pd\n", 407 | "from tqdm import tqdm\n", 408 | "tqdm.pandas()\n", 409 | "\n", 410 | "train_df = pd.DataFrame(dataset['train'])" 411 | ] 412 | }, 413 | { 414 | "cell_type": "code", 415 | "execution_count": null, 416 | "metadata": { 417 | "id": "297s45-RNmIR" 418 | }, 419 | "outputs": [], 420 | "source": [ 421 | "train_df['type'] = 'train'\n", 422 | "all_df = train_df" 423 | ] 424 | }, 425 | { 426 | "cell_type": "code", 427 | "execution_count": null, 428 | "metadata": { 429 | "colab": { 430 | "base_uri": "https://localhost:8080/" 431 | }, 432 | "id": "C26yzCDNNt5F", 433 | "outputId": "381399fb-b8cb-4382-f043-5b8372778d51" 434 | }, 435 | "outputs": [ 436 | { 437 | "output_type": "execute_result", 438 | "data": { 439 | "text/plain": [ 440 | "DatasetDict({\n", 441 | " train: Dataset({\n", 442 | " features: ['id', 'texts', 'bboxes_block', 'bboxes_line', 'categories', 'image', 'page_hash', 'original_filename', 'page_no', 'num_pages', 'original_width', 'original_height', 'coco_width', 'coco_height', 'collection', 'doc_category'],\n", 443 | " num_rows: 4\n", 444 | " })\n", 445 | "})" 446 | ] 447 | }, 448 | "metadata": {}, 449 | "execution_count": 16 450 | } 451 | ], 452 | "source": [ 453 | "dataset" 454 | ] 455 | }, 456 | { 457 | "cell_type": "code", 458 | "execution_count": null, 459 | "metadata": { 460 | "id": "W7GrLRqoFweg" 461 | }, 462 | "outputs": [], 463 | "source": [ 464 | "## Create proper prompt which has grounding labels and it's location.\n", 465 | "def pre_process_data(example_df):\n", 466 | "\n", 467 | " set_boxs, set_categories = set_cat_box(example_df)\n", 468 | " example_df['float_val'] = [tuple(normalized_box(convert_box(i))) for i in set_boxs]\n", 469 | " example_df['text'] = ' This image is type of ' + example_df['doc_category'] + '. 
It has multiple page layouts ' + \", \".join([\"\" + id2label[i] +\"\" for i in set_categories]) + 'in it.'\n", 470 | "\n", 471 | " # print(encoding)\n", 472 | " return example_df" 473 | ] 474 | }, 475 | { 476 | "cell_type": "code", 477 | "execution_count": null, 478 | "metadata": { 479 | "id": "YvzgeNUgNl_X", 480 | "colab": { 481 | "base_uri": "https://localhost:8080/" 482 | }, 483 | "outputId": "fae7d25f-f137-4f0c-f2e2-a4f153b71cda" 484 | }, 485 | "outputs": [ 486 | { 487 | "output_type": "stream", 488 | "name": "stderr", 489 | "text": [ 490 | "100%|██████████| 4/4 [00:00<00:00, 373.47it/s]\n" 491 | ] 492 | } 493 | ], 494 | "source": [ 495 | "all_df = all_df.progress_apply(pre_process_data, axis=1)" 496 | ] 497 | }, 498 | { 499 | "cell_type": "code", 500 | "execution_count": null, 501 | "metadata": { 502 | "id": "m6BrA0aAlHbC" 503 | }, 504 | "outputs": [], 505 | "source": [ 506 | "import torch\n", 507 | "device = 'cuda' if torch.cuda.is_available() else 'cpu'" 508 | ] 509 | }, 510 | { 511 | "cell_type": "code", 512 | "execution_count": null, 513 | "metadata": { 514 | "id": "PYF1XTaENl1_" 515 | }, 516 | "outputs": [], 517 | "source": [ 518 | "from datasets import Dataset\n", 519 | "## process prompt. Note: this will convert bounding box to required text and then convert it to number\n", 520 | "inputs = processor(images = all_df['image'].to_list(), text = all_df['text'].to_list(), bboxes = all_df['float_val'].to_list(), padding=True, truncation= True, return_tensors=\"pt\")\n", 521 | "labels = inputs['input_ids'].clone()\n", 522 | "labels[inputs['input_ids'] == 1] = -100\n", 523 | "inputs['labels'] = labels\n", 524 | "\n", 525 | "dataset = Dataset.from_dict(inputs)\n", 526 | "train_test_split = dataset.train_test_split(test_size=0.3)" 527 | ] 528 | }, 529 | { 530 | "cell_type": "code", 531 | "execution_count": null, 532 | "metadata": { 533 | "colab": { 534 | "base_uri": "https://localhost:8080/" 535 | }, 536 | "id": "RxUi6Ug9NlhK", 537 | "outputId": "be42bf88-cdc3-4ec3-df11-d6fc46a0ee14" 538 | }, 539 | "outputs": [ 540 | { 541 | "output_type": "execute_result", 542 | "data": { 543 | "text/plain": [ 544 | "DatasetDict({\n", 545 | " train: Dataset({\n", 546 | " features: ['pixel_values', 'input_ids', 'attention_mask', 'image_embeds_position_mask', 'labels'],\n", 547 | " num_rows: 2\n", 548 | " })\n", 549 | " test: Dataset({\n", 550 | " features: ['pixel_values', 'input_ids', 'attention_mask', 'image_embeds_position_mask', 'labels'],\n", 551 | " num_rows: 2\n", 552 | " })\n", 553 | "})" 554 | ] 555 | }, 556 | "metadata": {}, 557 | "execution_count": 18 558 | } 559 | ], 560 | "source": [ 561 | "train_test_split" 562 | ] 563 | }, 564 | { 565 | "cell_type": "code", 566 | "execution_count": null, 567 | "metadata": { 568 | "id": "9YNbyTP4o4GC" 569 | }, 570 | "outputs": [], 571 | "source": [ 572 | "train_dataset = train_test_split['train']\n", 573 | "test_dataset = train_test_split['test']" 574 | ] 575 | }, 576 | { 577 | "cell_type": "code", 578 | "execution_count": null, 579 | "metadata": { 580 | "id": "wi-QA0eNtoy1" 581 | }, 582 | "outputs": [], 583 | "source": [ 584 | "train_dataset.set_format(\"torch\")\n", 585 | "test_dataset.set_format(\"torch\")" 586 | ] 587 | }, 588 | { 589 | "cell_type": "code", 590 | "execution_count": null, 591 | "metadata": { 592 | "colab": { 593 | "base_uri": "https://localhost:8080/" 594 | }, 595 | "id": "9Knwkv6cQ1K6", 596 | "outputId": "861ec678-84b5-4914-be84-eb57da0c0367" 597 | }, 598 | "outputs": [ 599 | { 600 | "output_type": "stream", 601 | "name": "stdout", 
602 | "text": [ 603 | "pixel_values torch.Size([3, 224, 224])\n", 604 | "input_ids torch.Size([221])\n", 605 | "attention_mask torch.Size([221])\n", 606 | "image_embeds_position_mask torch.Size([221])\n", 607 | "labels torch.Size([221])\n" 608 | ] 609 | } 610 | ], 611 | "source": [ 612 | "import torch\n", 613 | "\n", 614 | "example = train_test_split['train'][0]\n", 615 | "for k,v in example.items():\n", 616 | " print(k,v.shape)" 617 | ] 618 | }, 619 | { 620 | "cell_type": "code", 621 | "execution_count": null, 622 | "metadata": { 623 | "id": "KNWDM6DmciNC" 624 | }, 625 | "outputs": [], 626 | "source": [ 627 | "from huggingface_hub import notebook_login" 628 | ] 629 | }, 630 | { 631 | "cell_type": "code", 632 | "execution_count": null, 633 | "metadata": { 634 | "colab": { 635 | "base_uri": "https://localhost:8080/", 636 | "height": 145, 637 | "referenced_widgets": [ 638 | "6675760ed4764f2ea3919417116abc53", 639 | "c9c834b4a4a74cae90469d0232e9bcb2", 640 | "257d63ff695043ee8c3299f03df92634", 641 | "14558fa6407b4f1fa9ed108e486eac0c", 642 | "c0bdf76e44e44b1fa04c138c5e383eaa", 643 | "82a33816d1954d26bc0ff6ea3576f513", 644 | "c018269ce79d4437907dea34e202427c", 645 | "7aa20c1dc69f48a792381c01f5071b8d", 646 | "62afc0a26592489ba0ed297bde5d4357", 647 | "1b7101c5d9dd440b846e60aa4c26fb72", 648 | "cef699be72da4aaeb370f71a6ac356a9", 649 | "50f583f944274cb9a49c0362b7acd713", 650 | "55f4ee25f1484576bb33cf476e2aa038", 651 | "85495656479c4323bd62279b8ef544d3", 652 | "afed23dafec44c079cda02c1268e8a6d", 653 | "7f87103d999a40dcaa4efe8d75843da2", 654 | "d89b8da9708744bf92eb71c5b5ca48a5", 655 | "23552beda4c94263bafbe834ed283921", 656 | "a59dc124d8104c0ab29574e281885dd9", 657 | "e2f52d9a90764140bbfcc4c6f6c4589c", 658 | "123e74f8e0a8412f9bbf8b2e918d99d8", 659 | "523f4a3541094d38a8dbc4dd3ef9c928", 660 | "b20db9890b464247a9ef7e4fb1adb732", 661 | "b3984a95687d4b978a25fa153ec0a877", 662 | "a3b62f55344340fa9f0db744ff2c8c70", 663 | "e4ee7889597f4f078504df0a19976c69", 664 | "7a98f305582c46d890a3f599f4dc9cf3", 665 | "803b81733b7e4bafa94100201436ceb9", 666 | "bc2570f343ed47e7bd296162db9e43a7", 667 | "7efc9dc702fd40469875f367a90c126a", 668 | "985f228d52e34ac39be0fd52a2dce7c3", 669 | "7879ea9286004824b34798805700d5b7" 670 | ] 671 | }, 672 | "id": "TPxza5wpcjqA", 673 | "outputId": "83a96fe5-cccb-432a-be84-658a10524f34" 674 | }, 675 | "outputs": [ 676 | { 677 | "output_type": "display_data", 678 | "data": { 679 | "text/plain": [ 680 | "VBox(children=(HTML(value='
" 798 | ], 799 | "text/html": [ 800 | "\n", 801 | "
\n", 802 | " \n", 803 | " \n", 804 | " [1000/1000 16:24, Epoch 1000/1000]\n", 805 | "
\n", 806 | " \n", 807 | " \n", 808 | " \n", 809 | " \n", 810 | " \n", 811 | " \n", 812 | " \n", 813 | " \n", 814 | " \n", 815 | " \n", 816 | " \n", 817 | " \n", 818 | " \n", 819 | " \n", 820 | " \n", 821 | " \n", 822 | " \n", 823 | " \n", 824 | " \n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | "
Step | Training Loss | Validation Loss
100  | No log        | 1.612360
200  | No log        | 1.566668
300  | 2.393000      | 1.660224
400  | 2.393000      | 1.736173
500  | 0.004200      | 1.764009
600  | 0.004200      | 1.800707
700  | 0.004200      | 1.779289
800  | 0.001600      | 1.784427
900  | 0.001600      | 1.802788
1000 | 0.001000      | 1.804111
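The table above is the Trainer's progress output; the traceback that follows shows the run aborting with `SafetensorError: ... No space left on device` while a checkpoint is being written. The notebook's actual `TrainingArguments` are not visible in this dump, so the sketch below uses placeholder values; the only point is `save_steps`/`save_total_limit`, which bound how much checkpoint data accumulates on a small Colab disk.

```python
from transformers import TrainingArguments

# Minimal sketch with placeholder values (not the notebook's real settings).
# save_total_limit=1 makes the Trainer delete older checkpoints, so only the
# most recent one occupies disk space during training.
training_args = TrainingArguments(
    output_dir="kosmos2-doclaynet-demo",  # placeholder output path
    max_steps=1000,
    per_device_train_batch_size=1,
    save_strategy="steps",
    save_steps=500,
    save_total_limit=1,
)
```

Setting `save_strategy="no"` instead skips intermediate checkpoints entirely, leaving only an explicit `trainer.save_model(...)` call at the end of training to write weights.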

" 867 | ] 868 | }, 869 | "metadata": {} 870 | }, 871 | { 872 | "output_type": "error", 873 | "ename": "SafetensorError", 874 | "evalue": "Error while serializing: IoError(Os { code: 28, kind: StorageFull, message: \"No space left on device\" })", 875 | "traceback": [ 876 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 877 | "\u001b[0;31mSafetensorError\u001b[0m Traceback (most recent call last)", 878 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtrainer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrain\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 879 | "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/trainer.py\u001b[0m in \u001b[0;36mtrain\u001b[0;34m(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[0m\n\u001b[1;32m 1769\u001b[0m \u001b[0;31m# Disable progress bars when uploading models during checkpoints to avoid polluting stdout\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1770\u001b[0m \u001b[0mhf_hub_utils\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdisable_progress_bars\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1771\u001b[0;31m return inner_training_loop(\n\u001b[0m\u001b[1;32m 1772\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1773\u001b[0m \u001b[0mresume_from_checkpoint\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mresume_from_checkpoint\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 880 | "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/trainer.py\u001b[0m in \u001b[0;36m_inner_training_loop\u001b[0;34m(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)\u001b[0m\n\u001b[1;32m 2191\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcontrol\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcallback_handler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mon_step_end\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstate\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcontrol\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2192\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2193\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_maybe_log_save_evaluate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtr_loss\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgrad_norm\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtrial\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mepoch\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mignore_keys_for_eval\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2194\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2195\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcontrol\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcallback_handler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mon_substep_end\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstate\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcontrol\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 881 | "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/trainer.py\u001b[0m in \u001b[0;36m_maybe_log_save_evaluate\u001b[0;34m(self, tr_loss, grad_norm, model, trial, epoch, ignore_keys_for_eval)\u001b[0m\n\u001b[1;32m 2586\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2587\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcontrol\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshould_save\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2588\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_save_checkpoint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtrial\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmetrics\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmetrics\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2589\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcontrol\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcallback_handler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mon_save\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstate\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcontrol\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2590\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 882 | "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/trainer.py\u001b[0m in \u001b[0;36m_save_checkpoint\u001b[0;34m(self, model, trial, metrics)\u001b[0m\n\u001b[1;32m 2654\u001b[0m \u001b[0mrun_dir\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_output_dir\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtrial\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtrial\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2655\u001b[0m \u001b[0moutput_dir\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrun_dir\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcheckpoint_folder\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2656\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msave_model\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutput_dir\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_internal_call\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2657\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2658\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msave_only_model\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 883 | 
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/trainer.py\u001b[0m in \u001b[0;36msave_model\u001b[0;34m(self, output_dir, _internal_call)\u001b[0m\n\u001b[1;32m 3148\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3149\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshould_save\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3150\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_save\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutput_dir\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3151\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3152\u001b[0m \u001b[0;31m# Push to the Hub when `save_model` is called by the user.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 884 | "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/trainer.py\u001b[0m in \u001b[0;36m_save\u001b[0;34m(self, output_dir, state_dict)\u001b[0m\n\u001b[1;32m 3223\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msave\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstate_dict\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutput_dir\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mWEIGHTS_NAME\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3224\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3225\u001b[0;31m self.model.save_pretrained(\n\u001b[0m\u001b[1;32m 3226\u001b[0m \u001b[0moutput_dir\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstate_dict\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mstate_dict\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msafe_serialization\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msave_safetensors\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3227\u001b[0m )\n", 885 | "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/modeling_utils.py\u001b[0m in \u001b[0;36msave_pretrained\u001b[0;34m(self, save_directory, is_main_process, state_dict, save_function, push_to_hub, max_shard_size, safe_serialization, variant, token, save_peft_format, **kwargs)\u001b[0m\n\u001b[1;32m 2466\u001b[0m \u001b[0;31m# At some point we will need to deal better with save_function (used for TPU and other distributed\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2467\u001b[0m \u001b[0;31m# joyfulness), but for now this enough.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2468\u001b[0;31m \u001b[0msafe_save_file\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mshard\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msave_directory\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mshard_file\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmetadata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0;34m\"format\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m\"pt\"\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2469\u001b[0m 
\u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2470\u001b[0m \u001b[0msave_function\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mshard\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msave_directory\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mshard_file\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 886 | "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/safetensors/torch.py\u001b[0m in \u001b[0;36msave_file\u001b[0;34m(tensors, filename, metadata)\u001b[0m\n\u001b[1;32m 279\u001b[0m \u001b[0;31m`\u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 280\u001b[0m \"\"\"\n\u001b[0;32m--> 281\u001b[0;31m \u001b[0mserialize_file\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_flatten\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtensors\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfilename\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmetadata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmetadata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 282\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 283\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 887 | "\u001b[0;31mSafetensorError\u001b[0m: Error while serializing: IoError(Os { code: 28, kind: StorageFull, message: \"No space left on device\" })" 888 | ] 889 | } 890 | ] 891 | }, 892 | { 893 | "cell_type": "code", 894 | "execution_count": null, 895 | "metadata": { 896 | "id": "AXzFb4mDfcnk" 897 | }, 898 | "outputs": [], 899 | "source": [] 900 | }, 901 | { 902 | "cell_type": "code", 903 | "execution_count": null, 904 | "metadata": { 905 | "id": "JzMvoc5xfccW" 906 | }, 907 | "outputs": [], 908 | "source": [] 909 | }, 910 | { 911 | "cell_type": "markdown", 912 | "source": [ 913 | "## Only useful if patch size is different then 224" 914 | ], 915 | "metadata": { 916 | "id": "_0dbr6HkAYPY" 917 | } 918 | }, 919 | { 920 | "cell_type": "code", 921 | "source": [ 922 | "'''\n", 923 | "# Initializing a Kosmos-2 kosmos-2-patch14-224 style configuration\n", 924 | "configuration = Kosmos2Config(\n", 925 | " text_config = {\"max_position_embeddings\" : 2048*2, \"attention_heads\" : 32*4},\n", 926 | " vision_config = {\"image_size\" : 1280, \"patch_size\" : 256}\n", 927 | " )\n", 928 | "# configuration = Kosmos2Config(latent_query_num = 64 * 4)\n", 929 | "# model = Kosmos2ForConditionalGeneration.from_pretrained(\"microsoft/kosmos-2-patch14-224\", config = configuration, ignore_mismatched_sizes=True)\n", 930 | "# num_patches_per_side = 32*math.sqrt(total_tokens_increase_by)\n", 931 | "# total_tokens_increase_by = 64\n", 932 | "# # , num_patch_index_tokens = 1024 * total_tokens_increase_by\n", 933 | "'''" 934 | ], 935 | "metadata": { 936 | "id": "GlAvDEl5AhEM" 937 | }, 938 | "execution_count": null, 939 | "outputs": [] 940 | }, 941 | { 942 | "cell_type": "code", 943 | "execution_count": null, 944 | "metadata": { 945 | "id": "Kpp8VEnsfcZq" 946 | }, 947 | "outputs": [], 948 | "source": [ 949 | "# copied from https://github.com/microsoft/unilm/blob/97e4923e97d3ee10b57e97013556e3fd0d207a9b/kosmos-2/demo/decode_string.py#L35C1-L75C38\n", 950 | "# (with format modifications)\n", 951 | "def patch_index_to_coordinate(ul_idx: int, lr_idx: int, num_patches_per_side: int):\n", 952 | " # Compute the size of each cell in the 
grid\n", 953 | " cell_size = 1.0 / num_patches_per_side\n", 954 | "\n", 955 | " # Compute the x and y indices of the upper-left and lower-right corners of the bounding box\n", 956 | " ul_x = ul_idx % num_patches_per_side\n", 957 | " ul_y = ul_idx // num_patches_per_side\n", 958 | "\n", 959 | " lr_x = lr_idx % num_patches_per_side\n", 960 | " lr_y = lr_idx // num_patches_per_side\n", 961 | "\n", 962 | " # Compute the normalized coordinates of the bounding box\n", 963 | " if ul_idx == lr_idx:\n", 964 | " x1 = ul_x * cell_size\n", 965 | " y1 = ul_y * cell_size\n", 966 | " x2 = lr_x * cell_size + cell_size\n", 967 | " y2 = lr_y * cell_size + cell_size\n", 968 | " elif ul_x == lr_x or ul_y == lr_y:\n", 969 | " x1 = ul_x * cell_size\n", 970 | " y1 = ul_y * cell_size\n", 971 | " x2 = lr_x * cell_size + cell_size\n", 972 | " y2 = lr_y * cell_size + cell_size\n", 973 | " else:\n", 974 | " x1 = ul_x * cell_size + cell_size / 2\n", 975 | " y1 = ul_y * cell_size + cell_size / 2\n", 976 | " x2 = lr_x * cell_size + cell_size / 2\n", 977 | " y2 = lr_y * cell_size + cell_size / 2\n", 978 | "\n", 979 | " return x1, y1, x2, y2\n", 980 | "\n", 981 | "\n", 982 | "# copied from https://github.com/microsoft/unilm/blob/97e4923e97d3ee10b57e97013556e3fd0d207a9b/kosmos-2/demo/decode_string.py#L4-L33\n", 983 | "# (with format modifications)\n", 984 | "def extract_entities_with_patch_indices(text):\n", 985 | " # The regular expression pattern for matching the required formats\n", 986 | " pattern = r\"(?:(([^<]+)))?((?:)*)\"\n", 987 | "\n", 988 | " # Find all matches in the given string\n", 989 | " matches = re.finditer(pattern, text)\n", 990 | "\n", 991 | " # Initialize an empty list to store the valid patch_index combinations\n", 992 | " entities_with_patch_indices = []\n", 993 | "\n", 994 | " for match in matches:\n", 995 | " # span of a `phrase` that is between and \n", 996 | " span = match.span(2)\n", 997 | " phrase_tag, phrase, match_content = match.groups()\n", 998 | " if not phrase_tag:\n", 999 | " phrase = None\n", 1000 | " # We take the starting position of ``\n", 1001 | " span = (match.span(0)[0], match.span(0)[0])\n", 1002 | "\n", 1003 | " # Split the match_content by the delimiter to get individual patch_index pairs\n", 1004 | " patch_index_pairs = match_content.split(\"\")\n", 1005 | "\n", 1006 | " entity_bboxes = []\n", 1007 | " for pair in patch_index_pairs:\n", 1008 | " # Extract the xxxx and yyyy values from the patch_index pair\n", 1009 | " x = re.search(r\"\", pair)\n", 1010 | " y = re.search(r\"\", pair[1:])\n", 1011 | "\n", 1012 | " if x and y:\n", 1013 | " if phrase:\n", 1014 | " entity_bboxes.append((int(x.group(1)), int(y.group(1))))\n", 1015 | " else:\n", 1016 | " entity_bboxes.append((int(x.group(1)), int(y.group(1))))\n", 1017 | "\n", 1018 | " if phrase:\n", 1019 | " entities_with_patch_indices.append((phrase, span, entity_bboxes))\n", 1020 | " else:\n", 1021 | " for bbox in entity_bboxes:\n", 1022 | " # fake entity name\n", 1023 | " entity = f\"\"\n", 1024 | " entities_with_patch_indices.append((entity, span, [bbox]))\n", 1025 | "\n", 1026 | " return entities_with_patch_indices\n", 1027 | "\n", 1028 | "\n", 1029 | "def adjust_entity_positions(entity, text):\n", 1030 | " \"\"\"Adjust the positions of the entities in `text` to be relative to the text with special fields removed.\"\"\"\n", 1031 | " entity_name, (start, end) = entity\n", 1032 | " # computed the length of strings with special fields (tag tokens, patch index tokens, etc.) 
removed\n", 1033 | " adjusted_start = len(re.sub(\"<.*?>\", \"\", text[:start]))\n", 1034 | " adjusted_end = len(re.sub(\"<.*?>\", \"\", text[:end]))\n", 1035 | " adjusted_entity = (entity_name, (adjusted_start, adjusted_end))\n", 1036 | " return adjusted_entity\n", 1037 | "\n", 1038 | "\n", 1039 | "def _cleanup_spaces(text, entities):\n", 1040 | " \"\"\"Remove the spaces around the text and the entities in it.\"\"\"\n", 1041 | " new_text = text.strip()\n", 1042 | " leading_spaces = len(text) - len(text.lstrip())\n", 1043 | "\n", 1044 | " new_entities = []\n", 1045 | " for entity_name, (start, end), bboxes in entities:\n", 1046 | " entity_name_leading_spaces = len(entity_name) - len(entity_name.lstrip())\n", 1047 | " entity_name_trailing_spaces = len(entity_name) - len(entity_name.rstrip())\n", 1048 | "\n", 1049 | " start = start - leading_spaces + entity_name_leading_spaces\n", 1050 | " end = end - leading_spaces - entity_name_trailing_spaces\n", 1051 | " entity_name = entity_name.strip()\n", 1052 | "\n", 1053 | " new_entities.append((entity_name, (start, end), bboxes))\n", 1054 | "\n", 1055 | " return new_text, new_entities\n", 1056 | "\n", 1057 | "\n", 1058 | "# copied from https://github.com/microsoft/unilm/blob/97e4923e97d3ee10b57e97013556e3fd0d207a9b/kosmos-2/demo/decode_string.py#L77-L87\n", 1059 | "# (with format modifications)\n", 1060 | "def clean_text_and_extract_entities_with_bboxes(text, num_patches_per_side=32):\n", 1061 | " # remove special fields (tag tokens, patch index tokens, etc.)\n", 1062 | " processed_text = re.sub(\"<.*?>\", \"\", text)\n", 1063 | "\n", 1064 | " entities_with_patch_indices = extract_entities_with_patch_indices(text)\n", 1065 | " entities = []\n", 1066 | " for item in entities_with_patch_indices:\n", 1067 | " entity, bboxes = item[0:2], item[2]\n", 1068 | " adjusted_entity = adjust_entity_positions(entity, text)\n", 1069 | " bboxes_in_coords = [patch_index_to_coordinate(bbox[0], bbox[1], num_patches_per_side) for bbox in bboxes]\n", 1070 | "\n", 1071 | " entities.append(adjusted_entity + (bboxes_in_coords,))\n", 1072 | "\n", 1073 | " return _cleanup_spaces(processed_text, entities)" 1074 | ] 1075 | }, 1076 | { 1077 | "cell_type": "code", 1078 | "source": [], 1079 | "metadata": { 1080 | "id": "jc9bMQNnAcli" 1081 | }, 1082 | "execution_count": null, 1083 | "outputs": [] 1084 | } 1085 | ], 1086 | "metadata": { 1087 | "accelerator": "GPU", 1088 | "colab": { 1089 | "gpuType": "T4", 1090 | "provenance": [], 1091 | "include_colab_link": true 1092 | }, 1093 | "kernelspec": { 1094 | "display_name": "Python 3", 1095 | "name": "python3" 1096 | }, 1097 | "language_info": { 1098 | "name": "python" 1099 | }, 1100 | "widgets": { 1101 | "application/vnd.jupyter.widget-state+json": { 1102 | "6675760ed4764f2ea3919417116abc53": { 1103 | "model_module": "@jupyter-widgets/controls", 1104 | "model_name": "VBoxModel", 1105 | "model_module_version": "1.5.0", 1106 | "state": { 1107 | "_dom_classes": [], 1108 | "_model_module": "@jupyter-widgets/controls", 1109 | "_model_module_version": "1.5.0", 1110 | "_model_name": "VBoxModel", 1111 | "_view_count": null, 1112 | "_view_module": "@jupyter-widgets/controls", 1113 | "_view_module_version": "1.5.0", 1114 | "_view_name": "VBoxView", 1115 | "box_style": "", 1116 | "children": [ 1117 | "IPY_MODEL_123e74f8e0a8412f9bbf8b2e918d99d8", 1118 | "IPY_MODEL_523f4a3541094d38a8dbc4dd3ef9c928", 1119 | "IPY_MODEL_b20db9890b464247a9ef7e4fb1adb732", 1120 | "IPY_MODEL_b3984a95687d4b978a25fa153ec0a877" 1121 | ], 1122 | "layout": 
"IPY_MODEL_c018269ce79d4437907dea34e202427c" 1123 | } 1124 | }, 1125 | "c9c834b4a4a74cae90469d0232e9bcb2": { 1126 | "model_module": "@jupyter-widgets/controls", 1127 | "model_name": "HTMLModel", 1128 | "model_module_version": "1.5.0", 1129 | "state": { 1130 | "_dom_classes": [], 1131 | "_model_module": "@jupyter-widgets/controls", 1132 | "_model_module_version": "1.5.0", 1133 | "_model_name": "HTMLModel", 1134 | "_view_count": null, 1135 | "_view_module": "@jupyter-widgets/controls", 1136 | "_view_module_version": "1.5.0", 1137 | "_view_name": "HTMLView", 1138 | "description": "", 1139 | "description_tooltip": null, 1140 | "layout": "IPY_MODEL_7aa20c1dc69f48a792381c01f5071b8d", 1141 | "placeholder": "​", 1142 | "style": "IPY_MODEL_62afc0a26592489ba0ed297bde5d4357", 1143 | "value": "

Copy a token from your Hugging Face\ntokens page and paste it below.
Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file.
" 1144 | } 1145 | }, 1146 | "257d63ff695043ee8c3299f03df92634": { 1147 | "model_module": "@jupyter-widgets/controls", 1148 | "model_name": "PasswordModel", 1149 | "model_module_version": "1.5.0", 1150 | "state": { 1151 | "_dom_classes": [], 1152 | "_model_module": "@jupyter-widgets/controls", 1153 | "_model_module_version": "1.5.0", 1154 | "_model_name": "PasswordModel", 1155 | "_view_count": null, 1156 | "_view_module": "@jupyter-widgets/controls", 1157 | "_view_module_version": "1.5.0", 1158 | "_view_name": "PasswordView", 1159 | "continuous_update": true, 1160 | "description": "Token:", 1161 | "description_tooltip": null, 1162 | "disabled": false, 1163 | "layout": "IPY_MODEL_1b7101c5d9dd440b846e60aa4c26fb72", 1164 | "placeholder": "​", 1165 | "style": "IPY_MODEL_cef699be72da4aaeb370f71a6ac356a9", 1166 | "value": "" 1167 | } 1168 | }, 1169 | "14558fa6407b4f1fa9ed108e486eac0c": { 1170 | "model_module": "@jupyter-widgets/controls", 1171 | "model_name": "CheckboxModel", 1172 | "model_module_version": "1.5.0", 1173 | "state": { 1174 | "_dom_classes": [], 1175 | "_model_module": "@jupyter-widgets/controls", 1176 | "_model_module_version": "1.5.0", 1177 | "_model_name": "CheckboxModel", 1178 | "_view_count": null, 1179 | "_view_module": "@jupyter-widgets/controls", 1180 | "_view_module_version": "1.5.0", 1181 | "_view_name": "CheckboxView", 1182 | "description": "Add token as git credential?", 1183 | "description_tooltip": null, 1184 | "disabled": false, 1185 | "indent": true, 1186 | "layout": "IPY_MODEL_50f583f944274cb9a49c0362b7acd713", 1187 | "style": "IPY_MODEL_55f4ee25f1484576bb33cf476e2aa038", 1188 | "value": true 1189 | } 1190 | }, 1191 | "c0bdf76e44e44b1fa04c138c5e383eaa": { 1192 | "model_module": "@jupyter-widgets/controls", 1193 | "model_name": "ButtonModel", 1194 | "model_module_version": "1.5.0", 1195 | "state": { 1196 | "_dom_classes": [], 1197 | "_model_module": "@jupyter-widgets/controls", 1198 | "_model_module_version": "1.5.0", 1199 | "_model_name": "ButtonModel", 1200 | "_view_count": null, 1201 | "_view_module": "@jupyter-widgets/controls", 1202 | "_view_module_version": "1.5.0", 1203 | "_view_name": "ButtonView", 1204 | "button_style": "", 1205 | "description": "Login", 1206 | "disabled": false, 1207 | "icon": "", 1208 | "layout": "IPY_MODEL_85495656479c4323bd62279b8ef544d3", 1209 | "style": "IPY_MODEL_afed23dafec44c079cda02c1268e8a6d", 1210 | "tooltip": "" 1211 | } 1212 | }, 1213 | "82a33816d1954d26bc0ff6ea3576f513": { 1214 | "model_module": "@jupyter-widgets/controls", 1215 | "model_name": "HTMLModel", 1216 | "model_module_version": "1.5.0", 1217 | "state": { 1218 | "_dom_classes": [], 1219 | "_model_module": "@jupyter-widgets/controls", 1220 | "_model_module_version": "1.5.0", 1221 | "_model_name": "HTMLModel", 1222 | "_view_count": null, 1223 | "_view_module": "@jupyter-widgets/controls", 1224 | "_view_module_version": "1.5.0", 1225 | "_view_name": "HTMLView", 1226 | "description": "", 1227 | "description_tooltip": null, 1228 | "layout": "IPY_MODEL_7f87103d999a40dcaa4efe8d75843da2", 1229 | "placeholder": "​", 1230 | "style": "IPY_MODEL_d89b8da9708744bf92eb71c5b5ca48a5", 1231 | "value": "\nPro Tip: If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks. 
" 1232 | } 1233 | }, 1234 | "c018269ce79d4437907dea34e202427c": { 1235 | "model_module": "@jupyter-widgets/base", 1236 | "model_name": "LayoutModel", 1237 | "model_module_version": "1.2.0", 1238 | "state": { 1239 | "_model_module": "@jupyter-widgets/base", 1240 | "_model_module_version": "1.2.0", 1241 | "_model_name": "LayoutModel", 1242 | "_view_count": null, 1243 | "_view_module": "@jupyter-widgets/base", 1244 | "_view_module_version": "1.2.0", 1245 | "_view_name": "LayoutView", 1246 | "align_content": null, 1247 | "align_items": "center", 1248 | "align_self": null, 1249 | "border": null, 1250 | "bottom": null, 1251 | "display": "flex", 1252 | "flex": null, 1253 | "flex_flow": "column", 1254 | "grid_area": null, 1255 | "grid_auto_columns": null, 1256 | "grid_auto_flow": null, 1257 | "grid_auto_rows": null, 1258 | "grid_column": null, 1259 | "grid_gap": null, 1260 | "grid_row": null, 1261 | "grid_template_areas": null, 1262 | "grid_template_columns": null, 1263 | "grid_template_rows": null, 1264 | "height": null, 1265 | "justify_content": null, 1266 | "justify_items": null, 1267 | "left": null, 1268 | "margin": null, 1269 | "max_height": null, 1270 | "max_width": null, 1271 | "min_height": null, 1272 | "min_width": null, 1273 | "object_fit": null, 1274 | "object_position": null, 1275 | "order": null, 1276 | "overflow": null, 1277 | "overflow_x": null, 1278 | "overflow_y": null, 1279 | "padding": null, 1280 | "right": null, 1281 | "top": null, 1282 | "visibility": null, 1283 | "width": "50%" 1284 | } 1285 | }, 1286 | "7aa20c1dc69f48a792381c01f5071b8d": { 1287 | "model_module": "@jupyter-widgets/base", 1288 | "model_name": "LayoutModel", 1289 | "model_module_version": "1.2.0", 1290 | "state": { 1291 | "_model_module": "@jupyter-widgets/base", 1292 | "_model_module_version": "1.2.0", 1293 | "_model_name": "LayoutModel", 1294 | "_view_count": null, 1295 | "_view_module": "@jupyter-widgets/base", 1296 | "_view_module_version": "1.2.0", 1297 | "_view_name": "LayoutView", 1298 | "align_content": null, 1299 | "align_items": null, 1300 | "align_self": null, 1301 | "border": null, 1302 | "bottom": null, 1303 | "display": null, 1304 | "flex": null, 1305 | "flex_flow": null, 1306 | "grid_area": null, 1307 | "grid_auto_columns": null, 1308 | "grid_auto_flow": null, 1309 | "grid_auto_rows": null, 1310 | "grid_column": null, 1311 | "grid_gap": null, 1312 | "grid_row": null, 1313 | "grid_template_areas": null, 1314 | "grid_template_columns": null, 1315 | "grid_template_rows": null, 1316 | "height": null, 1317 | "justify_content": null, 1318 | "justify_items": null, 1319 | "left": null, 1320 | "margin": null, 1321 | "max_height": null, 1322 | "max_width": null, 1323 | "min_height": null, 1324 | "min_width": null, 1325 | "object_fit": null, 1326 | "object_position": null, 1327 | "order": null, 1328 | "overflow": null, 1329 | "overflow_x": null, 1330 | "overflow_y": null, 1331 | "padding": null, 1332 | "right": null, 1333 | "top": null, 1334 | "visibility": null, 1335 | "width": null 1336 | } 1337 | }, 1338 | "62afc0a26592489ba0ed297bde5d4357": { 1339 | "model_module": "@jupyter-widgets/controls", 1340 | "model_name": "DescriptionStyleModel", 1341 | "model_module_version": "1.5.0", 1342 | "state": { 1343 | "_model_module": "@jupyter-widgets/controls", 1344 | "_model_module_version": "1.5.0", 1345 | "_model_name": "DescriptionStyleModel", 1346 | "_view_count": null, 1347 | "_view_module": "@jupyter-widgets/base", 1348 | "_view_module_version": "1.2.0", 1349 | "_view_name": "StyleView", 1350 | 
"description_width": "" 1351 | } 1352 | }, 1353 | "1b7101c5d9dd440b846e60aa4c26fb72": { 1354 | "model_module": "@jupyter-widgets/base", 1355 | "model_name": "LayoutModel", 1356 | "model_module_version": "1.2.0", 1357 | "state": { 1358 | "_model_module": "@jupyter-widgets/base", 1359 | "_model_module_version": "1.2.0", 1360 | "_model_name": "LayoutModel", 1361 | "_view_count": null, 1362 | "_view_module": "@jupyter-widgets/base", 1363 | "_view_module_version": "1.2.0", 1364 | "_view_name": "LayoutView", 1365 | "align_content": null, 1366 | "align_items": null, 1367 | "align_self": null, 1368 | "border": null, 1369 | "bottom": null, 1370 | "display": null, 1371 | "flex": null, 1372 | "flex_flow": null, 1373 | "grid_area": null, 1374 | "grid_auto_columns": null, 1375 | "grid_auto_flow": null, 1376 | "grid_auto_rows": null, 1377 | "grid_column": null, 1378 | "grid_gap": null, 1379 | "grid_row": null, 1380 | "grid_template_areas": null, 1381 | "grid_template_columns": null, 1382 | "grid_template_rows": null, 1383 | "height": null, 1384 | "justify_content": null, 1385 | "justify_items": null, 1386 | "left": null, 1387 | "margin": null, 1388 | "max_height": null, 1389 | "max_width": null, 1390 | "min_height": null, 1391 | "min_width": null, 1392 | "object_fit": null, 1393 | "object_position": null, 1394 | "order": null, 1395 | "overflow": null, 1396 | "overflow_x": null, 1397 | "overflow_y": null, 1398 | "padding": null, 1399 | "right": null, 1400 | "top": null, 1401 | "visibility": null, 1402 | "width": null 1403 | } 1404 | }, 1405 | "cef699be72da4aaeb370f71a6ac356a9": { 1406 | "model_module": "@jupyter-widgets/controls", 1407 | "model_name": "DescriptionStyleModel", 1408 | "model_module_version": "1.5.0", 1409 | "state": { 1410 | "_model_module": "@jupyter-widgets/controls", 1411 | "_model_module_version": "1.5.0", 1412 | "_model_name": "DescriptionStyleModel", 1413 | "_view_count": null, 1414 | "_view_module": "@jupyter-widgets/base", 1415 | "_view_module_version": "1.2.0", 1416 | "_view_name": "StyleView", 1417 | "description_width": "" 1418 | } 1419 | }, 1420 | "50f583f944274cb9a49c0362b7acd713": { 1421 | "model_module": "@jupyter-widgets/base", 1422 | "model_name": "LayoutModel", 1423 | "model_module_version": "1.2.0", 1424 | "state": { 1425 | "_model_module": "@jupyter-widgets/base", 1426 | "_model_module_version": "1.2.0", 1427 | "_model_name": "LayoutModel", 1428 | "_view_count": null, 1429 | "_view_module": "@jupyter-widgets/base", 1430 | "_view_module_version": "1.2.0", 1431 | "_view_name": "LayoutView", 1432 | "align_content": null, 1433 | "align_items": null, 1434 | "align_self": null, 1435 | "border": null, 1436 | "bottom": null, 1437 | "display": null, 1438 | "flex": null, 1439 | "flex_flow": null, 1440 | "grid_area": null, 1441 | "grid_auto_columns": null, 1442 | "grid_auto_flow": null, 1443 | "grid_auto_rows": null, 1444 | "grid_column": null, 1445 | "grid_gap": null, 1446 | "grid_row": null, 1447 | "grid_template_areas": null, 1448 | "grid_template_columns": null, 1449 | "grid_template_rows": null, 1450 | "height": null, 1451 | "justify_content": null, 1452 | "justify_items": null, 1453 | "left": null, 1454 | "margin": null, 1455 | "max_height": null, 1456 | "max_width": null, 1457 | "min_height": null, 1458 | "min_width": null, 1459 | "object_fit": null, 1460 | "object_position": null, 1461 | "order": null, 1462 | "overflow": null, 1463 | "overflow_x": null, 1464 | "overflow_y": null, 1465 | "padding": null, 1466 | "right": null, 1467 | "top": null, 1468 | "visibility": null, 
1469 | "width": null 1470 | } 1471 | }, 1472 | "55f4ee25f1484576bb33cf476e2aa038": { 1473 | "model_module": "@jupyter-widgets/controls", 1474 | "model_name": "DescriptionStyleModel", 1475 | "model_module_version": "1.5.0", 1476 | "state": { 1477 | "_model_module": "@jupyter-widgets/controls", 1478 | "_model_module_version": "1.5.0", 1479 | "_model_name": "DescriptionStyleModel", 1480 | "_view_count": null, 1481 | "_view_module": "@jupyter-widgets/base", 1482 | "_view_module_version": "1.2.0", 1483 | "_view_name": "StyleView", 1484 | "description_width": "" 1485 | } 1486 | }, 1487 | "85495656479c4323bd62279b8ef544d3": { 1488 | "model_module": "@jupyter-widgets/base", 1489 | "model_name": "LayoutModel", 1490 | "model_module_version": "1.2.0", 1491 | "state": { 1492 | "_model_module": "@jupyter-widgets/base", 1493 | "_model_module_version": "1.2.0", 1494 | "_model_name": "LayoutModel", 1495 | "_view_count": null, 1496 | "_view_module": "@jupyter-widgets/base", 1497 | "_view_module_version": "1.2.0", 1498 | "_view_name": "LayoutView", 1499 | "align_content": null, 1500 | "align_items": null, 1501 | "align_self": null, 1502 | "border": null, 1503 | "bottom": null, 1504 | "display": null, 1505 | "flex": null, 1506 | "flex_flow": null, 1507 | "grid_area": null, 1508 | "grid_auto_columns": null, 1509 | "grid_auto_flow": null, 1510 | "grid_auto_rows": null, 1511 | "grid_column": null, 1512 | "grid_gap": null, 1513 | "grid_row": null, 1514 | "grid_template_areas": null, 1515 | "grid_template_columns": null, 1516 | "grid_template_rows": null, 1517 | "height": null, 1518 | "justify_content": null, 1519 | "justify_items": null, 1520 | "left": null, 1521 | "margin": null, 1522 | "max_height": null, 1523 | "max_width": null, 1524 | "min_height": null, 1525 | "min_width": null, 1526 | "object_fit": null, 1527 | "object_position": null, 1528 | "order": null, 1529 | "overflow": null, 1530 | "overflow_x": null, 1531 | "overflow_y": null, 1532 | "padding": null, 1533 | "right": null, 1534 | "top": null, 1535 | "visibility": null, 1536 | "width": null 1537 | } 1538 | }, 1539 | "afed23dafec44c079cda02c1268e8a6d": { 1540 | "model_module": "@jupyter-widgets/controls", 1541 | "model_name": "ButtonStyleModel", 1542 | "model_module_version": "1.5.0", 1543 | "state": { 1544 | "_model_module": "@jupyter-widgets/controls", 1545 | "_model_module_version": "1.5.0", 1546 | "_model_name": "ButtonStyleModel", 1547 | "_view_count": null, 1548 | "_view_module": "@jupyter-widgets/base", 1549 | "_view_module_version": "1.2.0", 1550 | "_view_name": "StyleView", 1551 | "button_color": null, 1552 | "font_weight": "" 1553 | } 1554 | }, 1555 | "7f87103d999a40dcaa4efe8d75843da2": { 1556 | "model_module": "@jupyter-widgets/base", 1557 | "model_name": "LayoutModel", 1558 | "model_module_version": "1.2.0", 1559 | "state": { 1560 | "_model_module": "@jupyter-widgets/base", 1561 | "_model_module_version": "1.2.0", 1562 | "_model_name": "LayoutModel", 1563 | "_view_count": null, 1564 | "_view_module": "@jupyter-widgets/base", 1565 | "_view_module_version": "1.2.0", 1566 | "_view_name": "LayoutView", 1567 | "align_content": null, 1568 | "align_items": null, 1569 | "align_self": null, 1570 | "border": null, 1571 | "bottom": null, 1572 | "display": null, 1573 | "flex": null, 1574 | "flex_flow": null, 1575 | "grid_area": null, 1576 | "grid_auto_columns": null, 1577 | "grid_auto_flow": null, 1578 | "grid_auto_rows": null, 1579 | "grid_column": null, 1580 | "grid_gap": null, 1581 | "grid_row": null, 1582 | "grid_template_areas": null, 1583 | 
"grid_template_columns": null, 1584 | "grid_template_rows": null, 1585 | "height": null, 1586 | "justify_content": null, 1587 | "justify_items": null, 1588 | "left": null, 1589 | "margin": null, 1590 | "max_height": null, 1591 | "max_width": null, 1592 | "min_height": null, 1593 | "min_width": null, 1594 | "object_fit": null, 1595 | "object_position": null, 1596 | "order": null, 1597 | "overflow": null, 1598 | "overflow_x": null, 1599 | "overflow_y": null, 1600 | "padding": null, 1601 | "right": null, 1602 | "top": null, 1603 | "visibility": null, 1604 | "width": null 1605 | } 1606 | }, 1607 | "d89b8da9708744bf92eb71c5b5ca48a5": { 1608 | "model_module": "@jupyter-widgets/controls", 1609 | "model_name": "DescriptionStyleModel", 1610 | "model_module_version": "1.5.0", 1611 | "state": { 1612 | "_model_module": "@jupyter-widgets/controls", 1613 | "_model_module_version": "1.5.0", 1614 | "_model_name": "DescriptionStyleModel", 1615 | "_view_count": null, 1616 | "_view_module": "@jupyter-widgets/base", 1617 | "_view_module_version": "1.2.0", 1618 | "_view_name": "StyleView", 1619 | "description_width": "" 1620 | } 1621 | }, 1622 | "23552beda4c94263bafbe834ed283921": { 1623 | "model_module": "@jupyter-widgets/controls", 1624 | "model_name": "LabelModel", 1625 | "model_module_version": "1.5.0", 1626 | "state": { 1627 | "_dom_classes": [], 1628 | "_model_module": "@jupyter-widgets/controls", 1629 | "_model_module_version": "1.5.0", 1630 | "_model_name": "LabelModel", 1631 | "_view_count": null, 1632 | "_view_module": "@jupyter-widgets/controls", 1633 | "_view_module_version": "1.5.0", 1634 | "_view_name": "LabelView", 1635 | "description": "", 1636 | "description_tooltip": null, 1637 | "layout": "IPY_MODEL_a59dc124d8104c0ab29574e281885dd9", 1638 | "placeholder": "​", 1639 | "style": "IPY_MODEL_e2f52d9a90764140bbfcc4c6f6c4589c", 1640 | "value": "Connecting..." 
1641 | } 1642 | }, 1643 | "a59dc124d8104c0ab29574e281885dd9": { 1644 | "model_module": "@jupyter-widgets/base", 1645 | "model_name": "LayoutModel", 1646 | "model_module_version": "1.2.0", 1647 | "state": { 1648 | "_model_module": "@jupyter-widgets/base", 1649 | "_model_module_version": "1.2.0", 1650 | "_model_name": "LayoutModel", 1651 | "_view_count": null, 1652 | "_view_module": "@jupyter-widgets/base", 1653 | "_view_module_version": "1.2.0", 1654 | "_view_name": "LayoutView", 1655 | "align_content": null, 1656 | "align_items": null, 1657 | "align_self": null, 1658 | "border": null, 1659 | "bottom": null, 1660 | "display": null, 1661 | "flex": null, 1662 | "flex_flow": null, 1663 | "grid_area": null, 1664 | "grid_auto_columns": null, 1665 | "grid_auto_flow": null, 1666 | "grid_auto_rows": null, 1667 | "grid_column": null, 1668 | "grid_gap": null, 1669 | "grid_row": null, 1670 | "grid_template_areas": null, 1671 | "grid_template_columns": null, 1672 | "grid_template_rows": null, 1673 | "height": null, 1674 | "justify_content": null, 1675 | "justify_items": null, 1676 | "left": null, 1677 | "margin": null, 1678 | "max_height": null, 1679 | "max_width": null, 1680 | "min_height": null, 1681 | "min_width": null, 1682 | "object_fit": null, 1683 | "object_position": null, 1684 | "order": null, 1685 | "overflow": null, 1686 | "overflow_x": null, 1687 | "overflow_y": null, 1688 | "padding": null, 1689 | "right": null, 1690 | "top": null, 1691 | "visibility": null, 1692 | "width": null 1693 | } 1694 | }, 1695 | "e2f52d9a90764140bbfcc4c6f6c4589c": { 1696 | "model_module": "@jupyter-widgets/controls", 1697 | "model_name": "DescriptionStyleModel", 1698 | "model_module_version": "1.5.0", 1699 | "state": { 1700 | "_model_module": "@jupyter-widgets/controls", 1701 | "_model_module_version": "1.5.0", 1702 | "_model_name": "DescriptionStyleModel", 1703 | "_view_count": null, 1704 | "_view_module": "@jupyter-widgets/base", 1705 | "_view_module_version": "1.2.0", 1706 | "_view_name": "StyleView", 1707 | "description_width": "" 1708 | } 1709 | }, 1710 | "123e74f8e0a8412f9bbf8b2e918d99d8": { 1711 | "model_module": "@jupyter-widgets/controls", 1712 | "model_name": "LabelModel", 1713 | "model_module_version": "1.5.0", 1714 | "state": { 1715 | "_dom_classes": [], 1716 | "_model_module": "@jupyter-widgets/controls", 1717 | "_model_module_version": "1.5.0", 1718 | "_model_name": "LabelModel", 1719 | "_view_count": null, 1720 | "_view_module": "@jupyter-widgets/controls", 1721 | "_view_module_version": "1.5.0", 1722 | "_view_name": "LabelView", 1723 | "description": "", 1724 | "description_tooltip": null, 1725 | "layout": "IPY_MODEL_a3b62f55344340fa9f0db744ff2c8c70", 1726 | "placeholder": "​", 1727 | "style": "IPY_MODEL_e4ee7889597f4f078504df0a19976c69", 1728 | "value": "Token is valid (permission: write)." 
1729 | } 1730 | }, 1731 | "523f4a3541094d38a8dbc4dd3ef9c928": { 1732 | "model_module": "@jupyter-widgets/controls", 1733 | "model_name": "LabelModel", 1734 | "model_module_version": "1.5.0", 1735 | "state": { 1736 | "_dom_classes": [], 1737 | "_model_module": "@jupyter-widgets/controls", 1738 | "_model_module_version": "1.5.0", 1739 | "_model_name": "LabelModel", 1740 | "_view_count": null, 1741 | "_view_module": "@jupyter-widgets/controls", 1742 | "_view_module_version": "1.5.0", 1743 | "_view_name": "LabelView", 1744 | "description": "", 1745 | "description_tooltip": null, 1746 | "layout": "IPY_MODEL_7a98f305582c46d890a3f599f4dc9cf3", 1747 | "placeholder": "​", 1748 | "style": "IPY_MODEL_803b81733b7e4bafa94100201436ceb9", 1749 | "value": "Your token has been saved in your configured git credential helpers (store)." 1750 | } 1751 | }, 1752 | "b20db9890b464247a9ef7e4fb1adb732": { 1753 | "model_module": "@jupyter-widgets/controls", 1754 | "model_name": "LabelModel", 1755 | "model_module_version": "1.5.0", 1756 | "state": { 1757 | "_dom_classes": [], 1758 | "_model_module": "@jupyter-widgets/controls", 1759 | "_model_module_version": "1.5.0", 1760 | "_model_name": "LabelModel", 1761 | "_view_count": null, 1762 | "_view_module": "@jupyter-widgets/controls", 1763 | "_view_module_version": "1.5.0", 1764 | "_view_name": "LabelView", 1765 | "description": "", 1766 | "description_tooltip": null, 1767 | "layout": "IPY_MODEL_bc2570f343ed47e7bd296162db9e43a7", 1768 | "placeholder": "​", 1769 | "style": "IPY_MODEL_7efc9dc702fd40469875f367a90c126a", 1770 | "value": "Your token has been saved to /root/.cache/huggingface/token" 1771 | } 1772 | }, 1773 | "b3984a95687d4b978a25fa153ec0a877": { 1774 | "model_module": "@jupyter-widgets/controls", 1775 | "model_name": "LabelModel", 1776 | "model_module_version": "1.5.0", 1777 | "state": { 1778 | "_dom_classes": [], 1779 | "_model_module": "@jupyter-widgets/controls", 1780 | "_model_module_version": "1.5.0", 1781 | "_model_name": "LabelModel", 1782 | "_view_count": null, 1783 | "_view_module": "@jupyter-widgets/controls", 1784 | "_view_module_version": "1.5.0", 1785 | "_view_name": "LabelView", 1786 | "description": "", 1787 | "description_tooltip": null, 1788 | "layout": "IPY_MODEL_985f228d52e34ac39be0fd52a2dce7c3", 1789 | "placeholder": "​", 1790 | "style": "IPY_MODEL_7879ea9286004824b34798805700d5b7", 1791 | "value": "Login successful" 1792 | } 1793 | }, 1794 | "a3b62f55344340fa9f0db744ff2c8c70": { 1795 | "model_module": "@jupyter-widgets/base", 1796 | "model_name": "LayoutModel", 1797 | "model_module_version": "1.2.0", 1798 | "state": { 1799 | "_model_module": "@jupyter-widgets/base", 1800 | "_model_module_version": "1.2.0", 1801 | "_model_name": "LayoutModel", 1802 | "_view_count": null, 1803 | "_view_module": "@jupyter-widgets/base", 1804 | "_view_module_version": "1.2.0", 1805 | "_view_name": "LayoutView", 1806 | "align_content": null, 1807 | "align_items": null, 1808 | "align_self": null, 1809 | "border": null, 1810 | "bottom": null, 1811 | "display": null, 1812 | "flex": null, 1813 | "flex_flow": null, 1814 | "grid_area": null, 1815 | "grid_auto_columns": null, 1816 | "grid_auto_flow": null, 1817 | "grid_auto_rows": null, 1818 | "grid_column": null, 1819 | "grid_gap": null, 1820 | "grid_row": null, 1821 | "grid_template_areas": null, 1822 | "grid_template_columns": null, 1823 | "grid_template_rows": null, 1824 | "height": null, 1825 | "justify_content": null, 1826 | "justify_items": null, 1827 | "left": null, 1828 | "margin": null, 1829 | "max_height": 
null, 1830 | "max_width": null, 1831 | "min_height": null, 1832 | "min_width": null, 1833 | "object_fit": null, 1834 | "object_position": null, 1835 | "order": null, 1836 | "overflow": null, 1837 | "overflow_x": null, 1838 | "overflow_y": null, 1839 | "padding": null, 1840 | "right": null, 1841 | "top": null, 1842 | "visibility": null, 1843 | "width": null 1844 | } 1845 | }, 1846 | "e4ee7889597f4f078504df0a19976c69": { 1847 | "model_module": "@jupyter-widgets/controls", 1848 | "model_name": "DescriptionStyleModel", 1849 | "model_module_version": "1.5.0", 1850 | "state": { 1851 | "_model_module": "@jupyter-widgets/controls", 1852 | "_model_module_version": "1.5.0", 1853 | "_model_name": "DescriptionStyleModel", 1854 | "_view_count": null, 1855 | "_view_module": "@jupyter-widgets/base", 1856 | "_view_module_version": "1.2.0", 1857 | "_view_name": "StyleView", 1858 | "description_width": "" 1859 | } 1860 | }, 1861 | "7a98f305582c46d890a3f599f4dc9cf3": { 1862 | "model_module": "@jupyter-widgets/base", 1863 | "model_name": "LayoutModel", 1864 | "model_module_version": "1.2.0", 1865 | "state": { 1866 | "_model_module": "@jupyter-widgets/base", 1867 | "_model_module_version": "1.2.0", 1868 | "_model_name": "LayoutModel", 1869 | "_view_count": null, 1870 | "_view_module": "@jupyter-widgets/base", 1871 | "_view_module_version": "1.2.0", 1872 | "_view_name": "LayoutView", 1873 | "align_content": null, 1874 | "align_items": null, 1875 | "align_self": null, 1876 | "border": null, 1877 | "bottom": null, 1878 | "display": null, 1879 | "flex": null, 1880 | "flex_flow": null, 1881 | "grid_area": null, 1882 | "grid_auto_columns": null, 1883 | "grid_auto_flow": null, 1884 | "grid_auto_rows": null, 1885 | "grid_column": null, 1886 | "grid_gap": null, 1887 | "grid_row": null, 1888 | "grid_template_areas": null, 1889 | "grid_template_columns": null, 1890 | "grid_template_rows": null, 1891 | "height": null, 1892 | "justify_content": null, 1893 | "justify_items": null, 1894 | "left": null, 1895 | "margin": null, 1896 | "max_height": null, 1897 | "max_width": null, 1898 | "min_height": null, 1899 | "min_width": null, 1900 | "object_fit": null, 1901 | "object_position": null, 1902 | "order": null, 1903 | "overflow": null, 1904 | "overflow_x": null, 1905 | "overflow_y": null, 1906 | "padding": null, 1907 | "right": null, 1908 | "top": null, 1909 | "visibility": null, 1910 | "width": null 1911 | } 1912 | }, 1913 | "803b81733b7e4bafa94100201436ceb9": { 1914 | "model_module": "@jupyter-widgets/controls", 1915 | "model_name": "DescriptionStyleModel", 1916 | "model_module_version": "1.5.0", 1917 | "state": { 1918 | "_model_module": "@jupyter-widgets/controls", 1919 | "_model_module_version": "1.5.0", 1920 | "_model_name": "DescriptionStyleModel", 1921 | "_view_count": null, 1922 | "_view_module": "@jupyter-widgets/base", 1923 | "_view_module_version": "1.2.0", 1924 | "_view_name": "StyleView", 1925 | "description_width": "" 1926 | } 1927 | }, 1928 | "bc2570f343ed47e7bd296162db9e43a7": { 1929 | "model_module": "@jupyter-widgets/base", 1930 | "model_name": "LayoutModel", 1931 | "model_module_version": "1.2.0", 1932 | "state": { 1933 | "_model_module": "@jupyter-widgets/base", 1934 | "_model_module_version": "1.2.0", 1935 | "_model_name": "LayoutModel", 1936 | "_view_count": null, 1937 | "_view_module": "@jupyter-widgets/base", 1938 | "_view_module_version": "1.2.0", 1939 | "_view_name": "LayoutView", 1940 | "align_content": null, 1941 | "align_items": null, 1942 | "align_self": null, 1943 | "border": null, 1944 | 
"bottom": null, 1945 | "display": null, 1946 | "flex": null, 1947 | "flex_flow": null, 1948 | "grid_area": null, 1949 | "grid_auto_columns": null, 1950 | "grid_auto_flow": null, 1951 | "grid_auto_rows": null, 1952 | "grid_column": null, 1953 | "grid_gap": null, 1954 | "grid_row": null, 1955 | "grid_template_areas": null, 1956 | "grid_template_columns": null, 1957 | "grid_template_rows": null, 1958 | "height": null, 1959 | "justify_content": null, 1960 | "justify_items": null, 1961 | "left": null, 1962 | "margin": null, 1963 | "max_height": null, 1964 | "max_width": null, 1965 | "min_height": null, 1966 | "min_width": null, 1967 | "object_fit": null, 1968 | "object_position": null, 1969 | "order": null, 1970 | "overflow": null, 1971 | "overflow_x": null, 1972 | "overflow_y": null, 1973 | "padding": null, 1974 | "right": null, 1975 | "top": null, 1976 | "visibility": null, 1977 | "width": null 1978 | } 1979 | }, 1980 | "7efc9dc702fd40469875f367a90c126a": { 1981 | "model_module": "@jupyter-widgets/controls", 1982 | "model_name": "DescriptionStyleModel", 1983 | "model_module_version": "1.5.0", 1984 | "state": { 1985 | "_model_module": "@jupyter-widgets/controls", 1986 | "_model_module_version": "1.5.0", 1987 | "_model_name": "DescriptionStyleModel", 1988 | "_view_count": null, 1989 | "_view_module": "@jupyter-widgets/base", 1990 | "_view_module_version": "1.2.0", 1991 | "_view_name": "StyleView", 1992 | "description_width": "" 1993 | } 1994 | }, 1995 | "985f228d52e34ac39be0fd52a2dce7c3": { 1996 | "model_module": "@jupyter-widgets/base", 1997 | "model_name": "LayoutModel", 1998 | "model_module_version": "1.2.0", 1999 | "state": { 2000 | "_model_module": "@jupyter-widgets/base", 2001 | "_model_module_version": "1.2.0", 2002 | "_model_name": "LayoutModel", 2003 | "_view_count": null, 2004 | "_view_module": "@jupyter-widgets/base", 2005 | "_view_module_version": "1.2.0", 2006 | "_view_name": "LayoutView", 2007 | "align_content": null, 2008 | "align_items": null, 2009 | "align_self": null, 2010 | "border": null, 2011 | "bottom": null, 2012 | "display": null, 2013 | "flex": null, 2014 | "flex_flow": null, 2015 | "grid_area": null, 2016 | "grid_auto_columns": null, 2017 | "grid_auto_flow": null, 2018 | "grid_auto_rows": null, 2019 | "grid_column": null, 2020 | "grid_gap": null, 2021 | "grid_row": null, 2022 | "grid_template_areas": null, 2023 | "grid_template_columns": null, 2024 | "grid_template_rows": null, 2025 | "height": null, 2026 | "justify_content": null, 2027 | "justify_items": null, 2028 | "left": null, 2029 | "margin": null, 2030 | "max_height": null, 2031 | "max_width": null, 2032 | "min_height": null, 2033 | "min_width": null, 2034 | "object_fit": null, 2035 | "object_position": null, 2036 | "order": null, 2037 | "overflow": null, 2038 | "overflow_x": null, 2039 | "overflow_y": null, 2040 | "padding": null, 2041 | "right": null, 2042 | "top": null, 2043 | "visibility": null, 2044 | "width": null 2045 | } 2046 | }, 2047 | "7879ea9286004824b34798805700d5b7": { 2048 | "model_module": "@jupyter-widgets/controls", 2049 | "model_name": "DescriptionStyleModel", 2050 | "model_module_version": "1.5.0", 2051 | "state": { 2052 | "_model_module": "@jupyter-widgets/controls", 2053 | "_model_module_version": "1.5.0", 2054 | "_model_name": "DescriptionStyleModel", 2055 | "_view_count": null, 2056 | "_view_module": "@jupyter-widgets/base", 2057 | "_view_module_version": "1.2.0", 2058 | "_view_name": "StyleView", 2059 | "description_width": "" 2060 | } 2061 | } 2062 | } 2063 | } 2064 | }, 2065 | 
"nbformat": 4, 2066 | "nbformat_minor": 0 2067 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Document-AI -------------------------------------------------------------------------------- /test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit1208/Document-AI/f096be50902e501b2ef3ad9144eea79f702907b1/test.png --------------------------------------------------------------------------------