├── README.md ├── RecurrentGemma_2b_colab.ipynb └── llama3_2_3b_colab.ipynb /README.md: -------------------------------------------------------------------------------- 1 | # GPU Poor LLM Notebooks ⚡ 2 | 3 | A good old T4 is more potent than you think! 4 | 5 | This repository is a collection of notebooks for running LLMs with fewer than 15B parameters using Transformers and Accelerate in `bfloat16`/`float16` on the mighty T4. 6 | 7 | I'll update this with each new LLM release. Feel free to open an issue with feature requests. 8 | 9 | LLMs covered so far: 10 | 1. Mathstral 7B 11 | 2. Gemma 2 9B 12 | 3. CodeGemma 13 | 4. RecurrentGemma 14 | 5. Mistral Nemo 12B 15 | -------------------------------------------------------------------------------- /RecurrentGemma_2b_colab.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [], 7 | "gpuType": "T4", 8 | "include_colab_link": true 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | }, 14 | "language_info": { 15 | "name": "python" 16 | }, 17 | "accelerator": "GPU", 18 | "widgets": { 19 | "application/vnd.jupyter.widget-state+json": { 20 | "e6f2b94e3bb345859811226dc345a6e3": { 21 | "model_module": "@jupyter-widgets/controls", 22 | "model_name": "HBoxModel", 23 | "model_module_version": "1.5.0", 24 | "state": { 25 | "_dom_classes": [], 26 | "_model_module": "@jupyter-widgets/controls", 27 | "_model_module_version": "1.5.0", 28 | "_model_name": "HBoxModel", 29 | "_view_count": null, 30 | "_view_module": "@jupyter-widgets/controls", 31 | "_view_module_version": "1.5.0", 32 | "_view_name": "HBoxView", 33 | "box_style": "", 34 | "children": [ 35 | "IPY_MODEL_ce12a18f666740eabe1f71be5152e7d8", 36 | "IPY_MODEL_76a4282443194092af1039de43c523c6", 37 | "IPY_MODEL_3775878f2e484e84b6cb0971d2d35a7f" 38 | ], 39 | "layout": "IPY_MODEL_df936531829f4c1580c04719de77059c" 40 | } 41 | }, 42 | 
"ce12a18f666740eabe1f71be5152e7d8": { 43 | "model_module": "@jupyter-widgets/controls", 44 | "model_name": "HTMLModel", 45 | "model_module_version": "1.5.0", 46 | "state": { 47 | "_dom_classes": [], 48 | "_model_module": "@jupyter-widgets/controls", 49 | "_model_module_version": "1.5.0", 50 | "_model_name": "HTMLModel", 51 | "_view_count": null, 52 | "_view_module": "@jupyter-widgets/controls", 53 | "_view_module_version": "1.5.0", 54 | "_view_name": "HTMLView", 55 | "description": "", 56 | "description_tooltip": null, 57 | "layout": "IPY_MODEL_ab60c88077984531aa08150c72fa7ab5", 58 | "placeholder": "​", 59 | "style": "IPY_MODEL_ad9901a17d3d4b1b898941d16c16f1cb", 60 | "value": "tokenizer_config.json: 100%" 61 | } 62 | }, 63 | "76a4282443194092af1039de43c523c6": { 64 | "model_module": "@jupyter-widgets/controls", 65 | "model_name": "FloatProgressModel", 66 | "model_module_version": "1.5.0", 67 | "state": { 68 | "_dom_classes": [], 69 | "_model_module": "@jupyter-widgets/controls", 70 | "_model_module_version": "1.5.0", 71 | "_model_name": "FloatProgressModel", 72 | "_view_count": null, 73 | "_view_module": "@jupyter-widgets/controls", 74 | "_view_module_version": "1.5.0", 75 | "_view_name": "ProgressView", 76 | "bar_style": "success", 77 | "description": "", 78 | "description_tooltip": null, 79 | "layout": "IPY_MODEL_f2fbf227d64e451681083a0ca189405c", 80 | "max": 40529, 81 | "min": 0, 82 | "orientation": "horizontal", 83 | "style": "IPY_MODEL_72364dfb4d994115bfd049dae5f53423", 84 | "value": 40529 85 | } 86 | }, 87 | "3775878f2e484e84b6cb0971d2d35a7f": { 88 | "model_module": "@jupyter-widgets/controls", 89 | "model_name": "HTMLModel", 90 | "model_module_version": "1.5.0", 91 | "state": { 92 | "_dom_classes": [], 93 | "_model_module": "@jupyter-widgets/controls", 94 | "_model_module_version": "1.5.0", 95 | "_model_name": "HTMLModel", 96 | "_view_count": null, 97 | "_view_module": "@jupyter-widgets/controls", 98 | "_view_module_version": "1.5.0", 99 | "_view_name": 
"HTMLView", 100 | "description": "", 101 | "description_tooltip": null, 102 | "layout": "IPY_MODEL_fcbe52122eeb466d91b44011f7f9bc47", 103 | "placeholder": "​", 104 | "style": "IPY_MODEL_c0915cfb93634ede86568eface4115d8", 105 | "value": " 40.5k/40.5k [00:00<00:00, 670kB/s]" 106 | } 107 | }, 108 | "df936531829f4c1580c04719de77059c": { 109 | "model_module": "@jupyter-widgets/base", 110 | "model_name": "LayoutModel", 111 | "model_module_version": "1.2.0", 112 | "state": { 113 | "_model_module": "@jupyter-widgets/base", 114 | "_model_module_version": "1.2.0", 115 | "_model_name": "LayoutModel", 116 | "_view_count": null, 117 | "_view_module": "@jupyter-widgets/base", 118 | "_view_module_version": "1.2.0", 119 | "_view_name": "LayoutView", 120 | "align_content": null, 121 | "align_items": null, 122 | "align_self": null, 123 | "border": null, 124 | "bottom": null, 125 | "display": null, 126 | "flex": null, 127 | "flex_flow": null, 128 | "grid_area": null, 129 | "grid_auto_columns": null, 130 | "grid_auto_flow": null, 131 | "grid_auto_rows": null, 132 | "grid_column": null, 133 | "grid_gap": null, 134 | "grid_row": null, 135 | "grid_template_areas": null, 136 | "grid_template_columns": null, 137 | "grid_template_rows": null, 138 | "height": null, 139 | "justify_content": null, 140 | "justify_items": null, 141 | "left": null, 142 | "margin": null, 143 | "max_height": null, 144 | "max_width": null, 145 | "min_height": null, 146 | "min_width": null, 147 | "object_fit": null, 148 | "object_position": null, 149 | "order": null, 150 | "overflow": null, 151 | "overflow_x": null, 152 | "overflow_y": null, 153 | "padding": null, 154 | "right": null, 155 | "top": null, 156 | "visibility": null, 157 | "width": null 158 | } 159 | }, 160 | "ab60c88077984531aa08150c72fa7ab5": { 161 | "model_module": "@jupyter-widgets/base", 162 | "model_name": "LayoutModel", 163 | "model_module_version": "1.2.0", 164 | "state": { 165 | "_model_module": "@jupyter-widgets/base", 166 | 
"_model_module_version": "1.2.0", 167 | "_model_name": "LayoutModel", 168 | "_view_count": null, 169 | "_view_module": "@jupyter-widgets/base", 170 | "_view_module_version": "1.2.0", 171 | "_view_name": "LayoutView", 172 | "align_content": null, 173 | "align_items": null, 174 | "align_self": null, 175 | "border": null, 176 | "bottom": null, 177 | "display": null, 178 | "flex": null, 179 | "flex_flow": null, 180 | "grid_area": null, 181 | "grid_auto_columns": null, 182 | "grid_auto_flow": null, 183 | "grid_auto_rows": null, 184 | "grid_column": null, 185 | "grid_gap": null, 186 | "grid_row": null, 187 | "grid_template_areas": null, 188 | "grid_template_columns": null, 189 | "grid_template_rows": null, 190 | "height": null, 191 | "justify_content": null, 192 | "justify_items": null, 193 | "left": null, 194 | "margin": null, 195 | "max_height": null, 196 | "max_width": null, 197 | "min_height": null, 198 | "min_width": null, 199 | "object_fit": null, 200 | "object_position": null, 201 | "order": null, 202 | "overflow": null, 203 | "overflow_x": null, 204 | "overflow_y": null, 205 | "padding": null, 206 | "right": null, 207 | "top": null, 208 | "visibility": null, 209 | "width": null 210 | } 211 | }, 212 | "ad9901a17d3d4b1b898941d16c16f1cb": { 213 | "model_module": "@jupyter-widgets/controls", 214 | "model_name": "DescriptionStyleModel", 215 | "model_module_version": "1.5.0", 216 | "state": { 217 | "_model_module": "@jupyter-widgets/controls", 218 | "_model_module_version": "1.5.0", 219 | "_model_name": "DescriptionStyleModel", 220 | "_view_count": null, 221 | "_view_module": "@jupyter-widgets/base", 222 | "_view_module_version": "1.2.0", 223 | "_view_name": "StyleView", 224 | "description_width": "" 225 | } 226 | }, 227 | "f2fbf227d64e451681083a0ca189405c": { 228 | "model_module": "@jupyter-widgets/base", 229 | "model_name": "LayoutModel", 230 | "model_module_version": "1.2.0", 231 | "state": { 232 | "_model_module": "@jupyter-widgets/base", 233 | 
"_model_module_version": "1.2.0", 234 | "_model_name": "LayoutModel", 235 | "_view_count": null, 236 | "_view_module": "@jupyter-widgets/base", 237 | "_view_module_version": "1.2.0", 238 | "_view_name": "LayoutView", 239 | "align_content": null, 240 | "align_items": null, 241 | "align_self": null, 242 | "border": null, 243 | "bottom": null, 244 | "display": null, 245 | "flex": null, 246 | "flex_flow": null, 247 | "grid_area": null, 248 | "grid_auto_columns": null, 249 | "grid_auto_flow": null, 250 | "grid_auto_rows": null, 251 | "grid_column": null, 252 | "grid_gap": null, 253 | "grid_row": null, 254 | "grid_template_areas": null, 255 | "grid_template_columns": null, 256 | "grid_template_rows": null, 257 | "height": null, 258 | "justify_content": null, 259 | "justify_items": null, 260 | "left": null, 261 | "margin": null, 262 | "max_height": null, 263 | "max_width": null, 264 | "min_height": null, 265 | "min_width": null, 266 | "object_fit": null, 267 | "object_position": null, 268 | "order": null, 269 | "overflow": null, 270 | "overflow_x": null, 271 | "overflow_y": null, 272 | "padding": null, 273 | "right": null, 274 | "top": null, 275 | "visibility": null, 276 | "width": null 277 | } 278 | }, 279 | "72364dfb4d994115bfd049dae5f53423": { 280 | "model_module": "@jupyter-widgets/controls", 281 | "model_name": "ProgressStyleModel", 282 | "model_module_version": "1.5.0", 283 | "state": { 284 | "_model_module": "@jupyter-widgets/controls", 285 | "_model_module_version": "1.5.0", 286 | "_model_name": "ProgressStyleModel", 287 | "_view_count": null, 288 | "_view_module": "@jupyter-widgets/base", 289 | "_view_module_version": "1.2.0", 290 | "_view_name": "StyleView", 291 | "bar_color": null, 292 | "description_width": "" 293 | } 294 | }, 295 | "fcbe52122eeb466d91b44011f7f9bc47": { 296 | "model_module": "@jupyter-widgets/base", 297 | "model_name": "LayoutModel", 298 | "model_module_version": "1.2.0", 299 | "state": { 300 | "_model_module": "@jupyter-widgets/base", 301 | 
"_model_module_version": "1.2.0", 302 | "_model_name": "LayoutModel", 303 | "_view_count": null, 304 | "_view_module": "@jupyter-widgets/base", 305 | "_view_module_version": "1.2.0", 306 | "_view_name": "LayoutView", 307 | "align_content": null, 308 | "align_items": null, 309 | "align_self": null, 310 | "border": null, 311 | "bottom": null, 312 | "display": null, 313 | "flex": null, 314 | "flex_flow": null, 315 | "grid_area": null, 316 | "grid_auto_columns": null, 317 | "grid_auto_flow": null, 318 | "grid_auto_rows": null, 319 | "grid_column": null, 320 | "grid_gap": null, 321 | "grid_row": null, 322 | "grid_template_areas": null, 323 | "grid_template_columns": null, 324 | "grid_template_rows": null, 325 | "height": null, 326 | "justify_content": null, 327 | "justify_items": null, 328 | "left": null, 329 | "margin": null, 330 | "max_height": null, 331 | "max_width": null, 332 | "min_height": null, 333 | "min_width": null, 334 | "object_fit": null, 335 | "object_position": null, 336 | "order": null, 337 | "overflow": null, 338 | "overflow_x": null, 339 | "overflow_y": null, 340 | "padding": null, 341 | "right": null, 342 | "top": null, 343 | "visibility": null, 344 | "width": null 345 | } 346 | }, 347 | "c0915cfb93634ede86568eface4115d8": { 348 | "model_module": "@jupyter-widgets/controls", 349 | "model_name": "DescriptionStyleModel", 350 | "model_module_version": "1.5.0", 351 | "state": { 352 | "_model_module": "@jupyter-widgets/controls", 353 | "_model_module_version": "1.5.0", 354 | "_model_name": "DescriptionStyleModel", 355 | "_view_count": null, 356 | "_view_module": "@jupyter-widgets/base", 357 | "_view_module_version": "1.2.0", 358 | "_view_name": "StyleView", 359 | "description_width": "" 360 | } 361 | }, 362 | "401cecca00fb42b29f1ec3fd5cfa4396": { 363 | "model_module": "@jupyter-widgets/controls", 364 | "model_name": "HBoxModel", 365 | "model_module_version": "1.5.0", 366 | "state": { 367 | "_dom_classes": [], 368 | "_model_module": 
"@jupyter-widgets/controls", 369 | "_model_module_version": "1.5.0", 370 | "_model_name": "HBoxModel", 371 | "_view_count": null, 372 | "_view_module": "@jupyter-widgets/controls", 373 | "_view_module_version": "1.5.0", 374 | "_view_name": "HBoxView", 375 | "box_style": "", 376 | "children": [ 377 | "IPY_MODEL_e98c3904a7e346a5870c5ac768cd6a98", 378 | "IPY_MODEL_d908df5a5a8945dc88f8d0f147245bbd", 379 | "IPY_MODEL_f1a29b9608244a9db3cc919ad149ef48" 380 | ], 381 | "layout": "IPY_MODEL_d19c6905802d46c4beb6fe8886cb6e8c" 382 | } 383 | }, 384 | "e98c3904a7e346a5870c5ac768cd6a98": { 385 | "model_module": "@jupyter-widgets/controls", 386 | "model_name": "HTMLModel", 387 | "model_module_version": "1.5.0", 388 | "state": { 389 | "_dom_classes": [], 390 | "_model_module": "@jupyter-widgets/controls", 391 | "_model_module_version": "1.5.0", 392 | "_model_name": "HTMLModel", 393 | "_view_count": null, 394 | "_view_module": "@jupyter-widgets/controls", 395 | "_view_module_version": "1.5.0", 396 | "_view_name": "HTMLView", 397 | "description": "", 398 | "description_tooltip": null, 399 | "layout": "IPY_MODEL_f7d7bc20d2ba40eeb576e8865cdbb8ec", 400 | "placeholder": "​", 401 | "style": "IPY_MODEL_1a431c4a814941169c8feff1b4741052", 402 | "value": "Downloading shards: 100%" 403 | } 404 | }, 405 | "d908df5a5a8945dc88f8d0f147245bbd": { 406 | "model_module": "@jupyter-widgets/controls", 407 | "model_name": "FloatProgressModel", 408 | "model_module_version": "1.5.0", 409 | "state": { 410 | "_dom_classes": [], 411 | "_model_module": "@jupyter-widgets/controls", 412 | "_model_module_version": "1.5.0", 413 | "_model_name": "FloatProgressModel", 414 | "_view_count": null, 415 | "_view_module": "@jupyter-widgets/controls", 416 | "_view_module_version": "1.5.0", 417 | "_view_name": "ProgressView", 418 | "bar_style": "success", 419 | "description": "", 420 | "description_tooltip": null, 421 | "layout": "IPY_MODEL_21a66f11f21e4913a1a5a975727916f0", 422 | "max": 2, 423 | "min": 0, 424 | 
"orientation": "horizontal", 425 | "style": "IPY_MODEL_b931a5080c154b7dbbdcebf1a48aa9a3", 426 | "value": 2 427 | } 428 | }, 429 | "f1a29b9608244a9db3cc919ad149ef48": { 430 | "model_module": "@jupyter-widgets/controls", 431 | "model_name": "HTMLModel", 432 | "model_module_version": "1.5.0", 433 | "state": { 434 | "_dom_classes": [], 435 | "_model_module": "@jupyter-widgets/controls", 436 | "_model_module_version": "1.5.0", 437 | "_model_name": "HTMLModel", 438 | "_view_count": null, 439 | "_view_module": "@jupyter-widgets/controls", 440 | "_view_module_version": "1.5.0", 441 | "_view_name": "HTMLView", 442 | "description": "", 443 | "description_tooltip": null, 444 | "layout": "IPY_MODEL_6cc6f7e129fc47c9ac57d38f713c50ea", 445 | "placeholder": "​", 446 | "style": "IPY_MODEL_896598f7441c4e84b4c7963b520d6daf", 447 | "value": " 2/2 [00:00<00:00,  7.70it/s]" 448 | } 449 | }, 450 | "d19c6905802d46c4beb6fe8886cb6e8c": { 451 | "model_module": "@jupyter-widgets/base", 452 | "model_name": "LayoutModel", 453 | "model_module_version": "1.2.0", 454 | "state": { 455 | "_model_module": "@jupyter-widgets/base", 456 | "_model_module_version": "1.2.0", 457 | "_model_name": "LayoutModel", 458 | "_view_count": null, 459 | "_view_module": "@jupyter-widgets/base", 460 | "_view_module_version": "1.2.0", 461 | "_view_name": "LayoutView", 462 | "align_content": null, 463 | "align_items": null, 464 | "align_self": null, 465 | "border": null, 466 | "bottom": null, 467 | "display": null, 468 | "flex": null, 469 | "flex_flow": null, 470 | "grid_area": null, 471 | "grid_auto_columns": null, 472 | "grid_auto_flow": null, 473 | "grid_auto_rows": null, 474 | "grid_column": null, 475 | "grid_gap": null, 476 | "grid_row": null, 477 | "grid_template_areas": null, 478 | "grid_template_columns": null, 479 | "grid_template_rows": null, 480 | "height": null, 481 | "justify_content": null, 482 | "justify_items": null, 483 | "left": null, 484 | "margin": null, 485 | "max_height": null, 486 | "max_width": 
null, 487 | "min_height": null, 488 | "min_width": null, 489 | "object_fit": null, 490 | "object_position": null, 491 | "order": null, 492 | "overflow": null, 493 | "overflow_x": null, 494 | "overflow_y": null, 495 | "padding": null, 496 | "right": null, 497 | "top": null, 498 | "visibility": null, 499 | "width": null 500 | } 501 | }, 502 | "f7d7bc20d2ba40eeb576e8865cdbb8ec": { 503 | "model_module": "@jupyter-widgets/base", 504 | "model_name": "LayoutModel", 505 | "model_module_version": "1.2.0", 506 | "state": { 507 | "_model_module": "@jupyter-widgets/base", 508 | "_model_module_version": "1.2.0", 509 | "_model_name": "LayoutModel", 510 | "_view_count": null, 511 | "_view_module": "@jupyter-widgets/base", 512 | "_view_module_version": "1.2.0", 513 | "_view_name": "LayoutView", 514 | "align_content": null, 515 | "align_items": null, 516 | "align_self": null, 517 | "border": null, 518 | "bottom": null, 519 | "display": null, 520 | "flex": null, 521 | "flex_flow": null, 522 | "grid_area": null, 523 | "grid_auto_columns": null, 524 | "grid_auto_flow": null, 525 | "grid_auto_rows": null, 526 | "grid_column": null, 527 | "grid_gap": null, 528 | "grid_row": null, 529 | "grid_template_areas": null, 530 | "grid_template_columns": null, 531 | "grid_template_rows": null, 532 | "height": null, 533 | "justify_content": null, 534 | "justify_items": null, 535 | "left": null, 536 | "margin": null, 537 | "max_height": null, 538 | "max_width": null, 539 | "min_height": null, 540 | "min_width": null, 541 | "object_fit": null, 542 | "object_position": null, 543 | "order": null, 544 | "overflow": null, 545 | "overflow_x": null, 546 | "overflow_y": null, 547 | "padding": null, 548 | "right": null, 549 | "top": null, 550 | "visibility": null, 551 | "width": null 552 | } 553 | }, 554 | "1a431c4a814941169c8feff1b4741052": { 555 | "model_module": "@jupyter-widgets/controls", 556 | "model_name": "DescriptionStyleModel", 557 | "model_module_version": "1.5.0", 558 | "state": { 559 | 
"_model_module": "@jupyter-widgets/controls", 560 | "_model_module_version": "1.5.0", 561 | "_model_name": "DescriptionStyleModel", 562 | "_view_count": null, 563 | "_view_module": "@jupyter-widgets/base", 564 | "_view_module_version": "1.2.0", 565 | "_view_name": "StyleView", 566 | "description_width": "" 567 | } 568 | }, 569 | "21a66f11f21e4913a1a5a975727916f0": { 570 | "model_module": "@jupyter-widgets/base", 571 | "model_name": "LayoutModel", 572 | "model_module_version": "1.2.0", 573 | "state": { 574 | "_model_module": "@jupyter-widgets/base", 575 | "_model_module_version": "1.2.0", 576 | "_model_name": "LayoutModel", 577 | "_view_count": null, 578 | "_view_module": "@jupyter-widgets/base", 579 | "_view_module_version": "1.2.0", 580 | "_view_name": "LayoutView", 581 | "align_content": null, 582 | "align_items": null, 583 | "align_self": null, 584 | "border": null, 585 | "bottom": null, 586 | "display": null, 587 | "flex": null, 588 | "flex_flow": null, 589 | "grid_area": null, 590 | "grid_auto_columns": null, 591 | "grid_auto_flow": null, 592 | "grid_auto_rows": null, 593 | "grid_column": null, 594 | "grid_gap": null, 595 | "grid_row": null, 596 | "grid_template_areas": null, 597 | "grid_template_columns": null, 598 | "grid_template_rows": null, 599 | "height": null, 600 | "justify_content": null, 601 | "justify_items": null, 602 | "left": null, 603 | "margin": null, 604 | "max_height": null, 605 | "max_width": null, 606 | "min_height": null, 607 | "min_width": null, 608 | "object_fit": null, 609 | "object_position": null, 610 | "order": null, 611 | "overflow": null, 612 | "overflow_x": null, 613 | "overflow_y": null, 614 | "padding": null, 615 | "right": null, 616 | "top": null, 617 | "visibility": null, 618 | "width": null 619 | } 620 | }, 621 | "b931a5080c154b7dbbdcebf1a48aa9a3": { 622 | "model_module": "@jupyter-widgets/controls", 623 | "model_name": "ProgressStyleModel", 624 | "model_module_version": "1.5.0", 625 | "state": { 626 | "_model_module": 
"@jupyter-widgets/controls", 627 | "_model_module_version": "1.5.0", 628 | "_model_name": "ProgressStyleModel", 629 | "_view_count": null, 630 | "_view_module": "@jupyter-widgets/base", 631 | "_view_module_version": "1.2.0", 632 | "_view_name": "StyleView", 633 | "bar_color": null, 634 | "description_width": "" 635 | } 636 | }, 637 | "6cc6f7e129fc47c9ac57d38f713c50ea": { 638 | "model_module": "@jupyter-widgets/base", 639 | "model_name": "LayoutModel", 640 | "model_module_version": "1.2.0", 641 | "state": { 642 | "_model_module": "@jupyter-widgets/base", 643 | "_model_module_version": "1.2.0", 644 | "_model_name": "LayoutModel", 645 | "_view_count": null, 646 | "_view_module": "@jupyter-widgets/base", 647 | "_view_module_version": "1.2.0", 648 | "_view_name": "LayoutView", 649 | "align_content": null, 650 | "align_items": null, 651 | "align_self": null, 652 | "border": null, 653 | "bottom": null, 654 | "display": null, 655 | "flex": null, 656 | "flex_flow": null, 657 | "grid_area": null, 658 | "grid_auto_columns": null, 659 | "grid_auto_flow": null, 660 | "grid_auto_rows": null, 661 | "grid_column": null, 662 | "grid_gap": null, 663 | "grid_row": null, 664 | "grid_template_areas": null, 665 | "grid_template_columns": null, 666 | "grid_template_rows": null, 667 | "height": null, 668 | "justify_content": null, 669 | "justify_items": null, 670 | "left": null, 671 | "margin": null, 672 | "max_height": null, 673 | "max_width": null, 674 | "min_height": null, 675 | "min_width": null, 676 | "object_fit": null, 677 | "object_position": null, 678 | "order": null, 679 | "overflow": null, 680 | "overflow_x": null, 681 | "overflow_y": null, 682 | "padding": null, 683 | "right": null, 684 | "top": null, 685 | "visibility": null, 686 | "width": null 687 | } 688 | }, 689 | "896598f7441c4e84b4c7963b520d6daf": { 690 | "model_module": "@jupyter-widgets/controls", 691 | "model_name": "DescriptionStyleModel", 692 | "model_module_version": "1.5.0", 693 | "state": { 694 | 
"_model_module": "@jupyter-widgets/controls", 695 | "_model_module_version": "1.5.0", 696 | "_model_name": "DescriptionStyleModel", 697 | "_view_count": null, 698 | "_view_module": "@jupyter-widgets/base", 699 | "_view_module_version": "1.2.0", 700 | "_view_name": "StyleView", 701 | "description_width": "" 702 | } 703 | }, 704 | "b391b63b204848009b051b9c9a5062a3": { 705 | "model_module": "@jupyter-widgets/controls", 706 | "model_name": "HBoxModel", 707 | "model_module_version": "1.5.0", 708 | "state": { 709 | "_dom_classes": [], 710 | "_model_module": "@jupyter-widgets/controls", 711 | "_model_module_version": "1.5.0", 712 | "_model_name": "HBoxModel", 713 | "_view_count": null, 714 | "_view_module": "@jupyter-widgets/controls", 715 | "_view_module_version": "1.5.0", 716 | "_view_name": "HBoxView", 717 | "box_style": "", 718 | "children": [ 719 | "IPY_MODEL_0eee8063610d46139d7576ef02ddc228", 720 | "IPY_MODEL_5d3bd17d0aa44d84a91d3ac8255dc296", 721 | "IPY_MODEL_84981495b59f46009bde2cdbec478a5f" 722 | ], 723 | "layout": "IPY_MODEL_f51d9c821e3b4f558ad76706f99d76a6" 724 | } 725 | }, 726 | "0eee8063610d46139d7576ef02ddc228": { 727 | "model_module": "@jupyter-widgets/controls", 728 | "model_name": "HTMLModel", 729 | "model_module_version": "1.5.0", 730 | "state": { 731 | "_dom_classes": [], 732 | "_model_module": "@jupyter-widgets/controls", 733 | "_model_module_version": "1.5.0", 734 | "_model_name": "HTMLModel", 735 | "_view_count": null, 736 | "_view_module": "@jupyter-widgets/controls", 737 | "_view_module_version": "1.5.0", 738 | "_view_name": "HTMLView", 739 | "description": "", 740 | "description_tooltip": null, 741 | "layout": "IPY_MODEL_07e7af89197e489b877e309189e6ea53", 742 | "placeholder": "​", 743 | "style": "IPY_MODEL_66e1d6e6e45146a7a56d3e935f56ad51", 744 | "value": "Loading checkpoint shards: 100%" 745 | } 746 | }, 747 | "5d3bd17d0aa44d84a91d3ac8255dc296": { 748 | "model_module": "@jupyter-widgets/controls", 749 | "model_name": "FloatProgressModel", 750 | 
"model_module_version": "1.5.0", 751 | "state": { 752 | "_dom_classes": [], 753 | "_model_module": "@jupyter-widgets/controls", 754 | "_model_module_version": "1.5.0", 755 | "_model_name": "FloatProgressModel", 756 | "_view_count": null, 757 | "_view_module": "@jupyter-widgets/controls", 758 | "_view_module_version": "1.5.0", 759 | "_view_name": "ProgressView", 760 | "bar_style": "success", 761 | "description": "", 762 | "description_tooltip": null, 763 | "layout": "IPY_MODEL_85b6c5d6fdc745d4a533004de3c97408", 764 | "max": 2, 765 | "min": 0, 766 | "orientation": "horizontal", 767 | "style": "IPY_MODEL_9f5ddcf6583246af9ec1ebe7f23446d6", 768 | "value": 2 769 | } 770 | }, 771 | "84981495b59f46009bde2cdbec478a5f": { 772 | "model_module": "@jupyter-widgets/controls", 773 | "model_name": "HTMLModel", 774 | "model_module_version": "1.5.0", 775 | "state": { 776 | "_dom_classes": [], 777 | "_model_module": "@jupyter-widgets/controls", 778 | "_model_module_version": "1.5.0", 779 | "_model_name": "HTMLModel", 780 | "_view_count": null, 781 | "_view_module": "@jupyter-widgets/controls", 782 | "_view_module_version": "1.5.0", 783 | "_view_name": "HTMLView", 784 | "description": "", 785 | "description_tooltip": null, 786 | "layout": "IPY_MODEL_58b61a060918476c82be882ed6d5cc10", 787 | "placeholder": "​", 788 | "style": "IPY_MODEL_7a2fce54921c4062a739fb690387f156", 789 | "value": " 2/2 [00:19<00:00,  8.21s/it]" 790 | } 791 | }, 792 | "f51d9c821e3b4f558ad76706f99d76a6": { 793 | "model_module": "@jupyter-widgets/base", 794 | "model_name": "LayoutModel", 795 | "model_module_version": "1.2.0", 796 | "state": { 797 | "_model_module": "@jupyter-widgets/base", 798 | "_model_module_version": "1.2.0", 799 | "_model_name": "LayoutModel", 800 | "_view_count": null, 801 | "_view_module": "@jupyter-widgets/base", 802 | "_view_module_version": "1.2.0", 803 | "_view_name": "LayoutView", 804 | "align_content": null, 805 | "align_items": null, 806 | "align_self": null, 807 | "border": null, 808 | 
"bottom": null, 809 | "display": null, 810 | "flex": null, 811 | "flex_flow": null, 812 | "grid_area": null, 813 | "grid_auto_columns": null, 814 | "grid_auto_flow": null, 815 | "grid_auto_rows": null, 816 | "grid_column": null, 817 | "grid_gap": null, 818 | "grid_row": null, 819 | "grid_template_areas": null, 820 | "grid_template_columns": null, 821 | "grid_template_rows": null, 822 | "height": null, 823 | "justify_content": null, 824 | "justify_items": null, 825 | "left": null, 826 | "margin": null, 827 | "max_height": null, 828 | "max_width": null, 829 | "min_height": null, 830 | "min_width": null, 831 | "object_fit": null, 832 | "object_position": null, 833 | "order": null, 834 | "overflow": null, 835 | "overflow_x": null, 836 | "overflow_y": null, 837 | "padding": null, 838 | "right": null, 839 | "top": null, 840 | "visibility": null, 841 | "width": null 842 | } 843 | }, 844 | "07e7af89197e489b877e309189e6ea53": { 845 | "model_module": "@jupyter-widgets/base", 846 | "model_name": "LayoutModel", 847 | "model_module_version": "1.2.0", 848 | "state": { 849 | "_model_module": "@jupyter-widgets/base", 850 | "_model_module_version": "1.2.0", 851 | "_model_name": "LayoutModel", 852 | "_view_count": null, 853 | "_view_module": "@jupyter-widgets/base", 854 | "_view_module_version": "1.2.0", 855 | "_view_name": "LayoutView", 856 | "align_content": null, 857 | "align_items": null, 858 | "align_self": null, 859 | "border": null, 860 | "bottom": null, 861 | "display": null, 862 | "flex": null, 863 | "flex_flow": null, 864 | "grid_area": null, 865 | "grid_auto_columns": null, 866 | "grid_auto_flow": null, 867 | "grid_auto_rows": null, 868 | "grid_column": null, 869 | "grid_gap": null, 870 | "grid_row": null, 871 | "grid_template_areas": null, 872 | "grid_template_columns": null, 873 | "grid_template_rows": null, 874 | "height": null, 875 | "justify_content": null, 876 | "justify_items": null, 877 | "left": null, 878 | "margin": null, 879 | "max_height": null, 880 | 
"max_width": null, 881 | "min_height": null, 882 | "min_width": null, 883 | "object_fit": null, 884 | "object_position": null, 885 | "order": null, 886 | "overflow": null, 887 | "overflow_x": null, 888 | "overflow_y": null, 889 | "padding": null, 890 | "right": null, 891 | "top": null, 892 | "visibility": null, 893 | "width": null 894 | } 895 | }, 896 | "66e1d6e6e45146a7a56d3e935f56ad51": { 897 | "model_module": "@jupyter-widgets/controls", 898 | "model_name": "DescriptionStyleModel", 899 | "model_module_version": "1.5.0", 900 | "state": { 901 | "_model_module": "@jupyter-widgets/controls", 902 | "_model_module_version": "1.5.0", 903 | "_model_name": "DescriptionStyleModel", 904 | "_view_count": null, 905 | "_view_module": "@jupyter-widgets/base", 906 | "_view_module_version": "1.2.0", 907 | "_view_name": "StyleView", 908 | "description_width": "" 909 | } 910 | }, 911 | "85b6c5d6fdc745d4a533004de3c97408": { 912 | "model_module": "@jupyter-widgets/base", 913 | "model_name": "LayoutModel", 914 | "model_module_version": "1.2.0", 915 | "state": { 916 | "_model_module": "@jupyter-widgets/base", 917 | "_model_module_version": "1.2.0", 918 | "_model_name": "LayoutModel", 919 | "_view_count": null, 920 | "_view_module": "@jupyter-widgets/base", 921 | "_view_module_version": "1.2.0", 922 | "_view_name": "LayoutView", 923 | "align_content": null, 924 | "align_items": null, 925 | "align_self": null, 926 | "border": null, 927 | "bottom": null, 928 | "display": null, 929 | "flex": null, 930 | "flex_flow": null, 931 | "grid_area": null, 932 | "grid_auto_columns": null, 933 | "grid_auto_flow": null, 934 | "grid_auto_rows": null, 935 | "grid_column": null, 936 | "grid_gap": null, 937 | "grid_row": null, 938 | "grid_template_areas": null, 939 | "grid_template_columns": null, 940 | "grid_template_rows": null, 941 | "height": null, 942 | "justify_content": null, 943 | "justify_items": null, 944 | "left": null, 945 | "margin": null, 946 | "max_height": null, 947 | "max_width": null, 
948 | "min_height": null, 949 | "min_width": null, 950 | "object_fit": null, 951 | "object_position": null, 952 | "order": null, 953 | "overflow": null, 954 | "overflow_x": null, 955 | "overflow_y": null, 956 | "padding": null, 957 | "right": null, 958 | "top": null, 959 | "visibility": null, 960 | "width": null 961 | } 962 | }, 963 | "9f5ddcf6583246af9ec1ebe7f23446d6": { 964 | "model_module": "@jupyter-widgets/controls", 965 | "model_name": "ProgressStyleModel", 966 | "model_module_version": "1.5.0", 967 | "state": { 968 | "_model_module": "@jupyter-widgets/controls", 969 | "_model_module_version": "1.5.0", 970 | "_model_name": "ProgressStyleModel", 971 | "_view_count": null, 972 | "_view_module": "@jupyter-widgets/base", 973 | "_view_module_version": "1.2.0", 974 | "_view_name": "StyleView", 975 | "bar_color": null, 976 | "description_width": "" 977 | } 978 | }, 979 | "58b61a060918476c82be882ed6d5cc10": { 980 | "model_module": "@jupyter-widgets/base", 981 | "model_name": "LayoutModel", 982 | "model_module_version": "1.2.0", 983 | "state": { 984 | "_model_module": "@jupyter-widgets/base", 985 | "_model_module_version": "1.2.0", 986 | "_model_name": "LayoutModel", 987 | "_view_count": null, 988 | "_view_module": "@jupyter-widgets/base", 989 | "_view_module_version": "1.2.0", 990 | "_view_name": "LayoutView", 991 | "align_content": null, 992 | "align_items": null, 993 | "align_self": null, 994 | "border": null, 995 | "bottom": null, 996 | "display": null, 997 | "flex": null, 998 | "flex_flow": null, 999 | "grid_area": null, 1000 | "grid_auto_columns": null, 1001 | "grid_auto_flow": null, 1002 | "grid_auto_rows": null, 1003 | "grid_column": null, 1004 | "grid_gap": null, 1005 | "grid_row": null, 1006 | "grid_template_areas": null, 1007 | "grid_template_columns": null, 1008 | "grid_template_rows": null, 1009 | "height": null, 1010 | "justify_content": null, 1011 | "justify_items": null, 1012 | "left": null, 1013 | "margin": null, 1014 | "max_height": null, 1015 | 
"max_width": null, 1016 | "min_height": null, 1017 | "min_width": null, 1018 | "object_fit": null, 1019 | "object_position": null, 1020 | "order": null, 1021 | "overflow": null, 1022 | "overflow_x": null, 1023 | "overflow_y": null, 1024 | "padding": null, 1025 | "right": null, 1026 | "top": null, 1027 | "visibility": null, 1028 | "width": null 1029 | } 1030 | }, 1031 | "7a2fce54921c4062a739fb690387f156": { 1032 | "model_module": "@jupyter-widgets/controls", 1033 | "model_name": "DescriptionStyleModel", 1034 | "model_module_version": "1.5.0", 1035 | "state": { 1036 | "_model_module": "@jupyter-widgets/controls", 1037 | "_model_module_version": "1.5.0", 1038 | "_model_name": "DescriptionStyleModel", 1039 | "_view_count": null, 1040 | "_view_module": "@jupyter-widgets/base", 1041 | "_view_module_version": "1.2.0", 1042 | "_view_name": "StyleView", 1043 | "description_width": "" 1044 | } 1045 | } 1046 | } 1047 | } 1048 | }, 1049 | "cells": [ 1050 | { 1051 | "cell_type": "markdown", 1052 | "metadata": { 1053 | "id": "view-in-github", 1054 | "colab_type": "text" 1055 | }, 1056 | "source": [ 1057 | "\"Open" 1058 | ] 1059 | }, 1060 | { 1061 | "cell_type": "markdown", 1062 | "source": [ 1063 | "# RecurrentGemma - 2B & 2B-it\n", 1064 | "\n", 1065 | "RecurrentGemma is a family of open language models built on a novel recurrent architecture developed at Google. Both pre-trained (2B) and instruction-tuned (2B-it) versions are available in English.\n", 1066 | "\n", 1067 | "Like Gemma, [RecurrentGemma](https://huggingface.co/google/recurrentgemma-2b-it) models are well-suited for a variety of text generation tasks, including question answering, summarization, and reasoning. Because of its novel architecture, RecurrentGemma requires less memory than Gemma and achieves faster inference when generating long sequences." 
1068 | ], 1069 | "metadata": { 1070 | "id": "MVkIfH6Cg7Fx" 1071 | } 1072 | }, 1073 | { 1074 | "cell_type": "code", 1075 | "execution_count": null, 1076 | "metadata": { 1077 | "colab": { 1078 | "base_uri": "https://localhost:8080/" 1079 | }, 1080 | "id": "ahVaTC6rEIVI", 1081 | "outputId": "2036392c-b381-4ca0-80ba-16ba8c87cde3" 1082 | }, 1083 | "outputs": [ 1084 | { 1085 | "output_type": "stream", 1086 | "name": "stdout", 1087 | "text": [ 1088 | "Collecting transformers==4.40.0.dev0\n", 1089 | " Downloading https://huggingface.co/datasets/reach-vb/random-wheels/resolve/main/transformers-4.40.0.dev0-py3-none-any.whl (8.8 MB)\n", 1090 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m8.8/8.8 MB\u001b[0m \u001b[31m30.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 1091 | "\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (3.13.3)\n", 1092 | "Requirement already satisfied: huggingface-hub<1.0,>=0.19.3 in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (0.20.3)\n", 1093 | "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (1.25.2)\n", 1094 | "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (24.0)\n", 1095 | "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (6.0.1)\n", 1096 | "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (2023.12.25)\n", 1097 | "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (2.31.0)\n", 1098 | "Requirement already satisfied: tokenizers<0.19,>=0.14 in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (0.15.2)\n", 1099 | "Requirement already 
satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (0.4.2)\n", 1100 | "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (4.66.2)\n", 1101 | "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.19.3->transformers==4.40.0.dev0) (2023.6.0)\n", 1102 | "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.19.3->transformers==4.40.0.dev0) (4.10.0)\n", 1103 | "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers==4.40.0.dev0) (3.3.2)\n", 1104 | "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers==4.40.0.dev0) (3.6)\n", 1105 | "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers==4.40.0.dev0) (2.0.7)\n", 1106 | "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers==4.40.0.dev0) (2024.2.2)\n", 1107 | "Installing collected packages: transformers\n", 1108 | " Attempting uninstall: transformers\n", 1109 | " Found existing installation: transformers 4.38.2\n", 1110 | " Uninstalling transformers-4.38.2:\n", 1111 | " Successfully uninstalled transformers-4.38.2\n", 1112 | "Successfully installed transformers-4.40.0.dev0\n" 1113 | ] 1114 | } 1115 | ], 1116 | "source": [ 1117 | "!pip install git+https://github.com/huggingface/transformers.git" 1118 | ] 1119 | }, 1120 | { 1121 | "cell_type": "markdown", 1122 | "source": [ 1123 | "## Load the model checkpoints\n", 1124 | "\n", 1125 | "Before running the code below, make sure to accept the model's terms and conditions here: https://huggingface.co/google/recurrentgemma-2b-it.\n" 1126 | ], 
1127 | "metadata": { 1128 | "id": "FZK4T_zHhL9Q" 1129 | } 1130 | }, 1131 | { 1132 | "cell_type": "code", 1133 | "source": [ 1134 | "import torch\n", 1135 | "from transformers import AutoTokenizer, AutoModelForCausalLM\n", 1136 | "\n", 1137 | "tokenizer = AutoTokenizer.from_pretrained(\"google/recurrentgemma-2b-it\")\n", 1138 | "model = AutoModelForCausalLM.from_pretrained(\"google/recurrentgemma-2b-it\", torch_dtype=torch.float16).to(\"cuda:0\")" 1139 | ], 1140 | "metadata": { 1141 | "colab": { 1142 | "base_uri": "https://localhost:8080/", 1143 | "height": 129, 1144 | "referenced_widgets": [ 1145 | "e6f2b94e3bb345859811226dc345a6e3", 1146 | "ce12a18f666740eabe1f71be5152e7d8", 1147 | "76a4282443194092af1039de43c523c6", 1148 | "3775878f2e484e84b6cb0971d2d35a7f", 1149 | "df936531829f4c1580c04719de77059c", 1150 | "ab60c88077984531aa08150c72fa7ab5", 1151 | "ad9901a17d3d4b1b898941d16c16f1cb", 1152 | "f2fbf227d64e451681083a0ca189405c", 1153 | "72364dfb4d994115bfd049dae5f53423", 1154 | "fcbe52122eeb466d91b44011f7f9bc47", 1155 | "c0915cfb93634ede86568eface4115d8", 1156 | "401cecca00fb42b29f1ec3fd5cfa4396", 1157 | "e98c3904a7e346a5870c5ac768cd6a98", 1158 | "d908df5a5a8945dc88f8d0f147245bbd", 1159 | "f1a29b9608244a9db3cc919ad149ef48", 1160 | "d19c6905802d46c4beb6fe8886cb6e8c", 1161 | "f7d7bc20d2ba40eeb576e8865cdbb8ec", 1162 | "1a431c4a814941169c8feff1b4741052", 1163 | "21a66f11f21e4913a1a5a975727916f0", 1164 | "b931a5080c154b7dbbdcebf1a48aa9a3", 1165 | "6cc6f7e129fc47c9ac57d38f713c50ea", 1166 | "896598f7441c4e84b4c7963b520d6daf", 1167 | "b391b63b204848009b051b9c9a5062a3", 1168 | "0eee8063610d46139d7576ef02ddc228", 1169 | "5d3bd17d0aa44d84a91d3ac8255dc296", 1170 | "84981495b59f46009bde2cdbec478a5f", 1171 | "f51d9c821e3b4f558ad76706f99d76a6", 1172 | "07e7af89197e489b877e309189e6ea53", 1173 | "66e1d6e6e45146a7a56d3e935f56ad51", 1174 | "85b6c5d6fdc745d4a533004de3c97408", 1175 | "9f5ddcf6583246af9ec1ebe7f23446d6", 1176 | "58b61a060918476c82be882ed6d5cc10", 1177 | 
"7a2fce54921c4062a739fb690387f156" 1178 | ] 1179 | }, 1180 | "id": "XItA_HZ-EPIR", 1181 | "outputId": "22b1edbc-c6d7-4ad0-b992-0f59682a30ce" 1182 | }, 1183 | "execution_count": null, 1184 | "outputs": [ 1185 | { 1186 | "output_type": "display_data", 1187 | "data": { 1188 | "text/plain": [ 1189 | "tokenizer_config.json: 0%| | 0.00/40.5k [00:00user\\nWrite a hello world program\\nmodel\\n```python\\nprint(\"Hello, world!\")\\n```\\n\\nThis program will print the message \"Hello, world!\" to the console.\\n\\n**Explanation:**\\n\\n* `print()` is a built-in Python function that prints the given argument to the console.\\n* `\"Hello, world!\"` is the string that will be printed.\\n\\n**Output:**\\n\\n```\\nHello, world!\\n```']\n" 1305 | ] 1306 | } 1307 | ] 1308 | }, 1309 | { 1310 | "cell_type": "markdown", 1311 | "source": [ 1312 | "Enjoy! There's much more you can do to maximise the output of your generation. Check out this guide: https://huggingface.co/docs/transformers/generation_strategies" 1313 | ], 1314 | "metadata": { 1315 | "id": "rkpXJ5sHwmMH" 1316 | } 1317 | } 1318 | ] 1319 | } -------------------------------------------------------------------------------- /llama3_2_3b_colab.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [], 7 | "gpuType": "T4", 8 | "authorship_tag": "ABX9TyOes6wtI/0rnFu0CghstjWU", 9 | "include_colab_link": true 10 | }, 11 | "kernelspec": { 12 | "name": "python3", 13 | "display_name": "Python 3" 14 | }, 15 | "language_info": { 16 | "name": "python" 17 | }, 18 | "accelerator": "GPU", 19 | "widgets": { 20 | "application/vnd.jupyter.widget-state+json": { 21 | "0b10add730774dcbb03cf0834fd0b724": { 22 | "model_module": "@jupyter-widgets/controls", 23 | "model_name": "HBoxModel", 24 | "model_module_version": "1.5.0", 25 | "state": { 26 | "_dom_classes": [], 27 | "_model_module": "@jupyter-widgets/controls", 
28 | "_model_module_version": "1.5.0", 29 | "_model_name": "HBoxModel", 30 | "_view_count": null, 31 | "_view_module": "@jupyter-widgets/controls", 32 | "_view_module_version": "1.5.0", 33 | "_view_name": "HBoxView", 34 | "box_style": "", 35 | "children": [ 36 | "IPY_MODEL_a74dd475cb0848c0bf343bb772ddac6a", 37 | "IPY_MODEL_3f3bf676a4dc4673a1acca0ef4616033", 38 | "IPY_MODEL_f7204a8184db44cb8bbb3d3e12478a0f" 39 | ], 40 | "layout": "IPY_MODEL_e1048e107534477897425885d08094a9" 41 | } 42 | }, 43 | "a74dd475cb0848c0bf343bb772ddac6a": { 44 | "model_module": "@jupyter-widgets/controls", 45 | "model_name": "HTMLModel", 46 | "model_module_version": "1.5.0", 47 | "state": { 48 | "_dom_classes": [], 49 | "_model_module": "@jupyter-widgets/controls", 50 | "_model_module_version": "1.5.0", 51 | "_model_name": "HTMLModel", 52 | "_view_count": null, 53 | "_view_module": "@jupyter-widgets/controls", 54 | "_view_module_version": "1.5.0", 55 | "_view_name": "HTMLView", 56 | "description": "", 57 | "description_tooltip": null, 58 | "layout": "IPY_MODEL_2fe67aae4b854a2c814cca55840b81c2", 59 | "placeholder": "​", 60 | "style": "IPY_MODEL_d8ae9d0d1d254353ba82b1574d7fe1dd", 61 | "value": "Downloading shards: 100%" 62 | } 63 | }, 64 | "3f3bf676a4dc4673a1acca0ef4616033": { 65 | "model_module": "@jupyter-widgets/controls", 66 | "model_name": "FloatProgressModel", 67 | "model_module_version": "1.5.0", 68 | "state": { 69 | "_dom_classes": [], 70 | "_model_module": "@jupyter-widgets/controls", 71 | "_model_module_version": "1.5.0", 72 | "_model_name": "FloatProgressModel", 73 | "_view_count": null, 74 | "_view_module": "@jupyter-widgets/controls", 75 | "_view_module_version": "1.5.0", 76 | "_view_name": "ProgressView", 77 | "bar_style": "success", 78 | "description": "", 79 | "description_tooltip": null, 80 | "layout": "IPY_MODEL_f7add879d74a48ae9ffc18c8b7694b4a", 81 | "max": 2, 82 | "min": 0, 83 | "orientation": "horizontal", 84 | "style": "IPY_MODEL_11655f24216f46b9a07e4944f70cede1", 85 | 
"value": 2 86 | } 87 | }, 88 | "f7204a8184db44cb8bbb3d3e12478a0f": { 89 | "model_module": "@jupyter-widgets/controls", 90 | "model_name": "HTMLModel", 91 | "model_module_version": "1.5.0", 92 | "state": { 93 | "_dom_classes": [], 94 | "_model_module": "@jupyter-widgets/controls", 95 | "_model_module_version": "1.5.0", 96 | "_model_name": "HTMLModel", 97 | "_view_count": null, 98 | "_view_module": "@jupyter-widgets/controls", 99 | "_view_module_version": "1.5.0", 100 | "_view_name": "HTMLView", 101 | "description": "", 102 | "description_tooltip": null, 103 | "layout": "IPY_MODEL_1d0264c3d2704b6bbe51583c4b4a3b96", 104 | "placeholder": "​", 105 | "style": "IPY_MODEL_7ec800ba8b1a4fbeae940b7aab631794", 106 | "value": " 2/2 [01:55<00:00, 58.51s/it]" 107 | } 108 | }, 109 | "e1048e107534477897425885d08094a9": { 110 | "model_module": "@jupyter-widgets/base", 111 | "model_name": "LayoutModel", 112 | "model_module_version": "1.2.0", 113 | "state": { 114 | "_model_module": "@jupyter-widgets/base", 115 | "_model_module_version": "1.2.0", 116 | "_model_name": "LayoutModel", 117 | "_view_count": null, 118 | "_view_module": "@jupyter-widgets/base", 119 | "_view_module_version": "1.2.0", 120 | "_view_name": "LayoutView", 121 | "align_content": null, 122 | "align_items": null, 123 | "align_self": null, 124 | "border": null, 125 | "bottom": null, 126 | "display": null, 127 | "flex": null, 128 | "flex_flow": null, 129 | "grid_area": null, 130 | "grid_auto_columns": null, 131 | "grid_auto_flow": null, 132 | "grid_auto_rows": null, 133 | "grid_column": null, 134 | "grid_gap": null, 135 | "grid_row": null, 136 | "grid_template_areas": null, 137 | "grid_template_columns": null, 138 | "grid_template_rows": null, 139 | "height": null, 140 | "justify_content": null, 141 | "justify_items": null, 142 | "left": null, 143 | "margin": null, 144 | "max_height": null, 145 | "max_width": null, 146 | "min_height": null, 147 | "min_width": null, 148 | "object_fit": null, 149 | "object_position": 
null, 150 | "order": null, 151 | "overflow": null, 152 | "overflow_x": null, 153 | "overflow_y": null, 154 | "padding": null, 155 | "right": null, 156 | "top": null, 157 | "visibility": null, 158 | "width": null 159 | } 160 | }, 161 | "2fe67aae4b854a2c814cca55840b81c2": { 162 | "model_module": "@jupyter-widgets/base", 163 | "model_name": "LayoutModel", 164 | "model_module_version": "1.2.0", 165 | "state": { 166 | "_model_module": "@jupyter-widgets/base", 167 | "_model_module_version": "1.2.0", 168 | "_model_name": "LayoutModel", 169 | "_view_count": null, 170 | "_view_module": "@jupyter-widgets/base", 171 | "_view_module_version": "1.2.0", 172 | "_view_name": "LayoutView", 173 | "align_content": null, 174 | "align_items": null, 175 | "align_self": null, 176 | "border": null, 177 | "bottom": null, 178 | "display": null, 179 | "flex": null, 180 | "flex_flow": null, 181 | "grid_area": null, 182 | "grid_auto_columns": null, 183 | "grid_auto_flow": null, 184 | "grid_auto_rows": null, 185 | "grid_column": null, 186 | "grid_gap": null, 187 | "grid_row": null, 188 | "grid_template_areas": null, 189 | "grid_template_columns": null, 190 | "grid_template_rows": null, 191 | "height": null, 192 | "justify_content": null, 193 | "justify_items": null, 194 | "left": null, 195 | "margin": null, 196 | "max_height": null, 197 | "max_width": null, 198 | "min_height": null, 199 | "min_width": null, 200 | "object_fit": null, 201 | "object_position": null, 202 | "order": null, 203 | "overflow": null, 204 | "overflow_x": null, 205 | "overflow_y": null, 206 | "padding": null, 207 | "right": null, 208 | "top": null, 209 | "visibility": null, 210 | "width": null 211 | } 212 | }, 213 | "d8ae9d0d1d254353ba82b1574d7fe1dd": { 214 | "model_module": "@jupyter-widgets/controls", 215 | "model_name": "DescriptionStyleModel", 216 | "model_module_version": "1.5.0", 217 | "state": { 218 | "_model_module": "@jupyter-widgets/controls", 219 | "_model_module_version": "1.5.0", 220 | "_model_name": 
"DescriptionStyleModel", 221 | "_view_count": null, 222 | "_view_module": "@jupyter-widgets/base", 223 | "_view_module_version": "1.2.0", 224 | "_view_name": "StyleView", 225 | "description_width": "" 226 | } 227 | }, 228 | "f7add879d74a48ae9ffc18c8b7694b4a": { 229 | "model_module": "@jupyter-widgets/base", 230 | "model_name": "LayoutModel", 231 | "model_module_version": "1.2.0", 232 | "state": { 233 | "_model_module": "@jupyter-widgets/base", 234 | "_model_module_version": "1.2.0", 235 | "_model_name": "LayoutModel", 236 | "_view_count": null, 237 | "_view_module": "@jupyter-widgets/base", 238 | "_view_module_version": "1.2.0", 239 | "_view_name": "LayoutView", 240 | "align_content": null, 241 | "align_items": null, 242 | "align_self": null, 243 | "border": null, 244 | "bottom": null, 245 | "display": null, 246 | "flex": null, 247 | "flex_flow": null, 248 | "grid_area": null, 249 | "grid_auto_columns": null, 250 | "grid_auto_flow": null, 251 | "grid_auto_rows": null, 252 | "grid_column": null, 253 | "grid_gap": null, 254 | "grid_row": null, 255 | "grid_template_areas": null, 256 | "grid_template_columns": null, 257 | "grid_template_rows": null, 258 | "height": null, 259 | "justify_content": null, 260 | "justify_items": null, 261 | "left": null, 262 | "margin": null, 263 | "max_height": null, 264 | "max_width": null, 265 | "min_height": null, 266 | "min_width": null, 267 | "object_fit": null, 268 | "object_position": null, 269 | "order": null, 270 | "overflow": null, 271 | "overflow_x": null, 272 | "overflow_y": null, 273 | "padding": null, 274 | "right": null, 275 | "top": null, 276 | "visibility": null, 277 | "width": null 278 | } 279 | }, 280 | "11655f24216f46b9a07e4944f70cede1": { 281 | "model_module": "@jupyter-widgets/controls", 282 | "model_name": "ProgressStyleModel", 283 | "model_module_version": "1.5.0", 284 | "state": { 285 | "_model_module": "@jupyter-widgets/controls", 286 | "_model_module_version": "1.5.0", 287 | "_model_name": "ProgressStyleModel", 
288 | "_view_count": null, 289 | "_view_module": "@jupyter-widgets/base", 290 | "_view_module_version": "1.2.0", 291 | "_view_name": "StyleView", 292 | "bar_color": null, 293 | "description_width": "" 294 | } 295 | }, 296 | "1d0264c3d2704b6bbe51583c4b4a3b96": { 297 | "model_module": "@jupyter-widgets/base", 298 | "model_name": "LayoutModel", 299 | "model_module_version": "1.2.0", 300 | "state": { 301 | "_model_module": "@jupyter-widgets/base", 302 | "_model_module_version": "1.2.0", 303 | "_model_name": "LayoutModel", 304 | "_view_count": null, 305 | "_view_module": "@jupyter-widgets/base", 306 | "_view_module_version": "1.2.0", 307 | "_view_name": "LayoutView", 308 | "align_content": null, 309 | "align_items": null, 310 | "align_self": null, 311 | "border": null, 312 | "bottom": null, 313 | "display": null, 314 | "flex": null, 315 | "flex_flow": null, 316 | "grid_area": null, 317 | "grid_auto_columns": null, 318 | "grid_auto_flow": null, 319 | "grid_auto_rows": null, 320 | "grid_column": null, 321 | "grid_gap": null, 322 | "grid_row": null, 323 | "grid_template_areas": null, 324 | "grid_template_columns": null, 325 | "grid_template_rows": null, 326 | "height": null, 327 | "justify_content": null, 328 | "justify_items": null, 329 | "left": null, 330 | "margin": null, 331 | "max_height": null, 332 | "max_width": null, 333 | "min_height": null, 334 | "min_width": null, 335 | "object_fit": null, 336 | "object_position": null, 337 | "order": null, 338 | "overflow": null, 339 | "overflow_x": null, 340 | "overflow_y": null, 341 | "padding": null, 342 | "right": null, 343 | "top": null, 344 | "visibility": null, 345 | "width": null 346 | } 347 | }, 348 | "7ec800ba8b1a4fbeae940b7aab631794": { 349 | "model_module": "@jupyter-widgets/controls", 350 | "model_name": "DescriptionStyleModel", 351 | "model_module_version": "1.5.0", 352 | "state": { 353 | "_model_module": "@jupyter-widgets/controls", 354 | "_model_module_version": "1.5.0", 355 | "_model_name": 
"DescriptionStyleModel", 356 | "_view_count": null, 357 | "_view_module": "@jupyter-widgets/base", 358 | "_view_module_version": "1.2.0", 359 | "_view_name": "StyleView", 360 | "description_width": "" 361 | } 362 | }, 363 | "943c1bafe9ec4618869cfd028a610213": { 364 | "model_module": "@jupyter-widgets/controls", 365 | "model_name": "HBoxModel", 366 | "model_module_version": "1.5.0", 367 | "state": { 368 | "_dom_classes": [], 369 | "_model_module": "@jupyter-widgets/controls", 370 | "_model_module_version": "1.5.0", 371 | "_model_name": "HBoxModel", 372 | "_view_count": null, 373 | "_view_module": "@jupyter-widgets/controls", 374 | "_view_module_version": "1.5.0", 375 | "_view_name": "HBoxView", 376 | "box_style": "", 377 | "children": [ 378 | "IPY_MODEL_abf1255b444e4cb9be53d1a556f2ca44", 379 | "IPY_MODEL_c77ab3e2cec54aa7b11fc1957c64389b", 380 | "IPY_MODEL_33da38440919410bae7be13caec4929d" 381 | ], 382 | "layout": "IPY_MODEL_71b9f62cd90e4c898518fe75267e0e8f" 383 | } 384 | }, 385 | "abf1255b444e4cb9be53d1a556f2ca44": { 386 | "model_module": "@jupyter-widgets/controls", 387 | "model_name": "HTMLModel", 388 | "model_module_version": "1.5.0", 389 | "state": { 390 | "_dom_classes": [], 391 | "_model_module": "@jupyter-widgets/controls", 392 | "_model_module_version": "1.5.0", 393 | "_model_name": "HTMLModel", 394 | "_view_count": null, 395 | "_view_module": "@jupyter-widgets/controls", 396 | "_view_module_version": "1.5.0", 397 | "_view_name": "HTMLView", 398 | "description": "", 399 | "description_tooltip": null, 400 | "layout": "IPY_MODEL_0ea2047d4cec4026bfa9ae05eb5d4e18", 401 | "placeholder": "​", 402 | "style": "IPY_MODEL_05c1d4ab928d480094cf672339a98f29", 403 | "value": "model-00001-of-00002.safetensors: 100%" 404 | } 405 | }, 406 | "c77ab3e2cec54aa7b11fc1957c64389b": { 407 | "model_module": "@jupyter-widgets/controls", 408 | "model_name": "FloatProgressModel", 409 | "model_module_version": "1.5.0", 410 | "state": { 411 | "_dom_classes": [], 412 | "_model_module": 
"@jupyter-widgets/controls", 413 | "_model_module_version": "1.5.0", 414 | "_model_name": "FloatProgressModel", 415 | "_view_count": null, 416 | "_view_module": "@jupyter-widgets/controls", 417 | "_view_module_version": "1.5.0", 418 | "_view_name": "ProgressView", 419 | "bar_style": "success", 420 | "description": "", 421 | "description_tooltip": null, 422 | "layout": "IPY_MODEL_d164fe1776bc413bae36e96fc0371907", 423 | "max": 4965799096, 424 | "min": 0, 425 | "orientation": "horizontal", 426 | "style": "IPY_MODEL_e8f2756358d24df9b887bfc3d5531e83", 427 | "value": 4965799096 428 | } 429 | }, 430 | "33da38440919410bae7be13caec4929d": { 431 | "model_module": "@jupyter-widgets/controls", 432 | "model_name": "HTMLModel", 433 | "model_module_version": "1.5.0", 434 | "state": { 435 | "_dom_classes": [], 436 | "_model_module": "@jupyter-widgets/controls", 437 | "_model_module_version": "1.5.0", 438 | "_model_name": "HTMLModel", 439 | "_view_count": null, 440 | "_view_module": "@jupyter-widgets/controls", 441 | "_view_module_version": "1.5.0", 442 | "_view_name": "HTMLView", 443 | "description": "", 444 | "description_tooltip": null, 445 | "layout": "IPY_MODEL_11e5200b4fee425991ceed611bfb560a", 446 | "placeholder": "​", 447 | "style": "IPY_MODEL_3a5b9c9609a544858f8cf78b0b72788f", 448 | "value": " 4.97G/4.97G [00:52<00:00, 38.9MB/s]" 449 | } 450 | }, 451 | "71b9f62cd90e4c898518fe75267e0e8f": { 452 | "model_module": "@jupyter-widgets/base", 453 | "model_name": "LayoutModel", 454 | "model_module_version": "1.2.0", 455 | "state": { 456 | "_model_module": "@jupyter-widgets/base", 457 | "_model_module_version": "1.2.0", 458 | "_model_name": "LayoutModel", 459 | "_view_count": null, 460 | "_view_module": "@jupyter-widgets/base", 461 | "_view_module_version": "1.2.0", 462 | "_view_name": "LayoutView", 463 | "align_content": null, 464 | "align_items": null, 465 | "align_self": null, 466 | "border": null, 467 | "bottom": null, 468 | "display": null, 469 | "flex": null, 470 | 
"flex_flow": null, 471 | "grid_area": null, 472 | "grid_auto_columns": null, 473 | "grid_auto_flow": null, 474 | "grid_auto_rows": null, 475 | "grid_column": null, 476 | "grid_gap": null, 477 | "grid_row": null, 478 | "grid_template_areas": null, 479 | "grid_template_columns": null, 480 | "grid_template_rows": null, 481 | "height": null, 482 | "justify_content": null, 483 | "justify_items": null, 484 | "left": null, 485 | "margin": null, 486 | "max_height": null, 487 | "max_width": null, 488 | "min_height": null, 489 | "min_width": null, 490 | "object_fit": null, 491 | "object_position": null, 492 | "order": null, 493 | "overflow": null, 494 | "overflow_x": null, 495 | "overflow_y": null, 496 | "padding": null, 497 | "right": null, 498 | "top": null, 499 | "visibility": null, 500 | "width": null 501 | } 502 | }, 503 | "0ea2047d4cec4026bfa9ae05eb5d4e18": { 504 | "model_module": "@jupyter-widgets/base", 505 | "model_name": "LayoutModel", 506 | "model_module_version": "1.2.0", 507 | "state": { 508 | "_model_module": "@jupyter-widgets/base", 509 | "_model_module_version": "1.2.0", 510 | "_model_name": "LayoutModel", 511 | "_view_count": null, 512 | "_view_module": "@jupyter-widgets/base", 513 | "_view_module_version": "1.2.0", 514 | "_view_name": "LayoutView", 515 | "align_content": null, 516 | "align_items": null, 517 | "align_self": null, 518 | "border": null, 519 | "bottom": null, 520 | "display": null, 521 | "flex": null, 522 | "flex_flow": null, 523 | "grid_area": null, 524 | "grid_auto_columns": null, 525 | "grid_auto_flow": null, 526 | "grid_auto_rows": null, 527 | "grid_column": null, 528 | "grid_gap": null, 529 | "grid_row": null, 530 | "grid_template_areas": null, 531 | "grid_template_columns": null, 532 | "grid_template_rows": null, 533 | "height": null, 534 | "justify_content": null, 535 | "justify_items": null, 536 | "left": null, 537 | "margin": null, 538 | "max_height": null, 539 | "max_width": null, 540 | "min_height": null, 541 | "min_width": null, 542 
| "object_fit": null, 543 | "object_position": null, 544 | "order": null, 545 | "overflow": null, 546 | "overflow_x": null, 547 | "overflow_y": null, 548 | "padding": null, 549 | "right": null, 550 | "top": null, 551 | "visibility": null, 552 | "width": null 553 | } 554 | }, 555 | "05c1d4ab928d480094cf672339a98f29": { 556 | "model_module": "@jupyter-widgets/controls", 557 | "model_name": "DescriptionStyleModel", 558 | "model_module_version": "1.5.0", 559 | "state": { 560 | "_model_module": "@jupyter-widgets/controls", 561 | "_model_module_version": "1.5.0", 562 | "_model_name": "DescriptionStyleModel", 563 | "_view_count": null, 564 | "_view_module": "@jupyter-widgets/base", 565 | "_view_module_version": "1.2.0", 566 | "_view_name": "StyleView", 567 | "description_width": "" 568 | } 569 | }, 570 | "d164fe1776bc413bae36e96fc0371907": { 571 | "model_module": "@jupyter-widgets/base", 572 | "model_name": "LayoutModel", 573 | "model_module_version": "1.2.0", 574 | "state": { 575 | "_model_module": "@jupyter-widgets/base", 576 | "_model_module_version": "1.2.0", 577 | "_model_name": "LayoutModel", 578 | "_view_count": null, 579 | "_view_module": "@jupyter-widgets/base", 580 | "_view_module_version": "1.2.0", 581 | "_view_name": "LayoutView", 582 | "align_content": null, 583 | "align_items": null, 584 | "align_self": null, 585 | "border": null, 586 | "bottom": null, 587 | "display": null, 588 | "flex": null, 589 | "flex_flow": null, 590 | "grid_area": null, 591 | "grid_auto_columns": null, 592 | "grid_auto_flow": null, 593 | "grid_auto_rows": null, 594 | "grid_column": null, 595 | "grid_gap": null, 596 | "grid_row": null, 597 | "grid_template_areas": null, 598 | "grid_template_columns": null, 599 | "grid_template_rows": null, 600 | "height": null, 601 | "justify_content": null, 602 | "justify_items": null, 603 | "left": null, 604 | "margin": null, 605 | "max_height": null, 606 | "max_width": null, 607 | "min_height": null, 608 | "min_width": null, 609 | "object_fit": 
null, 610 | "object_position": null, 611 | "order": null, 612 | "overflow": null, 613 | "overflow_x": null, 614 | "overflow_y": null, 615 | "padding": null, 616 | "right": null, 617 | "top": null, 618 | "visibility": null, 619 | "width": null 620 | } 621 | }, 622 | "e8f2756358d24df9b887bfc3d5531e83": { 623 | "model_module": "@jupyter-widgets/controls", 624 | "model_name": "ProgressStyleModel", 625 | "model_module_version": "1.5.0", 626 | "state": { 627 | "_model_module": "@jupyter-widgets/controls", 628 | "_model_module_version": "1.5.0", 629 | "_model_name": "ProgressStyleModel", 630 | "_view_count": null, 631 | "_view_module": "@jupyter-widgets/base", 632 | "_view_module_version": "1.2.0", 633 | "_view_name": "StyleView", 634 | "bar_color": null, 635 | "description_width": "" 636 | } 637 | }, 638 | "11e5200b4fee425991ceed611bfb560a": { 639 | "model_module": "@jupyter-widgets/base", 640 | "model_name": "LayoutModel", 641 | "model_module_version": "1.2.0", 642 | "state": { 643 | "_model_module": "@jupyter-widgets/base", 644 | "_model_module_version": "1.2.0", 645 | "_model_name": "LayoutModel", 646 | "_view_count": null, 647 | "_view_module": "@jupyter-widgets/base", 648 | "_view_module_version": "1.2.0", 649 | "_view_name": "LayoutView", 650 | "align_content": null, 651 | "align_items": null, 652 | "align_self": null, 653 | "border": null, 654 | "bottom": null, 655 | "display": null, 656 | "flex": null, 657 | "flex_flow": null, 658 | "grid_area": null, 659 | "grid_auto_columns": null, 660 | "grid_auto_flow": null, 661 | "grid_auto_rows": null, 662 | "grid_column": null, 663 | "grid_gap": null, 664 | "grid_row": null, 665 | "grid_template_areas": null, 666 | "grid_template_columns": null, 667 | "grid_template_rows": null, 668 | "height": null, 669 | "justify_content": null, 670 | "justify_items": null, 671 | "left": null, 672 | "margin": null, 673 | "max_height": null, 674 | "max_width": null, 675 | "min_height": null, 676 | "min_width": null, 677 | "object_fit": 
null, 678 | "object_position": null, 679 | "order": null, 680 | "overflow": null, 681 | "overflow_x": null, 682 | "overflow_y": null, 683 | "padding": null, 684 | "right": null, 685 | "top": null, 686 | "visibility": null, 687 | "width": null 688 | } 689 | }, 690 | "3a5b9c9609a544858f8cf78b0b72788f": { 691 | "model_module": "@jupyter-widgets/controls", 692 | "model_name": "DescriptionStyleModel", 693 | "model_module_version": "1.5.0", 694 | "state": { 695 | "_model_module": "@jupyter-widgets/controls", 696 | "_model_module_version": "1.5.0", 697 | "_model_name": "DescriptionStyleModel", 698 | "_view_count": null, 699 | "_view_module": "@jupyter-widgets/base", 700 | "_view_module_version": "1.2.0", 701 | "_view_name": "StyleView", 702 | "description_width": "" 703 | } 704 | }, 705 | "6e6d6dae6bc44498b6774c83685fb0b9": { 706 | "model_module": "@jupyter-widgets/controls", 707 | "model_name": "HBoxModel", 708 | "model_module_version": "1.5.0", 709 | "state": { 710 | "_dom_classes": [], 711 | "_model_module": "@jupyter-widgets/controls", 712 | "_model_module_version": "1.5.0", 713 | "_model_name": "HBoxModel", 714 | "_view_count": null, 715 | "_view_module": "@jupyter-widgets/controls", 716 | "_view_module_version": "1.5.0", 717 | "_view_name": "HBoxView", 718 | "box_style": "", 719 | "children": [ 720 | "IPY_MODEL_4156204d442346a79a9f7d5193b02e1e", 721 | "IPY_MODEL_32d71755c0f8460790b88f963ff1e4e6", 722 | "IPY_MODEL_23fba6d9a7044060a9b5a02ee7ba71e5" 723 | ], 724 | "layout": "IPY_MODEL_2da95739a2444ce896fb3ff81c59e6d0" 725 | } 726 | }, 727 | "4156204d442346a79a9f7d5193b02e1e": { 728 | "model_module": "@jupyter-widgets/controls", 729 | "model_name": "HTMLModel", 730 | "model_module_version": "1.5.0", 731 | "state": { 732 | "_dom_classes": [], 733 | "_model_module": "@jupyter-widgets/controls", 734 | "_model_module_version": "1.5.0", 735 | "_model_name": "HTMLModel", 736 | "_view_count": null, 737 | "_view_module": "@jupyter-widgets/controls", 738 | 
"_view_module_version": "1.5.0", 739 | "_view_name": "HTMLView", 740 | "description": "", 741 | "description_tooltip": null, 742 | "layout": "IPY_MODEL_2bdfdc5610ec4dc1acfe22283eb1541e", 743 | "placeholder": "​", 744 | "style": "IPY_MODEL_baba10fb45dc42bcbf6321dd59cfaf6e", 745 | "value": "model-00002-of-00002.safetensors: 100%" 746 | } 747 | }, 748 | "32d71755c0f8460790b88f963ff1e4e6": { 749 | "model_module": "@jupyter-widgets/controls", 750 | "model_name": "FloatProgressModel", 751 | "model_module_version": "1.5.0", 752 | "state": { 753 | "_dom_classes": [], 754 | "_model_module": "@jupyter-widgets/controls", 755 | "_model_module_version": "1.5.0", 756 | "_model_name": "FloatProgressModel", 757 | "_view_count": null, 758 | "_view_module": "@jupyter-widgets/controls", 759 | "_view_module_version": "1.5.0", 760 | "_view_name": "ProgressView", 761 | "bar_style": "success", 762 | "description": "", 763 | "description_tooltip": null, 764 | "layout": "IPY_MODEL_a1404373c9314d81bf99c41fdc9f11b4", 765 | "max": 1459729952, 766 | "min": 0, 767 | "orientation": "horizontal", 768 | "style": "IPY_MODEL_1eb7881728a04d1ca9e209a17da9bef2", 769 | "value": 1459729952 770 | } 771 | }, 772 | "23fba6d9a7044060a9b5a02ee7ba71e5": { 773 | "model_module": "@jupyter-widgets/controls", 774 | "model_name": "HTMLModel", 775 | "model_module_version": "1.5.0", 776 | "state": { 777 | "_dom_classes": [], 778 | "_model_module": "@jupyter-widgets/controls", 779 | "_model_module_version": "1.5.0", 780 | "_model_name": "HTMLModel", 781 | "_view_count": null, 782 | "_view_module": "@jupyter-widgets/controls", 783 | "_view_module_version": "1.5.0", 784 | "_view_name": "HTMLView", 785 | "description": "", 786 | "description_tooltip": null, 787 | "layout": "IPY_MODEL_a472fa2b93f449f0a5c1444d1dacd833", 788 | "placeholder": "​", 789 | "style": "IPY_MODEL_1c149ec6a5a640f3bd5a0209840e58a4", 790 | "value": " 1.46G/1.46G [01:01<00:00, 34.6MB/s]" 791 | } 792 | }, 793 | "2da95739a2444ce896fb3ff81c59e6d0": { 794 
| "model_module": "@jupyter-widgets/base", 795 | "model_name": "LayoutModel", 796 | "model_module_version": "1.2.0", 797 | "state": { 798 | "_model_module": "@jupyter-widgets/base", 799 | "_model_module_version": "1.2.0", 800 | "_model_name": "LayoutModel", 801 | "_view_count": null, 802 | "_view_module": "@jupyter-widgets/base", 803 | "_view_module_version": "1.2.0", 804 | "_view_name": "LayoutView", 805 | "align_content": null, 806 | "align_items": null, 807 | "align_self": null, 808 | "border": null, 809 | "bottom": null, 810 | "display": null, 811 | "flex": null, 812 | "flex_flow": null, 813 | "grid_area": null, 814 | "grid_auto_columns": null, 815 | "grid_auto_flow": null, 816 | "grid_auto_rows": null, 817 | "grid_column": null, 818 | "grid_gap": null, 819 | "grid_row": null, 820 | "grid_template_areas": null, 821 | "grid_template_columns": null, 822 | "grid_template_rows": null, 823 | "height": null, 824 | "justify_content": null, 825 | "justify_items": null, 826 | "left": null, 827 | "margin": null, 828 | "max_height": null, 829 | "max_width": null, 830 | "min_height": null, 831 | "min_width": null, 832 | "object_fit": null, 833 | "object_position": null, 834 | "order": null, 835 | "overflow": null, 836 | "overflow_x": null, 837 | "overflow_y": null, 838 | "padding": null, 839 | "right": null, 840 | "top": null, 841 | "visibility": null, 842 | "width": null 843 | } 844 | }, 845 | "2bdfdc5610ec4dc1acfe22283eb1541e": { 846 | "model_module": "@jupyter-widgets/base", 847 | "model_name": "LayoutModel", 848 | "model_module_version": "1.2.0", 849 | "state": { 850 | "_model_module": "@jupyter-widgets/base", 851 | "_model_module_version": "1.2.0", 852 | "_model_name": "LayoutModel", 853 | "_view_count": null, 854 | "_view_module": "@jupyter-widgets/base", 855 | "_view_module_version": "1.2.0", 856 | "_view_name": "LayoutView", 857 | "align_content": null, 858 | "align_items": null, 859 | "align_self": null, 860 | "border": null, 861 | "bottom": null, 862 | "display": 
null, 863 | "flex": null, 864 | "flex_flow": null, 865 | "grid_area": null, 866 | "grid_auto_columns": null, 867 | "grid_auto_flow": null, 868 | "grid_auto_rows": null, 869 | "grid_column": null, 870 | "grid_gap": null, 871 | "grid_row": null, 872 | "grid_template_areas": null, 873 | "grid_template_columns": null, 874 | "grid_template_rows": null, 875 | "height": null, 876 | "justify_content": null, 877 | "justify_items": null, 878 | "left": null, 879 | "margin": null, 880 | "max_height": null, 881 | "max_width": null, 882 | "min_height": null, 883 | "min_width": null, 884 | "object_fit": null, 885 | "object_position": null, 886 | "order": null, 887 | "overflow": null, 888 | "overflow_x": null, 889 | "overflow_y": null, 890 | "padding": null, 891 | "right": null, 892 | "top": null, 893 | "visibility": null, 894 | "width": null 895 | } 896 | }, 897 | "baba10fb45dc42bcbf6321dd59cfaf6e": { 898 | "model_module": "@jupyter-widgets/controls", 899 | "model_name": "DescriptionStyleModel", 900 | "model_module_version": "1.5.0", 901 | "state": { 902 | "_model_module": "@jupyter-widgets/controls", 903 | "_model_module_version": "1.5.0", 904 | "_model_name": "DescriptionStyleModel", 905 | "_view_count": null, 906 | "_view_module": "@jupyter-widgets/base", 907 | "_view_module_version": "1.2.0", 908 | "_view_name": "StyleView", 909 | "description_width": "" 910 | } 911 | }, 912 | "a1404373c9314d81bf99c41fdc9f11b4": { 913 | "model_module": "@jupyter-widgets/base", 914 | "model_name": "LayoutModel", 915 | "model_module_version": "1.2.0", 916 | "state": { 917 | "_model_module": "@jupyter-widgets/base", 918 | "_model_module_version": "1.2.0", 919 | "_model_name": "LayoutModel", 920 | "_view_count": null, 921 | "_view_module": "@jupyter-widgets/base", 922 | "_view_module_version": "1.2.0", 923 | "_view_name": "LayoutView", 924 | "align_content": null, 925 | "align_items": null, 926 | "align_self": null, 927 | "border": null, 928 | "bottom": null, 929 | "display": null, 930 | "flex": 
null, 931 | "flex_flow": null, 932 | "grid_area": null, 933 | "grid_auto_columns": null, 934 | "grid_auto_flow": null, 935 | "grid_auto_rows": null, 936 | "grid_column": null, 937 | "grid_gap": null, 938 | "grid_row": null, 939 | "grid_template_areas": null, 940 | "grid_template_columns": null, 941 | "grid_template_rows": null, 942 | "height": null, 943 | "justify_content": null, 944 | "justify_items": null, 945 | "left": null, 946 | "margin": null, 947 | "max_height": null, 948 | "max_width": null, 949 | "min_height": null, 950 | "min_width": null, 951 | "object_fit": null, 952 | "object_position": null, 953 | "order": null, 954 | "overflow": null, 955 | "overflow_x": null, 956 | "overflow_y": null, 957 | "padding": null, 958 | "right": null, 959 | "top": null, 960 | "visibility": null, 961 | "width": null 962 | } 963 | }, 964 | "1eb7881728a04d1ca9e209a17da9bef2": { 965 | "model_module": "@jupyter-widgets/controls", 966 | "model_name": "ProgressStyleModel", 967 | "model_module_version": "1.5.0", 968 | "state": { 969 | "_model_module": "@jupyter-widgets/controls", 970 | "_model_module_version": "1.5.0", 971 | "_model_name": "ProgressStyleModel", 972 | "_view_count": null, 973 | "_view_module": "@jupyter-widgets/base", 974 | "_view_module_version": "1.2.0", 975 | "_view_name": "StyleView", 976 | "bar_color": null, 977 | "description_width": "" 978 | } 979 | }, 980 | "a472fa2b93f449f0a5c1444d1dacd833": { 981 | "model_module": "@jupyter-widgets/base", 982 | "model_name": "LayoutModel", 983 | "model_module_version": "1.2.0", 984 | "state": { 985 | "_model_module": "@jupyter-widgets/base", 986 | "_model_module_version": "1.2.0", 987 | "_model_name": "LayoutModel", 988 | "_view_count": null, 989 | "_view_module": "@jupyter-widgets/base", 990 | "_view_module_version": "1.2.0", 991 | "_view_name": "LayoutView", 992 | "align_content": null, 993 | "align_items": null, 994 | "align_self": null, 995 | "border": null, 996 | "bottom": null, 997 | "display": null, 998 | "flex": 
null, 999 | "flex_flow": null, 1000 | "grid_area": null, 1001 | "grid_auto_columns": null, 1002 | "grid_auto_flow": null, 1003 | "grid_auto_rows": null, 1004 | "grid_column": null, 1005 | "grid_gap": null, 1006 | "grid_row": null, 1007 | "grid_template_areas": null, 1008 | "grid_template_columns": null, 1009 | "grid_template_rows": null, 1010 | "height": null, 1011 | "justify_content": null, 1012 | "justify_items": null, 1013 | "left": null, 1014 | "margin": null, 1015 | "max_height": null, 1016 | "max_width": null, 1017 | "min_height": null, 1018 | "min_width": null, 1019 | "object_fit": null, 1020 | "object_position": null, 1021 | "order": null, 1022 | "overflow": null, 1023 | "overflow_x": null, 1024 | "overflow_y": null, 1025 | "padding": null, 1026 | "right": null, 1027 | "top": null, 1028 | "visibility": null, 1029 | "width": null 1030 | } 1031 | }, 1032 | "1c149ec6a5a640f3bd5a0209840e58a4": { 1033 | "model_module": "@jupyter-widgets/controls", 1034 | "model_name": "DescriptionStyleModel", 1035 | "model_module_version": "1.5.0", 1036 | "state": { 1037 | "_model_module": "@jupyter-widgets/controls", 1038 | "_model_module_version": "1.5.0", 1039 | "_model_name": "DescriptionStyleModel", 1040 | "_view_count": null, 1041 | "_view_module": "@jupyter-widgets/base", 1042 | "_view_module_version": "1.2.0", 1043 | "_view_name": "StyleView", 1044 | "description_width": "" 1045 | } 1046 | }, 1047 | "8648320acb1f4fed83f6a7822ec43043": { 1048 | "model_module": "@jupyter-widgets/controls", 1049 | "model_name": "HBoxModel", 1050 | "model_module_version": "1.5.0", 1051 | "state": { 1052 | "_dom_classes": [], 1053 | "_model_module": "@jupyter-widgets/controls", 1054 | "_model_module_version": "1.5.0", 1055 | "_model_name": "HBoxModel", 1056 | "_view_count": null, 1057 | "_view_module": "@jupyter-widgets/controls", 1058 | "_view_module_version": "1.5.0", 1059 | "_view_name": "HBoxView", 1060 | "box_style": "", 1061 | "children": [ 1062 | 
"IPY_MODEL_ba14c4a3589f419aaa14b9b8b7bdfa73", 1063 | "IPY_MODEL_07584bcd3cd24debbb1ac2fa59458e2b", 1064 | "IPY_MODEL_61993ca2959f4f39870cea582314b7f8" 1065 | ], 1066 | "layout": "IPY_MODEL_6e1abca235f7485484abcd5c76b7b3d7" 1067 | } 1068 | }, 1069 | "ba14c4a3589f419aaa14b9b8b7bdfa73": { 1070 | "model_module": "@jupyter-widgets/controls", 1071 | "model_name": "HTMLModel", 1072 | "model_module_version": "1.5.0", 1073 | "state": { 1074 | "_dom_classes": [], 1075 | "_model_module": "@jupyter-widgets/controls", 1076 | "_model_module_version": "1.5.0", 1077 | "_model_name": "HTMLModel", 1078 | "_view_count": null, 1079 | "_view_module": "@jupyter-widgets/controls", 1080 | "_view_module_version": "1.5.0", 1081 | "_view_name": "HTMLView", 1082 | "description": "", 1083 | "description_tooltip": null, 1084 | "layout": "IPY_MODEL_beb839820dba4f0a8862f04e03e861b0", 1085 | "placeholder": "​", 1086 | "style": "IPY_MODEL_e977a6d7ec764683aeb36e30f006d238", 1087 | "value": "Loading checkpoint shards: 100%" 1088 | } 1089 | }, 1090 | "07584bcd3cd24debbb1ac2fa59458e2b": { 1091 | "model_module": "@jupyter-widgets/controls", 1092 | "model_name": "FloatProgressModel", 1093 | "model_module_version": "1.5.0", 1094 | "state": { 1095 | "_dom_classes": [], 1096 | "_model_module": "@jupyter-widgets/controls", 1097 | "_model_module_version": "1.5.0", 1098 | "_model_name": "FloatProgressModel", 1099 | "_view_count": null, 1100 | "_view_module": "@jupyter-widgets/controls", 1101 | "_view_module_version": "1.5.0", 1102 | "_view_name": "ProgressView", 1103 | "bar_style": "success", 1104 | "description": "", 1105 | "description_tooltip": null, 1106 | "layout": "IPY_MODEL_138403b96c874e86a36b267b7a6f721e", 1107 | "max": 2, 1108 | "min": 0, 1109 | "orientation": "horizontal", 1110 | "style": "IPY_MODEL_42d5c7b11aba49d593951dbe4a0c9916", 1111 | "value": 2 1112 | } 1113 | }, 1114 | "61993ca2959f4f39870cea582314b7f8": { 1115 | "model_module": "@jupyter-widgets/controls", 1116 | "model_name": "HTMLModel", 
1117 | "model_module_version": "1.5.0", 1118 | "state": { 1119 | "_dom_classes": [], 1120 | "_model_module": "@jupyter-widgets/controls", 1121 | "_model_module_version": "1.5.0", 1122 | "_model_name": "HTMLModel", 1123 | "_view_count": null, 1124 | "_view_module": "@jupyter-widgets/controls", 1125 | "_view_module_version": "1.5.0", 1126 | "_view_name": "HTMLView", 1127 | "description": "", 1128 | "description_tooltip": null, 1129 | "layout": "IPY_MODEL_93deb96f5e9c4fb78c8a89a7b95dc240", 1130 | "placeholder": "​", 1131 | "style": "IPY_MODEL_c412e8d1c59247ddb01ff5e41be21294", 1132 | "value": " 2/2 [00:00<00:00,  3.47it/s]" 1133 | } 1134 | }, 1135 | "6e1abca235f7485484abcd5c76b7b3d7": { 1136 | "model_module": "@jupyter-widgets/base", 1137 | "model_name": "LayoutModel", 1138 | "model_module_version": "1.2.0", 1139 | "state": { 1140 | "_model_module": "@jupyter-widgets/base", 1141 | "_model_module_version": "1.2.0", 1142 | "_model_name": "LayoutModel", 1143 | "_view_count": null, 1144 | "_view_module": "@jupyter-widgets/base", 1145 | "_view_module_version": "1.2.0", 1146 | "_view_name": "LayoutView", 1147 | "align_content": null, 1148 | "align_items": null, 1149 | "align_self": null, 1150 | "border": null, 1151 | "bottom": null, 1152 | "display": null, 1153 | "flex": null, 1154 | "flex_flow": null, 1155 | "grid_area": null, 1156 | "grid_auto_columns": null, 1157 | "grid_auto_flow": null, 1158 | "grid_auto_rows": null, 1159 | "grid_column": null, 1160 | "grid_gap": null, 1161 | "grid_row": null, 1162 | "grid_template_areas": null, 1163 | "grid_template_columns": null, 1164 | "grid_template_rows": null, 1165 | "height": null, 1166 | "justify_content": null, 1167 | "justify_items": null, 1168 | "left": null, 1169 | "margin": null, 1170 | "max_height": null, 1171 | "max_width": null, 1172 | "min_height": null, 1173 | "min_width": null, 1174 | "object_fit": null, 1175 | "object_position": null, 1176 | "order": null, 1177 | "overflow": null, 1178 | "overflow_x": null, 1179 | 
"overflow_y": null, 1180 | "padding": null, 1181 | "right": null, 1182 | "top": null, 1183 | "visibility": null, 1184 | "width": null 1185 | } 1186 | }, 1187 | "beb839820dba4f0a8862f04e03e861b0": { 1188 | "model_module": "@jupyter-widgets/base", 1189 | "model_name": "LayoutModel", 1190 | "model_module_version": "1.2.0", 1191 | "state": { 1192 | "_model_module": "@jupyter-widgets/base", 1193 | "_model_module_version": "1.2.0", 1194 | "_model_name": "LayoutModel", 1195 | "_view_count": null, 1196 | "_view_module": "@jupyter-widgets/base", 1197 | "_view_module_version": "1.2.0", 1198 | "_view_name": "LayoutView", 1199 | "align_content": null, 1200 | "align_items": null, 1201 | "align_self": null, 1202 | "border": null, 1203 | "bottom": null, 1204 | "display": null, 1205 | "flex": null, 1206 | "flex_flow": null, 1207 | "grid_area": null, 1208 | "grid_auto_columns": null, 1209 | "grid_auto_flow": null, 1210 | "grid_auto_rows": null, 1211 | "grid_column": null, 1212 | "grid_gap": null, 1213 | "grid_row": null, 1214 | "grid_template_areas": null, 1215 | "grid_template_columns": null, 1216 | "grid_template_rows": null, 1217 | "height": null, 1218 | "justify_content": null, 1219 | "justify_items": null, 1220 | "left": null, 1221 | "margin": null, 1222 | "max_height": null, 1223 | "max_width": null, 1224 | "min_height": null, 1225 | "min_width": null, 1226 | "object_fit": null, 1227 | "object_position": null, 1228 | "order": null, 1229 | "overflow": null, 1230 | "overflow_x": null, 1231 | "overflow_y": null, 1232 | "padding": null, 1233 | "right": null, 1234 | "top": null, 1235 | "visibility": null, 1236 | "width": null 1237 | } 1238 | }, 1239 | "e977a6d7ec764683aeb36e30f006d238": { 1240 | "model_module": "@jupyter-widgets/controls", 1241 | "model_name": "DescriptionStyleModel", 1242 | "model_module_version": "1.5.0", 1243 | "state": { 1244 | "_model_module": "@jupyter-widgets/controls", 1245 | "_model_module_version": "1.5.0", 1246 | "_model_name": "DescriptionStyleModel", 
1247 | "_view_count": null, 1248 | "_view_module": "@jupyter-widgets/base", 1249 | "_view_module_version": "1.2.0", 1250 | "_view_name": "StyleView", 1251 | "description_width": "" 1252 | } 1253 | }, 1254 | "138403b96c874e86a36b267b7a6f721e": { 1255 | "model_module": "@jupyter-widgets/base", 1256 | "model_name": "LayoutModel", 1257 | "model_module_version": "1.2.0", 1258 | "state": { 1259 | "_model_module": "@jupyter-widgets/base", 1260 | "_model_module_version": "1.2.0", 1261 | "_model_name": "LayoutModel", 1262 | "_view_count": null, 1263 | "_view_module": "@jupyter-widgets/base", 1264 | "_view_module_version": "1.2.0", 1265 | "_view_name": "LayoutView", 1266 | "align_content": null, 1267 | "align_items": null, 1268 | "align_self": null, 1269 | "border": null, 1270 | "bottom": null, 1271 | "display": null, 1272 | "flex": null, 1273 | "flex_flow": null, 1274 | "grid_area": null, 1275 | "grid_auto_columns": null, 1276 | "grid_auto_flow": null, 1277 | "grid_auto_rows": null, 1278 | "grid_column": null, 1279 | "grid_gap": null, 1280 | "grid_row": null, 1281 | "grid_template_areas": null, 1282 | "grid_template_columns": null, 1283 | "grid_template_rows": null, 1284 | "height": null, 1285 | "justify_content": null, 1286 | "justify_items": null, 1287 | "left": null, 1288 | "margin": null, 1289 | "max_height": null, 1290 | "max_width": null, 1291 | "min_height": null, 1292 | "min_width": null, 1293 | "object_fit": null, 1294 | "object_position": null, 1295 | "order": null, 1296 | "overflow": null, 1297 | "overflow_x": null, 1298 | "overflow_y": null, 1299 | "padding": null, 1300 | "right": null, 1301 | "top": null, 1302 | "visibility": null, 1303 | "width": null 1304 | } 1305 | }, 1306 | "42d5c7b11aba49d593951dbe4a0c9916": { 1307 | "model_module": "@jupyter-widgets/controls", 1308 | "model_name": "ProgressStyleModel", 1309 | "model_module_version": "1.5.0", 1310 | "state": { 1311 | "_model_module": "@jupyter-widgets/controls", 1312 | "_model_module_version": "1.5.0", 
1313 | "_model_name": "ProgressStyleModel", 1314 | "_view_count": null, 1315 | "_view_module": "@jupyter-widgets/base", 1316 | "_view_module_version": "1.2.0", 1317 | "_view_name": "StyleView", 1318 | "bar_color": null, 1319 | "description_width": "" 1320 | } 1321 | }, 1322 | "93deb96f5e9c4fb78c8a89a7b95dc240": { 1323 | "model_module": "@jupyter-widgets/base", 1324 | "model_name": "LayoutModel", 1325 | "model_module_version": "1.2.0", 1326 | "state": { 1327 | "_model_module": "@jupyter-widgets/base", 1328 | "_model_module_version": "1.2.0", 1329 | "_model_name": "LayoutModel", 1330 | "_view_count": null, 1331 | "_view_module": "@jupyter-widgets/base", 1332 | "_view_module_version": "1.2.0", 1333 | "_view_name": "LayoutView", 1334 | "align_content": null, 1335 | "align_items": null, 1336 | "align_self": null, 1337 | "border": null, 1338 | "bottom": null, 1339 | "display": null, 1340 | "flex": null, 1341 | "flex_flow": null, 1342 | "grid_area": null, 1343 | "grid_auto_columns": null, 1344 | "grid_auto_flow": null, 1345 | "grid_auto_rows": null, 1346 | "grid_column": null, 1347 | "grid_gap": null, 1348 | "grid_row": null, 1349 | "grid_template_areas": null, 1350 | "grid_template_columns": null, 1351 | "grid_template_rows": null, 1352 | "height": null, 1353 | "justify_content": null, 1354 | "justify_items": null, 1355 | "left": null, 1356 | "margin": null, 1357 | "max_height": null, 1358 | "max_width": null, 1359 | "min_height": null, 1360 | "min_width": null, 1361 | "object_fit": null, 1362 | "object_position": null, 1363 | "order": null, 1364 | "overflow": null, 1365 | "overflow_x": null, 1366 | "overflow_y": null, 1367 | "padding": null, 1368 | "right": null, 1369 | "top": null, 1370 | "visibility": null, 1371 | "width": null 1372 | } 1373 | }, 1374 | "c412e8d1c59247ddb01ff5e41be21294": { 1375 | "model_module": "@jupyter-widgets/controls", 1376 | "model_name": "DescriptionStyleModel", 1377 | "model_module_version": "1.5.0", 1378 | "state": { 1379 | "_model_module": 
"@jupyter-widgets/controls", 1380 | "_model_module_version": "1.5.0", 1381 | "_model_name": "DescriptionStyleModel", 1382 | "_view_count": null, 1383 | "_view_module": "@jupyter-widgets/base", 1384 | "_view_module_version": "1.2.0", 1385 | "_view_name": "StyleView", 1386 | "description_width": "" 1387 | } 1388 | }, 1389 | "d798703b03304bdcb9de0ed306ca9943": { 1390 | "model_module": "@jupyter-widgets/controls", 1391 | "model_name": "HBoxModel", 1392 | "model_module_version": "1.5.0", 1393 | "state": { 1394 | "_dom_classes": [], 1395 | "_model_module": "@jupyter-widgets/controls", 1396 | "_model_module_version": "1.5.0", 1397 | "_model_name": "HBoxModel", 1398 | "_view_count": null, 1399 | "_view_module": "@jupyter-widgets/controls", 1400 | "_view_module_version": "1.5.0", 1401 | "_view_name": "HBoxView", 1402 | "box_style": "", 1403 | "children": [ 1404 | "IPY_MODEL_1de9c5460dbb4e80bb7e5c45b90439a1", 1405 | "IPY_MODEL_e0291b1e01184f7393bbca2d10902a7e", 1406 | "IPY_MODEL_25e1398b6e9a41f19d1bdb645c1cb35c" 1407 | ], 1408 | "layout": "IPY_MODEL_1f51124cf426489aa1d9748f4ec83d0c" 1409 | } 1410 | }, 1411 | "1de9c5460dbb4e80bb7e5c45b90439a1": { 1412 | "model_module": "@jupyter-widgets/controls", 1413 | "model_name": "HTMLModel", 1414 | "model_module_version": "1.5.0", 1415 | "state": { 1416 | "_dom_classes": [], 1417 | "_model_module": "@jupyter-widgets/controls", 1418 | "_model_module_version": "1.5.0", 1419 | "_model_name": "HTMLModel", 1420 | "_view_count": null, 1421 | "_view_module": "@jupyter-widgets/controls", 1422 | "_view_module_version": "1.5.0", 1423 | "_view_name": "HTMLView", 1424 | "description": "", 1425 | "description_tooltip": null, 1426 | "layout": "IPY_MODEL_abc003a02bb5418ca3969f5e475808a0", 1427 | "placeholder": "​", 1428 | "style": "IPY_MODEL_e47d3b79aef646a1be1da4d61d466927", 1429 | "value": "generation_config.json: 100%" 1430 | } 1431 | }, 1432 | "e0291b1e01184f7393bbca2d10902a7e": { 1433 | "model_module": "@jupyter-widgets/controls", 1434 | 
"model_name": "FloatProgressModel", 1435 | "model_module_version": "1.5.0", 1436 | "state": { 1437 | "_dom_classes": [], 1438 | "_model_module": "@jupyter-widgets/controls", 1439 | "_model_module_version": "1.5.0", 1440 | "_model_name": "FloatProgressModel", 1441 | "_view_count": null, 1442 | "_view_module": "@jupyter-widgets/controls", 1443 | "_view_module_version": "1.5.0", 1444 | "_view_name": "ProgressView", 1445 | "bar_style": "success", 1446 | "description": "", 1447 | "description_tooltip": null, 1448 | "layout": "IPY_MODEL_dae4a3b4986047f8b071095b720d20dc", 1449 | "max": 189, 1450 | "min": 0, 1451 | "orientation": "horizontal", 1452 | "style": "IPY_MODEL_b41721c47ad04ea3856c3177702bee1e", 1453 | "value": 189 1454 | } 1455 | }, 1456 | "25e1398b6e9a41f19d1bdb645c1cb35c": { 1457 | "model_module": "@jupyter-widgets/controls", 1458 | "model_name": "HTMLModel", 1459 | "model_module_version": "1.5.0", 1460 | "state": { 1461 | "_dom_classes": [], 1462 | "_model_module": "@jupyter-widgets/controls", 1463 | "_model_module_version": "1.5.0", 1464 | "_model_name": "HTMLModel", 1465 | "_view_count": null, 1466 | "_view_module": "@jupyter-widgets/controls", 1467 | "_view_module_version": "1.5.0", 1468 | "_view_name": "HTMLView", 1469 | "description": "", 1470 | "description_tooltip": null, 1471 | "layout": "IPY_MODEL_2d17eeb690584ef9a98c58f31551e1ae", 1472 | "placeholder": "​", 1473 | "style": "IPY_MODEL_117d30dc8c334efba28bbfe9c7713344", 1474 | "value": " 189/189 [00:00<00:00, 14.9kB/s]" 1475 | } 1476 | }, 1477 | "1f51124cf426489aa1d9748f4ec83d0c": { 1478 | "model_module": "@jupyter-widgets/base", 1479 | "model_name": "LayoutModel", 1480 | "model_module_version": "1.2.0", 1481 | "state": { 1482 | "_model_module": "@jupyter-widgets/base", 1483 | "_model_module_version": "1.2.0", 1484 | "_model_name": "LayoutModel", 1485 | "_view_count": null, 1486 | "_view_module": "@jupyter-widgets/base", 1487 | "_view_module_version": "1.2.0", 1488 | "_view_name": "LayoutView", 1489 | 
"align_content": null, 1490 | "align_items": null, 1491 | "align_self": null, 1492 | "border": null, 1493 | "bottom": null, 1494 | "display": null, 1495 | "flex": null, 1496 | "flex_flow": null, 1497 | "grid_area": null, 1498 | "grid_auto_columns": null, 1499 | "grid_auto_flow": null, 1500 | "grid_auto_rows": null, 1501 | "grid_column": null, 1502 | "grid_gap": null, 1503 | "grid_row": null, 1504 | "grid_template_areas": null, 1505 | "grid_template_columns": null, 1506 | "grid_template_rows": null, 1507 | "height": null, 1508 | "justify_content": null, 1509 | "justify_items": null, 1510 | "left": null, 1511 | "margin": null, 1512 | "max_height": null, 1513 | "max_width": null, 1514 | "min_height": null, 1515 | "min_width": null, 1516 | "object_fit": null, 1517 | "object_position": null, 1518 | "order": null, 1519 | "overflow": null, 1520 | "overflow_x": null, 1521 | "overflow_y": null, 1522 | "padding": null, 1523 | "right": null, 1524 | "top": null, 1525 | "visibility": null, 1526 | "width": null 1527 | } 1528 | }, 1529 | "abc003a02bb5418ca3969f5e475808a0": { 1530 | "model_module": "@jupyter-widgets/base", 1531 | "model_name": "LayoutModel", 1532 | "model_module_version": "1.2.0", 1533 | "state": { 1534 | "_model_module": "@jupyter-widgets/base", 1535 | "_model_module_version": "1.2.0", 1536 | "_model_name": "LayoutModel", 1537 | "_view_count": null, 1538 | "_view_module": "@jupyter-widgets/base", 1539 | "_view_module_version": "1.2.0", 1540 | "_view_name": "LayoutView", 1541 | "align_content": null, 1542 | "align_items": null, 1543 | "align_self": null, 1544 | "border": null, 1545 | "bottom": null, 1546 | "display": null, 1547 | "flex": null, 1548 | "flex_flow": null, 1549 | "grid_area": null, 1550 | "grid_auto_columns": null, 1551 | "grid_auto_flow": null, 1552 | "grid_auto_rows": null, 1553 | "grid_column": null, 1554 | "grid_gap": null, 1555 | "grid_row": null, 1556 | "grid_template_areas": null, 1557 | "grid_template_columns": null, 1558 | 
"grid_template_rows": null, 1559 | "height": null, 1560 | "justify_content": null, 1561 | "justify_items": null, 1562 | "left": null, 1563 | "margin": null, 1564 | "max_height": null, 1565 | "max_width": null, 1566 | "min_height": null, 1567 | "min_width": null, 1568 | "object_fit": null, 1569 | "object_position": null, 1570 | "order": null, 1571 | "overflow": null, 1572 | "overflow_x": null, 1573 | "overflow_y": null, 1574 | "padding": null, 1575 | "right": null, 1576 | "top": null, 1577 | "visibility": null, 1578 | "width": null 1579 | } 1580 | }, 1581 | "e47d3b79aef646a1be1da4d61d466927": { 1582 | "model_module": "@jupyter-widgets/controls", 1583 | "model_name": "DescriptionStyleModel", 1584 | "model_module_version": "1.5.0", 1585 | "state": { 1586 | "_model_module": "@jupyter-widgets/controls", 1587 | "_model_module_version": "1.5.0", 1588 | "_model_name": "DescriptionStyleModel", 1589 | "_view_count": null, 1590 | "_view_module": "@jupyter-widgets/base", 1591 | "_view_module_version": "1.2.0", 1592 | "_view_name": "StyleView", 1593 | "description_width": "" 1594 | } 1595 | }, 1596 | "dae4a3b4986047f8b071095b720d20dc": { 1597 | "model_module": "@jupyter-widgets/base", 1598 | "model_name": "LayoutModel", 1599 | "model_module_version": "1.2.0", 1600 | "state": { 1601 | "_model_module": "@jupyter-widgets/base", 1602 | "_model_module_version": "1.2.0", 1603 | "_model_name": "LayoutModel", 1604 | "_view_count": null, 1605 | "_view_module": "@jupyter-widgets/base", 1606 | "_view_module_version": "1.2.0", 1607 | "_view_name": "LayoutView", 1608 | "align_content": null, 1609 | "align_items": null, 1610 | "align_self": null, 1611 | "border": null, 1612 | "bottom": null, 1613 | "display": null, 1614 | "flex": null, 1615 | "flex_flow": null, 1616 | "grid_area": null, 1617 | "grid_auto_columns": null, 1618 | "grid_auto_flow": null, 1619 | "grid_auto_rows": null, 1620 | "grid_column": null, 1621 | "grid_gap": null, 1622 | "grid_row": null, 1623 | "grid_template_areas": 
null, 1624 | "grid_template_columns": null, 1625 | "grid_template_rows": null, 1626 | "height": null, 1627 | "justify_content": null, 1628 | "justify_items": null, 1629 | "left": null, 1630 | "margin": null, 1631 | "max_height": null, 1632 | "max_width": null, 1633 | "min_height": null, 1634 | "min_width": null, 1635 | "object_fit": null, 1636 | "object_position": null, 1637 | "order": null, 1638 | "overflow": null, 1639 | "overflow_x": null, 1640 | "overflow_y": null, 1641 | "padding": null, 1642 | "right": null, 1643 | "top": null, 1644 | "visibility": null, 1645 | "width": null 1646 | } 1647 | }, 1648 | "b41721c47ad04ea3856c3177702bee1e": { 1649 | "model_module": "@jupyter-widgets/controls", 1650 | "model_name": "ProgressStyleModel", 1651 | "model_module_version": "1.5.0", 1652 | "state": { 1653 | "_model_module": "@jupyter-widgets/controls", 1654 | "_model_module_version": "1.5.0", 1655 | "_model_name": "ProgressStyleModel", 1656 | "_view_count": null, 1657 | "_view_module": "@jupyter-widgets/base", 1658 | "_view_module_version": "1.2.0", 1659 | "_view_name": "StyleView", 1660 | "bar_color": null, 1661 | "description_width": "" 1662 | } 1663 | }, 1664 | "2d17eeb690584ef9a98c58f31551e1ae": { 1665 | "model_module": "@jupyter-widgets/base", 1666 | "model_name": "LayoutModel", 1667 | "model_module_version": "1.2.0", 1668 | "state": { 1669 | "_model_module": "@jupyter-widgets/base", 1670 | "_model_module_version": "1.2.0", 1671 | "_model_name": "LayoutModel", 1672 | "_view_count": null, 1673 | "_view_module": "@jupyter-widgets/base", 1674 | "_view_module_version": "1.2.0", 1675 | "_view_name": "LayoutView", 1676 | "align_content": null, 1677 | "align_items": null, 1678 | "align_self": null, 1679 | "border": null, 1680 | "bottom": null, 1681 | "display": null, 1682 | "flex": null, 1683 | "flex_flow": null, 1684 | "grid_area": null, 1685 | "grid_auto_columns": null, 1686 | "grid_auto_flow": null, 1687 | "grid_auto_rows": null, 1688 | "grid_column": null, 1689 | 
"grid_gap": null, 1690 | "grid_row": null, 1691 | "grid_template_areas": null, 1692 | "grid_template_columns": null, 1693 | "grid_template_rows": null, 1694 | "height": null, 1695 | "justify_content": null, 1696 | "justify_items": null, 1697 | "left": null, 1698 | "margin": null, 1699 | "max_height": null, 1700 | "max_width": null, 1701 | "min_height": null, 1702 | "min_width": null, 1703 | "object_fit": null, 1704 | "object_position": null, 1705 | "order": null, 1706 | "overflow": null, 1707 | "overflow_x": null, 1708 | "overflow_y": null, 1709 | "padding": null, 1710 | "right": null, 1711 | "top": null, 1712 | "visibility": null, 1713 | "width": null 1714 | } 1715 | }, 1716 | "117d30dc8c334efba28bbfe9c7713344": { 1717 | "model_module": "@jupyter-widgets/controls", 1718 | "model_name": "DescriptionStyleModel", 1719 | "model_module_version": "1.5.0", 1720 | "state": { 1721 | "_model_module": "@jupyter-widgets/controls", 1722 | "_model_module_version": "1.5.0", 1723 | "_model_name": "DescriptionStyleModel", 1724 | "_view_count": null, 1725 | "_view_module": "@jupyter-widgets/base", 1726 | "_view_module_version": "1.2.0", 1727 | "_view_name": "StyleView", 1728 | "description_width": "" 1729 | } 1730 | } 1731 | } 1732 | } 1733 | }, 1734 | "cells": [ 1735 | { 1736 | "cell_type": "markdown", 1737 | "metadata": { 1738 | "id": "view-in-github", 1739 | "colab_type": "text" 1740 | }, 1741 | "source": [ 1742 | "\"Open" 1743 | ] 1744 | }, 1745 | { 1746 | "cell_type": "markdown", 1747 | "source": [ 1748 | "# Run Llama 3.2 3B in a FREE Google Colab!\n", 1749 | "\n", 1750 | "Powered by Transformers 🤗\n", 1751 | "\n", 1752 | "[Model Checkpoint 3B](https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct)\n", 1753 | "\n", 1754 | "[Model Checkpoint 1B](https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct)\n", 1755 | "\n", 1756 | "*Make sure to accept the license by visiting the model checkpoint above.*" 1757 | ], 1758 | "metadata": { 1759 | "id": "ycrm7hWWxYoX" 1760 | } 1761 | }, 
1762 | { 1763 | "cell_type": "markdown", 1764 | "source": [ 1765 | "## Setup Environment\n", 1766 | "\n", 1767 | "Llama 3.2 3B should work out of the box with Transformers, make sure to be on the latest transformers release!" 1768 | ], 1769 | "metadata": { 1770 | "id": "PCzvqFRoyGcM" 1771 | } 1772 | }, 1773 | { 1774 | "cell_type": "code", 1775 | "execution_count": null, 1776 | "metadata": { 1777 | "id": "u5hdTjYoYHqn" 1778 | }, 1779 | "outputs": [], 1780 | "source": [ 1781 | "!pip install -q --upgrade transformers accelerate" 1782 | ] 1783 | }, 1784 | { 1785 | "cell_type": "markdown", 1786 | "source": [ 1787 | "## Load Tokenizer and Model checkpoint" 1788 | ], 1789 | "metadata": { 1790 | "id": "fO093ZGCynnm" 1791 | } 1792 | }, 1793 | { 1794 | "cell_type": "code", 1795 | "source": [ 1796 | "import torch\n", 1797 | "from transformers import AutoModelForCausalLM, AutoTokenizer\n", 1798 | "\n", 1799 | "model_id = \"meta-llama/Llama-3.2-3B-Instruct\"\n", 1800 | "\n", 1801 | "tokenizer = AutoTokenizer.from_pretrained(model_id)\n", 1802 | "\n", 1803 | "model = AutoModelForCausalLM.from_pretrained(\n", 1804 | " model_id,\n", 1805 | " torch_dtype=torch.bfloat16,\n", 1806 | " low_cpu_mem_usage=True,\n", 1807 | ").to(\"cuda\")" 1808 | ], 1809 | "metadata": { 1810 | "colab": { 1811 | "base_uri": "https://localhost:8080/", 1812 | "height": 202, 1813 | "referenced_widgets": [ 1814 | "0b10add730774dcbb03cf0834fd0b724", 1815 | "a74dd475cb0848c0bf343bb772ddac6a", 1816 | "3f3bf676a4dc4673a1acca0ef4616033", 1817 | "f7204a8184db44cb8bbb3d3e12478a0f", 1818 | "e1048e107534477897425885d08094a9", 1819 | "2fe67aae4b854a2c814cca55840b81c2", 1820 | "d8ae9d0d1d254353ba82b1574d7fe1dd", 1821 | "f7add879d74a48ae9ffc18c8b7694b4a", 1822 | "11655f24216f46b9a07e4944f70cede1", 1823 | "1d0264c3d2704b6bbe51583c4b4a3b96", 1824 | "7ec800ba8b1a4fbeae940b7aab631794", 1825 | "943c1bafe9ec4618869cfd028a610213", 1826 | "abf1255b444e4cb9be53d1a556f2ca44", 1827 | "c77ab3e2cec54aa7b11fc1957c64389b", 1828 | 
"33da38440919410bae7be13caec4929d", 1829 | "71b9f62cd90e4c898518fe75267e0e8f", 1830 | "0ea2047d4cec4026bfa9ae05eb5d4e18", 1831 | "05c1d4ab928d480094cf672339a98f29", 1832 | "d164fe1776bc413bae36e96fc0371907", 1833 | "e8f2756358d24df9b887bfc3d5531e83", 1834 | "11e5200b4fee425991ceed611bfb560a", 1835 | "3a5b9c9609a544858f8cf78b0b72788f", 1836 | "6e6d6dae6bc44498b6774c83685fb0b9", 1837 | "4156204d442346a79a9f7d5193b02e1e", 1838 | "32d71755c0f8460790b88f963ff1e4e6", 1839 | "23fba6d9a7044060a9b5a02ee7ba71e5", 1840 | "2da95739a2444ce896fb3ff81c59e6d0", 1841 | "2bdfdc5610ec4dc1acfe22283eb1541e", 1842 | "baba10fb45dc42bcbf6321dd59cfaf6e", 1843 | "a1404373c9314d81bf99c41fdc9f11b4", 1844 | "1eb7881728a04d1ca9e209a17da9bef2", 1845 | "a472fa2b93f449f0a5c1444d1dacd833", 1846 | "1c149ec6a5a640f3bd5a0209840e58a4", 1847 | "8648320acb1f4fed83f6a7822ec43043", 1848 | "ba14c4a3589f419aaa14b9b8b7bdfa73", 1849 | "07584bcd3cd24debbb1ac2fa59458e2b", 1850 | "61993ca2959f4f39870cea582314b7f8", 1851 | "6e1abca235f7485484abcd5c76b7b3d7", 1852 | "beb839820dba4f0a8862f04e03e861b0", 1853 | "e977a6d7ec764683aeb36e30f006d238", 1854 | "138403b96c874e86a36b267b7a6f721e", 1855 | "42d5c7b11aba49d593951dbe4a0c9916", 1856 | "93deb96f5e9c4fb78c8a89a7b95dc240", 1857 | "c412e8d1c59247ddb01ff5e41be21294", 1858 | "d798703b03304bdcb9de0ed306ca9943", 1859 | "1de9c5460dbb4e80bb7e5c45b90439a1", 1860 | "e0291b1e01184f7393bbca2d10902a7e", 1861 | "25e1398b6e9a41f19d1bdb645c1cb35c", 1862 | "1f51124cf426489aa1d9748f4ec83d0c", 1863 | "abc003a02bb5418ca3969f5e475808a0", 1864 | "e47d3b79aef646a1be1da4d61d466927", 1865 | "dae4a3b4986047f8b071095b720d20dc", 1866 | "b41721c47ad04ea3856c3177702bee1e", 1867 | "2d17eeb690584ef9a98c58f31551e1ae", 1868 | "117d30dc8c334efba28bbfe9c7713344" 1869 | ] 1870 | }, 1871 | "id": "OLFqj9b6YW5n", 1872 | "outputId": "aa6b8990-54ee-4c7b-e4e2-e8a2cfe89e39" 1873 | }, 1874 | "execution_count": null, 1875 | "outputs": [ 1876 | { 1877 | "output_type": "display_data", 1878 | "data": { 1879 | 
"text/plain": [ 1880 | "Downloading shards: 0%| | 0/2 [00:00