├── CodeGemma_colab.ipynb
├── Complete_guide_to_audio_datasets.ipynb
├── Infer_Whisper_🤗transformers_edition.ipynb
├── README.md
├── RecurrentGemma_colab.ipynb
├── SmolVLM_500M_inference.ipynb
├── Whisper_Large_8bit_loading_w_bnb.ipynb
├── Whisper_transformers_timestamps.ipynb
├── Whisper_translate_with_🤗transformers_pipeline.ipynb
├── Whisper_w_PEFT.ipynb
├── dduf_my_repo_colab.ipynb
├── deepseek_r1_distill_qwen1_5B_transformers.ipynb
├── gemma_2_9b_colab.ipynb
├── hf_gguf_convert.ipynb
├── insanely_fast_whisper_colab.ipynb
├── kokoro_tts.ipynb
├── mathstral_7b_colab.ipynb
├── orpheus-pretrained-inference-demo.ipynb
├── stable_audio_open_colab.ipynb
├── text_to_music_with_spectrogram_diffusion_and_diffusers.ipynb
├── text_to_sound_with_audioLDM_and_diffusers.ipynb
├── transformers_autoawq_colab.ipynb
├── transformers_whisper_ckpt_to_OAI.ipynb
├── translate_w_seamless_m4tv2.ipynb
├── use_encodec_w_transformers.ipynb
├── whisper_turbo_in_transformers.ipynb
├── zephyr_assisted_musicgen_generations.ipynb
└── zero_to_asr_101.ipynb


/README.md:
--------------------------------------------------------------------------------
1 | # Notebooks
2 | 


--------------------------------------------------------------------------------
/RecurrentGemma_colab.ipynb:
--------------------------------------------------------------------------------
   1 | {
   2 |   "nbformat": 4,
   3 |   "nbformat_minor": 0,
   4 |   "metadata": {
   5 |     "colab": {
   6 |       "provenance": [],
   7 |       "gpuType": "T4",
   8 |       "include_colab_link": true
   9 |     },
  10 |     "kernelspec": {
  11 |       "name": "python3",
  12 |       "display_name": "Python 3"
  13 |     },
  14 |     "language_info": {
  15 |       "name": "python"
  16 |     },
  17 |     "accelerator": "GPU",
  18 |     "widgets": {
  19 |       "application/vnd.jupyter.widget-state+json": {
  20 |         "e6f2b94e3bb345859811226dc345a6e3": {
  21 |           "model_module": "@jupyter-widgets/controls",
  22 |           "model_name": "HBoxModel",
  23 |           "model_module_version": "1.5.0",
  24 |           "state": {
  25 |             "_dom_classes": [],
  26 |             "_model_module": "@jupyter-widgets/controls",
  27 |             "_model_module_version": "1.5.0",
  28 |             "_model_name": "HBoxModel",
  29 |             "_view_count": null,
  30 |             "_view_module": "@jupyter-widgets/controls",
  31 |             "_view_module_version": "1.5.0",
  32 |             "_view_name": "HBoxView",
  33 |             "box_style": "",
  34 |             "children": [
  35 |               "IPY_MODEL_ce12a18f666740eabe1f71be5152e7d8",
  36 |               "IPY_MODEL_76a4282443194092af1039de43c523c6",
  37 |               "IPY_MODEL_3775878f2e484e84b6cb0971d2d35a7f"
  38 |             ],
  39 |             "layout": "IPY_MODEL_df936531829f4c1580c04719de77059c"
  40 |           }
  41 |         },
  42 |         "ce12a18f666740eabe1f71be5152e7d8": {
  43 |           "model_module": "@jupyter-widgets/controls",
  44 |           "model_name": "HTMLModel",
  45 |           "model_module_version": "1.5.0",
  46 |           "state": {
  47 |             "_dom_classes": [],
  48 |             "_model_module": "@jupyter-widgets/controls",
  49 |             "_model_module_version": "1.5.0",
  50 |             "_model_name": "HTMLModel",
  51 |             "_view_count": null,
  52 |             "_view_module": "@jupyter-widgets/controls",
  53 |             "_view_module_version": "1.5.0",
  54 |             "_view_name": "HTMLView",
  55 |             "description": "",
  56 |             "description_tooltip": null,
  57 |             "layout": "IPY_MODEL_ab60c88077984531aa08150c72fa7ab5",
  58 |             "placeholder": "​",
  59 |             "style": "IPY_MODEL_ad9901a17d3d4b1b898941d16c16f1cb",
  60 |             "value": "tokenizer_config.json: 100%"
  61 |           }
  62 |         },
  63 |         "76a4282443194092af1039de43c523c6": {
  64 |           "model_module": "@jupyter-widgets/controls",
  65 |           "model_name": "FloatProgressModel",
  66 |           "model_module_version": "1.5.0",
  67 |           "state": {
  68 |             "_dom_classes": [],
  69 |             "_model_module": "@jupyter-widgets/controls",
  70 |             "_model_module_version": "1.5.0",
  71 |             "_model_name": "FloatProgressModel",
  72 |             "_view_count": null,
  73 |             "_view_module": "@jupyter-widgets/controls",
  74 |             "_view_module_version": "1.5.0",
  75 |             "_view_name": "ProgressView",
  76 |             "bar_style": "success",
  77 |             "description": "",
  78 |             "description_tooltip": null,
  79 |             "layout": "IPY_MODEL_f2fbf227d64e451681083a0ca189405c",
  80 |             "max": 40529,
  81 |             "min": 0,
  82 |             "orientation": "horizontal",
  83 |             "style": "IPY_MODEL_72364dfb4d994115bfd049dae5f53423",
  84 |             "value": 40529
  85 |           }
  86 |         },
  87 |         "3775878f2e484e84b6cb0971d2d35a7f": {
  88 |           "model_module": "@jupyter-widgets/controls",
  89 |           "model_name": "HTMLModel",
  90 |           "model_module_version": "1.5.0",
  91 |           "state": {
  92 |             "_dom_classes": [],
  93 |             "_model_module": "@jupyter-widgets/controls",
  94 |             "_model_module_version": "1.5.0",
  95 |             "_model_name": "HTMLModel",
  96 |             "_view_count": null,
  97 |             "_view_module": "@jupyter-widgets/controls",
  98 |             "_view_module_version": "1.5.0",
  99 |             "_view_name": "HTMLView",
 100 |             "description": "",
 101 |             "description_tooltip": null,
 102 |             "layout": "IPY_MODEL_fcbe52122eeb466d91b44011f7f9bc47",
 103 |             "placeholder": "​",
 104 |             "style": "IPY_MODEL_c0915cfb93634ede86568eface4115d8",
 105 |             "value": " 40.5k/40.5k [00:00&lt;00:00, 670kB/s]"
 106 |           }
 107 |         },
 108 |         "df936531829f4c1580c04719de77059c": {
 109 |           "model_module": "@jupyter-widgets/base",
 110 |           "model_name": "LayoutModel",
 111 |           "model_module_version": "1.2.0",
 112 |           "state": {
 113 |             "_model_module": "@jupyter-widgets/base",
 114 |             "_model_module_version": "1.2.0",
 115 |             "_model_name": "LayoutModel",
 116 |             "_view_count": null,
 117 |             "_view_module": "@jupyter-widgets/base",
 118 |             "_view_module_version": "1.2.0",
 119 |             "_view_name": "LayoutView",
 120 |             "align_content": null,
 121 |             "align_items": null,
 122 |             "align_self": null,
 123 |             "border": null,
 124 |             "bottom": null,
 125 |             "display": null,
 126 |             "flex": null,
 127 |             "flex_flow": null,
 128 |             "grid_area": null,
 129 |             "grid_auto_columns": null,
 130 |             "grid_auto_flow": null,
 131 |             "grid_auto_rows": null,
 132 |             "grid_column": null,
 133 |             "grid_gap": null,
 134 |             "grid_row": null,
 135 |             "grid_template_areas": null,
 136 |             "grid_template_columns": null,
 137 |             "grid_template_rows": null,
 138 |             "height": null,
 139 |             "justify_content": null,
 140 |             "justify_items": null,
 141 |             "left": null,
 142 |             "margin": null,
 143 |             "max_height": null,
 144 |             "max_width": null,
 145 |             "min_height": null,
 146 |             "min_width": null,
 147 |             "object_fit": null,
 148 |             "object_position": null,
 149 |             "order": null,
 150 |             "overflow": null,
 151 |             "overflow_x": null,
 152 |             "overflow_y": null,
 153 |             "padding": null,
 154 |             "right": null,
 155 |             "top": null,
 156 |             "visibility": null,
 157 |             "width": null
 158 |           }
 159 |         },
 160 |         "ab60c88077984531aa08150c72fa7ab5": {
 161 |           "model_module": "@jupyter-widgets/base",
 162 |           "model_name": "LayoutModel",
 163 |           "model_module_version": "1.2.0",
 164 |           "state": {
 165 |             "_model_module": "@jupyter-widgets/base",
 166 |             "_model_module_version": "1.2.0",
 167 |             "_model_name": "LayoutModel",
 168 |             "_view_count": null,
 169 |             "_view_module": "@jupyter-widgets/base",
 170 |             "_view_module_version": "1.2.0",
 171 |             "_view_name": "LayoutView",
 172 |             "align_content": null,
 173 |             "align_items": null,
 174 |             "align_self": null,
 175 |             "border": null,
 176 |             "bottom": null,
 177 |             "display": null,
 178 |             "flex": null,
 179 |             "flex_flow": null,
 180 |             "grid_area": null,
 181 |             "grid_auto_columns": null,
 182 |             "grid_auto_flow": null,
 183 |             "grid_auto_rows": null,
 184 |             "grid_column": null,
 185 |             "grid_gap": null,
 186 |             "grid_row": null,
 187 |             "grid_template_areas": null,
 188 |             "grid_template_columns": null,
 189 |             "grid_template_rows": null,
 190 |             "height": null,
 191 |             "justify_content": null,
 192 |             "justify_items": null,
 193 |             "left": null,
 194 |             "margin": null,
 195 |             "max_height": null,
 196 |             "max_width": null,
 197 |             "min_height": null,
 198 |             "min_width": null,
 199 |             "object_fit": null,
 200 |             "object_position": null,
 201 |             "order": null,
 202 |             "overflow": null,
 203 |             "overflow_x": null,
 204 |             "overflow_y": null,
 205 |             "padding": null,
 206 |             "right": null,
 207 |             "top": null,
 208 |             "visibility": null,
 209 |             "width": null
 210 |           }
 211 |         },
 212 |         "ad9901a17d3d4b1b898941d16c16f1cb": {
 213 |           "model_module": "@jupyter-widgets/controls",
 214 |           "model_name": "DescriptionStyleModel",
 215 |           "model_module_version": "1.5.0",
 216 |           "state": {
 217 |             "_model_module": "@jupyter-widgets/controls",
 218 |             "_model_module_version": "1.5.0",
 219 |             "_model_name": "DescriptionStyleModel",
 220 |             "_view_count": null,
 221 |             "_view_module": "@jupyter-widgets/base",
 222 |             "_view_module_version": "1.2.0",
 223 |             "_view_name": "StyleView",
 224 |             "description_width": ""
 225 |           }
 226 |         },
 227 |         "f2fbf227d64e451681083a0ca189405c": {
 228 |           "model_module": "@jupyter-widgets/base",
 229 |           "model_name": "LayoutModel",
 230 |           "model_module_version": "1.2.0",
 231 |           "state": {
 232 |             "_model_module": "@jupyter-widgets/base",
 233 |             "_model_module_version": "1.2.0",
 234 |             "_model_name": "LayoutModel",
 235 |             "_view_count": null,
 236 |             "_view_module": "@jupyter-widgets/base",
 237 |             "_view_module_version": "1.2.0",
 238 |             "_view_name": "LayoutView",
 239 |             "align_content": null,
 240 |             "align_items": null,
 241 |             "align_self": null,
 242 |             "border": null,
 243 |             "bottom": null,
 244 |             "display": null,
 245 |             "flex": null,
 246 |             "flex_flow": null,
 247 |             "grid_area": null,
 248 |             "grid_auto_columns": null,
 249 |             "grid_auto_flow": null,
 250 |             "grid_auto_rows": null,
 251 |             "grid_column": null,
 252 |             "grid_gap": null,
 253 |             "grid_row": null,
 254 |             "grid_template_areas": null,
 255 |             "grid_template_columns": null,
 256 |             "grid_template_rows": null,
 257 |             "height": null,
 258 |             "justify_content": null,
 259 |             "justify_items": null,
 260 |             "left": null,
 261 |             "margin": null,
 262 |             "max_height": null,
 263 |             "max_width": null,
 264 |             "min_height": null,
 265 |             "min_width": null,
 266 |             "object_fit": null,
 267 |             "object_position": null,
 268 |             "order": null,
 269 |             "overflow": null,
 270 |             "overflow_x": null,
 271 |             "overflow_y": null,
 272 |             "padding": null,
 273 |             "right": null,
 274 |             "top": null,
 275 |             "visibility": null,
 276 |             "width": null
 277 |           }
 278 |         },
 279 |         "72364dfb4d994115bfd049dae5f53423": {
 280 |           "model_module": "@jupyter-widgets/controls",
 281 |           "model_name": "ProgressStyleModel",
 282 |           "model_module_version": "1.5.0",
 283 |           "state": {
 284 |             "_model_module": "@jupyter-widgets/controls",
 285 |             "_model_module_version": "1.5.0",
 286 |             "_model_name": "ProgressStyleModel",
 287 |             "_view_count": null,
 288 |             "_view_module": "@jupyter-widgets/base",
 289 |             "_view_module_version": "1.2.0",
 290 |             "_view_name": "StyleView",
 291 |             "bar_color": null,
 292 |             "description_width": ""
 293 |           }
 294 |         },
 295 |         "fcbe52122eeb466d91b44011f7f9bc47": {
 296 |           "model_module": "@jupyter-widgets/base",
 297 |           "model_name": "LayoutModel",
 298 |           "model_module_version": "1.2.0",
 299 |           "state": {
 300 |             "_model_module": "@jupyter-widgets/base",
 301 |             "_model_module_version": "1.2.0",
 302 |             "_model_name": "LayoutModel",
 303 |             "_view_count": null,
 304 |             "_view_module": "@jupyter-widgets/base",
 305 |             "_view_module_version": "1.2.0",
 306 |             "_view_name": "LayoutView",
 307 |             "align_content": null,
 308 |             "align_items": null,
 309 |             "align_self": null,
 310 |             "border": null,
 311 |             "bottom": null,
 312 |             "display": null,
 313 |             "flex": null,
 314 |             "flex_flow": null,
 315 |             "grid_area": null,
 316 |             "grid_auto_columns": null,
 317 |             "grid_auto_flow": null,
 318 |             "grid_auto_rows": null,
 319 |             "grid_column": null,
 320 |             "grid_gap": null,
 321 |             "grid_row": null,
 322 |             "grid_template_areas": null,
 323 |             "grid_template_columns": null,
 324 |             "grid_template_rows": null,
 325 |             "height": null,
 326 |             "justify_content": null,
 327 |             "justify_items": null,
 328 |             "left": null,
 329 |             "margin": null,
 330 |             "max_height": null,
 331 |             "max_width": null,
 332 |             "min_height": null,
 333 |             "min_width": null,
 334 |             "object_fit": null,
 335 |             "object_position": null,
 336 |             "order": null,
 337 |             "overflow": null,
 338 |             "overflow_x": null,
 339 |             "overflow_y": null,
 340 |             "padding": null,
 341 |             "right": null,
 342 |             "top": null,
 343 |             "visibility": null,
 344 |             "width": null
 345 |           }
 346 |         },
 347 |         "c0915cfb93634ede86568eface4115d8": {
 348 |           "model_module": "@jupyter-widgets/controls",
 349 |           "model_name": "DescriptionStyleModel",
 350 |           "model_module_version": "1.5.0",
 351 |           "state": {
 352 |             "_model_module": "@jupyter-widgets/controls",
 353 |             "_model_module_version": "1.5.0",
 354 |             "_model_name": "DescriptionStyleModel",
 355 |             "_view_count": null,
 356 |             "_view_module": "@jupyter-widgets/base",
 357 |             "_view_module_version": "1.2.0",
 358 |             "_view_name": "StyleView",
 359 |             "description_width": ""
 360 |           }
 361 |         },
 362 |         "401cecca00fb42b29f1ec3fd5cfa4396": {
 363 |           "model_module": "@jupyter-widgets/controls",
 364 |           "model_name": "HBoxModel",
 365 |           "model_module_version": "1.5.0",
 366 |           "state": {
 367 |             "_dom_classes": [],
 368 |             "_model_module": "@jupyter-widgets/controls",
 369 |             "_model_module_version": "1.5.0",
 370 |             "_model_name": "HBoxModel",
 371 |             "_view_count": null,
 372 |             "_view_module": "@jupyter-widgets/controls",
 373 |             "_view_module_version": "1.5.0",
 374 |             "_view_name": "HBoxView",
 375 |             "box_style": "",
 376 |             "children": [
 377 |               "IPY_MODEL_e98c3904a7e346a5870c5ac768cd6a98",
 378 |               "IPY_MODEL_d908df5a5a8945dc88f8d0f147245bbd",
 379 |               "IPY_MODEL_f1a29b9608244a9db3cc919ad149ef48"
 380 |             ],
 381 |             "layout": "IPY_MODEL_d19c6905802d46c4beb6fe8886cb6e8c"
 382 |           }
 383 |         },
 384 |         "e98c3904a7e346a5870c5ac768cd6a98": {
 385 |           "model_module": "@jupyter-widgets/controls",
 386 |           "model_name": "HTMLModel",
 387 |           "model_module_version": "1.5.0",
 388 |           "state": {
 389 |             "_dom_classes": [],
 390 |             "_model_module": "@jupyter-widgets/controls",
 391 |             "_model_module_version": "1.5.0",
 392 |             "_model_name": "HTMLModel",
 393 |             "_view_count": null,
 394 |             "_view_module": "@jupyter-widgets/controls",
 395 |             "_view_module_version": "1.5.0",
 396 |             "_view_name": "HTMLView",
 397 |             "description": "",
 398 |             "description_tooltip": null,
 399 |             "layout": "IPY_MODEL_f7d7bc20d2ba40eeb576e8865cdbb8ec",
 400 |             "placeholder": "​",
 401 |             "style": "IPY_MODEL_1a431c4a814941169c8feff1b4741052",
 402 |             "value": "Downloading shards: 100%"
 403 |           }
 404 |         },
 405 |         "d908df5a5a8945dc88f8d0f147245bbd": {
 406 |           "model_module": "@jupyter-widgets/controls",
 407 |           "model_name": "FloatProgressModel",
 408 |           "model_module_version": "1.5.0",
 409 |           "state": {
 410 |             "_dom_classes": [],
 411 |             "_model_module": "@jupyter-widgets/controls",
 412 |             "_model_module_version": "1.5.0",
 413 |             "_model_name": "FloatProgressModel",
 414 |             "_view_count": null,
 415 |             "_view_module": "@jupyter-widgets/controls",
 416 |             "_view_module_version": "1.5.0",
 417 |             "_view_name": "ProgressView",
 418 |             "bar_style": "success",
 419 |             "description": "",
 420 |             "description_tooltip": null,
 421 |             "layout": "IPY_MODEL_21a66f11f21e4913a1a5a975727916f0",
 422 |             "max": 2,
 423 |             "min": 0,
 424 |             "orientation": "horizontal",
 425 |             "style": "IPY_MODEL_b931a5080c154b7dbbdcebf1a48aa9a3",
 426 |             "value": 2
 427 |           }
 428 |         },
 429 |         "f1a29b9608244a9db3cc919ad149ef48": {
 430 |           "model_module": "@jupyter-widgets/controls",
 431 |           "model_name": "HTMLModel",
 432 |           "model_module_version": "1.5.0",
 433 |           "state": {
 434 |             "_dom_classes": [],
 435 |             "_model_module": "@jupyter-widgets/controls",
 436 |             "_model_module_version": "1.5.0",
 437 |             "_model_name": "HTMLModel",
 438 |             "_view_count": null,
 439 |             "_view_module": "@jupyter-widgets/controls",
 440 |             "_view_module_version": "1.5.0",
 441 |             "_view_name": "HTMLView",
 442 |             "description": "",
 443 |             "description_tooltip": null,
 444 |             "layout": "IPY_MODEL_6cc6f7e129fc47c9ac57d38f713c50ea",
 445 |             "placeholder": "​",
 446 |             "style": "IPY_MODEL_896598f7441c4e84b4c7963b520d6daf",
 447 |             "value": " 2/2 [00:00&lt;00:00,  7.70it/s]"
 448 |           }
 449 |         },
 450 |         "d19c6905802d46c4beb6fe8886cb6e8c": {
 451 |           "model_module": "@jupyter-widgets/base",
 452 |           "model_name": "LayoutModel",
 453 |           "model_module_version": "1.2.0",
 454 |           "state": {
 455 |             "_model_module": "@jupyter-widgets/base",
 456 |             "_model_module_version": "1.2.0",
 457 |             "_model_name": "LayoutModel",
 458 |             "_view_count": null,
 459 |             "_view_module": "@jupyter-widgets/base",
 460 |             "_view_module_version": "1.2.0",
 461 |             "_view_name": "LayoutView",
 462 |             "align_content": null,
 463 |             "align_items": null,
 464 |             "align_self": null,
 465 |             "border": null,
 466 |             "bottom": null,
 467 |             "display": null,
 468 |             "flex": null,
 469 |             "flex_flow": null,
 470 |             "grid_area": null,
 471 |             "grid_auto_columns": null,
 472 |             "grid_auto_flow": null,
 473 |             "grid_auto_rows": null,
 474 |             "grid_column": null,
 475 |             "grid_gap": null,
 476 |             "grid_row": null,
 477 |             "grid_template_areas": null,
 478 |             "grid_template_columns": null,
 479 |             "grid_template_rows": null,
 480 |             "height": null,
 481 |             "justify_content": null,
 482 |             "justify_items": null,
 483 |             "left": null,
 484 |             "margin": null,
 485 |             "max_height": null,
 486 |             "max_width": null,
 487 |             "min_height": null,
 488 |             "min_width": null,
 489 |             "object_fit": null,
 490 |             "object_position": null,
 491 |             "order": null,
 492 |             "overflow": null,
 493 |             "overflow_x": null,
 494 |             "overflow_y": null,
 495 |             "padding": null,
 496 |             "right": null,
 497 |             "top": null,
 498 |             "visibility": null,
 499 |             "width": null
 500 |           }
 501 |         },
 502 |         "f7d7bc20d2ba40eeb576e8865cdbb8ec": {
 503 |           "model_module": "@jupyter-widgets/base",
 504 |           "model_name": "LayoutModel",
 505 |           "model_module_version": "1.2.0",
 506 |           "state": {
 507 |             "_model_module": "@jupyter-widgets/base",
 508 |             "_model_module_version": "1.2.0",
 509 |             "_model_name": "LayoutModel",
 510 |             "_view_count": null,
 511 |             "_view_module": "@jupyter-widgets/base",
 512 |             "_view_module_version": "1.2.0",
 513 |             "_view_name": "LayoutView",
 514 |             "align_content": null,
 515 |             "align_items": null,
 516 |             "align_self": null,
 517 |             "border": null,
 518 |             "bottom": null,
 519 |             "display": null,
 520 |             "flex": null,
 521 |             "flex_flow": null,
 522 |             "grid_area": null,
 523 |             "grid_auto_columns": null,
 524 |             "grid_auto_flow": null,
 525 |             "grid_auto_rows": null,
 526 |             "grid_column": null,
 527 |             "grid_gap": null,
 528 |             "grid_row": null,
 529 |             "grid_template_areas": null,
 530 |             "grid_template_columns": null,
 531 |             "grid_template_rows": null,
 532 |             "height": null,
 533 |             "justify_content": null,
 534 |             "justify_items": null,
 535 |             "left": null,
 536 |             "margin": null,
 537 |             "max_height": null,
 538 |             "max_width": null,
 539 |             "min_height": null,
 540 |             "min_width": null,
 541 |             "object_fit": null,
 542 |             "object_position": null,
 543 |             "order": null,
 544 |             "overflow": null,
 545 |             "overflow_x": null,
 546 |             "overflow_y": null,
 547 |             "padding": null,
 548 |             "right": null,
 549 |             "top": null,
 550 |             "visibility": null,
 551 |             "width": null
 552 |           }
 553 |         },
 554 |         "1a431c4a814941169c8feff1b4741052": {
 555 |           "model_module": "@jupyter-widgets/controls",
 556 |           "model_name": "DescriptionStyleModel",
 557 |           "model_module_version": "1.5.0",
 558 |           "state": {
 559 |             "_model_module": "@jupyter-widgets/controls",
 560 |             "_model_module_version": "1.5.0",
 561 |             "_model_name": "DescriptionStyleModel",
 562 |             "_view_count": null,
 563 |             "_view_module": "@jupyter-widgets/base",
 564 |             "_view_module_version": "1.2.0",
 565 |             "_view_name": "StyleView",
 566 |             "description_width": ""
 567 |           }
 568 |         },
 569 |         "21a66f11f21e4913a1a5a975727916f0": {
 570 |           "model_module": "@jupyter-widgets/base",
 571 |           "model_name": "LayoutModel",
 572 |           "model_module_version": "1.2.0",
 573 |           "state": {
 574 |             "_model_module": "@jupyter-widgets/base",
 575 |             "_model_module_version": "1.2.0",
 576 |             "_model_name": "LayoutModel",
 577 |             "_view_count": null,
 578 |             "_view_module": "@jupyter-widgets/base",
 579 |             "_view_module_version": "1.2.0",
 580 |             "_view_name": "LayoutView",
 581 |             "align_content": null,
 582 |             "align_items": null,
 583 |             "align_self": null,
 584 |             "border": null,
 585 |             "bottom": null,
 586 |             "display": null,
 587 |             "flex": null,
 588 |             "flex_flow": null,
 589 |             "grid_area": null,
 590 |             "grid_auto_columns": null,
 591 |             "grid_auto_flow": null,
 592 |             "grid_auto_rows": null,
 593 |             "grid_column": null,
 594 |             "grid_gap": null,
 595 |             "grid_row": null,
 596 |             "grid_template_areas": null,
 597 |             "grid_template_columns": null,
 598 |             "grid_template_rows": null,
 599 |             "height": null,
 600 |             "justify_content": null,
 601 |             "justify_items": null,
 602 |             "left": null,
 603 |             "margin": null,
 604 |             "max_height": null,
 605 |             "max_width": null,
 606 |             "min_height": null,
 607 |             "min_width": null,
 608 |             "object_fit": null,
 609 |             "object_position": null,
 610 |             "order": null,
 611 |             "overflow": null,
 612 |             "overflow_x": null,
 613 |             "overflow_y": null,
 614 |             "padding": null,
 615 |             "right": null,
 616 |             "top": null,
 617 |             "visibility": null,
 618 |             "width": null
 619 |           }
 620 |         },
 621 |         "b931a5080c154b7dbbdcebf1a48aa9a3": {
 622 |           "model_module": "@jupyter-widgets/controls",
 623 |           "model_name": "ProgressStyleModel",
 624 |           "model_module_version": "1.5.0",
 625 |           "state": {
 626 |             "_model_module": "@jupyter-widgets/controls",
 627 |             "_model_module_version": "1.5.0",
 628 |             "_model_name": "ProgressStyleModel",
 629 |             "_view_count": null,
 630 |             "_view_module": "@jupyter-widgets/base",
 631 |             "_view_module_version": "1.2.0",
 632 |             "_view_name": "StyleView",
 633 |             "bar_color": null,
 634 |             "description_width": ""
 635 |           }
 636 |         },
 637 |         "6cc6f7e129fc47c9ac57d38f713c50ea": {
 638 |           "model_module": "@jupyter-widgets/base",
 639 |           "model_name": "LayoutModel",
 640 |           "model_module_version": "1.2.0",
 641 |           "state": {
 642 |             "_model_module": "@jupyter-widgets/base",
 643 |             "_model_module_version": "1.2.0",
 644 |             "_model_name": "LayoutModel",
 645 |             "_view_count": null,
 646 |             "_view_module": "@jupyter-widgets/base",
 647 |             "_view_module_version": "1.2.0",
 648 |             "_view_name": "LayoutView",
 649 |             "align_content": null,
 650 |             "align_items": null,
 651 |             "align_self": null,
 652 |             "border": null,
 653 |             "bottom": null,
 654 |             "display": null,
 655 |             "flex": null,
 656 |             "flex_flow": null,
 657 |             "grid_area": null,
 658 |             "grid_auto_columns": null,
 659 |             "grid_auto_flow": null,
 660 |             "grid_auto_rows": null,
 661 |             "grid_column": null,
 662 |             "grid_gap": null,
 663 |             "grid_row": null,
 664 |             "grid_template_areas": null,
 665 |             "grid_template_columns": null,
 666 |             "grid_template_rows": null,
 667 |             "height": null,
 668 |             "justify_content": null,
 669 |             "justify_items": null,
 670 |             "left": null,
 671 |             "margin": null,
 672 |             "max_height": null,
 673 |             "max_width": null,
 674 |             "min_height": null,
 675 |             "min_width": null,
 676 |             "object_fit": null,
 677 |             "object_position": null,
 678 |             "order": null,
 679 |             "overflow": null,
 680 |             "overflow_x": null,
 681 |             "overflow_y": null,
 682 |             "padding": null,
 683 |             "right": null,
 684 |             "top": null,
 685 |             "visibility": null,
 686 |             "width": null
 687 |           }
 688 |         },
 689 |         "896598f7441c4e84b4c7963b520d6daf": {
 690 |           "model_module": "@jupyter-widgets/controls",
 691 |           "model_name": "DescriptionStyleModel",
 692 |           "model_module_version": "1.5.0",
 693 |           "state": {
 694 |             "_model_module": "@jupyter-widgets/controls",
 695 |             "_model_module_version": "1.5.0",
 696 |             "_model_name": "DescriptionStyleModel",
 697 |             "_view_count": null,
 698 |             "_view_module": "@jupyter-widgets/base",
 699 |             "_view_module_version": "1.2.0",
 700 |             "_view_name": "StyleView",
 701 |             "description_width": ""
 702 |           }
 703 |         },
 704 |         "b391b63b204848009b051b9c9a5062a3": {
 705 |           "model_module": "@jupyter-widgets/controls",
 706 |           "model_name": "HBoxModel",
 707 |           "model_module_version": "1.5.0",
 708 |           "state": {
 709 |             "_dom_classes": [],
 710 |             "_model_module": "@jupyter-widgets/controls",
 711 |             "_model_module_version": "1.5.0",
 712 |             "_model_name": "HBoxModel",
 713 |             "_view_count": null,
 714 |             "_view_module": "@jupyter-widgets/controls",
 715 |             "_view_module_version": "1.5.0",
 716 |             "_view_name": "HBoxView",
 717 |             "box_style": "",
 718 |             "children": [
 719 |               "IPY_MODEL_0eee8063610d46139d7576ef02ddc228",
 720 |               "IPY_MODEL_5d3bd17d0aa44d84a91d3ac8255dc296",
 721 |               "IPY_MODEL_84981495b59f46009bde2cdbec478a5f"
 722 |             ],
 723 |             "layout": "IPY_MODEL_f51d9c821e3b4f558ad76706f99d76a6"
 724 |           }
 725 |         },
 726 |         "0eee8063610d46139d7576ef02ddc228": {
 727 |           "model_module": "@jupyter-widgets/controls",
 728 |           "model_name": "HTMLModel",
 729 |           "model_module_version": "1.5.0",
 730 |           "state": {
 731 |             "_dom_classes": [],
 732 |             "_model_module": "@jupyter-widgets/controls",
 733 |             "_model_module_version": "1.5.0",
 734 |             "_model_name": "HTMLModel",
 735 |             "_view_count": null,
 736 |             "_view_module": "@jupyter-widgets/controls",
 737 |             "_view_module_version": "1.5.0",
 738 |             "_view_name": "HTMLView",
 739 |             "description": "",
 740 |             "description_tooltip": null,
 741 |             "layout": "IPY_MODEL_07e7af89197e489b877e309189e6ea53",
 742 |             "placeholder": "​",
 743 |             "style": "IPY_MODEL_66e1d6e6e45146a7a56d3e935f56ad51",
 744 |             "value": "Loading checkpoint shards: 100%"
 745 |           }
 746 |         },
 747 |         "5d3bd17d0aa44d84a91d3ac8255dc296": {
 748 |           "model_module": "@jupyter-widgets/controls",
 749 |           "model_name": "FloatProgressModel",
 750 |           "model_module_version": "1.5.0",
 751 |           "state": {
 752 |             "_dom_classes": [],
 753 |             "_model_module": "@jupyter-widgets/controls",
 754 |             "_model_module_version": "1.5.0",
 755 |             "_model_name": "FloatProgressModel",
 756 |             "_view_count": null,
 757 |             "_view_module": "@jupyter-widgets/controls",
 758 |             "_view_module_version": "1.5.0",
 759 |             "_view_name": "ProgressView",
 760 |             "bar_style": "success",
 761 |             "description": "",
 762 |             "description_tooltip": null,
 763 |             "layout": "IPY_MODEL_85b6c5d6fdc745d4a533004de3c97408",
 764 |             "max": 2,
 765 |             "min": 0,
 766 |             "orientation": "horizontal",
 767 |             "style": "IPY_MODEL_9f5ddcf6583246af9ec1ebe7f23446d6",
 768 |             "value": 2
 769 |           }
 770 |         },
 771 |         "84981495b59f46009bde2cdbec478a5f": {
 772 |           "model_module": "@jupyter-widgets/controls",
 773 |           "model_name": "HTMLModel",
 774 |           "model_module_version": "1.5.0",
 775 |           "state": {
 776 |             "_dom_classes": [],
 777 |             "_model_module": "@jupyter-widgets/controls",
 778 |             "_model_module_version": "1.5.0",
 779 |             "_model_name": "HTMLModel",
 780 |             "_view_count": null,
 781 |             "_view_module": "@jupyter-widgets/controls",
 782 |             "_view_module_version": "1.5.0",
 783 |             "_view_name": "HTMLView",
 784 |             "description": "",
 785 |             "description_tooltip": null,
 786 |             "layout": "IPY_MODEL_58b61a060918476c82be882ed6d5cc10",
 787 |             "placeholder": "​",
 788 |             "style": "IPY_MODEL_7a2fce54921c4062a739fb690387f156",
 789 |             "value": " 2/2 [00:19&lt;00:00,  8.21s/it]"
 790 |           }
 791 |         },
 792 |         "f51d9c821e3b4f558ad76706f99d76a6": {
 793 |           "model_module": "@jupyter-widgets/base",
 794 |           "model_name": "LayoutModel",
 795 |           "model_module_version": "1.2.0",
 796 |           "state": {
 797 |             "_model_module": "@jupyter-widgets/base",
 798 |             "_model_module_version": "1.2.0",
 799 |             "_model_name": "LayoutModel",
 800 |             "_view_count": null,
 801 |             "_view_module": "@jupyter-widgets/base",
 802 |             "_view_module_version": "1.2.0",
 803 |             "_view_name": "LayoutView",
 804 |             "align_content": null,
 805 |             "align_items": null,
 806 |             "align_self": null,
 807 |             "border": null,
 808 |             "bottom": null,
 809 |             "display": null,
 810 |             "flex": null,
 811 |             "flex_flow": null,
 812 |             "grid_area": null,
 813 |             "grid_auto_columns": null,
 814 |             "grid_auto_flow": null,
 815 |             "grid_auto_rows": null,
 816 |             "grid_column": null,
 817 |             "grid_gap": null,
 818 |             "grid_row": null,
 819 |             "grid_template_areas": null,
 820 |             "grid_template_columns": null,
 821 |             "grid_template_rows": null,
 822 |             "height": null,
 823 |             "justify_content": null,
 824 |             "justify_items": null,
 825 |             "left": null,
 826 |             "margin": null,
 827 |             "max_height": null,
 828 |             "max_width": null,
 829 |             "min_height": null,
 830 |             "min_width": null,
 831 |             "object_fit": null,
 832 |             "object_position": null,
 833 |             "order": null,
 834 |             "overflow": null,
 835 |             "overflow_x": null,
 836 |             "overflow_y": null,
 837 |             "padding": null,
 838 |             "right": null,
 839 |             "top": null,
 840 |             "visibility": null,
 841 |             "width": null
 842 |           }
 843 |         },
 844 |         "07e7af89197e489b877e309189e6ea53": {
 845 |           "model_module": "@jupyter-widgets/base",
 846 |           "model_name": "LayoutModel",
 847 |           "model_module_version": "1.2.0",
 848 |           "state": {
 849 |             "_model_module": "@jupyter-widgets/base",
 850 |             "_model_module_version": "1.2.0",
 851 |             "_model_name": "LayoutModel",
 852 |             "_view_count": null,
 853 |             "_view_module": "@jupyter-widgets/base",
 854 |             "_view_module_version": "1.2.0",
 855 |             "_view_name": "LayoutView",
 856 |             "align_content": null,
 857 |             "align_items": null,
 858 |             "align_self": null,
 859 |             "border": null,
 860 |             "bottom": null,
 861 |             "display": null,
 862 |             "flex": null,
 863 |             "flex_flow": null,
 864 |             "grid_area": null,
 865 |             "grid_auto_columns": null,
 866 |             "grid_auto_flow": null,
 867 |             "grid_auto_rows": null,
 868 |             "grid_column": null,
 869 |             "grid_gap": null,
 870 |             "grid_row": null,
 871 |             "grid_template_areas": null,
 872 |             "grid_template_columns": null,
 873 |             "grid_template_rows": null,
 874 |             "height": null,
 875 |             "justify_content": null,
 876 |             "justify_items": null,
 877 |             "left": null,
 878 |             "margin": null,
 879 |             "max_height": null,
 880 |             "max_width": null,
 881 |             "min_height": null,
 882 |             "min_width": null,
 883 |             "object_fit": null,
 884 |             "object_position": null,
 885 |             "order": null,
 886 |             "overflow": null,
 887 |             "overflow_x": null,
 888 |             "overflow_y": null,
 889 |             "padding": null,
 890 |             "right": null,
 891 |             "top": null,
 892 |             "visibility": null,
 893 |             "width": null
 894 |           }
 895 |         },
 896 |         "66e1d6e6e45146a7a56d3e935f56ad51": {
 897 |           "model_module": "@jupyter-widgets/controls",
 898 |           "model_name": "DescriptionStyleModel",
 899 |           "model_module_version": "1.5.0",
 900 |           "state": {
 901 |             "_model_module": "@jupyter-widgets/controls",
 902 |             "_model_module_version": "1.5.0",
 903 |             "_model_name": "DescriptionStyleModel",
 904 |             "_view_count": null,
 905 |             "_view_module": "@jupyter-widgets/base",
 906 |             "_view_module_version": "1.2.0",
 907 |             "_view_name": "StyleView",
 908 |             "description_width": ""
 909 |           }
 910 |         },
 911 |         "85b6c5d6fdc745d4a533004de3c97408": {
 912 |           "model_module": "@jupyter-widgets/base",
 913 |           "model_name": "LayoutModel",
 914 |           "model_module_version": "1.2.0",
 915 |           "state": {
 916 |             "_model_module": "@jupyter-widgets/base",
 917 |             "_model_module_version": "1.2.0",
 918 |             "_model_name": "LayoutModel",
 919 |             "_view_count": null,
 920 |             "_view_module": "@jupyter-widgets/base",
 921 |             "_view_module_version": "1.2.0",
 922 |             "_view_name": "LayoutView",
 923 |             "align_content": null,
 924 |             "align_items": null,
 925 |             "align_self": null,
 926 |             "border": null,
 927 |             "bottom": null,
 928 |             "display": null,
 929 |             "flex": null,
 930 |             "flex_flow": null,
 931 |             "grid_area": null,
 932 |             "grid_auto_columns": null,
 933 |             "grid_auto_flow": null,
 934 |             "grid_auto_rows": null,
 935 |             "grid_column": null,
 936 |             "grid_gap": null,
 937 |             "grid_row": null,
 938 |             "grid_template_areas": null,
 939 |             "grid_template_columns": null,
 940 |             "grid_template_rows": null,
 941 |             "height": null,
 942 |             "justify_content": null,
 943 |             "justify_items": null,
 944 |             "left": null,
 945 |             "margin": null,
 946 |             "max_height": null,
 947 |             "max_width": null,
 948 |             "min_height": null,
 949 |             "min_width": null,
 950 |             "object_fit": null,
 951 |             "object_position": null,
 952 |             "order": null,
 953 |             "overflow": null,
 954 |             "overflow_x": null,
 955 |             "overflow_y": null,
 956 |             "padding": null,
 957 |             "right": null,
 958 |             "top": null,
 959 |             "visibility": null,
 960 |             "width": null
 961 |           }
 962 |         },
 963 |         "9f5ddcf6583246af9ec1ebe7f23446d6": {
 964 |           "model_module": "@jupyter-widgets/controls",
 965 |           "model_name": "ProgressStyleModel",
 966 |           "model_module_version": "1.5.0",
 967 |           "state": {
 968 |             "_model_module": "@jupyter-widgets/controls",
 969 |             "_model_module_version": "1.5.0",
 970 |             "_model_name": "ProgressStyleModel",
 971 |             "_view_count": null,
 972 |             "_view_module": "@jupyter-widgets/base",
 973 |             "_view_module_version": "1.2.0",
 974 |             "_view_name": "StyleView",
 975 |             "bar_color": null,
 976 |             "description_width": ""
 977 |           }
 978 |         },
 979 |         "58b61a060918476c82be882ed6d5cc10": {
 980 |           "model_module": "@jupyter-widgets/base",
 981 |           "model_name": "LayoutModel",
 982 |           "model_module_version": "1.2.0",
 983 |           "state": {
 984 |             "_model_module": "@jupyter-widgets/base",
 985 |             "_model_module_version": "1.2.0",
 986 |             "_model_name": "LayoutModel",
 987 |             "_view_count": null,
 988 |             "_view_module": "@jupyter-widgets/base",
 989 |             "_view_module_version": "1.2.0",
 990 |             "_view_name": "LayoutView",
 991 |             "align_content": null,
 992 |             "align_items": null,
 993 |             "align_self": null,
 994 |             "border": null,
 995 |             "bottom": null,
 996 |             "display": null,
 997 |             "flex": null,
 998 |             "flex_flow": null,
 999 |             "grid_area": null,
1000 |             "grid_auto_columns": null,
1001 |             "grid_auto_flow": null,
1002 |             "grid_auto_rows": null,
1003 |             "grid_column": null,
1004 |             "grid_gap": null,
1005 |             "grid_row": null,
1006 |             "grid_template_areas": null,
1007 |             "grid_template_columns": null,
1008 |             "grid_template_rows": null,
1009 |             "height": null,
1010 |             "justify_content": null,
1011 |             "justify_items": null,
1012 |             "left": null,
1013 |             "margin": null,
1014 |             "max_height": null,
1015 |             "max_width": null,
1016 |             "min_height": null,
1017 |             "min_width": null,
1018 |             "object_fit": null,
1019 |             "object_position": null,
1020 |             "order": null,
1021 |             "overflow": null,
1022 |             "overflow_x": null,
1023 |             "overflow_y": null,
1024 |             "padding": null,
1025 |             "right": null,
1026 |             "top": null,
1027 |             "visibility": null,
1028 |             "width": null
1029 |           }
1030 |         },
1031 |         "7a2fce54921c4062a739fb690387f156": {
1032 |           "model_module": "@jupyter-widgets/controls",
1033 |           "model_name": "DescriptionStyleModel",
1034 |           "model_module_version": "1.5.0",
1035 |           "state": {
1036 |             "_model_module": "@jupyter-widgets/controls",
1037 |             "_model_module_version": "1.5.0",
1038 |             "_model_name": "DescriptionStyleModel",
1039 |             "_view_count": null,
1040 |             "_view_module": "@jupyter-widgets/base",
1041 |             "_view_module_version": "1.2.0",
1042 |             "_view_name": "StyleView",
1043 |             "description_width": ""
1044 |           }
1045 |         }
1046 |       }
1047 |     }
1048 |   },
1049 |   "cells": [
1050 |     {
1051 |       "cell_type": "markdown",
1052 |       "metadata": {
1053 |         "id": "view-in-github",
1054 |         "colab_type": "text"
1055 |       },
1056 |       "source": [
1057 |         "<a href=\"https://colab.research.google.com/github/Vaibhavs10/notebooks/blob/main/RecurrentGemma_colab.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
1058 |       ]
1059 |     },
1060 |     {
1061 |       "cell_type": "markdown",
1062 |       "source": [
1063 |         "# RecurrentGemma - 2B & 2B-it\n",
1064 |         "\n",
1065 |         "RecurrentGemma is a family of open language models built on a novel recurrent architecture developed at Google. Both pre-trained (2B) and instruction-tuned (2B-it) versions are available in English.\n",
1066 |         "\n",
1067 |         "Like Gemma, [RecurrentGemma](https://huggingface.co/google/recurrentgemma-2b-it) models are well-suited for a variety of text generation tasks, including question answering, summarization, and reasoning. Because of its novel architecture, RecurrentGemma requires less memory than Gemma and achieves faster inference when generating long sequences."
1068 |       ],
1069 |       "metadata": {
1070 |         "id": "MVkIfH6Cg7Fx"
1071 |       }
1072 |     },
1073 |     {
1074 |       "cell_type": "code",
1075 |       "execution_count": null,
1076 |       "metadata": {
1077 |         "colab": {
1078 |           "base_uri": "https://localhost:8080/"
1079 |         },
1080 |         "id": "ahVaTC6rEIVI",
1081 |         "outputId": "2036392c-b381-4ca0-80ba-16ba8c87cde3"
1082 |       },
1083 |       "outputs": [
1084 |         {
1085 |           "output_type": "stream",
1086 |           "name": "stdout",
1087 |           "text": [
1088 |             "Collecting transformers==4.40.0.dev0\n",
1089 |             "  Downloading https://huggingface.co/datasets/reach-vb/random-wheels/resolve/main/transformers-4.40.0.dev0-py3-none-any.whl (8.8 MB)\n",
1090 |             "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m8.8/8.8 MB\u001b[0m \u001b[31m30.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
1091 |             "\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (3.13.3)\n",
1092 |             "Requirement already satisfied: huggingface-hub<1.0,>=0.19.3 in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (0.20.3)\n",
1093 |             "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (1.25.2)\n",
1094 |             "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (24.0)\n",
1095 |             "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (6.0.1)\n",
1096 |             "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (2023.12.25)\n",
1097 |             "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (2.31.0)\n",
1098 |             "Requirement already satisfied: tokenizers<0.19,>=0.14 in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (0.15.2)\n",
1099 |             "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (0.4.2)\n",
1100 |             "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (4.66.2)\n",
1101 |             "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.19.3->transformers==4.40.0.dev0) (2023.6.0)\n",
1102 |             "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.19.3->transformers==4.40.0.dev0) (4.10.0)\n",
1103 |             "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers==4.40.0.dev0) (3.3.2)\n",
1104 |             "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers==4.40.0.dev0) (3.6)\n",
1105 |             "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers==4.40.0.dev0) (2.0.7)\n",
1106 |             "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers==4.40.0.dev0) (2024.2.2)\n",
1107 |             "Installing collected packages: transformers\n",
1108 |             "  Attempting uninstall: transformers\n",
1109 |             "    Found existing installation: transformers 4.38.2\n",
1110 |             "    Uninstalling transformers-4.38.2:\n",
1111 |             "      Successfully uninstalled transformers-4.38.2\n",
1112 |             "Successfully installed transformers-4.40.0.dev0\n"
1113 |           ]
1114 |         }
1115 |       ],
1116 |       "source": [
1117 |         "!pip install git+https://github.com/huggingface/transformers.git"
1118 |       ]
1119 |     },
1120 |     {
1121 |       "cell_type": "markdown",
1122 |       "source": [
1123 |         "## Load the model checkpoints\n",
1124 |         "\n",
1125 |         "Before running the rest of this notebook, make sure to accept the model's terms and conditions at https://huggingface.co/google/recurrentgemma-2b-it\n"
1126 |       ],
1127 |       "metadata": {
1128 |         "id": "FZK4T_zHhL9Q"
1129 |       }
1130 |     },
1131 |     {
1132 |       "cell_type": "code",
1133 |       "source": [
1134 |         "import torch\n",
1135 |         "from transformers import AutoTokenizer, AutoModelForCausalLM\n",
1136 |         "\n",
1137 |         "tokenizer = AutoTokenizer.from_pretrained(\"google/recurrentgemma-2b-it\")\n",
1138 |         "model = AutoModelForCausalLM.from_pretrained(\"google/recurrentgemma-2b-it\", torch_dtype=torch.float16).to(\"cuda:0\")"
1139 |       ],
1140 |       "metadata": {
1141 |         "colab": {
1142 |           "base_uri": "https://localhost:8080/",
1143 |           "height": 129,
1144 |           "referenced_widgets": [
1145 |             "e6f2b94e3bb345859811226dc345a6e3",
1146 |             "ce12a18f666740eabe1f71be5152e7d8",
1147 |             "76a4282443194092af1039de43c523c6",
1148 |             "3775878f2e484e84b6cb0971d2d35a7f",
1149 |             "df936531829f4c1580c04719de77059c",
1150 |             "ab60c88077984531aa08150c72fa7ab5",
1151 |             "ad9901a17d3d4b1b898941d16c16f1cb",
1152 |             "f2fbf227d64e451681083a0ca189405c",
1153 |             "72364dfb4d994115bfd049dae5f53423",
1154 |             "fcbe52122eeb466d91b44011f7f9bc47",
1155 |             "c0915cfb93634ede86568eface4115d8",
1156 |             "401cecca00fb42b29f1ec3fd5cfa4396",
1157 |             "e98c3904a7e346a5870c5ac768cd6a98",
1158 |             "d908df5a5a8945dc88f8d0f147245bbd",
1159 |             "f1a29b9608244a9db3cc919ad149ef48",
1160 |             "d19c6905802d46c4beb6fe8886cb6e8c",
1161 |             "f7d7bc20d2ba40eeb576e8865cdbb8ec",
1162 |             "1a431c4a814941169c8feff1b4741052",
1163 |             "21a66f11f21e4913a1a5a975727916f0",
1164 |             "b931a5080c154b7dbbdcebf1a48aa9a3",
1165 |             "6cc6f7e129fc47c9ac57d38f713c50ea",
1166 |             "896598f7441c4e84b4c7963b520d6daf",
1167 |             "b391b63b204848009b051b9c9a5062a3",
1168 |             "0eee8063610d46139d7576ef02ddc228",
1169 |             "5d3bd17d0aa44d84a91d3ac8255dc296",
1170 |             "84981495b59f46009bde2cdbec478a5f",
1171 |             "f51d9c821e3b4f558ad76706f99d76a6",
1172 |             "07e7af89197e489b877e309189e6ea53",
1173 |             "66e1d6e6e45146a7a56d3e935f56ad51",
1174 |             "85b6c5d6fdc745d4a533004de3c97408",
1175 |             "9f5ddcf6583246af9ec1ebe7f23446d6",
1176 |             "58b61a060918476c82be882ed6d5cc10",
1177 |             "7a2fce54921c4062a739fb690387f156"
1178 |           ]
1179 |         },
1180 |         "id": "XItA_HZ-EPIR",
1181 |         "outputId": "22b1edbc-c6d7-4ad0-b992-0f59682a30ce"
1182 |       },
1183 |       "execution_count": null,
1184 |       "outputs": [
1185 |         {
1186 |           "output_type": "display_data",
1187 |           "data": {
1188 |             "text/plain": [
1189 |               "tokenizer_config.json:   0%|          | 0.00/40.5k [00:00<?, ?B/s]"
1190 |             ],
1191 |             "application/vnd.jupyter.widget-view+json": {
1192 |               "version_major": 2,
1193 |               "version_minor": 0,
1194 |               "model_id": "e6f2b94e3bb345859811226dc345a6e3"
1195 |             }
1196 |           },
1197 |           "metadata": {}
1198 |         },
1199 |         {
1200 |           "output_type": "display_data",
1201 |           "data": {
1202 |             "text/plain": [
1203 |               "Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]"
1204 |             ],
1205 |             "application/vnd.jupyter.widget-view+json": {
1206 |               "version_major": 2,
1207 |               "version_minor": 0,
1208 |               "model_id": "401cecca00fb42b29f1ec3fd5cfa4396"
1209 |             }
1210 |           },
1211 |           "metadata": {}
1212 |         },
1213 |         {
1214 |           "output_type": "display_data",
1215 |           "data": {
1216 |             "text/plain": [
1217 |               "Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]"
1218 |             ],
1219 |             "application/vnd.jupyter.widget-view+json": {
1220 |               "version_major": 2,
1221 |               "version_minor": 0,
1222 |               "model_id": "b391b63b204848009b051b9c9a5062a3"
1223 |             }
1224 |           },
1225 |           "metadata": {}
1226 |         }
1227 |       ]
1228 |     },
1229 |     {
1230 |       "cell_type": "markdown",
1231 |       "source": [
1232 |         "## Prepare the input text with the chat template\n",
1233 |         "\n",
1234 |         "The instruction-tuned models use a chat template that must be adhered to for conversational use. The easiest way to apply it is using the tokenizer's built-in chat template, as shown in the following snippet.\n",
1235 |         "\n",
1236 |         "With the model and tokenizer already loaded above, let's apply the chat template to a conversation. In this example, we'll start with a single user interaction:"
1237 |       ],
1238 |       "metadata": {
1239 |         "id": "u3hoYG18hmHS"
1240 |       }
1241 |     },
1242 |     {
1243 |       "cell_type": "code",
1244 |       "source": [
1245 |         "chat = [\n",
1246 |         "    { \"role\": \"user\", \"content\": \"Write a hello world program\" },\n",
1247 |         "]\n",
1248 |         "prompt = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)"
1249 |       ],
1250 |       "metadata": {
1251 |         "id": "IrVmrmVpkgN3"
1252 |       },
1253 |       "execution_count": null,
1254 |       "outputs": []
1255 |     },
1256 |     {
1257 |       "cell_type": "markdown",
1258 |       "source": [
1259 |         "## Tokenize the inputs"
1260 |       ],
1261 |       "metadata": {
1262 |         "id": "fDAnnNAYsghB"
1263 |       }
1264 |     },
1265 |     {
1266 |       "cell_type": "code",
1267 |       "source": [
1268 |         "inputs = tokenizer.encode(prompt, add_special_tokens=False, return_tensors=\"pt\").to(model.device)"
1269 |       ],
1270 |       "metadata": {
1271 |         "id": "dlokCV7isgEI"
1272 |       },
1273 |       "execution_count": null,
1274 |       "outputs": []
1275 |     },
1276 |     {
1277 |       "cell_type": "markdown",
1278 |       "source": [
1279 |         "## Pass the input through the model and generate."
1280 |       ],
1281 |       "metadata": {
1282 |         "id": "pBXeqctLhuGy"
1283 |       }
1284 |     },
1285 |     {
1286 |       "cell_type": "code",
1287 |       "source": [
1288 |         "outputs = model.generate(input_ids=inputs.to(model.device), max_new_tokens=150)\n",
1289 |         "print(tokenizer.batch_decode(outputs, skip_special_tokens=True))"
1290 |       ],
1291 |       "metadata": {
1292 |         "colab": {
1293 |           "base_uri": "https://localhost:8080/"
1294 |         },
1295 |         "id": "odmrX7pQeRo6",
1296 |         "outputId": "3d2f4189-0f7e-4536-eee1-e5265915c657"
1297 |       },
1298 |       "execution_count": null,
1299 |       "outputs": [
1300 |         {
1301 |           "output_type": "stream",
1302 |           "name": "stdout",
1303 |           "text": [
1304 |             "['<start_of_turn>user\\nWrite a hello world program<end_of_turn>\\n<start_of_turn>model\\n```python\\nprint(\"Hello, world!\")\\n```\\n\\nThis program will print the message \"Hello, world!\" to the console.\\n\\n**Explanation:**\\n\\n* `print()` is a built-in Python function that prints the given argument to the console.\\n* `\"Hello, world!\"` is the string that will be printed.\\n\\n**Output:**\\n\\n```\\nHello, world!\\n```']\n"
1305 |           ]
1306 |         }
1307 |       ]
1308 |     },
1309 |     {
1310 |       "cell_type": "markdown",
1311 |       "source": [
1312 |         "Enjoy! There's much more you can do to get the most out of your generations. Check out this guide: https://huggingface.co/docs/transformers/generation_strategies"
1313 |       ],
1314 |       "metadata": {
1315 |         "id": "rkpXJ5sHwmMH"
1316 |       }
1317 |     }
1318 |   ]
1319 | }


--------------------------------------------------------------------------------
/SmolVLM_500M_inference.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "provenance": [],
  7 |       "gpuType": "T4",
  8 |       "authorship_tag": "ABX9TyM1i88r1apXFhIZpD959OWG",
  9 |       "include_colab_link": true
 10 |     },
 11 |     "kernelspec": {
 12 |       "name": "python3",
 13 |       "display_name": "Python 3"
 14 |     },
 15 |     "language_info": {
 16 |       "name": "python"
 17 |     },
 18 |     "accelerator": "GPU",
 19 |     "widgets": {
 20 |       "application/vnd.jupyter.widget-state+json": {
 21 |         "79fbb885952040c18d4aa8ab66baa034": {
 22 |           "model_module": "@jupyter-widgets/controls",
 23 |           "model_name": "HBoxModel",
 24 |           "model_module_version": "1.5.0",
 25 |           "state": {
 26 |             "_dom_classes": [],
 27 |             "_model_module": "@jupyter-widgets/controls",
 28 |             "_model_module_version": "1.5.0",
 29 |             "_model_name": "HBoxModel",
 30 |             "_view_count": null,
 31 |             "_view_module": "@jupyter-widgets/controls",
 32 |             "_view_module_version": "1.5.0",
 33 |             "_view_name": "HBoxView",
 34 |             "box_style": "",
 35 |             "children": [
 36 |               "IPY_MODEL_bf0e759d410a482cbd4d3e2eaf9665a2",
 37 |               "IPY_MODEL_ac87ee6d1c7e4595a2c362be34e52b3e",
 38 |               "IPY_MODEL_794101aa3d194073ac116c66ca73fd41"
 39 |             ],
 40 |             "layout": "IPY_MODEL_cd24a5374c29462c9d8d70f19892dd56"
 41 |           }
 42 |         },
 43 |         "bf0e759d410a482cbd4d3e2eaf9665a2": {
 44 |           "model_module": "@jupyter-widgets/controls",
 45 |           "model_name": "HTMLModel",
 46 |           "model_module_version": "1.5.0",
 47 |           "state": {
 48 |             "_dom_classes": [],
 49 |             "_model_module": "@jupyter-widgets/controls",
 50 |             "_model_module_version": "1.5.0",
 51 |             "_model_name": "HTMLModel",
 52 |             "_view_count": null,
 53 |             "_view_module": "@jupyter-widgets/controls",
 54 |             "_view_module_version": "1.5.0",
 55 |             "_view_name": "HTMLView",
 56 |             "description": "",
 57 |             "description_tooltip": null,
 58 |             "layout": "IPY_MODEL_2222a49475bf4a9b89dd760bd057c89a",
 59 |             "placeholder": "​",
 60 |             "style": "IPY_MODEL_6a9083cd5884400a95aebf432d236c67",
 61 |             "value": "generation_config.json: 100%"
 62 |           }
 63 |         },
 64 |         "ac87ee6d1c7e4595a2c362be34e52b3e": {
 65 |           "model_module": "@jupyter-widgets/controls",
 66 |           "model_name": "FloatProgressModel",
 67 |           "model_module_version": "1.5.0",
 68 |           "state": {
 69 |             "_dom_classes": [],
 70 |             "_model_module": "@jupyter-widgets/controls",
 71 |             "_model_module_version": "1.5.0",
 72 |             "_model_name": "FloatProgressModel",
 73 |             "_view_count": null,
 74 |             "_view_module": "@jupyter-widgets/controls",
 75 |             "_view_module_version": "1.5.0",
 76 |             "_view_name": "ProgressView",
 77 |             "bar_style": "success",
 78 |             "description": "",
 79 |             "description_tooltip": null,
 80 |             "layout": "IPY_MODEL_c0f685a98c274cb39ebf5ec6d1883906",
 81 |             "max": 136,
 82 |             "min": 0,
 83 |             "orientation": "horizontal",
 84 |             "style": "IPY_MODEL_3bf5037740d3416bbe7c53882e67a447",
 85 |             "value": 136
 86 |           }
 87 |         },
 88 |         "794101aa3d194073ac116c66ca73fd41": {
 89 |           "model_module": "@jupyter-widgets/controls",
 90 |           "model_name": "HTMLModel",
 91 |           "model_module_version": "1.5.0",
 92 |           "state": {
 93 |             "_dom_classes": [],
 94 |             "_model_module": "@jupyter-widgets/controls",
 95 |             "_model_module_version": "1.5.0",
 96 |             "_model_name": "HTMLModel",
 97 |             "_view_count": null,
 98 |             "_view_module": "@jupyter-widgets/controls",
 99 |             "_view_module_version": "1.5.0",
100 |             "_view_name": "HTMLView",
101 |             "description": "",
102 |             "description_tooltip": null,
103 |             "layout": "IPY_MODEL_3ed28d63ed164447b6fcb61ead12b509",
104 |             "placeholder": "​",
105 |             "style": "IPY_MODEL_43746e3c771e49369d1a573506876bc4",
106 |             "value": " 136/136 [00:00&lt;00:00, 4.65kB/s]"
107 |           }
108 |         },
109 |         "cd24a5374c29462c9d8d70f19892dd56": {
110 |           "model_module": "@jupyter-widgets/base",
111 |           "model_name": "LayoutModel",
112 |           "model_module_version": "1.2.0",
113 |           "state": {
114 |             "_model_module": "@jupyter-widgets/base",
115 |             "_model_module_version": "1.2.0",
116 |             "_model_name": "LayoutModel",
117 |             "_view_count": null,
118 |             "_view_module": "@jupyter-widgets/base",
119 |             "_view_module_version": "1.2.0",
120 |             "_view_name": "LayoutView",
121 |             "align_content": null,
122 |             "align_items": null,
123 |             "align_self": null,
124 |             "border": null,
125 |             "bottom": null,
126 |             "display": null,
127 |             "flex": null,
128 |             "flex_flow": null,
129 |             "grid_area": null,
130 |             "grid_auto_columns": null,
131 |             "grid_auto_flow": null,
132 |             "grid_auto_rows": null,
133 |             "grid_column": null,
134 |             "grid_gap": null,
135 |             "grid_row": null,
136 |             "grid_template_areas": null,
137 |             "grid_template_columns": null,
138 |             "grid_template_rows": null,
139 |             "height": null,
140 |             "justify_content": null,
141 |             "justify_items": null,
142 |             "left": null,
143 |             "margin": null,
144 |             "max_height": null,
145 |             "max_width": null,
146 |             "min_height": null,
147 |             "min_width": null,
148 |             "object_fit": null,
149 |             "object_position": null,
150 |             "order": null,
151 |             "overflow": null,
152 |             "overflow_x": null,
153 |             "overflow_y": null,
154 |             "padding": null,
155 |             "right": null,
156 |             "top": null,
157 |             "visibility": null,
158 |             "width": null
159 |           }
160 |         },
161 |         "2222a49475bf4a9b89dd760bd057c89a": {
162 |           "model_module": "@jupyter-widgets/base",
163 |           "model_name": "LayoutModel",
164 |           "model_module_version": "1.2.0",
165 |           "state": {
166 |             "_model_module": "@jupyter-widgets/base",
167 |             "_model_module_version": "1.2.0",
168 |             "_model_name": "LayoutModel",
169 |             "_view_count": null,
170 |             "_view_module": "@jupyter-widgets/base",
171 |             "_view_module_version": "1.2.0",
172 |             "_view_name": "LayoutView",
173 |             "align_content": null,
174 |             "align_items": null,
175 |             "align_self": null,
176 |             "border": null,
177 |             "bottom": null,
178 |             "display": null,
179 |             "flex": null,
180 |             "flex_flow": null,
181 |             "grid_area": null,
182 |             "grid_auto_columns": null,
183 |             "grid_auto_flow": null,
184 |             "grid_auto_rows": null,
185 |             "grid_column": null,
186 |             "grid_gap": null,
187 |             "grid_row": null,
188 |             "grid_template_areas": null,
189 |             "grid_template_columns": null,
190 |             "grid_template_rows": null,
191 |             "height": null,
192 |             "justify_content": null,
193 |             "justify_items": null,
194 |             "left": null,
195 |             "margin": null,
196 |             "max_height": null,
197 |             "max_width": null,
198 |             "min_height": null,
199 |             "min_width": null,
200 |             "object_fit": null,
201 |             "object_position": null,
202 |             "order": null,
203 |             "overflow": null,
204 |             "overflow_x": null,
205 |             "overflow_y": null,
206 |             "padding": null,
207 |             "right": null,
208 |             "top": null,
209 |             "visibility": null,
210 |             "width": null
211 |           }
212 |         },
213 |         "6a9083cd5884400a95aebf432d236c67": {
214 |           "model_module": "@jupyter-widgets/controls",
215 |           "model_name": "DescriptionStyleModel",
216 |           "model_module_version": "1.5.0",
217 |           "state": {
218 |             "_model_module": "@jupyter-widgets/controls",
219 |             "_model_module_version": "1.5.0",
220 |             "_model_name": "DescriptionStyleModel",
221 |             "_view_count": null,
222 |             "_view_module": "@jupyter-widgets/base",
223 |             "_view_module_version": "1.2.0",
224 |             "_view_name": "StyleView",
225 |             "description_width": ""
226 |           }
227 |         },
228 |         "c0f685a98c274cb39ebf5ec6d1883906": {
229 |           "model_module": "@jupyter-widgets/base",
230 |           "model_name": "LayoutModel",
231 |           "model_module_version": "1.2.0",
232 |           "state": {
233 |             "_model_module": "@jupyter-widgets/base",
234 |             "_model_module_version": "1.2.0",
235 |             "_model_name": "LayoutModel",
236 |             "_view_count": null,
237 |             "_view_module": "@jupyter-widgets/base",
238 |             "_view_module_version": "1.2.0",
239 |             "_view_name": "LayoutView",
240 |             "align_content": null,
241 |             "align_items": null,
242 |             "align_self": null,
243 |             "border": null,
244 |             "bottom": null,
245 |             "display": null,
246 |             "flex": null,
247 |             "flex_flow": null,
248 |             "grid_area": null,
249 |             "grid_auto_columns": null,
250 |             "grid_auto_flow": null,
251 |             "grid_auto_rows": null,
252 |             "grid_column": null,
253 |             "grid_gap": null,
254 |             "grid_row": null,
255 |             "grid_template_areas": null,
256 |             "grid_template_columns": null,
257 |             "grid_template_rows": null,
258 |             "height": null,
259 |             "justify_content": null,
260 |             "justify_items": null,
261 |             "left": null,
262 |             "margin": null,
263 |             "max_height": null,
264 |             "max_width": null,
265 |             "min_height": null,
266 |             "min_width": null,
267 |             "object_fit": null,
268 |             "object_position": null,
269 |             "order": null,
270 |             "overflow": null,
271 |             "overflow_x": null,
272 |             "overflow_y": null,
273 |             "padding": null,
274 |             "right": null,
275 |             "top": null,
276 |             "visibility": null,
277 |             "width": null
278 |           }
279 |         },
280 |         "3bf5037740d3416bbe7c53882e67a447": {
281 |           "model_module": "@jupyter-widgets/controls",
282 |           "model_name": "ProgressStyleModel",
283 |           "model_module_version": "1.5.0",
284 |           "state": {
285 |             "_model_module": "@jupyter-widgets/controls",
286 |             "_model_module_version": "1.5.0",
287 |             "_model_name": "ProgressStyleModel",
288 |             "_view_count": null,
289 |             "_view_module": "@jupyter-widgets/base",
290 |             "_view_module_version": "1.2.0",
291 |             "_view_name": "StyleView",
292 |             "bar_color": null,
293 |             "description_width": ""
294 |           }
295 |         },
296 |         "3ed28d63ed164447b6fcb61ead12b509": {
297 |           "model_module": "@jupyter-widgets/base",
298 |           "model_name": "LayoutModel",
299 |           "model_module_version": "1.2.0",
300 |           "state": {
301 |             "_model_module": "@jupyter-widgets/base",
302 |             "_model_module_version": "1.2.0",
303 |             "_model_name": "LayoutModel",
304 |             "_view_count": null,
305 |             "_view_module": "@jupyter-widgets/base",
306 |             "_view_module_version": "1.2.0",
307 |             "_view_name": "LayoutView",
308 |             "align_content": null,
309 |             "align_items": null,
310 |             "align_self": null,
311 |             "border": null,
312 |             "bottom": null,
313 |             "display": null,
314 |             "flex": null,
315 |             "flex_flow": null,
316 |             "grid_area": null,
317 |             "grid_auto_columns": null,
318 |             "grid_auto_flow": null,
319 |             "grid_auto_rows": null,
320 |             "grid_column": null,
321 |             "grid_gap": null,
322 |             "grid_row": null,
323 |             "grid_template_areas": null,
324 |             "grid_template_columns": null,
325 |             "grid_template_rows": null,
326 |             "height": null,
327 |             "justify_content": null,
328 |             "justify_items": null,
329 |             "left": null,
330 |             "margin": null,
331 |             "max_height": null,
332 |             "max_width": null,
333 |             "min_height": null,
334 |             "min_width": null,
335 |             "object_fit": null,
336 |             "object_position": null,
337 |             "order": null,
338 |             "overflow": null,
339 |             "overflow_x": null,
340 |             "overflow_y": null,
341 |             "padding": null,
342 |             "right": null,
343 |             "top": null,
344 |             "visibility": null,
345 |             "width": null
346 |           }
347 |         },
348 |         "43746e3c771e49369d1a573506876bc4": {
349 |           "model_module": "@jupyter-widgets/controls",
350 |           "model_name": "DescriptionStyleModel",
351 |           "model_module_version": "1.5.0",
352 |           "state": {
353 |             "_model_module": "@jupyter-widgets/controls",
354 |             "_model_module_version": "1.5.0",
355 |             "_model_name": "DescriptionStyleModel",
356 |             "_view_count": null,
357 |             "_view_module": "@jupyter-widgets/base",
358 |             "_view_module_version": "1.2.0",
359 |             "_view_name": "StyleView",
360 |             "description_width": ""
361 |           }
362 |         }
363 |       }
364 |     }
365 |   },
366 |   "cells": [
367 |     {
368 |       "cell_type": "markdown",
369 |       "metadata": {
370 |         "id": "view-in-github",
371 |         "colab_type": "text"
372 |       },
373 |       "source": [
374 |         "<a href=\"https://colab.research.google.com/github/Vaibhavs10/notebooks/blob/main/SmolVLM_500M_inference.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
375 |       ]
376 |     },
377 |     {
378 |       "cell_type": "markdown",
379 |       "source": [
380 |         "# Smollest VLM out there: SmolVLM 256M & 500M 🔥\n",
381 |         "\n",
382 |         "Check out the model checkpoints and spaces [here](https://huggingface.co/collections/HuggingFaceTB/smolvlm-256m-and-500m-6791fafc5bb0ab8acc960fb0)"
383 |       ],
384 |       "metadata": {
385 |         "id": "B1Enw9pfXN50"
386 |       }
387 |     },
388 |     {
389 |       "cell_type": "code",
390 |       "execution_count": 1,
391 |       "metadata": {
392 |         "id": "XmSco3okWA0q"
393 |       },
394 |       "outputs": [],
395 |       "source": [
396 |         "import torch\n",
397 |         "from PIL import Image\n",
398 |         "from transformers import AutoProcessor, AutoModelForVision2Seq\n",
399 |         "from transformers.image_utils import load_image\n",
400 |         "\n",
401 |         "DEVICE = \"cuda\" if torch.cuda.is_available() else \"cpu\""
402 |       ]
403 |     },
404 |     {
405 |       "cell_type": "markdown",
406 |       "source": [
407 |         "## Load the Processor & the Model"
408 |       ],
409 |       "metadata": {
410 |         "id": "ZMNgWos8ZyWp"
411 |       }
412 |     },
413 |     {
414 |       "cell_type": "code",
415 |       "source": [
416 |         "# Load the input image\n",
417 |         "image = load_image(\"https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg\")\n",
418 |         "\n",
419 |         "# Initialize processor and model\n",
420 |         "processor = AutoProcessor.from_pretrained(\"HuggingFaceTB/SmolVLM-500M-Instruct\")\n",
421 |         "model = AutoModelForVision2Seq.from_pretrained(\n",
422 |         "    \"HuggingFaceTB/SmolVLM-500M-Instruct\",\n",
423 |         "    torch_dtype=torch.bfloat16,\n",
424 |         ").to(DEVICE)\n"
425 |       ],
426 |       "metadata": {
427 |         "colab": {
428 |           "base_uri": "https://localhost:8080/",
429 |           "height": 74,
430 |           "referenced_widgets": [
431 |             "79fbb885952040c18d4aa8ab66baa034",
432 |             "bf0e759d410a482cbd4d3e2eaf9665a2",
433 |             "ac87ee6d1c7e4595a2c362be34e52b3e",
434 |             "794101aa3d194073ac116c66ca73fd41",
435 |             "cd24a5374c29462c9d8d70f19892dd56",
436 |             "2222a49475bf4a9b89dd760bd057c89a",
437 |             "6a9083cd5884400a95aebf432d236c67",
438 |             "c0f685a98c274cb39ebf5ec6d1883906",
439 |             "3bf5037740d3416bbe7c53882e67a447",
440 |             "3ed28d63ed164447b6fcb61ead12b509",
441 |             "43746e3c771e49369d1a573506876bc4"
442 |           ]
443 |         },
444 |         "id": "3V_VnjSJWPOh",
445 |         "outputId": "39bdd135-e37e-4187-8237-2c1483388a10"
446 |       },
447 |       "execution_count": 3,
448 |       "outputs": [
449 |         {
450 |           "output_type": "stream",
451 |           "name": "stderr",
452 |           "text": [
453 |             "Some kwargs in processor config are unused and will not have any effect: image_seq_len. \n"
454 |           ]
455 |         },
456 |         {
457 |           "output_type": "display_data",
458 |           "data": {
459 |             "text/plain": [
460 |               "generation_config.json:   0%|          | 0.00/136 [00:00<?, ?B/s]"
461 |             ],
462 |             "application/vnd.jupyter.widget-view+json": {
463 |               "version_major": 2,
464 |               "version_minor": 0,
465 |               "model_id": "79fbb885952040c18d4aa8ab66baa034"
466 |             }
467 |           },
468 |           "metadata": {}
469 |         }
470 |       ]
471 |     },
472 |     {
473 |       "cell_type": "markdown",
474 |       "source": [
475 |         "## Define the question"
476 |       ],
477 |       "metadata": {
478 |         "id": "I-NzHMf9Z4JY"
479 |       }
480 |     },
481 |     {
482 |       "cell_type": "code",
483 |       "source": [
484 |         "# Create input messages\n",
485 |         "messages = [\n",
486 |         "    {\n",
487 |         "        \"role\": \"user\",\n",
488 |         "        \"content\": [\n",
489 |         "            {\"type\": \"image\"},\n",
490 |         "            {\"type\": \"text\", \"text\": \"Can you describe this image?\"}\n",
491 |         "        ]\n",
492 |         "    },\n",
493 |         "]"
494 |       ],
495 |       "metadata": {
496 |         "id": "BwYCH85OWSG4"
497 |       },
498 |       "execution_count": 4,
499 |       "outputs": []
500 |     },
501 |     {
502 |       "cell_type": "markdown",
503 |       "source": [
504 |         "## Run inference"
505 |       ],
506 |       "metadata": {
507 |         "id": "UKD6bPlvZ8h6"
508 |       }
509 |     },
510 |     {
511 |       "cell_type": "code",
512 |       "source": [
513 |         "# Prepare inputs\n",
514 |         "prompt = processor.apply_chat_template(messages, add_generation_prompt=True)\n",
515 |         "inputs = processor(text=prompt, images=[image], return_tensors=\"pt\")\n",
516 |         "inputs = inputs.to(DEVICE)\n",
517 |         "\n",
518 |         "# Generate outputs\n",
519 |         "generated_ids = model.generate(**inputs, max_new_tokens=500)\n",
520 |         "generated_texts = processor.batch_decode(\n",
521 |         "    generated_ids,\n",
522 |         "    skip_special_tokens=True,\n",
523 |         ")"
524 |       ],
525 |       "metadata": {
526 |         "id": "hcx9WEt8WUw9"
527 |       },
528 |       "execution_count": 5,
529 |       "outputs": []
530 |     },
531 |     {
532 |       "cell_type": "markdown",
533 |       "source": [
534 |         "## Gander at the output! 🤗"
535 |       ],
536 |       "metadata": {
537 |         "id": "28clcc1Dabih"
538 |       }
539 |     },
540 |     {
541 |       "cell_type": "code",
542 |       "source": [
543 |         "print(generated_texts[0])"
544 |       ],
545 |       "metadata": {
546 |         "colab": {
547 |           "base_uri": "https://localhost:8080/",
548 |           "height": 518
549 |         },
550 |         "id": "qqJHjGpxWXFZ",
551 |         "outputId": "3c80aa6c-2f59-49ef-bf80-56c687c8a970"
552 |       },
553 |       "execution_count": 6,
554 |       "outputs": [
555 |         {
556 |           "output_type": "stream",
557 |           "name": "stdout",
558 |           "text": [
559 |             "User:\n",
560 |             "\n",
561 |             "\n",
562 |             "\n",
563 |             "Can you describe this image?\n",
564 |             "Assistant: The image depicts a cityscape featuring a prominent landmark, the Statue of Liberty, prominently displayed in the foreground. The statue is situated on Liberty Island, which is a small, rocky island located in the heart of the Atlantic Ocean. The statue is characterized by its green hue and is adorned with a crown, which is a common feature of the Statue of Liberty.\n",
565 |             "\n",
566 |             "In the background, the cityscape is filled with numerous high-rise buildings, which are typical of urban settings. These buildings vary in height and architectural style, with some being modern and others older. The sky above is clear, suggesting a sunny day, and the sun is casting a golden hue on the city, indicating that it is either early morning or late afternoon.\n",
567 |             "\n",
568 |             "The water surrounding the island is calm, with a few small boats visible, indicating that the area is a popular spot for boating. There are also some trees and greenery on the shore, which adds a touch of natural beauty to the urban environment.\n",
569 |             "\n",
570 |             "The overall composition of the image suggests a harmonious blend of nature and urban life. The statue, with its iconic green color and crown, stands as a symbol of freedom and hope, while the cityscape, with its towering buildings and modern architecture, represents progress and development.\n",
571 |             "\n",
572 |             "In summary, the image captures a cityscape with the Statue of Liberty prominently displayed on Liberty Island, surrounded by a bustling urban environment with high-rise buildings and a calm, natural backdrop. The combination of the statue and the cityscape creates a visually striking and thought-provoking scene.\n"
573 |           ]
574 |         },
575 |         {
576 |           "output_type": "execute_result",
577 |           "data": {
578 |             "text/plain": [
579 |               "\"\\nAssistant: The image depicts a cityscape featuring a prominent landmark, the Statue of Liberty, prominently positioned on Liberty Island. The statue is a green, humanoid figure with a crown atop its head and is situated on a small island surrounded by water. The statue is characterized by its large, detailed structure, with a statue of a woman holding a torch above her head and a tablet in her left hand. The statue is surrounded by a small, rocky island, which is partially visible in the foreground.\\nIn the background, the cityscape is dominated by numerous high-rise buildings, which are densely packed and vary in height. The buildings are primarily made of glass and steel, reflecting the sunlight and creating a bright, urban skyline. The skyline is filled with various architectural styles, including modern skyscrapers and older, more traditional buildings.\\nThe water surrounding the island is calm, with a few small boats visible, indicating that the area is likely a popular tourist destination. The water is a deep blue, suggesting that it is a large body of water, possibly a river or a large lake.\\nIn the foreground, there is a small strip of land with trees and grass, which adds a touch of natural beauty to the urban landscape. The trees are green, indicating that it is likely spring or summer.\\nThe image captures a moment of tranquility and reflection, as the statue and the cityscape come together to create a harmonious and picturesque scene. The statue's presence in the foreground draws attention to the city's grandeur, while the calm water and natural elements in the background provide a sense of peace and serenity.\\nIn summary, the image showcases the Statue of Liberty, a symbol of freedom and democracy, set against a backdrop of a bustling cityscape. The statue is a prominent and iconic representation of human achievement, while the cityscape is a testament to human ingenuity and progress. 
The image captures the beauty and complexity of urban life, with the statue serving as a symbol of hope and freedom, while the cityscape provides a glimpse into the modern world.\\n\""
580 |             ],
581 |             "application/vnd.google.colaboratory.intrinsic+json": {
582 |               "type": "string"
583 |             }
584 |           },
585 |           "metadata": {},
586 |           "execution_count": 6
587 |         }
588 |       ]
589 |     },
590 |     {
591 |       "cell_type": "code",
592 |       "source": [],
593 |       "metadata": {
594 |         "id": "2j4ofDhBawzl"
595 |       },
596 |       "execution_count": null,
597 |       "outputs": []
598 |     }
599 |   ]
600 | }


--------------------------------------------------------------------------------
/Whisper_Large_8bit_loading_w_bnb.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "provenance": [],
  7 |       "authorship_tag": "ABX9TyP+MiRyiV5xQ5ZbVq2Qh2cH",
  8 |       "include_colab_link": true
  9 |     },
 10 |     "kernelspec": {
 11 |       "name": "python3",
 12 |       "display_name": "Python 3"
 13 |     },
 14 |     "language_info": {
 15 |       "name": "python"
 16 |     },
 17 |     "accelerator": "GPU",
 18 |     "gpuClass": "standard",
 19 |     "widgets": {
 20 |       "application/vnd.jupyter.widget-state+json": {
 21 |         "f7a390ea8adc42b3917a18aa20ac7b08": {
 22 |           "model_module": "@jupyter-widgets/controls",
 23 |           "model_name": "VBoxModel",
 24 |           "model_module_version": "1.5.0",
 25 |           "state": {
 26 |             "_dom_classes": [],
 27 |             "_model_module": "@jupyter-widgets/controls",
 28 |             "_model_module_version": "1.5.0",
 29 |             "_model_name": "VBoxModel",
 30 |             "_view_count": null,
 31 |             "_view_module": "@jupyter-widgets/controls",
 32 |             "_view_module_version": "1.5.0",
 33 |             "_view_name": "VBoxView",
 34 |             "box_style": "",
 35 |             "children": [
 36 |               "IPY_MODEL_9d22b9fb75264a64905c196405996b1f",
 37 |               "IPY_MODEL_83e7115a09d144ab9ef10dd0c60c6a68",
 38 |               "IPY_MODEL_85b25d75e78b4cda8546259be94235b1",
 39 |               "IPY_MODEL_4a69efc93763470eb2afd6fe1c258076",
 40 |               "IPY_MODEL_b32ea47b26f44f0582aecf7cd1526f0e"
 41 |             ],
 42 |             "layout": "IPY_MODEL_4b818f15db4d47ce910c4987c64501c7"
 43 |           }
 44 |         },
 45 |         "9d22b9fb75264a64905c196405996b1f": {
 46 |           "model_module": "@jupyter-widgets/controls",
 47 |           "model_name": "HTMLModel",
 48 |           "model_module_version": "1.5.0",
 49 |           "state": {
 50 |             "_dom_classes": [],
 51 |             "_model_module": "@jupyter-widgets/controls",
 52 |             "_model_module_version": "1.5.0",
 53 |             "_model_name": "HTMLModel",
 54 |             "_view_count": null,
 55 |             "_view_module": "@jupyter-widgets/controls",
 56 |             "_view_module_version": "1.5.0",
 57 |             "_view_name": "HTMLView",
 58 |             "description": "",
 59 |             "description_tooltip": null,
 60 |             "layout": "IPY_MODEL_3ea81ae0965840ed8ad532ee02606d05",
 61 |             "placeholder": "​",
 62 |             "style": "IPY_MODEL_a1bfa88b478542a3a4b8773693355bb2",
 63 |             "value": "<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.svg\nalt='Hugging Face'> <br> Copy a token from <a\nhref=\"https://huggingface.co/settings/tokens\" target=\"_blank\">your Hugging Face\ntokens page</a> and paste it below. <br> Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file. </center>"
 64 |           }
 65 |         },
 66 |         "83e7115a09d144ab9ef10dd0c60c6a68": {
 67 |           "model_module": "@jupyter-widgets/controls",
 68 |           "model_name": "PasswordModel",
 69 |           "model_module_version": "1.5.0",
 70 |           "state": {
 71 |             "_dom_classes": [],
 72 |             "_model_module": "@jupyter-widgets/controls",
 73 |             "_model_module_version": "1.5.0",
 74 |             "_model_name": "PasswordModel",
 75 |             "_view_count": null,
 76 |             "_view_module": "@jupyter-widgets/controls",
 77 |             "_view_module_version": "1.5.0",
 78 |             "_view_name": "PasswordView",
 79 |             "continuous_update": true,
 80 |             "description": "Token:",
 81 |             "description_tooltip": null,
 82 |             "disabled": false,
 83 |             "layout": "IPY_MODEL_b1f2b510eb2b4e63b4ccc5e0c963da3d",
 84 |             "placeholder": "​",
 85 |             "style": "IPY_MODEL_fd6bd029bfeb4028b34b4ffa848e5ee7",
 86 |             "value": ""
 87 |           }
 88 |         },
 89 |         "85b25d75e78b4cda8546259be94235b1": {
 90 |           "model_module": "@jupyter-widgets/controls",
 91 |           "model_name": "CheckboxModel",
 92 |           "model_module_version": "1.5.0",
 93 |           "state": {
 94 |             "_dom_classes": [],
 95 |             "_model_module": "@jupyter-widgets/controls",
 96 |             "_model_module_version": "1.5.0",
 97 |             "_model_name": "CheckboxModel",
 98 |             "_view_count": null,
 99 |             "_view_module": "@jupyter-widgets/controls",
100 |             "_view_module_version": "1.5.0",
101 |             "_view_name": "CheckboxView",
102 |             "description": "Add token as git credential?",
103 |             "description_tooltip": null,
104 |             "disabled": false,
105 |             "indent": true,
106 |             "layout": "IPY_MODEL_7788f987f5ee48dbb63d3634b87309cd",
107 |             "style": "IPY_MODEL_a28a135050b641b19d5d7bb12d666ae7",
108 |             "value": true
109 |           }
110 |         },
111 |         "4a69efc93763470eb2afd6fe1c258076": {
112 |           "model_module": "@jupyter-widgets/controls",
113 |           "model_name": "ButtonModel",
114 |           "model_module_version": "1.5.0",
115 |           "state": {
116 |             "_dom_classes": [],
117 |             "_model_module": "@jupyter-widgets/controls",
118 |             "_model_module_version": "1.5.0",
119 |             "_model_name": "ButtonModel",
120 |             "_view_count": null,
121 |             "_view_module": "@jupyter-widgets/controls",
122 |             "_view_module_version": "1.5.0",
123 |             "_view_name": "ButtonView",
124 |             "button_style": "",
125 |             "description": "Login",
126 |             "disabled": false,
127 |             "icon": "",
128 |             "layout": "IPY_MODEL_05ffd42668424ac09d052c325dbad8d3",
129 |             "style": "IPY_MODEL_cb1a9f6bf78047ca88d90d31ed9ab1ee",
130 |             "tooltip": ""
131 |           }
132 |         },
133 |         "b32ea47b26f44f0582aecf7cd1526f0e": {
134 |           "model_module": "@jupyter-widgets/controls",
135 |           "model_name": "HTMLModel",
136 |           "model_module_version": "1.5.0",
137 |           "state": {
138 |             "_dom_classes": [],
139 |             "_model_module": "@jupyter-widgets/controls",
140 |             "_model_module_version": "1.5.0",
141 |             "_model_name": "HTMLModel",
142 |             "_view_count": null,
143 |             "_view_module": "@jupyter-widgets/controls",
144 |             "_view_module_version": "1.5.0",
145 |             "_view_name": "HTMLView",
146 |             "description": "",
147 |             "description_tooltip": null,
148 |             "layout": "IPY_MODEL_bea114e4787c4318a99072f76ab641bc",
149 |             "placeholder": "​",
150 |             "style": "IPY_MODEL_87390232ac4649ba9681b65beb48d17a",
151 |             "value": "\n<b>Pro Tip:</b> If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks. </center>"
152 |           }
153 |         },
154 |         "4b818f15db4d47ce910c4987c64501c7": {
155 |           "model_module": "@jupyter-widgets/base",
156 |           "model_name": "LayoutModel",
157 |           "model_module_version": "1.2.0",
158 |           "state": {
159 |             "_model_module": "@jupyter-widgets/base",
160 |             "_model_module_version": "1.2.0",
161 |             "_model_name": "LayoutModel",
162 |             "_view_count": null,
163 |             "_view_module": "@jupyter-widgets/base",
164 |             "_view_module_version": "1.2.0",
165 |             "_view_name": "LayoutView",
166 |             "align_content": null,
167 |             "align_items": "center",
168 |             "align_self": null,
169 |             "border": null,
170 |             "bottom": null,
171 |             "display": "flex",
172 |             "flex": null,
173 |             "flex_flow": "column",
174 |             "grid_area": null,
175 |             "grid_auto_columns": null,
176 |             "grid_auto_flow": null,
177 |             "grid_auto_rows": null,
178 |             "grid_column": null,
179 |             "grid_gap": null,
180 |             "grid_row": null,
181 |             "grid_template_areas": null,
182 |             "grid_template_columns": null,
183 |             "grid_template_rows": null,
184 |             "height": null,
185 |             "justify_content": null,
186 |             "justify_items": null,
187 |             "left": null,
188 |             "margin": null,
189 |             "max_height": null,
190 |             "max_width": null,
191 |             "min_height": null,
192 |             "min_width": null,
193 |             "object_fit": null,
194 |             "object_position": null,
195 |             "order": null,
196 |             "overflow": null,
197 |             "overflow_x": null,
198 |             "overflow_y": null,
199 |             "padding": null,
200 |             "right": null,
201 |             "top": null,
202 |             "visibility": null,
203 |             "width": "50%"
204 |           }
205 |         },
206 |         "3ea81ae0965840ed8ad532ee02606d05": {
207 |           "model_module": "@jupyter-widgets/base",
208 |           "model_name": "LayoutModel",
209 |           "model_module_version": "1.2.0",
210 |           "state": {
211 |             "_model_module": "@jupyter-widgets/base",
212 |             "_model_module_version": "1.2.0",
213 |             "_model_name": "LayoutModel",
214 |             "_view_count": null,
215 |             "_view_module": "@jupyter-widgets/base",
216 |             "_view_module_version": "1.2.0",
217 |             "_view_name": "LayoutView",
218 |             "align_content": null,
219 |             "align_items": null,
220 |             "align_self": null,
221 |             "border": null,
222 |             "bottom": null,
223 |             "display": null,
224 |             "flex": null,
225 |             "flex_flow": null,
226 |             "grid_area": null,
227 |             "grid_auto_columns": null,
228 |             "grid_auto_flow": null,
229 |             "grid_auto_rows": null,
230 |             "grid_column": null,
231 |             "grid_gap": null,
232 |             "grid_row": null,
233 |             "grid_template_areas": null,
234 |             "grid_template_columns": null,
235 |             "grid_template_rows": null,
236 |             "height": null,
237 |             "justify_content": null,
238 |             "justify_items": null,
239 |             "left": null,
240 |             "margin": null,
241 |             "max_height": null,
242 |             "max_width": null,
243 |             "min_height": null,
244 |             "min_width": null,
245 |             "object_fit": null,
246 |             "object_position": null,
247 |             "order": null,
248 |             "overflow": null,
249 |             "overflow_x": null,
250 |             "overflow_y": null,
251 |             "padding": null,
252 |             "right": null,
253 |             "top": null,
254 |             "visibility": null,
255 |             "width": null
256 |           }
257 |         },
258 |         "a1bfa88b478542a3a4b8773693355bb2": {
259 |           "model_module": "@jupyter-widgets/controls",
260 |           "model_name": "DescriptionStyleModel",
261 |           "model_module_version": "1.5.0",
262 |           "state": {
263 |             "_model_module": "@jupyter-widgets/controls",
264 |             "_model_module_version": "1.5.0",
265 |             "_model_name": "DescriptionStyleModel",
266 |             "_view_count": null,
267 |             "_view_module": "@jupyter-widgets/base",
268 |             "_view_module_version": "1.2.0",
269 |             "_view_name": "StyleView",
270 |             "description_width": ""
271 |           }
272 |         },
273 |         "b1f2b510eb2b4e63b4ccc5e0c963da3d": {
274 |           "model_module": "@jupyter-widgets/base",
275 |           "model_name": "LayoutModel",
276 |           "model_module_version": "1.2.0",
277 |           "state": {
278 |             "_model_module": "@jupyter-widgets/base",
279 |             "_model_module_version": "1.2.0",
280 |             "_model_name": "LayoutModel",
281 |             "_view_count": null,
282 |             "_view_module": "@jupyter-widgets/base",
283 |             "_view_module_version": "1.2.0",
284 |             "_view_name": "LayoutView",
285 |             "align_content": null,
286 |             "align_items": null,
287 |             "align_self": null,
288 |             "border": null,
289 |             "bottom": null,
290 |             "display": null,
291 |             "flex": null,
292 |             "flex_flow": null,
293 |             "grid_area": null,
294 |             "grid_auto_columns": null,
295 |             "grid_auto_flow": null,
296 |             "grid_auto_rows": null,
297 |             "grid_column": null,
298 |             "grid_gap": null,
299 |             "grid_row": null,
300 |             "grid_template_areas": null,
301 |             "grid_template_columns": null,
302 |             "grid_template_rows": null,
303 |             "height": null,
304 |             "justify_content": null,
305 |             "justify_items": null,
306 |             "left": null,
307 |             "margin": null,
308 |             "max_height": null,
309 |             "max_width": null,
310 |             "min_height": null,
311 |             "min_width": null,
312 |             "object_fit": null,
313 |             "object_position": null,
314 |             "order": null,
315 |             "overflow": null,
316 |             "overflow_x": null,
317 |             "overflow_y": null,
318 |             "padding": null,
319 |             "right": null,
320 |             "top": null,
321 |             "visibility": null,
322 |             "width": null
323 |           }
324 |         },
325 |         "fd6bd029bfeb4028b34b4ffa848e5ee7": {
326 |           "model_module": "@jupyter-widgets/controls",
327 |           "model_name": "DescriptionStyleModel",
328 |           "model_module_version": "1.5.0",
329 |           "state": {
330 |             "_model_module": "@jupyter-widgets/controls",
331 |             "_model_module_version": "1.5.0",
332 |             "_model_name": "DescriptionStyleModel",
333 |             "_view_count": null,
334 |             "_view_module": "@jupyter-widgets/base",
335 |             "_view_module_version": "1.2.0",
336 |             "_view_name": "StyleView",
337 |             "description_width": ""
338 |           }
339 |         },
340 |         "7788f987f5ee48dbb63d3634b87309cd": {
341 |           "model_module": "@jupyter-widgets/base",
342 |           "model_name": "LayoutModel",
343 |           "model_module_version": "1.2.0",
344 |           "state": {
345 |             "_model_module": "@jupyter-widgets/base",
346 |             "_model_module_version": "1.2.0",
347 |             "_model_name": "LayoutModel",
348 |             "_view_count": null,
349 |             "_view_module": "@jupyter-widgets/base",
350 |             "_view_module_version": "1.2.0",
351 |             "_view_name": "LayoutView",
352 |             "align_content": null,
353 |             "align_items": null,
354 |             "align_self": null,
355 |             "border": null,
356 |             "bottom": null,
357 |             "display": null,
358 |             "flex": null,
359 |             "flex_flow": null,
360 |             "grid_area": null,
361 |             "grid_auto_columns": null,
362 |             "grid_auto_flow": null,
363 |             "grid_auto_rows": null,
364 |             "grid_column": null,
365 |             "grid_gap": null,
366 |             "grid_row": null,
367 |             "grid_template_areas": null,
368 |             "grid_template_columns": null,
369 |             "grid_template_rows": null,
370 |             "height": null,
371 |             "justify_content": null,
372 |             "justify_items": null,
373 |             "left": null,
374 |             "margin": null,
375 |             "max_height": null,
376 |             "max_width": null,
377 |             "min_height": null,
378 |             "min_width": null,
379 |             "object_fit": null,
380 |             "object_position": null,
381 |             "order": null,
382 |             "overflow": null,
383 |             "overflow_x": null,
384 |             "overflow_y": null,
385 |             "padding": null,
386 |             "right": null,
387 |             "top": null,
388 |             "visibility": null,
389 |             "width": null
390 |           }
391 |         },
392 |         "a28a135050b641b19d5d7bb12d666ae7": {
393 |           "model_module": "@jupyter-widgets/controls",
394 |           "model_name": "DescriptionStyleModel",
395 |           "model_module_version": "1.5.0",
396 |           "state": {
397 |             "_model_module": "@jupyter-widgets/controls",
398 |             "_model_module_version": "1.5.0",
399 |             "_model_name": "DescriptionStyleModel",
400 |             "_view_count": null,
401 |             "_view_module": "@jupyter-widgets/base",
402 |             "_view_module_version": "1.2.0",
403 |             "_view_name": "StyleView",
404 |             "description_width": ""
405 |           }
406 |         },
407 |         "05ffd42668424ac09d052c325dbad8d3": {
408 |           "model_module": "@jupyter-widgets/base",
409 |           "model_name": "LayoutModel",
410 |           "model_module_version": "1.2.0",
411 |           "state": {
412 |             "_model_module": "@jupyter-widgets/base",
413 |             "_model_module_version": "1.2.0",
414 |             "_model_name": "LayoutModel",
415 |             "_view_count": null,
416 |             "_view_module": "@jupyter-widgets/base",
417 |             "_view_module_version": "1.2.0",
418 |             "_view_name": "LayoutView",
419 |             "align_content": null,
420 |             "align_items": null,
421 |             "align_self": null,
422 |             "border": null,
423 |             "bottom": null,
424 |             "display": null,
425 |             "flex": null,
426 |             "flex_flow": null,
427 |             "grid_area": null,
428 |             "grid_auto_columns": null,
429 |             "grid_auto_flow": null,
430 |             "grid_auto_rows": null,
431 |             "grid_column": null,
432 |             "grid_gap": null,
433 |             "grid_row": null,
434 |             "grid_template_areas": null,
435 |             "grid_template_columns": null,
436 |             "grid_template_rows": null,
437 |             "height": null,
438 |             "justify_content": null,
439 |             "justify_items": null,
440 |             "left": null,
441 |             "margin": null,
442 |             "max_height": null,
443 |             "max_width": null,
444 |             "min_height": null,
445 |             "min_width": null,
446 |             "object_fit": null,
447 |             "object_position": null,
448 |             "order": null,
449 |             "overflow": null,
450 |             "overflow_x": null,
451 |             "overflow_y": null,
452 |             "padding": null,
453 |             "right": null,
454 |             "top": null,
455 |             "visibility": null,
456 |             "width": null
457 |           }
458 |         },
459 |         "cb1a9f6bf78047ca88d90d31ed9ab1ee": {
460 |           "model_module": "@jupyter-widgets/controls",
461 |           "model_name": "ButtonStyleModel",
462 |           "model_module_version": "1.5.0",
463 |           "state": {
464 |             "_model_module": "@jupyter-widgets/controls",
465 |             "_model_module_version": "1.5.0",
466 |             "_model_name": "ButtonStyleModel",
467 |             "_view_count": null,
468 |             "_view_module": "@jupyter-widgets/base",
469 |             "_view_module_version": "1.2.0",
470 |             "_view_name": "StyleView",
471 |             "button_color": null,
472 |             "font_weight": ""
473 |           }
474 |         },
475 |         "bea114e4787c4318a99072f76ab641bc": {
476 |           "model_module": "@jupyter-widgets/base",
477 |           "model_name": "LayoutModel",
478 |           "model_module_version": "1.2.0",
479 |           "state": {
480 |             "_model_module": "@jupyter-widgets/base",
481 |             "_model_module_version": "1.2.0",
482 |             "_model_name": "LayoutModel",
483 |             "_view_count": null,
484 |             "_view_module": "@jupyter-widgets/base",
485 |             "_view_module_version": "1.2.0",
486 |             "_view_name": "LayoutView",
487 |             "align_content": null,
488 |             "align_items": null,
489 |             "align_self": null,
490 |             "border": null,
491 |             "bottom": null,
492 |             "display": null,
493 |             "flex": null,
494 |             "flex_flow": null,
495 |             "grid_area": null,
496 |             "grid_auto_columns": null,
497 |             "grid_auto_flow": null,
498 |             "grid_auto_rows": null,
499 |             "grid_column": null,
500 |             "grid_gap": null,
501 |             "grid_row": null,
502 |             "grid_template_areas": null,
503 |             "grid_template_columns": null,
504 |             "grid_template_rows": null,
505 |             "height": null,
506 |             "justify_content": null,
507 |             "justify_items": null,
508 |             "left": null,
509 |             "margin": null,
510 |             "max_height": null,
511 |             "max_width": null,
512 |             "min_height": null,
513 |             "min_width": null,
514 |             "object_fit": null,
515 |             "object_position": null,
516 |             "order": null,
517 |             "overflow": null,
518 |             "overflow_x": null,
519 |             "overflow_y": null,
520 |             "padding": null,
521 |             "right": null,
522 |             "top": null,
523 |             "visibility": null,
524 |             "width": null
525 |           }
526 |         },
527 |         "87390232ac4649ba9681b65beb48d17a": {
528 |           "model_module": "@jupyter-widgets/controls",
529 |           "model_name": "DescriptionStyleModel",
530 |           "model_module_version": "1.5.0",
531 |           "state": {
532 |             "_model_module": "@jupyter-widgets/controls",
533 |             "_model_module_version": "1.5.0",
534 |             "_model_name": "DescriptionStyleModel",
535 |             "_view_count": null,
536 |             "_view_module": "@jupyter-widgets/base",
537 |             "_view_module_version": "1.2.0",
538 |             "_view_name": "StyleView",
539 |             "description_width": ""
540 |           }
541 |         }
542 |       }
543 |     }
544 |   },
545 |   "cells": [
546 |     {
547 |       "cell_type": "markdown",
548 |       "metadata": {
549 |         "id": "view-in-github",
550 |         "colab_type": "text"
551 |       },
552 |       "source": [
553 |         "<a href=\"https://colab.research.google.com/github/Vaibhavs10/notebooks/blob/main/Whisper_Large_8bit_loading_w_bnb.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
554 |       ]
555 |     },
556 |     {
557 |       "cell_type": "markdown",
558 |       "source": [
559 |         "# Whisper Large inference in 8-bit mode\n",
560 |         "\n",
561 |         "For faster and more memory-efficient inference with large models. Read more about it [here](https://huggingface.co/blog/hf-bitsandbytes-integration).\n",
562 |         "\n",
563 |         "Compiled by: [Vaibhav (VB) Srivastav](https://twitter.com/reach_vb)"
564 |       ],
565 |       "metadata": {
566 |         "id": "YnVYTxOBJMmR"
567 |       }
568 |     },
569 |     {
570 |       "cell_type": "markdown",
571 |       "source": [
572 |         "We'll first install the necessary packages. We need ffmpeg to decode `mp3` files from the Common Voice 11 (CV11) dataset, and transformers, bitsandbytes (bnb) and accelerate to load the model in 8-bit mode."
573 |       ],
574 |       "metadata": {
575 |         "id": "KgGPly6_Lrhm"
576 |       }
577 |     },
578 |     {
579 |       "cell_type": "code",
580 |       "execution_count": null,
581 |       "metadata": {
582 |         "colab": {
583 |           "base_uri": "https://localhost:8080/"
584 |         },
585 |         "id": "JKOGmCPxrfJ4",
586 |         "outputId": "b4a4a392-53ab-44aa-8616-047507375c87"
587 |       },
588 |       "outputs": [
589 |         {
590 |           "output_type": "stream",
591 |           "name": "stdout",
592 |           "text": [
593 |             "\r0% [Working]\r            \rHit:1 https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/ InRelease\n",
594 |             "\r0% [Connecting to archive.ubuntu.com (185.125.190.36)] [Connecting to security.\r0% [1 InRelease gpgv 3,626 B] [Waiting for headers] [Connecting to security.ubu\r                                                                               \rHit:2 http://archive.ubuntu.com/ubuntu bionic InRelease\n",
595 |             "\r0% [1 InRelease gpgv 3,626 B] [Waiting for headers] [Waiting for headers] [Wait\r                                                                               \rHit:3 http://security.ubuntu.com/ubuntu bionic-security InRelease\n",
596 |             "\r0% [1 InRelease gpgv 3,626 B] [Waiting for headers] [Waiting for headers] [Wait\r                                                                               \rHit:4 http://archive.ubuntu.com/ubuntu bionic-updates InRelease\n",
597 |             "Hit:5 http://ppa.launchpad.net/c2d4u.team/c2d4u4.0+/ubuntu bionic InRelease\n",
598 |             "Hit:6 http://archive.ubuntu.com/ubuntu bionic-backports InRelease\n",
599 |             "Ign:7 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64  InRelease\n",
600 |             "Hit:8 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64  InRelease\n",
601 |             "Hit:9 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64  Release\n",
602 |             "Hit:10 http://ppa.launchpad.net/cran/libgit2/ubuntu bionic InRelease\n",
603 |             "Hit:11 http://ppa.launchpad.net/deadsnakes/ppa/ubuntu bionic InRelease\n",
604 |             "Hit:12 http://ppa.launchpad.net/graphics-drivers/ppa/ubuntu bionic InRelease\n",
605 |             "Hit:13 http://ppa.launchpad.net/jonathonf/ffmpeg-4/ubuntu bionic InRelease\n",
606 |             "Reading package lists... Done\n",
607 |             "Hit:1 http://ppa.launchpad.net/c2d4u.team/c2d4u4.0+/ubuntu bionic InRelease\n",
608 |             "Hit:2 http://ppa.launchpad.net/cran/libgit2/ubuntu bionic InRelease\n",
609 |             "Hit:3 https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/ InRelease\n",
610 |             "Hit:4 http://security.ubuntu.com/ubuntu bionic-security InRelease\n",
611 |             "Hit:5 http://archive.ubuntu.com/ubuntu bionic InRelease\n",
612 |             "Hit:6 http://ppa.launchpad.net/deadsnakes/ppa/ubuntu bionic InRelease\n",
613 |             "Hit:7 http://ppa.launchpad.net/graphics-drivers/ppa/ubuntu bionic InRelease\n",
614 |             "Hit:8 http://archive.ubuntu.com/ubuntu bionic-updates InRelease\n",
615 |             "Ign:9 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64  InRelease\n",
616 |             "Hit:10 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64  InRelease\n",
617 |             "Hit:11 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64  Release\n",
618 |             "Hit:12 http://archive.ubuntu.com/ubuntu bionic-backports InRelease\n",
619 |             "Hit:13 http://ppa.launchpad.net/jonathonf/ffmpeg-4/ubuntu bionic InRelease\n",
620 |             "Reading package lists... Done\n",
621 |             "Building dependency tree       \n",
622 |             "Reading state information... Done\n",
623 |             "32 packages can be upgraded. Run 'apt list --upgradable' to see them.\n",
624 |             "Reading package lists... Done\n",
625 |             "Building dependency tree       \n",
626 |             "Reading state information... Done\n",
627 |             "ffmpeg is already the newest version (7:4.3.2-0york0~18.04).\n",
628 |             "The following package was automatically installed and is no longer required:\n",
629 |             "  libnvidia-common-460\n",
630 |             "Use 'apt autoremove' to remove it.\n",
631 |             "0 upgraded, 0 newly installed, 0 to remove and 32 not upgraded.\n",
632 |             "  Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
633 |             "  Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
634 |             "    Preparing wheel metadata ... \u001b[?25l\u001b[?25hdone\n"
635 |           ]
636 |         }
637 |       ],
638 |       "source": [
639 |         "!add-apt-repository -y ppa:jonathonf/ffmpeg-4 && apt update && apt install -y ffmpeg\n",
640 |         "!pip install --quiet datasets git+https://github.com/huggingface/transformers evaluate huggingface_hub jiwer bitsandbytes accelerate"
641 |       ]
642 |     },
643 |     {
644 |       "cell_type": "markdown",
645 |       "source": [
646 |         "Since we will be running inference on the CV11 dataset, we need to authenticate ourselves (CV11 requires accepting its Terms and Conditions)."
647 |       ],
648 |       "metadata": {
649 |         "id": "NvR6u52ZL9yb"
650 |       }
651 |     },
652 |     {
653 |       "cell_type": "code",
654 |       "source": [
655 |         "!git config --global credential.helper store\n",
656 |         "from huggingface_hub import login\n",
657 |         "\n",
658 |         "login()"
659 |       ],
660 |       "metadata": {
661 |         "colab": {
662 |           "base_uri": "https://localhost:8080/",
663 |           "height": 331,
664 |           "referenced_widgets": [
665 |             "f7a390ea8adc42b3917a18aa20ac7b08",
666 |             "9d22b9fb75264a64905c196405996b1f",
667 |             "83e7115a09d144ab9ef10dd0c60c6a68",
668 |             "85b25d75e78b4cda8546259be94235b1",
669 |             "4a69efc93763470eb2afd6fe1c258076",
670 |             "b32ea47b26f44f0582aecf7cd1526f0e",
671 |             "4b818f15db4d47ce910c4987c64501c7",
672 |             "3ea81ae0965840ed8ad532ee02606d05",
673 |             "a1bfa88b478542a3a4b8773693355bb2",
674 |             "b1f2b510eb2b4e63b4ccc5e0c963da3d",
675 |             "fd6bd029bfeb4028b34b4ffa848e5ee7",
676 |             "7788f987f5ee48dbb63d3634b87309cd",
677 |             "a28a135050b641b19d5d7bb12d666ae7",
678 |             "05ffd42668424ac09d052c325dbad8d3",
679 |             "cb1a9f6bf78047ca88d90d31ed9ab1ee",
680 |             "bea114e4787c4318a99072f76ab641bc",
681 |             "87390232ac4649ba9681b65beb48d17a"
682 |           ]
683 |         },
684 |         "id": "tBSPoZggrtc8",
685 |         "outputId": "1d9fed6b-2345-4eb1-923d-c4624373cc7d"
686 |       },
687 |       "execution_count": null,
688 |       "outputs": [
689 |         {
690 |           "output_type": "stream",
691 |           "name": "stdout",
692 |           "text": [
693 |             "Token is valid.\n",
694 |             "Your token has been saved in your configured git credential helpers (store).\n",
695 |             "Your token has been saved to /root/.huggingface/token\n",
696 |             "Login successful\n"
697 |           ]
698 |         }
699 |       ]
700 |     },
701 |     {
702 |       "cell_type": "markdown",
703 |       "source": [
704 |         "To reduce the memory and time overhead, we'll load the dataset in streaming fashion. At inference time, we'll stream one data point at a time. This is especially useful for larger datasets."
705 |       ],
706 |       "metadata": {
707 |         "id": "nmQdxHxXMPNL"
708 |       }
709 |     },
710 |     {
711 |       "cell_type": "code",
712 |       "source": [
713 |         "from datasets import load_dataset\n",
714 |         "\n",
715 |         "dataset = load_dataset(\n",
716 |         "    \"mozilla-foundation/common_voice_11_0\", \"en\", revision=\"streaming\", split=\"test\", streaming=True, use_auth_token=True\n",
717 |         ")"
718 |       ],
719 |       "metadata": {
720 |         "id": "_MeHLH1Qrv6_"
721 |       },
722 |       "execution_count": null,
723 |       "outputs": []
724 |     },
725 |     {
726 |       "cell_type": "markdown",
727 |       "source": [
728 |         "Load the model and processor in 8-bit mode with `load_in_8bit=True`.\n",
729 |         "\n",
730 |         "Note: This is the only change you need to make to run the model in 8-bit mode."
731 |       ],
732 |       "metadata": {
733 |         "id": "Jk-efqH5MeFg"
734 |       }
735 |     },
736 |     {
737 |       "cell_type": "code",
738 |       "source": [
739 |         "import torch\n",
740 |         "from transformers import WhisperForConditionalGeneration, WhisperProcessor\n",
741 |         "\n",
742 |         "model = WhisperForConditionalGeneration.from_pretrained(\"openai/whisper-large\", device_map=\"auto\", load_in_8bit=True)\n",
743 |         "processor = WhisperProcessor.from_pretrained(\"openai/whisper-large\", load_in_8bit=True)"
744 |       ],
745 |       "metadata": {
746 |         "id": "UdiPC_3w_UyR"
747 |       },
748 |       "execution_count": null,
749 |       "outputs": []
750 |     },
751 |     {
752 |       "cell_type": "markdown",
753 |       "source": [
754 |         "Resample the dataset's audio to 16 kHz, since Whisper expects 16 kHz input."
755 |       ],
756 |       "metadata": {
757 |         "id": "IJtaj3IpMwWF"
758 |       }
759 |     },
760 |     {
761 |       "cell_type": "code",
762 |       "source": [
763 |         "from datasets import Audio\n",
764 |         "\n",
765 |         "dataset = dataset.take(10)\n",
766 |         "\n",
767 |         "# resample to 16kHz\n",
768 |         "dataset = dataset.cast_column(\"audio\", Audio(sampling_rate=16000))"
769 |       ],
770 |       "metadata": {
771 |         "id": "NN39fYPbEbkC"
772 |       },
773 |       "execution_count": null,
774 |       "outputs": []
775 |     },
776 |     {
777 |       "cell_type": "markdown",
778 |       "source": [
779 |         "Voilà! Time to run the inference loop!"
780 |       ],
781 |       "metadata": {
782 |         "id": "ZyXEYmF3M4C5"
783 |       }
784 |     },
785 |     {
786 |       "cell_type": "code",
787 |       "source": [
788 |         "%%time\n",
789 |         "\n",
790 |         "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
791 |         "\n",
792 |         "for data in dataset:\n",
793 |         "    inputs = processor.feature_extractor(data[\"audio\"][\"array\"], return_tensors=\"pt\", sampling_rate=16_000).input_features.half().to(device)\n",
794 |         "    forced_decoder_ids = processor.get_decoder_prompt_ids(language=\"en\", task=\"transcribe\")\n",
795 |         "    predicted_ids = model.generate(inputs, forced_decoder_ids=forced_decoder_ids)\n",
796 |         "    text = processor.tokenizer.batch_decode(predicted_ids, skip_special_tokens=True, normalize=False)[0]\n",
797 |         "    print(text)"
798 |       ],
799 |       "metadata": {
800 |         "colab": {
801 |           "base_uri": "https://localhost:8080/"
802 |         },
803 |         "id": "Sbwt2HOPEVpn",
804 |         "outputId": "bfea6799-9226-4b80-dab1-39dbf936d5ab"
805 |       },
806 |       "execution_count": null,
807 |       "outputs": [
808 |         {
809 |           "output_type": "stream",
810 |           "name": "stderr",
811 |           "text": [
812 |             "Reading metadata...: 16354it [00:00, 66038.23it/s]\n"
813 |           ]
814 |         },
815 |         {
816 |           "output_type": "stream",
817 |           "name": "stdout",
818 |           "text": [
819 |             " Joe Keaton disapproved of films and Buster also had reservations about the medium.\n",
820 |             " She'll be alright.\n",
821 |             " Six.\n",
822 |             " All is well that ends well.\n",
823 |             " It is a busy market town that serves a large, surrounded area.\n",
824 |             " the team had Olympic champion Carolina Marin in the squad for the season\n",
825 |             " Do you mean it?\n",
826 |             " The new patch is less invasive than the old one, but still causes regression.\n",
827 |             " How is Mozilla going to handle ambiguities like Q and Q?\n",
828 |             " Wish you a safe and happy holiday.\n",
829 |             "CPU times: user 42.3 s, sys: 1.27 s, total: 43.6 s\n",
830 |             "Wall time: 43.7 s\n"
831 |           ]
832 |         }
833 |       ]
834 |     },
835 |     {
836 |       "cell_type": "code",
837 |       "source": [
838 |         "!nvidia-smi"
839 |       ],
840 |       "metadata": {
841 |         "id": "Lq91uTlgM-Xp",
842 |         "outputId": "c7e6aab2-9996-40e2-8855-669b6c69b705",
843 |         "colab": {
844 |           "base_uri": "https://localhost:8080/"
845 |         }
846 |       },
847 |       "execution_count": null,
848 |       "outputs": [
849 |         {
850 |           "output_type": "stream",
851 |           "name": "stdout",
852 |           "text": [
853 |             "Wed Dec  7 15:28:37 2022       \n",
854 |             "+-----------------------------------------------------------------------------+\n",
855 |             "| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |\n",
856 |             "|-------------------------------+----------------------+----------------------+\n",
857 |             "| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |\n",
858 |             "| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |\n",
859 |             "|                               |                      |               MIG M. |\n",
860 |             "|===============================+======================+======================|\n",
861 |             "|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |\n",
862 |             "| N/A   57C    P0    29W /  70W |   6410MiB / 15109MiB |      0%      Default |\n",
863 |             "|                               |                      |                  N/A |\n",
864 |             "+-------------------------------+----------------------+----------------------+\n",
865 |             "                                                                               \n",
866 |             "+-----------------------------------------------------------------------------+\n",
867 |             "| Processes:                                                                  |\n",
868 |             "|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |\n",
869 |             "|        ID   ID                                                   Usage      |\n",
870 |             "|=============================================================================|\n",
871 |             "+-----------------------------------------------------------------------------+\n"
872 |           ]
873 |         }
874 |       ]
875 |     }
876 |   ]
877 | }


--------------------------------------------------------------------------------
/Whisper_translate_with_🤗transformers_pipeline.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "provenance": [],
  7 |       "authorship_tag": "ABX9TyNc6C55bNNEjls6hK10Usqh",
  8 |       "include_colab_link": true
  9 |     },
 10 |     "kernelspec": {
 11 |       "name": "python3",
 12 |       "display_name": "Python 3"
 13 |     },
 14 |     "language_info": {
 15 |       "name": "python"
 16 |     }
 17 |   },
 18 |   "cells": [
 19 |     {
 20 |       "cell_type": "markdown",
 21 |       "metadata": {
 22 |         "id": "view-in-github",
 23 |         "colab_type": "text"
 24 |       },
 25 |       "source": [
 26 |         "<a href=\"https://colab.research.google.com/github/Vaibhavs10/notebooks/blob/main/Whisper_translate_with_%F0%9F%A4%97transformers_pipeline.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
 27 |       ]
 28 |     },
 29 |     {
 30 |       "cell_type": "code",
 31 |       "execution_count": 2,
 32 |       "metadata": {
 33 |         "colab": {
 34 |           "base_uri": "https://localhost:8080/"
 35 |         },
 36 |         "id": "oW1KgNjEgtvZ",
 37 |         "outputId": "403edb60-8922-4d1b-ff04-d2b291efc89d"
 38 |       },
 39 |       "outputs": [
 40 |         {
 41 |           "output_type": "stream",
 42 |           "name": "stdout",
 43 |           "text": [
 44 |             "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
 45 |             "Requirement already satisfied: transformers in /usr/local/lib/python3.8/dist-packages (4.26.1)\n",
 46 |             "Requirement already satisfied: datasets in /usr/local/lib/python3.8/dist-packages (2.9.0)\n",
 47 |             "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.8/dist-packages (from transformers) (1.21.6)\n",
 48 |             "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.8/dist-packages (from transformers) (6.0)\n",
 49 |             "Requirement already satisfied: huggingface-hub<1.0,>=0.11.0 in /usr/local/lib/python3.8/dist-packages (from transformers) (0.12.0)\n",
 50 |             "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.8/dist-packages (from transformers) (2022.6.2)\n",
 51 |             "Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /usr/local/lib/python3.8/dist-packages (from transformers) (0.13.2)\n",
 52 |             "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.8/dist-packages (from transformers) (4.64.1)\n",
 53 |             "Requirement already satisfied: requests in /usr/local/lib/python3.8/dist-packages (from transformers) (2.25.1)\n",
 54 |             "Requirement already satisfied: filelock in /usr/local/lib/python3.8/dist-packages (from transformers) (3.9.0)\n",
 55 |             "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.8/dist-packages (from transformers) (23.0)\n",
 56 |             "Requirement already satisfied: pyarrow>=6.0.0 in /usr/local/lib/python3.8/dist-packages (from datasets) (9.0.0)\n",
 57 |             "Requirement already satisfied: fsspec[http]>=2021.11.1 in /usr/local/lib/python3.8/dist-packages (from datasets) (2023.1.0)\n",
 58 |             "Requirement already satisfied: dill<0.3.7 in /usr/local/lib/python3.8/dist-packages (from datasets) (0.3.6)\n",
 59 |             "Requirement already satisfied: multiprocess in /usr/local/lib/python3.8/dist-packages (from datasets) (0.70.14)\n",
 60 |             "Requirement already satisfied: pandas in /usr/local/lib/python3.8/dist-packages (from datasets) (1.3.5)\n",
 61 |             "Requirement already satisfied: responses<0.19 in /usr/local/lib/python3.8/dist-packages (from datasets) (0.18.0)\n",
 62 |             "Requirement already satisfied: aiohttp in /usr/local/lib/python3.8/dist-packages (from datasets) (3.8.3)\n",
 63 |             "Requirement already satisfied: xxhash in /usr/local/lib/python3.8/dist-packages (from datasets) (3.2.0)\n",
 64 |             "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (1.8.2)\n",
 65 |             "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (22.2.0)\n",
 66 |             "Requirement already satisfied: charset-normalizer<3.0,>=2.0 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (2.1.1)\n",
 67 |             "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (4.0.2)\n",
 68 |             "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (1.3.1)\n",
 69 |             "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (1.3.3)\n",
 70 |             "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (6.0.4)\n",
 71 |             "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.8/dist-packages (from huggingface-hub<1.0,>=0.11.0->transformers) (4.4.0)\n",
 72 |             "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.8/dist-packages (from requests->transformers) (1.26.14)\n",
 73 |             "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.8/dist-packages (from requests->transformers) (2022.12.7)\n",
 74 |             "Requirement already satisfied: chardet<5,>=3.0.2 in /usr/local/lib/python3.8/dist-packages (from requests->transformers) (4.0.0)\n",
 75 |             "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.8/dist-packages (from requests->transformers) (2.10)\n",
 76 |             "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas->datasets) (2022.7.1)\n",
 77 |             "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas->datasets) (2.8.2)\n",
 78 |             "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.8/dist-packages (from python-dateutil>=2.7.3->pandas->datasets) (1.15.0)\n"
 79 |           ]
 80 |         }
 81 |       ],
 82 |       "source": [
 83 |         "!pip install transformers datasets"
 84 |       ]
 85 |     },
 86 |     {
 87 |       "cell_type": "code",
 88 |       "source": [
 89 |         "from transformers import pipeline\n",
 90 |         "from datasets import load_dataset"
 91 |       ],
 92 |       "metadata": {
 93 |         "id": "BLCTDydZgwvJ"
 94 |       },
 95 |       "execution_count": 25,
 96 |       "outputs": []
 97 |     },
 98 |     {
 99 |       "cell_type": "code",
100 |       "source": [
101 |         "pipe = pipeline(\"automatic-speech-recognition\", model=\"openai/whisper-small\", generate_kwargs={\"task\": \"translate\", \"language\": \"german\"})"
102 |       ],
103 |       "metadata": {
104 |         "id": "_fGRIMlMhF_A"
105 |       },
106 |       "execution_count": 26,
107 |       "outputs": []
108 |     },
109 |     {
110 |       "cell_type": "code",
111 |       "source": [
112 |         "cv11 = load_dataset(\"mozilla-foundation/common_voice_11_0\", \"de\", streaming=True, split=\"test\")"
113 |       ],
114 |       "metadata": {
115 |         "id": "GbIJMykEEuVO"
116 |       },
117 |       "execution_count": 27,
118 |       "outputs": []
119 |     },
120 |     {
121 |       "cell_type": "code",
122 |       "source": [
123 |         "next(iter(cv11))"
124 |       ],
125 |       "metadata": {
126 |         "colab": {
127 |           "base_uri": "https://localhost:8080/"
128 |         },
129 |         "id": "-xP3eHKDLVwS",
130 |         "outputId": "f147a031-3cc4-4e1f-de2a-424bea9e4a05"
131 |       },
132 |       "execution_count": 31,
133 |       "outputs": [
134 |         {
135 |           "output_type": "stream",
136 |           "name": "stderr",
137 |           "text": [
138 |             "Reading metadata...: 16082it [00:00, 24809.25it/s]\n"
139 |           ]
140 |         },
141 |         {
142 |           "output_type": "execute_result",
143 |           "data": {
144 |             "text/plain": [
145 |               "{'client_id': '0052c07533a6976233ad5926d950b523002c4d8cdd9ae8726dbfec385951bd22aa707a742c49afe20c7d6cb9515dbaddac5b4d6fe8ebddcfbec46a2d3180a3a1',\n",
146 |               " 'path': 'common_voice_de_17922420.mp3',\n",
147 |               " 'audio': {'path': 'common_voice_de_17922420.mp3',\n",
148 |               "  'array': array([ 0.0000000e+00,  0.0000000e+00,  0.0000000e+00, ...,\n",
149 |               "         -9.0749630e-12,  5.6385865e-09,  7.3282314e-09], dtype=float32),\n",
150 |               "  'sampling_rate': 48000},\n",
151 |               " 'sentence': 'Zieht euch bitte draußen die Schuhe aus.',\n",
152 |               " 'up_votes': 2,\n",
153 |               " 'down_votes': 0,\n",
154 |               " 'age': '',\n",
155 |               " 'gender': '',\n",
156 |               " 'accent': '',\n",
157 |               " 'locale': 'de',\n",
158 |               " 'segment': ''}"
159 |             ]
160 |           },
161 |           "metadata": {},
162 |           "execution_count": 31
163 |         }
164 |       ]
165 |     },
166 |     {
167 |       "cell_type": "code",
168 |       "source": [
169 |         "test_speech = {\"raw\": next(iter(cv11))[\"audio\"][\"array\"],\n",
170 |         "               \"sampling_rate\": next(iter(cv11))[\"audio\"][\"sampling_rate\"]}"
171 |       ],
172 |       "metadata": {
173 |         "colab": {
174 |           "base_uri": "https://localhost:8080/"
175 |         },
176 |         "id": "ZZlzI4iWFD7C",
177 |         "outputId": "f390e005-9720-4e8d-8d65-61f3d3714b7b"
178 |       },
179 |       "execution_count": 28,
180 |       "outputs": [
181 |         {
182 |           "output_type": "stream",
183 |           "name": "stderr",
184 |           "text": [
185 |             "Reading metadata...: 16082it [00:00, 23627.15it/s]\n",
186 |             "Reading metadata...: 16082it [00:00, 32748.99it/s]\n"
187 |           ]
188 |         }
189 |       ]
190 |     },
191 |     {
192 |       "cell_type": "code",
193 |       "source": [
194 |         "pipe(test_speech, return_timestamps=True, chunk_length_s=30, stride_length_s=[6,0])"
195 |       ],
196 |       "metadata": {
197 |         "colab": {
198 |           "base_uri": "https://localhost:8080/"
199 |         },
200 |         "id": "1RDKFmrViDGP",
201 |         "outputId": "84d38b06-8639-4285-86d2-7b40f0497ca1"
202 |       },
203 |       "execution_count": 30,
204 |       "outputs": [
205 |         {
206 |           "output_type": "execute_result",
207 |           "data": {
208 |             "text/plain": [
209 |               "{'text': ' Please take off your shoes.',\n",
210 |               " 'chunks': [{'text': ' Please take off your shoes.', 'timestamp': (0.0, 3.0)}]}"
211 |             ]
212 |           },
213 |           "metadata": {},
214 |           "execution_count": 30
215 |         }
216 |       ]
217 |     }
218 |   ]
219 | }


--------------------------------------------------------------------------------
/deepseek_r1_distill_qwen1_5B_transformers.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "provenance": [],
  7 |       "gpuType": "T4",
  8 |       "authorship_tag": "ABX9TyMaHXlZf4FF/2AbgPjQfxrR",
  9 |       "include_colab_link": true
 10 |     },
 11 |     "kernelspec": {
 12 |       "name": "python3",
 13 |       "display_name": "Python 3"
 14 |     },
 15 |     "language_info": {
 16 |       "name": "python"
 17 |     },
 18 |     "accelerator": "GPU"
 19 |   },
 20 |   "cells": [
 21 |     {
 22 |       "cell_type": "markdown",
 23 |       "metadata": {
 24 |         "id": "view-in-github",
 25 |         "colab_type": "text"
 26 |       },
 27 |       "source": [
 28 |         "<a href=\"https://colab.research.google.com/github/Vaibhavs10/notebooks/blob/main/deepseek_r1_distill_qwen1_5B_transformers.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
 29 |       ]
 30 |     },
 31 |     {
 32 |       "cell_type": "markdown",
 33 |       "source": [
 34 |         "# Run DeepSeek R1 Distill Qwen 1.5B in FREE Google Colab\n",
 35 |         "\n",
 36 |         "Powered by Transformers and DeepSeek! ❤️"
 37 |       ],
 38 |       "metadata": {
 39 |         "id": "6dxqPSkyeDoO"
 40 |       }
 41 |     },
 42 |     {
 43 |       "cell_type": "markdown",
 44 |       "source": [
 45 |         "## Download the model checkpoint\n",
 46 |         "\n",
 47 |         "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
 48 |       ],
 49 |       "metadata": {
 50 |         "id": "4vwR5z2LeN8h"
 51 |       }
 52 |     },
 53 |     {
 54 |       "cell_type": "code",
 55 |       "execution_count": 7,
 56 |       "metadata": {
 57 |         "id": "uU4FwZWVbgdO"
 58 |       },
 59 |       "outputs": [],
 60 |       "source": [
 61 |         "from transformers import AutoModelForCausalLM, AutoTokenizer\n",
 62 |         "\n",
 63 |         "model_name = \"deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B\"\n",
 64 |         "\n",
 65 |         "model = AutoModelForCausalLM.from_pretrained(\n",
 66 |         "    model_name,).to(\"cuda\")\n",
 67 |         "tokenizer = AutoTokenizer.from_pretrained(model_name)"
 68 |       ]
 69 |     },
 70 |     {
 71 |       "cell_type": "markdown",
 72 |       "source": [
 73 |         "## Provide a prompt & build the model inputs"
 74 |       ],
 75 |       "metadata": {
 76 |         "id": "TH2jLGUHeSty"
 77 |       }
 78 |     },
 79 |     {
 80 |       "cell_type": "code",
 81 |       "source": [
 82 |         "prompt = \"write an efficient alogirthm for sorting a 2 dimensional array\"\n",
 83 |         "messages = [\n",
 84 |         "    {\"role\": \"system\", \"content\": \"You are an extremely focused and to the point assistant.\"},\n",
 85 |         "    {\"role\": \"user\", \"content\": prompt}\n",
 86 |         "]\n",
 87 |         "text = tokenizer.apply_chat_template(\n",
 88 |         "    messages,\n",
 89 |         "    tokenize=False,\n",
 90 |         "    add_generation_prompt=True\n",
 91 |         ")\n",
 92 |         "model_inputs = tokenizer([text], return_tensors=\"pt\").to(model.device)"
 93 |       ],
 94 |       "metadata": {
 95 |         "id": "A122cuXIcAUu"
 96 |       },
 97 |       "execution_count": 8,
 98 |       "outputs": []
 99 |     },
100 |     {
101 |       "cell_type": "markdown",
102 |       "source": [
103 |         "## Generate text"
104 |       ],
105 |       "metadata": {
106 |         "id": "ImQgJ188eZ3X"
107 |       }
108 |     },
109 |     {
110 |       "cell_type": "code",
111 |       "source": [
112 |         "generated_ids = model.generate(\n",
113 |         "    **model_inputs,\n",
114 |         "    max_new_tokens=2048\n",
115 |         ")\n",
116 |         "generated_ids = [\n",
117 |         "    output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)\n",
118 |         "]"
119 |       ],
120 |       "metadata": {
121 |         "colab": {
122 |           "base_uri": "https://localhost:8080/"
123 |         },
124 |         "id": "pGg5GtgCcDCM",
125 |         "outputId": "c6eeafd7-b1c3-4eaa-ad8e-f98bc77ca534"
126 |       },
127 |       "execution_count": 9,
128 |       "outputs": [
129 |         {
130 |           "output_type": "stream",
131 |           "name": "stderr",
132 |           "text": [
133 |             "Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.\n"
134 |           ]
135 |         }
136 |       ]
137 |     },
138 |     {
139 |       "cell_type": "markdown",
140 |       "source": [
141 |         "## Decode response"
142 |       ],
143 |       "metadata": {
144 |         "id": "8aIGWhNwefE-"
145 |       }
146 |     },
147 |     {
148 |       "cell_type": "code",
149 |       "source": [
150 |         "response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]\n"
151 |       ],
152 |       "metadata": {
153 |         "id": "EfunHq9IcEH0"
154 |       },
155 |       "execution_count": 11,
156 |       "outputs": []
157 |     },
158 |     {
159 |       "cell_type": "markdown",
160 |       "source": [
161 |         "## Voila, enjoy the response!"
162 |       ],
163 |       "metadata": {
164 |         "id": "RjUJoLlTehLA"
165 |       }
166 |     },
167 |     {
168 |       "cell_type": "code",
169 |       "source": [
170 |         "print(response)"
171 |       ],
172 |       "metadata": {
173 |         "colab": {
174 |           "base_uri": "https://localhost:8080/"
175 |         },
176 |         "id": "_dQ4sBtoct_R",
177 |         "outputId": "8295138b-3d63-45d8-8090-f2e32eb718b0"
178 |       },
179 |       "execution_count": 12,
180 |       "outputs": [
181 |         {
182 |           "output_type": "stream",
183 |           "name": "stdout",
184 |           "text": [
185 |             "<think>\n",
186 |             "Okay, so I need to figure out how to write an efficient algorithm for sorting a 2-dimensional array. Hmm, let's start by understanding what exactly is being asked here. The user wants an algorithm that can sort a 2D array, but I'm not entirely sure if they mean a 2D array of numbers or something else. Maybe it's a list of lists, where each sublist is a row. I should clarify that in my response.\n",
187 |             "\n",
188 |             "Alright, assuming it's a 2D array where each element is a number, I need to think about the best sorting algorithms for this. I know that for a single list, the most efficient sorting algorithms are typically O(n log n), like merge sort or quicksort. But since this is a 2D array, I have to consider how to sort it efficiently.\n",
189 |             "\n",
190 |             "One approach is to sort each row individually. That would mean applying a sorting algorithm to each sublist. But if the rows are of different lengths, that could cause issues. Wait, in a 2D array, are all rows of the same length? I think in most cases, yes, but I should consider that possibility.\n",
191 |             "\n",
192 |             "Another idea is to sort the entire array as a 2D structure. That would involve comparing elements across rows and columns. For example, sorting based on the first element, then the second, and so on. This is similar to how you sort a list of tuples in Python using the default sort, which compares elements lexicographically.\n",
193 |             "\n",
194 |             "I should also think about the time complexity. If I sort each row individually, the time complexity would be O(m * n log n), where m is the number of rows and n is the average number of elements per row. If the rows are of varying lengths, this could be inefficient. On the other hand, sorting the entire array as a 2D structure would have a time complexity of O(m * n^2 log n), which is worse.\n",
195 |             "\n",
196 |             "So, which approach is better? If the rows are of similar lengths and the sorting is done element-wise, sorting each row individually might be more efficient. But if the rows are of different lengths, the entire array approach would be better.\n",
197 |             "\n",
198 |             "Wait, the user didn't specify whether the array is a list of lists or a single list. I should clarify that. If it's a single list, then the algorithm would be O(n log n). If it's a 2D array, then it depends on the structure.\n",
199 |             "\n",
200 |             "I think the user is referring to a 2D array, so I should proceed with that assumption. Therefore, the algorithm should be able to handle a 2D array and sort it efficiently. I'll outline the steps for both approaches: sorting each row individually and sorting the entire array as a 2D structure.\n",
201 |             "\n",
202 |             "I should also mention that the choice between the two depends on the specific requirements, like the size of the array and the desired time complexity. For most cases, sorting each row individually might be sufficient and easier to implement.\n",
203 |             "\n",
204 |             "Finally, I'll provide a code example for both methods to illustrate how they can be implemented in Python. This way, the user can choose the one that best fits their needs.\n",
205 |             "</think>\n",
206 |             "\n",
207 |             "To sort a 2-dimensional array efficiently, you can choose between two approaches: sorting each row individually or sorting the entire array as a 2D structure. Here's how you can implement each method:\n",
208 |             "\n",
209 |             "### 1. Sort Each Row Individually\n",
210 |             "This approach involves applying a sorting algorithm to each sublist (row) of the 2D array. This is efficient if the rows are of similar lengths and the sorting is done element-wise.\n",
211 |             "\n",
212 |             "**Algorithm:**\n",
213 |             "1. For each row in the 2D array:\n",
214 |             "   - Apply a sorting algorithm (e.g., quicksort, mergesort, or a built-in sort function) to the row.\n",
215 |             "2. Return the modified 2D array.\n",
216 |             "\n",
217 |             "**Python Code Example:**\n",
218 |             "```python\n",
219 |             "def sort_rows(arr):\n",
220 |             "    if not arr:\n",
221 |             "        return []\n",
222 |             "    for row in arr:\n",
223 |             "        row.sort()\n",
224 |             "    return arr\n",
225 |             "\n",
226 |             "# Example usage:\n",
227 |             "arr = [[3, 1, 2], [4, 5, 6], [7, 8, 9]]\n",
228 |             "sorted_arr = sort_rows(arr)\n",
229 |             "print(sorted_arr)\n",
230 |             "```\n",
231 |             "\n",
232 |             "### 2. Sort the Entire 2D Array\n",
233 |             "This approach involves sorting the entire array as a 2D structure, which can be done lexicographically (element-wise comparison).\n",
234 |             "\n",
235 |             "**Algorithm:**\n",
236 |             "1. Sort the entire 2D array using a sorting algorithm that compares elements across rows and columns.\n",
237 |             "2. Return the sorted 2D array.\n",
238 |             "\n",
239 |             "**Python Code Example:**\n",
240 |             "```python\n",
241 |             "def sort_2d_array(arr):\n",
242 |             "    return sorted(arr)\n",
243 |             "\n",
244 |             "# Example usage:\n",
245 |             "arr = [[3, 1, 2], [4, 5, 6], [7, 8, 9]]\n",
246 |             "sorted_arr = sort_2d_array(arr)\n",
247 |             "print(sorted_arr)\n",
248 |             "```\n",
249 |             "\n",
250 |             "### Choosing the Appropriate Method\n",
251 |             "- **Sorting Each Row Individually:** More efficient if rows are of similar lengths and the sorting is done element-wise.\n",
252 |             "- **Sorting the Entire 2D Array:** More efficient if the rows are of varying lengths and the entire array needs to be sorted lexicographically.\n",
253 |             "\n",
254 |             "### Conclusion\n",
255 |             "The choice between the two methods depends on the specific requirements of your use case. If rows are of similar lengths and element-wise sorting is sufficient, sorting each row individually is more efficient. If the rows are of varying lengths and a lexicographic sort is needed, sorting the entire array as a 2D structure is more appropriate.\n"
256 |           ]
257 |         }
258 |       ]
259 |     },
260 |     {
261 |       "cell_type": "code",
262 |       "source": [],
263 |       "metadata": {
264 |         "id": "YQQXrzuBcvo-"
265 |       },
266 |       "execution_count": null,
267 |       "outputs": []
268 |     }
269 |   ]
270 | }


--------------------------------------------------------------------------------
/insanely_fast_whisper_colab.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "provenance": [],
  7 |       "gpuType": "T4",
  8 |       "authorship_tag": "ABX9TyNO3mkZ+HMQrvkMHRtFpKvj",
  9 |       "include_colab_link": true
 10 |     },
 11 |     "kernelspec": {
 12 |       "name": "python3",
 13 |       "display_name": "Python 3"
 14 |     },
 15 |     "language_info": {
 16 |       "name": "python"
 17 |     },
 18 |     "accelerator": "GPU"
 19 |   },
 20 |   "cells": [
 21 |     {
 22 |       "cell_type": "markdown",
 23 |       "metadata": {
 24 |         "id": "view-in-github",
 25 |         "colab_type": "text"
 26 |       },
 27 |       "source": [
 28 |         "<a href=\"https://colab.research.google.com/github/Vaibhavs10/notebooks/blob/main/insanely_fast_whisper_colab.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
 29 |       ]
 30 |     },
 31 |     {
 32 |       "cell_type": "markdown",
 33 |       "source": [
 34 |         "# [Insanely Fast Whisper](https://github.com/Vaibhavs10/insanely-fast-whisper)\n",
 35 |         "\n",
 36 |         "By VB (https://twitter.com/reach_vb)\n",
 37 |         "\n",
 38 |         "P.S. Make sure you're on a GPU runtime 🤗"
 39 |       ],
 40 |       "metadata": {
 41 |         "id": "q0MBgZKbhdII"
 42 |       }
 43 |     },
 44 |     {
 45 |       "cell_type": "code",
 46 |       "source": [
 47 |         "!pip install -q pipx && apt install python3.10-venv"
 48 |       ],
 49 |       "metadata": {
 50 |         "colab": {
 51 |           "base_uri": "https://localhost:8080/"
 52 |         },
 53 |         "id": "VF-qp-FWJmyD",
 54 |         "outputId": "10712868-be6e-4b82-b8c2-95e43c591173"
 55 |       },
 56 |       "execution_count": 1,
 57 |       "outputs": [
 58 |         {
 59 |           "output_type": "stream",
 60 |           "name": "stdout",
 61 |           "text": [
 62 |             "\u001b[?25l     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/57.8 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m57.8/57.8 kB\u001b[0m \u001b[31m2.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
 63 |             "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m41.7/41.7 kB\u001b[0m \u001b[31m5.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
 64 |             "Reading package lists... Done\n",
 65 |             "Building dependency tree... Done\n",
 66 |             "Reading state information... Done\n",
 67 |             "The following additional packages will be installed:\n",
 68 |             "  python3-pip-whl python3-setuptools-whl\n",
 69 |             "The following NEW packages will be installed:\n",
 70 |             "  python3-pip-whl python3-setuptools-whl python3.10-venv\n",
 71 |             "0 upgraded, 3 newly installed, 0 to remove and 9 not upgraded.\n",
 72 |             "Need to get 2,473 kB of archives.\n",
 73 |             "After this operation, 2,884 kB of additional disk space will be used.\n",
 74 |             "Get:1 http://archive.ubuntu.com/ubuntu jammy-updates/universe amd64 python3-pip-whl all 22.0.2+dfsg-1ubuntu0.4 [1,680 kB]\n",
 75 |             "Get:2 http://archive.ubuntu.com/ubuntu jammy-updates/universe amd64 python3-setuptools-whl all 59.6.0-1.2ubuntu0.22.04.1 [788 kB]\n",
 76 |             "Get:3 http://archive.ubuntu.com/ubuntu jammy-updates/universe amd64 python3.10-venv amd64 3.10.12-1~22.04.2 [5,724 B]\n",
 77 |             "Fetched 2,473 kB in 2s (1,635 kB/s)\n",
 78 |             "Selecting previously unselected package python3-pip-whl.\n",
 79 |             "(Reading database ... 120880 files and directories currently installed.)\n",
 80 |             "Preparing to unpack .../python3-pip-whl_22.0.2+dfsg-1ubuntu0.4_all.deb ...\n",
 81 |             "Unpacking python3-pip-whl (22.0.2+dfsg-1ubuntu0.4) ...\n",
 82 |             "Selecting previously unselected package python3-setuptools-whl.\n",
 83 |             "Preparing to unpack .../python3-setuptools-whl_59.6.0-1.2ubuntu0.22.04.1_all.deb ...\n",
 84 |             "Unpacking python3-setuptools-whl (59.6.0-1.2ubuntu0.22.04.1) ...\n",
 85 |             "Selecting previously unselected package python3.10-venv.\n",
 86 |             "Preparing to unpack .../python3.10-venv_3.10.12-1~22.04.2_amd64.deb ...\n",
 87 |             "Unpacking python3.10-venv (3.10.12-1~22.04.2) ...\n",
 88 |             "Setting up python3-setuptools-whl (59.6.0-1.2ubuntu0.22.04.1) ...\n",
 89 |             "Setting up python3-pip-whl (22.0.2+dfsg-1ubuntu0.4) ...\n",
 90 |             "Setting up python3.10-venv (3.10.12-1~22.04.2) ...\n"
 91 |           ]
 92 |         }
 93 |       ]
 94 |     },
 95 |     {
 96 |       "cell_type": "code",
 97 |       "source": [
 98 |         "!pipx run insanely-fast-whisper --file-name https://huggingface.co/datasets/reach-vb/random-audios/resolve/main/ted_60.wav"
 99 |       ],
100 |       "metadata": {
101 |         "colab": {
102 |           "base_uri": "https://localhost:8080/"
103 |         },
104 |         "id": "i_H9Dm89Jj0-",
105 |         "outputId": "f737b9fd-d625-4ccd-d8a1-1895cdf1b22f"
106 |       },
107 |       "execution_count": 2,
108 |       "outputs": [
109 |         {
110 |           "output_type": "stream",
111 |           "name": "stdout",
112 |           "text": [
113 |             "config.json: 100% 1.25k/1.25k [00:00<00:00, 6.33MB/s]\n",
114 |             "model.safetensors: 100% 3.09G/3.09G [00:12<00:00, 242MB/s]\n",
115 |             "generation_config.json: 100% 3.87k/3.87k [00:00<00:00, 17.3MB/s]\n",
116 |             "tokenizer_config.json: 100% 283k/283k [00:00<00:00, 2.15MB/s]\n",
117 |             "vocab.json: 100% 1.04M/1.04M [00:00<00:00, 5.28MB/s]\n",
118 |             "tokenizer.json: 100% 2.48M/2.48M [00:00<00:00, 9.49MB/s]\n",
119 |             "merges.txt: 100% 494k/494k [00:00<00:00, 3.74MB/s]\n",
120 |             "normalizer.json: 100% 52.7k/52.7k [00:00<00:00, 97.3MB/s]\n",
121 |             "added_tokens.json: 100% 34.6k/34.6k [00:00<00:00, 110MB/s]\n",
122 |             "special_tokens_map.json: 100% 2.07k/2.07k [00:00<00:00, 8.95MB/s]\n",
123 |             "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
124 |             "preprocessor_config.json: 100% 340/340 [00:00<00:00, 1.98MB/s]\n",
125 |             "The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.\n",
126 |             "\u001b[2K🤗 \u001b[33mTranscribing...\u001b[0m \u001b[37m━\u001b[0m\u001b[37m━\u001b[0m\u001b[93m━\u001b[0m\u001b[93m━\u001b[0m\u001b[93m━\u001b[0m\u001b[93m━\u001b[0m\u001b[93m━\u001b[0m\u001b[93m━\u001b[0m\u001b[93m━\u001b[0m\u001b[93m━\u001b[0m\u001b[93m━\u001b[0m\u001b[93m━\u001b[0m\u001b[93m━\u001b[0m\u001b[93m━\u001b[0m\u001b[93m━\u001b[0m\u001b[37m━\u001b[0m\u001b[37m━\u001b[0m\u001b[37m━\u001b[0m\u001b[37m━\u001b[0m\u001b[37m━\u001b[0m\u001b[37m━\u001b[0m\u001b[37m━\u001b[0m\u001b[93m━\u001b[0m\u001b[93m━\u001b[0m\u001b[93m━\u001b[0m\u001b[93m━\u001b[0m\u001b[93m━\u001b[0m\u001b[93m━\u001b[0m\u001b[93m━\u001b[0m\u001b[93m━\u001b[0m\u001b[93m━\u001b[0m\u001b[93m━\u001b[0m\u001b[93m━\u001b[0m\u001b[93m━\u001b[0m\u001b[93m━\u001b[0m\u001b[37m━\u001b[0m\u001b[37m━\u001b[0m\u001b[37m━\u001b[0m\u001b[37m━\u001b[0m\u001b[37m━\u001b[0m \u001b[33m0:00:09\u001b[0m\n",
127 |             "\u001b[?25hVoila! Your file has been transcribed go check it out over here! output.json\n"
128 |           ]
129 |         }
130 |       ]
131 |     },
132 |     {
133 |       "cell_type": "code",
134 |       "source": [
135 |         "!head output.json"
136 |       ],
137 |       "metadata": {
138 |         "colab": {
139 |           "base_uri": "https://localhost:8080/"
140 |         },
141 |         "id": "NDFrydpsvu57",
142 |         "outputId": "de3d9635-5cf1-46ca-d401-e6c78c5659dc"
143 |       },
144 |       "execution_count": 4,
145 |       "outputs": [
146 |         {
147 |           "output_type": "stream",
148 |           "name": "stdout",
149 |           "text": [
150 |             "{\"text\": \" So in college, I was a government major, which means I had to write a lot of papers. Now, when a normal student writes a paper, they might spread the work out a little like this. So, you know, you get started maybe a little slowly, but you get enough done in the first week that with some heavier days later on, everything gets done and things stay civil. And I would want to do that, like that. That would be the plan. I would have it all ready to go, but then actually the paper would come along, and then I would kind of do this. And that would happen to every single paper. But then came my 90-page senior thesis, a paper you're supposed to spend a year on. I knew for a paper like that, my normal workflow was not an option. It was way too big a project. So I planned things out, and I decided it kind of had to go something like this. This is how the year would go. So I'd start off light,\", \"chunks\": [{\"timestamp\": [0.0, 4.48], \"text\": \" So in college, I was a government major,\"}, {\"timestamp\": [4.88, 6.62], \"text\": \" which means I had to write a lot of papers.\"}, {\"timestamp\": [7.42, 8.86], \"text\": \" Now, when a normal student writes a paper,\"}, {\"timestamp\": [8.94, 10.6], \"text\": \" they might spread the work out a little like this.\"}, {\"timestamp\": [11.74, 16.3], \"text\": \" So, you know, you get started maybe a little slowly,\"}, {\"timestamp\": [16.36, 17.86], \"text\": \" but you get enough done in the first week\"}, {\"timestamp\": [17.86, 19.76], \"text\": \" that with some heavier days later on,\"}, {\"timestamp\": [20.28, 21.98], \"text\": \" everything gets done and things stay civil.\"}, {\"timestamp\": [23.64, 25.8], \"text\": \" And I would want to do that, like that.\"}, {\"timestamp\": [26.12, 26.94], \"text\": \" That would be the plan.\"}, {\"timestamp\": [27.22, 29.84], \"text\": \" I would have it all ready to go,\"}, {\"timestamp\": [29.96, 32.42], \"text\": \" but then actually the paper 
would come along,\"}, {\"timestamp\": [32.46, 33.6], \"text\": \" and then I would kind of do this.\"}, {\"timestamp\": [36.48, 38.44], \"text\": \" And that would happen to every single paper.\"}, {\"timestamp\": [39.32, 43.04], \"text\": \" But then came my 90-page senior thesis,\"}, {\"timestamp\": [43.54, 46.0], \"text\": \" a paper you're supposed to spend a year on.\"}, {\"timestamp\": [46.0, 50.0], \"text\": \" I knew for a paper like that, my normal workflow was not an option.\"}, {\"timestamp\": [50.0, 52.0], \"text\": \" It was way too big a project.\"}, {\"timestamp\": [52.0, 56.0], \"text\": \" So I planned things out, and I decided it kind of had to go something like this.\"}, {\"timestamp\": [56.0, 58.0], \"text\": \" This is how the year would go.\"}, {\"timestamp\": [58.0, 60.0], \"text\": \" So I'd start off light,\"}]}"
151 |           ]
152 |         }
153 |       ]
154 |     }
155 |   ]
156 | }


--------------------------------------------------------------------------------
/orpheus-pretrained-inference-demo.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "cells": [
  3 |     {
  4 |       "cell_type": "markdown",
  5 |       "metadata": {
  6 |         "id": "view-in-github",
  7 |         "colab_type": "text"
  8 |       },
  9 |       "source": [
 10 |         "<a href=\"https://colab.research.google.com/github/Vaibhavs10/notebooks/blob/main/orpheus-pretrained-inference-demo.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
 11 |       ]
 12 |     },
 13 |     {
 14 |       "cell_type": "code",
 15 |       "source": [
 16 |         "model_name = \"canopylabs/orpheus-3b-0.1-pretrained\"\n",
 17 |         "\n",
 18 |         "print(\"*** Change the model you use here\")"
 19 |       ],
 20 |       "metadata": {
 21 |         "id": "my_UA_HRu2tK",
 22 |         "colab": {
 23 |           "base_uri": "https://localhost:8080/"
 24 |         },
 25 |         "outputId": "ae9d7e55-dcad-481a-8b8c-b1d71ad5565c"
 26 |       },
 27 |       "execution_count": null,
 28 |       "outputs": [
 29 |         {
 30 |           "output_type": "stream",
 31 |           "name": "stdout",
 32 |           "text": [
 33 |             "*** Change the model you use here\n"
 34 |           ]
 35 |         }
 36 |       ]
 37 |     },
 38 |     {
 39 |       "cell_type": "code",
 40 |       "execution_count": null,
 41 |       "metadata": {
 42 |         "id": "lHWzRUCDcyMx",
 43 |         "cellView": "form"
 44 |       },
 45 |       "outputs": [],
 46 |       "source": [
 47 |         "#@title Installation & Setup\n",
 48 |         "%%capture\n",
 49 |         "!pip install snac ipywebrtc\n",
 50 |         "!pip install datasets\n",
 51 |         "from snac import SNAC\n",
 52 |         "import torch\n",
 53 |         "import torch\n",
 54 |         "from transformers import AutoModelForCausalLM, Trainer, TrainingArguments, AutoTokenizer\n",
 55 |         "import numpy as np\n",
 56 |         "import soundfile as sf\n",
 57 |         "import IPython.display as ipd\n",
 58 |         "import librosa\n",
 59 |         "from ipywebrtc import AudioRecorder, Audio\n",
 60 |         "from IPython.display import display\n",
 61 |         "import ipywidgets as widgets\n",
 62 |         "from huggingface_hub import snapshot_download\n",
 63 |         "import torchaudio.transforms as T\n",
 64 |         "import librosa\n",
 65 |         "import torch\n",
 66 |         "from IPython.display import Audio, display\n",
 67 |         "# Keep the model chosen in the first cell; fall back to the default only if that cell was skipped\n",
 68 |         "model_name = globals().get(\"model_name\", \"canopylabs/orpheus-tts-0.1-pretrained\")\n",
 69 |         "tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
 70 |         "\n",
 71 |         "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
 72 |         "snac_model = SNAC.from_pretrained(\"hubertsiuzdak/snac_24khz\")\n",
 73 |         "\n",
 74 |         "\n",
 75 |         "# Download only model config and safetensors\n",
 76 |         "model_path = snapshot_download(\n",
 77 |         "    repo_id=model_name,\n",
 78 |         "    allow_patterns=[\n",
 79 |         "        \"config.json\",\n",
 80 |         "        \"*.safetensors\",\n",
 81 |         "        \"model.safetensors.index.json\",\n",
 82 |         "    ],\n",
 83 |         "    ignore_patterns=[\n",
 84 |         "        \"optimizer.pt\",\n",
 85 |         "        \"pytorch_model.bin\",\n",
 86 |         "        \"training_args.bin\",\n",
 87 |         "        \"scheduler.pt\",\n",
 88 |         "        \"tokenizer.json\",\n",
 89 |         "        \"tokenizer_config.json\",\n",
 90 |         "        \"special_tokens_map.json\",\n",
 91 |         "        \"vocab.json\",\n",
 92 |         "        \"merges.txt\",\n",
 93 |         "        \"tokenizer.*\"\n",
 94 |         "    ]\n",
 95 |         ")\n",
 96 |         "\n",
 97 |         "model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16)\n",
 98 |         "model.cuda()"
 99 |       ]
100 |     },
101 |     {
102 |       "cell_type": "code",
103 |       "source": [
104 |         "### CHANGE THIS TO YOUR OWN FILE AND TEXT\n",
105 |         "\n",
106 |         "my_wav_file_is = \"X.wav\"\n",
107 |         "and_the_transcript_is = \"Something or the other\"\n",
108 |         "\n",
109 |         "the_model_should_say = [\n",
110 |         "  \"I finally got into the university of my dreams! I can't believe all this hard work actually  paid off!\",\n",
111 |         "  \"Why is your frickin' Waymo blocking the frickin' road? GET OUT OF THE WAY!\",\n",
112 |         "  \"I'm so sorry to hear about your pet, but you know, he'll pull through.\",\n",
113 |         "  \"Conversational, uhm, systems, tend to speak pretty robotically, because- because they don't, really understand how, uhm, humans talk.\"\n",
114 |         "\n",
115 |         "]"
116 |       ],
117 |       "metadata": {
118 |         "id": "P81EElEWvg2J"
119 |       },
120 |       "execution_count": null,
121 |       "outputs": []
122 |     },
123 |     {
124 |       "cell_type": "code",
125 |       "source": [
126 |         "#@title Tokenising your stuff for the prompt\n",
127 |         "%%capture\n",
128 |         "\n",
129 |         "''' Here we tokenise the prompt you gave us, we also tokenise the prompts you want the model to say\n",
130 |         "\n",
131 |         "The template is:\n",
132 |         "\n",
133 |         "start_of_human, start_of_text, text, end_of_text, start_of_ai, start_of_speech, speech, end_of_speech, end_of_ai, start_of_human, text, end_of_human and then generate from here\n",
134 |         "\n",
135 |         "'''\n",
136 |         "\n",
137 |         "\n",
138 |         "filename = my_wav_file_is\n",
139 |         "\n",
140 |         "audio_array, sample_rate = librosa.load(filename, sr=24000)\n",
141 |         "\n",
142 |         "def tokenise_audio(waveform):\n",
143 |         "  waveform = torch.from_numpy(waveform).unsqueeze(0)\n",
144 |         "  waveform = waveform.to(dtype=torch.float32)\n",
145 |         "\n",
146 |         "\n",
147 |         "  waveform = waveform.unsqueeze(0)\n",
148 |         "\n",
149 |         "  with torch.inference_mode():\n",
150 |         "    codes = snac_model.encode(waveform)\n",
151 |         "\n",
152 |         "  all_codes = []\n",
153 |         "  for i in range(codes[0].shape[1]):\n",
154 |         "    all_codes.append(codes[0][0][i].item()+128266)\n",
155 |         "    all_codes.append(codes[1][0][2*i].item()+128266+4096)\n",
156 |         "    all_codes.append(codes[2][0][4*i].item()+128266+(2*4096))\n",
157 |         "    all_codes.append(codes[2][0][(4*i)+1].item()+128266+(3*4096))\n",
158 |         "    all_codes.append(codes[1][0][(2*i)+1].item()+128266+(4*4096))\n",
159 |         "    all_codes.append(codes[2][0][(4*i)+2].item()+128266+(5*4096))\n",
160 |         "    all_codes.append(codes[2][0][(4*i)+3].item()+128266+(6*4096))\n",
161 |         "\n",
162 |         "\n",
163 |         "  return all_codes\n",
164 |         "\n",
165 |         "myts = tokenise_audio(audio_array)\n",
166 |         "start_tokens = torch.tensor([[ 128259]], dtype=torch.int64)\n",
167 |         "end_tokens = torch.tensor([[128009, 128260, 128261, 128257]], dtype=torch.int64)\n",
168 |         "final_tokens = torch.tensor([[128258, 128262]], dtype=torch.int64)\n",
169 |         "voice_prompt = and_the_transcript_is\n",
170 |         "prompt_tokked = tokenizer(voice_prompt, return_tensors=\"pt\")\n",
171 |         "\n",
172 |         "input_ids = prompt_tokked[\"input_ids\"]\n",
173 |         "\n",
174 |         "zeroprompt_input_ids = torch.cat([start_tokens, input_ids, end_tokens, torch.tensor([myts]), final_tokens], dim=1) # SOH SOT Text EOT EOH\n",
175 |         "\n",
176 |         "prompts = the_model_should_say\n",
177 |         "\n",
178 |         "all_modified_input_ids = []\n",
179 |         "for prompt in prompts:\n",
180 |         "  input_ids = tokenizer(prompt, return_tensors=\"pt\").input_ids\n",
181 |         "  second_input_ids = torch.cat([zeroprompt_input_ids, start_tokens, input_ids, end_tokens], dim=1)\n",
182 |         "  all_modified_input_ids.append(second_input_ids)\n",
183 |         "\n",
184 |         "\n",
185 |         "all_padded_tensors = []\n",
186 |         "all_attention_masks = []\n",
187 |         "\n",
188 |         "max_length = max([modified_input_ids.shape[1] for modified_input_ids in all_modified_input_ids])\n",
189 |         "\n",
190 |         "for modified_input_ids in all_modified_input_ids:\n",
191 |         "  padding = max_length - modified_input_ids.shape[1]\n",
192 |         "  padded_tensor = torch.cat([torch.full((1, padding), 128263, dtype=torch.int64), modified_input_ids], dim=1)\n",
193 |         "  attention_mask = torch.cat([torch.zeros((1, padding), dtype=torch.int64), torch.ones((1, modified_input_ids.shape[1]), dtype=torch.int64)], dim=1)\n",
194 |         "  all_padded_tensors.append(padded_tensor)\n",
195 |         "  all_attention_masks.append(attention_mask)\n",
196 |         "\n",
197 |         "all_padded_tensors = torch.cat(all_padded_tensors, dim=0)\n",
198 |         "all_attention_masks = torch.cat(all_attention_masks, dim=0)\n",
199 |         "\n",
200 |         "input_ids = all_padded_tensors.to(\"cuda\")\n",
201 |         "attention_mask = all_attention_masks.to(\"cuda\")\n"
202 |       ],
203 |       "metadata": {
204 |         "id": "cXgZmdclbfk_",
205 |         "cellView": "form"
206 |       },
207 |       "execution_count": null,
208 |       "outputs": []
209 |     },
210 |     {
211 |       "cell_type": "code",
212 |       "execution_count": null,
213 |       "metadata": {
214 |         "id": "J_D2LtYw9gkl",
215 |         "colab": {
216 |           "base_uri": "https://localhost:8080/"
217 |         },
218 |         "outputId": "73b937d8-720f-4445-ebdd-70063a065e6b",
219 |         "cellView": "form"
220 |       },
221 |       "outputs": [
222 |         {
223 |           "output_type": "stream",
224 |           "name": "stderr",
225 |           "text": [
226 |             "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
227 |             "Setting `pad_token_id` to `eos_token_id`:128258 for open-end generation.\n"
228 |           ]
229 |         }
230 |       ],
231 |       "source": [
232 |         "#@title Run Inference\n",
233 |         "\n",
234 |         "with torch.no_grad():\n",
235 |         "  generated_ids = model.generate(\n",
236 |         "      input_ids=input_ids,\n",
237 |         "      attention_mask=attention_mask,  # prompts are left-padded with 128263; mask the padding out\n",
238 |         "      max_new_tokens=990,\n",
239 |         "      do_sample=True,\n",
240 |         "      temperature=0.5,\n",
241 |         "      # top_k=40,\n",
242 |         "      top_p=0.9,\n",
243 |         "      repetition_penalty=1.1,\n",
244 |         "      num_return_sequences=1,\n",
245 |         "      eos_token_id=128258,\n",
246 |         "      # end_token_id=128009\n",
247 |         "  )\n",
248 |         "\n",
249 |         "# generated_ids = torch.cat([generated_ids, torch.tensor([[128262]]).to(\"cuda\")], dim=1) # EOAI"
250 |       ]
251 |     },
252 |     {
253 |       "cell_type": "code",
254 |       "source": [
255 |         "#@title Convert output to speech\n",
256 |         "%%capture\n",
257 |         "token_to_find = 128257\n",
258 |         "token_to_remove = 128258\n",
259 |         "\n",
260 |         "# Check if the token exists in the tensor\n",
261 |         "token_indices = (generated_ids == token_to_find).nonzero(as_tuple=True)\n",
262 |         "\n",
263 |         "if len(token_indices[1]) > 0:\n",
264 |         "    last_occurrence_idx = token_indices[1][-1].item()\n",
265 |         "    cropped_tensor = generated_ids[:, last_occurrence_idx+1:]\n",
266 |         "else:\n",
267 |         "    cropped_tensor = generated_ids\n",
268 |         "\n",
269 |         "mask = cropped_tensor != token_to_remove\n",
270 |         "processed_rows = []\n",
271 |         "for row in cropped_tensor:\n",
272 |         "    # Apply the mask to each row\n",
273 |         "    masked_row = row[row != token_to_remove]\n",
274 |         "    processed_rows.append(masked_row)\n",
275 |         "\n",
276 |         "code_lists = []\n",
277 |         "for row in processed_rows:\n",
278 |         "    # row is a 1D tensor with its own length\n",
279 |         "    row_length = row.size(0)\n",
280 |         "    new_length = (row_length // 7) * 7  # largest multiple of 7 that fits in this row\n",
281 |         "    trimmed_row = row[:new_length]\n",
282 |         "    trimmed_row = [t - 128266 for t in trimmed_row]\n",
283 |         "    code_lists.append(trimmed_row)\n",
284 |         "\n",
285 |         "def redistribute_codes(code_list):  # flat 7-token frames -> three SNAC code layers -> decoded audio\n",
286 |         "  layer_1 = []  # receives 1 code per frame\n",
287 |         "  layer_2 = []  # receives 2 codes per frame\n",
288 |         "  layer_3 = []  # receives 4 codes per frame\n",
289 |         "  for i in range(len(code_list)//7):  # whole frames only; a trailing partial frame is ignored\n",
290 |         "    layer_1.append(code_list[7*i])\n",
291 |         "    layer_2.append(code_list[7*i+1]-4096)  # undo the per-position offset (k*4096 for frame position k)\n",
292 |         "    layer_3.append(code_list[7*i+2]-(2*4096))\n",
293 |         "    layer_3.append(code_list[7*i+3]-(3*4096))\n",
294 |         "    layer_2.append(code_list[7*i+4]-(4*4096))\n",
295 |         "    layer_3.append(code_list[7*i+5]-(5*4096))\n",
296 |         "    layer_3.append(code_list[7*i+6]-(6*4096))\n",
297 |         "  codes = [torch.tensor(layer_1).unsqueeze(0),\n",
298 |         "         torch.tensor(layer_2).unsqueeze(0),\n",
299 |         "         torch.tensor(layer_3).unsqueeze(0)]\n",
300 |         "  audio_hat = snac_model.decode(codes)\n",
301 |         "  return audio_hat\n",
302 |         "\n",
303 |         "my_samples = []\n",
304 |         "for code_list in code_lists:\n",
305 |         "  samples = redistribute_codes(code_list)\n",
306 |         "  my_samples.append(samples)"
307 |       ],
308 |       "metadata": {
309 |         "id": "lV49oiPFpbXL",
310 |         "cellView": "form"
311 |       },
312 |       "execution_count": null,
313 |       "outputs": []
314 |     },
315 |     {
316 |       "cell_type": "code",
317 |       "source": [
318 |         "#@title Display Speech\n",
319 |         "from IPython.display import Audio, display\n",
320 |         "for samples in my_samples:\n",
321 |         "  display(Audio(samples.detach().squeeze().to(\"cpu\").numpy(), rate=24000))"
322 |       ],
323 |       "metadata": {
324 |         "colab": {
325 |           "base_uri": "https://localhost:8080/",
326 |           "height": 162
327 |         },
328 |         "id": "JuwkHqU4piMJ",
329 |         "outputId": "c27f4f08-f57e-44be-9b21-3d381b3dd452",
330 |         "cellView": "form"
331 |       },
332 |       "execution_count": null,
333 |       "outputs": [
334 |         {
335 |           "output_type": "error",
336 |           "ename": "NameError",
337 |           "evalue": "name 'my_samples' is not defined",
338 |           "traceback": [
339 |             "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
340 |             "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
341 |             "\u001b[0;32m<ipython-input-1-22d6a3267f3a>\u001b[0m in \u001b[0;36m<cell line: 0>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0msamples\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mmy_samples\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      2\u001b[0m   \u001b[0mdisplay\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mAudio\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msamples\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdetach\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msqueeze\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"cpu\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnumpy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrate\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m24000\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
342 |             "\u001b[0;31mNameError\u001b[0m: name 'my_samples' is not defined"
343 |           ]
344 |         }
345 |       ]
346 |     },
347 |     {
348 |       "cell_type": "code",
349 |       "source": [],
350 |       "metadata": {
351 |         "id": "uQoWOaFC1EDi"
352 |       },
353 |       "execution_count": null,
354 |       "outputs": []
355 |     }
356 |   ],
357 |   "metadata": {
358 |     "accelerator": "GPU",
359 |     "colab": {
360 |       "gpuType": "A100",
361 |       "machine_shape": "hm",
362 |       "provenance": [],
363 |       "include_colab_link": true
364 |     },
365 |     "kernelspec": {
366 |       "display_name": "Python 3",
367 |       "name": "python3"
368 |     },
369 |     "language_info": {
370 |       "name": "python"
371 |     }
372 |   },
373 |   "nbformat": 4,
374 |   "nbformat_minor": 0
375 | }


--------------------------------------------------------------------------------
/stable_audio_open_colab.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "provenance": [],
  7 |       "gpuType": "T4",
  8 |       "authorship_tag": "ABX9TyOQLO4/4GSQapVQeg1vezzz",
  9 |       "include_colab_link": true
 10 |     },
 11 |     "kernelspec": {
 12 |       "name": "python3",
 13 |       "display_name": "Python 3"
 14 |     },
 15 |     "language_info": {
 16 |       "name": "python"
 17 |     },
 18 |     "accelerator": "GPU",
 19 |     "widgets": {
 20 |       "application/vnd.jupyter.widget-state+json": {
 21 |         "d621e5ab73d24c0f85aca1e0a20d3e89": {
 22 |           "model_module": "@jupyter-widgets/controls",
 23 |           "model_name": "HBoxModel",
 24 |           "model_module_version": "1.5.0",
 25 |           "state": {
 26 |             "_dom_classes": [],
 27 |             "_model_module": "@jupyter-widgets/controls",
 28 |             "_model_module_version": "1.5.0",
 29 |             "_model_name": "HBoxModel",
 30 |             "_view_count": null,
 31 |             "_view_module": "@jupyter-widgets/controls",
 32 |             "_view_module_version": "1.5.0",
 33 |             "_view_name": "HBoxView",
 34 |             "box_style": "",
 35 |             "children": [
 36 |               "IPY_MODEL_6c7808a91a3e4f72a1248832b985020c",
 37 |               "IPY_MODEL_13e9dac474494304b279d662de87d8f4",
 38 |               "IPY_MODEL_5d116c165c3841248f360b00ba35fffe"
 39 |             ],
 40 |             "layout": "IPY_MODEL_d700622dab8e41e4b3d3acc10ff42ba2"
 41 |           }
 42 |         },
 43 |         "6c7808a91a3e4f72a1248832b985020c": {
 44 |           "model_module": "@jupyter-widgets/controls",
 45 |           "model_name": "HTMLModel",
 46 |           "model_module_version": "1.5.0",
 47 |           "state": {
 48 |             "_dom_classes": [],
 49 |             "_model_module": "@jupyter-widgets/controls",
 50 |             "_model_module_version": "1.5.0",
 51 |             "_model_name": "HTMLModel",
 52 |             "_view_count": null,
 53 |             "_view_module": "@jupyter-widgets/controls",
 54 |             "_view_module_version": "1.5.0",
 55 |             "_view_name": "HTMLView",
 56 |             "description": "",
 57 |             "description_tooltip": null,
 58 |             "layout": "IPY_MODEL_9d88d7dc67534a1fb6e1e15c9c5b937b",
 59 |             "placeholder": "​",
 60 |             "style": "IPY_MODEL_480f4371362742d2952ffbbd986ec729",
 61 |             "value": "100%"
 62 |           }
 63 |         },
 64 |         "13e9dac474494304b279d662de87d8f4": {
 65 |           "model_module": "@jupyter-widgets/controls",
 66 |           "model_name": "FloatProgressModel",
 67 |           "model_module_version": "1.5.0",
 68 |           "state": {
 69 |             "_dom_classes": [],
 70 |             "_model_module": "@jupyter-widgets/controls",
 71 |             "_model_module_version": "1.5.0",
 72 |             "_model_name": "FloatProgressModel",
 73 |             "_view_count": null,
 74 |             "_view_module": "@jupyter-widgets/controls",
 75 |             "_view_module_version": "1.5.0",
 76 |             "_view_name": "ProgressView",
 77 |             "bar_style": "success",
 78 |             "description": "",
 79 |             "description_tooltip": null,
 80 |             "layout": "IPY_MODEL_f6aef59a2db049c39ee874ba3cfc4798",
 81 |             "max": 100,
 82 |             "min": 0,
 83 |             "orientation": "horizontal",
 84 |             "style": "IPY_MODEL_54b55dbf5a5d4a2e9afe5c7dc1e26d7d",
 85 |             "value": 100
 86 |           }
 87 |         },
 88 |         "5d116c165c3841248f360b00ba35fffe": {
 89 |           "model_module": "@jupyter-widgets/controls",
 90 |           "model_name": "HTMLModel",
 91 |           "model_module_version": "1.5.0",
 92 |           "state": {
 93 |             "_dom_classes": [],
 94 |             "_model_module": "@jupyter-widgets/controls",
 95 |             "_model_module_version": "1.5.0",
 96 |             "_model_name": "HTMLModel",
 97 |             "_view_count": null,
 98 |             "_view_module": "@jupyter-widgets/controls",
 99 |             "_view_module_version": "1.5.0",
100 |             "_view_name": "HTMLView",
101 |             "description": "",
102 |             "description_tooltip": null,
103 |             "layout": "IPY_MODEL_e5b2378a037846528a84c0a39547ffc2",
104 |             "placeholder": "​",
105 |             "style": "IPY_MODEL_f7659f9f7fe14dff8577d6f904e3a108",
106 |             "value": " 100/100 [00:34&lt;00:00,  2.91it/s]"
107 |           }
108 |         },
109 |         "d700622dab8e41e4b3d3acc10ff42ba2": {
110 |           "model_module": "@jupyter-widgets/base",
111 |           "model_name": "LayoutModel",
112 |           "model_module_version": "1.2.0",
113 |           "state": {
114 |             "_model_module": "@jupyter-widgets/base",
115 |             "_model_module_version": "1.2.0",
116 |             "_model_name": "LayoutModel",
117 |             "_view_count": null,
118 |             "_view_module": "@jupyter-widgets/base",
119 |             "_view_module_version": "1.2.0",
120 |             "_view_name": "LayoutView",
121 |             "align_content": null,
122 |             "align_items": null,
123 |             "align_self": null,
124 |             "border": null,
125 |             "bottom": null,
126 |             "display": null,
127 |             "flex": null,
128 |             "flex_flow": null,
129 |             "grid_area": null,
130 |             "grid_auto_columns": null,
131 |             "grid_auto_flow": null,
132 |             "grid_auto_rows": null,
133 |             "grid_column": null,
134 |             "grid_gap": null,
135 |             "grid_row": null,
136 |             "grid_template_areas": null,
137 |             "grid_template_columns": null,
138 |             "grid_template_rows": null,
139 |             "height": null,
140 |             "justify_content": null,
141 |             "justify_items": null,
142 |             "left": null,
143 |             "margin": null,
144 |             "max_height": null,
145 |             "max_width": null,
146 |             "min_height": null,
147 |             "min_width": null,
148 |             "object_fit": null,
149 |             "object_position": null,
150 |             "order": null,
151 |             "overflow": null,
152 |             "overflow_x": null,
153 |             "overflow_y": null,
154 |             "padding": null,
155 |             "right": null,
156 |             "top": null,
157 |             "visibility": null,
158 |             "width": null
159 |           }
160 |         },
161 |         "9d88d7dc67534a1fb6e1e15c9c5b937b": {
162 |           "model_module": "@jupyter-widgets/base",
163 |           "model_name": "LayoutModel",
164 |           "model_module_version": "1.2.0",
165 |           "state": {
166 |             "_model_module": "@jupyter-widgets/base",
167 |             "_model_module_version": "1.2.0",
168 |             "_model_name": "LayoutModel",
169 |             "_view_count": null,
170 |             "_view_module": "@jupyter-widgets/base",
171 |             "_view_module_version": "1.2.0",
172 |             "_view_name": "LayoutView",
173 |             "align_content": null,
174 |             "align_items": null,
175 |             "align_self": null,
176 |             "border": null,
177 |             "bottom": null,
178 |             "display": null,
179 |             "flex": null,
180 |             "flex_flow": null,
181 |             "grid_area": null,
182 |             "grid_auto_columns": null,
183 |             "grid_auto_flow": null,
184 |             "grid_auto_rows": null,
185 |             "grid_column": null,
186 |             "grid_gap": null,
187 |             "grid_row": null,
188 |             "grid_template_areas": null,
189 |             "grid_template_columns": null,
190 |             "grid_template_rows": null,
191 |             "height": null,
192 |             "justify_content": null,
193 |             "justify_items": null,
194 |             "left": null,
195 |             "margin": null,
196 |             "max_height": null,
197 |             "max_width": null,
198 |             "min_height": null,
199 |             "min_width": null,
200 |             "object_fit": null,
201 |             "object_position": null,
202 |             "order": null,
203 |             "overflow": null,
204 |             "overflow_x": null,
205 |             "overflow_y": null,
206 |             "padding": null,
207 |             "right": null,
208 |             "top": null,
209 |             "visibility": null,
210 |             "width": null
211 |           }
212 |         },
213 |         "480f4371362742d2952ffbbd986ec729": {
214 |           "model_module": "@jupyter-widgets/controls",
215 |           "model_name": "DescriptionStyleModel",
216 |           "model_module_version": "1.5.0",
217 |           "state": {
218 |             "_model_module": "@jupyter-widgets/controls",
219 |             "_model_module_version": "1.5.0",
220 |             "_model_name": "DescriptionStyleModel",
221 |             "_view_count": null,
222 |             "_view_module": "@jupyter-widgets/base",
223 |             "_view_module_version": "1.2.0",
224 |             "_view_name": "StyleView",
225 |             "description_width": ""
226 |           }
227 |         },
228 |         "f6aef59a2db049c39ee874ba3cfc4798": {
229 |           "model_module": "@jupyter-widgets/base",
230 |           "model_name": "LayoutModel",
231 |           "model_module_version": "1.2.0",
232 |           "state": {
233 |             "_model_module": "@jupyter-widgets/base",
234 |             "_model_module_version": "1.2.0",
235 |             "_model_name": "LayoutModel",
236 |             "_view_count": null,
237 |             "_view_module": "@jupyter-widgets/base",
238 |             "_view_module_version": "1.2.0",
239 |             "_view_name": "LayoutView",
240 |             "align_content": null,
241 |             "align_items": null,
242 |             "align_self": null,
243 |             "border": null,
244 |             "bottom": null,
245 |             "display": null,
246 |             "flex": null,
247 |             "flex_flow": null,
248 |             "grid_area": null,
249 |             "grid_auto_columns": null,
250 |             "grid_auto_flow": null,
251 |             "grid_auto_rows": null,
252 |             "grid_column": null,
253 |             "grid_gap": null,
254 |             "grid_row": null,
255 |             "grid_template_areas": null,
256 |             "grid_template_columns": null,
257 |             "grid_template_rows": null,
258 |             "height": null,
259 |             "justify_content": null,
260 |             "justify_items": null,
261 |             "left": null,
262 |             "margin": null,
263 |             "max_height": null,
264 |             "max_width": null,
265 |             "min_height": null,
266 |             "min_width": null,
267 |             "object_fit": null,
268 |             "object_position": null,
269 |             "order": null,
270 |             "overflow": null,
271 |             "overflow_x": null,
272 |             "overflow_y": null,
273 |             "padding": null,
274 |             "right": null,
275 |             "top": null,
276 |             "visibility": null,
277 |             "width": null
278 |           }
279 |         },
280 |         "54b55dbf5a5d4a2e9afe5c7dc1e26d7d": {
281 |           "model_module": "@jupyter-widgets/controls",
282 |           "model_name": "ProgressStyleModel",
283 |           "model_module_version": "1.5.0",
284 |           "state": {
285 |             "_model_module": "@jupyter-widgets/controls",
286 |             "_model_module_version": "1.5.0",
287 |             "_model_name": "ProgressStyleModel",
288 |             "_view_count": null,
289 |             "_view_module": "@jupyter-widgets/base",
290 |             "_view_module_version": "1.2.0",
291 |             "_view_name": "StyleView",
292 |             "bar_color": null,
293 |             "description_width": ""
294 |           }
295 |         },
296 |         "e5b2378a037846528a84c0a39547ffc2": {
297 |           "model_module": "@jupyter-widgets/base",
298 |           "model_name": "LayoutModel",
299 |           "model_module_version": "1.2.0",
300 |           "state": {
301 |             "_model_module": "@jupyter-widgets/base",
302 |             "_model_module_version": "1.2.0",
303 |             "_model_name": "LayoutModel",
304 |             "_view_count": null,
305 |             "_view_module": "@jupyter-widgets/base",
306 |             "_view_module_version": "1.2.0",
307 |             "_view_name": "LayoutView",
308 |             "align_content": null,
309 |             "align_items": null,
310 |             "align_self": null,
311 |             "border": null,
312 |             "bottom": null,
313 |             "display": null,
314 |             "flex": null,
315 |             "flex_flow": null,
316 |             "grid_area": null,
317 |             "grid_auto_columns": null,
318 |             "grid_auto_flow": null,
319 |             "grid_auto_rows": null,
320 |             "grid_column": null,
321 |             "grid_gap": null,
322 |             "grid_row": null,
323 |             "grid_template_areas": null,
324 |             "grid_template_columns": null,
325 |             "grid_template_rows": null,
326 |             "height": null,
327 |             "justify_content": null,
328 |             "justify_items": null,
329 |             "left": null,
330 |             "margin": null,
331 |             "max_height": null,
332 |             "max_width": null,
333 |             "min_height": null,
334 |             "min_width": null,
335 |             "object_fit": null,
336 |             "object_position": null,
337 |             "order": null,
338 |             "overflow": null,
339 |             "overflow_x": null,
340 |             "overflow_y": null,
341 |             "padding": null,
342 |             "right": null,
343 |             "top": null,
344 |             "visibility": null,
345 |             "width": null
346 |           }
347 |         },
348 |         "f7659f9f7fe14dff8577d6f904e3a108": {
349 |           "model_module": "@jupyter-widgets/controls",
350 |           "model_name": "DescriptionStyleModel",
351 |           "model_module_version": "1.5.0",
352 |           "state": {
353 |             "_model_module": "@jupyter-widgets/controls",
354 |             "_model_module_version": "1.5.0",
355 |             "_model_name": "DescriptionStyleModel",
356 |             "_view_count": null,
357 |             "_view_module": "@jupyter-widgets/base",
358 |             "_view_module_version": "1.2.0",
359 |             "_view_name": "StyleView",
360 |             "description_width": ""
361 |           }
362 |         }
363 |       }
364 |     }
365 |   },
366 |   "cells": [
367 |     {
368 |       "cell_type": "markdown",
369 |       "metadata": {
370 |         "id": "view-in-github",
371 |         "colab_type": "text"
372 |       },
373 |       "source": [
374 |         "<a href=\"https://colab.research.google.com/github/Vaibhavs10/notebooks/blob/main/stable_audio_open_colab.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
375 |       ]
376 |     },
377 |     {
378 |       "cell_type": "code",
379 |       "source": [
380 |         "!pip install -q einops stable_audio_tools"
381 |       ],
382 |       "metadata": {
383 |         "id": "ULbF5y9eoeCN"
384 |       },
385 |       "execution_count": 6,
386 |       "outputs": []
387 |     },
388 |     {
389 |       "cell_type": "code",
390 |       "execution_count": 8,
391 |       "metadata": {
392 |         "id": "GM27lXQuobHw"
393 |       },
394 |       "outputs": [],
395 |       "source": [
396 |         "import torch\n",
397 |         "import torchaudio\n",
398 |         "from einops import rearrange\n",
399 |         "from stable_audio_tools import get_pretrained_model\n",
400 |         "from stable_audio_tools.inference.generation import generate_diffusion_cond"
401 |       ]
402 |     },
403 |     {
404 |       "cell_type": "code",
405 |       "source": [
406 |         "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
407 |         "\n",
408 |         "# Download model\n",
409 |         "model, model_config = get_pretrained_model(\"stabilityai/stable-audio-open-1.0\")\n",
410 |         "sample_rate = model_config[\"sample_rate\"]\n",
411 |         "sample_size = model_config[\"sample_size\"]\n",
412 |         "\n",
413 |         "model = model.to(device)"
414 |       ],
415 |       "metadata": {
416 |         "colab": {
417 |           "base_uri": "https://localhost:8080/"
418 |         },
419 |         "id": "JQSox2_JrjSm",
420 |         "outputId": "d08c9d02-4223-4ea3-b078-ce1ae384fb34"
421 |       },
422 |       "execution_count": 9,
423 |       "outputs": [
424 |         {
425 |           "output_type": "stream",
426 |           "name": "stderr",
427 |           "text": [
428 |             "/usr/local/lib/python3.10/dist-packages/torch/nn/utils/weight_norm.py:28: UserWarning: torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.\n",
429 |             "  warnings.warn(\"torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.\")\n"
430 |           ]
431 |         }
432 |       ]
433 |     },
434 |     {
435 |       "cell_type": "code",
436 |       "source": [
437 |         "# Set up text and timing conditioning\n",
438 |         "conditioning = [{\n",
439 |         "    \"prompt\": \"The sound of the piano keys being pressed, the soft melody that follows, and the gentle hum of the bass create a soothing atmosphere that envelops the listener. It's as if the music is a warm embrace, inviting you to relax and unwind. The rhythm is slow and steady, like a heartbeat, and the notes dance together in perfect harmony. It's a symphony of peace and tranquility, a lullaby for the soul.\",\n",
440 |         "    \"seconds_start\": 0,\n",
441 |         "    \"seconds_total\": 45\n",
442 |         "}]"
443 |       ],
444 |       "metadata": {
445 |         "id": "K87sVH9jtBB9"
446 |       },
447 |       "execution_count": 10,
448 |       "outputs": []
449 |     },
450 |     {
451 |       "cell_type": "code",
452 |       "source": [
453 |         "# Generate stereo audio\n",
454 |         "output = generate_diffusion_cond(\n",
455 |         "    model,\n",
456 |         "    steps=100,\n",
457 |         "    cfg_scale=7,\n",
458 |         "    conditioning=conditioning,\n",
459 |         "    sample_size=sample_size,\n",
460 |         "    sigma_min=0.3,\n",
461 |         "    sigma_max=500,\n",
462 |         "    sampler_type=\"dpmpp-3m-sde\",\n",
463 |         "    device=device\n",
464 |         ")"
465 |       ],
466 |       "metadata": {
467 |         "colab": {
468 |           "base_uri": "https://localhost:8080/",
469 |           "height": 181,
470 |           "referenced_widgets": [
471 |             "d621e5ab73d24c0f85aca1e0a20d3e89",
472 |             "6c7808a91a3e4f72a1248832b985020c",
473 |             "13e9dac474494304b279d662de87d8f4",
474 |             "5d116c165c3841248f360b00ba35fffe",
475 |             "d700622dab8e41e4b3d3acc10ff42ba2",
476 |             "9d88d7dc67534a1fb6e1e15c9c5b937b",
477 |             "480f4371362742d2952ffbbd986ec729",
478 |             "f6aef59a2db049c39ee874ba3cfc4798",
479 |             "54b55dbf5a5d4a2e9afe5c7dc1e26d7d",
480 |             "e5b2378a037846528a84c0a39547ffc2",
481 |             "f7659f9f7fe14dff8577d6f904e3a108"
482 |           ]
483 |         },
484 |         "id": "vV1V-15ktC07",
485 |         "outputId": "e319f4a7-8db7-4c0c-e582-a0777f90868c"
486 |       },
487 |       "execution_count": 11,
488 |       "outputs": [
489 |         {
490 |           "output_type": "stream",
491 |           "name": "stdout",
492 |           "text": [
493 |             "384734133\n"
494 |           ]
495 |         },
496 |         {
497 |           "output_type": "display_data",
498 |           "data": {
499 |             "text/plain": [
500 |               "  0%|          | 0/100 [00:00<?, ?it/s]"
501 |             ],
502 |             "application/vnd.jupyter.widget-view+json": {
503 |               "version_major": 2,
504 |               "version_minor": 0,
505 |               "model_id": "d621e5ab73d24c0f85aca1e0a20d3e89"
506 |             }
507 |           },
508 |           "metadata": {}
509 |         },
510 |         {
511 |           "output_type": "stream",
512 |           "name": "stderr",
513 |           "text": [
514 |             "/usr/local/lib/python3.10/dist-packages/torch/backends/cuda/__init__.py:342: FutureWarning: torch.backends.cuda.sdp_kernel() is deprecated. In the future, this context manager will be removed. Please see, torch.nn.attention.sdpa_kernel() for the new context manager, with updated signature.\n",
515 |             "  warnings.warn(\n",
516 |             "/usr/local/lib/python3.10/dist-packages/torchsde/_brownian/brownian_interval.py:608: UserWarning: Should have tb<=t1 but got tb=500.00006103515625 and t1=500.000061.\n",
517 |             "  warnings.warn(f\"Should have {tb_name}<=t1 but got {tb_name}={tb} and t1={self._end}.\")\n"
518 |           ]
519 |         }
520 |       ]
521 |     },
522 |     {
523 |       "cell_type": "code",
524 |       "source": [
525 |         "# Rearrange audio batch to a single sequence\n",
526 |         "output = rearrange(output, \"b d n -> d (b n)\")"
527 |       ],
528 |       "metadata": {
529 |         "id": "jNH-zKlZtEk5"
530 |       },
531 |       "execution_count": 12,
532 |       "outputs": []
533 |     },
534 |     {
535 |       "cell_type": "code",
536 |       "source": [
537 |         "# Peak normalize, clip, convert to int16, and save to file\n",
538 |         "output = output.to(torch.float32).div(torch.max(torch.abs(output))).clamp(-1, 1).mul(32767).to(torch.int16).cpu()\n",
539 |         "torchaudio.save(\"output.wav\", output, sample_rate)"
540 |       ],
541 |       "metadata": {
542 |         "id": "iwvWXYMjtF0B"
543 |       },
544 |       "execution_count": 13,
545 |       "outputs": []
546 |     }
547 |   ]
548 | }


--------------------------------------------------------------------------------
/transformers_whisper_ckpt_to_OAI.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "provenance": [],
  7 |       "authorship_tag": "ABX9TyOwqcZA9X/v0IMR2JdFVrpM",
  8 |       "include_colab_link": true
  9 |     },
 10 |     "kernelspec": {
 11 |       "name": "python3",
 12 |       "display_name": "Python 3"
 13 |     },
 14 |     "language_info": {
 15 |       "name": "python"
 16 |     },
 17 |     "widgets": {
 18 |       "application/vnd.jupyter.widget-state+json": {
 19 |         "745588c98f5743ddb72d83824270f5c5": {
 20 |           "model_module": "@jupyter-widgets/controls",
 21 |           "model_name": "HBoxModel",
 22 |           "model_module_version": "1.5.0",
 23 |           "state": {
 24 |             "_dom_classes": [],
 25 |             "_model_module": "@jupyter-widgets/controls",
 26 |             "_model_module_version": "1.5.0",
 27 |             "_model_name": "HBoxModel",
 28 |             "_view_count": null,
 29 |             "_view_module": "@jupyter-widgets/controls",
 30 |             "_view_module_version": "1.5.0",
 31 |             "_view_name": "HBoxView",
 32 |             "box_style": "",
 33 |             "children": [
 34 |               "IPY_MODEL_ef501fa7fc204e2bb70fbd9b61484f1f",
 35 |               "IPY_MODEL_c55c2b2459e044f6890e0851ea862d21",
 36 |               "IPY_MODEL_0c9d723b0fad406fac28e8955f492cb7"
 37 |             ],
 38 |             "layout": "IPY_MODEL_6989f75bbb3746fba2f10bf1b9581ead"
 39 |           }
 40 |         },
 41 |         "ef501fa7fc204e2bb70fbd9b61484f1f": {
 42 |           "model_module": "@jupyter-widgets/controls",
 43 |           "model_name": "HTMLModel",
 44 |           "model_module_version": "1.5.0",
 45 |           "state": {
 46 |             "_dom_classes": [],
 47 |             "_model_module": "@jupyter-widgets/controls",
 48 |             "_model_module_version": "1.5.0",
 49 |             "_model_name": "HTMLModel",
 50 |             "_view_count": null,
 51 |             "_view_module": "@jupyter-widgets/controls",
 52 |             "_view_module_version": "1.5.0",
 53 |             "_view_name": "HTMLView",
 54 |             "description": "",
 55 |             "description_tooltip": null,
 56 |             "layout": "IPY_MODEL_6de3345bd4a74f74a0c53b5ebe1f3187",
 57 |             "placeholder": "​",
 58 |             "style": "IPY_MODEL_672d9de7616c46f083c6bf9c4f9caff3",
 59 |             "value": "Downloading: 100%"
 60 |           }
 61 |         },
 62 |         "c55c2b2459e044f6890e0851ea862d21": {
 63 |           "model_module": "@jupyter-widgets/controls",
 64 |           "model_name": "FloatProgressModel",
 65 |           "model_module_version": "1.5.0",
 66 |           "state": {
 67 |             "_dom_classes": [],
 68 |             "_model_module": "@jupyter-widgets/controls",
 69 |             "_model_module_version": "1.5.0",
 70 |             "_model_name": "FloatProgressModel",
 71 |             "_view_count": null,
 72 |             "_view_module": "@jupyter-widgets/controls",
 73 |             "_view_module_version": "1.5.0",
 74 |             "_view_name": "ProgressView",
 75 |             "bar_style": "success",
 76 |             "description": "",
 77 |             "description_tooltip": null,
 78 |             "layout": "IPY_MODEL_e49c2e613b93405a9366f759e36ccb43",
 79 |             "max": 1985,
 80 |             "min": 0,
 81 |             "orientation": "horizontal",
 82 |             "style": "IPY_MODEL_6043edb9df9e47d0a28ac8fbafca28b1",
 83 |             "value": 1985
 84 |           }
 85 |         },
 86 |         "0c9d723b0fad406fac28e8955f492cb7": {
 87 |           "model_module": "@jupyter-widgets/controls",
 88 |           "model_name": "HTMLModel",
 89 |           "model_module_version": "1.5.0",
 90 |           "state": {
 91 |             "_dom_classes": [],
 92 |             "_model_module": "@jupyter-widgets/controls",
 93 |             "_model_module_version": "1.5.0",
 94 |             "_model_name": "HTMLModel",
 95 |             "_view_count": null,
 96 |             "_view_module": "@jupyter-widgets/controls",
 97 |             "_view_module_version": "1.5.0",
 98 |             "_view_name": "HTMLView",
 99 |             "description": "",
100 |             "description_tooltip": null,
101 |             "layout": "IPY_MODEL_fa5debe4f46b4dd796528715a962b738",
102 |             "placeholder": "​",
103 |             "style": "IPY_MODEL_c0bef8c62caf4e149a250ce7a5fe245b",
104 |             "value": " 1.99k/1.99k [00:00&lt;00:00, 33.8kB/s]"
105 |           }
106 |         },
107 |         "6989f75bbb3746fba2f10bf1b9581ead": {
108 |           "model_module": "@jupyter-widgets/base",
109 |           "model_name": "LayoutModel",
110 |           "model_module_version": "1.2.0",
111 |           "state": {
112 |             "_model_module": "@jupyter-widgets/base",
113 |             "_model_module_version": "1.2.0",
114 |             "_model_name": "LayoutModel",
115 |             "_view_count": null,
116 |             "_view_module": "@jupyter-widgets/base",
117 |             "_view_module_version": "1.2.0",
118 |             "_view_name": "LayoutView",
119 |             "align_content": null,
120 |             "align_items": null,
121 |             "align_self": null,
122 |             "border": null,
123 |             "bottom": null,
124 |             "display": null,
125 |             "flex": null,
126 |             "flex_flow": null,
127 |             "grid_area": null,
128 |             "grid_auto_columns": null,
129 |             "grid_auto_flow": null,
130 |             "grid_auto_rows": null,
131 |             "grid_column": null,
132 |             "grid_gap": null,
133 |             "grid_row": null,
134 |             "grid_template_areas": null,
135 |             "grid_template_columns": null,
136 |             "grid_template_rows": null,
137 |             "height": null,
138 |             "justify_content": null,
139 |             "justify_items": null,
140 |             "left": null,
141 |             "margin": null,
142 |             "max_height": null,
143 |             "max_width": null,
144 |             "min_height": null,
145 |             "min_width": null,
146 |             "object_fit": null,
147 |             "object_position": null,
148 |             "order": null,
149 |             "overflow": null,
150 |             "overflow_x": null,
151 |             "overflow_y": null,
152 |             "padding": null,
153 |             "right": null,
154 |             "top": null,
155 |             "visibility": null,
156 |             "width": null
157 |           }
158 |         },
159 |         "6de3345bd4a74f74a0c53b5ebe1f3187": {
160 |           "model_module": "@jupyter-widgets/base",
161 |           "model_name": "LayoutModel",
162 |           "model_module_version": "1.2.0",
163 |           "state": {
164 |             "_model_module": "@jupyter-widgets/base",
165 |             "_model_module_version": "1.2.0",
166 |             "_model_name": "LayoutModel",
167 |             "_view_count": null,
168 |             "_view_module": "@jupyter-widgets/base",
169 |             "_view_module_version": "1.2.0",
170 |             "_view_name": "LayoutView",
171 |             "align_content": null,
172 |             "align_items": null,
173 |             "align_self": null,
174 |             "border": null,
175 |             "bottom": null,
176 |             "display": null,
177 |             "flex": null,
178 |             "flex_flow": null,
179 |             "grid_area": null,
180 |             "grid_auto_columns": null,
181 |             "grid_auto_flow": null,
182 |             "grid_auto_rows": null,
183 |             "grid_column": null,
184 |             "grid_gap": null,
185 |             "grid_row": null,
186 |             "grid_template_areas": null,
187 |             "grid_template_columns": null,
188 |             "grid_template_rows": null,
189 |             "height": null,
190 |             "justify_content": null,
191 |             "justify_items": null,
192 |             "left": null,
193 |             "margin": null,
194 |             "max_height": null,
195 |             "max_width": null,
196 |             "min_height": null,
197 |             "min_width": null,
198 |             "object_fit": null,
199 |             "object_position": null,
200 |             "order": null,
201 |             "overflow": null,
202 |             "overflow_x": null,
203 |             "overflow_y": null,
204 |             "padding": null,
205 |             "right": null,
206 |             "top": null,
207 |             "visibility": null,
208 |             "width": null
209 |           }
210 |         },
211 |         "672d9de7616c46f083c6bf9c4f9caff3": {
212 |           "model_module": "@jupyter-widgets/controls",
213 |           "model_name": "DescriptionStyleModel",
214 |           "model_module_version": "1.5.0",
215 |           "state": {
216 |             "_model_module": "@jupyter-widgets/controls",
217 |             "_model_module_version": "1.5.0",
218 |             "_model_name": "DescriptionStyleModel",
219 |             "_view_count": null,
220 |             "_view_module": "@jupyter-widgets/base",
221 |             "_view_module_version": "1.2.0",
222 |             "_view_name": "StyleView",
223 |             "description_width": ""
224 |           }
225 |         },
226 |         "e49c2e613b93405a9366f759e36ccb43": {
227 |           "model_module": "@jupyter-widgets/base",
228 |           "model_name": "LayoutModel",
229 |           "model_module_version": "1.2.0",
230 |           "state": {
231 |             "_model_module": "@jupyter-widgets/base",
232 |             "_model_module_version": "1.2.0",
233 |             "_model_name": "LayoutModel",
234 |             "_view_count": null,
235 |             "_view_module": "@jupyter-widgets/base",
236 |             "_view_module_version": "1.2.0",
237 |             "_view_name": "LayoutView",
238 |             "align_content": null,
239 |             "align_items": null,
240 |             "align_self": null,
241 |             "border": null,
242 |             "bottom": null,
243 |             "display": null,
244 |             "flex": null,
245 |             "flex_flow": null,
246 |             "grid_area": null,
247 |             "grid_auto_columns": null,
248 |             "grid_auto_flow": null,
249 |             "grid_auto_rows": null,
250 |             "grid_column": null,
251 |             "grid_gap": null,
252 |             "grid_row": null,
253 |             "grid_template_areas": null,
254 |             "grid_template_columns": null,
255 |             "grid_template_rows": null,
256 |             "height": null,
257 |             "justify_content": null,
258 |             "justify_items": null,
259 |             "left": null,
260 |             "margin": null,
261 |             "max_height": null,
262 |             "max_width": null,
263 |             "min_height": null,
264 |             "min_width": null,
265 |             "object_fit": null,
266 |             "object_position": null,
267 |             "order": null,
268 |             "overflow": null,
269 |             "overflow_x": null,
270 |             "overflow_y": null,
271 |             "padding": null,
272 |             "right": null,
273 |             "top": null,
274 |             "visibility": null,
275 |             "width": null
276 |           }
277 |         },
278 |         "6043edb9df9e47d0a28ac8fbafca28b1": {
279 |           "model_module": "@jupyter-widgets/controls",
280 |           "model_name": "ProgressStyleModel",
281 |           "model_module_version": "1.5.0",
282 |           "state": {
283 |             "_model_module": "@jupyter-widgets/controls",
284 |             "_model_module_version": "1.5.0",
285 |             "_model_name": "ProgressStyleModel",
286 |             "_view_count": null,
287 |             "_view_module": "@jupyter-widgets/base",
288 |             "_view_module_version": "1.2.0",
289 |             "_view_name": "StyleView",
290 |             "bar_color": null,
291 |             "description_width": ""
292 |           }
293 |         },
294 |         "fa5debe4f46b4dd796528715a962b738": {
295 |           "model_module": "@jupyter-widgets/base",
296 |           "model_name": "LayoutModel",
297 |           "model_module_version": "1.2.0",
298 |           "state": {
299 |             "_model_module": "@jupyter-widgets/base",
300 |             "_model_module_version": "1.2.0",
301 |             "_model_name": "LayoutModel",
302 |             "_view_count": null,
303 |             "_view_module": "@jupyter-widgets/base",
304 |             "_view_module_version": "1.2.0",
305 |             "_view_name": "LayoutView",
306 |             "align_content": null,
307 |             "align_items": null,
308 |             "align_self": null,
309 |             "border": null,
310 |             "bottom": null,
311 |             "display": null,
312 |             "flex": null,
313 |             "flex_flow": null,
314 |             "grid_area": null,
315 |             "grid_auto_columns": null,
316 |             "grid_auto_flow": null,
317 |             "grid_auto_rows": null,
318 |             "grid_column": null,
319 |             "grid_gap": null,
320 |             "grid_row": null,
321 |             "grid_template_areas": null,
322 |             "grid_template_columns": null,
323 |             "grid_template_rows": null,
324 |             "height": null,
325 |             "justify_content": null,
326 |             "justify_items": null,
327 |             "left": null,
328 |             "margin": null,
329 |             "max_height": null,
330 |             "max_width": null,
331 |             "min_height": null,
332 |             "min_width": null,
333 |             "object_fit": null,
334 |             "object_position": null,
335 |             "order": null,
336 |             "overflow": null,
337 |             "overflow_x": null,
338 |             "overflow_y": null,
339 |             "padding": null,
340 |             "right": null,
341 |             "top": null,
342 |             "visibility": null,
343 |             "width": null
344 |           }
345 |         },
346 |         "c0bef8c62caf4e149a250ce7a5fe245b": {
347 |           "model_module": "@jupyter-widgets/controls",
348 |           "model_name": "DescriptionStyleModel",
349 |           "model_module_version": "1.5.0",
350 |           "state": {
351 |             "_model_module": "@jupyter-widgets/controls",
352 |             "_model_module_version": "1.5.0",
353 |             "_model_name": "DescriptionStyleModel",
354 |             "_view_count": null,
355 |             "_view_module": "@jupyter-widgets/base",
356 |             "_view_module_version": "1.2.0",
357 |             "_view_name": "StyleView",
358 |             "description_width": ""
359 |           }
360 |         },
361 |         "7f891763dcbd47b187945359fa20e037": {
362 |           "model_module": "@jupyter-widgets/controls",
363 |           "model_name": "HBoxModel",
364 |           "model_module_version": "1.5.0",
365 |           "state": {
366 |             "_dom_classes": [],
367 |             "_model_module": "@jupyter-widgets/controls",
368 |             "_model_module_version": "1.5.0",
369 |             "_model_name": "HBoxModel",
370 |             "_view_count": null,
371 |             "_view_module": "@jupyter-widgets/controls",
372 |             "_view_module_version": "1.5.0",
373 |             "_view_name": "HBoxView",
374 |             "box_style": "",
375 |             "children": [
376 |               "IPY_MODEL_fbe40f8a39ed4b1ba41b04f3761a2e7e",
377 |               "IPY_MODEL_f05e0141670c4460976e16fa7ce72dfd",
378 |               "IPY_MODEL_ddc73b7194c144ee9c7c8d6abe953ebd"
379 |             ],
380 |             "layout": "IPY_MODEL_fc6ccbdf580145c3b17aef8c1c06405c"
381 |           }
382 |         },
383 |         "fbe40f8a39ed4b1ba41b04f3761a2e7e": {
384 |           "model_module": "@jupyter-widgets/controls",
385 |           "model_name": "HTMLModel",
386 |           "model_module_version": "1.5.0",
387 |           "state": {
388 |             "_dom_classes": [],
389 |             "_model_module": "@jupyter-widgets/controls",
390 |             "_model_module_version": "1.5.0",
391 |             "_model_name": "HTMLModel",
392 |             "_view_count": null,
393 |             "_view_module": "@jupyter-widgets/controls",
394 |             "_view_module_version": "1.5.0",
395 |             "_view_name": "HTMLView",
396 |             "description": "",
397 |             "description_tooltip": null,
398 |             "layout": "IPY_MODEL_7290d13ad54749faa28ebd4442c279f3",
399 |             "placeholder": "​",
400 |             "style": "IPY_MODEL_b6d4f50b005f43a38910569aa3cc11c1",
401 |             "value": "Downloading: 100%"
402 |           }
403 |         },
404 |         "f05e0141670c4460976e16fa7ce72dfd": {
405 |           "model_module": "@jupyter-widgets/controls",
406 |           "model_name": "FloatProgressModel",
407 |           "model_module_version": "1.5.0",
408 |           "state": {
409 |             "_dom_classes": [],
410 |             "_model_module": "@jupyter-widgets/controls",
411 |             "_model_module_version": "1.5.0",
412 |             "_model_name": "FloatProgressModel",
413 |             "_view_count": null,
414 |             "_view_module": "@jupyter-widgets/controls",
415 |             "_view_module_version": "1.5.0",
416 |             "_view_name": "ProgressView",
417 |             "bar_style": "success",
418 |             "description": "",
419 |             "description_tooltip": null,
420 |             "layout": "IPY_MODEL_aec08700fe794e5db392755ea3447a99",
421 |             "max": 967102601,
422 |             "min": 0,
423 |             "orientation": "horizontal",
424 |             "style": "IPY_MODEL_5c24c6d84dd64f0da47b0ac8217747fb",
425 |             "value": 967102601
426 |           }
427 |         },
428 |         "ddc73b7194c144ee9c7c8d6abe953ebd": {
429 |           "model_module": "@jupyter-widgets/controls",
430 |           "model_name": "HTMLModel",
431 |           "model_module_version": "1.5.0",
432 |           "state": {
433 |             "_dom_classes": [],
434 |             "_model_module": "@jupyter-widgets/controls",
435 |             "_model_module_version": "1.5.0",
436 |             "_model_name": "HTMLModel",
437 |             "_view_count": null,
438 |             "_view_module": "@jupyter-widgets/controls",
439 |             "_view_module_version": "1.5.0",
440 |             "_view_name": "HTMLView",
441 |             "description": "",
442 |             "description_tooltip": null,
443 |             "layout": "IPY_MODEL_0ef0dc2df03d4aeb898e87d550f787cb",
444 |             "placeholder": "​",
445 |             "style": "IPY_MODEL_6c1d179f465c4a8ab8c9f87169c55e6e",
446 |             "value": " 967M/967M [00:28&lt;00:00, 34.4MB/s]"
447 |           }
448 |         },
449 |         "fc6ccbdf580145c3b17aef8c1c06405c": {
450 |           "model_module": "@jupyter-widgets/base",
451 |           "model_name": "LayoutModel",
452 |           "model_module_version": "1.2.0",
453 |           "state": {
454 |             "_model_module": "@jupyter-widgets/base",
455 |             "_model_module_version": "1.2.0",
456 |             "_model_name": "LayoutModel",
457 |             "_view_count": null,
458 |             "_view_module": "@jupyter-widgets/base",
459 |             "_view_module_version": "1.2.0",
460 |             "_view_name": "LayoutView",
461 |             "align_content": null,
462 |             "align_items": null,
463 |             "align_self": null,
464 |             "border": null,
465 |             "bottom": null,
466 |             "display": null,
467 |             "flex": null,
468 |             "flex_flow": null,
469 |             "grid_area": null,
470 |             "grid_auto_columns": null,
471 |             "grid_auto_flow": null,
472 |             "grid_auto_rows": null,
473 |             "grid_column": null,
474 |             "grid_gap": null,
475 |             "grid_row": null,
476 |             "grid_template_areas": null,
477 |             "grid_template_columns": null,
478 |             "grid_template_rows": null,
479 |             "height": null,
480 |             "justify_content": null,
481 |             "justify_items": null,
482 |             "left": null,
483 |             "margin": null,
484 |             "max_height": null,
485 |             "max_width": null,
486 |             "min_height": null,
487 |             "min_width": null,
488 |             "object_fit": null,
489 |             "object_position": null,
490 |             "order": null,
491 |             "overflow": null,
492 |             "overflow_x": null,
493 |             "overflow_y": null,
494 |             "padding": null,
495 |             "right": null,
496 |             "top": null,
497 |             "visibility": null,
498 |             "width": null
499 |           }
500 |         },
501 |         "7290d13ad54749faa28ebd4442c279f3": {
502 |           "model_module": "@jupyter-widgets/base",
503 |           "model_name": "LayoutModel",
504 |           "model_module_version": "1.2.0",
505 |           "state": {
506 |             "_model_module": "@jupyter-widgets/base",
507 |             "_model_module_version": "1.2.0",
508 |             "_model_name": "LayoutModel",
509 |             "_view_count": null,
510 |             "_view_module": "@jupyter-widgets/base",
511 |             "_view_module_version": "1.2.0",
512 |             "_view_name": "LayoutView",
513 |             "align_content": null,
514 |             "align_items": null,
515 |             "align_self": null,
516 |             "border": null,
517 |             "bottom": null,
518 |             "display": null,
519 |             "flex": null,
520 |             "flex_flow": null,
521 |             "grid_area": null,
522 |             "grid_auto_columns": null,
523 |             "grid_auto_flow": null,
524 |             "grid_auto_rows": null,
525 |             "grid_column": null,
526 |             "grid_gap": null,
527 |             "grid_row": null,
528 |             "grid_template_areas": null,
529 |             "grid_template_columns": null,
530 |             "grid_template_rows": null,
531 |             "height": null,
532 |             "justify_content": null,
533 |             "justify_items": null,
534 |             "left": null,
535 |             "margin": null,
536 |             "max_height": null,
537 |             "max_width": null,
538 |             "min_height": null,
539 |             "min_width": null,
540 |             "object_fit": null,
541 |             "object_position": null,
542 |             "order": null,
543 |             "overflow": null,
544 |             "overflow_x": null,
545 |             "overflow_y": null,
546 |             "padding": null,
547 |             "right": null,
548 |             "top": null,
549 |             "visibility": null,
550 |             "width": null
551 |           }
552 |         },
553 |         "b6d4f50b005f43a38910569aa3cc11c1": {
554 |           "model_module": "@jupyter-widgets/controls",
555 |           "model_name": "DescriptionStyleModel",
556 |           "model_module_version": "1.5.0",
557 |           "state": {
558 |             "_model_module": "@jupyter-widgets/controls",
559 |             "_model_module_version": "1.5.0",
560 |             "_model_name": "DescriptionStyleModel",
561 |             "_view_count": null,
562 |             "_view_module": "@jupyter-widgets/base",
563 |             "_view_module_version": "1.2.0",
564 |             "_view_name": "StyleView",
565 |             "description_width": ""
566 |           }
567 |         },
568 |         "aec08700fe794e5db392755ea3447a99": {
569 |           "model_module": "@jupyter-widgets/base",
570 |           "model_name": "LayoutModel",
571 |           "model_module_version": "1.2.0",
572 |           "state": {
573 |             "_model_module": "@jupyter-widgets/base",
574 |             "_model_module_version": "1.2.0",
575 |             "_model_name": "LayoutModel",
576 |             "_view_count": null,
577 |             "_view_module": "@jupyter-widgets/base",
578 |             "_view_module_version": "1.2.0",
579 |             "_view_name": "LayoutView",
580 |             "align_content": null,
581 |             "align_items": null,
582 |             "align_self": null,
583 |             "border": null,
584 |             "bottom": null,
585 |             "display": null,
586 |             "flex": null,
587 |             "flex_flow": null,
588 |             "grid_area": null,
589 |             "grid_auto_columns": null,
590 |             "grid_auto_flow": null,
591 |             "grid_auto_rows": null,
592 |             "grid_column": null,
593 |             "grid_gap": null,
594 |             "grid_row": null,
595 |             "grid_template_areas": null,
596 |             "grid_template_columns": null,
597 |             "grid_template_rows": null,
598 |             "height": null,
599 |             "justify_content": null,
600 |             "justify_items": null,
601 |             "left": null,
602 |             "margin": null,
603 |             "max_height": null,
604 |             "max_width": null,
605 |             "min_height": null,
606 |             "min_width": null,
607 |             "object_fit": null,
608 |             "object_position": null,
609 |             "order": null,
610 |             "overflow": null,
611 |             "overflow_x": null,
612 |             "overflow_y": null,
613 |             "padding": null,
614 |             "right": null,
615 |             "top": null,
616 |             "visibility": null,
617 |             "width": null
618 |           }
619 |         },
620 |         "5c24c6d84dd64f0da47b0ac8217747fb": {
621 |           "model_module": "@jupyter-widgets/controls",
622 |           "model_name": "ProgressStyleModel",
623 |           "model_module_version": "1.5.0",
624 |           "state": {
625 |             "_model_module": "@jupyter-widgets/controls",
626 |             "_model_module_version": "1.5.0",
627 |             "_model_name": "ProgressStyleModel",
628 |             "_view_count": null,
629 |             "_view_module": "@jupyter-widgets/base",
630 |             "_view_module_version": "1.2.0",
631 |             "_view_name": "StyleView",
632 |             "bar_color": null,
633 |             "description_width": ""
634 |           }
635 |         },
636 |         "0ef0dc2df03d4aeb898e87d550f787cb": {
637 |           "model_module": "@jupyter-widgets/base",
638 |           "model_name": "LayoutModel",
639 |           "model_module_version": "1.2.0",
640 |           "state": {
641 |             "_model_module": "@jupyter-widgets/base",
642 |             "_model_module_version": "1.2.0",
643 |             "_model_name": "LayoutModel",
644 |             "_view_count": null,
645 |             "_view_module": "@jupyter-widgets/base",
646 |             "_view_module_version": "1.2.0",
647 |             "_view_name": "LayoutView",
648 |             "align_content": null,
649 |             "align_items": null,
650 |             "align_self": null,
651 |             "border": null,
652 |             "bottom": null,
653 |             "display": null,
654 |             "flex": null,
655 |             "flex_flow": null,
656 |             "grid_area": null,
657 |             "grid_auto_columns": null,
658 |             "grid_auto_flow": null,
659 |             "grid_auto_rows": null,
660 |             "grid_column": null,
661 |             "grid_gap": null,
662 |             "grid_row": null,
663 |             "grid_template_areas": null,
664 |             "grid_template_columns": null,
665 |             "grid_template_rows": null,
666 |             "height": null,
667 |             "justify_content": null,
668 |             "justify_items": null,
669 |             "left": null,
670 |             "margin": null,
671 |             "max_height": null,
672 |             "max_width": null,
673 |             "min_height": null,
674 |             "min_width": null,
675 |             "object_fit": null,
676 |             "object_position": null,
677 |             "order": null,
678 |             "overflow": null,
679 |             "overflow_x": null,
680 |             "overflow_y": null,
681 |             "padding": null,
682 |             "right": null,
683 |             "top": null,
684 |             "visibility": null,
685 |             "width": null
686 |           }
687 |         },
688 |         "6c1d179f465c4a8ab8c9f87169c55e6e": {
689 |           "model_module": "@jupyter-widgets/controls",
690 |           "model_name": "DescriptionStyleModel",
691 |           "model_module_version": "1.5.0",
692 |           "state": {
693 |             "_model_module": "@jupyter-widgets/controls",
694 |             "_model_module_version": "1.5.0",
695 |             "_model_name": "DescriptionStyleModel",
696 |             "_view_count": null,
697 |             "_view_module": "@jupyter-widgets/base",
698 |             "_view_module_version": "1.2.0",
699 |             "_view_name": "StyleView",
700 |             "description_width": ""
701 |           }
702 |         }
703 |       }
704 |     }
705 |   },
706 |   "cells": [
707 |     {
708 |       "cell_type": "markdown",
709 |       "metadata": {
710 |         "id": "view-in-github",
711 |         "colab_type": "text"
712 |       },
713 |       "source": [
714 |         "<a href=\"https://colab.research.google.com/github/Vaibhavs10/notebooks/blob/main/transformers_whisper_ckpt_to_OAI.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
715 |       ]
716 |     },
717 |     {
718 |       "cell_type": "code",
719 |       "source": [
720 |         "!pip install git+https://github.com/huggingface/transformers"
721 |       ],
722 |       "metadata": {
723 |         "colab": {
724 |           "base_uri": "https://localhost:8080/"
725 |         },
726 |         "id": "uPXNgQVu_gQi",
727 |         "outputId": "93d1eb58-8609-43d2-af59-06a6e33ebfd1"
728 |       },
729 |       "execution_count": null,
730 |       "outputs": [
731 |         {
732 |           "output_type": "stream",
733 |           "name": "stdout",
734 |           "text": [
735 |             "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
736 |             "Collecting git+https://github.com/huggingface/transformers\n",
737 |             "  Cloning https://github.com/huggingface/transformers to /tmp/pip-req-build-nksmoml9\n",
738 |             "  Running command git clone --filter=blob:none --quiet https://github.com/huggingface/transformers /tmp/pip-req-build-nksmoml9\n",
739 |             "  Resolved https://github.com/huggingface/transformers to commit d0f324f1e13b2813d4571f446795b15f01cda056\n",
740 |             "  Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
741 |             "  Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
742 |             "  Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
743 |             "Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /usr/local/lib/python3.8/dist-packages (from transformers==4.26.0.dev0) (0.13.2)\n",
744 |             "Requirement already satisfied: requests in /usr/local/lib/python3.8/dist-packages (from transformers==4.26.0.dev0) (2.25.1)\n",
745 |             "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.8/dist-packages (from transformers==4.26.0.dev0) (1.21.6)\n",
746 |             "Requirement already satisfied: filelock in /usr/local/lib/python3.8/dist-packages (from transformers==4.26.0.dev0) (3.8.2)\n",
747 |             "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.8/dist-packages (from transformers==4.26.0.dev0) (2022.6.2)\n",
748 |             "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.8/dist-packages (from transformers==4.26.0.dev0) (6.0)\n",
749 |             "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.8/dist-packages (from transformers==4.26.0.dev0) (4.64.1)\n",
750 |             "Requirement already satisfied: huggingface-hub<1.0,>=0.10.0 in /usr/local/lib/python3.8/dist-packages (from transformers==4.26.0.dev0) (0.11.1)\n",
751 |             "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.8/dist-packages (from transformers==4.26.0.dev0) (21.3)\n",
752 |             "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.8/dist-packages (from huggingface-hub<1.0,>=0.10.0->transformers==4.26.0.dev0) (4.4.0)\n",
753 |             "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.8/dist-packages (from packaging>=20.0->transformers==4.26.0.dev0) (3.0.9)\n",
754 |             "Requirement already satisfied: chardet<5,>=3.0.2 in /usr/local/lib/python3.8/dist-packages (from requests->transformers==4.26.0.dev0) (4.0.0)\n",
755 |             "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.8/dist-packages (from requests->transformers==4.26.0.dev0) (2.10)\n",
756 |             "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.8/dist-packages (from requests->transformers==4.26.0.dev0) (1.24.3)\n",
757 |             "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.8/dist-packages (from requests->transformers==4.26.0.dev0) (2022.12.7)\n"
758 |           ]
759 |         }
760 |       ]
761 |     },
762 |     {
763 |       "cell_type": "code",
764 |       "execution_count": null,
765 |       "metadata": {
766 |         "id": "iV8RL4Oy_F3Q"
767 |       },
768 |       "outputs": [],
769 |       "source": [
770 |         "from copy import deepcopy\n",
771 |         "import torch\n",
772 |         "from transformers import WhisperForConditionalGeneration"
773 |       ]
774 |     },
775 |     {
776 |       "cell_type": "code",
777 |       "source": [
778 |         "# Hugging Face -> OpenAI Whisper name fragments; order matters: the generic layer_norm entry must stay last.\n",
779 |         "WHISPER_MAPPING = {\n",
780 |         "    \"layers\": \"blocks\",\n",
781 |         "    \"fc1\": \"mlp.0\",\n",
782 |         "    \"fc2\": \"mlp.2\",\n",
783 |         "    \"final_layer_norm\": \"mlp_ln\",\n",
784 |         "    \".self_attn.q_proj\": \".attn.query\",\n",
785 |         "    \".self_attn.k_proj\": \".attn.key\",\n",
786 |         "    \".self_attn.v_proj\": \".attn.value\",\n",
787 |         "    \".self_attn_layer_norm\": \".attn_ln\",\n",
788 |         "    \".self_attn.out_proj\": \".attn.out\",\n",
789 |         "    \".encoder_attn.q_proj\": \".cross_attn.query\",\n",
790 |         "    \".encoder_attn.k_proj\": \".cross_attn.key\",\n",
791 |         "    \".encoder_attn.v_proj\": \".cross_attn.value\",\n",
792 |         "    \".encoder_attn_layer_norm\": \".cross_attn_ln\",\n",
793 |         "    \".encoder_attn.out_proj\": \".cross_attn.out\",\n",
794 |         "    \"decoder.layer_norm.\": \"decoder.ln.\",\n",
795 |         "    \"encoder.layer_norm.\": \"encoder.ln_post.\",\n",
796 |         "    \"embed_tokens\": \"token_embedding\",\n",
797 |         "    \"encoder.embed_positions.weight\": \"encoder.positional_embedding\",\n",
798 |         "    \"decoder.embed_positions.weight\": \"decoder.positional_embedding\",\n",
799 |         "    \"layer_norm\": \"ln_post\",\n",
800 |         "}\n",
801 |         "\n",
802 |         "\n",
803 |         "def rename_keys(s_dict):\n",
804 |         "    \"\"\"Rename every key of ``s_dict`` in place using WHISPER_MAPPING and return the same dict.\"\"\"\n",
805 |         "    for old_name in list(s_dict):\n",
806 |         "        renamed = old_name\n",
807 |         "        for fragment, replacement in WHISPER_MAPPING.items():\n",
808 |         "            # Match against the original name, but rewrite the partially renamed one.\n",
809 |         "            if fragment in old_name:\n",
810 |         "                renamed = renamed.replace(fragment, replacement)\n",
811 |         "        s_dict[renamed] = s_dict.pop(old_name)\n",
812 |         "    return s_dict"
813 |       ],
814 |       "metadata": {
815 |         "id": "ZvlNRPssAAoa"
816 |       },
817 |       "execution_count": null,
818 |       "outputs": []
819 |     },
820 |     {
821 |       "cell_type": "code",
822 |       "source": [
823 |         "def convert_hf_ckpt_to_whisper_ckpt(hf_model_name_or_path, whisper_ckpt_save_path):\n",
824 |         "    \"\"\"Save the Hugging Face Whisper model at ``hf_model_name_or_path`` as an OpenAI-style checkpoint.\n",
825 |         "\n",
826 |         "    The file at ``whisper_ckpt_save_path`` holds {\"dims\": ..., \"model_state_dict\": ...}; returns None.\"\"\"\n",
827 |         "    hf_model = WhisperForConditionalGeneration.from_pretrained(hf_model_name_or_path)\n",
828 |         "    config = hf_model.config\n",
829 |         "    dims = {\n",
830 |         "        'n_mels': config.num_mel_bins,\n",
831 |         "        'n_vocab': config.vocab_size,\n",
832 |         "        'n_audio_ctx': config.max_source_positions,\n",
833 |         "        'n_audio_state': config.d_model,\n",
834 |         "        'n_audio_head': config.encoder_attention_heads,\n",
835 |         "        'n_audio_layer': config.encoder_layers,\n",
836 |         "        'n_text_ctx': config.max_target_positions,\n",
837 |         "        'n_text_state': config.d_model,\n",
838 |         "        'n_text_head': config.decoder_attention_heads,\n",
839 |         "        'n_text_layer': config.decoder_layers\n",
840 |         "    }\n",
841 |         "    # state_dict() returns a fresh mapping, so renaming its keys cannot touch the model; no deepcopy needed.\n",
842 |         "    state_dict = rename_keys(hf_model.model.state_dict())\n",
843 |         "    torch.save({\"dims\": dims, \"model_state_dict\": state_dict}, whisper_ckpt_save_path)"
844 |       ],
845 |       "metadata": {
846 |         "id": "CEeChPKkAn6f"
847 |       },
848 |       "execution_count": null,
849 |       "outputs": []
850 |     },
851 |     {
852 |       "cell_type": "code",
853 |       "source": [
854 |         "convert_hf_ckpt_to_whisper_ckpt(\"flozi00/whisper-small-german\", \"flozi00_whisper-small-german_OAI\")"
855 |       ],
856 |       "metadata": {
857 |         "colab": {
858 |           "base_uri": "https://localhost:8080/",
859 |           "height": 81,
860 |           "referenced_widgets": [
861 |             "745588c98f5743ddb72d83824270f5c5",
862 |             "ef501fa7fc204e2bb70fbd9b61484f1f",
863 |             "c55c2b2459e044f6890e0851ea862d21",
864 |             "0c9d723b0fad406fac28e8955f492cb7",
865 |             "6989f75bbb3746fba2f10bf1b9581ead",
866 |             "6de3345bd4a74f74a0c53b5ebe1f3187",
867 |             "672d9de7616c46f083c6bf9c4f9caff3",
868 |             "e49c2e613b93405a9366f759e36ccb43",
869 |             "6043edb9df9e47d0a28ac8fbafca28b1",
870 |             "fa5debe4f46b4dd796528715a962b738",
871 |             "c0bef8c62caf4e149a250ce7a5fe245b",
872 |             "7f891763dcbd47b187945359fa20e037",
873 |             "fbe40f8a39ed4b1ba41b04f3761a2e7e",
874 |             "f05e0141670c4460976e16fa7ce72dfd",
875 |             "ddc73b7194c144ee9c7c8d6abe953ebd",
876 |             "fc6ccbdf580145c3b17aef8c1c06405c",
877 |             "7290d13ad54749faa28ebd4442c279f3",
878 |             "b6d4f50b005f43a38910569aa3cc11c1",
879 |             "aec08700fe794e5db392755ea3447a99",
880 |             "5c24c6d84dd64f0da47b0ac8217747fb",
881 |             "0ef0dc2df03d4aeb898e87d550f787cb",
882 |             "6c1d179f465c4a8ab8c9f87169c55e6e"
883 |           ]
884 |         },
885 |         "id": "86N7BfR-BQYA",
886 |         "outputId": "b149cb02-579f-44c9-8d26-651db72ce79a"
887 |       },
888 |       "execution_count": null,
889 |       "outputs": [
890 |         {
891 |           "output_type": "display_data",
892 |           "data": {
893 |             "text/plain": [
894 |               "Downloading:   0%|          | 0.00/1.99k [00:00<?, ?B/s]"
895 |             ],
896 |             "application/vnd.jupyter.widget-view+json": {
897 |               "version_major": 2,
898 |               "version_minor": 0,
899 |               "model_id": "745588c98f5743ddb72d83824270f5c5"
900 |             }
901 |           },
902 |           "metadata": {}
903 |         },
904 |         {
905 |           "output_type": "display_data",
906 |           "data": {
907 |             "text/plain": [
908 |               "Downloading:   0%|          | 0.00/967M [00:00<?, ?B/s]"
909 |             ],
910 |             "application/vnd.jupyter.widget-view+json": {
911 |               "version_major": 2,
912 |               "version_minor": 0,
913 |               "model_id": "7f891763dcbd47b187945359fa20e037"
914 |             }
915 |           },
916 |           "metadata": {}
917 |         }
918 |       ]
919 |     }
920 |   ]
921 | }


--------------------------------------------------------------------------------