├── README.md ├── chatbot_Raven.ipynb ├── YT_Open_Assistant_Pythia12B_8bit_in_Colab.ipynb ├── YT_LaMini_Flan_T5_783M_8Bit_.ipynb ├── YT_LaMini_LM_Neo_1_3B_8Bit.ipynb ├── RWKV.ipynb └── YT_MPT_Instruct_7B.ipynb /README.md: -------------------------------------------------------------------------------- 1 | # llm-tutorials 2 | A set of LLM Tutorials from my youtube channel https://www.youtube.com/@samwitteveenai 3 | 4 | 5 | -------------------------------------------------------------------------------- /chatbot_Raven.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "id": "LllC-MPI7phV" 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "#@title Install Dependencies { display-mode: \"form\" }\n", 12 | "\n", 13 | "!pip install inquirer transformers sty rwkvstic\n", 14 | "import os" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": { 21 | "colab": { 22 | "base_uri": "https://localhost:8080/" 23 | }, 24 | "id": "v6gzfDru7phX", 25 | "outputId": "37b9a578-769d-40fb-f5f1-bbc2016008b3" 26 | }, 27 | "outputs": [ 28 | { 29 | "name": "stdout", 30 | "output_type": "stream", 31 | "text": [ 32 | "init RWKVOPS, from super\n" 33 | ] 34 | } 35 | ], 36 | "source": [ 37 | "#@title Download and Initialize model { display-mode: \"form\" }\n", 38 | "from rwkvstic.load import RWKV\n", 39 | "\n", 40 | "model = RWKV(\n", 41 | " \"https://huggingface.co/Hazzzardous/rwkv-fastquant/resolve/main/ravenV7-14B-2-1-2.rwkv\"\n", 42 | ")" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": { 49 | "colab": { 50 | "base_uri": "https://localhost:8080/", 51 | "height": 185 52 | }, 53 | "id": "1n-qXBUY7phX", 54 | "outputId": "61cdf901-99a0-4302-8549-a22dd78a43c1" 55 | }, 56 | "outputs": [ 57 | { 58 | "name": "stderr", 59 | "output_type": "stream", 60 | "text": [ 61 | "100%|██████████| 18/18 [00:05<00:00, 3.40it/s]\n" 62 | ] 63 | }, 64 | { 65 | "data": { 66 | "text/html": [ 67 | "

<div>Although, Alexander The Great is widely lauded as one of the greatest conquerors and statesmen, his behavior as an emperor and his deviation from policies of peace and stability caused him to have a very dubious reputation. Although he had many war campaigns and won many victories during his reign, they had consequences later on his life. For example, he crossed the Shatt al Arab, a major trading route between the Aegean Sea and the Persian Gulf and the Persian homeland, and he found the borders of the empire shrinking, which made it difficult for him to exert his rule over his empire.<br/><br/>This had dire consequences in his time and history, as he was left with a shrinking empire and the difficult task of controlling his empire as he grew older and saw his empire begin to crumble. A result of his expansions and his pursuit of glory led to the first Persian Empire, and many subsequent world empires, such as the Roman Empire and the Byzantine, were a direct result of</div>
" 68 | ], 69 | "text/plain": [ 70 | "" 71 | ] 72 | }, 73 | "execution_count": 23, 74 | "metadata": {}, 75 | "output_type": "execute_result" 76 | } 77 | ], 78 | "source": [ 79 | "#@title Ask Question { display-mode: \"form\" }\n", 80 | "model.resetState()\n", 81 | "Prompt = \"Who is Alexander the Great?\" #@param {type:\"string\"}\n", 82 | "Tokens = 50 #@param {type:\"number\"}\n", 83 | "model.loadContext(f\"Prompt: {Prompt}?\\n\\nExpert Long Answer: \")\n", 84 | "out = model.forward(number=Tokens)[\"output\"]\n", 85 | "import IPython\n", 86 | "nl = \"\\n\"\n", 87 | "IPython.display.HTML(f\"

<div>{out.replace(nl,'<br/>')}</div>

\")" 88 | ] 89 | } 90 | ], 91 | "metadata": { 92 | "accelerator": "GPU", 93 | "colab": { 94 | "provenance": [] 95 | }, 96 | "gpuClass": "standard", 97 | "kernelspec": { 98 | "display_name": "Python 3", 99 | "name": "python3" 100 | }, 101 | "language_info": { 102 | "name": "python", 103 | "version": "3.10.6" 104 | }, 105 | "orig_nbformat": 4 106 | }, 107 | "nbformat": 4, 108 | "nbformat_minor": 0 109 | } -------------------------------------------------------------------------------- /YT_Open_Assistant_Pythia12B_8bit_in_Colab.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [], 7 | "machine_shape": "hm" 8 | }, 9 | "kernelspec": { 10 | "name": "python3", 11 | "display_name": "Python 3" 12 | }, 13 | "language_info": { 14 | "name": "python" 15 | }, 16 | "accelerator": "GPU", 17 | "gpuClass": "premium", 18 | "widgets": { 19 | "application/vnd.jupyter.widget-state+json": { 20 | "c9a379a62ed249099295f3394e459927": { 21 | "model_module": "@jupyter-widgets/controls", 22 | "model_name": "HBoxModel", 23 | "model_module_version": "1.5.0", 24 | "state": { 25 | "_dom_classes": [], 26 | "_model_module": "@jupyter-widgets/controls", 27 | "_model_module_version": "1.5.0", 28 | "_model_name": "HBoxModel", 29 | "_view_count": null, 30 | "_view_module": "@jupyter-widgets/controls", 31 | "_view_module_version": "1.5.0", 32 | "_view_name": "HBoxView", 33 | "box_style": "", 34 | "children": [ 35 | "IPY_MODEL_a09b185906804a0e91b4300568e09478", 36 | "IPY_MODEL_314dd1ef5a334c01a256ed064ae1f63f", 37 | "IPY_MODEL_6306537662a54f7891fe210a79996d00" 38 | ], 39 | "layout": "IPY_MODEL_fb9c43378526463d915d6137bc98f5a6" 40 | } 41 | }, 42 | "a09b185906804a0e91b4300568e09478": { 43 | "model_module": "@jupyter-widgets/controls", 44 | "model_name": "HTMLModel", 45 | "model_module_version": "1.5.0", 46 | "state": { 47 | "_dom_classes": [], 48 | "_model_module": "@jupyter-widgets/controls", 49 | "_model_module_version": "1.5.0", 50 | "_model_name": "HTMLModel", 51 | "_view_count": null, 52 | "_view_module": "@jupyter-widgets/controls", 53 | "_view_module_version": "1.5.0", 54 | "_view_name": "HTMLView", 55 | "description": "", 56 | "description_tooltip": null, 57 | "layout": "IPY_MODEL_b722fe13a08e4fa1809f16cf3b1ec8bf", 58 | "placeholder": "​", 59 | "style": "IPY_MODEL_e02ced5d03a3467498f1169c7a5f8881", 60 | "value": "Loading checkpoint shards: 100%" 61 | } 62 | }, 63 | "314dd1ef5a334c01a256ed064ae1f63f": { 64 | "model_module": "@jupyter-widgets/controls", 65 | "model_name": "FloatProgressModel", 66 | "model_module_version": "1.5.0", 67 | "state": { 68 | "_dom_classes": [], 69 | "_model_module": "@jupyter-widgets/controls", 70 | "_model_module_version": "1.5.0", 71 | "_model_name": "FloatProgressModel", 72 | "_view_count": null, 73 | "_view_module": "@jupyter-widgets/controls", 74 | "_view_module_version": "1.5.0", 75 | "_view_name": "ProgressView", 76 | "bar_style": "success", 77 | "description": "", 78 | "description_tooltip": null, 79 | "layout": "IPY_MODEL_e649877d67fd4cb7b87446e1853d6656", 80 | "max": 3, 81 | "min": 0, 82 | "orientation": "horizontal", 83 | "style": "IPY_MODEL_13ab9fce093c40f8b4993349180b8f45", 84 | "value": 3 85 | } 86 | }, 87 | "6306537662a54f7891fe210a79996d00": { 88 | "model_module": "@jupyter-widgets/controls", 89 | "model_name": "HTMLModel", 90 | "model_module_version": "1.5.0", 91 | "state": { 92 | "_dom_classes": [], 93 | "_model_module": 
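Aside on chatbot_Raven.ipynb above: the Ask Question cell drives Raven with three rwkvstic calls (resetState, loadContext, forward). A minimal sketch of wrapping them into a reusable helper, mirroring exactly the rwkvstic API and checkpoint URL used in the notebook; note the notebook's f-string appends a second "?" after the prompt, which this sketch drops:

# Sketch only -- mirrors the rwkvstic calls used in chatbot_Raven.ipynb above.
from rwkvstic.load import RWKV

model = RWKV(
    "https://huggingface.co/Hazzzardous/rwkv-fastquant/resolve/main/ravenV7-14B-2-1-2.rwkv"
)

def ask(question: str, tokens: int = 50) -> str:
    model.resetState()  # start each question from a clean hidden state
    model.loadContext(f"Prompt: {question}\n\nExpert Long Answer: ")
    return model.forward(number=tokens)["output"]

print(ask("Who is Alexander the Great?"))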
"@jupyter-widgets/controls", 94 | "_model_module_version": "1.5.0", 95 | "_model_name": "HTMLModel", 96 | "_view_count": null, 97 | "_view_module": "@jupyter-widgets/controls", 98 | "_view_module_version": "1.5.0", 99 | "_view_name": "HTMLView", 100 | "description": "", 101 | "description_tooltip": null, 102 | "layout": "IPY_MODEL_82c7669f039449a8a6a1c72d815d3540", 103 | "placeholder": "​", 104 | "style": "IPY_MODEL_ce1bca86513741cb87071c972fec4b32", 105 | "value": " 3/3 [00:20<00:00, 6.20s/it]" 106 | } 107 | }, 108 | "fb9c43378526463d915d6137bc98f5a6": { 109 | "model_module": "@jupyter-widgets/base", 110 | "model_name": "LayoutModel", 111 | "model_module_version": "1.2.0", 112 | "state": { 113 | "_model_module": "@jupyter-widgets/base", 114 | "_model_module_version": "1.2.0", 115 | "_model_name": "LayoutModel", 116 | "_view_count": null, 117 | "_view_module": "@jupyter-widgets/base", 118 | "_view_module_version": "1.2.0", 119 | "_view_name": "LayoutView", 120 | "align_content": null, 121 | "align_items": null, 122 | "align_self": null, 123 | "border": null, 124 | "bottom": null, 125 | "display": null, 126 | "flex": null, 127 | "flex_flow": null, 128 | "grid_area": null, 129 | "grid_auto_columns": null, 130 | "grid_auto_flow": null, 131 | "grid_auto_rows": null, 132 | "grid_column": null, 133 | "grid_gap": null, 134 | "grid_row": null, 135 | "grid_template_areas": null, 136 | "grid_template_columns": null, 137 | "grid_template_rows": null, 138 | "height": null, 139 | "justify_content": null, 140 | "justify_items": null, 141 | "left": null, 142 | "margin": null, 143 | "max_height": null, 144 | "max_width": null, 145 | "min_height": null, 146 | "min_width": null, 147 | "object_fit": null, 148 | "object_position": null, 149 | "order": null, 150 | "overflow": null, 151 | "overflow_x": null, 152 | "overflow_y": null, 153 | "padding": null, 154 | "right": null, 155 | "top": null, 156 | "visibility": null, 157 | "width": null 158 | } 159 | }, 160 | "b722fe13a08e4fa1809f16cf3b1ec8bf": { 161 | "model_module": "@jupyter-widgets/base", 162 | "model_name": "LayoutModel", 163 | "model_module_version": "1.2.0", 164 | "state": { 165 | "_model_module": "@jupyter-widgets/base", 166 | "_model_module_version": "1.2.0", 167 | "_model_name": "LayoutModel", 168 | "_view_count": null, 169 | "_view_module": "@jupyter-widgets/base", 170 | "_view_module_version": "1.2.0", 171 | "_view_name": "LayoutView", 172 | "align_content": null, 173 | "align_items": null, 174 | "align_self": null, 175 | "border": null, 176 | "bottom": null, 177 | "display": null, 178 | "flex": null, 179 | "flex_flow": null, 180 | "grid_area": null, 181 | "grid_auto_columns": null, 182 | "grid_auto_flow": null, 183 | "grid_auto_rows": null, 184 | "grid_column": null, 185 | "grid_gap": null, 186 | "grid_row": null, 187 | "grid_template_areas": null, 188 | "grid_template_columns": null, 189 | "grid_template_rows": null, 190 | "height": null, 191 | "justify_content": null, 192 | "justify_items": null, 193 | "left": null, 194 | "margin": null, 195 | "max_height": null, 196 | "max_width": null, 197 | "min_height": null, 198 | "min_width": null, 199 | "object_fit": null, 200 | "object_position": null, 201 | "order": null, 202 | "overflow": null, 203 | "overflow_x": null, 204 | "overflow_y": null, 205 | "padding": null, 206 | "right": null, 207 | "top": null, 208 | "visibility": null, 209 | "width": null 210 | } 211 | }, 212 | "e02ced5d03a3467498f1169c7a5f8881": { 213 | "model_module": "@jupyter-widgets/controls", 214 | "model_name": 
"DescriptionStyleModel", 215 | "model_module_version": "1.5.0", 216 | "state": { 217 | "_model_module": "@jupyter-widgets/controls", 218 | "_model_module_version": "1.5.0", 219 | "_model_name": "DescriptionStyleModel", 220 | "_view_count": null, 221 | "_view_module": "@jupyter-widgets/base", 222 | "_view_module_version": "1.2.0", 223 | "_view_name": "StyleView", 224 | "description_width": "" 225 | } 226 | }, 227 | "e649877d67fd4cb7b87446e1853d6656": { 228 | "model_module": "@jupyter-widgets/base", 229 | "model_name": "LayoutModel", 230 | "model_module_version": "1.2.0", 231 | "state": { 232 | "_model_module": "@jupyter-widgets/base", 233 | "_model_module_version": "1.2.0", 234 | "_model_name": "LayoutModel", 235 | "_view_count": null, 236 | "_view_module": "@jupyter-widgets/base", 237 | "_view_module_version": "1.2.0", 238 | "_view_name": "LayoutView", 239 | "align_content": null, 240 | "align_items": null, 241 | "align_self": null, 242 | "border": null, 243 | "bottom": null, 244 | "display": null, 245 | "flex": null, 246 | "flex_flow": null, 247 | "grid_area": null, 248 | "grid_auto_columns": null, 249 | "grid_auto_flow": null, 250 | "grid_auto_rows": null, 251 | "grid_column": null, 252 | "grid_gap": null, 253 | "grid_row": null, 254 | "grid_template_areas": null, 255 | "grid_template_columns": null, 256 | "grid_template_rows": null, 257 | "height": null, 258 | "justify_content": null, 259 | "justify_items": null, 260 | "left": null, 261 | "margin": null, 262 | "max_height": null, 263 | "max_width": null, 264 | "min_height": null, 265 | "min_width": null, 266 | "object_fit": null, 267 | "object_position": null, 268 | "order": null, 269 | "overflow": null, 270 | "overflow_x": null, 271 | "overflow_y": null, 272 | "padding": null, 273 | "right": null, 274 | "top": null, 275 | "visibility": null, 276 | "width": null 277 | } 278 | }, 279 | "13ab9fce093c40f8b4993349180b8f45": { 280 | "model_module": "@jupyter-widgets/controls", 281 | "model_name": "ProgressStyleModel", 282 | "model_module_version": "1.5.0", 283 | "state": { 284 | "_model_module": "@jupyter-widgets/controls", 285 | "_model_module_version": "1.5.0", 286 | "_model_name": "ProgressStyleModel", 287 | "_view_count": null, 288 | "_view_module": "@jupyter-widgets/base", 289 | "_view_module_version": "1.2.0", 290 | "_view_name": "StyleView", 291 | "bar_color": null, 292 | "description_width": "" 293 | } 294 | }, 295 | "82c7669f039449a8a6a1c72d815d3540": { 296 | "model_module": "@jupyter-widgets/base", 297 | "model_name": "LayoutModel", 298 | "model_module_version": "1.2.0", 299 | "state": { 300 | "_model_module": "@jupyter-widgets/base", 301 | "_model_module_version": "1.2.0", 302 | "_model_name": "LayoutModel", 303 | "_view_count": null, 304 | "_view_module": "@jupyter-widgets/base", 305 | "_view_module_version": "1.2.0", 306 | "_view_name": "LayoutView", 307 | "align_content": null, 308 | "align_items": null, 309 | "align_self": null, 310 | "border": null, 311 | "bottom": null, 312 | "display": null, 313 | "flex": null, 314 | "flex_flow": null, 315 | "grid_area": null, 316 | "grid_auto_columns": null, 317 | "grid_auto_flow": null, 318 | "grid_auto_rows": null, 319 | "grid_column": null, 320 | "grid_gap": null, 321 | "grid_row": null, 322 | "grid_template_areas": null, 323 | "grid_template_columns": null, 324 | "grid_template_rows": null, 325 | "height": null, 326 | "justify_content": null, 327 | "justify_items": null, 328 | "left": null, 329 | "margin": null, 330 | "max_height": null, 331 | "max_width": null, 332 | "min_height": 
null, 333 | "min_width": null, 334 | "object_fit": null, 335 | "object_position": null, 336 | "order": null, 337 | "overflow": null, 338 | "overflow_x": null, 339 | "overflow_y": null, 340 | "padding": null, 341 | "right": null, 342 | "top": null, 343 | "visibility": null, 344 | "width": null 345 | } 346 | }, 347 | "ce1bca86513741cb87071c972fec4b32": { 348 | "model_module": "@jupyter-widgets/controls", 349 | "model_name": "DescriptionStyleModel", 350 | "model_module_version": "1.5.0", 351 | "state": { 352 | "_model_module": "@jupyter-widgets/controls", 353 | "_model_module_version": "1.5.0", 354 | "_model_name": "DescriptionStyleModel", 355 | "_view_count": null, 356 | "_view_module": "@jupyter-widgets/base", 357 | "_view_module_version": "1.2.0", 358 | "_view_name": "StyleView", 359 | "description_width": "" 360 | } 361 | } 362 | } 363 | } 364 | }, 365 | "cells": [ 366 | { 367 | "cell_type": "code", 368 | "execution_count": null, 369 | "metadata": { 370 | "colab": { 371 | "base_uri": "https://localhost:8080/" 372 | }, 373 | "id": "1VJ7lfTYz0qu", 374 | "outputId": "bec01aa2-40ef-4570-8b23-db01a71c6d27" 375 | }, 376 | "outputs": [ 377 | { 378 | "output_type": "stream", 379 | "name": "stdout", 380 | "text": [ 381 | " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", 382 | " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", 383 | " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", 384 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m87.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 385 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m200.1/200.1 kB\u001b[0m \u001b[31m26.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 386 | "\u001b[?25h Building wheel for transformers (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", 387 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m468.7/468.7 kB\u001b[0m \u001b[31m20.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 388 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m69.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 389 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m110.5/110.5 kB\u001b[0m \u001b[31m15.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 390 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m132.9/132.9 kB\u001b[0m \u001b[31m18.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 391 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.0/1.0 MB\u001b[0m \u001b[31m74.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 392 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m212.2/212.2 kB\u001b[0m \u001b[31m28.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 393 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m264.6/264.6 kB\u001b[0m \u001b[31m33.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 394 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m158.8/158.8 kB\u001b[0m \u001b[31m21.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 395 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m114.2/114.2 kB\u001b[0m \u001b[31m17.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 396 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m104.3/104.3 MB\u001b[0m \u001b[31m17.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 397 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m215.3/215.3 kB\u001b[0m \u001b[31m27.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 398 | "\u001b[?25h" 399 | ] 400 | } 401 | ], 402 | "source": [ 403 | "!pip -q install git+https://github.com/huggingface/transformers # need to install from github\n", 404 | "!pip -q install datasets sentencepiece \n", 405 | "!pip -q install bitsandbytes accelerate" 406 | ] 407 | }, 408 | { 409 | "cell_type": "markdown", 410 | "source": [ 411 | "# **Open Assistant** - Pythia 12B\n" 412 | ], 413 | "metadata": { 414 | "id": "Wwn1vLOW-VQz" 415 | } 416 | }, 417 | { 418 | "cell_type": "code", 419 | "source": [ 420 | "!nvidia-smi" 421 | ], 422 | "metadata": { 423 | "colab": { 424 | "base_uri": "https://localhost:8080/" 425 | }, 426 | "id": "hdVSk5iZ1DVB", 427 | "outputId": "310b66ad-67da-4625-9fe8-ea45c0172f32" 428 | }, 429 | "execution_count": null, 430 | "outputs": [ 431 | { 432 | "output_type": "stream", 433 | "name": "stdout", 434 | "text": [ 435 | "Sat Apr 15 23:27:33 2023 \n", 436 | "+-----------------------------------------------------------------------------+\n", 437 | "| NVIDIA-SMI 525.85.12 Driver Version: 525.85.12 CUDA Version: 12.0 |\n", 438 | "|-------------------------------+----------------------+----------------------+\n", 439 | "| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n", 440 | "| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n", 441 | "| | | MIG M. |\n", 442 | "|===============================+======================+======================|\n", 443 | "| 0 NVIDIA A100-SXM... 
Off | 00000000:00:04.0 Off | 0 |\n", 444 | "| N/A 30C P0 42W / 350W | 0MiB / 40960MiB | 0% Default |\n", 445 | "| | | Disabled |\n", 446 | "+-------------------------------+----------------------+----------------------+\n", 447 | " \n", 448 | "+-----------------------------------------------------------------------------+\n", 449 | "| Processes: |\n", 450 | "| GPU GI CI PID Type Process name GPU Memory |\n", 451 | "| ID ID Usage |\n", 452 | "|=============================================================================|\n", 453 | "| No running processes found |\n", 454 | "+-----------------------------------------------------------------------------+\n" 455 | ] 456 | } 457 | ] 458 | }, 459 | { 460 | "cell_type": "code", 461 | "source": [ 462 | "# from instruct_pipeline import InstructionTextGenerationPipeline\n", 463 | "from transformers import AutoModelForCausalLM, AutoTokenizer\n", 464 | "\n", 465 | "tokenizer = AutoTokenizer.from_pretrained(\"OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5\", padding_side=\"left\")\n", 466 | "\n", 467 | "\n", 468 | "model = AutoModelForCausalLM.from_pretrained(\"OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5\", \n", 469 | " load_in_8bit=True,\n", 470 | " device_map=\"auto\")\n", 471 | "\n", 472 | "\n" 473 | ], 474 | "metadata": { 475 | "colab": { 476 | "base_uri": "https://localhost:8080/", 477 | "height": 114, 478 | "referenced_widgets": [ 479 | "c9a379a62ed249099295f3394e459927", 480 | "a09b185906804a0e91b4300568e09478", 481 | "314dd1ef5a334c01a256ed064ae1f63f", 482 | "6306537662a54f7891fe210a79996d00", 483 | "fb9c43378526463d915d6137bc98f5a6", 484 | "b722fe13a08e4fa1809f16cf3b1ec8bf", 485 | "e02ced5d03a3467498f1169c7a5f8881", 486 | "e649877d67fd4cb7b87446e1853d6656", 487 | "13ab9fce093c40f8b4993349180b8f45", 488 | "82c7669f039449a8a6a1c72d815d3540", 489 | "ce1bca86513741cb87071c972fec4b32" 490 | ] 491 | }, 492 | "id": "4RaPJz4lFi8M", 493 | "outputId": "50a4a373-a1cf-4472-ac38-330ea910d0d8" 494 | }, 495 | "execution_count": null, 496 | "outputs": [ 497 | { 498 | "output_type": "stream", 499 | "name": "stderr", 500 | "text": [ 501 | "Overriding torch_dtype=None with `torch_dtype=torch.float16` due to requirements of `bitsandbytes` to enable model loading in mixed int8. 
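The oasst-sft checkpoints are trained on chat markup rather than bare questions, so the model loaded above responds best when prompted with its special tokens. A hedged sketch reusing the tokenizer and model objects from the cell above; the <|prompter|>/<|assistant|> markup is the format documented for the oasst-sft-4 Pythia models, so verify it against the model card:

# Sketch only: <|prompter|> ... <|endoftext|><|assistant|> is the chat format
# documented for the oasst-sft Pythia checkpoints.
prompt = "<|prompter|>What is a llama?<|endoftext|><|assistant|>"

inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
output_ids = model.generate(
    **inputs,
    max_new_tokens=128,
    do_sample=True,
    top_p=0.95,
    temperature=0.8,
)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))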
Either pass torch_dtype=torch.float16 or don't pass this argument at all to remove this warning.\n" 502 | ] 503 | }, 504 | { 505 | "output_type": "display_data", 506 | "data": { 507 | "text/plain": [ 508 | "Loading checkpoint shards: 0%| | 0/3 [00:00] 2.82G 81.0MB/s in 35s \n", 400 | "\n", 401 | "2022-09-21 06:58:12 (82.9 MB/s) - ‘./RWKV-LM/RWKV-v4/500.pth’ saved [3030279587/3030279587]\n", 402 | "\n" 403 | ] 404 | } 405 | ], 406 | "source": [ 407 | "!git clone https://github.com/BlinkDL/RWKV-LM.git\n", 408 | "#!wget https://huggingface.co/BlinkDL/rwkv-4-pile-3b/resolve/main/RWKV-4-Pile-3B-20220915-1207.pth -O ./RWKV-LM/RWKV-v4/500.pth 3B needs more vram then google offers\n", 409 | "!wget https://huggingface.co/BlinkDL/rwkv-4-pile-1b5/resolve/main/RWKV-4-Pile-1B5-20220903-8040.pth -O ./RWKV-LM/RWKV-v4/500.pth" 410 | ] 411 | }, 412 | { 413 | "cell_type": "code", 414 | "source": [ 415 | "%cd ./RWKV-LM/RWKV-v4/" 416 | ], 417 | "metadata": { 418 | "colab": { 419 | "base_uri": "https://localhost:8080/" 420 | }, 421 | "id": "g5JdHvg2zjom", 422 | "outputId": "35230cee-1081-46e9-d9c6-1d682660d61b" 423 | }, 424 | "execution_count": null, 425 | "outputs": [ 426 | { 427 | "output_type": "stream", 428 | "name": "stdout", 429 | "text": [ 430 | "/content/RWKV-LM/RWKV-v4\n" 431 | ] 432 | } 433 | ] 434 | }, 435 | { 436 | "cell_type": "code", 437 | "source": [ 438 | "!pip install transformers\n", 439 | "!pip install ninja" 440 | ], 441 | "metadata": { 442 | "colab": { 443 | "base_uri": "https://localhost:8080/" 444 | }, 445 | "id": "F02bGykkyicB", 446 | "outputId": "fbbb2802-050a-4480-bf99-d08f4c7cd11e" 447 | }, 448 | "execution_count": null, 449 | "outputs": [ 450 | { 451 | "output_type": "stream", 452 | "name": "stdout", 453 | "text": [ 454 | "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", 455 | "Collecting transformers\n", 456 | " Downloading transformers-4.22.1-py3-none-any.whl (4.9 MB)\n", 457 | "\u001b[K |████████████████████████████████| 4.9 MB 4.0 MB/s \n", 458 | "\u001b[?25hRequirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.7/dist-packages (from transformers) (6.0)\n", 459 | "Collecting huggingface-hub<1.0,>=0.9.0\n", 460 | " Downloading huggingface_hub-0.9.1-py3-none-any.whl (120 kB)\n", 461 | "\u001b[K |████████████████████████████████| 120 kB 66.4 MB/s \n", 462 | "\u001b[?25hRequirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from transformers) (2.23.0)\n", 463 | "Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from transformers) (3.8.0)\n", 464 | "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.7/dist-packages (from transformers) (21.3)\n", 465 | "Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from transformers) (4.12.0)\n", 466 | "Collecting tokenizers!=0.11.3,<0.13,>=0.11.1\n", 467 | " Downloading tokenizers-0.12.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.6 MB)\n", 468 | "\u001b[K |████████████████████████████████| 6.6 MB 48.2 MB/s \n", 469 | "\u001b[?25hRequirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (1.21.6)\n", 470 | "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.7/dist-packages (from transformers) (4.64.1)\n", 471 | "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (2022.6.2)\n", 472 | "Requirement already satisfied: 
typing-extensions>=3.7.4.3 in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0,>=0.9.0->transformers) (4.1.1)\n", 473 | "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging>=20.0->transformers) (3.0.9)\n", 474 | "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->transformers) (3.8.1)\n", 475 | "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2.10)\n", 476 | "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2022.6.15)\n", 477 | "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (1.24.3)\n", 478 | "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (3.0.4)\n", 479 | "Installing collected packages: tokenizers, huggingface-hub, transformers\n", 480 | "Successfully installed huggingface-hub-0.9.1 tokenizers-0.12.1 transformers-4.22.1\n", 481 | "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", 482 | "Collecting ninja\n", 483 | " Downloading ninja-1.10.2.3-py2.py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.whl (108 kB)\n", 484 | "\u001b[K |████████████████████████████████| 108 kB 4.0 MB/s \n", 485 | "\u001b[?25hInstalling collected packages: ninja\n", 486 | "Successfully installed ninja-1.10.2.3\n" 487 | ] 488 | } 489 | ] 490 | }, 491 | { 492 | "cell_type": "code", 493 | "source": [ 494 | "########################################################################################################\n", 495 | "# The RWKV Language Model - https://github.com/BlinkDL/RWKV-LM\n", 496 | "########################################################################################################\n", 497 | "import numpy as np\n", 498 | "import math, os\n", 499 | "import time\n", 500 | "import types\n", 501 | "import copy\n", 502 | "import torch\n", 503 | "from torch.nn import functional as F\n", 504 | "from src.utils import TOKENIZER, Dataset\n", 505 | "torch.backends.cudnn.benchmark = True\n", 506 | "torch.backends.cudnn.allow_tf32 = True\n", 507 | "torch.backends.cuda.matmul.allow_tf32 = True\n", 508 | "np.set_printoptions(precision=4, suppress=True, linewidth=200)\n", 509 | "\n", 510 | "########################################################################################################\n", 511 | "# Step 1: set model\n", 512 | "# \n", 513 | "# Set TOKEN_MODE to 'char' or 'bpe' if the model is trained by 'train.py' from scratch.\n", 514 | "#\n", 515 | "# Set TOKEN_MODE to 'pile' if you want to test pre-trained pile models.\n", 516 | "########################################################################################################\n", 517 | "\n", 518 | "TOKEN_MODE = 'pile' # char / bpe / pile\n", 519 | "\n", 520 | "n_layer = 6\n", 521 | "n_embd = 512\n", 522 | "ctx_len = 10024\n", 523 | "\n", 524 | "if TOKEN_MODE == 'char':\n", 525 | " MODEL_NAME = 'trained-500' # your trained model\n", 526 | " WORD_NAME = 'vocab' # the .json vocab (generated by train.py)\n", 527 | " # set UNKNOWN_CHAR to the rarest token in your vocab.json, and all unknown tokens in your prompt will be denoted by it\n", 528 | " UNKNOWN_CHAR = ' ' # here we just set it to ' ' for simplicity\n", 529 | "\n", 530 | "elif TOKEN_MODE == 'bpe':\n", 531 | 
" MODEL_NAME = 'trained-500' # your trained model\n", 532 | " WORD_NAME = ['model-vocab.json', 'model-merges.txt'] # [vocab, merge] for your BPE model\n", 533 | " UNKNOWN_CHAR = None\n", 534 | "\n", 535 | "elif TOKEN_MODE == 'pile':\n", 536 | " WORD_NAME = ['20B_tokenizer.json', '20B_tokenizer.json']\n", 537 | " UNKNOWN_CHAR = None\n", 538 | "\n", 539 | " #---> you can set MODEL_NAME to your fine-tuned model <---\n", 540 | "\n", 541 | " MODEL_NAME = '500'\n", 542 | " \n", 543 | " # for 3b\n", 544 | " #n_layer = 32\n", 545 | " #n_embd = 2560\n", 546 | " #ctx_len = 10024\n", 547 | "\n", 548 | " # for 1b5'\n", 549 | " n_layer = 24\n", 550 | " n_embd = 2048\n", 551 | " ctx_len = 1024\n", 552 | "\n", 553 | "os.environ['RWKV_FLOAT_MODE'] = 'bf16' # 'bf16' / 'fp16' / 'fp32' (note: only using fp32 at this moment)\n", 554 | "os.environ['RWKV_RUN_DEVICE'] = 'cuda' # 'cpu' (already very fast) or 'cuda'\n", 555 | "model_type = 'RWKV' # 'RWKV' or 'RWKV-ffnPre'\n", 556 | "\n", 557 | "########################################################################################################\n", 558 | "# Step 2: set prompt & sampling stuffs\n", 559 | "########################################################################################################\n", 560 | "\n", 561 | "# context = 'A'\n", 562 | "# context = \"\\nIn the\"\n", 563 | "# context = '\\nSugar:'\n", 564 | "\n", 565 | "NUM_TRIALS = 5\n", 566 | "LENGTH_PER_TRIAL = 3330\n", 567 | "\n", 568 | "DEBUG_DEBUG = False # True False --> show softmax output\n", 569 | "\n", 570 | "########################################################################################################\n", 571 | "\n", 572 | "print(f'Loading {MODEL_NAME}...')\n", 573 | "from src.model_run import RWKV_RNN\n", 574 | "model = RWKV_RNN(MODEL_NAME, os.environ['RWKV_RUN_DEVICE'], model_type, n_layer, n_embd, ctx_len)\n", 575 | "tokenizer = TOKENIZER(WORD_NAME, UNKNOWN_CHAR=UNKNOWN_CHAR)\n" 576 | ], 577 | "metadata": { 578 | "id": "a7P7ISTa1XgC", 579 | "colab": { 580 | "base_uri": "https://localhost:8080/", 581 | "height": 275, 582 | "referenced_widgets": [ 583 | "288c96b917a94903a6a550db86eb3e0e", 584 | "535b9ac6e879412285c2e661660d842a", 585 | "88557cbe4553406794b1ab38d0398e2e", 586 | "50c7b4aef7c448ca92064226881ffbd1", 587 | "57b31bbd6e934adfbc7e10d882edba0c", 588 | "0cb7936baadd4245826a63e9e76009ef", 589 | "ea20bd4e8df84d548f961628980c2bd1", 590 | "38ac1ccc2f7f42399311de83ba19376a", 591 | "008fc9f2c5ab40bd84176926234882b9", 592 | "559a880554b14200b4b1ab24ab40d41e", 593 | "7df74532ac57413cb55207099dbffaa7" 594 | ] 595 | }, 596 | "outputId": "af73daec-3a23-4bfb-b960-6feccb5436b1" 597 | }, 598 | "execution_count": null, 599 | "outputs": [ 600 | { 601 | "output_type": "stream", 602 | "name": "stdout", 603 | "text": [ 604 | "Loading 500...\n", 605 | "\n", 606 | "RWKV_HEAD_QK_DIM 0\n", 607 | "\n", 608 | "Using /root/.cache/torch_extensions/py37_cu113 as PyTorch extensions root...\n", 609 | "Creating extension directory /root/.cache/torch_extensions/py37_cu113/wkv...\n", 610 | "Detected CUDA files, patching ldflags\n", 611 | "Emitting ninja build file /root/.cache/torch_extensions/py37_cu113/wkv/build.ninja...\n", 612 | "Building extension module wkv...\n", 613 | "Allowing ninja to set a default number of workers... 
(overridable by setting the environment variable MAX_JOBS=N)\n", 614 | "Loading extension module wkv...\n" 615 | ] 616 | }, 617 | { 618 | "output_type": "stream", 619 | "name": "stderr", 620 | "text": [ 621 | "The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.\n" 622 | ] 623 | }, 624 | { 625 | "output_type": "stream", 626 | "name": "stdout", 627 | "text": [ 628 | "Moving 0 files to the new cache system\n" 629 | ] 630 | }, 631 | { 632 | "output_type": "display_data", 633 | "data": { 634 | "text/plain": [ 635 | "0it [00:00, ?it/s]" 636 | ], 637 | "application/vnd.jupyter.widget-view+json": { 638 | "version_major": 2, 639 | "version_minor": 0, 640 | "model_id": "288c96b917a94903a6a550db86eb3e0e" 641 | } 642 | }, 643 | "metadata": {} 644 | } 645 | ] 646 | }, 647 | { 648 | "cell_type": "code", 649 | "source": [ 650 | "!nvidia-smi" 651 | ], 652 | "metadata": { 653 | "colab": { 654 | "base_uri": "https://localhost:8080/" 655 | }, 656 | "id": "8Qw1y-qAwTzU", 657 | "outputId": "807eeb47-4d59-4616-d869-22c2cc297621" 658 | }, 659 | "execution_count": null, 660 | "outputs": [ 661 | { 662 | "output_type": "stream", 663 | "name": "stdout", 664 | "text": [ 665 | "Wed Sep 21 06:59:29 2022 \n", 666 | "+-----------------------------------------------------------------------------+\n", 667 | "| NVIDIA-SMI 460.32.03 Driver Version: 460.32.03 CUDA Version: 11.2 |\n", 668 | "|-------------------------------+----------------------+----------------------+\n", 669 | "| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n", 670 | "| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n", 671 | "| | | MIG M. 
|\n", 672 | "|===============================+======================+======================|\n", 673 | "| 0 Tesla T4 Off | 00000000:00:04.0 Off | 0 |\n", 674 | "| N/A 50C P0 28W / 70W | 9282MiB / 15109MiB | 0% Default |\n", 675 | "| | | N/A |\n", 676 | "+-------------------------------+----------------------+----------------------+\n", 677 | " \n", 678 | "+-----------------------------------------------------------------------------+\n", 679 | "| Processes: |\n", 680 | "| GPU GI CI PID Type Process name GPU Memory |\n", 681 | "| ID ID Usage |\n", 682 | "|=============================================================================|\n", 683 | "+-----------------------------------------------------------------------------+\n" 684 | ] 685 | } 686 | ] 687 | }, 688 | { 689 | "cell_type": "code", 690 | "source": [ 691 | "\n", 692 | "########################################################################################################\n", 693 | "context = \"You are an AI running a house.\\ngiven the following commands: volumeUp(int amount),volumeDown(int amount),setVolume(int percent),setLights([r,g,b]),playSong(string url)\\nand the given instruction 'Please make the room romantic'\\nList the commands, and the parameters they should have, that should be done to fullfil the command\\nGive the commands in the format [command(parameter)]\\n\\nTask: list the commands and a reasonable value for the parameter\\nResponse:\"\n", 694 | "\n", 695 | "TEMPERATURE = 0.9\n", 696 | "top_p = 0.8\n", 697 | "top_p_newline = 0.9 # only used in TOKEN_MODE = char\n", 698 | "\n", 699 | "if tokenizer.charMode:\n", 700 | " context = tokenizer.refine_context(context)\n", 701 | " ctx = [tokenizer.stoi.get(s, tokenizer.UNKNOWN_CHAR) for s in context]\n", 702 | "else:\n", 703 | " ctx = tokenizer.tokenizer.encode(context)\n", 704 | "src_len = len(ctx)\n", 705 | "src_ctx = ctx.copy()\n", 706 | "\n", 707 | "print('\\nYour prompt has ' + str(src_len) + ' tokens.')\n", 708 | "print('\\n--> Currently the first run takes a while if your prompt is long, as we are using RNN to process the prompt. Use GPT to build the hidden state for better speed. 
<--\\n')\n", 709 | "\n", 710 | "for TRIAL in range(1 if DEBUG_DEBUG else NUM_TRIALS):\n", 711 | " t_begin = time.time_ns()\n", 712 | " print(('-' * 30) + context, end='')\n", 713 | " ctx = src_ctx.copy()\n", 714 | " model.clear()\n", 715 | " if TRIAL == 0:\n", 716 | " init_state = types.SimpleNamespace()\n", 717 | " for i in range(src_len):\n", 718 | " x = ctx[:i+1]\n", 719 | " if i == src_len - 1:\n", 720 | " init_state.out = model.run(x)\n", 721 | " else:\n", 722 | " model.run(x)\n", 723 | " model.save(init_state)\n", 724 | " else:\n", 725 | " model.load(init_state)\n", 726 | "\n", 727 | " for i in range(src_len, src_len + (1 if DEBUG_DEBUG else LENGTH_PER_TRIAL)):\n", 728 | " x = ctx[:i+1]\n", 729 | " x = x[-ctx_len:]\n", 730 | "\n", 731 | " if i == src_len:\n", 732 | " out = copy.deepcopy(init_state.out)\n", 733 | " else:\n", 734 | " out = model.run(x)\n", 735 | " if DEBUG_DEBUG:\n", 736 | " print('model', np.array(x), '==>', np.array(\n", 737 | " out), np.max(out), np.min(out))\n", 738 | "\n", 739 | " if TOKEN_MODE == 'pile':\n", 740 | " out[0] = -999999999 # disable <|endoftext|>\n", 741 | "\n", 742 | " char = tokenizer.sample_logits(out, x, ctx_len, temperature=TEMPERATURE,\n", 743 | " top_p_usual=top_p, top_p_newline=top_p_newline)\n", 744 | " char = char.item()\n", 745 | " if tokenizer.charMode:\n", 746 | " print(tokenizer.itos[int(char)], end='', flush=True)\n", 747 | " else:\n", 748 | " print(tokenizer.tokenizer.decode(int(char)), end='', flush=True)\n", 749 | " ctx += [char]\n", 750 | "\n", 751 | " t_end = time.time_ns()\n", 752 | " print(\"\\n----------\", round((t_end - t_begin) / (10 ** 9), 2), end='s ')\n" 753 | ], 754 | "metadata": { 755 | "id": "Co9eLstRwRZ_", 756 | "colab": { 757 | "base_uri": "https://localhost:8080/", 758 | "height": 990 759 | }, 760 | "outputId": "010231cc-abfe-4356-ff11-1ec3abac295e" 761 | }, 762 | "execution_count": null, 763 | "outputs": [ 764 | { 765 | "output_type": "stream", 766 | "name": "stdout", 767 | "text": [ 768 | "\n", 769 | "Your prompt has 110 tokens.\n", 770 | "\n", 771 | "--> Currently the first run takes a while if your prompt is long, as we are using RNN to process the prompt. Use GPT to build the hidden state for better speed. <--\n", 772 | "\n", 773 | "------------------------------You are an AI running a house.\n", 774 | "given the following commands: volumeUp(int amount),volumeDown(int amount),setVolume(int percent),setLights([r,g,b]),playSong(string url)\n", 775 | "and the given instruction 'Please make the room romantic'\n", 776 | "List the commands, and the parameters they should have, that should be done to fullfil the command\n", 777 | "Give the commands in the format [command(parameter)]\n", 778 | "\n", 779 | "Task: list the commands and a reasonable value for the parameter\n", 780 | "Response:\n", 781 | "\n", 782 | "answers:\n", 783 | "- answer 1:\n", 784 | "- answer 2:\n", 785 | "- answer 3:\n", 786 | "- answer 4:\n", 787 | "- answer 5:\n", 788 | "\n", 789 | "The first two commands (room.increase volume and command.setVolume) work.\n", 790 | "\n", 791 | "But the others do not. 
The command \"volumeUp(int percent)\" seems to do nothing.\n", 792 | "I have tried to change the command by \"volumeUp(1)\" but the result is still the same.\n", 793 | "\n", 794 | "What command should I use to set the volume to full?\n", 795 | "\n", 796 | "I have an idea about what happens in the first command, but I don't know how it works.\n", 797 | "This is the full code of the game:\n", 798 | "#include \n", 799 | "#include \n", 800 | "#include \n", 801 | "#include \n", 802 | "#include <" 803 | ] 804 | }, 805 | { 806 | "output_type": "error", 807 | "ename": "KeyboardInterrupt", 808 | "evalue": "ignored", 809 | "traceback": [ 810 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 811 | "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", 812 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 41\u001b[0m \u001b[0mout\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdeepcopy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minit_state\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 42\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 43\u001b[0;31m \u001b[0mout\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 44\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mDEBUG_DEBUG\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 45\u001b[0m print('model', np.array(x), '==>', np.array(\n", 813 | "\u001b[0;32m/content/RWKV-LM/RWKV-v4/src/model_run.py\u001b[0m in \u001b[0;36mrun\u001b[0;34m(self, ctx)\u001b[0m\n\u001b[1;32m 365\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 366\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mSA\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mLN\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mw\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mblocks\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mln1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mw\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mblocks\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0matt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34mf'att.{i}'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 367\u001b[0;31m \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mFF\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mLN\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mw\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mblocks\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mln2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mw\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mblocks\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mffn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34mf'ffn.{i}'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 368\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 369\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mLN\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mw\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mln_out\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 814 | "\u001b[0;32m/content/RWKV-LM/RWKV-v4/src/model_run.py\u001b[0m in \u001b[0;36mLN\u001b[0;34m(self, xx, w)\u001b[0m\n\u001b[1;32m 301\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 302\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mLN\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mxx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mw\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 303\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mF\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlayer_norm\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mxx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mn_embd\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mweight\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mw\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mweight\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbias\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mw\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbias\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 304\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 305\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mFF\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mxx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mw\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 815 | "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py\u001b[0m in \u001b[0;36mlayer_norm\u001b[0;34m(input, normalized_shape, weight, bias, eps)\u001b[0m\n\u001b[1;32m 2501\u001b[0m \u001b[0mlayer_norm\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mweight\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbias\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnormalized_shape\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mweight\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mweight\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbias\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mbias\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0meps\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0meps\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2502\u001b[0m )\n\u001b[0;32m-> 2503\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlayer_norm\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnormalized_shape\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mweight\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbias\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0meps\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackends\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcudnn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0menabled\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2504\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2505\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 816 | "\u001b[0;31mKeyboardInterrupt\u001b[0m: " 817 | ] 818 | } 819 | ] 820 | } 821 | ] 822 | } -------------------------------------------------------------------------------- /YT_MPT_Instruct_7B.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "colab": { 8 | "base_uri": "https://localhost:8080/" 9 | }, 10 | "id": "1VJ7lfTYz0qu", 11 | "outputId": "f55c1811-87d3-41e9-8f9b-2fdaff1d83e9" 12 | }, 13 | "outputs": [ 14 | { 15 | "output_type": "stream", 16 | "name": "stdout", 17 | "text": [ 18 | " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", 19 | " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", 20 | " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", 21 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m100.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 22 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m224.5/224.5 kB\u001b[0m \u001b[31m28.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 23 | "\u001b[?25h Building wheel for transformers (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", 24 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m474.6/474.6 kB\u001b[0m \u001b[31m36.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 25 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m7.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 26 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m212.5/212.5 kB\u001b[0m \u001b[31m26.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 27 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.3/134.3 kB\u001b[0m \u001b[31m17.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 28 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.0/1.0 MB\u001b[0m \u001b[31m59.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 29 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m110.5/110.5 kB\u001b[0m \u001b[31m17.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 30 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m268.8/268.8 kB\u001b[0m \u001b[31m33.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 31 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m149.6/149.6 kB\u001b[0m \u001b[31m21.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 32 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m114.5/114.5 kB\u001b[0m \u001b[31m16.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 33 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m104.3/104.3 MB\u001b[0m \u001b[31m16.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 34 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m215.3/215.3 kB\u001b[0m \u001b[31m26.6 MB/s\u001b[0m eta 
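Stepping back to the RWKV-v4 generation loop above: tokenizer.sample_logits combines a temperature with nucleus (top-p) filtering before drawing the next token. A self-contained numpy sketch of that sampling step, assuming it follows the usual top-p recipe (the actual RWKV helper may differ in details such as the separate newline threshold used in char mode):

import numpy as np

def sample_top_p(logits, temperature=0.9, top_p=0.8):
    # Temperature-scaled softmax, numerically stabilised.
    scaled = logits / temperature
    probs = np.exp(scaled - np.max(scaled))
    probs /= probs.sum()
    # Keep the smallest set of tokens whose cumulative mass reaches top_p.
    order = np.argsort(probs)[::-1]
    cutoff = np.searchsorted(np.cumsum(probs[order]), top_p) + 1
    keep = order[:cutoff]
    # Renormalise over the kept tokens and sample one of them.
    return int(np.random.choice(keep, p=probs[keep] / probs[keep].sum()))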
\u001b[36m0:00:00\u001b[0m\n", 35 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m108.2/108.2 MB\u001b[0m \u001b[31m16.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 36 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m42.2/42.2 kB\u001b[0m \u001b[31m6.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 37 | "\u001b[?25h" 38 | ] 39 | } 40 | ], 41 | "source": [ 42 | "!pip -q install git+https://github.com/huggingface/transformers # need to install from github\n", 43 | "!pip install -q datasets loralib sentencepiece \n", 44 | "!pip -q install bitsandbytes accelerate xformers einops" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": { 51 | "colab": { 52 | "base_uri": "https://localhost:8080/" 53 | }, 54 | "id": "hdVSk5iZ1DVB", 55 | "outputId": "ce652ba0-af0b-4e89-cb36-146491137310" 56 | }, 57 | "outputs": [ 58 | { 59 | "output_type": "stream", 60 | "name": "stdout", 61 | "text": [ 62 | "Sat May 6 08:25:23 2023 \n", 63 | "+-----------------------------------------------------------------------------+\n", 64 | "| NVIDIA-SMI 525.85.12 Driver Version: 525.85.12 CUDA Version: 12.0 |\n", 65 | "|-------------------------------+----------------------+----------------------+\n", 66 | "| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n", 67 | "| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n", 68 | "| | | MIG M. |\n", 69 | "|===============================+======================+======================|\n", 70 | "| 0 NVIDIA A100-SXM... Off | 00000000:00:04.0 Off | 0 |\n", 71 | "| N/A 32C P0 44W / 400W | 0MiB / 40960MiB | 0% Default |\n", 72 | "| | | Disabled |\n", 73 | "+-------------------------------+----------------------+----------------------+\n", 74 | " \n", 75 | "+-----------------------------------------------------------------------------+\n", 76 | "| Processes: |\n", 77 | "| GPU GI CI PID Type Process name GPU Memory |\n", 78 | "| ID ID Usage |\n", 79 | "|=============================================================================|\n", 80 | "| No running processes found |\n", 81 | "+-----------------------------------------------------------------------------+\n" 82 | ] 83 | } 84 | ], 85 | "source": [ 86 | "!nvidia-smi" 87 | ] 88 | }, 89 | { 90 | "cell_type": "markdown", 91 | "metadata": { 92 | "id": "zfjJCIE5JjVI" 93 | }, 94 | "source": [ 95 | "# Mosaic Instruct MPT 7B" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "source": [ 101 | "from typing import Any, Dict, Tuple\n", 102 | "import warnings\n", 103 | "\n", 104 | "import torch\n", 105 | "from transformers import AutoModelForCausalLM, AutoTokenizer\n", 106 | "from transformers import (\n", 107 | " StoppingCriteria,\n", 108 | " StoppingCriteriaList,\n", 109 | " TextIteratorStreamer,\n", 110 | ")\n", 111 | "\n", 112 | "\n", 113 | "INSTRUCTION_KEY = \"### Instruction:\"\n", 114 | "RESPONSE_KEY = \"### Response:\"\n", 115 | "END_KEY = \"### End\"\n", 116 | "INTRO_BLURB = \"Below is an instruction that describes a task. 
Write a response that appropriately completes the request.\"\n", 117 | "PROMPT_FOR_GENERATION_FORMAT = \"\"\"{intro}\n", 118 | "{instruction_key}\n", 119 | "{instruction}\n", 120 | "{response_key}\n", 121 | "\"\"\".format(\n", 122 | " intro=INTRO_BLURB,\n", 123 | " instruction_key=INSTRUCTION_KEY,\n", 124 | " instruction=\"{instruction}\",\n", 125 | " response_key=RESPONSE_KEY,\n", 126 | ")\n", 127 | "\n", 128 | "\n", 129 | "class InstructionTextGenerationPipeline:\n", 130 | " def __init__(\n", 131 | " self,\n", 132 | " model_name,\n", 133 | " torch_dtype=torch.bfloat16,\n", 134 | " trust_remote_code=True,\n", 135 | " use_auth_token=None,\n", 136 | " ) -> None:\n", 137 | " self.model = AutoModelForCausalLM.from_pretrained(\n", 138 | " model_name,\n", 139 | " torch_dtype=torch_dtype,\n", 140 | " trust_remote_code=trust_remote_code,\n", 141 | " use_auth_token=use_auth_token,\n", 142 | " )\n", 143 | "\n", 144 | " tokenizer = AutoTokenizer.from_pretrained(\n", 145 | " model_name,\n", 146 | " trust_remote_code=trust_remote_code,\n", 147 | " use_auth_token=use_auth_token,\n", 148 | " )\n", 149 | " if tokenizer.pad_token_id is None:\n", 150 | " warnings.warn(\n", 151 | " \"pad_token_id is not set for the tokenizer. Using eos_token_id as pad_token_id.\"\n", 152 | " )\n", 153 | " tokenizer.pad_token = tokenizer.eos_token\n", 154 | " tokenizer.padding_side = \"left\"\n", 155 | " self.tokenizer = tokenizer\n", 156 | "\n", 157 | " device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", 158 | " self.model.eval()\n", 159 | " self.model.to(device=device, dtype=torch_dtype)\n", 160 | "\n", 161 | " self.generate_kwargs = {\n", 162 | " \"temperature\": 0.1,\n", 163 | " \"top_p\": 0.92,\n", 164 | " \"top_k\": 0,\n", 165 | " \"max_new_tokens\": 1024,\n", 166 | " \"use_cache\": True,\n", 167 | " \"do_sample\": True,\n", 168 | " \"eos_token_id\": self.tokenizer.eos_token_id,\n", 169 | " \"pad_token_id\": self.tokenizer.pad_token_id,\n", 170 | " \"repetition_penalty\": 1.1, # 1.0 means no penalty, > 1.0 means penalty, 1.2 from CTRL paper\n", 171 | " }\n", 172 | "\n", 173 | " def format_instruction(self, instruction):\n", 174 | " return PROMPT_FOR_GENERATION_FORMAT.format(instruction=instruction)\n", 175 | "\n", 176 | " def __call__(\n", 177 | " self, instruction: str, **generate_kwargs: Dict[str, Any]\n", 178 | " ) -> Tuple[str, str, float]:\n", 179 | " s = PROMPT_FOR_GENERATION_FORMAT.format(instruction=instruction)\n", 180 | " input_ids = self.tokenizer(s, return_tensors=\"pt\").input_ids\n", 181 | " input_ids = input_ids.to(self.model.device)\n", 182 | " gkw = {**self.generate_kwargs, **generate_kwargs}\n", 183 | " with torch.no_grad():\n", 184 | " output_ids = self.model.generate(input_ids, **gkw)\n", 185 | " # Slice the output_ids tensor to get only new tokens\n", 186 | " new_tokens = output_ids[0, len(input_ids[0]) :]\n", 187 | " output_text = self.tokenizer.decode(new_tokens, skip_special_tokens=True)\n", 188 | " return output_text" 189 | ], 190 | "metadata": { 191 | "id": "qZ5FXdgkPFxO" 192 | }, 193 | "execution_count": null, 194 | "outputs": [] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "source": [ 199 | "\n", 200 | "# Initialize the model and tokenizer\n", 201 | "generate = InstructionTextGenerationPipeline(\n", 202 | " \"mosaicml/mpt-7b-instruct\",\n", 203 | " torch_dtype=torch.bfloat16,\n", 204 | " trust_remote_code=True,\n", 205 | ")\n", 206 | "stop_token_ids = generate.tokenizer.convert_tokens_to_ids([\"<|endoftext|>\"])\n", 207 | "\n", 208 | "\n", 209 | "# Define a 
custom stopping criteria\n", 210 | "class StopOnTokens(StoppingCriteria):\n", 211 | " def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:\n", 212 | " for stop_id in stop_token_ids:\n", 213 | " if input_ids[0][-1] == stop_id:\n", 214 | " return True\n", 215 | " return False\n" 216 | ], 217 | "metadata": { 218 | "colab": { 219 | "base_uri": "https://localhost:8080/", 220 | "height": 104, 221 | "referenced_widgets": [ 222 | "475c58e81703463e93aba2d54bd48027", 223 | "d6b2e4da04c148478d4c38a5bbd9714c", 224 | "bc899c26bf3d455cb1b1567afaea4e7b", 225 | "e3c6705338fa4a9fbf28c7d47639d8c4", 226 | "cb900eac4cca488fbacfdff875a8983f", 227 | "4834c18e62394cb8a96046b08fa20160", 228 | "8edf5aa218a4407daa6accca7d0a6c8b", 229 | "7c6667d6dc874803a04df360e474b182", 230 | "e306424965594ff18e9222f4ead18c4d", 231 | "5f524f136e794cbf91dc25c9e304f5fa", 232 | "55fff9c653a44ccc8ba9e8155b724265" 233 | ] 234 | }, 235 | "id": "EBknbA-IN4aF", 236 | "outputId": "0be3303e-14c3-4434-a229-4e174528c962" 237 | }, 238 | "execution_count": null, 239 | "outputs": [ 240 | { 241 | "output_type": "display_data", 242 | "data": { 243 | "text/plain": [ 244 | "Loading checkpoint shards: 0%| | 0/2 [00:00:50: UserWarning: pad_token_id is not set for the tokenizer. Using eos_token_id as pad_token_id.\n", 259 | " warnings.warn(\n" 260 | ] 261 | } 262 | ] 263 | }, 264 | { 265 | "cell_type": "markdown", 266 | "metadata": { 267 | "id": "1dUMCB_kiTom" 268 | }, 269 | "source": [ 270 | "### The prompt & response" 271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "execution_count": null, 276 | "metadata": { 277 | "id": "Wo-FSysZiVkA" 278 | }, 279 | "outputs": [], 280 | "source": [ 281 | "import json\n", 282 | "import textwrap\n", 283 | "\n", 284 | "def get_prompt(instruction):\n", 285 | " prompt_template = f\"Below is an instruction that describes a task. Write a response that appropriately completes the request.\\n\\n### Instruction:\\n{instruction}\\n\\n### Response:\"\n", 286 | " return prompt_template\n", 287 | "\n", 288 | "# print(get_prompt('What is the meaning of life?'))\n", 289 | "\n", 290 | "def parse_text(text):\n", 291 | " wrapped_text = textwrap.fill(text, width=100)\n", 292 | " print(wrapped_text +'\\n\\n')\n", 293 | " # return assistant_text\n" 294 | ] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "source": [ 299 | "%%time\n", 300 | "prompt = 'What are the differences between alpacas, vicunas and llamas?'\n", 301 | "generated_text = generate(prompt)\n", 302 | "parse_text(generated_text)\n" 303 | ], 304 | "metadata": { 305 | "colab": { 306 | "base_uri": "https://localhost:8080/" 307 | }, 308 | "id": "6qHrby3n0109", 309 | "outputId": "3843b9e6-dfbb-40b4-92fe-973eb2c6b781" 310 | }, 311 | "execution_count": null, 312 | "outputs": [ 313 | { 314 | "output_type": "stream", 315 | "name": "stdout", 316 | "text": [ 317 | "Alpacas have long faces with large eyes; they can be black or brown in coloration but usually white.\n", 318 | "They weigh up to 100 pounds (45 kg) and stand about 3 feet tall at the shoulder. Their fleece has\n", 319 | "soft fibers which make it very warm for clothing use as well as other products such as pillows and\n", 320 | "blankets. Alpacas live on farms across South America where their fiber is harvested by shearing them\n", 321 | "once per year during spring time when new growth begins forming again after shedding its winter\n", 322 | "coat. 
Vicunas also come from South American countries like Peru and Chile although there are some\n", 323 | "populations found in North Africa too! These animals look similar to camels because of how big their\n", 324 | "ears are compared to body size - weighing around 150-200 lbs (70kg). The fur of these creatures\n", 325 | "comes in many different colors including greyish browns, blacks & whites though most commonly seen\n", 326 | "in shades of tan/beige due to living conditions near deserts throughout much of this region. Llamas\n", 327 | "share similarities with both goats & sheep since they're considered \"primitive\" mammals belonging\n", 328 | "within the same family group called Artiodactyla along side pigs deer etc.. However unlike either\n", 329 | "goat or sheep species who typically shed all hair annually through molting process known as \"blowing\n", 330 | "out\", llama's only need one annual grooming session instead requiring less maintenance overall than\n", 331 | "other types mentioned above\n", 332 | "\n", 333 | "\n", 334 | "CPU times: user 8.16 s, sys: 16.3 ms, total: 8.17 s\n", 335 | "Wall time: 8.15 s\n" 336 | ] 337 | } 338 | ] 339 | }, 340 | { 341 | "cell_type": "code", 342 | "execution_count": null, 343 | "metadata": { 344 | "id": "angnwW9HG4Hv", 345 | "colab": { 346 | "base_uri": "https://localhost:8080/" 347 | }, 348 | "outputId": "21126522-c1d0-484b-9cb1-87cd954e6338" 349 | }, 350 | "outputs": [ 351 | { 352 | "output_type": "stream", 353 | "name": "stdout", 354 | "text": [ 355 | "London\n", 356 | "\n", 357 | "\n", 358 | "CPU times: user 63.5 ms, sys: 31 µs, total: 63.5 ms\n", 359 | "Wall time: 62.8 ms\n" 360 | ] 361 | } 362 | ], 363 | "source": [ 364 | "%%time \n", 365 | "prompt = 'What is the capital of England?'\n", 366 | "generated_text = generate(prompt)\n", 367 | "parse_text(generated_text)" 368 | ] 369 | }, 370 | { 371 | "cell_type": "code", 372 | "execution_count": null, 373 | "metadata": { 374 | "id": "DGCCFto2G4Jk", 375 | "colab": { 376 | "base_uri": "https://localhost:8080/" 377 | }, 378 | "outputId": "1e9e5f3d-fe47-4f08-92ff-0829ce40ac5e" 379 | }, 380 | "outputs": [ 381 | { 382 | "output_type": "stream", 383 | "name": "stdout", 384 | "text": [ 385 | "Dear Mr.Altman, I am writing this mail with regards to your recent announcement of Open sourcing\n", 386 | "GPT 4 model and its training code base on GitHub under Apache 2 license. I would like to\n", 387 | "congratulate you for taking such bold step towards democratizing AI research by making it more\n", 388 | "accessible to wider community, which will help in accelerating innovation across industries as well\n", 389 | "as academia at large. As someone who has been following developments around NLP space closely over\n", 390 | "past few years i have seen how hard it was for researchers/engineers working on similar problems to\n", 391 | "collaborate or build upon each other's work due to lack of access to underlying models & their\n", 392 | "training codes etc., so any move towards opening up these technologies can only be beneficial from\n", 393 | "both technical excellence perspective but also business development point of view since it helps\n", 394 | "foster collaboration between teams within companies as well as outside organizations leading to\n", 395 | "creation of new products / services faster than ever before. 
Hence my recommendation would be to\n", 396 | "continue down this path of openness whenever possible because there are no downsides associated with\n", 397 | "doing so!\n", 398 | "\n", 399 | "\n", 400 | "CPU times: user 6.29 s, sys: 14.6 ms, total: 6.3 s\n", 401 | "Wall time: 6.28 s\n" 402 | ] 403 | } 404 | ], 405 | "source": [ 406 | "%%time \n", 407 | "prompt = 'Write an email to Sam Altman giving reasons to open source GPT-4'\n", 408 | "generated_text = generate(prompt)\n", 409 | "parse_text(generated_text)" 410 | ] 411 | }, 412 | { 413 | "cell_type": "code", 414 | "execution_count": null, 415 | "metadata": { 416 | "id": "h9uswqYmG4LZ", 417 | "colab": { 418 | "base_uri": "https://localhost:8080/" 419 | }, 420 | "outputId": "dbbfebaf-12de-4653-ccd8-e5fbb104715d" 421 | }, 422 | "outputs": [ 423 | { 424 | "output_type": "stream", 425 | "name": "stdout", 426 | "text": [ 427 | "I don't have any feelings, but I can tell you what's on Wikipedia and other sources so far as it\n", 428 | "relates to The Simpsons. Homer Simpson was created by Matt Groening in 1987 for his cartoon series\n", 429 | "called \"The Simpsons\". He has been voiced by Dan Castellaneta since 1989.\n", 430 | "\n", 431 | "\n", 432 | "CPU times: user 1.95 s, sys: 5.77 ms, total: 1.95 s\n", 433 | "Wall time: 1.95 s\n" 434 | ] 435 | } 436 | ], 437 | "source": [ 438 | "%%time \n", 439 | "prompt = 'As an AI do you like the Simpsons? What do you know about Homer?'\n", 440 | "generated_text = generate(prompt)\n", 441 | "parse_text(generated_text)" 442 | ] 443 | }, 444 | { 445 | "cell_type": "code", 446 | "execution_count": null, 447 | "metadata": { 448 | "id": "zYM0_ryUG4NO", 449 | "colab": { 450 | "base_uri": "https://localhost:8080/" 451 | }, 452 | "outputId": "f287cbda-90df-4328-df2c-27b62bb483ae" 453 | }, 454 | "outputs": [ 455 | { 456 | "output_type": "stream", 457 | "name": "stdout", 458 | "text": [ 459 | "Homer Simpson was created by Matt Groening and first appeared in The Simpsons, which debuted\n", 460 | "December 17th 1989\n", 461 | "\n", 462 | "\n", 463 | "CPU times: user 732 ms, sys: 122 µs, total: 732 ms\n", 464 | "Wall time: 729 ms\n" 465 | ] 466 | } 467 | ], 468 | "source": [ 469 | "%%time \n", 470 | "prompt = 'Tell me about Homer on the TV show the simpsons'\n", 471 | "generated_text = generate(prompt)\n", 472 | "parse_text(generated_text)" 473 | ] 474 | }, 475 | { 476 | "cell_type": "code", 477 | "source": [ 478 | "%%time \n", 479 | "prompt = 'Tell me about Homer on the TV show the simpsons in depth'\n", 480 | "generated_text = generate(prompt)\n", 481 | "parse_text(generated_text)" 482 | ], 483 | "metadata": { 484 | "colab": { 485 | "base_uri": "https://localhost:8080/" 486 | }, 487 | "id": "AFQ_jT0iMc4O", 488 | "outputId": "28ccce7b-b089-4474-9740-a7095b26b05f" 489 | }, 490 | "execution_count": null, 491 | "outputs": [ 492 | { 493 | "output_type": "stream", 494 | "name": "stdout", 495 | "text": [ 496 | "Homer Simpson was created by Matt Groening and first appeared as one of many characters on The\n", 497 | "Tracey Ullman Show, which ran from 1987 to 1989 before being spun off into its own series called The\n", 498 | "Simpsons. He has been voiced since then by Dan Castellaneta who also voices other famous cartoon\n", 499 | "character such as Mr. 
Burns\n", 500 | "\n", 501 | "\n", 502 | "CPU times: user 2.1 s, sys: 4.83 ms, total: 2.1 s\n", 503 | "Wall time: 2.1 s\n" 504 | ] 505 | } 506 | ] 507 | }, 508 | { 509 | "cell_type": "code", 510 | "source": [ 511 | "\n", 512 | "%%time \n", 513 | "prompt = 'Answer the following question by reasoning step by step. The cafeteria had 23 apples. If they used 20 for lunch, and bought 6 more, how many apple do they have?'\n", 514 | "generated_text = generate(prompt)\n", 515 | "parse_text(generated_text)" 516 | ], 517 | "metadata": { 518 | "colab": { 519 | "base_uri": "https://localhost:8080/" 520 | }, 521 | "id": "pmbDQ82vMPYy", 522 | "outputId": "f89fb8ec-ef89-468a-c85d-3c4454b74319" 523 | }, 524 | "execution_count": null, 525 | "outputs": [ 526 | { 527 | "output_type": "stream", 528 | "name": "stdout", 529 | "text": [ 530 | "The original number of apples was 23. They ate 20 from it to make lunches with them. So now there\n", 531 | "are only 3 left in their stockpile. To replenish what they lost, they purchased 6 new ones bringing\n", 532 | "their total back up to 21\n", 533 | "\n", 534 | "\n", 535 | "CPU times: user 1.51 s, sys: 1.82 ms, total: 1.51 s\n", 536 | "Wall time: 1.51 s\n" 537 | ] 538 | } 539 | ] 540 | }, 541 | { 542 | "cell_type": "code", 543 | "source": [ 544 | "%%time \n", 545 | "prompt = 'Answer the following yes\\/no question by reasoning step-by-step. \\n Can you write a whole Haiku in a single tweet?'\n", 546 | "generated_text = generate(prompt)\n", 547 | "parse_text(generated_text)" 548 | ], 549 | "metadata": { 550 | "colab": { 551 | "base_uri": "https://localhost:8080/" 552 | }, 553 | "id": "cHKCo6VXNByX", 554 | "outputId": "25c3045f-f209-4aef-fd64-b31c27c57a43" 555 | }, 556 | "execution_count": null, 557 | "outputs": [ 558 | { 559 | "output_type": "stream", 560 | "name": "stdout", 561 | "text": [ 562 | "Yes, it's possible to compose and post a haiku on Twitter using only 140 characters (including\n", 563 | "spaces). The most common form of this type of poem has 5-7-5 syllable counts for each line.\n", 564 | "\n", 565 | "\n", 566 | "CPU times: user 1.32 s, sys: 3.82 ms, total: 1.32 s\n", 567 | "Wall time: 1.32 s\n" 568 | ] 569 | } 570 | ] 571 | }, 572 | { 573 | "cell_type": "code", 574 | "source": [ 575 | "%%time \n", 576 | "prompt = 'Tell me about Harry Potter and studying at Hogwarts?'\n", 577 | "generated_text = generate(prompt)\n", 578 | "parse_text(generated_text)" 579 | ], 580 | "metadata": { 581 | "colab": { 582 | "base_uri": "https://localhost:8080/" 583 | }, 584 | "id": "qsn1buh6NTie", 585 | "outputId": "97928388-270d-4f30-d19a-9bc35906774b" 586 | }, 587 | "execution_count": null, 588 | "outputs": [ 589 | { 590 | "output_type": "stream", 591 | "name": "stdout", 592 | "text": [ 593 | "Harry Potter was born to parents who were wizards, so he too has magical powers such as invisibility\n", 594 | "cloaking himself in mist or using his wand which can cast spells like lightning bolt, fire ball\n", 595 | "etc.. He lives with his aunt Petunia Dursley (his mum's sister) because of the fear by his relatives\n", 596 | "for him being a wizard due to the fact they are all muggle-born(non magicians). At 11 years old,\n", 597 | "Hagrid comes over from Hogwarts School of Witchcraft & Wizardry to take harry on a tour of the\n", 598 | "school but also inform petunia that she must send her son back to hogwarts immediately after their\n", 599 | "summer holidays end since there will be no other chance until next year! 
The rest of this book\n", 600 | "follows you through adventures when going away into your first term at Hogwarts where new friends\n", 601 | "are made along side learning many things throughout the 7 books\n", 602 | "\n", 603 | "\n", 604 | "CPU times: user 5.48 s, sys: 8.31 ms, total: 5.49 s\n", 605 | "Wall time: 5.47 s\n" 606 | ] 607 | } 608 | ] 609 | }, 610 | { 611 | "cell_type": "code", 612 | "source": [ 613 | "%%time \n", 614 | "prompt = \"\"\"Convert the following to JSON\n", 615 | "\n", 616 | "name: John\n", 617 | "age: 30\n", 618 | "address:\n", 619 | "street: 123 Main Street\n", 620 | "city: San Fransisco\n", 621 | "state: CA\n", 622 | "zip: 94101\n", 623 | "\"\"\"\n", 624 | "generated_text = generate(prompt)\n", 625 | "parse_text(generated_text)" 626 | ], 627 | "metadata": { 628 | "colab": { 629 | "base_uri": "https://localhost:8080/" 630 | }, 631 | "id": "Y7qsYoDUT57D", 632 | "outputId": "e1fdf459-f8db-418a-f3e5-0fd28326c739" 633 | }, 634 | "execution_count": null, 635 | "outputs": [ 636 | { 637 | "output_type": "stream", 638 | "name": "stdout", 639 | "text": [ 640 | "{\"name\": \"John\", \"age\":30,\"address\":{\"street\":\"123 Main street\",\"city\":\"San\n", 641 | "Francisco\",\"state\":\"CA\",\"zip\":94101}}\n", 642 | "\n", 643 | "\n", 644 | "CPU times: user 1.04 s, sys: 1.94 ms, total: 1.04 s\n", 645 | "Wall time: 1.04 s\n" 646 | ] 647 | } 648 | ] 649 | }, 650 | { 651 | "cell_type": "code", 652 | "source": [ 653 | "%%time \n", 654 | "prompt = \"\"\"How are you today?\"\"\"\n", 655 | "generated_text = generate(prompt)\n", 656 | "parse_text(generated_text)" 657 | ], 658 | "metadata": { 659 | "colab": { 660 | "base_uri": "https://localhost:8080/" 661 | }, 662 | "id": "Rp_lI2mkrIls", 663 | "outputId": "279e456a-de61-4962-99db-9b4de94c5334" 664 | }, 665 | "execution_count": null, 666 | "outputs": [ 667 | { 668 | "output_type": "stream", 669 | "name": "stdout", 670 | "text": [ 671 | "I am doing well, thank you for asking!\n", 672 | "\n", 673 | "\n", 674 | "CPU times: user 338 ms, sys: 918 µs, total: 339 ms\n", 675 | "Wall time: 336 ms\n" 676 | ] 677 | } 678 | ] 679 | }, 680 | { 681 | "cell_type": "code", 682 | "source": [ 683 | "%%time \n", 684 | "prompt = \"\"\"Write me a short plan for a 3 day trip to London\"\"\"\n", 685 | "generated_text = generate(prompt)\n", 686 | "parse_text(generated_text)" 687 | ], 688 | "metadata": { 689 | "colab": { 690 | "base_uri": "https://localhost:8080/" 691 | }, 692 | "id": "AfvXtlq1rNu9", 693 | "outputId": "d21396a8-1998-4de9-c137-fca3bf36c48e" 694 | }, 695 | "execution_count": null, 696 | "outputs": [ 697 | { 698 | "output_type": "stream", 699 | "name": "stdout", 700 | "text": [ 701 | "Day 1 - Arrive in london and check into hotel near Victoria station, have dinner at one of many\n", 702 | "restaurants nearby then head out to see some sites such as Big Ben or Buckingham Palace Day 2- Head\n", 703 | "over to Westminster Abbey where you can tour inside and learn about British history before heading\n", 704 | "back towards Trafalgar Square to visit National Gallery which has free entry on Sundays! After this\n", 705 | "walk down Whitehall street until you reach Downing Street (the residence of the Prime Minister)\n", 706 | "finish off your afternoon by visiting St Paul's Cathedral with its beautiful dome overlooking the\n", 707 | "city from across the river Thames! Day3- Start early today so you don't miss any attractions like\n", 708 | "Tower Bridge, The Shard & Borough Market! 
Finish up your last few hours exploring Covent Garden\n", 709 | "market area\n", 710 | "\n", 711 | "\n", 712 | "CPU times: user 4.64 s, sys: 8.55 ms, total: 4.65 s\n", 713 | "Wall time: 4.63 s\n" 714 | ] 715 | } 716 | ] 717 | }, 718 | { 719 | "cell_type": "code", 720 | "execution_count": null, 721 | "metadata": { 722 | "id": "yxtk83DVyR48" 723 | }, 724 | "outputs": [], 725 | "source": [ 726 | "article = \"\"\"\n", 727 | "Content moderators under Sama, Meta’s content review sub-contractor in Africa, earlier today picketed at the company’s headquarters in Kenya demanding April salary, while urging it to observe the court orders that barred it from conducting mass layoffs.\n", 728 | "\n", 729 | "The demonstrations came after Sama, in an email, instructed moderators to clear with the company by May 11, a move the employees say is against the existing court orders.\n", 730 | "\n", 731 | "The 184 moderators sued Sama for allegedly laying them off unlawfully, after it wound down its content review arm in March, and Majorel, the social media giant’s new partner in Africa, for blacklisting on instruction by Meta.\n", 732 | "\n", 733 | "\n", 734 | "The court issued a temporary injunction on March 21 barring Sama from effecting any form of redundancy, and Meta from engaging Majorel, which was also instructed to refrain from blacklisting the moderators. Sama was directed to continue reviewing content on Meta’s platforms, and to be its sole provider in Africa pending determination of the case. However, Sama sent the moderators on compulsory leave in April saying it had no work for them as its contract with Meta had expired.\n", 735 | "\n", 736 | "Sama told TechCrunch that it had sent the notice “to staff whose contract had expired to go through our regular clearance process. This clearance process involves the return of company equipment to make sure that all final dues can be paid without deduction for that equipment, in accordance with Kenyan law.”\n", 737 | "\n", 738 | "It said the moderators’ contracts had ended in March after its deal with Meta expired, saying that it was only processing the moderators final dues.\n", 739 | "\n", 740 | "“We understand our former employees’ frustration because they were led by others to believe that they would all receive salary indefinitely while on leave, but that is not what the court dictated,” said Sama.\n", 741 | "\n", 742 | "\"\"\"" 743 | ] 744 | }, 745 | { 746 | "cell_type": "code", 747 | "source": [ 748 | "%%time \n", 749 | "prompt = \"Please summarize this article:\\n\" + article\n", 750 | "generated_text = generate(prompt)\n", 751 | "parse_text(generated_text)" 752 | ], 753 | "metadata": { 754 | "colab": { 755 | "base_uri": "https://localhost:8080/" 756 | }, 757 | "id": "1CEQDmM4xL-J", 758 | "outputId": "759d8176-7be2-47d3-a387-c3075fd51989" 759 | }, 760 | "execution_count": null, 761 | "outputs": [ 762 | { 763 | "output_type": "stream", 764 | "name": "stdout", 765 | "text": [ 766 | "Here’s a summary: In early April, some content moderation workers who previously worked for Sama, a\n", 767 | "subcontractor of Meta (the owner of Facebook) in Africa, went on strike demanding their April\n", 768 | "salaries and protesting against being laid off despite a court order prohibiting such actions. 
The\n", 769 | "workers are upset that Sama has refused to follow the court order and continues to refuse to pay\n", 770 | "them while they remain on unpaid leave.\n", 771 | "\n", 772 | "\n", 773 | "CPU times: user 2.56 s, sys: 1.9 ms, total: 2.56 s\n", 774 | "Wall time: 2.55 s\n" 775 | ] 776 | } 777 | ] 778 | }, 779 | { 780 | "cell_type": "code", 781 | "source": [ 782 | "%%time \n", 783 | "prompt = \"Please extract the key info as bullet points for this article:\\n\" + article\n", 784 | "generated_text = generate(prompt)\n", 785 | "print(generated_text)" 786 | ], 787 | "metadata": { 788 | "colab": { 789 | "base_uri": "https://localhost:8080/" 790 | }, 791 | "id": "Zzko3r6mxU9-", 792 | "outputId": "941783b1-6388-497c-83c7-cdec15f1b57d" 793 | }, 794 | "execution_count": null, 795 | "outputs": [ 796 | { 797 | "output_type": "stream", 798 | "name": "stdout", 799 | "text": [ 800 | "Here are some highlights from the article: \n", 801 | "\n", 802 | "1. Content moderation workers who were previously employed by Sama, a subcontractor of Meta (Facebook), protested outside of Sama's offices in Nairobi, Kenya today, demanding their April salaries and requesting that Sama comply with court rulings prohibiting them from terminating their employment or blacklisting them. \n", 803 | "\n", 804 | "2. The protestors demanded that Sama adhere to two court decisions: one forbidding Sama from firing or blacklisting the workers, and another ordering Sama to pay the workers' outstanding wages during their mandatory unpaid leave.\n", 805 | "\n", 806 | "3. After ending its agreement with Facebook in early 2023, Sama informed the workers that their jobs had been terminated due to lack of work, even though both courts have ordered Sama to keep employing these workers until the lawsuit has been resolved.\n", 807 | "\n", 808 | "CPU times: user 5.07 s, sys: 6.27 ms, total: 5.08 s\n", 809 | "Wall time: 5.07 s\n" 810 | ] 811 | } 812 | ] 813 | }, 814 | { 815 | "cell_type": "code", 816 | "source": [], 817 | "metadata": { 818 | "id": "1K71mYsBx1r4" 819 | }, 820 | "execution_count": null, 821 | "outputs": [] 822 | } 823 | ], 824 | "metadata": { 825 | "accelerator": "GPU", 826 | "colab": { 827 | "machine_shape": "hm", 828 | "provenance": [] 829 | }, 830 | "gpuClass": "premium", 831 | "kernelspec": { 832 | "display_name": "Python 3", 833 | "name": "python3" 834 | }, 835 | "language_info": { 836 | "name": "python" 837 | }, 838 | "widgets": { 839 | "application/vnd.jupyter.widget-state+json": { 840 | "475c58e81703463e93aba2d54bd48027": { 841 | "model_module": "@jupyter-widgets/controls", 842 | "model_name": "HBoxModel", 843 | "model_module_version": "1.5.0", 844 | "state": { 845 | "_dom_classes": [], 846 | "_model_module": "@jupyter-widgets/controls", 847 | "_model_module_version": "1.5.0", 848 | "_model_name": "HBoxModel", 849 | "_view_count": null, 850 | "_view_module": "@jupyter-widgets/controls", 851 | "_view_module_version": "1.5.0", 852 | "_view_name": "HBoxView", 853 | "box_style": "", 854 | "children": [ 855 | "IPY_MODEL_d6b2e4da04c148478d4c38a5bbd9714c", 856 | "IPY_MODEL_bc899c26bf3d455cb1b1567afaea4e7b", 857 | "IPY_MODEL_e3c6705338fa4a9fbf28c7d47639d8c4" 858 | ], 859 | "layout": "IPY_MODEL_cb900eac4cca488fbacfdff875a8983f" 860 | } 861 | }, 862 | "d6b2e4da04c148478d4c38a5bbd9714c": { 863 | "model_module": "@jupyter-widgets/controls", 864 | "model_name": "HTMLModel", 865 | "model_module_version": "1.5.0", 866 | "state": { 867 | "_dom_classes": [], 868 | "_model_module": "@jupyter-widgets/controls", 869 | 
"_model_module_version": "1.5.0", 870 | "_model_name": "HTMLModel", 871 | "_view_count": null, 872 | "_view_module": "@jupyter-widgets/controls", 873 | "_view_module_version": "1.5.0", 874 | "_view_name": "HTMLView", 875 | "description": "", 876 | "description_tooltip": null, 877 | "layout": "IPY_MODEL_4834c18e62394cb8a96046b08fa20160", 878 | "placeholder": "​", 879 | "style": "IPY_MODEL_8edf5aa218a4407daa6accca7d0a6c8b", 880 | "value": "Loading checkpoint shards: 100%" 881 | } 882 | }, 883 | "bc899c26bf3d455cb1b1567afaea4e7b": { 884 | "model_module": "@jupyter-widgets/controls", 885 | "model_name": "FloatProgressModel", 886 | "model_module_version": "1.5.0", 887 | "state": { 888 | "_dom_classes": [], 889 | "_model_module": "@jupyter-widgets/controls", 890 | "_model_module_version": "1.5.0", 891 | "_model_name": "FloatProgressModel", 892 | "_view_count": null, 893 | "_view_module": "@jupyter-widgets/controls", 894 | "_view_module_version": "1.5.0", 895 | "_view_name": "ProgressView", 896 | "bar_style": "success", 897 | "description": "", 898 | "description_tooltip": null, 899 | "layout": "IPY_MODEL_7c6667d6dc874803a04df360e474b182", 900 | "max": 2, 901 | "min": 0, 902 | "orientation": "horizontal", 903 | "style": "IPY_MODEL_e306424965594ff18e9222f4ead18c4d", 904 | "value": 2 905 | } 906 | }, 907 | "e3c6705338fa4a9fbf28c7d47639d8c4": { 908 | "model_module": "@jupyter-widgets/controls", 909 | "model_name": "HTMLModel", 910 | "model_module_version": "1.5.0", 911 | "state": { 912 | "_dom_classes": [], 913 | "_model_module": "@jupyter-widgets/controls", 914 | "_model_module_version": "1.5.0", 915 | "_model_name": "HTMLModel", 916 | "_view_count": null, 917 | "_view_module": "@jupyter-widgets/controls", 918 | "_view_module_version": "1.5.0", 919 | "_view_name": "HTMLView", 920 | "description": "", 921 | "description_tooltip": null, 922 | "layout": "IPY_MODEL_5f524f136e794cbf91dc25c9e304f5fa", 923 | "placeholder": "​", 924 | "style": "IPY_MODEL_55fff9c653a44ccc8ba9e8155b724265", 925 | "value": " 2/2 [00:08<00:00, 4.07s/it]" 926 | } 927 | }, 928 | "cb900eac4cca488fbacfdff875a8983f": { 929 | "model_module": "@jupyter-widgets/base", 930 | "model_name": "LayoutModel", 931 | "model_module_version": "1.2.0", 932 | "state": { 933 | "_model_module": "@jupyter-widgets/base", 934 | "_model_module_version": "1.2.0", 935 | "_model_name": "LayoutModel", 936 | "_view_count": null, 937 | "_view_module": "@jupyter-widgets/base", 938 | "_view_module_version": "1.2.0", 939 | "_view_name": "LayoutView", 940 | "align_content": null, 941 | "align_items": null, 942 | "align_self": null, 943 | "border": null, 944 | "bottom": null, 945 | "display": null, 946 | "flex": null, 947 | "flex_flow": null, 948 | "grid_area": null, 949 | "grid_auto_columns": null, 950 | "grid_auto_flow": null, 951 | "grid_auto_rows": null, 952 | "grid_column": null, 953 | "grid_gap": null, 954 | "grid_row": null, 955 | "grid_template_areas": null, 956 | "grid_template_columns": null, 957 | "grid_template_rows": null, 958 | "height": null, 959 | "justify_content": null, 960 | "justify_items": null, 961 | "left": null, 962 | "margin": null, 963 | "max_height": null, 964 | "max_width": null, 965 | "min_height": null, 966 | "min_width": null, 967 | "object_fit": null, 968 | "object_position": null, 969 | "order": null, 970 | "overflow": null, 971 | "overflow_x": null, 972 | "overflow_y": null, 973 | "padding": null, 974 | "right": null, 975 | "top": null, 976 | "visibility": null, 977 | "width": null 978 | } 979 | }, 980 | 
"4834c18e62394cb8a96046b08fa20160": { 981 | "model_module": "@jupyter-widgets/base", 982 | "model_name": "LayoutModel", 983 | "model_module_version": "1.2.0", 984 | "state": { 985 | "_model_module": "@jupyter-widgets/base", 986 | "_model_module_version": "1.2.0", 987 | "_model_name": "LayoutModel", 988 | "_view_count": null, 989 | "_view_module": "@jupyter-widgets/base", 990 | "_view_module_version": "1.2.0", 991 | "_view_name": "LayoutView", 992 | "align_content": null, 993 | "align_items": null, 994 | "align_self": null, 995 | "border": null, 996 | "bottom": null, 997 | "display": null, 998 | "flex": null, 999 | "flex_flow": null, 1000 | "grid_area": null, 1001 | "grid_auto_columns": null, 1002 | "grid_auto_flow": null, 1003 | "grid_auto_rows": null, 1004 | "grid_column": null, 1005 | "grid_gap": null, 1006 | "grid_row": null, 1007 | "grid_template_areas": null, 1008 | "grid_template_columns": null, 1009 | "grid_template_rows": null, 1010 | "height": null, 1011 | "justify_content": null, 1012 | "justify_items": null, 1013 | "left": null, 1014 | "margin": null, 1015 | "max_height": null, 1016 | "max_width": null, 1017 | "min_height": null, 1018 | "min_width": null, 1019 | "object_fit": null, 1020 | "object_position": null, 1021 | "order": null, 1022 | "overflow": null, 1023 | "overflow_x": null, 1024 | "overflow_y": null, 1025 | "padding": null, 1026 | "right": null, 1027 | "top": null, 1028 | "visibility": null, 1029 | "width": null 1030 | } 1031 | }, 1032 | "8edf5aa218a4407daa6accca7d0a6c8b": { 1033 | "model_module": "@jupyter-widgets/controls", 1034 | "model_name": "DescriptionStyleModel", 1035 | "model_module_version": "1.5.0", 1036 | "state": { 1037 | "_model_module": "@jupyter-widgets/controls", 1038 | "_model_module_version": "1.5.0", 1039 | "_model_name": "DescriptionStyleModel", 1040 | "_view_count": null, 1041 | "_view_module": "@jupyter-widgets/base", 1042 | "_view_module_version": "1.2.0", 1043 | "_view_name": "StyleView", 1044 | "description_width": "" 1045 | } 1046 | }, 1047 | "7c6667d6dc874803a04df360e474b182": { 1048 | "model_module": "@jupyter-widgets/base", 1049 | "model_name": "LayoutModel", 1050 | "model_module_version": "1.2.0", 1051 | "state": { 1052 | "_model_module": "@jupyter-widgets/base", 1053 | "_model_module_version": "1.2.0", 1054 | "_model_name": "LayoutModel", 1055 | "_view_count": null, 1056 | "_view_module": "@jupyter-widgets/base", 1057 | "_view_module_version": "1.2.0", 1058 | "_view_name": "LayoutView", 1059 | "align_content": null, 1060 | "align_items": null, 1061 | "align_self": null, 1062 | "border": null, 1063 | "bottom": null, 1064 | "display": null, 1065 | "flex": null, 1066 | "flex_flow": null, 1067 | "grid_area": null, 1068 | "grid_auto_columns": null, 1069 | "grid_auto_flow": null, 1070 | "grid_auto_rows": null, 1071 | "grid_column": null, 1072 | "grid_gap": null, 1073 | "grid_row": null, 1074 | "grid_template_areas": null, 1075 | "grid_template_columns": null, 1076 | "grid_template_rows": null, 1077 | "height": null, 1078 | "justify_content": null, 1079 | "justify_items": null, 1080 | "left": null, 1081 | "margin": null, 1082 | "max_height": null, 1083 | "max_width": null, 1084 | "min_height": null, 1085 | "min_width": null, 1086 | "object_fit": null, 1087 | "object_position": null, 1088 | "order": null, 1089 | "overflow": null, 1090 | "overflow_x": null, 1091 | "overflow_y": null, 1092 | "padding": null, 1093 | "right": null, 1094 | "top": null, 1095 | "visibility": null, 1096 | "width": null 1097 | } 1098 | }, 1099 | 
"e306424965594ff18e9222f4ead18c4d": { 1100 | "model_module": "@jupyter-widgets/controls", 1101 | "model_name": "ProgressStyleModel", 1102 | "model_module_version": "1.5.0", 1103 | "state": { 1104 | "_model_module": "@jupyter-widgets/controls", 1105 | "_model_module_version": "1.5.0", 1106 | "_model_name": "ProgressStyleModel", 1107 | "_view_count": null, 1108 | "_view_module": "@jupyter-widgets/base", 1109 | "_view_module_version": "1.2.0", 1110 | "_view_name": "StyleView", 1111 | "bar_color": null, 1112 | "description_width": "" 1113 | } 1114 | }, 1115 | "5f524f136e794cbf91dc25c9e304f5fa": { 1116 | "model_module": "@jupyter-widgets/base", 1117 | "model_name": "LayoutModel", 1118 | "model_module_version": "1.2.0", 1119 | "state": { 1120 | "_model_module": "@jupyter-widgets/base", 1121 | "_model_module_version": "1.2.0", 1122 | "_model_name": "LayoutModel", 1123 | "_view_count": null, 1124 | "_view_module": "@jupyter-widgets/base", 1125 | "_view_module_version": "1.2.0", 1126 | "_view_name": "LayoutView", 1127 | "align_content": null, 1128 | "align_items": null, 1129 | "align_self": null, 1130 | "border": null, 1131 | "bottom": null, 1132 | "display": null, 1133 | "flex": null, 1134 | "flex_flow": null, 1135 | "grid_area": null, 1136 | "grid_auto_columns": null, 1137 | "grid_auto_flow": null, 1138 | "grid_auto_rows": null, 1139 | "grid_column": null, 1140 | "grid_gap": null, 1141 | "grid_row": null, 1142 | "grid_template_areas": null, 1143 | "grid_template_columns": null, 1144 | "grid_template_rows": null, 1145 | "height": null, 1146 | "justify_content": null, 1147 | "justify_items": null, 1148 | "left": null, 1149 | "margin": null, 1150 | "max_height": null, 1151 | "max_width": null, 1152 | "min_height": null, 1153 | "min_width": null, 1154 | "object_fit": null, 1155 | "object_position": null, 1156 | "order": null, 1157 | "overflow": null, 1158 | "overflow_x": null, 1159 | "overflow_y": null, 1160 | "padding": null, 1161 | "right": null, 1162 | "top": null, 1163 | "visibility": null, 1164 | "width": null 1165 | } 1166 | }, 1167 | "55fff9c653a44ccc8ba9e8155b724265": { 1168 | "model_module": "@jupyter-widgets/controls", 1169 | "model_name": "DescriptionStyleModel", 1170 | "model_module_version": "1.5.0", 1171 | "state": { 1172 | "_model_module": "@jupyter-widgets/controls", 1173 | "_model_module_version": "1.5.0", 1174 | "_model_name": "DescriptionStyleModel", 1175 | "_view_count": null, 1176 | "_view_module": "@jupyter-widgets/base", 1177 | "_view_module_version": "1.2.0", 1178 | "_view_name": "StyleView", 1179 | "description_width": "" 1180 | } 1181 | } 1182 | } 1183 | } 1184 | }, 1185 | "nbformat": 4, 1186 | "nbformat_minor": 0 1187 | } --------------------------------------------------------------------------------