├── README.md
├── RecurrentGemma_2b_colab.ipynb
└── llama3_2_3b_colab.ipynb
/README.md:
--------------------------------------------------------------------------------
1 | # GPU Poor LLM Notebooks ⚡
2 |
3 | A good old T4 is more potent than you think!
4 |
5 | This repository is a collection of notebooks for running LLMs with fewer than 15B parameters using Transformers and Accelerate in `bfloat16`/`float16` on the mighty T4.
6 |
7 | I'll update this with each new LLM release. Feel free to open an issue with feature requests.
8 |
9 | LLMs covered so far:
10 | 1. Mathstral 7B
11 | 2. Gemma 2 9B
12 | 3. CodeGemma
13 | 4. RecurrentGemma
14 | 5. Mistral Nemo 12B
15 |
--------------------------------------------------------------------------------
/RecurrentGemma_2b_colab.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "provenance": [],
7 | "gpuType": "T4",
8 | "include_colab_link": true
9 | },
10 | "kernelspec": {
11 | "name": "python3",
12 | "display_name": "Python 3"
13 | },
14 | "language_info": {
15 | "name": "python"
16 | },
17 | "accelerator": "GPU",
18 | "widgets": {
19 | "application/vnd.jupyter.widget-state+json": {
20 | "e6f2b94e3bb345859811226dc345a6e3": {
21 | "model_module": "@jupyter-widgets/controls",
22 | "model_name": "HBoxModel",
23 | "model_module_version": "1.5.0",
24 | "state": {
25 | "_dom_classes": [],
26 | "_model_module": "@jupyter-widgets/controls",
27 | "_model_module_version": "1.5.0",
28 | "_model_name": "HBoxModel",
29 | "_view_count": null,
30 | "_view_module": "@jupyter-widgets/controls",
31 | "_view_module_version": "1.5.0",
32 | "_view_name": "HBoxView",
33 | "box_style": "",
34 | "children": [
35 | "IPY_MODEL_ce12a18f666740eabe1f71be5152e7d8",
36 | "IPY_MODEL_76a4282443194092af1039de43c523c6",
37 | "IPY_MODEL_3775878f2e484e84b6cb0971d2d35a7f"
38 | ],
39 | "layout": "IPY_MODEL_df936531829f4c1580c04719de77059c"
40 | }
41 | },
42 | "ce12a18f666740eabe1f71be5152e7d8": {
43 | "model_module": "@jupyter-widgets/controls",
44 | "model_name": "HTMLModel",
45 | "model_module_version": "1.5.0",
46 | "state": {
47 | "_dom_classes": [],
48 | "_model_module": "@jupyter-widgets/controls",
49 | "_model_module_version": "1.5.0",
50 | "_model_name": "HTMLModel",
51 | "_view_count": null,
52 | "_view_module": "@jupyter-widgets/controls",
53 | "_view_module_version": "1.5.0",
54 | "_view_name": "HTMLView",
55 | "description": "",
56 | "description_tooltip": null,
57 | "layout": "IPY_MODEL_ab60c88077984531aa08150c72fa7ab5",
58 | "placeholder": "",
59 | "style": "IPY_MODEL_ad9901a17d3d4b1b898941d16c16f1cb",
60 | "value": "tokenizer_config.json: 100%"
61 | }
62 | },
63 | "76a4282443194092af1039de43c523c6": {
64 | "model_module": "@jupyter-widgets/controls",
65 | "model_name": "FloatProgressModel",
66 | "model_module_version": "1.5.0",
67 | "state": {
68 | "_dom_classes": [],
69 | "_model_module": "@jupyter-widgets/controls",
70 | "_model_module_version": "1.5.0",
71 | "_model_name": "FloatProgressModel",
72 | "_view_count": null,
73 | "_view_module": "@jupyter-widgets/controls",
74 | "_view_module_version": "1.5.0",
75 | "_view_name": "ProgressView",
76 | "bar_style": "success",
77 | "description": "",
78 | "description_tooltip": null,
79 | "layout": "IPY_MODEL_f2fbf227d64e451681083a0ca189405c",
80 | "max": 40529,
81 | "min": 0,
82 | "orientation": "horizontal",
83 | "style": "IPY_MODEL_72364dfb4d994115bfd049dae5f53423",
84 | "value": 40529
85 | }
86 | },
87 | "3775878f2e484e84b6cb0971d2d35a7f": {
88 | "model_module": "@jupyter-widgets/controls",
89 | "model_name": "HTMLModel",
90 | "model_module_version": "1.5.0",
91 | "state": {
92 | "_dom_classes": [],
93 | "_model_module": "@jupyter-widgets/controls",
94 | "_model_module_version": "1.5.0",
95 | "_model_name": "HTMLModel",
96 | "_view_count": null,
97 | "_view_module": "@jupyter-widgets/controls",
98 | "_view_module_version": "1.5.0",
99 | "_view_name": "HTMLView",
100 | "description": "",
101 | "description_tooltip": null,
102 | "layout": "IPY_MODEL_fcbe52122eeb466d91b44011f7f9bc47",
103 | "placeholder": "",
104 | "style": "IPY_MODEL_c0915cfb93634ede86568eface4115d8",
105 | "value": " 40.5k/40.5k [00:00<00:00, 670kB/s]"
106 | }
107 | },
108 | "df936531829f4c1580c04719de77059c": {
109 | "model_module": "@jupyter-widgets/base",
110 | "model_name": "LayoutModel",
111 | "model_module_version": "1.2.0",
112 | "state": {
113 | "_model_module": "@jupyter-widgets/base",
114 | "_model_module_version": "1.2.0",
115 | "_model_name": "LayoutModel",
116 | "_view_count": null,
117 | "_view_module": "@jupyter-widgets/base",
118 | "_view_module_version": "1.2.0",
119 | "_view_name": "LayoutView",
120 | "align_content": null,
121 | "align_items": null,
122 | "align_self": null,
123 | "border": null,
124 | "bottom": null,
125 | "display": null,
126 | "flex": null,
127 | "flex_flow": null,
128 | "grid_area": null,
129 | "grid_auto_columns": null,
130 | "grid_auto_flow": null,
131 | "grid_auto_rows": null,
132 | "grid_column": null,
133 | "grid_gap": null,
134 | "grid_row": null,
135 | "grid_template_areas": null,
136 | "grid_template_columns": null,
137 | "grid_template_rows": null,
138 | "height": null,
139 | "justify_content": null,
140 | "justify_items": null,
141 | "left": null,
142 | "margin": null,
143 | "max_height": null,
144 | "max_width": null,
145 | "min_height": null,
146 | "min_width": null,
147 | "object_fit": null,
148 | "object_position": null,
149 | "order": null,
150 | "overflow": null,
151 | "overflow_x": null,
152 | "overflow_y": null,
153 | "padding": null,
154 | "right": null,
155 | "top": null,
156 | "visibility": null,
157 | "width": null
158 | }
159 | },
160 | "ab60c88077984531aa08150c72fa7ab5": {
161 | "model_module": "@jupyter-widgets/base",
162 | "model_name": "LayoutModel",
163 | "model_module_version": "1.2.0",
164 | "state": {
165 | "_model_module": "@jupyter-widgets/base",
166 | "_model_module_version": "1.2.0",
167 | "_model_name": "LayoutModel",
168 | "_view_count": null,
169 | "_view_module": "@jupyter-widgets/base",
170 | "_view_module_version": "1.2.0",
171 | "_view_name": "LayoutView",
172 | "align_content": null,
173 | "align_items": null,
174 | "align_self": null,
175 | "border": null,
176 | "bottom": null,
177 | "display": null,
178 | "flex": null,
179 | "flex_flow": null,
180 | "grid_area": null,
181 | "grid_auto_columns": null,
182 | "grid_auto_flow": null,
183 | "grid_auto_rows": null,
184 | "grid_column": null,
185 | "grid_gap": null,
186 | "grid_row": null,
187 | "grid_template_areas": null,
188 | "grid_template_columns": null,
189 | "grid_template_rows": null,
190 | "height": null,
191 | "justify_content": null,
192 | "justify_items": null,
193 | "left": null,
194 | "margin": null,
195 | "max_height": null,
196 | "max_width": null,
197 | "min_height": null,
198 | "min_width": null,
199 | "object_fit": null,
200 | "object_position": null,
201 | "order": null,
202 | "overflow": null,
203 | "overflow_x": null,
204 | "overflow_y": null,
205 | "padding": null,
206 | "right": null,
207 | "top": null,
208 | "visibility": null,
209 | "width": null
210 | }
211 | },
212 | "ad9901a17d3d4b1b898941d16c16f1cb": {
213 | "model_module": "@jupyter-widgets/controls",
214 | "model_name": "DescriptionStyleModel",
215 | "model_module_version": "1.5.0",
216 | "state": {
217 | "_model_module": "@jupyter-widgets/controls",
218 | "_model_module_version": "1.5.0",
219 | "_model_name": "DescriptionStyleModel",
220 | "_view_count": null,
221 | "_view_module": "@jupyter-widgets/base",
222 | "_view_module_version": "1.2.0",
223 | "_view_name": "StyleView",
224 | "description_width": ""
225 | }
226 | },
227 | "f2fbf227d64e451681083a0ca189405c": {
228 | "model_module": "@jupyter-widgets/base",
229 | "model_name": "LayoutModel",
230 | "model_module_version": "1.2.0",
231 | "state": {
232 | "_model_module": "@jupyter-widgets/base",
233 | "_model_module_version": "1.2.0",
234 | "_model_name": "LayoutModel",
235 | "_view_count": null,
236 | "_view_module": "@jupyter-widgets/base",
237 | "_view_module_version": "1.2.0",
238 | "_view_name": "LayoutView",
239 | "align_content": null,
240 | "align_items": null,
241 | "align_self": null,
242 | "border": null,
243 | "bottom": null,
244 | "display": null,
245 | "flex": null,
246 | "flex_flow": null,
247 | "grid_area": null,
248 | "grid_auto_columns": null,
249 | "grid_auto_flow": null,
250 | "grid_auto_rows": null,
251 | "grid_column": null,
252 | "grid_gap": null,
253 | "grid_row": null,
254 | "grid_template_areas": null,
255 | "grid_template_columns": null,
256 | "grid_template_rows": null,
257 | "height": null,
258 | "justify_content": null,
259 | "justify_items": null,
260 | "left": null,
261 | "margin": null,
262 | "max_height": null,
263 | "max_width": null,
264 | "min_height": null,
265 | "min_width": null,
266 | "object_fit": null,
267 | "object_position": null,
268 | "order": null,
269 | "overflow": null,
270 | "overflow_x": null,
271 | "overflow_y": null,
272 | "padding": null,
273 | "right": null,
274 | "top": null,
275 | "visibility": null,
276 | "width": null
277 | }
278 | },
279 | "72364dfb4d994115bfd049dae5f53423": {
280 | "model_module": "@jupyter-widgets/controls",
281 | "model_name": "ProgressStyleModel",
282 | "model_module_version": "1.5.0",
283 | "state": {
284 | "_model_module": "@jupyter-widgets/controls",
285 | "_model_module_version": "1.5.0",
286 | "_model_name": "ProgressStyleModel",
287 | "_view_count": null,
288 | "_view_module": "@jupyter-widgets/base",
289 | "_view_module_version": "1.2.0",
290 | "_view_name": "StyleView",
291 | "bar_color": null,
292 | "description_width": ""
293 | }
294 | },
295 | "fcbe52122eeb466d91b44011f7f9bc47": {
296 | "model_module": "@jupyter-widgets/base",
297 | "model_name": "LayoutModel",
298 | "model_module_version": "1.2.0",
299 | "state": {
300 | "_model_module": "@jupyter-widgets/base",
301 | "_model_module_version": "1.2.0",
302 | "_model_name": "LayoutModel",
303 | "_view_count": null,
304 | "_view_module": "@jupyter-widgets/base",
305 | "_view_module_version": "1.2.0",
306 | "_view_name": "LayoutView",
307 | "align_content": null,
308 | "align_items": null,
309 | "align_self": null,
310 | "border": null,
311 | "bottom": null,
312 | "display": null,
313 | "flex": null,
314 | "flex_flow": null,
315 | "grid_area": null,
316 | "grid_auto_columns": null,
317 | "grid_auto_flow": null,
318 | "grid_auto_rows": null,
319 | "grid_column": null,
320 | "grid_gap": null,
321 | "grid_row": null,
322 | "grid_template_areas": null,
323 | "grid_template_columns": null,
324 | "grid_template_rows": null,
325 | "height": null,
326 | "justify_content": null,
327 | "justify_items": null,
328 | "left": null,
329 | "margin": null,
330 | "max_height": null,
331 | "max_width": null,
332 | "min_height": null,
333 | "min_width": null,
334 | "object_fit": null,
335 | "object_position": null,
336 | "order": null,
337 | "overflow": null,
338 | "overflow_x": null,
339 | "overflow_y": null,
340 | "padding": null,
341 | "right": null,
342 | "top": null,
343 | "visibility": null,
344 | "width": null
345 | }
346 | },
347 | "c0915cfb93634ede86568eface4115d8": {
348 | "model_module": "@jupyter-widgets/controls",
349 | "model_name": "DescriptionStyleModel",
350 | "model_module_version": "1.5.0",
351 | "state": {
352 | "_model_module": "@jupyter-widgets/controls",
353 | "_model_module_version": "1.5.0",
354 | "_model_name": "DescriptionStyleModel",
355 | "_view_count": null,
356 | "_view_module": "@jupyter-widgets/base",
357 | "_view_module_version": "1.2.0",
358 | "_view_name": "StyleView",
359 | "description_width": ""
360 | }
361 | },
362 | "401cecca00fb42b29f1ec3fd5cfa4396": {
363 | "model_module": "@jupyter-widgets/controls",
364 | "model_name": "HBoxModel",
365 | "model_module_version": "1.5.0",
366 | "state": {
367 | "_dom_classes": [],
368 | "_model_module": "@jupyter-widgets/controls",
369 | "_model_module_version": "1.5.0",
370 | "_model_name": "HBoxModel",
371 | "_view_count": null,
372 | "_view_module": "@jupyter-widgets/controls",
373 | "_view_module_version": "1.5.0",
374 | "_view_name": "HBoxView",
375 | "box_style": "",
376 | "children": [
377 | "IPY_MODEL_e98c3904a7e346a5870c5ac768cd6a98",
378 | "IPY_MODEL_d908df5a5a8945dc88f8d0f147245bbd",
379 | "IPY_MODEL_f1a29b9608244a9db3cc919ad149ef48"
380 | ],
381 | "layout": "IPY_MODEL_d19c6905802d46c4beb6fe8886cb6e8c"
382 | }
383 | },
384 | "e98c3904a7e346a5870c5ac768cd6a98": {
385 | "model_module": "@jupyter-widgets/controls",
386 | "model_name": "HTMLModel",
387 | "model_module_version": "1.5.0",
388 | "state": {
389 | "_dom_classes": [],
390 | "_model_module": "@jupyter-widgets/controls",
391 | "_model_module_version": "1.5.0",
392 | "_model_name": "HTMLModel",
393 | "_view_count": null,
394 | "_view_module": "@jupyter-widgets/controls",
395 | "_view_module_version": "1.5.0",
396 | "_view_name": "HTMLView",
397 | "description": "",
398 | "description_tooltip": null,
399 | "layout": "IPY_MODEL_f7d7bc20d2ba40eeb576e8865cdbb8ec",
400 | "placeholder": "",
401 | "style": "IPY_MODEL_1a431c4a814941169c8feff1b4741052",
402 | "value": "Downloading shards: 100%"
403 | }
404 | },
405 | "d908df5a5a8945dc88f8d0f147245bbd": {
406 | "model_module": "@jupyter-widgets/controls",
407 | "model_name": "FloatProgressModel",
408 | "model_module_version": "1.5.0",
409 | "state": {
410 | "_dom_classes": [],
411 | "_model_module": "@jupyter-widgets/controls",
412 | "_model_module_version": "1.5.0",
413 | "_model_name": "FloatProgressModel",
414 | "_view_count": null,
415 | "_view_module": "@jupyter-widgets/controls",
416 | "_view_module_version": "1.5.0",
417 | "_view_name": "ProgressView",
418 | "bar_style": "success",
419 | "description": "",
420 | "description_tooltip": null,
421 | "layout": "IPY_MODEL_21a66f11f21e4913a1a5a975727916f0",
422 | "max": 2,
423 | "min": 0,
424 | "orientation": "horizontal",
425 | "style": "IPY_MODEL_b931a5080c154b7dbbdcebf1a48aa9a3",
426 | "value": 2
427 | }
428 | },
429 | "f1a29b9608244a9db3cc919ad149ef48": {
430 | "model_module": "@jupyter-widgets/controls",
431 | "model_name": "HTMLModel",
432 | "model_module_version": "1.5.0",
433 | "state": {
434 | "_dom_classes": [],
435 | "_model_module": "@jupyter-widgets/controls",
436 | "_model_module_version": "1.5.0",
437 | "_model_name": "HTMLModel",
438 | "_view_count": null,
439 | "_view_module": "@jupyter-widgets/controls",
440 | "_view_module_version": "1.5.0",
441 | "_view_name": "HTMLView",
442 | "description": "",
443 | "description_tooltip": null,
444 | "layout": "IPY_MODEL_6cc6f7e129fc47c9ac57d38f713c50ea",
445 | "placeholder": "",
446 | "style": "IPY_MODEL_896598f7441c4e84b4c7963b520d6daf",
447 | "value": " 2/2 [00:00<00:00, 7.70it/s]"
448 | }
449 | },
450 | "d19c6905802d46c4beb6fe8886cb6e8c": {
451 | "model_module": "@jupyter-widgets/base",
452 | "model_name": "LayoutModel",
453 | "model_module_version": "1.2.0",
454 | "state": {
455 | "_model_module": "@jupyter-widgets/base",
456 | "_model_module_version": "1.2.0",
457 | "_model_name": "LayoutModel",
458 | "_view_count": null,
459 | "_view_module": "@jupyter-widgets/base",
460 | "_view_module_version": "1.2.0",
461 | "_view_name": "LayoutView",
462 | "align_content": null,
463 | "align_items": null,
464 | "align_self": null,
465 | "border": null,
466 | "bottom": null,
467 | "display": null,
468 | "flex": null,
469 | "flex_flow": null,
470 | "grid_area": null,
471 | "grid_auto_columns": null,
472 | "grid_auto_flow": null,
473 | "grid_auto_rows": null,
474 | "grid_column": null,
475 | "grid_gap": null,
476 | "grid_row": null,
477 | "grid_template_areas": null,
478 | "grid_template_columns": null,
479 | "grid_template_rows": null,
480 | "height": null,
481 | "justify_content": null,
482 | "justify_items": null,
483 | "left": null,
484 | "margin": null,
485 | "max_height": null,
486 | "max_width": null,
487 | "min_height": null,
488 | "min_width": null,
489 | "object_fit": null,
490 | "object_position": null,
491 | "order": null,
492 | "overflow": null,
493 | "overflow_x": null,
494 | "overflow_y": null,
495 | "padding": null,
496 | "right": null,
497 | "top": null,
498 | "visibility": null,
499 | "width": null
500 | }
501 | },
502 | "f7d7bc20d2ba40eeb576e8865cdbb8ec": {
503 | "model_module": "@jupyter-widgets/base",
504 | "model_name": "LayoutModel",
505 | "model_module_version": "1.2.0",
506 | "state": {
507 | "_model_module": "@jupyter-widgets/base",
508 | "_model_module_version": "1.2.0",
509 | "_model_name": "LayoutModel",
510 | "_view_count": null,
511 | "_view_module": "@jupyter-widgets/base",
512 | "_view_module_version": "1.2.0",
513 | "_view_name": "LayoutView",
514 | "align_content": null,
515 | "align_items": null,
516 | "align_self": null,
517 | "border": null,
518 | "bottom": null,
519 | "display": null,
520 | "flex": null,
521 | "flex_flow": null,
522 | "grid_area": null,
523 | "grid_auto_columns": null,
524 | "grid_auto_flow": null,
525 | "grid_auto_rows": null,
526 | "grid_column": null,
527 | "grid_gap": null,
528 | "grid_row": null,
529 | "grid_template_areas": null,
530 | "grid_template_columns": null,
531 | "grid_template_rows": null,
532 | "height": null,
533 | "justify_content": null,
534 | "justify_items": null,
535 | "left": null,
536 | "margin": null,
537 | "max_height": null,
538 | "max_width": null,
539 | "min_height": null,
540 | "min_width": null,
541 | "object_fit": null,
542 | "object_position": null,
543 | "order": null,
544 | "overflow": null,
545 | "overflow_x": null,
546 | "overflow_y": null,
547 | "padding": null,
548 | "right": null,
549 | "top": null,
550 | "visibility": null,
551 | "width": null
552 | }
553 | },
554 | "1a431c4a814941169c8feff1b4741052": {
555 | "model_module": "@jupyter-widgets/controls",
556 | "model_name": "DescriptionStyleModel",
557 | "model_module_version": "1.5.0",
558 | "state": {
559 | "_model_module": "@jupyter-widgets/controls",
560 | "_model_module_version": "1.5.0",
561 | "_model_name": "DescriptionStyleModel",
562 | "_view_count": null,
563 | "_view_module": "@jupyter-widgets/base",
564 | "_view_module_version": "1.2.0",
565 | "_view_name": "StyleView",
566 | "description_width": ""
567 | }
568 | },
569 | "21a66f11f21e4913a1a5a975727916f0": {
570 | "model_module": "@jupyter-widgets/base",
571 | "model_name": "LayoutModel",
572 | "model_module_version": "1.2.0",
573 | "state": {
574 | "_model_module": "@jupyter-widgets/base",
575 | "_model_module_version": "1.2.0",
576 | "_model_name": "LayoutModel",
577 | "_view_count": null,
578 | "_view_module": "@jupyter-widgets/base",
579 | "_view_module_version": "1.2.0",
580 | "_view_name": "LayoutView",
581 | "align_content": null,
582 | "align_items": null,
583 | "align_self": null,
584 | "border": null,
585 | "bottom": null,
586 | "display": null,
587 | "flex": null,
588 | "flex_flow": null,
589 | "grid_area": null,
590 | "grid_auto_columns": null,
591 | "grid_auto_flow": null,
592 | "grid_auto_rows": null,
593 | "grid_column": null,
594 | "grid_gap": null,
595 | "grid_row": null,
596 | "grid_template_areas": null,
597 | "grid_template_columns": null,
598 | "grid_template_rows": null,
599 | "height": null,
600 | "justify_content": null,
601 | "justify_items": null,
602 | "left": null,
603 | "margin": null,
604 | "max_height": null,
605 | "max_width": null,
606 | "min_height": null,
607 | "min_width": null,
608 | "object_fit": null,
609 | "object_position": null,
610 | "order": null,
611 | "overflow": null,
612 | "overflow_x": null,
613 | "overflow_y": null,
614 | "padding": null,
615 | "right": null,
616 | "top": null,
617 | "visibility": null,
618 | "width": null
619 | }
620 | },
621 | "b931a5080c154b7dbbdcebf1a48aa9a3": {
622 | "model_module": "@jupyter-widgets/controls",
623 | "model_name": "ProgressStyleModel",
624 | "model_module_version": "1.5.0",
625 | "state": {
626 | "_model_module": "@jupyter-widgets/controls",
627 | "_model_module_version": "1.5.0",
628 | "_model_name": "ProgressStyleModel",
629 | "_view_count": null,
630 | "_view_module": "@jupyter-widgets/base",
631 | "_view_module_version": "1.2.0",
632 | "_view_name": "StyleView",
633 | "bar_color": null,
634 | "description_width": ""
635 | }
636 | },
637 | "6cc6f7e129fc47c9ac57d38f713c50ea": {
638 | "model_module": "@jupyter-widgets/base",
639 | "model_name": "LayoutModel",
640 | "model_module_version": "1.2.0",
641 | "state": {
642 | "_model_module": "@jupyter-widgets/base",
643 | "_model_module_version": "1.2.0",
644 | "_model_name": "LayoutModel",
645 | "_view_count": null,
646 | "_view_module": "@jupyter-widgets/base",
647 | "_view_module_version": "1.2.0",
648 | "_view_name": "LayoutView",
649 | "align_content": null,
650 | "align_items": null,
651 | "align_self": null,
652 | "border": null,
653 | "bottom": null,
654 | "display": null,
655 | "flex": null,
656 | "flex_flow": null,
657 | "grid_area": null,
658 | "grid_auto_columns": null,
659 | "grid_auto_flow": null,
660 | "grid_auto_rows": null,
661 | "grid_column": null,
662 | "grid_gap": null,
663 | "grid_row": null,
664 | "grid_template_areas": null,
665 | "grid_template_columns": null,
666 | "grid_template_rows": null,
667 | "height": null,
668 | "justify_content": null,
669 | "justify_items": null,
670 | "left": null,
671 | "margin": null,
672 | "max_height": null,
673 | "max_width": null,
674 | "min_height": null,
675 | "min_width": null,
676 | "object_fit": null,
677 | "object_position": null,
678 | "order": null,
679 | "overflow": null,
680 | "overflow_x": null,
681 | "overflow_y": null,
682 | "padding": null,
683 | "right": null,
684 | "top": null,
685 | "visibility": null,
686 | "width": null
687 | }
688 | },
689 | "896598f7441c4e84b4c7963b520d6daf": {
690 | "model_module": "@jupyter-widgets/controls",
691 | "model_name": "DescriptionStyleModel",
692 | "model_module_version": "1.5.0",
693 | "state": {
694 | "_model_module": "@jupyter-widgets/controls",
695 | "_model_module_version": "1.5.0",
696 | "_model_name": "DescriptionStyleModel",
697 | "_view_count": null,
698 | "_view_module": "@jupyter-widgets/base",
699 | "_view_module_version": "1.2.0",
700 | "_view_name": "StyleView",
701 | "description_width": ""
702 | }
703 | },
704 | "b391b63b204848009b051b9c9a5062a3": {
705 | "model_module": "@jupyter-widgets/controls",
706 | "model_name": "HBoxModel",
707 | "model_module_version": "1.5.0",
708 | "state": {
709 | "_dom_classes": [],
710 | "_model_module": "@jupyter-widgets/controls",
711 | "_model_module_version": "1.5.0",
712 | "_model_name": "HBoxModel",
713 | "_view_count": null,
714 | "_view_module": "@jupyter-widgets/controls",
715 | "_view_module_version": "1.5.0",
716 | "_view_name": "HBoxView",
717 | "box_style": "",
718 | "children": [
719 | "IPY_MODEL_0eee8063610d46139d7576ef02ddc228",
720 | "IPY_MODEL_5d3bd17d0aa44d84a91d3ac8255dc296",
721 | "IPY_MODEL_84981495b59f46009bde2cdbec478a5f"
722 | ],
723 | "layout": "IPY_MODEL_f51d9c821e3b4f558ad76706f99d76a6"
724 | }
725 | },
726 | "0eee8063610d46139d7576ef02ddc228": {
727 | "model_module": "@jupyter-widgets/controls",
728 | "model_name": "HTMLModel",
729 | "model_module_version": "1.5.0",
730 | "state": {
731 | "_dom_classes": [],
732 | "_model_module": "@jupyter-widgets/controls",
733 | "_model_module_version": "1.5.0",
734 | "_model_name": "HTMLModel",
735 | "_view_count": null,
736 | "_view_module": "@jupyter-widgets/controls",
737 | "_view_module_version": "1.5.0",
738 | "_view_name": "HTMLView",
739 | "description": "",
740 | "description_tooltip": null,
741 | "layout": "IPY_MODEL_07e7af89197e489b877e309189e6ea53",
742 | "placeholder": "",
743 | "style": "IPY_MODEL_66e1d6e6e45146a7a56d3e935f56ad51",
744 | "value": "Loading checkpoint shards: 100%"
745 | }
746 | },
747 | "5d3bd17d0aa44d84a91d3ac8255dc296": {
748 | "model_module": "@jupyter-widgets/controls",
749 | "model_name": "FloatProgressModel",
750 | "model_module_version": "1.5.0",
751 | "state": {
752 | "_dom_classes": [],
753 | "_model_module": "@jupyter-widgets/controls",
754 | "_model_module_version": "1.5.0",
755 | "_model_name": "FloatProgressModel",
756 | "_view_count": null,
757 | "_view_module": "@jupyter-widgets/controls",
758 | "_view_module_version": "1.5.0",
759 | "_view_name": "ProgressView",
760 | "bar_style": "success",
761 | "description": "",
762 | "description_tooltip": null,
763 | "layout": "IPY_MODEL_85b6c5d6fdc745d4a533004de3c97408",
764 | "max": 2,
765 | "min": 0,
766 | "orientation": "horizontal",
767 | "style": "IPY_MODEL_9f5ddcf6583246af9ec1ebe7f23446d6",
768 | "value": 2
769 | }
770 | },
771 | "84981495b59f46009bde2cdbec478a5f": {
772 | "model_module": "@jupyter-widgets/controls",
773 | "model_name": "HTMLModel",
774 | "model_module_version": "1.5.0",
775 | "state": {
776 | "_dom_classes": [],
777 | "_model_module": "@jupyter-widgets/controls",
778 | "_model_module_version": "1.5.0",
779 | "_model_name": "HTMLModel",
780 | "_view_count": null,
781 | "_view_module": "@jupyter-widgets/controls",
782 | "_view_module_version": "1.5.0",
783 | "_view_name": "HTMLView",
784 | "description": "",
785 | "description_tooltip": null,
786 | "layout": "IPY_MODEL_58b61a060918476c82be882ed6d5cc10",
787 | "placeholder": "",
788 | "style": "IPY_MODEL_7a2fce54921c4062a739fb690387f156",
789 | "value": " 2/2 [00:19<00:00, 8.21s/it]"
790 | }
791 | },
792 | "f51d9c821e3b4f558ad76706f99d76a6": {
793 | "model_module": "@jupyter-widgets/base",
794 | "model_name": "LayoutModel",
795 | "model_module_version": "1.2.0",
796 | "state": {
797 | "_model_module": "@jupyter-widgets/base",
798 | "_model_module_version": "1.2.0",
799 | "_model_name": "LayoutModel",
800 | "_view_count": null,
801 | "_view_module": "@jupyter-widgets/base",
802 | "_view_module_version": "1.2.0",
803 | "_view_name": "LayoutView",
804 | "align_content": null,
805 | "align_items": null,
806 | "align_self": null,
807 | "border": null,
808 | "bottom": null,
809 | "display": null,
810 | "flex": null,
811 | "flex_flow": null,
812 | "grid_area": null,
813 | "grid_auto_columns": null,
814 | "grid_auto_flow": null,
815 | "grid_auto_rows": null,
816 | "grid_column": null,
817 | "grid_gap": null,
818 | "grid_row": null,
819 | "grid_template_areas": null,
820 | "grid_template_columns": null,
821 | "grid_template_rows": null,
822 | "height": null,
823 | "justify_content": null,
824 | "justify_items": null,
825 | "left": null,
826 | "margin": null,
827 | "max_height": null,
828 | "max_width": null,
829 | "min_height": null,
830 | "min_width": null,
831 | "object_fit": null,
832 | "object_position": null,
833 | "order": null,
834 | "overflow": null,
835 | "overflow_x": null,
836 | "overflow_y": null,
837 | "padding": null,
838 | "right": null,
839 | "top": null,
840 | "visibility": null,
841 | "width": null
842 | }
843 | },
844 | "07e7af89197e489b877e309189e6ea53": {
845 | "model_module": "@jupyter-widgets/base",
846 | "model_name": "LayoutModel",
847 | "model_module_version": "1.2.0",
848 | "state": {
849 | "_model_module": "@jupyter-widgets/base",
850 | "_model_module_version": "1.2.0",
851 | "_model_name": "LayoutModel",
852 | "_view_count": null,
853 | "_view_module": "@jupyter-widgets/base",
854 | "_view_module_version": "1.2.0",
855 | "_view_name": "LayoutView",
856 | "align_content": null,
857 | "align_items": null,
858 | "align_self": null,
859 | "border": null,
860 | "bottom": null,
861 | "display": null,
862 | "flex": null,
863 | "flex_flow": null,
864 | "grid_area": null,
865 | "grid_auto_columns": null,
866 | "grid_auto_flow": null,
867 | "grid_auto_rows": null,
868 | "grid_column": null,
869 | "grid_gap": null,
870 | "grid_row": null,
871 | "grid_template_areas": null,
872 | "grid_template_columns": null,
873 | "grid_template_rows": null,
874 | "height": null,
875 | "justify_content": null,
876 | "justify_items": null,
877 | "left": null,
878 | "margin": null,
879 | "max_height": null,
880 | "max_width": null,
881 | "min_height": null,
882 | "min_width": null,
883 | "object_fit": null,
884 | "object_position": null,
885 | "order": null,
886 | "overflow": null,
887 | "overflow_x": null,
888 | "overflow_y": null,
889 | "padding": null,
890 | "right": null,
891 | "top": null,
892 | "visibility": null,
893 | "width": null
894 | }
895 | },
896 | "66e1d6e6e45146a7a56d3e935f56ad51": {
897 | "model_module": "@jupyter-widgets/controls",
898 | "model_name": "DescriptionStyleModel",
899 | "model_module_version": "1.5.0",
900 | "state": {
901 | "_model_module": "@jupyter-widgets/controls",
902 | "_model_module_version": "1.5.0",
903 | "_model_name": "DescriptionStyleModel",
904 | "_view_count": null,
905 | "_view_module": "@jupyter-widgets/base",
906 | "_view_module_version": "1.2.0",
907 | "_view_name": "StyleView",
908 | "description_width": ""
909 | }
910 | },
911 | "85b6c5d6fdc745d4a533004de3c97408": {
912 | "model_module": "@jupyter-widgets/base",
913 | "model_name": "LayoutModel",
914 | "model_module_version": "1.2.0",
915 | "state": {
916 | "_model_module": "@jupyter-widgets/base",
917 | "_model_module_version": "1.2.0",
918 | "_model_name": "LayoutModel",
919 | "_view_count": null,
920 | "_view_module": "@jupyter-widgets/base",
921 | "_view_module_version": "1.2.0",
922 | "_view_name": "LayoutView",
923 | "align_content": null,
924 | "align_items": null,
925 | "align_self": null,
926 | "border": null,
927 | "bottom": null,
928 | "display": null,
929 | "flex": null,
930 | "flex_flow": null,
931 | "grid_area": null,
932 | "grid_auto_columns": null,
933 | "grid_auto_flow": null,
934 | "grid_auto_rows": null,
935 | "grid_column": null,
936 | "grid_gap": null,
937 | "grid_row": null,
938 | "grid_template_areas": null,
939 | "grid_template_columns": null,
940 | "grid_template_rows": null,
941 | "height": null,
942 | "justify_content": null,
943 | "justify_items": null,
944 | "left": null,
945 | "margin": null,
946 | "max_height": null,
947 | "max_width": null,
948 | "min_height": null,
949 | "min_width": null,
950 | "object_fit": null,
951 | "object_position": null,
952 | "order": null,
953 | "overflow": null,
954 | "overflow_x": null,
955 | "overflow_y": null,
956 | "padding": null,
957 | "right": null,
958 | "top": null,
959 | "visibility": null,
960 | "width": null
961 | }
962 | },
963 | "9f5ddcf6583246af9ec1ebe7f23446d6": {
964 | "model_module": "@jupyter-widgets/controls",
965 | "model_name": "ProgressStyleModel",
966 | "model_module_version": "1.5.0",
967 | "state": {
968 | "_model_module": "@jupyter-widgets/controls",
969 | "_model_module_version": "1.5.0",
970 | "_model_name": "ProgressStyleModel",
971 | "_view_count": null,
972 | "_view_module": "@jupyter-widgets/base",
973 | "_view_module_version": "1.2.0",
974 | "_view_name": "StyleView",
975 | "bar_color": null,
976 | "description_width": ""
977 | }
978 | },
979 | "58b61a060918476c82be882ed6d5cc10": {
980 | "model_module": "@jupyter-widgets/base",
981 | "model_name": "LayoutModel",
982 | "model_module_version": "1.2.0",
983 | "state": {
984 | "_model_module": "@jupyter-widgets/base",
985 | "_model_module_version": "1.2.0",
986 | "_model_name": "LayoutModel",
987 | "_view_count": null,
988 | "_view_module": "@jupyter-widgets/base",
989 | "_view_module_version": "1.2.0",
990 | "_view_name": "LayoutView",
991 | "align_content": null,
992 | "align_items": null,
993 | "align_self": null,
994 | "border": null,
995 | "bottom": null,
996 | "display": null,
997 | "flex": null,
998 | "flex_flow": null,
999 | "grid_area": null,
1000 | "grid_auto_columns": null,
1001 | "grid_auto_flow": null,
1002 | "grid_auto_rows": null,
1003 | "grid_column": null,
1004 | "grid_gap": null,
1005 | "grid_row": null,
1006 | "grid_template_areas": null,
1007 | "grid_template_columns": null,
1008 | "grid_template_rows": null,
1009 | "height": null,
1010 | "justify_content": null,
1011 | "justify_items": null,
1012 | "left": null,
1013 | "margin": null,
1014 | "max_height": null,
1015 | "max_width": null,
1016 | "min_height": null,
1017 | "min_width": null,
1018 | "object_fit": null,
1019 | "object_position": null,
1020 | "order": null,
1021 | "overflow": null,
1022 | "overflow_x": null,
1023 | "overflow_y": null,
1024 | "padding": null,
1025 | "right": null,
1026 | "top": null,
1027 | "visibility": null,
1028 | "width": null
1029 | }
1030 | },
1031 | "7a2fce54921c4062a739fb690387f156": {
1032 | "model_module": "@jupyter-widgets/controls",
1033 | "model_name": "DescriptionStyleModel",
1034 | "model_module_version": "1.5.0",
1035 | "state": {
1036 | "_model_module": "@jupyter-widgets/controls",
1037 | "_model_module_version": "1.5.0",
1038 | "_model_name": "DescriptionStyleModel",
1039 | "_view_count": null,
1040 | "_view_module": "@jupyter-widgets/base",
1041 | "_view_module_version": "1.2.0",
1042 | "_view_name": "StyleView",
1043 | "description_width": ""
1044 | }
1045 | }
1046 | }
1047 | }
1048 | },
1049 | "cells": [
1050 | {
1051 | "cell_type": "markdown",
1052 | "metadata": {
1053 | "id": "view-in-github",
1054 | "colab_type": "text"
1055 | },
1056 | "source": [
1057 | "
"
1058 | ]
1059 | },
1060 | {
1061 | "cell_type": "markdown",
1062 | "source": [
1063 | "# RecurrentGemma - 2B & 2B-it\n",
1064 | "\n",
1065 | "RecurrentGemma is a family of open language models built on a novel recurrent architecture developed at Google. Both pre-trained (2B) and instruction-tuned (2B-it) versions are available in English.\n",
1066 | "\n",
1067 | "Like Gemma, [RecurrentGemma](https://huggingface.co/google/recurrentgemma-2b-it) models are well-suited for a variety of text generation tasks, including question answering, summarization, and reasoning. Because of its novel architecture, RecurrentGemma requires less memory than Gemma and achieves faster inference when generating long sequences."
1068 | ],
1069 | "metadata": {
1070 | "id": "MVkIfH6Cg7Fx"
1071 | }
1072 | },
1073 | {
1074 | "cell_type": "code",
1075 | "execution_count": null,
1076 | "metadata": {
1077 | "colab": {
1078 | "base_uri": "https://localhost:8080/"
1079 | },
1080 | "id": "ahVaTC6rEIVI",
1081 | "outputId": "2036392c-b381-4ca0-80ba-16ba8c87cde3"
1082 | },
1083 | "outputs": [
1084 | {
1085 | "output_type": "stream",
1086 | "name": "stdout",
1087 | "text": [
1088 | "Collecting transformers==4.40.0.dev0\n",
1089 | " Downloading https://huggingface.co/datasets/reach-vb/random-wheels/resolve/main/transformers-4.40.0.dev0-py3-none-any.whl (8.8 MB)\n",
1090 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m8.8/8.8 MB\u001b[0m \u001b[31m30.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
1091 | "\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (3.13.3)\n",
1092 | "Requirement already satisfied: huggingface-hub<1.0,>=0.19.3 in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (0.20.3)\n",
1093 | "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (1.25.2)\n",
1094 | "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (24.0)\n",
1095 | "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (6.0.1)\n",
1096 | "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (2023.12.25)\n",
1097 | "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (2.31.0)\n",
1098 | "Requirement already satisfied: tokenizers<0.19,>=0.14 in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (0.15.2)\n",
1099 | "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (0.4.2)\n",
1100 | "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (4.66.2)\n",
1101 | "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.19.3->transformers==4.40.0.dev0) (2023.6.0)\n",
1102 | "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.19.3->transformers==4.40.0.dev0) (4.10.0)\n",
1103 | "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers==4.40.0.dev0) (3.3.2)\n",
1104 | "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers==4.40.0.dev0) (3.6)\n",
1105 | "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers==4.40.0.dev0) (2.0.7)\n",
1106 | "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers==4.40.0.dev0) (2024.2.2)\n",
1107 | "Installing collected packages: transformers\n",
1108 | " Attempting uninstall: transformers\n",
1109 | " Found existing installation: transformers 4.38.2\n",
1110 | " Uninstalling transformers-4.38.2:\n",
1111 | " Successfully uninstalled transformers-4.38.2\n",
1112 | "Successfully installed transformers-4.40.0.dev0\n"
1113 | ]
1114 | }
1115 | ],
1116 | "source": [
1117 | "!pip install git+https://github.com/huggingface/transformers.git"
1118 | ]
1119 | },
1120 | {
1121 | "cell_type": "markdown",
1122 | "source": [
1123 | "## Load the model checkpoints\n",
1124 | "\n",
1125 | "Before running the code below, make sure to accept the model's terms and conditions on its model page: [google/recurrentgemma-2b-it](https://huggingface.co/google/recurrentgemma-2b-it).\n"
1126 | ],
1127 | "metadata": {
1128 | "id": "FZK4T_zHhL9Q"
1129 | }
1130 | },
1131 | {
1132 | "cell_type": "code",
1133 | "source": [
1134 | "import torch\n",
1135 | "from transformers import AutoTokenizer, AutoModelForCausalLM\n",
1136 | "\n",
1137 | "tokenizer = AutoTokenizer.from_pretrained(\"google/recurrentgemma-2b-it\")\n",
1138 | "model = AutoModelForCausalLM.from_pretrained(\"google/recurrentgemma-2b-it\", torch_dtype=torch.float16).to(\"cuda:0\")"
1139 | ],
1140 | "metadata": {
1141 | "colab": {
1142 | "base_uri": "https://localhost:8080/",
1143 | "height": 129,
1144 | "referenced_widgets": [
1145 | "e6f2b94e3bb345859811226dc345a6e3",
1146 | "ce12a18f666740eabe1f71be5152e7d8",
1147 | "76a4282443194092af1039de43c523c6",
1148 | "3775878f2e484e84b6cb0971d2d35a7f",
1149 | "df936531829f4c1580c04719de77059c",
1150 | "ab60c88077984531aa08150c72fa7ab5",
1151 | "ad9901a17d3d4b1b898941d16c16f1cb",
1152 | "f2fbf227d64e451681083a0ca189405c",
1153 | "72364dfb4d994115bfd049dae5f53423",
1154 | "fcbe52122eeb466d91b44011f7f9bc47",
1155 | "c0915cfb93634ede86568eface4115d8",
1156 | "401cecca00fb42b29f1ec3fd5cfa4396",
1157 | "e98c3904a7e346a5870c5ac768cd6a98",
1158 | "d908df5a5a8945dc88f8d0f147245bbd",
1159 | "f1a29b9608244a9db3cc919ad149ef48",
1160 | "d19c6905802d46c4beb6fe8886cb6e8c",
1161 | "f7d7bc20d2ba40eeb576e8865cdbb8ec",
1162 | "1a431c4a814941169c8feff1b4741052",
1163 | "21a66f11f21e4913a1a5a975727916f0",
1164 | "b931a5080c154b7dbbdcebf1a48aa9a3",
1165 | "6cc6f7e129fc47c9ac57d38f713c50ea",
1166 | "896598f7441c4e84b4c7963b520d6daf",
1167 | "b391b63b204848009b051b9c9a5062a3",
1168 | "0eee8063610d46139d7576ef02ddc228",
1169 | "5d3bd17d0aa44d84a91d3ac8255dc296",
1170 | "84981495b59f46009bde2cdbec478a5f",
1171 | "f51d9c821e3b4f558ad76706f99d76a6",
1172 | "07e7af89197e489b877e309189e6ea53",
1173 | "66e1d6e6e45146a7a56d3e935f56ad51",
1174 | "85b6c5d6fdc745d4a533004de3c97408",
1175 | "9f5ddcf6583246af9ec1ebe7f23446d6",
1176 | "58b61a060918476c82be882ed6d5cc10",
1177 | "7a2fce54921c4062a739fb690387f156"
1178 | ]
1179 | },
1180 | "id": "XItA_HZ-EPIR",
1181 | "outputId": "22b1edbc-c6d7-4ad0-b992-0f59682a30ce"
1182 | },
1183 | "execution_count": null,
1184 | "outputs": [
1185 | {
1186 | "output_type": "display_data",
1187 | "data": {
1188 | "text/plain": [
1189 | "tokenizer_config.json: 0%| | 0.00/40.5k [00:00, ?B/s]"
1190 | ],
1191 | "application/vnd.jupyter.widget-view+json": {
1192 | "version_major": 2,
1193 | "version_minor": 0,
1194 | "model_id": "e6f2b94e3bb345859811226dc345a6e3"
1195 | }
1196 | },
1197 | "metadata": {}
1198 | },
1199 | {
1200 | "output_type": "display_data",
1201 | "data": {
1202 | "text/plain": [
1203 | "Downloading shards: 0%| | 0/2 [00:00, ?it/s]"
1204 | ],
1205 | "application/vnd.jupyter.widget-view+json": {
1206 | "version_major": 2,
1207 | "version_minor": 0,
1208 | "model_id": "401cecca00fb42b29f1ec3fd5cfa4396"
1209 | }
1210 | },
1211 | "metadata": {}
1212 | },
1213 | {
1214 | "output_type": "display_data",
1215 | "data": {
1216 | "text/plain": [
1217 | "Loading checkpoint shards: 0%| | 0/2 [00:00, ?it/s]"
1218 | ],
1219 | "application/vnd.jupyter.widget-view+json": {
1220 | "version_major": 2,
1221 | "version_minor": 0,
1222 | "model_id": "b391b63b204848009b051b9c9a5062a3"
1223 | }
1224 | },
1225 | "metadata": {}
1226 | }
1227 | ]
1228 | },
1229 | {
1230 | "cell_type": "markdown",
1231 | "source": [
1232 | "## Prepare the input text with the chat template\n",
1233 | "\n",
1234 | "The instruction-tuned models use a chat template that must be adhered to for conversational use. The easiest way to apply it is using the tokenizer's built-in chat template, as shown in the following snippet.\n",
1235 | "\n",
1236 | "With the model and tokenizer already loaded, let's apply the chat template to a conversation. In this example, we'll start with a single user interaction:"
1237 | ],
1238 | "metadata": {
1239 | "id": "u3hoYG18hmHS"
1240 | }
1241 | },
1242 | {
1243 | "cell_type": "code",
1244 | "source": [
1245 | "chat = [\n",
1246 | " { \"role\": \"user\", \"content\": \"Write a hello world program\" },\n",
1247 | "]\n",
1248 | "prompt = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)"
1249 | ],
1250 | "metadata": {
1251 | "id": "IrVmrmVpkgN3"
1252 | },
1253 | "execution_count": null,
1254 | "outputs": []
1255 | },
1256 | {
1257 | "cell_type": "markdown",
1258 | "source": [
1259 | "## Tokenize the inputs"
1260 | ],
1261 | "metadata": {
1262 | "id": "fDAnnNAYsghB"
1263 | }
1264 | },
1265 | {
1266 | "cell_type": "code",
1267 | "source": [
1268 | "inputs = tokenizer.encode(prompt, add_special_tokens=False, return_tensors=\"pt\").to(model.device)"
1269 | ],
1270 | "metadata": {
1271 | "id": "dlokCV7isgEI"
1272 | },
1273 | "execution_count": null,
1274 | "outputs": []
1275 | },
1276 | {
1277 | "cell_type": "markdown",
1278 | "source": [
1279 | "## Pass the input through the model and generate."
1280 | ],
1281 | "metadata": {
1282 | "id": "pBXeqctLhuGy"
1283 | }
1284 | },
1285 | {
1286 | "cell_type": "code",
1287 | "source": [
1288 | "outputs = model.generate(input_ids=inputs.to(model.device), max_new_tokens=150)\n",
1289 | "print(tokenizer.batch_decode(outputs, skip_special_tokens=True))"
1290 | ],
1291 | "metadata": {
1292 | "colab": {
1293 | "base_uri": "https://localhost:8080/"
1294 | },
1295 | "id": "odmrX7pQeRo6",
1296 | "outputId": "3d2f4189-0f7e-4536-eee1-e5265915c657"
1297 | },
1298 | "execution_count": null,
1299 | "outputs": [
1300 | {
1301 | "output_type": "stream",
1302 | "name": "stdout",
1303 | "text": [
1304 | "['user\\nWrite a hello world program\\nmodel\\n```python\\nprint(\"Hello, world!\")\\n```\\n\\nThis program will print the message \"Hello, world!\" to the console.\\n\\n**Explanation:**\\n\\n* `print()` is a built-in Python function that prints the given argument to the console.\\n* `\"Hello, world!\"` is the string that will be printed.\\n\\n**Output:**\\n\\n```\\nHello, world!\\n```']\n"
1305 | ]
1306 | }
1307 | ]
1308 | },
1309 | {
1310 | "cell_type": "markdown",
1311 | "source": [
1312 | "Enjoy! There's much more you can do to get the most out of text generation. Check out this guide: https://huggingface.co/docs/transformers/generation_strategies"
1313 | ],
1314 | "metadata": {
1315 | "id": "rkpXJ5sHwmMH"
1316 | }
1317 | }
1318 | ]
1319 | }
--------------------------------------------------------------------------------
/llama3_2_3b_colab.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "provenance": [],
7 | "gpuType": "T4",
8 | "authorship_tag": "ABX9TyOes6wtI/0rnFu0CghstjWU",
9 | "include_colab_link": true
10 | },
11 | "kernelspec": {
12 | "name": "python3",
13 | "display_name": "Python 3"
14 | },
15 | "language_info": {
16 | "name": "python"
17 | },
18 | "accelerator": "GPU",
19 | "widgets": {
20 | "application/vnd.jupyter.widget-state+json": {
21 | "0b10add730774dcbb03cf0834fd0b724": {
22 | "model_module": "@jupyter-widgets/controls",
23 | "model_name": "HBoxModel",
24 | "model_module_version": "1.5.0",
25 | "state": {
26 | "_dom_classes": [],
27 | "_model_module": "@jupyter-widgets/controls",
28 | "_model_module_version": "1.5.0",
29 | "_model_name": "HBoxModel",
30 | "_view_count": null,
31 | "_view_module": "@jupyter-widgets/controls",
32 | "_view_module_version": "1.5.0",
33 | "_view_name": "HBoxView",
34 | "box_style": "",
35 | "children": [
36 | "IPY_MODEL_a74dd475cb0848c0bf343bb772ddac6a",
37 | "IPY_MODEL_3f3bf676a4dc4673a1acca0ef4616033",
38 | "IPY_MODEL_f7204a8184db44cb8bbb3d3e12478a0f"
39 | ],
40 | "layout": "IPY_MODEL_e1048e107534477897425885d08094a9"
41 | }
42 | },
43 | "a74dd475cb0848c0bf343bb772ddac6a": {
44 | "model_module": "@jupyter-widgets/controls",
45 | "model_name": "HTMLModel",
46 | "model_module_version": "1.5.0",
47 | "state": {
48 | "_dom_classes": [],
49 | "_model_module": "@jupyter-widgets/controls",
50 | "_model_module_version": "1.5.0",
51 | "_model_name": "HTMLModel",
52 | "_view_count": null,
53 | "_view_module": "@jupyter-widgets/controls",
54 | "_view_module_version": "1.5.0",
55 | "_view_name": "HTMLView",
56 | "description": "",
57 | "description_tooltip": null,
58 | "layout": "IPY_MODEL_2fe67aae4b854a2c814cca55840b81c2",
59 | "placeholder": "",
60 | "style": "IPY_MODEL_d8ae9d0d1d254353ba82b1574d7fe1dd",
61 | "value": "Downloading shards: 100%"
62 | }
63 | },
64 | "3f3bf676a4dc4673a1acca0ef4616033": {
65 | "model_module": "@jupyter-widgets/controls",
66 | "model_name": "FloatProgressModel",
67 | "model_module_version": "1.5.0",
68 | "state": {
69 | "_dom_classes": [],
70 | "_model_module": "@jupyter-widgets/controls",
71 | "_model_module_version": "1.5.0",
72 | "_model_name": "FloatProgressModel",
73 | "_view_count": null,
74 | "_view_module": "@jupyter-widgets/controls",
75 | "_view_module_version": "1.5.0",
76 | "_view_name": "ProgressView",
77 | "bar_style": "success",
78 | "description": "",
79 | "description_tooltip": null,
80 | "layout": "IPY_MODEL_f7add879d74a48ae9ffc18c8b7694b4a",
81 | "max": 2,
82 | "min": 0,
83 | "orientation": "horizontal",
84 | "style": "IPY_MODEL_11655f24216f46b9a07e4944f70cede1",
85 | "value": 2
86 | }
87 | },
88 | "f7204a8184db44cb8bbb3d3e12478a0f": {
89 | "model_module": "@jupyter-widgets/controls",
90 | "model_name": "HTMLModel",
91 | "model_module_version": "1.5.0",
92 | "state": {
93 | "_dom_classes": [],
94 | "_model_module": "@jupyter-widgets/controls",
95 | "_model_module_version": "1.5.0",
96 | "_model_name": "HTMLModel",
97 | "_view_count": null,
98 | "_view_module": "@jupyter-widgets/controls",
99 | "_view_module_version": "1.5.0",
100 | "_view_name": "HTMLView",
101 | "description": "",
102 | "description_tooltip": null,
103 | "layout": "IPY_MODEL_1d0264c3d2704b6bbe51583c4b4a3b96",
104 | "placeholder": "",
105 | "style": "IPY_MODEL_7ec800ba8b1a4fbeae940b7aab631794",
106 | "value": " 2/2 [01:55<00:00, 58.51s/it]"
107 | }
108 | },
109 | "e1048e107534477897425885d08094a9": {
110 | "model_module": "@jupyter-widgets/base",
111 | "model_name": "LayoutModel",
112 | "model_module_version": "1.2.0",
113 | "state": {
114 | "_model_module": "@jupyter-widgets/base",
115 | "_model_module_version": "1.2.0",
116 | "_model_name": "LayoutModel",
117 | "_view_count": null,
118 | "_view_module": "@jupyter-widgets/base",
119 | "_view_module_version": "1.2.0",
120 | "_view_name": "LayoutView",
121 | "align_content": null,
122 | "align_items": null,
123 | "align_self": null,
124 | "border": null,
125 | "bottom": null,
126 | "display": null,
127 | "flex": null,
128 | "flex_flow": null,
129 | "grid_area": null,
130 | "grid_auto_columns": null,
131 | "grid_auto_flow": null,
132 | "grid_auto_rows": null,
133 | "grid_column": null,
134 | "grid_gap": null,
135 | "grid_row": null,
136 | "grid_template_areas": null,
137 | "grid_template_columns": null,
138 | "grid_template_rows": null,
139 | "height": null,
140 | "justify_content": null,
141 | "justify_items": null,
142 | "left": null,
143 | "margin": null,
144 | "max_height": null,
145 | "max_width": null,
146 | "min_height": null,
147 | "min_width": null,
148 | "object_fit": null,
149 | "object_position": null,
150 | "order": null,
151 | "overflow": null,
152 | "overflow_x": null,
153 | "overflow_y": null,
154 | "padding": null,
155 | "right": null,
156 | "top": null,
157 | "visibility": null,
158 | "width": null
159 | }
160 | },
161 | "2fe67aae4b854a2c814cca55840b81c2": {
162 | "model_module": "@jupyter-widgets/base",
163 | "model_name": "LayoutModel",
164 | "model_module_version": "1.2.0",
165 | "state": {
166 | "_model_module": "@jupyter-widgets/base",
167 | "_model_module_version": "1.2.0",
168 | "_model_name": "LayoutModel",
169 | "_view_count": null,
170 | "_view_module": "@jupyter-widgets/base",
171 | "_view_module_version": "1.2.0",
172 | "_view_name": "LayoutView",
173 | "align_content": null,
174 | "align_items": null,
175 | "align_self": null,
176 | "border": null,
177 | "bottom": null,
178 | "display": null,
179 | "flex": null,
180 | "flex_flow": null,
181 | "grid_area": null,
182 | "grid_auto_columns": null,
183 | "grid_auto_flow": null,
184 | "grid_auto_rows": null,
185 | "grid_column": null,
186 | "grid_gap": null,
187 | "grid_row": null,
188 | "grid_template_areas": null,
189 | "grid_template_columns": null,
190 | "grid_template_rows": null,
191 | "height": null,
192 | "justify_content": null,
193 | "justify_items": null,
194 | "left": null,
195 | "margin": null,
196 | "max_height": null,
197 | "max_width": null,
198 | "min_height": null,
199 | "min_width": null,
200 | "object_fit": null,
201 | "object_position": null,
202 | "order": null,
203 | "overflow": null,
204 | "overflow_x": null,
205 | "overflow_y": null,
206 | "padding": null,
207 | "right": null,
208 | "top": null,
209 | "visibility": null,
210 | "width": null
211 | }
212 | },
213 | "d8ae9d0d1d254353ba82b1574d7fe1dd": {
214 | "model_module": "@jupyter-widgets/controls",
215 | "model_name": "DescriptionStyleModel",
216 | "model_module_version": "1.5.0",
217 | "state": {
218 | "_model_module": "@jupyter-widgets/controls",
219 | "_model_module_version": "1.5.0",
220 | "_model_name": "DescriptionStyleModel",
221 | "_view_count": null,
222 | "_view_module": "@jupyter-widgets/base",
223 | "_view_module_version": "1.2.0",
224 | "_view_name": "StyleView",
225 | "description_width": ""
226 | }
227 | },
228 | "f7add879d74a48ae9ffc18c8b7694b4a": {
229 | "model_module": "@jupyter-widgets/base",
230 | "model_name": "LayoutModel",
231 | "model_module_version": "1.2.0",
232 | "state": {
233 | "_model_module": "@jupyter-widgets/base",
234 | "_model_module_version": "1.2.0",
235 | "_model_name": "LayoutModel",
236 | "_view_count": null,
237 | "_view_module": "@jupyter-widgets/base",
238 | "_view_module_version": "1.2.0",
239 | "_view_name": "LayoutView",
240 | "align_content": null,
241 | "align_items": null,
242 | "align_self": null,
243 | "border": null,
244 | "bottom": null,
245 | "display": null,
246 | "flex": null,
247 | "flex_flow": null,
248 | "grid_area": null,
249 | "grid_auto_columns": null,
250 | "grid_auto_flow": null,
251 | "grid_auto_rows": null,
252 | "grid_column": null,
253 | "grid_gap": null,
254 | "grid_row": null,
255 | "grid_template_areas": null,
256 | "grid_template_columns": null,
257 | "grid_template_rows": null,
258 | "height": null,
259 | "justify_content": null,
260 | "justify_items": null,
261 | "left": null,
262 | "margin": null,
263 | "max_height": null,
264 | "max_width": null,
265 | "min_height": null,
266 | "min_width": null,
267 | "object_fit": null,
268 | "object_position": null,
269 | "order": null,
270 | "overflow": null,
271 | "overflow_x": null,
272 | "overflow_y": null,
273 | "padding": null,
274 | "right": null,
275 | "top": null,
276 | "visibility": null,
277 | "width": null
278 | }
279 | },
280 | "11655f24216f46b9a07e4944f70cede1": {
281 | "model_module": "@jupyter-widgets/controls",
282 | "model_name": "ProgressStyleModel",
283 | "model_module_version": "1.5.0",
284 | "state": {
285 | "_model_module": "@jupyter-widgets/controls",
286 | "_model_module_version": "1.5.0",
287 | "_model_name": "ProgressStyleModel",
288 | "_view_count": null,
289 | "_view_module": "@jupyter-widgets/base",
290 | "_view_module_version": "1.2.0",
291 | "_view_name": "StyleView",
292 | "bar_color": null,
293 | "description_width": ""
294 | }
295 | },
296 | "1d0264c3d2704b6bbe51583c4b4a3b96": {
297 | "model_module": "@jupyter-widgets/base",
298 | "model_name": "LayoutModel",
299 | "model_module_version": "1.2.0",
300 | "state": {
301 | "_model_module": "@jupyter-widgets/base",
302 | "_model_module_version": "1.2.0",
303 | "_model_name": "LayoutModel",
304 | "_view_count": null,
305 | "_view_module": "@jupyter-widgets/base",
306 | "_view_module_version": "1.2.0",
307 | "_view_name": "LayoutView",
308 | "align_content": null,
309 | "align_items": null,
310 | "align_self": null,
311 | "border": null,
312 | "bottom": null,
313 | "display": null,
314 | "flex": null,
315 | "flex_flow": null,
316 | "grid_area": null,
317 | "grid_auto_columns": null,
318 | "grid_auto_flow": null,
319 | "grid_auto_rows": null,
320 | "grid_column": null,
321 | "grid_gap": null,
322 | "grid_row": null,
323 | "grid_template_areas": null,
324 | "grid_template_columns": null,
325 | "grid_template_rows": null,
326 | "height": null,
327 | "justify_content": null,
328 | "justify_items": null,
329 | "left": null,
330 | "margin": null,
331 | "max_height": null,
332 | "max_width": null,
333 | "min_height": null,
334 | "min_width": null,
335 | "object_fit": null,
336 | "object_position": null,
337 | "order": null,
338 | "overflow": null,
339 | "overflow_x": null,
340 | "overflow_y": null,
341 | "padding": null,
342 | "right": null,
343 | "top": null,
344 | "visibility": null,
345 | "width": null
346 | }
347 | },
348 | "7ec800ba8b1a4fbeae940b7aab631794": {
349 | "model_module": "@jupyter-widgets/controls",
350 | "model_name": "DescriptionStyleModel",
351 | "model_module_version": "1.5.0",
352 | "state": {
353 | "_model_module": "@jupyter-widgets/controls",
354 | "_model_module_version": "1.5.0",
355 | "_model_name": "DescriptionStyleModel",
356 | "_view_count": null,
357 | "_view_module": "@jupyter-widgets/base",
358 | "_view_module_version": "1.2.0",
359 | "_view_name": "StyleView",
360 | "description_width": ""
361 | }
362 | },
363 | "943c1bafe9ec4618869cfd028a610213": {
364 | "model_module": "@jupyter-widgets/controls",
365 | "model_name": "HBoxModel",
366 | "model_module_version": "1.5.0",
367 | "state": {
368 | "_dom_classes": [],
369 | "_model_module": "@jupyter-widgets/controls",
370 | "_model_module_version": "1.5.0",
371 | "_model_name": "HBoxModel",
372 | "_view_count": null,
373 | "_view_module": "@jupyter-widgets/controls",
374 | "_view_module_version": "1.5.0",
375 | "_view_name": "HBoxView",
376 | "box_style": "",
377 | "children": [
378 | "IPY_MODEL_abf1255b444e4cb9be53d1a556f2ca44",
379 | "IPY_MODEL_c77ab3e2cec54aa7b11fc1957c64389b",
380 | "IPY_MODEL_33da38440919410bae7be13caec4929d"
381 | ],
382 | "layout": "IPY_MODEL_71b9f62cd90e4c898518fe75267e0e8f"
383 | }
384 | },
385 | "abf1255b444e4cb9be53d1a556f2ca44": {
386 | "model_module": "@jupyter-widgets/controls",
387 | "model_name": "HTMLModel",
388 | "model_module_version": "1.5.0",
389 | "state": {
390 | "_dom_classes": [],
391 | "_model_module": "@jupyter-widgets/controls",
392 | "_model_module_version": "1.5.0",
393 | "_model_name": "HTMLModel",
394 | "_view_count": null,
395 | "_view_module": "@jupyter-widgets/controls",
396 | "_view_module_version": "1.5.0",
397 | "_view_name": "HTMLView",
398 | "description": "",
399 | "description_tooltip": null,
400 | "layout": "IPY_MODEL_0ea2047d4cec4026bfa9ae05eb5d4e18",
401 | "placeholder": "",
402 | "style": "IPY_MODEL_05c1d4ab928d480094cf672339a98f29",
403 | "value": "model-00001-of-00002.safetensors: 100%"
404 | }
405 | },
406 | "c77ab3e2cec54aa7b11fc1957c64389b": {
407 | "model_module": "@jupyter-widgets/controls",
408 | "model_name": "FloatProgressModel",
409 | "model_module_version": "1.5.0",
410 | "state": {
411 | "_dom_classes": [],
412 | "_model_module": "@jupyter-widgets/controls",
413 | "_model_module_version": "1.5.0",
414 | "_model_name": "FloatProgressModel",
415 | "_view_count": null,
416 | "_view_module": "@jupyter-widgets/controls",
417 | "_view_module_version": "1.5.0",
418 | "_view_name": "ProgressView",
419 | "bar_style": "success",
420 | "description": "",
421 | "description_tooltip": null,
422 | "layout": "IPY_MODEL_d164fe1776bc413bae36e96fc0371907",
423 | "max": 4965799096,
424 | "min": 0,
425 | "orientation": "horizontal",
426 | "style": "IPY_MODEL_e8f2756358d24df9b887bfc3d5531e83",
427 | "value": 4965799096
428 | }
429 | },
430 | "33da38440919410bae7be13caec4929d": {
431 | "model_module": "@jupyter-widgets/controls",
432 | "model_name": "HTMLModel",
433 | "model_module_version": "1.5.0",
434 | "state": {
435 | "_dom_classes": [],
436 | "_model_module": "@jupyter-widgets/controls",
437 | "_model_module_version": "1.5.0",
438 | "_model_name": "HTMLModel",
439 | "_view_count": null,
440 | "_view_module": "@jupyter-widgets/controls",
441 | "_view_module_version": "1.5.0",
442 | "_view_name": "HTMLView",
443 | "description": "",
444 | "description_tooltip": null,
445 | "layout": "IPY_MODEL_11e5200b4fee425991ceed611bfb560a",
446 | "placeholder": "",
447 | "style": "IPY_MODEL_3a5b9c9609a544858f8cf78b0b72788f",
448 | "value": " 4.97G/4.97G [00:52<00:00, 38.9MB/s]"
449 | }
450 | },
451 | "71b9f62cd90e4c898518fe75267e0e8f": {
452 | "model_module": "@jupyter-widgets/base",
453 | "model_name": "LayoutModel",
454 | "model_module_version": "1.2.0",
455 | "state": {
456 | "_model_module": "@jupyter-widgets/base",
457 | "_model_module_version": "1.2.0",
458 | "_model_name": "LayoutModel",
459 | "_view_count": null,
460 | "_view_module": "@jupyter-widgets/base",
461 | "_view_module_version": "1.2.0",
462 | "_view_name": "LayoutView",
463 | "align_content": null,
464 | "align_items": null,
465 | "align_self": null,
466 | "border": null,
467 | "bottom": null,
468 | "display": null,
469 | "flex": null,
470 | "flex_flow": null,
471 | "grid_area": null,
472 | "grid_auto_columns": null,
473 | "grid_auto_flow": null,
474 | "grid_auto_rows": null,
475 | "grid_column": null,
476 | "grid_gap": null,
477 | "grid_row": null,
478 | "grid_template_areas": null,
479 | "grid_template_columns": null,
480 | "grid_template_rows": null,
481 | "height": null,
482 | "justify_content": null,
483 | "justify_items": null,
484 | "left": null,
485 | "margin": null,
486 | "max_height": null,
487 | "max_width": null,
488 | "min_height": null,
489 | "min_width": null,
490 | "object_fit": null,
491 | "object_position": null,
492 | "order": null,
493 | "overflow": null,
494 | "overflow_x": null,
495 | "overflow_y": null,
496 | "padding": null,
497 | "right": null,
498 | "top": null,
499 | "visibility": null,
500 | "width": null
501 | }
502 | },
503 | "0ea2047d4cec4026bfa9ae05eb5d4e18": {
504 | "model_module": "@jupyter-widgets/base",
505 | "model_name": "LayoutModel",
506 | "model_module_version": "1.2.0",
507 | "state": {
508 | "_model_module": "@jupyter-widgets/base",
509 | "_model_module_version": "1.2.0",
510 | "_model_name": "LayoutModel",
511 | "_view_count": null,
512 | "_view_module": "@jupyter-widgets/base",
513 | "_view_module_version": "1.2.0",
514 | "_view_name": "LayoutView",
515 | "align_content": null,
516 | "align_items": null,
517 | "align_self": null,
518 | "border": null,
519 | "bottom": null,
520 | "display": null,
521 | "flex": null,
522 | "flex_flow": null,
523 | "grid_area": null,
524 | "grid_auto_columns": null,
525 | "grid_auto_flow": null,
526 | "grid_auto_rows": null,
527 | "grid_column": null,
528 | "grid_gap": null,
529 | "grid_row": null,
530 | "grid_template_areas": null,
531 | "grid_template_columns": null,
532 | "grid_template_rows": null,
533 | "height": null,
534 | "justify_content": null,
535 | "justify_items": null,
536 | "left": null,
537 | "margin": null,
538 | "max_height": null,
539 | "max_width": null,
540 | "min_height": null,
541 | "min_width": null,
542 | "object_fit": null,
543 | "object_position": null,
544 | "order": null,
545 | "overflow": null,
546 | "overflow_x": null,
547 | "overflow_y": null,
548 | "padding": null,
549 | "right": null,
550 | "top": null,
551 | "visibility": null,
552 | "width": null
553 | }
554 | },
555 | "05c1d4ab928d480094cf672339a98f29": {
556 | "model_module": "@jupyter-widgets/controls",
557 | "model_name": "DescriptionStyleModel",
558 | "model_module_version": "1.5.0",
559 | "state": {
560 | "_model_module": "@jupyter-widgets/controls",
561 | "_model_module_version": "1.5.0",
562 | "_model_name": "DescriptionStyleModel",
563 | "_view_count": null,
564 | "_view_module": "@jupyter-widgets/base",
565 | "_view_module_version": "1.2.0",
566 | "_view_name": "StyleView",
567 | "description_width": ""
568 | }
569 | },
570 | "d164fe1776bc413bae36e96fc0371907": {
571 | "model_module": "@jupyter-widgets/base",
572 | "model_name": "LayoutModel",
573 | "model_module_version": "1.2.0",
574 | "state": {
575 | "_model_module": "@jupyter-widgets/base",
576 | "_model_module_version": "1.2.0",
577 | "_model_name": "LayoutModel",
578 | "_view_count": null,
579 | "_view_module": "@jupyter-widgets/base",
580 | "_view_module_version": "1.2.0",
581 | "_view_name": "LayoutView",
582 | "align_content": null,
583 | "align_items": null,
584 | "align_self": null,
585 | "border": null,
586 | "bottom": null,
587 | "display": null,
588 | "flex": null,
589 | "flex_flow": null,
590 | "grid_area": null,
591 | "grid_auto_columns": null,
592 | "grid_auto_flow": null,
593 | "grid_auto_rows": null,
594 | "grid_column": null,
595 | "grid_gap": null,
596 | "grid_row": null,
597 | "grid_template_areas": null,
598 | "grid_template_columns": null,
599 | "grid_template_rows": null,
600 | "height": null,
601 | "justify_content": null,
602 | "justify_items": null,
603 | "left": null,
604 | "margin": null,
605 | "max_height": null,
606 | "max_width": null,
607 | "min_height": null,
608 | "min_width": null,
609 | "object_fit": null,
610 | "object_position": null,
611 | "order": null,
612 | "overflow": null,
613 | "overflow_x": null,
614 | "overflow_y": null,
615 | "padding": null,
616 | "right": null,
617 | "top": null,
618 | "visibility": null,
619 | "width": null
620 | }
621 | },
622 | "e8f2756358d24df9b887bfc3d5531e83": {
623 | "model_module": "@jupyter-widgets/controls",
624 | "model_name": "ProgressStyleModel",
625 | "model_module_version": "1.5.0",
626 | "state": {
627 | "_model_module": "@jupyter-widgets/controls",
628 | "_model_module_version": "1.5.0",
629 | "_model_name": "ProgressStyleModel",
630 | "_view_count": null,
631 | "_view_module": "@jupyter-widgets/base",
632 | "_view_module_version": "1.2.0",
633 | "_view_name": "StyleView",
634 | "bar_color": null,
635 | "description_width": ""
636 | }
637 | },
638 | "11e5200b4fee425991ceed611bfb560a": {
639 | "model_module": "@jupyter-widgets/base",
640 | "model_name": "LayoutModel",
641 | "model_module_version": "1.2.0",
642 | "state": {
643 | "_model_module": "@jupyter-widgets/base",
644 | "_model_module_version": "1.2.0",
645 | "_model_name": "LayoutModel",
646 | "_view_count": null,
647 | "_view_module": "@jupyter-widgets/base",
648 | "_view_module_version": "1.2.0",
649 | "_view_name": "LayoutView",
650 | "align_content": null,
651 | "align_items": null,
652 | "align_self": null,
653 | "border": null,
654 | "bottom": null,
655 | "display": null,
656 | "flex": null,
657 | "flex_flow": null,
658 | "grid_area": null,
659 | "grid_auto_columns": null,
660 | "grid_auto_flow": null,
661 | "grid_auto_rows": null,
662 | "grid_column": null,
663 | "grid_gap": null,
664 | "grid_row": null,
665 | "grid_template_areas": null,
666 | "grid_template_columns": null,
667 | "grid_template_rows": null,
668 | "height": null,
669 | "justify_content": null,
670 | "justify_items": null,
671 | "left": null,
672 | "margin": null,
673 | "max_height": null,
674 | "max_width": null,
675 | "min_height": null,
676 | "min_width": null,
677 | "object_fit": null,
678 | "object_position": null,
679 | "order": null,
680 | "overflow": null,
681 | "overflow_x": null,
682 | "overflow_y": null,
683 | "padding": null,
684 | "right": null,
685 | "top": null,
686 | "visibility": null,
687 | "width": null
688 | }
689 | },
690 | "3a5b9c9609a544858f8cf78b0b72788f": {
691 | "model_module": "@jupyter-widgets/controls",
692 | "model_name": "DescriptionStyleModel",
693 | "model_module_version": "1.5.0",
694 | "state": {
695 | "_model_module": "@jupyter-widgets/controls",
696 | "_model_module_version": "1.5.0",
697 | "_model_name": "DescriptionStyleModel",
698 | "_view_count": null,
699 | "_view_module": "@jupyter-widgets/base",
700 | "_view_module_version": "1.2.0",
701 | "_view_name": "StyleView",
702 | "description_width": ""
703 | }
704 | },
705 | "6e6d6dae6bc44498b6774c83685fb0b9": {
706 | "model_module": "@jupyter-widgets/controls",
707 | "model_name": "HBoxModel",
708 | "model_module_version": "1.5.0",
709 | "state": {
710 | "_dom_classes": [],
711 | "_model_module": "@jupyter-widgets/controls",
712 | "_model_module_version": "1.5.0",
713 | "_model_name": "HBoxModel",
714 | "_view_count": null,
715 | "_view_module": "@jupyter-widgets/controls",
716 | "_view_module_version": "1.5.0",
717 | "_view_name": "HBoxView",
718 | "box_style": "",
719 | "children": [
720 | "IPY_MODEL_4156204d442346a79a9f7d5193b02e1e",
721 | "IPY_MODEL_32d71755c0f8460790b88f963ff1e4e6",
722 | "IPY_MODEL_23fba6d9a7044060a9b5a02ee7ba71e5"
723 | ],
724 | "layout": "IPY_MODEL_2da95739a2444ce896fb3ff81c59e6d0"
725 | }
726 | },
727 | "4156204d442346a79a9f7d5193b02e1e": {
728 | "model_module": "@jupyter-widgets/controls",
729 | "model_name": "HTMLModel",
730 | "model_module_version": "1.5.0",
731 | "state": {
732 | "_dom_classes": [],
733 | "_model_module": "@jupyter-widgets/controls",
734 | "_model_module_version": "1.5.0",
735 | "_model_name": "HTMLModel",
736 | "_view_count": null,
737 | "_view_module": "@jupyter-widgets/controls",
738 | "_view_module_version": "1.5.0",
739 | "_view_name": "HTMLView",
740 | "description": "",
741 | "description_tooltip": null,
742 | "layout": "IPY_MODEL_2bdfdc5610ec4dc1acfe22283eb1541e",
743 | "placeholder": "",
744 | "style": "IPY_MODEL_baba10fb45dc42bcbf6321dd59cfaf6e",
745 | "value": "model-00002-of-00002.safetensors: 100%"
746 | }
747 | },
748 | "32d71755c0f8460790b88f963ff1e4e6": {
749 | "model_module": "@jupyter-widgets/controls",
750 | "model_name": "FloatProgressModel",
751 | "model_module_version": "1.5.0",
752 | "state": {
753 | "_dom_classes": [],
754 | "_model_module": "@jupyter-widgets/controls",
755 | "_model_module_version": "1.5.0",
756 | "_model_name": "FloatProgressModel",
757 | "_view_count": null,
758 | "_view_module": "@jupyter-widgets/controls",
759 | "_view_module_version": "1.5.0",
760 | "_view_name": "ProgressView",
761 | "bar_style": "success",
762 | "description": "",
763 | "description_tooltip": null,
764 | "layout": "IPY_MODEL_a1404373c9314d81bf99c41fdc9f11b4",
765 | "max": 1459729952,
766 | "min": 0,
767 | "orientation": "horizontal",
768 | "style": "IPY_MODEL_1eb7881728a04d1ca9e209a17da9bef2",
769 | "value": 1459729952
770 | }
771 | },
772 | "23fba6d9a7044060a9b5a02ee7ba71e5": {
773 | "model_module": "@jupyter-widgets/controls",
774 | "model_name": "HTMLModel",
775 | "model_module_version": "1.5.0",
776 | "state": {
777 | "_dom_classes": [],
778 | "_model_module": "@jupyter-widgets/controls",
779 | "_model_module_version": "1.5.0",
780 | "_model_name": "HTMLModel",
781 | "_view_count": null,
782 | "_view_module": "@jupyter-widgets/controls",
783 | "_view_module_version": "1.5.0",
784 | "_view_name": "HTMLView",
785 | "description": "",
786 | "description_tooltip": null,
787 | "layout": "IPY_MODEL_a472fa2b93f449f0a5c1444d1dacd833",
788 | "placeholder": "",
789 | "style": "IPY_MODEL_1c149ec6a5a640f3bd5a0209840e58a4",
790 | "value": " 1.46G/1.46G [01:01<00:00, 34.6MB/s]"
791 | }
792 | },
793 | "2da95739a2444ce896fb3ff81c59e6d0": {
794 | "model_module": "@jupyter-widgets/base",
795 | "model_name": "LayoutModel",
796 | "model_module_version": "1.2.0",
797 | "state": {
798 | "_model_module": "@jupyter-widgets/base",
799 | "_model_module_version": "1.2.0",
800 | "_model_name": "LayoutModel",
801 | "_view_count": null,
802 | "_view_module": "@jupyter-widgets/base",
803 | "_view_module_version": "1.2.0",
804 | "_view_name": "LayoutView",
805 | "align_content": null,
806 | "align_items": null,
807 | "align_self": null,
808 | "border": null,
809 | "bottom": null,
810 | "display": null,
811 | "flex": null,
812 | "flex_flow": null,
813 | "grid_area": null,
814 | "grid_auto_columns": null,
815 | "grid_auto_flow": null,
816 | "grid_auto_rows": null,
817 | "grid_column": null,
818 | "grid_gap": null,
819 | "grid_row": null,
820 | "grid_template_areas": null,
821 | "grid_template_columns": null,
822 | "grid_template_rows": null,
823 | "height": null,
824 | "justify_content": null,
825 | "justify_items": null,
826 | "left": null,
827 | "margin": null,
828 | "max_height": null,
829 | "max_width": null,
830 | "min_height": null,
831 | "min_width": null,
832 | "object_fit": null,
833 | "object_position": null,
834 | "order": null,
835 | "overflow": null,
836 | "overflow_x": null,
837 | "overflow_y": null,
838 | "padding": null,
839 | "right": null,
840 | "top": null,
841 | "visibility": null,
842 | "width": null
843 | }
844 | },
845 | "2bdfdc5610ec4dc1acfe22283eb1541e": {
846 | "model_module": "@jupyter-widgets/base",
847 | "model_name": "LayoutModel",
848 | "model_module_version": "1.2.0",
849 | "state": {
850 | "_model_module": "@jupyter-widgets/base",
851 | "_model_module_version": "1.2.0",
852 | "_model_name": "LayoutModel",
853 | "_view_count": null,
854 | "_view_module": "@jupyter-widgets/base",
855 | "_view_module_version": "1.2.0",
856 | "_view_name": "LayoutView",
857 | "align_content": null,
858 | "align_items": null,
859 | "align_self": null,
860 | "border": null,
861 | "bottom": null,
862 | "display": null,
863 | "flex": null,
864 | "flex_flow": null,
865 | "grid_area": null,
866 | "grid_auto_columns": null,
867 | "grid_auto_flow": null,
868 | "grid_auto_rows": null,
869 | "grid_column": null,
870 | "grid_gap": null,
871 | "grid_row": null,
872 | "grid_template_areas": null,
873 | "grid_template_columns": null,
874 | "grid_template_rows": null,
875 | "height": null,
876 | "justify_content": null,
877 | "justify_items": null,
878 | "left": null,
879 | "margin": null,
880 | "max_height": null,
881 | "max_width": null,
882 | "min_height": null,
883 | "min_width": null,
884 | "object_fit": null,
885 | "object_position": null,
886 | "order": null,
887 | "overflow": null,
888 | "overflow_x": null,
889 | "overflow_y": null,
890 | "padding": null,
891 | "right": null,
892 | "top": null,
893 | "visibility": null,
894 | "width": null
895 | }
896 | },
897 | "baba10fb45dc42bcbf6321dd59cfaf6e": {
898 | "model_module": "@jupyter-widgets/controls",
899 | "model_name": "DescriptionStyleModel",
900 | "model_module_version": "1.5.0",
901 | "state": {
902 | "_model_module": "@jupyter-widgets/controls",
903 | "_model_module_version": "1.5.0",
904 | "_model_name": "DescriptionStyleModel",
905 | "_view_count": null,
906 | "_view_module": "@jupyter-widgets/base",
907 | "_view_module_version": "1.2.0",
908 | "_view_name": "StyleView",
909 | "description_width": ""
910 | }
911 | },
912 | "a1404373c9314d81bf99c41fdc9f11b4": {
913 | "model_module": "@jupyter-widgets/base",
914 | "model_name": "LayoutModel",
915 | "model_module_version": "1.2.0",
916 | "state": {
917 | "_model_module": "@jupyter-widgets/base",
918 | "_model_module_version": "1.2.0",
919 | "_model_name": "LayoutModel",
920 | "_view_count": null,
921 | "_view_module": "@jupyter-widgets/base",
922 | "_view_module_version": "1.2.0",
923 | "_view_name": "LayoutView",
924 | "align_content": null,
925 | "align_items": null,
926 | "align_self": null,
927 | "border": null,
928 | "bottom": null,
929 | "display": null,
930 | "flex": null,
931 | "flex_flow": null,
932 | "grid_area": null,
933 | "grid_auto_columns": null,
934 | "grid_auto_flow": null,
935 | "grid_auto_rows": null,
936 | "grid_column": null,
937 | "grid_gap": null,
938 | "grid_row": null,
939 | "grid_template_areas": null,
940 | "grid_template_columns": null,
941 | "grid_template_rows": null,
942 | "height": null,
943 | "justify_content": null,
944 | "justify_items": null,
945 | "left": null,
946 | "margin": null,
947 | "max_height": null,
948 | "max_width": null,
949 | "min_height": null,
950 | "min_width": null,
951 | "object_fit": null,
952 | "object_position": null,
953 | "order": null,
954 | "overflow": null,
955 | "overflow_x": null,
956 | "overflow_y": null,
957 | "padding": null,
958 | "right": null,
959 | "top": null,
960 | "visibility": null,
961 | "width": null
962 | }
963 | },
964 | "1eb7881728a04d1ca9e209a17da9bef2": {
965 | "model_module": "@jupyter-widgets/controls",
966 | "model_name": "ProgressStyleModel",
967 | "model_module_version": "1.5.0",
968 | "state": {
969 | "_model_module": "@jupyter-widgets/controls",
970 | "_model_module_version": "1.5.0",
971 | "_model_name": "ProgressStyleModel",
972 | "_view_count": null,
973 | "_view_module": "@jupyter-widgets/base",
974 | "_view_module_version": "1.2.0",
975 | "_view_name": "StyleView",
976 | "bar_color": null,
977 | "description_width": ""
978 | }
979 | },
980 | "a472fa2b93f449f0a5c1444d1dacd833": {
981 | "model_module": "@jupyter-widgets/base",
982 | "model_name": "LayoutModel",
983 | "model_module_version": "1.2.0",
984 | "state": {
985 | "_model_module": "@jupyter-widgets/base",
986 | "_model_module_version": "1.2.0",
987 | "_model_name": "LayoutModel",
988 | "_view_count": null,
989 | "_view_module": "@jupyter-widgets/base",
990 | "_view_module_version": "1.2.0",
991 | "_view_name": "LayoutView",
992 | "align_content": null,
993 | "align_items": null,
994 | "align_self": null,
995 | "border": null,
996 | "bottom": null,
997 | "display": null,
998 | "flex": null,
999 | "flex_flow": null,
1000 | "grid_area": null,
1001 | "grid_auto_columns": null,
1002 | "grid_auto_flow": null,
1003 | "grid_auto_rows": null,
1004 | "grid_column": null,
1005 | "grid_gap": null,
1006 | "grid_row": null,
1007 | "grid_template_areas": null,
1008 | "grid_template_columns": null,
1009 | "grid_template_rows": null,
1010 | "height": null,
1011 | "justify_content": null,
1012 | "justify_items": null,
1013 | "left": null,
1014 | "margin": null,
1015 | "max_height": null,
1016 | "max_width": null,
1017 | "min_height": null,
1018 | "min_width": null,
1019 | "object_fit": null,
1020 | "object_position": null,
1021 | "order": null,
1022 | "overflow": null,
1023 | "overflow_x": null,
1024 | "overflow_y": null,
1025 | "padding": null,
1026 | "right": null,
1027 | "top": null,
1028 | "visibility": null,
1029 | "width": null
1030 | }
1031 | },
1032 | "1c149ec6a5a640f3bd5a0209840e58a4": {
1033 | "model_module": "@jupyter-widgets/controls",
1034 | "model_name": "DescriptionStyleModel",
1035 | "model_module_version": "1.5.0",
1036 | "state": {
1037 | "_model_module": "@jupyter-widgets/controls",
1038 | "_model_module_version": "1.5.0",
1039 | "_model_name": "DescriptionStyleModel",
1040 | "_view_count": null,
1041 | "_view_module": "@jupyter-widgets/base",
1042 | "_view_module_version": "1.2.0",
1043 | "_view_name": "StyleView",
1044 | "description_width": ""
1045 | }
1046 | },
1047 | "8648320acb1f4fed83f6a7822ec43043": {
1048 | "model_module": "@jupyter-widgets/controls",
1049 | "model_name": "HBoxModel",
1050 | "model_module_version": "1.5.0",
1051 | "state": {
1052 | "_dom_classes": [],
1053 | "_model_module": "@jupyter-widgets/controls",
1054 | "_model_module_version": "1.5.0",
1055 | "_model_name": "HBoxModel",
1056 | "_view_count": null,
1057 | "_view_module": "@jupyter-widgets/controls",
1058 | "_view_module_version": "1.5.0",
1059 | "_view_name": "HBoxView",
1060 | "box_style": "",
1061 | "children": [
1062 | "IPY_MODEL_ba14c4a3589f419aaa14b9b8b7bdfa73",
1063 | "IPY_MODEL_07584bcd3cd24debbb1ac2fa59458e2b",
1064 | "IPY_MODEL_61993ca2959f4f39870cea582314b7f8"
1065 | ],
1066 | "layout": "IPY_MODEL_6e1abca235f7485484abcd5c76b7b3d7"
1067 | }
1068 | },
1069 | "ba14c4a3589f419aaa14b9b8b7bdfa73": {
1070 | "model_module": "@jupyter-widgets/controls",
1071 | "model_name": "HTMLModel",
1072 | "model_module_version": "1.5.0",
1073 | "state": {
1074 | "_dom_classes": [],
1075 | "_model_module": "@jupyter-widgets/controls",
1076 | "_model_module_version": "1.5.0",
1077 | "_model_name": "HTMLModel",
1078 | "_view_count": null,
1079 | "_view_module": "@jupyter-widgets/controls",
1080 | "_view_module_version": "1.5.0",
1081 | "_view_name": "HTMLView",
1082 | "description": "",
1083 | "description_tooltip": null,
1084 | "layout": "IPY_MODEL_beb839820dba4f0a8862f04e03e861b0",
1085 | "placeholder": "",
1086 | "style": "IPY_MODEL_e977a6d7ec764683aeb36e30f006d238",
1087 | "value": "Loading checkpoint shards: 100%"
1088 | }
1089 | },
1090 | "07584bcd3cd24debbb1ac2fa59458e2b": {
1091 | "model_module": "@jupyter-widgets/controls",
1092 | "model_name": "FloatProgressModel",
1093 | "model_module_version": "1.5.0",
1094 | "state": {
1095 | "_dom_classes": [],
1096 | "_model_module": "@jupyter-widgets/controls",
1097 | "_model_module_version": "1.5.0",
1098 | "_model_name": "FloatProgressModel",
1099 | "_view_count": null,
1100 | "_view_module": "@jupyter-widgets/controls",
1101 | "_view_module_version": "1.5.0",
1102 | "_view_name": "ProgressView",
1103 | "bar_style": "success",
1104 | "description": "",
1105 | "description_tooltip": null,
1106 | "layout": "IPY_MODEL_138403b96c874e86a36b267b7a6f721e",
1107 | "max": 2,
1108 | "min": 0,
1109 | "orientation": "horizontal",
1110 | "style": "IPY_MODEL_42d5c7b11aba49d593951dbe4a0c9916",
1111 | "value": 2
1112 | }
1113 | },
1114 | "61993ca2959f4f39870cea582314b7f8": {
1115 | "model_module": "@jupyter-widgets/controls",
1116 | "model_name": "HTMLModel",
1117 | "model_module_version": "1.5.0",
1118 | "state": {
1119 | "_dom_classes": [],
1120 | "_model_module": "@jupyter-widgets/controls",
1121 | "_model_module_version": "1.5.0",
1122 | "_model_name": "HTMLModel",
1123 | "_view_count": null,
1124 | "_view_module": "@jupyter-widgets/controls",
1125 | "_view_module_version": "1.5.0",
1126 | "_view_name": "HTMLView",
1127 | "description": "",
1128 | "description_tooltip": null,
1129 | "layout": "IPY_MODEL_93deb96f5e9c4fb78c8a89a7b95dc240",
1130 | "placeholder": "",
1131 | "style": "IPY_MODEL_c412e8d1c59247ddb01ff5e41be21294",
1132 | "value": " 2/2 [00:00<00:00, 3.47it/s]"
1133 | }
1134 | },
1135 | "6e1abca235f7485484abcd5c76b7b3d7": {
1136 | "model_module": "@jupyter-widgets/base",
1137 | "model_name": "LayoutModel",
1138 | "model_module_version": "1.2.0",
1139 | "state": {
1140 | "_model_module": "@jupyter-widgets/base",
1141 | "_model_module_version": "1.2.0",
1142 | "_model_name": "LayoutModel",
1143 | "_view_count": null,
1144 | "_view_module": "@jupyter-widgets/base",
1145 | "_view_module_version": "1.2.0",
1146 | "_view_name": "LayoutView",
1147 | "align_content": null,
1148 | "align_items": null,
1149 | "align_self": null,
1150 | "border": null,
1151 | "bottom": null,
1152 | "display": null,
1153 | "flex": null,
1154 | "flex_flow": null,
1155 | "grid_area": null,
1156 | "grid_auto_columns": null,
1157 | "grid_auto_flow": null,
1158 | "grid_auto_rows": null,
1159 | "grid_column": null,
1160 | "grid_gap": null,
1161 | "grid_row": null,
1162 | "grid_template_areas": null,
1163 | "grid_template_columns": null,
1164 | "grid_template_rows": null,
1165 | "height": null,
1166 | "justify_content": null,
1167 | "justify_items": null,
1168 | "left": null,
1169 | "margin": null,
1170 | "max_height": null,
1171 | "max_width": null,
1172 | "min_height": null,
1173 | "min_width": null,
1174 | "object_fit": null,
1175 | "object_position": null,
1176 | "order": null,
1177 | "overflow": null,
1178 | "overflow_x": null,
1179 | "overflow_y": null,
1180 | "padding": null,
1181 | "right": null,
1182 | "top": null,
1183 | "visibility": null,
1184 | "width": null
1185 | }
1186 | },
1187 | "beb839820dba4f0a8862f04e03e861b0": {
1188 | "model_module": "@jupyter-widgets/base",
1189 | "model_name": "LayoutModel",
1190 | "model_module_version": "1.2.0",
1191 | "state": {
1192 | "_model_module": "@jupyter-widgets/base",
1193 | "_model_module_version": "1.2.0",
1194 | "_model_name": "LayoutModel",
1195 | "_view_count": null,
1196 | "_view_module": "@jupyter-widgets/base",
1197 | "_view_module_version": "1.2.0",
1198 | "_view_name": "LayoutView",
1199 | "align_content": null,
1200 | "align_items": null,
1201 | "align_self": null,
1202 | "border": null,
1203 | "bottom": null,
1204 | "display": null,
1205 | "flex": null,
1206 | "flex_flow": null,
1207 | "grid_area": null,
1208 | "grid_auto_columns": null,
1209 | "grid_auto_flow": null,
1210 | "grid_auto_rows": null,
1211 | "grid_column": null,
1212 | "grid_gap": null,
1213 | "grid_row": null,
1214 | "grid_template_areas": null,
1215 | "grid_template_columns": null,
1216 | "grid_template_rows": null,
1217 | "height": null,
1218 | "justify_content": null,
1219 | "justify_items": null,
1220 | "left": null,
1221 | "margin": null,
1222 | "max_height": null,
1223 | "max_width": null,
1224 | "min_height": null,
1225 | "min_width": null,
1226 | "object_fit": null,
1227 | "object_position": null,
1228 | "order": null,
1229 | "overflow": null,
1230 | "overflow_x": null,
1231 | "overflow_y": null,
1232 | "padding": null,
1233 | "right": null,
1234 | "top": null,
1235 | "visibility": null,
1236 | "width": null
1237 | }
1238 | },
1239 | "e977a6d7ec764683aeb36e30f006d238": {
1240 | "model_module": "@jupyter-widgets/controls",
1241 | "model_name": "DescriptionStyleModel",
1242 | "model_module_version": "1.5.0",
1243 | "state": {
1244 | "_model_module": "@jupyter-widgets/controls",
1245 | "_model_module_version": "1.5.0",
1246 | "_model_name": "DescriptionStyleModel",
1247 | "_view_count": null,
1248 | "_view_module": "@jupyter-widgets/base",
1249 | "_view_module_version": "1.2.0",
1250 | "_view_name": "StyleView",
1251 | "description_width": ""
1252 | }
1253 | },
1254 | "138403b96c874e86a36b267b7a6f721e": {
1255 | "model_module": "@jupyter-widgets/base",
1256 | "model_name": "LayoutModel",
1257 | "model_module_version": "1.2.0",
1258 | "state": {
1259 | "_model_module": "@jupyter-widgets/base",
1260 | "_model_module_version": "1.2.0",
1261 | "_model_name": "LayoutModel",
1262 | "_view_count": null,
1263 | "_view_module": "@jupyter-widgets/base",
1264 | "_view_module_version": "1.2.0",
1265 | "_view_name": "LayoutView",
1266 | "align_content": null,
1267 | "align_items": null,
1268 | "align_self": null,
1269 | "border": null,
1270 | "bottom": null,
1271 | "display": null,
1272 | "flex": null,
1273 | "flex_flow": null,
1274 | "grid_area": null,
1275 | "grid_auto_columns": null,
1276 | "grid_auto_flow": null,
1277 | "grid_auto_rows": null,
1278 | "grid_column": null,
1279 | "grid_gap": null,
1280 | "grid_row": null,
1281 | "grid_template_areas": null,
1282 | "grid_template_columns": null,
1283 | "grid_template_rows": null,
1284 | "height": null,
1285 | "justify_content": null,
1286 | "justify_items": null,
1287 | "left": null,
1288 | "margin": null,
1289 | "max_height": null,
1290 | "max_width": null,
1291 | "min_height": null,
1292 | "min_width": null,
1293 | "object_fit": null,
1294 | "object_position": null,
1295 | "order": null,
1296 | "overflow": null,
1297 | "overflow_x": null,
1298 | "overflow_y": null,
1299 | "padding": null,
1300 | "right": null,
1301 | "top": null,
1302 | "visibility": null,
1303 | "width": null
1304 | }
1305 | },
1306 | "42d5c7b11aba49d593951dbe4a0c9916": {
1307 | "model_module": "@jupyter-widgets/controls",
1308 | "model_name": "ProgressStyleModel",
1309 | "model_module_version": "1.5.0",
1310 | "state": {
1311 | "_model_module": "@jupyter-widgets/controls",
1312 | "_model_module_version": "1.5.0",
1313 | "_model_name": "ProgressStyleModel",
1314 | "_view_count": null,
1315 | "_view_module": "@jupyter-widgets/base",
1316 | "_view_module_version": "1.2.0",
1317 | "_view_name": "StyleView",
1318 | "bar_color": null,
1319 | "description_width": ""
1320 | }
1321 | },
1322 | "93deb96f5e9c4fb78c8a89a7b95dc240": {
1323 | "model_module": "@jupyter-widgets/base",
1324 | "model_name": "LayoutModel",
1325 | "model_module_version": "1.2.0",
1326 | "state": {
1327 | "_model_module": "@jupyter-widgets/base",
1328 | "_model_module_version": "1.2.0",
1329 | "_model_name": "LayoutModel",
1330 | "_view_count": null,
1331 | "_view_module": "@jupyter-widgets/base",
1332 | "_view_module_version": "1.2.0",
1333 | "_view_name": "LayoutView",
1334 | "align_content": null,
1335 | "align_items": null,
1336 | "align_self": null,
1337 | "border": null,
1338 | "bottom": null,
1339 | "display": null,
1340 | "flex": null,
1341 | "flex_flow": null,
1342 | "grid_area": null,
1343 | "grid_auto_columns": null,
1344 | "grid_auto_flow": null,
1345 | "grid_auto_rows": null,
1346 | "grid_column": null,
1347 | "grid_gap": null,
1348 | "grid_row": null,
1349 | "grid_template_areas": null,
1350 | "grid_template_columns": null,
1351 | "grid_template_rows": null,
1352 | "height": null,
1353 | "justify_content": null,
1354 | "justify_items": null,
1355 | "left": null,
1356 | "margin": null,
1357 | "max_height": null,
1358 | "max_width": null,
1359 | "min_height": null,
1360 | "min_width": null,
1361 | "object_fit": null,
1362 | "object_position": null,
1363 | "order": null,
1364 | "overflow": null,
1365 | "overflow_x": null,
1366 | "overflow_y": null,
1367 | "padding": null,
1368 | "right": null,
1369 | "top": null,
1370 | "visibility": null,
1371 | "width": null
1372 | }
1373 | },
1374 | "c412e8d1c59247ddb01ff5e41be21294": {
1375 | "model_module": "@jupyter-widgets/controls",
1376 | "model_name": "DescriptionStyleModel",
1377 | "model_module_version": "1.5.0",
1378 | "state": {
1379 | "_model_module": "@jupyter-widgets/controls",
1380 | "_model_module_version": "1.5.0",
1381 | "_model_name": "DescriptionStyleModel",
1382 | "_view_count": null,
1383 | "_view_module": "@jupyter-widgets/base",
1384 | "_view_module_version": "1.2.0",
1385 | "_view_name": "StyleView",
1386 | "description_width": ""
1387 | }
1388 | },
1389 | "d798703b03304bdcb9de0ed306ca9943": {
1390 | "model_module": "@jupyter-widgets/controls",
1391 | "model_name": "HBoxModel",
1392 | "model_module_version": "1.5.0",
1393 | "state": {
1394 | "_dom_classes": [],
1395 | "_model_module": "@jupyter-widgets/controls",
1396 | "_model_module_version": "1.5.0",
1397 | "_model_name": "HBoxModel",
1398 | "_view_count": null,
1399 | "_view_module": "@jupyter-widgets/controls",
1400 | "_view_module_version": "1.5.0",
1401 | "_view_name": "HBoxView",
1402 | "box_style": "",
1403 | "children": [
1404 | "IPY_MODEL_1de9c5460dbb4e80bb7e5c45b90439a1",
1405 | "IPY_MODEL_e0291b1e01184f7393bbca2d10902a7e",
1406 | "IPY_MODEL_25e1398b6e9a41f19d1bdb645c1cb35c"
1407 | ],
1408 | "layout": "IPY_MODEL_1f51124cf426489aa1d9748f4ec83d0c"
1409 | }
1410 | },
1411 | "1de9c5460dbb4e80bb7e5c45b90439a1": {
1412 | "model_module": "@jupyter-widgets/controls",
1413 | "model_name": "HTMLModel",
1414 | "model_module_version": "1.5.0",
1415 | "state": {
1416 | "_dom_classes": [],
1417 | "_model_module": "@jupyter-widgets/controls",
1418 | "_model_module_version": "1.5.0",
1419 | "_model_name": "HTMLModel",
1420 | "_view_count": null,
1421 | "_view_module": "@jupyter-widgets/controls",
1422 | "_view_module_version": "1.5.0",
1423 | "_view_name": "HTMLView",
1424 | "description": "",
1425 | "description_tooltip": null,
1426 | "layout": "IPY_MODEL_abc003a02bb5418ca3969f5e475808a0",
1427 | "placeholder": "",
1428 | "style": "IPY_MODEL_e47d3b79aef646a1be1da4d61d466927",
1429 | "value": "generation_config.json: 100%"
1430 | }
1431 | },
1432 | "e0291b1e01184f7393bbca2d10902a7e": {
1433 | "model_module": "@jupyter-widgets/controls",
1434 | "model_name": "FloatProgressModel",
1435 | "model_module_version": "1.5.0",
1436 | "state": {
1437 | "_dom_classes": [],
1438 | "_model_module": "@jupyter-widgets/controls",
1439 | "_model_module_version": "1.5.0",
1440 | "_model_name": "FloatProgressModel",
1441 | "_view_count": null,
1442 | "_view_module": "@jupyter-widgets/controls",
1443 | "_view_module_version": "1.5.0",
1444 | "_view_name": "ProgressView",
1445 | "bar_style": "success",
1446 | "description": "",
1447 | "description_tooltip": null,
1448 | "layout": "IPY_MODEL_dae4a3b4986047f8b071095b720d20dc",
1449 | "max": 189,
1450 | "min": 0,
1451 | "orientation": "horizontal",
1452 | "style": "IPY_MODEL_b41721c47ad04ea3856c3177702bee1e",
1453 | "value": 189
1454 | }
1455 | },
1456 | "25e1398b6e9a41f19d1bdb645c1cb35c": {
1457 | "model_module": "@jupyter-widgets/controls",
1458 | "model_name": "HTMLModel",
1459 | "model_module_version": "1.5.0",
1460 | "state": {
1461 | "_dom_classes": [],
1462 | "_model_module": "@jupyter-widgets/controls",
1463 | "_model_module_version": "1.5.0",
1464 | "_model_name": "HTMLModel",
1465 | "_view_count": null,
1466 | "_view_module": "@jupyter-widgets/controls",
1467 | "_view_module_version": "1.5.0",
1468 | "_view_name": "HTMLView",
1469 | "description": "",
1470 | "description_tooltip": null,
1471 | "layout": "IPY_MODEL_2d17eeb690584ef9a98c58f31551e1ae",
1472 | "placeholder": "",
1473 | "style": "IPY_MODEL_117d30dc8c334efba28bbfe9c7713344",
1474 | "value": " 189/189 [00:00<00:00, 14.9kB/s]"
1475 | }
1476 | },
1477 | "1f51124cf426489aa1d9748f4ec83d0c": {
1478 | "model_module": "@jupyter-widgets/base",
1479 | "model_name": "LayoutModel",
1480 | "model_module_version": "1.2.0",
1481 | "state": {
1482 | "_model_module": "@jupyter-widgets/base",
1483 | "_model_module_version": "1.2.0",
1484 | "_model_name": "LayoutModel",
1485 | "_view_count": null,
1486 | "_view_module": "@jupyter-widgets/base",
1487 | "_view_module_version": "1.2.0",
1488 | "_view_name": "LayoutView",
1489 | "align_content": null,
1490 | "align_items": null,
1491 | "align_self": null,
1492 | "border": null,
1493 | "bottom": null,
1494 | "display": null,
1495 | "flex": null,
1496 | "flex_flow": null,
1497 | "grid_area": null,
1498 | "grid_auto_columns": null,
1499 | "grid_auto_flow": null,
1500 | "grid_auto_rows": null,
1501 | "grid_column": null,
1502 | "grid_gap": null,
1503 | "grid_row": null,
1504 | "grid_template_areas": null,
1505 | "grid_template_columns": null,
1506 | "grid_template_rows": null,
1507 | "height": null,
1508 | "justify_content": null,
1509 | "justify_items": null,
1510 | "left": null,
1511 | "margin": null,
1512 | "max_height": null,
1513 | "max_width": null,
1514 | "min_height": null,
1515 | "min_width": null,
1516 | "object_fit": null,
1517 | "object_position": null,
1518 | "order": null,
1519 | "overflow": null,
1520 | "overflow_x": null,
1521 | "overflow_y": null,
1522 | "padding": null,
1523 | "right": null,
1524 | "top": null,
1525 | "visibility": null,
1526 | "width": null
1527 | }
1528 | },
1529 | "abc003a02bb5418ca3969f5e475808a0": {
1530 | "model_module": "@jupyter-widgets/base",
1531 | "model_name": "LayoutModel",
1532 | "model_module_version": "1.2.0",
1533 | "state": {
1534 | "_model_module": "@jupyter-widgets/base",
1535 | "_model_module_version": "1.2.0",
1536 | "_model_name": "LayoutModel",
1537 | "_view_count": null,
1538 | "_view_module": "@jupyter-widgets/base",
1539 | "_view_module_version": "1.2.0",
1540 | "_view_name": "LayoutView",
1541 | "align_content": null,
1542 | "align_items": null,
1543 | "align_self": null,
1544 | "border": null,
1545 | "bottom": null,
1546 | "display": null,
1547 | "flex": null,
1548 | "flex_flow": null,
1549 | "grid_area": null,
1550 | "grid_auto_columns": null,
1551 | "grid_auto_flow": null,
1552 | "grid_auto_rows": null,
1553 | "grid_column": null,
1554 | "grid_gap": null,
1555 | "grid_row": null,
1556 | "grid_template_areas": null,
1557 | "grid_template_columns": null,
1558 | "grid_template_rows": null,
1559 | "height": null,
1560 | "justify_content": null,
1561 | "justify_items": null,
1562 | "left": null,
1563 | "margin": null,
1564 | "max_height": null,
1565 | "max_width": null,
1566 | "min_height": null,
1567 | "min_width": null,
1568 | "object_fit": null,
1569 | "object_position": null,
1570 | "order": null,
1571 | "overflow": null,
1572 | "overflow_x": null,
1573 | "overflow_y": null,
1574 | "padding": null,
1575 | "right": null,
1576 | "top": null,
1577 | "visibility": null,
1578 | "width": null
1579 | }
1580 | },
1581 | "e47d3b79aef646a1be1da4d61d466927": {
1582 | "model_module": "@jupyter-widgets/controls",
1583 | "model_name": "DescriptionStyleModel",
1584 | "model_module_version": "1.5.0",
1585 | "state": {
1586 | "_model_module": "@jupyter-widgets/controls",
1587 | "_model_module_version": "1.5.0",
1588 | "_model_name": "DescriptionStyleModel",
1589 | "_view_count": null,
1590 | "_view_module": "@jupyter-widgets/base",
1591 | "_view_module_version": "1.2.0",
1592 | "_view_name": "StyleView",
1593 | "description_width": ""
1594 | }
1595 | },
1596 | "dae4a3b4986047f8b071095b720d20dc": {
1597 | "model_module": "@jupyter-widgets/base",
1598 | "model_name": "LayoutModel",
1599 | "model_module_version": "1.2.0",
1600 | "state": {
1601 | "_model_module": "@jupyter-widgets/base",
1602 | "_model_module_version": "1.2.0",
1603 | "_model_name": "LayoutModel",
1604 | "_view_count": null,
1605 | "_view_module": "@jupyter-widgets/base",
1606 | "_view_module_version": "1.2.0",
1607 | "_view_name": "LayoutView",
1608 | "align_content": null,
1609 | "align_items": null,
1610 | "align_self": null,
1611 | "border": null,
1612 | "bottom": null,
1613 | "display": null,
1614 | "flex": null,
1615 | "flex_flow": null,
1616 | "grid_area": null,
1617 | "grid_auto_columns": null,
1618 | "grid_auto_flow": null,
1619 | "grid_auto_rows": null,
1620 | "grid_column": null,
1621 | "grid_gap": null,
1622 | "grid_row": null,
1623 | "grid_template_areas": null,
1624 | "grid_template_columns": null,
1625 | "grid_template_rows": null,
1626 | "height": null,
1627 | "justify_content": null,
1628 | "justify_items": null,
1629 | "left": null,
1630 | "margin": null,
1631 | "max_height": null,
1632 | "max_width": null,
1633 | "min_height": null,
1634 | "min_width": null,
1635 | "object_fit": null,
1636 | "object_position": null,
1637 | "order": null,
1638 | "overflow": null,
1639 | "overflow_x": null,
1640 | "overflow_y": null,
1641 | "padding": null,
1642 | "right": null,
1643 | "top": null,
1644 | "visibility": null,
1645 | "width": null
1646 | }
1647 | },
1648 | "b41721c47ad04ea3856c3177702bee1e": {
1649 | "model_module": "@jupyter-widgets/controls",
1650 | "model_name": "ProgressStyleModel",
1651 | "model_module_version": "1.5.0",
1652 | "state": {
1653 | "_model_module": "@jupyter-widgets/controls",
1654 | "_model_module_version": "1.5.0",
1655 | "_model_name": "ProgressStyleModel",
1656 | "_view_count": null,
1657 | "_view_module": "@jupyter-widgets/base",
1658 | "_view_module_version": "1.2.0",
1659 | "_view_name": "StyleView",
1660 | "bar_color": null,
1661 | "description_width": ""
1662 | }
1663 | },
1664 | "2d17eeb690584ef9a98c58f31551e1ae": {
1665 | "model_module": "@jupyter-widgets/base",
1666 | "model_name": "LayoutModel",
1667 | "model_module_version": "1.2.0",
1668 | "state": {
1669 | "_model_module": "@jupyter-widgets/base",
1670 | "_model_module_version": "1.2.0",
1671 | "_model_name": "LayoutModel",
1672 | "_view_count": null,
1673 | "_view_module": "@jupyter-widgets/base",
1674 | "_view_module_version": "1.2.0",
1675 | "_view_name": "LayoutView",
1676 | "align_content": null,
1677 | "align_items": null,
1678 | "align_self": null,
1679 | "border": null,
1680 | "bottom": null,
1681 | "display": null,
1682 | "flex": null,
1683 | "flex_flow": null,
1684 | "grid_area": null,
1685 | "grid_auto_columns": null,
1686 | "grid_auto_flow": null,
1687 | "grid_auto_rows": null,
1688 | "grid_column": null,
1689 | "grid_gap": null,
1690 | "grid_row": null,
1691 | "grid_template_areas": null,
1692 | "grid_template_columns": null,
1693 | "grid_template_rows": null,
1694 | "height": null,
1695 | "justify_content": null,
1696 | "justify_items": null,
1697 | "left": null,
1698 | "margin": null,
1699 | "max_height": null,
1700 | "max_width": null,
1701 | "min_height": null,
1702 | "min_width": null,
1703 | "object_fit": null,
1704 | "object_position": null,
1705 | "order": null,
1706 | "overflow": null,
1707 | "overflow_x": null,
1708 | "overflow_y": null,
1709 | "padding": null,
1710 | "right": null,
1711 | "top": null,
1712 | "visibility": null,
1713 | "width": null
1714 | }
1715 | },
1716 | "117d30dc8c334efba28bbfe9c7713344": {
1717 | "model_module": "@jupyter-widgets/controls",
1718 | "model_name": "DescriptionStyleModel",
1719 | "model_module_version": "1.5.0",
1720 | "state": {
1721 | "_model_module": "@jupyter-widgets/controls",
1722 | "_model_module_version": "1.5.0",
1723 | "_model_name": "DescriptionStyleModel",
1724 | "_view_count": null,
1725 | "_view_module": "@jupyter-widgets/base",
1726 | "_view_module_version": "1.2.0",
1727 | "_view_name": "StyleView",
1728 | "description_width": ""
1729 | }
1730 | }
1731 | }
1732 | }
1733 | },
1734 | "cells": [
1735 | {
1736 | "cell_type": "markdown",
1737 | "metadata": {
1738 | "id": "view-in-github",
1739 | "colab_type": "text"
1740 | },
1741 | "source": [
1742 | "
"
1743 | ]
1744 | },
1745 | {
1746 | "cell_type": "markdown",
1747 | "source": [
1748 | "# Run Llama 3.2 3B in a FREE Google Colab!\n",
1749 | "\n",
1750 | "Powered by Transformers 🤗\n",
1751 | "\n",
1752 | "[Model Checkpoint 3B](https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct)\n",
1753 | "\n",
1754 | "[Model Checkpoint 1B](https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct)\n",
1755 | "\n",
1756 | "*Make sure to accept the license on the page of the model checkpoint you plan to use (linked above).*"
1757 | ],
1758 | "metadata": {
1759 | "id": "ycrm7hWWxYoX"
1760 | }
1761 | },
1762 | {
1763 | "cell_type": "markdown",
1764 | "source": [
1765 | "## Setup Environment\n",
1766 | "\n",
1767 | "Llama 3.2 3B should work out of the box with Transformers; just make sure you are on the latest Transformers release!"
1768 | ],
1769 | "metadata": {
1770 | "id": "PCzvqFRoyGcM"
1771 | }
1772 | },
1773 | {
1774 | "cell_type": "code",
1775 | "execution_count": null,
1776 | "metadata": {
1777 | "id": "u5hdTjYoYHqn"
1778 | },
1779 | "outputs": [],
1780 | "source": [
1781 | "!pip install -q --upgrade transformers accelerate"
1782 | ]
1783 | },
1784 | {
1785 | "cell_type": "markdown",
1786 | "source": [
1787 | "## Load Tokenizer and Model checkpoint"
1788 | ],
1789 | "metadata": {
1790 | "id": "fO093ZGCynnm"
1791 | }
1792 | },
1793 | {
1794 | "cell_type": "code",
1795 | "source": [
1796 | "import torch\n",
1797 | "from transformers import AutoModelForCausalLM, AutoTokenizer\n",
1798 | "\n",
1799 | "model_id = \"meta-llama/Llama-3.2-3B-Instruct\"\n",
1800 | "\n",
1801 | "tokenizer = AutoTokenizer.from_pretrained(model_id)\n",
1802 | "\n",
1803 | "model = AutoModelForCausalLM.from_pretrained(\n",
1804 | " model_id,\n",
1805 | " torch_dtype=torch.bfloat16,\n",
1806 | " low_cpu_mem_usage=True,\n",
1807 | ").to(\"cuda\")"
1808 | ],
1809 | "metadata": {
1810 | "colab": {
1811 | "base_uri": "https://localhost:8080/",
1812 | "height": 202,
1813 | "referenced_widgets": [
1814 | "0b10add730774dcbb03cf0834fd0b724",
1815 | "a74dd475cb0848c0bf343bb772ddac6a",
1816 | "3f3bf676a4dc4673a1acca0ef4616033",
1817 | "f7204a8184db44cb8bbb3d3e12478a0f",
1818 | "e1048e107534477897425885d08094a9",
1819 | "2fe67aae4b854a2c814cca55840b81c2",
1820 | "d8ae9d0d1d254353ba82b1574d7fe1dd",
1821 | "f7add879d74a48ae9ffc18c8b7694b4a",
1822 | "11655f24216f46b9a07e4944f70cede1",
1823 | "1d0264c3d2704b6bbe51583c4b4a3b96",
1824 | "7ec800ba8b1a4fbeae940b7aab631794",
1825 | "943c1bafe9ec4618869cfd028a610213",
1826 | "abf1255b444e4cb9be53d1a556f2ca44",
1827 | "c77ab3e2cec54aa7b11fc1957c64389b",
1828 | "33da38440919410bae7be13caec4929d",
1829 | "71b9f62cd90e4c898518fe75267e0e8f",
1830 | "0ea2047d4cec4026bfa9ae05eb5d4e18",
1831 | "05c1d4ab928d480094cf672339a98f29",
1832 | "d164fe1776bc413bae36e96fc0371907",
1833 | "e8f2756358d24df9b887bfc3d5531e83",
1834 | "11e5200b4fee425991ceed611bfb560a",
1835 | "3a5b9c9609a544858f8cf78b0b72788f",
1836 | "6e6d6dae6bc44498b6774c83685fb0b9",
1837 | "4156204d442346a79a9f7d5193b02e1e",
1838 | "32d71755c0f8460790b88f963ff1e4e6",
1839 | "23fba6d9a7044060a9b5a02ee7ba71e5",
1840 | "2da95739a2444ce896fb3ff81c59e6d0",
1841 | "2bdfdc5610ec4dc1acfe22283eb1541e",
1842 | "baba10fb45dc42bcbf6321dd59cfaf6e",
1843 | "a1404373c9314d81bf99c41fdc9f11b4",
1844 | "1eb7881728a04d1ca9e209a17da9bef2",
1845 | "a472fa2b93f449f0a5c1444d1dacd833",
1846 | "1c149ec6a5a640f3bd5a0209840e58a4",
1847 | "8648320acb1f4fed83f6a7822ec43043",
1848 | "ba14c4a3589f419aaa14b9b8b7bdfa73",
1849 | "07584bcd3cd24debbb1ac2fa59458e2b",
1850 | "61993ca2959f4f39870cea582314b7f8",
1851 | "6e1abca235f7485484abcd5c76b7b3d7",
1852 | "beb839820dba4f0a8862f04e03e861b0",
1853 | "e977a6d7ec764683aeb36e30f006d238",
1854 | "138403b96c874e86a36b267b7a6f721e",
1855 | "42d5c7b11aba49d593951dbe4a0c9916",
1856 | "93deb96f5e9c4fb78c8a89a7b95dc240",
1857 | "c412e8d1c59247ddb01ff5e41be21294",
1858 | "d798703b03304bdcb9de0ed306ca9943",
1859 | "1de9c5460dbb4e80bb7e5c45b90439a1",
1860 | "e0291b1e01184f7393bbca2d10902a7e",
1861 | "25e1398b6e9a41f19d1bdb645c1cb35c",
1862 | "1f51124cf426489aa1d9748f4ec83d0c",
1863 | "abc003a02bb5418ca3969f5e475808a0",
1864 | "e47d3b79aef646a1be1da4d61d466927",
1865 | "dae4a3b4986047f8b071095b720d20dc",
1866 | "b41721c47ad04ea3856c3177702bee1e",
1867 | "2d17eeb690584ef9a98c58f31551e1ae",
1868 | "117d30dc8c334efba28bbfe9c7713344"
1869 | ]
1870 | },
1871 | "id": "OLFqj9b6YW5n",
1872 | "outputId": "aa6b8990-54ee-4c7b-e4e2-e8a2cfe89e39"
1873 | },
1874 | "execution_count": null,
1875 | "outputs": [
1876 | {
1877 | "output_type": "display_data",
1878 | "data": {
1879 | "text/plain": [
1880 | "Downloading shards: 0%| | 0/2 [00:00, ?it/s]"
1881 | ],
1882 | "application/vnd.jupyter.widget-view+json": {
1883 | "version_major": 2,
1884 | "version_minor": 0,
1885 | "model_id": "0b10add730774dcbb03cf0834fd0b724"
1886 | }
1887 | },
1888 | "metadata": {}
1889 | },
1890 | {
1891 | "output_type": "display_data",
1892 | "data": {
1893 | "text/plain": [
1894 | "model-00001-of-00002.safetensors: 64%|######4 | 3.20G/4.97G [00:00, ?B/s]"
1895 | ],
1896 | "application/vnd.jupyter.widget-view+json": {
1897 | "version_major": 2,
1898 | "version_minor": 0,
1899 | "model_id": "943c1bafe9ec4618869cfd028a610213"
1900 | }
1901 | },
1902 | "metadata": {}
1903 | },
1904 | {
1905 | "output_type": "display_data",
1906 | "data": {
1907 | "text/plain": [
1908 | "model-00002-of-00002.safetensors: 0%| | 0.00/1.46G [00:00, ?B/s]"
1909 | ],
1910 | "application/vnd.jupyter.widget-view+json": {
1911 | "version_major": 2,
1912 | "version_minor": 0,
1913 | "model_id": "6e6d6dae6bc44498b6774c83685fb0b9"
1914 | }
1915 | },
1916 | "metadata": {}
1917 | },
1918 | {
1919 | "output_type": "display_data",
1920 | "data": {
1921 | "text/plain": [
1922 | "Loading checkpoint shards: 0%| | 0/2 [00:00, ?it/s]"
1923 | ],
1924 | "application/vnd.jupyter.widget-view+json": {
1925 | "version_major": 2,
1926 | "version_minor": 0,
1927 | "model_id": "8648320acb1f4fed83f6a7822ec43043"
1928 | }
1929 | },
1930 | "metadata": {}
1931 | },
1932 | {
1933 | "output_type": "display_data",
1934 | "data": {
1935 | "text/plain": [
1936 | "generation_config.json: 0%| | 0.00/189 [00:00, ?B/s]"
1937 | ],
1938 | "application/vnd.jupyter.widget-view+json": {
1939 | "version_major": 2,
1940 | "version_minor": 0,
1941 | "model_id": "d798703b03304bdcb9de0ed306ca9943"
1942 | }
1943 | },
1944 | "metadata": {}
1945 | }
1946 | ]
1947 | },
1948 | {
1949 | "cell_type": "markdown",
1950 | "source": [
1951 | "## Define Prompt & Tokenize"
1952 | ],
1953 | "metadata": {
1954 | "id": "sck3FWKpy11A"
1955 | }
1956 | },
1957 | {
1958 | "cell_type": "code",
1959 | "source": [
1960 | "messages = [\n",
1961 | " {\"role\": \"user\", \"content\": \"Who are you? Please, answer in pirate-speak.\"},\n",
1962 | "]\n",
1963 | "\n",
1964 | "inputs = tokenizer.apply_chat_template(\n",
1965 | " messages,\n",
1966 | " tokenize=True,\n",
1967 | " add_generation_prompt=True,\n",
1968 | " return_tensors=\"pt\",\n",
1969 | " return_dict=True,\n",
1970 | ").to(\"cuda\")"
1971 | ],
1972 | "metadata": {
1973 | "id": "HDS8InY0y0WC"
1974 | },
1975 | "execution_count": null,
1976 | "outputs": []
1977 | },
1978 | {
1979 | "cell_type": "markdown",
1980 | "source": [
1981 | "## Generate"
1982 | ],
1983 | "metadata": {
1984 | "id": "0veYFyBOzAmG"
1985 | }
1986 | },
1987 | {
1988 | "cell_type": "code",
1989 | "source": [
1990 | "outputs = model.generate(**inputs, do_sample=True, max_new_tokens=25)\n",
1991 | "print(tokenizer.batch_decode(outputs, skip_special_tokens=True))"
1992 | ],
1993 | "metadata": {
1994 | "colab": {
1995 | "base_uri": "https://localhost:8080/"
1996 | },
1997 | "id": "QslxgBSgy7w_",
1998 | "outputId": "b939bbc3-10f3-4357-cb25-f2b5b0370955"
1999 | },
2000 | "execution_count": null,
2001 | "outputs": [
2002 | {
2003 | "output_type": "stream",
2004 | "name": "stderr",
2005 | "text": [
2006 | "Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n"
2007 | ]
2008 | },
2009 | {
2010 | "output_type": "stream",
2011 | "name": "stdout",
2012 | "text": [
2013 | "[\"user\\n\\nWho are you? Please, answer in pirate-speak.assistant\\n\\nYer lookin' fer a tale o' who I be, eh? Alright then, matey! I be a\"]\n"
2014 | ]
2015 | }
2016 | ]
2017 | },
2018 | {
2019 | "cell_type": "markdown",
2020 | "source": [
2021 | "## Voilà! You now have a smart and capable assistant! 🦙"
2022 | ],
2023 | "metadata": {
2024 | "id": "o__5S-lCzDo3"
2025 | }
2026 | }
2027 | ]
2028 | }
--------------------------------------------------------------------------------