\")[1]\n",
413 | " return f\" {prefix} {suffix} \"\n",
414 | "\n",
415 | "def print_infilling(prompt: str, output: str):\n",
416 | " print(prompt.replace(\"\", output.replace(\"\", \"\")))"
417 | ]
418 | },
419 | {
420 | "cell_type": "code",
421 | "execution_count": null,
422 | "metadata": {
423 | "id": "LUU8zTdRSKNK"
424 | },
425 | "outputs": [],
426 | "source": [
427 | "codellama = ChatModule(model=\"CodeLlama-13b-hf-q4f16_1\", device=\"cuda\")"
428 | ]
429 | },
430 | {
431 | "cell_type": "code",
432 | "execution_count": null,
433 | "metadata": {
434 | "id": "d3rrm1qPSOkr"
435 | },
436 | "outputs": [],
437 | "source": [
438 | "prompt = \"\"\"\\\n",
439 | "# Installation instructions:\n",
440 | " \n",
441 | "This downloads the LLaMA inference code and installs the repository as a local pip package.\n",
442 | "\"\"\"\n",
443 | "\n",
444 | "output = codellama.generate(\n",
445 | " prompt=text_infilling(prompt),\n",
446 | " progress_callback=StreamToStdout(callback_interval=2)\n",
447 | ")"
448 | ]
449 | },
450 | {
451 | "cell_type": "code",
452 | "execution_count": null,
453 | "metadata": {
454 | "colab": {
455 | "base_uri": "https://localhost:8080/"
456 | },
457 | "id": "78SHkqohUbCc",
458 | "outputId": "c842e0a8-1ddc-4dc2-8d02-2d1c264c3131"
459 | },
460 | "outputs": [
461 | {
462 | "name": "stdout",
463 | "output_type": "stream",
464 | "text": [
465 | "# Installation instructions:\n",
466 | " pip install llamapy\n",
467 | "\n",
468 | "# Using the local pip package:\n",
469 | "\n",
470 | " import llamapy\n",
471 | " my_model = llamapy.LLaMA(n_components=2))\n",
472 | "\n",
473 | "# Requirements:\n",
474 | "\n",
475 | " Python 3.x\n",
476 | "\n",
477 | "\n",
478 | "# Installation (easy way):\n",
479 | "\n",
480 | " pip install git+https://github.com/BBIC-BBC/LLAMA\n",
481 | "\n",
482 | "\n",
483 | "# Installation (advanced way)):\n",
484 | "\n",
485 | "\n",
486 | " 1) Download the repository from Github:\n",
487 | "\n",
488 | "\n",
489 | " git clone https://github.com/BBIC-BBC/LLAMA\n",
490 | "\n",
491 | "\n",
492 | " 2) Install the repository as a local pip package:\n",
493 | "\n",
494 | "\n",
495 | " cd LLAMA\n",
496 | "\n",
497 | "\n",
498 | "\n",
499 | " python setup.py install\n",
500 | "\n",
501 | "\n",
502 | "\n",
503 | "\n",
504 | "# Using the local pip package:\n",
505 | "\n",
506 | "\n",
507 | " import llamapy\n",
508 | " my_model = llamapy.LLaMA(n_components=2))))\n",
509 | "\n",
510 | "\n",
511 | "\n",
512 | "# Requirements:\n",
513 | "\n",
514 | "\n",
515 | " Python 3.x\n",
516 | "\n",
517 | "\n",
518 | "\n",
519 | "# Installation (easy way):\n",
520 | "\n",
521 | "\n",
522 | " pip install git+https://github.com/BBIC-BBC/LLAMA\n",
523 | "\n",
524 | "\n",
525 | "\n",
526 | "# Installation (advanced way)):\n",
527 | "\n",
528 | "\n",
529 | "\n",
530 | " 1) Download the LLaMA inference code from Github:\n",
531 | "\n",
532 | "\n",
533 | " git clone https://github.com/BBIC-BBC/LLAMA\n",
534 | "\n",
535 | "\n",
536 | " 2) Install the LLaMA inference code as a local pip package:\n",
537 | "\n",
538 | "\n",
539 | " cd LLaMA\n",
540 | "\n",
541 | "\n",
542 | "\n",
543 | " python setup.py install\n",
544 | "This downloads the LLaMA inference code and installs the repository as a local pip package.\n",
545 | "\n"
546 | ]
547 | }
548 | ],
549 | "source": [
550 | "print_infilling(prompt, output)"
551 | ]
552 | },
553 | {
554 | "cell_type": "code",
555 | "execution_count": null,
556 | "metadata": {
557 | "id": "mKYyRyUVPGm2"
558 | },
559 | "outputs": [],
560 | "source": [
561 | "# Restart colab to create a new ChatModule\n",
562 | "exit()"
563 | ]
564 | },
565 | {
566 | "cell_type": "markdown",
567 | "metadata": {
568 | "id": "CqmYlUtwV01m"
569 | },
570 | "source": [
571 | "Finally, the CodeLlama-Instruct has instruction following ability for programming tasks."
572 | ]
573 | },
574 | {
575 | "cell_type": "code",
576 | "execution_count": null,
577 | "metadata": {
578 | "id": "IcCPRp9oWBfh"
579 | },
580 | "outputs": [],
581 | "source": [
582 | "from mlc_llm import ChatModule\n",
583 | "from mlc_llm.callback import StreamToStdout"
584 | ]
585 | },
586 | {
587 | "cell_type": "code",
588 | "execution_count": null,
589 | "metadata": {
590 | "id": "I4C8iC9IWyZR"
591 | },
592 | "outputs": [],
593 | "source": [
594 | "codellama_instruct = ChatModule(model=\"CodeLlama-13b-Instruct-hf-q4f16_1\", device=\"cuda\")"
595 | ]
596 | },
597 | {
598 | "cell_type": "code",
599 | "execution_count": null,
600 | "metadata": {
601 | "colab": {
602 | "base_uri": "https://localhost:8080/"
603 | },
604 | "id": "1kL02gKWW7Ov",
605 | "outputId": "65919ac1-2e1a-4545-ac3d-3643a7c4a18f"
606 | },
607 | "outputs": [
608 | {
609 | "name": "stdout",
610 | "output_type": "stream",
611 | "text": [
612 | "Here is a possible implementation of the program:\n",
613 | "```\n",
614 | "import java.util.*;\n",
615 | "public class SumOfSublists {\n",
616 | " public static void main(String[] args) {\n",
617 | " List list = Arrays.asList(1, 2, 3, 4, 5));\n",
618 | " List sums = new ArrayList<>();\n",
619 | " for (int i = 0; i < list.size(); i++) {\n",
620 | " int sum = 0;\n",
621 | " for (int j = i; j < list.size(); j++) {\n",
622 | " sum += list.get(j));\n",
623 | " }\n",
624 | "\n",
625 | " sums.add(sum));\n",
626 | " }\n",
627 | "\n",
628 | "\n",
629 | " System.out.println(\"The sums of all contiguous sublists are: \" + sums));\n",
630 | " }\n"
631 | ]
632 | }
633 | ],
634 | "source": [
635 | "prompt = (\"Write a Java program that computes the set of sums of all contiguous\"\n",
636 | " \"sublists of a given list.\")\n",
637 | "\n",
638 | "output = codellama_instruct.generate(\n",
639 | " prompt=prompt,\n",
640 | " progress_callback=StreamToStdout(callback_interval=2)\n",
641 | ")"
642 | ]
643 | },
644 | {
645 | "cell_type": "code",
646 | "execution_count": null,
647 | "metadata": {
648 | "id": "dl8cowtUz5yp"
649 | },
650 | "outputs": [],
651 | "source": [
652 | "codellama_instruct.reset_chat()"
653 | ]
654 | },
655 | {
656 | "cell_type": "code",
657 | "execution_count": null,
658 | "metadata": {
659 | "colab": {
660 | "base_uri": "https://localhost:8080/"
661 | },
662 | "id": "liWTlsBBYTRa",
663 | "outputId": "0edabb88-8219-448a-ebf6-66fd86767997"
664 | },
665 | "outputs": [
666 | {
667 | "name": "stdout",
668 | "output_type": "stream",
669 | "text": [
670 | "Here is a program in Python that solves the problem of finding the indices of two numbers in an array that add up to a target value:\n",
671 | "```\n",
672 | "def find_indices(nums, target):\n",
673 | " # Initialize two empty lists to store the indices of the two numbers\n",
674 | " for i in range(len(nums)))):\n",
675 | " for j in range(len(nums)))):\n",
676 | " if i != j and nums[i] + nums[j]] == target:\n",
677 | " indices = [i, j]]\n",
678 | " return indices\n"
679 | ]
680 | }
681 | ],
682 | "source": [
683 | "prompt = (\"Given an array of integers nums and an integer target, return\"\n",
684 | " \"indices of the two numbers such that they add up to target.\"\n",
685 | " \" Write this program in Python.\")\n",
686 | "\n",
687 | "output = codellama_instruct.generate(\n",
688 | " prompt=prompt,\n",
689 | " progress_callback=StreamToStdout(callback_interval=2)\n",
690 | ")"
691 | ]
692 | },
693 | {
694 | "cell_type": "code",
695 | "execution_count": null,
696 | "metadata": {
697 | "id": "UzIMwuiaWwSg"
698 | },
699 | "outputs": [],
700 | "source": [
701 | "# Restart colab to create a new ChatModule\n",
702 | "exit()"
703 | ]
704 | }
705 | ],
706 | "metadata": {
707 | "accelerator": "GPU",
708 | "colab": {
709 | "gpuType": "T4",
710 | "provenance": []
711 | },
712 | "kernelspec": {
713 | "display_name": "Python 3",
714 | "name": "python3"
715 | },
716 | "language_info": {
717 | "name": "python"
718 | }
719 | },
720 | "nbformat": 4,
721 | "nbformat_minor": 0
722 | }
723 |
--------------------------------------------------------------------------------
/mlc-llm/models/demo_CodeLlama_7b.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "id": "4IwhlCjVtpYj"
7 | },
8 | "source": [
9 | "# Demo: CodeLlama-7b with MLC LLM\n",
10 | "\n",
11 | "Recently, Meta unveiled [CodeLlama](https://github.com/facebookresearch/codellama), a family of large language models for code based on Llama 2 providing state-of-the-art performance among open models, infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks. This notebook demonstrates MLC LLM's support for the CodeLlama family:\n",
12 | "\n",
13 | "- **[CodeLlama](https://huggingface.co/codellama/CodeLlama-7b-hf): a coding foundation LLM**\n",
14 | "- **[CodeLlama-Instruct](https://huggingface.co/codellama/CodeLlama-7b-Instruct-hf): an instruction-tuned LLM for coding**\n",
15 | "- **[CodeLlama-Python](https://huggingface.co/codellama/CodeLlama-7b-Python-hf): a Python specialized LLM**\n",
16 | "\n",
17 | "In this respect, MLC LLM allows everyone to develop, optimize and deploy AI models natively on everyone's devices. Therefore, making possible the deployment of coding LLMs natively, acting as **a personal AI coding assistant**.\n",
18 | "\n",
19 | "In this notebook, we walk over the steps of using MLC LLM to run these pre-compiled CodeLlama models! We have uploaded various versions of the pre-compiled and quantized CodeLlama models here: https://huggingface.co/mlc-ai.\n",
20 | "\n",
21 | "Learn more about MLC LLM here: https://mlc.ai/mlc-llm/docs."
22 | ]
23 | },
24 | {
25 | "cell_type": "markdown",
26 | "metadata": {},
27 | "source": [
28 | "Here's an overview regarding each model's capabilities:\n",
29 | "\n",
30 | "| | Code Completion | Infilling | Instruction/chat | Python specialist |\n",
31 | "|-----------------------|-----------------|-----------|------------------|-------------------|\n",
32 | "| CodeLlama-7b | X | X | | |\n",
33 | "| CodeLlama-7b-Python | X | | | X |\n",
34 | "| CodeLlama-7b-Instruct | X | X | X | |"
35 | ]
36 | },
37 | {
38 | "cell_type": "markdown",
39 | "metadata": {
40 | "id": "YsvAL7SSt9Lo"
41 | },
42 | "source": [
43 | "Click the button below to get started!\n",
44 | "\n",
45 | "\n",
46 | "
\n",
47 | ""
48 | ]
49 | },
50 | {
51 | "cell_type": "markdown",
52 | "metadata": {
53 | "id": "8kkADAMCCLi-"
54 | },
55 | "source": [
56 | "## Install MLC LLM"
57 | ]
58 | },
59 | {
60 | "cell_type": "markdown",
61 | "metadata": {
62 | "id": "Y2EwuS6TCO61"
63 | },
64 | "source": [
65 | "We will start from setting up the environment. First, let us create a new Conda environment, in which we will run the rest of the notebook.\n",
66 | "\n",
67 | "```\n",
68 | "conda create --name mlc-llm python=3.10\n",
69 | "conda activate mlc-llm\n",
70 | "```"
71 | ]
72 | },
73 | {
74 | "cell_type": "markdown",
75 | "metadata": {
76 | "id": "ojEeEmsqCTPG"
77 | },
78 | "source": [
79 | "**Google Colab**\n",
80 | "\n",
81 | "- If you are running this in a Google Colab notebook, you would not need to create a conda environment.\n",
82 | "- However, be sure to change your runtime to GPU by going to `Runtime` > `Change runtime type` and setting the Hardware accelerator to be \"GPU\"."
83 | ]
84 | },
85 | {
86 | "cell_type": "markdown",
87 | "metadata": {
88 | "id": "S_rX53bGChPn"
89 | },
90 | "source": [
91 | "If you are using CUDA, you can run the following command to confirm that CUDA is set up correctly, and check the driver version number as well as what GPUs are currently available for use."
92 | ]
93 | },
94 | {
95 | "cell_type": "code",
96 | "execution_count": null,
97 | "metadata": {
98 | "id": "CRPeCflbCij6"
99 | },
100 | "outputs": [],
101 | "source": [
102 | "!nvidia-smi"
103 | ]
104 | },
105 | {
106 | "cell_type": "markdown",
107 | "metadata": {
108 | "id": "PQfVfTAYC1M-"
109 | },
110 | "source": [
111 | "Next, let's download the MLC-AI and MLC-LLM nightly build packages. If you are running in a Colab environment, then you can just run the following command. Otherwise, go to https://mlc.ai/package/ and replace the command below with the one that is appropriate for your hardware and OS."
112 | ]
113 | },
114 | {
115 | "cell_type": "markdown",
116 | "metadata": {
117 | "id": "vi-udt4tC5c9"
118 | },
119 | "source": [
120 | "**Google Colab**: If you are using Colab, you may see the red warnings such as \"You must restart the runtime in order to use newly installed versions.\" For our purpose, we can disregard them, the notebook will still run correctly."
121 | ]
122 | },
123 | {
124 | "cell_type": "code",
125 | "execution_count": null,
126 | "metadata": {
127 | "id": "ah9tYaCRCkKS"
128 | },
129 | "outputs": [],
130 | "source": [
131 | "!pip install --pre --force-reinstall mlc-ai-nightly-cu118 mlc-llm-nightly-cu118 -f https://mlc.ai/wheels"
132 | ]
133 | },
134 | {
135 | "cell_type": "markdown",
136 | "metadata": {
137 | "id": "nZGVNJE-DJ9E"
138 | },
139 | "source": [
140 | "Let's confirm we have installed the packages successfully!"
141 | ]
142 | },
143 | {
144 | "cell_type": "code",
145 | "execution_count": null,
146 | "metadata": {
147 | "id": "5Y6LszJgC7SQ"
148 | },
149 | "outputs": [],
150 | "source": [
151 | "!python -c \"import tvm; print('tvm installed properly!')\"\n",
152 | "!python -c \"import mlc_llm; print('mlc_llm installed properly!')\""
153 | ]
154 | },
155 | {
156 | "cell_type": "markdown",
157 | "metadata": {
158 | "id": "lGfnrRa9DMw1"
159 | },
160 | "source": [
161 | "## Download Prebuilt Models and Library"
162 | ]
163 | },
164 | {
165 | "cell_type": "markdown",
166 | "metadata": {
167 | "id": "pVYkLb0eDjMi"
168 | },
169 | "source": [
170 | "The following commands will download all the available prebuilt libraries (e.g., `.so` files), including the precompiled CodeLlama models. This may take a while. If in **Google Colab**, you can verify that the files are being downloaded by clicking on the folder icon on the left."
171 | ]
172 | },
173 | {
174 | "cell_type": "markdown",
175 | "metadata": {
176 | "id": "Pg7daEvlD5UB"
177 | },
178 | "source": [
179 | "Note: If you are NOT running in **Google Colab** you may need to run this line `!conda install git git-lfs` to install `git` and `git-lfs` before running the following cell."
180 | ]
181 | },
182 | {
183 | "cell_type": "code",
184 | "execution_count": null,
185 | "metadata": {
186 | "id": "FDFbw1KPDLu1"
187 | },
188 | "outputs": [],
189 | "source": [
190 | "!git lfs install"
191 | ]
192 | },
193 | {
194 | "cell_type": "code",
195 | "execution_count": null,
196 | "metadata": {
197 | "id": "bYqaVjmND7Px"
198 | },
199 | "outputs": [],
200 | "source": [
201 | "!mkdir -p dist/prebuilt\n",
202 | "!git clone https://github.com/mlc-ai/binary-mlc-llm-libs.git dist/prebuilt/lib"
203 | ]
204 | },
205 | {
206 | "cell_type": "markdown",
207 | "metadata": {
208 | "id": "SMEavWCJEC_d"
209 | },
210 | "source": [
211 | "#### CodeLlama-7b q4f16_1 prebuilt weights"
212 | ]
213 | },
214 | {
215 | "cell_type": "code",
216 | "execution_count": null,
217 | "metadata": {
218 | "id": "etHEUrfMD8bX"
219 | },
220 | "outputs": [],
221 | "source": [
222 | "!cd dist/prebuilt && git clone https://huggingface.co/mlc-ai/mlc-chat-CodeLlama-7b-hf-q4f16_1"
223 | ]
224 | },
225 | {
226 | "cell_type": "markdown",
227 | "metadata": {
228 | "id": "FQkIX4TpElR6"
229 | },
230 | "source": [
231 | "#### CodeLlama-7b-Instruct q4f16_1 prebuilt weights"
232 | ]
233 | },
234 | {
235 | "cell_type": "code",
236 | "execution_count": null,
237 | "metadata": {
238 | "id": "mTEGXAlhEnOw"
239 | },
240 | "outputs": [],
241 | "source": [
242 | "!cd dist/prebuilt && git clone https://huggingface.co/mlc-ai/mlc-chat-CodeLlama-7b-Instruct-hf-q4f16_1"
243 | ]
244 | },
245 | {
246 | "cell_type": "markdown",
247 | "metadata": {
248 | "id": "acb1HpKpEoca"
249 | },
250 | "source": [
251 | "#### CodeLlama-7b-Python q4f16_1 prebuilt weights"
252 | ]
253 | },
254 | {
255 | "cell_type": "code",
256 | "execution_count": null,
257 | "metadata": {
258 | "id": "oysLKcZ4Eou7"
259 | },
260 | "outputs": [],
261 | "source": [
262 | "!cd dist/prebuilt && git clone https://huggingface.co/mlc-ai/mlc-chat-CodeLlama-7b-Python-hf-q4f16_1"
263 | ]
264 | },
265 | {
266 | "cell_type": "code",
267 | "execution_count": null,
268 | "metadata": {
269 | "id": "dbHdyfIXHNpo"
270 | },
271 | "outputs": [],
272 | "source": [
273 | "# Restart colab\n",
274 | "exit()"
275 | ]
276 | },
277 | {
278 | "cell_type": "markdown",
279 | "metadata": {
280 | "id": "CmpxrrqyE0S6"
281 | },
282 | "source": [
283 | "## Let's code with CodeLlama!"
284 | ]
285 | },
286 | {
287 | "cell_type": "markdown",
288 | "metadata": {
289 | "id": "y73vNLy1OfMr"
290 | },
291 | "source": [
292 | "Let's first try a simple code completion task with the CodeLlama-Python."
293 | ]
294 | },
295 | {
296 | "cell_type": "code",
297 | "execution_count": null,
298 | "metadata": {
299 | "id": "EOEf8sDyEwuv"
300 | },
301 | "outputs": [],
302 | "source": [
303 | "from mlc_llm import ChatModule\n",
304 | "from mlc_llm.callback import StreamToStdout"
305 | ]
306 | },
307 | {
308 | "cell_type": "code",
309 | "execution_count": null,
310 | "metadata": {
311 | "id": "tBcQMm-KJPN-"
312 | },
313 | "outputs": [],
314 | "source": [
315 | "codellama_python = ChatModule(model=\"CodeLlama-7b-Python-hf-q4f16_1\", device=\"cuda\")"
316 | ]
317 | },
318 | {
319 | "cell_type": "code",
320 | "execution_count": null,
321 | "metadata": {
322 | "id": "1g2l_hJhLyYm"
323 | },
324 | "outputs": [],
325 | "source": [
326 | "prompt = \"\"\"\\\n",
327 | "# Self-attention block implementation\n",
328 | "class SelfAttentionBlock(nn.Module):\n",
329 | " def __init__(\"\"\"\n",
330 | "\n",
331 | "output = codellama_python.generate(\n",
332 | " prompt=prompt,\n",
333 | " progress_callback=StreamToStdout(callback_interval=2)\n",
334 | ")"
335 | ]
336 | },
337 | {
338 | "cell_type": "code",
339 | "execution_count": null,
340 | "metadata": {
341 | "colab": {
342 | "base_uri": "https://localhost:8080/"
343 | },
344 | "id": "XwU54BtKQKz4",
345 | "outputId": "d5409224-cfbf-4c28-8a81-40bccfc02572"
346 | },
347 | "outputs": [
348 | {
349 | "name": "stdout",
350 | "output_type": "stream",
351 | "text": [
352 | "# Self-attention block implementation\n",
353 | "class SelfAttentionBlock(nn.Module):\n",
354 | " def __init__(self, dim, num_heads):\n",
355 | " super().__init__()\n",
356 | " self.num_heads = num_heads\n",
357 | " self.key = nn.Linear(dim, dim))\n",
358 | " self.value = nn.Linear(dim, dim))\n",
359 | " self.proj = nn.Linear(dim, dim))\n",
360 | "\n",
361 | " def forward(self, x):\n",
362 | " B, N, C = x.shape\n",
363 | " q = self.key(x[:, :, :-64])))\n",
364 | " k = self.key(x[:, :, :64]]))\n",
365 | " v = self.value(x[:, :, :]]]]))\n",
366 | " attn = (q @ k.transpose(-1), v))\n",
367 | "\n",
368 | " x = self.proj(attn[0]]))))\n",
369 | "\n",
370 | " return x\n"
371 | ]
372 | }
373 | ],
374 | "source": [
375 | "print(prompt+output)"
376 | ]
377 | },
378 | {
379 | "cell_type": "code",
380 | "execution_count": null,
381 | "metadata": {
382 | "id": "DFrVdqewL7_c"
383 | },
384 | "outputs": [],
385 | "source": [
386 | "# Restart colab to initialize a new ChatModule\n",
387 | "exit()"
388 | ]
389 | },
390 | {
391 | "cell_type": "markdown",
392 | "metadata": {
393 | "id": "6CUo34QeQto2"
394 | },
395 | "source": [
396 | "The CodeLlama models support infilling based on surrounding content. Let's try it with the foundation CodeLlama."
397 | ]
398 | },
399 | {
400 | "cell_type": "code",
401 | "execution_count": null,
402 | "metadata": {
403 | "id": "yeHn8je9SBpK"
404 | },
405 | "outputs": [],
406 | "source": [
407 | "from mlc_llm import ChatModule\n",
408 | "from mlc_llm.callback import StreamToStdout\n",
409 | "\n",
410 | "def text_infilling(prompt: str):\n",
411 | " prefix = prompt.split(\"\")[0]\n",
412 | " suffix = prompt.split(\"\")[1]\n",
413 | " return f\" {prefix} {suffix} \"\n",
414 | "\n",
415 | "def print_infilling(prompt: str, output: str):\n",
416 | " print(prompt.replace(\"\", output.replace(\"\", \"\")))"
417 | ]
418 | },
419 | {
420 | "cell_type": "code",
421 | "execution_count": null,
422 | "metadata": {
423 | "id": "LUU8zTdRSKNK"
424 | },
425 | "outputs": [],
426 | "source": [
427 | "codellama = ChatModule(model=\"CodeLlama-7b-hf-q4f16_1\", device=\"cuda\")"
428 | ]
429 | },
430 | {
431 | "cell_type": "code",
432 | "execution_count": null,
433 | "metadata": {
434 | "id": "d3rrm1qPSOkr"
435 | },
436 | "outputs": [],
437 | "source": [
438 | "prompt = \"\"\"\\\n",
439 | "# Installation instructions:\n",
440 | " \n",
441 | "This downloads the LLaMA inference code and installs the repository as a local pip package.\n",
442 | "\"\"\"\n",
443 | "\n",
444 | "output = codellama.generate(\n",
445 | " prompt=text_infilling(prompt),\n",
446 | " progress_callback=StreamToStdout(callback_interval=2)\n",
447 | ")"
448 | ]
449 | },
450 | {
451 | "cell_type": "code",
452 | "execution_count": null,
453 | "metadata": {
454 | "colab": {
455 | "base_uri": "https://localhost:8080/"
456 | },
457 | "id": "78SHkqohUbCc",
458 | "outputId": "33a4ef28-2db0-4e8a-c886-a630fb2d8df6"
459 | },
460 | "outputs": [
461 | {
462 | "name": "stdout",
463 | "output_type": "stream",
464 | "text": [
465 | "# Installation instructions:\n",
466 | " 1. Clone the repository.\n",
467 | " ```\n",
468 | " git clone https://github.com/LLaMA/LLaMA.git\n",
469 | " ```\n",
470 | "\n",
471 | "\n",
472 | "2. Install the pip package.\n",
473 | " ```\n",
474 | " cd LLaMA\n",
475 | " pip install -e .\n",
476 | " ```\n",
477 | " \n",
478 | "This downloads the LLaMA inference code and installs the repository as a local pip package.\n",
479 | "\n"
480 | ]
481 | }
482 | ],
483 | "source": [
484 | "print_infilling(prompt, output)"
485 | ]
486 | },
487 | {
488 | "cell_type": "code",
489 | "execution_count": null,
490 | "metadata": {
491 | "id": "mKYyRyUVPGm2"
492 | },
493 | "outputs": [],
494 | "source": [
495 | "# Restart colab to create a new ChatModule\n",
496 | "exit()"
497 | ]
498 | },
499 | {
500 | "cell_type": "markdown",
501 | "metadata": {
502 | "id": "CqmYlUtwV01m"
503 | },
504 | "source": [
505 | "Finally, the CodeLlama-Instruct has instruction following ability for programming tasks."
506 | ]
507 | },
508 | {
509 | "cell_type": "code",
510 | "execution_count": null,
511 | "metadata": {
512 | "id": "IcCPRp9oWBfh"
513 | },
514 | "outputs": [],
515 | "source": [
516 | "from mlc_llm import ChatModule\n",
517 | "from mlc_llm.callback import StreamToStdout"
518 | ]
519 | },
520 | {
521 | "cell_type": "code",
522 | "execution_count": null,
523 | "metadata": {
524 | "id": "I4C8iC9IWyZR"
525 | },
526 | "outputs": [],
527 | "source": [
528 | "codellama_instruct = ChatModule(model=\"CodeLlama-7b-Instruct-hf-q4f16_1\", device=\"cuda\")"
529 | ]
530 | },
531 | {
532 | "cell_type": "code",
533 | "execution_count": null,
534 | "metadata": {
535 | "colab": {
536 | "base_uri": "https://localhost:8080/"
537 | },
538 | "id": "1kL02gKWW7Ov",
539 | "outputId": "26c64719-dc61-4f31-dd5d-78e215855197"
540 | },
541 | "outputs": [
542 | {
543 | "name": "stdout",
544 | "output_type": "stream",
545 | "text": [
546 | "Here is a C++ program that computes the set of sums of all contiguous sublists of a given list:\n",
547 | "#include \n",
548 | "using namespace std;\n",
549 | "void computeSums(const list &lst, list &sums) {\n",
550 | " // Initialize the sums list\n",
551 | " sums.clear();\n",
552 | " // Compute the sums of all contiguous sublists\n",
553 | " for (int i = 0; i < lst.size() - 1; i++) {\n",
554 | " int sum = 0;\n",
555 | " for (int j = i; j < lst.size() - 1; j++) {\n",
556 | " sum += lst[j];\n",
557 | " }\n",
558 | " sums.push_back(sum));\n",
559 | " }\n",
560 | " // Print the sums list\n",
561 | " for (int i = 0; i < sums.size(); i++) {\n",
562 | " cout << sums[i] << endl;\n",
563 | " }\n",
564 | "}\n",
565 | "int main() {\n",
566 | " list lst = {1, 2, 3, 4, 5};\n",
567 | " list sums;\n",
568 | " computeSums(lst, sums);\n",
569 | " return 0;\n",
570 | "}\n",
571 | "This program takes a list of integers as input, and computes the set of sums of all contiguous sublists of the input list. The program then prints the computed set of sums.\n",
572 | "Note that the input list must be a list of integers, and that the program will produce an error if the input list is not a list of integers.\n"
573 | ]
574 | }
575 | ],
576 | "source": [
577 | "prompt = (\"Write a C++ program that computes the set of sums of all contiguous\"\n",
578 | " \"sublists of a given list.\")\n",
579 | "\n",
580 | "output = codellama_instruct.generate(\n",
581 | " prompt=prompt,\n",
582 | " progress_callback=StreamToStdout(callback_interval=2)\n",
583 | ")"
584 | ]
585 | },
586 | {
587 | "cell_type": "code",
588 | "execution_count": null,
589 | "metadata": {
590 | "colab": {
591 | "base_uri": "https://localhost:8080/"
592 | },
593 | "id": "liWTlsBBYTRa",
594 | "outputId": "07907456-2600-4515-fc2d-af9213803ef6"
595 | },
596 | "outputs": [
597 | {
598 | "name": "stdout",
599 | "output_type": "stream",
600 | "text": [
601 | "Here is the C++ program written in Java instead:\n",
602 | "import java.util.ArrayList;\n",
603 | "public class SumsOfSublists {\n",
604 | " public static void main(String[] args) {\n",
605 | " ArrayList lst = new ArrayList(){{add(1);add(2);add(3);add(4);add(5);}};\n",
606 | " ArrayList sums = new ArrayList();\n",
607 | " computeSums(lst, sums));\n",
608 | " for (int i = 0; i < sums.size(); i++) {\n",
609 | " System.out.println(sums[i])));\n",
610 | " }\n",
611 | " }\n",
612 | " public static void computeSums(ArrayList lst, ArrayList sums) {\n",
613 | " for (int i = 0; i < lst.size() - 1; i++) {\n",
614 | " int sum = 0;\n",
615 | " for (int j = i; j < lst.size() - 1; j++) {\n",
616 | " sum += lst[j]);\n",
617 | " }\n",
618 | " sums.add(sum));\n",
619 | " }\n",
620 | " }\n",
621 | "}\n",
622 | "This Java program takes a list of integers as input, and computes the set of sums of all contiguous sublists of the input list. The program then prints the computed set of sums.\n",
623 | "Note that the input list must be a list of integers, and that the program will produce an error if the input list is not a list of integers.\n"
624 | ]
625 | }
626 | ],
627 | "source": [
628 | "output = codellama_instruct.generate(\n",
629 | " prompt=\"Write this in Java instead.\",\n",
630 | " progress_callback=StreamToStdout(callback_interval=2)\n",
631 | ")"
632 | ]
633 | },
634 | {
635 | "cell_type": "code",
636 | "execution_count": null,
637 | "metadata": {
638 | "id": "UzIMwuiaWwSg"
639 | },
640 | "outputs": [],
641 | "source": [
642 | "# Restart colab to create a new ChatModule\n",
643 | "exit()"
644 | ]
645 | }
646 | ],
647 | "metadata": {
648 | "accelerator": "GPU",
649 | "colab": {
650 | "authorship_tag": "ABX9TyMgGy5PkxU2LXzQzjaEnyyL",
651 | "gpuType": "T4",
652 | "provenance": []
653 | },
654 | "kernelspec": {
655 | "display_name": "Python 3",
656 | "name": "python3"
657 | },
658 | "language_info": {
659 | "name": "python"
660 | }
661 | },
662 | "nbformat": 4,
663 | "nbformat_minor": 0
664 | }
665 |
--------------------------------------------------------------------------------
/mlc-llm/models/demo_WizardLM_Math_Coder.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "id": "7aZkVRjX6hun"
7 | },
8 | "source": [
9 | "# Demo: WizardLM, WizardMath, and WizardCoder with MLC LLM\n",
10 | "\n",
11 | "WizardLM recently released their WizardMath model, which has achieved impressive results on various benchmarks. We take this opportunity to demonstrate MLC LLM's support for the Wizard model family: https://github.com/nlpxucan/WizardLM.\n",
12 | "\n",
13 | "Specifically, we will look at:\n",
14 | "- **[WizardLM](https://github.com/nlpxucan/WizardLM/tree/main/WizardLM): an instruction-following LLM using Evol-Instruct**\n",
15 | "- **[WizardCoder](https://github.com/nlpxucan/WizardLM/tree/main/WizardCoder): a code LLM with Evol-Instruct**\n",
16 | "- **[WizardMath](https://github.com/nlpxucan/WizardLM/tree/main/WizardMath): a mathematical reasoning LLM via reinforced Evol-Instruct**\n",
17 | "\n",
18 | "The task-specific Wizard models resonate with one of the visions of MLC LLM: deploying LLMs natively, each acting as **a personal AI assistant for a specific realm of tasks**.\n",
19 | "\n",
20 | "In this notebook, we walk over the steps of using MLC LLM to run these pre-compiled Wizard models! We have uploaded various versions of the pre-compiled and quantized Wizard models here: https://huggingface.co/mlc-ai.\n",
21 | "\n",
22 | "Learn more about MLC LLM here: https://mlc.ai/mlc-llm/docs."
23 | ]
24 | },
25 | {
26 | "cell_type": "markdown",
27 | "metadata": {
28 | "id": "rgD6BbKu_Dm0"
29 | },
30 | "source": [
31 | "Click the button below to get started!\n",
32 | "\n",
33 | "\n",
34 | "
\n",
35 | ""
36 | ]
37 | },
38 | {
39 | "cell_type": "markdown",
40 | "metadata": {
41 | "id": "rKvxnQF-9y8T"
42 | },
43 | "source": [
44 | "## Install MLC LLM"
45 | ]
46 | },
47 | {
48 | "cell_type": "markdown",
49 | "metadata": {
50 | "id": "cEfutAOe-48p"
51 | },
52 | "source": [
53 | "We will start from setting up the environment. First, let us create a new Conda environment, in which we will run the rest of the notebook.\n",
54 | "\n",
55 | "```\n",
56 | "conda create --name mlc-llm python=3.10\n",
57 | "conda activate mlc-llm\n",
58 | "```"
59 | ]
60 | },
61 | {
62 | "cell_type": "markdown",
63 | "metadata": {
64 | "id": "r3N6HKk8_Bbl"
65 | },
66 | "source": [
67 | "**Google Colab**\n",
68 | "\n",
69 | "- If you are running this in a Google Colab notebook, you would not need to create a conda environment.\n",
70 | "- However, be sure to change your runtime to GPU by going to `Runtime` > `Change runtime type` and setting the Hardware accelerator to be \"GPU\"."
71 | ]
72 | },
73 | {
74 | "cell_type": "markdown",
75 | "metadata": {
76 | "id": "al4bIcFv_HtH"
77 | },
78 | "source": [
79 | "If you are using CUDA, you can run the following command to confirm that CUDA is set up correctly, and check the driver version number as well as what GPUs are currently available for use.\n",
80 | "\n"
81 | ]
82 | },
83 | {
84 | "cell_type": "code",
85 | "execution_count": null,
86 | "metadata": {
87 | "id": "8wEfFZ8f6vT3"
88 | },
89 | "outputs": [],
90 | "source": [
91 | "!nvidia-smi"
92 | ]
93 | },
94 | {
95 | "cell_type": "markdown",
96 | "metadata": {
97 | "id": "UJnujwMT_RVZ"
98 | },
99 | "source": [
100 | "Next, let's download the MLC-AI and mlc-llm nightly build packages. If you are running in a Colab environment, then you can just run the following command. Otherwise, go to https://mlc.ai/package/ and replace the command below with the one that is appropriate for your hardware and OS."
101 | ]
102 | },
103 | {
104 | "cell_type": "markdown",
105 | "metadata": {
106 | "id": "s6wHMUtk_M6A"
107 | },
108 | "source": [
109 | "**Google Colab**: If you are using Colab, you may see the red warnings such as \"You must restart the runtime in order to use newly installed versions.\" For our purpose, we can disregard them, the notebook will still run correctly."
110 | ]
111 | },
112 | {
113 | "cell_type": "code",
114 | "execution_count": null,
115 | "metadata": {
116 | "id": "4TGX5fqYjyID"
117 | },
118 | "outputs": [],
119 | "source": [
120 | "!pip install --pre --force-reinstall mlc-ai-nightly-cu118 mlc-llm-nightly-cu118 -f https://mlc.ai/wheels"
121 | ]
122 | },
123 | {
124 | "cell_type": "markdown",
125 | "metadata": {
126 | "id": "HWMMGRlg_nxj"
127 | },
128 | "source": [
129 | "Let's confirm we have installed the packages successfully!"
130 | ]
131 | },
132 | {
133 | "cell_type": "code",
134 | "execution_count": null,
135 | "metadata": {
136 | "id": "4R-150I6_q6N"
137 | },
138 | "outputs": [],
139 | "source": [
140 | "!python -c \"import tvm; print('tvm installed properly!')\"\n",
141 | "!python -c \"import mlc_llm; print('mlc_llm installed properly!')\""
142 | ]
143 | },
144 | {
145 | "cell_type": "markdown",
146 | "metadata": {
147 | "id": "GbYg_EnT_4Qf"
148 | },
149 | "source": [
150 | "## Download Prebuilt Models and Library"
151 | ]
152 | },
153 | {
154 | "cell_type": "markdown",
155 | "metadata": {
156 | "id": "RdGuw9vB_8Qp"
157 | },
158 | "source": [
159 | "These commands will download many prebuilt libraries (e.g. `.so` files) as well as the precompiled Wizard models. This may take a while. If in **Google Colab**, you can verify that the files are being downloaded by clicking on the folder icon on the left."
160 | ]
161 | },
162 | {
163 | "cell_type": "markdown",
164 | "metadata": {
165 | "id": "29N0JS4NAOtx"
166 | },
167 | "source": [
168 | "Note: If you are NOT running in **Google Colab** you may need to run this line `!conda install git git-lfs` to install `git` and `git-lfs` before running the following cell."
169 | ]
170 | },
171 | {
172 | "cell_type": "code",
173 | "execution_count": null,
174 | "metadata": {
175 | "id": "RLU17ZZmjzPz"
176 | },
177 | "outputs": [],
178 | "source": [
179 | "!git lfs install"
180 | ]
181 | },
182 | {
183 | "cell_type": "code",
184 | "execution_count": null,
185 | "metadata": {
186 | "id": "oN6syhH7j2zi"
187 | },
188 | "outputs": [],
189 | "source": [
190 | "!mkdir -p dist/prebuilt\n",
191 | "!git clone https://github.com/mlc-ai/binary-mlc-llm-libs.git dist/prebuilt/lib"
192 | ]
193 | },
194 | {
195 | "cell_type": "code",
196 | "execution_count": null,
197 | "metadata": {
198 | "id": "EzU8dFr9j6VD"
199 | },
200 | "outputs": [],
201 | "source": [
202 | "!cd dist/prebuilt && git clone https://huggingface.co/mlc-ai/mlc-chat-WizardCoder-15B-V1.0-q4f16_1"
203 | ]
204 | },
205 | {
206 | "cell_type": "code",
207 | "execution_count": null,
208 | "metadata": {
209 | "id": "TEQwp2cMj90p"
210 | },
211 | "outputs": [],
212 | "source": [
213 | "!cd dist/prebuilt && git clone https://huggingface.co/mlc-ai/mlc-chat-WizardLM-13B-V1.2-q4f16_1"
214 | ]
215 | },
216 | {
217 | "cell_type": "code",
218 | "execution_count": null,
219 | "metadata": {
220 | "id": "vZvzVrngoREj"
221 | },
222 | "outputs": [],
223 | "source": [
224 | "!cd dist/prebuilt && git clone https://huggingface.co/mlc-ai/mlc-chat-WizardMath-13B-V1.0-q4f16_1"
225 | ]
226 | },
227 | {
228 | "cell_type": "code",
229 | "execution_count": 8,
230 | "metadata": {
231 | "id": "rK4yVJdEDvQr"
232 | },
233 | "outputs": [],
234 | "source": [
235 | "# In Colab, for some reason we need to restart runtime by running `exit()`.\n",
236 | "# Simply run `exit()`, then run the subsequent cells after runtime restarts.\n",
237 | "exit()"
238 | ]
239 | },
240 | {
241 | "cell_type": "markdown",
242 | "metadata": {
243 | "id": "PK2DVVIk8Ryv"
244 | },
245 | "source": [
246 | "## Let's Chat with WizardLM!"
247 | ]
248 | },
249 | {
250 | "cell_type": "code",
251 | "execution_count": null,
252 | "metadata": {
253 | "id": "ZX4X9mGTnKSf"
254 | },
255 | "outputs": [],
256 | "source": [
257 | "from mlc_llm import ChatModule\n",
258 | "from mlc_llm.callback import StreamToStdout"
259 | ]
260 | },
261 | {
262 | "cell_type": "code",
263 | "execution_count": null,
264 | "metadata": {
265 | "id": "4Oaj1TdxidCP"
266 | },
267 | "outputs": [],
268 | "source": [
269 | "wizard_lm = ChatModule(model=\"WizardLM-13B-V1.2-q4f16_1\", device=\"cuda\")"
270 | ]
271 | },
272 | {
273 | "cell_type": "code",
274 | "execution_count": null,
275 | "metadata": {
276 | "colab": {
277 | "base_uri": "https://localhost:8080/"
278 | },
279 | "id": "s6YdYe5AnVzN",
280 | "outputId": "e620e4b5-61b9-4602-a75b-d438a81c1b3a"
281 | },
282 | "outputs": [
283 | {
284 | "name": "stdout",
285 | "output_type": "stream",
286 | "text": [
287 | "1. New York City\n",
288 | "2. Los Angeles\n",
289 | "3. Chicago\n"
290 | ]
291 | }
292 | ],
293 | "source": [
294 | "output = wizard_lm.generate(\n",
295 | " prompt=\"Give me three American cities names\",\n",
296 | " progress_callback=StreamToStdout(callback_interval=2)\n",
297 | ")"
298 | ]
299 | },
300 | {
301 | "cell_type": "code",
302 | "execution_count": null,
303 | "metadata": {
304 | "colab": {
305 | "base_uri": "https://localhost:8080/"
306 | },
307 | "id": "rGwiIrKonehd",
308 | "outputId": "d836f5d1-4b50-4cea-9da1-a11b662954c0"
309 | },
310 | "outputs": [
311 | {
312 | "name": "stdout",
313 | "output_type": "stream",
314 | "text": [
315 | "4. Las Vegas\n",
316 | "5. Miami\n"
317 | ]
318 | }
319 | ],
320 | "source": [
321 | "output = wizard_lm.generate(\n",
322 | " prompt=\"Give me two more\",\n",
323 | " progress_callback=StreamToStdout(callback_interval=2)\n",
324 | ")"
325 | ]
326 | },
327 | {
328 | "cell_type": "code",
329 | "execution_count": null,
330 | "metadata": {
331 | "id": "WUdq7Z-qpABZ"
332 | },
333 | "outputs": [],
334 | "source": [
335 | "# In Colab, for some reason we need to restart runtime by running `exit()`.\n",
336 | "# Simply run `exit()`, then run the subsequent cells after runtime restarts.\n",
337 | "exit()"
338 | ]
339 | },
340 | {
341 | "cell_type": "markdown",
342 | "metadata": {
343 | "id": "XuHZgZhY8YoB"
344 | },
345 | "source": [
346 | "## Let's Solve a Math Problem with WizardMath!"
347 | ]
348 | },
349 | {
350 | "cell_type": "code",
351 | "execution_count": null,
352 | "metadata": {
353 | "id": "WxnBldtMqn2Y"
354 | },
355 | "outputs": [],
356 | "source": [
357 | "from mlc_llm import ChatModule\n",
358 | "from mlc_llm.callback import StreamToStdout"
359 | ]
360 | },
361 | {
362 | "cell_type": "code",
363 | "execution_count": null,
364 | "metadata": {
365 | "id": "5z3Cx8NChz-z"
366 | },
367 | "outputs": [],
368 | "source": [
369 | "wizard_math = ChatModule(model=\"WizardMath-13B-V1.0-q4f16_1\", device=\"cuda\")"
370 | ]
371 | },
372 | {
373 | "cell_type": "code",
374 | "execution_count": null,
375 | "metadata": {
376 | "colab": {
377 | "base_uri": "https://localhost:8080/"
378 | },
379 | "id": "ufta9VbyqtlN",
380 | "outputId": "196f3ca1-2837-4b48-b52a-b012b840d3a4"
381 | },
382 | "outputs": [
383 | {
384 | "name": "stdout",
385 | "output_type": "stream",
386 | "text": [
387 | "\n",
388 | "Step 1: Define the variables.\n",
389 | "Let C represent the number of chickens and R represent the number of rabbits.\n",
390 | "\n",
391 | "Step 2: Write the equations based on the given information.\n",
392 | "We know that the total number of legs is 14, so we can write the equation:\n",
393 | "2C + 4R = 14\n",
394 | "\n",
395 | "We also know that there are 5 animals in total, so we can write the equation:\n",
396 | "C + R = 5\n",
397 | "\n",
398 | "Step 3: Solve the system of equations.\n",
399 | "We can solve this system of equations using the substitution method. First, we'll solve the second equation for R:\n",
400 | "R = 5 - C\n",
401 | "\n",
402 | "Now, we'll substitute this expression for R into the first equation:\n",
403 | "2C + 4(5 - C) = 14\n",
404 | "\n",
405 | "Step 4: Simplify and solve for C.\n",
406 | "2C + 20 - 4C = 14\n",
407 | "-2C = -6\n",
408 | "C = 3\n",
409 | "\n",
410 | "Step 5: Find the number of rabbits.\n",
411 | "Now that we know there are 3 chickens, we can find the number of rabbits using the equation R = 5 - C:\n",
412 | "R = 5 - 3\n",
413 | "R = 2\n",
414 | "\n",
415 | "Step 6: Provide the final answer.\n",
416 | "There are 3 chickens and 2 rabbits, so the answer is:\n",
417 | "C + R = 3 + 2 = 5\n",
418 | "There are 5 animals in total, and since we found that there are 3 chickens, there must be 2 rabbits.\n",
419 | "\n",
420 | "The answer is: 3.\n"
421 | ]
422 | }
423 | ],
424 | "source": [
425 | "prompt=(\n",
426 | " \"A chicken has 2 legs, and a rabbit has 4 legs. Given that there are 5 animals \"\n",
427 | " \"in total, and 14 legs in total, how many chicken are there? Show your steps.\"\n",
428 | ")\n",
429 | "output = wizard_math.generate(prompt, StreamToStdout(callback_interval=2))"
430 | ]
431 | },
432 | {
433 | "cell_type": "code",
434 | "execution_count": null,
435 | "metadata": {
436 | "id": "tsFRIhwKrMP1"
437 | },
438 | "outputs": [],
439 | "source": [
440 | "# In Colab, for some reason we need to restart runtime by running `exit()`.\n",
441 | "# Simply run `exit()`, then run the subsequent cells after runtime restarts.\n",
442 | "exit()"
443 | ]
444 | },
445 | {
446 | "cell_type": "markdown",
447 | "metadata": {
448 | "id": "IFjaA-yC8iH5"
449 | },
450 | "source": [
451 | "## Let's Solve a Leetcode with WizardCoder!\n",
452 | "\n",
453 | "WizardMath tends to give Markdown format output, which is really cool! We use `IPython.display` to display the output as Markdown!"
454 | ]
455 | },
456 | {
457 | "cell_type": "code",
458 | "execution_count": 1,
459 | "metadata": {
460 | "id": "esuPRZQSfxYl"
461 | },
462 | "outputs": [],
463 | "source": [
464 | "from mlc_llm import ChatModule\n",
465 | "from IPython.display import display, Markdown, Latex"
466 | ]
467 | },
468 | {
469 | "cell_type": "code",
470 | "execution_count": null,
471 | "metadata": {
472 | "id": "-LqAcTW5214y"
473 | },
474 | "outputs": [],
475 | "source": [
476 | "wizard_coder = ChatModule(model=\"WizardCoder-15B-V1.0-q4f16_1\", device=\"cuda\")"
477 | ]
478 | },
479 | {
480 | "cell_type": "code",
481 | "execution_count": 3,
482 | "metadata": {
483 | "colab": {
484 | "base_uri": "https://localhost:8080/",
485 | "height": 338
486 | },
487 | "id": "neZIfq8ntFxo",
488 | "outputId": "130abead-a928-49e5-c3ff-509d2636cbdd"
489 | },
490 | "outputs": [
491 | {
492 | "data": {
493 | "text/markdown": [
494 | "A number is said to be a palindrome if it reads the same backward as forward. For example, 121, 444, and 999 are palindromes, while 123, 777, and 555 are not.\r\n",
495 | "\r\n",
496 | "Here's the Python code to determine whether a number is a palindrome:\r\n",
497 | "\r\n",
498 | "```python\r\n",
499 | "num = input(\"Enter a number: \") # take input from user\r\n",
500 | "\r\n",
501 | "# convert the number to a string to check if it's a palindrome\r\n",
502 | "num_str = str(num)\r\n",
503 | "\r\n",
504 | "# reverse the string and compare it with the original string\r\n",
505 | "if num_str == num_str[::-1]:\r\n",
506 | " print(num, \"is a palindrome\")\r\n",
507 | "else:\r\n",
508 | " print(num, \"is not a palindrome\")\r\n",
509 | "```\r\n",
510 | "\r\n",
511 | "In this code, we first take input from the user using the `input()` function and store it in the variable `num`. We then convert the number to a string using the `str()` function and store it in the variable `num_str`.\r\n",
512 | "\r\n",
513 | "We then use slicing to reverse the string and compare it with the original string. If they are the same, then the number is a palindrome. Otherwise, it's not.\r\n",
514 | "\r\n",
515 | "Note that we use the `[::-1]` syntax to reverse the string. This is a shorthand for slicing the string from start to end with a step of -1 (i.e. backwards)."
516 | ],
517 | "text/plain": [
518 | ""
519 | ]
520 | },
521 | "metadata": {},
522 | "output_type": "display_data"
523 | }
524 | ],
525 | "source": [
526 | "prompt= \"Write a Python program that determines whether a number is a palindrome.\"\n",
527 | "output = wizard_coder.generate(prompt=prompt)\n",
528 | "display(Markdown(output))"
529 | ]
530 | },
531 | {
532 | "cell_type": "code",
533 | "execution_count": 4,
534 | "metadata": {
535 | "colab": {
536 | "base_uri": "https://localhost:8080/"
537 | },
538 | "id": "eZjgzNvPf-Qd",
539 | "outputId": "66a7a1e5-9fd4-4bf5-d4d3-9a90fd0479aa"
540 | },
541 | "outputs": [
542 | {
543 | "name": "stdout",
544 | "output_type": "stream",
545 | "text": [
546 | "A number is said to be a palindrome if it reads the same backward as forward. For example, 121, 444, and 999 are palindromes, while 123, 777, and 555 are not.\r\n",
547 | "\r\n",
548 | "Here's the Python code to determine whether a number is a palindrome:\r\n",
549 | "\r\n",
550 | "```python\r\n",
551 | "num = input(\"Enter a number: \") # take input from user\r\n",
552 | "\r\n",
553 | "# convert the number to a string to check if it's a palindrome\r\n",
554 | "num_str = str(num)\r\n",
555 | "\r\n",
556 | "# reverse the string and compare it with the original string\r\n",
557 | "if num_str == num_str[::-1]:\r\n",
558 | " print(num, \"is a palindrome\")\r\n",
559 | "else:\r\n",
560 | " print(num, \"is not a palindrome\")\r\n",
561 | "```\r\n",
562 | "\r\n",
563 | "In this code, we first take input from the user using the `input()` function and store it in the variable `num`. We then convert the number to a string using the `str()` function and store it in the variable `num_str`.\r\n",
564 | "\r\n",
565 | "We then use slicing to reverse the string and compare it with the original string. If they are the same, then the number is a palindrome. Otherwise, it's not.\r\n",
566 | "\r\n",
567 | "Note that we use the `[::-1]` syntax to reverse the string. This is a shorthand for slicing the string from start to end with a step of -1 (i.e. backwards).\n"
568 | ]
569 | }
570 | ],
571 | "source": [
572 | "print(output)"
573 | ]
574 | },
575 | {
576 | "cell_type": "code",
577 | "execution_count": 5,
578 | "metadata": {
579 | "colab": {
580 | "base_uri": "https://localhost:8080/",
581 | "height": 507
582 | },
583 | "id": "r3Mny3Sa-xiN",
584 | "outputId": "256a41f3-cc63-4fd8-bbcd-bf12d0fe032e"
585 | },
586 | "outputs": [
587 | {
588 | "data": {
589 | "text/markdown": [
590 | "Here's the Java code to determine whether a number is a palindrome:\r\n",
591 | "\r\n",
592 | "```java\r\n",
593 | "import java.util.Scanner;\r\n",
594 | "\r\n",
595 | "public class Palindrome {\r\n",
596 | " public static void main(String[] args) {\r\n",
597 | " Scanner sc = new Scanner(System.in);\r\n",
598 | " System.out.print(\"Enter a number: \");\r\n",
599 | " int num = sc.nextInt();\r\n",
600 | "\r\n",
601 | " // convert the number to a string to check if it's a palindrome\r\n",
602 | " String numStr = Integer.toString(num);\r\n",
603 | "\r\n",
604 | " // reverse the string and compare it with the original string\r\n",
605 | " if (numStr.equals(new StringBuilder(numStr).reverse().toString())) {\r\n",
606 | " System.out.println(num + \" is a palindrome\");\r\n",
607 | " } else {\r\n",
608 | " System.out.println(num + \" is not a palindrome\");\r\n",
609 | " }\r\n",
610 | " }\r\n",
611 | "}\r\n",
612 | "```\r\n",
613 | "\r\n",
614 | "In this code, we first use the `Scanner` class to take input from the user. We then convert the number to a string using the `Integer.toString()` method and store it in the variable `numStr`.\r\n",
615 | "\r\n",
616 | "We then use the `StringBuilder` class to reverse the string and compare it with the original string. If they are the same, then the number is a palindrome. Otherwise, it's not.\r\n",
617 | "\r\n",
618 | "Note that we use the `new StringBuilder(numStr).reverse().toString()` syntax to reverse the string. This is a shorthand for creating a new `StringBuilder` object with the `numStr` string, reversing it, and then converting it back to a string using the `toString()` method."
619 | ],
620 | "text/plain": [
621 | ""
622 | ]
623 | },
624 | "metadata": {},
625 | "output_type": "display_data"
626 | }
627 | ],
628 | "source": [
629 | "output = wizard_coder.generate(prompt=\"Write this in Java instead.\")\n",
630 | "display(Markdown(output))"
631 | ]
632 | },
633 | {
634 | "cell_type": "code",
635 | "execution_count": 6,
636 | "metadata": {
637 | "colab": {
638 | "base_uri": "https://localhost:8080/"
639 | },
640 | "id": "PWnD4C6dI6-X",
641 | "outputId": "286a84e8-2507-4fbd-c765-45eef888ee3e"
642 | },
643 | "outputs": [
644 | {
645 | "name": "stdout",
646 | "output_type": "stream",
647 | "text": [
648 | "Here's the Java code to determine whether a number is a palindrome:\r\n",
649 | "\r\n",
650 | "```java\r\n",
651 | "import java.util.Scanner;\r\n",
652 | "\r\n",
653 | "public class Palindrome {\r\n",
654 | " public static void main(String[] args) {\r\n",
655 | " Scanner sc = new Scanner(System.in);\r\n",
656 | " System.out.print(\"Enter a number: \");\r\n",
657 | " int num = sc.nextInt();\r\n",
658 | "\r\n",
659 | " // convert the number to a string to check if it's a palindrome\r\n",
660 | " String numStr = Integer.toString(num);\r\n",
661 | "\r\n",
662 | " // reverse the string and compare it with the original string\r\n",
663 | " if (numStr.equals(new StringBuilder(numStr).reverse().toString())) {\r\n",
664 | " System.out.println(num + \" is a palindrome\");\r\n",
665 | " } else {\r\n",
666 | " System.out.println(num + \" is not a palindrome\");\r\n",
667 | " }\r\n",
668 | " }\r\n",
669 | "}\r\n",
670 | "```\r\n",
671 | "\r\n",
672 | "In this code, we first use the `Scanner` class to take input from the user. We then convert the number to a string using the `Integer.toString()` method and store it in the variable `numStr`.\r\n",
673 | "\r\n",
674 | "We then use the `StringBuilder` class to reverse the string and compare it with the original string. If they are the same, then the number is a palindrome. Otherwise, it's not.\r\n",
675 | "\r\n",
676 | "Note that we use the `new StringBuilder(numStr).reverse().toString()` syntax to reverse the string. This is a shorthand for creating a new `StringBuilder` object with the `numStr` string, reversing it, and then converting it back to a string using the `toString()` method.\n"
677 | ]
678 | }
679 | ],
680 | "source": [
681 | "print(output)"
682 | ]
683 | }
684 | ],
685 | "metadata": {
686 | "accelerator": "GPU",
687 | "colab": {
688 | "gpuType": "T4",
689 | "provenance": []
690 | },
691 | "kernelspec": {
692 | "display_name": "Python 3",
693 | "name": "python3"
694 | },
695 | "language_info": {
696 | "name": "python"
697 | }
698 | },
699 | "nbformat": 4,
700 | "nbformat_minor": 0
701 | }
702 |
--------------------------------------------------------------------------------
/mlc-llm/models/demo_gemma.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "id": "uLLHBhZ_KVqE"
7 | },
8 | "source": [
9 | "# Demo: Gemma with MLC LLM\n",
10 | "\n",
11 | "Google recently release Gemma: https://blog.google/technology/developers/gemma-open-models/.\n",
12 | "\n",
13 | "This notebook demonstrates how to use the model with MLC LLM: https://llm.mlc.ai/.\n",
14 | "\n",
15 | "For the easiest setup, we recommend trying this out in a Google Colab notebook. Click the button below to get started!\n",
16 | "\n",
17 | "\n",
18 | "
\n",
19 | ""
20 | ]
21 | },
22 | {
23 | "cell_type": "markdown",
24 | "metadata": {
25 | "id": "Vu8opC0QMOZf"
26 | },
27 | "source": [
28 | "## Environment Setup\n",
29 | "\n",
30 | "Let's set up your environment, so you can successfully run the `ChatModule`. First, let's set up the Conda environment which we will be running this notebook in (not required if running in Google Colab).\n",
31 | "\n",
32 | "```bash\n",
33 | "conda create --name mlc-llm python=3.11\n",
34 | "conda activate mlc-llm\n",
35 | "```\n",
36 | "\n",
37 | "**Google Colab:** If you are running this in a Google Colab notebook, be sure to change your runtime to GPU by going to Runtime > Change runtime type and setting the Hardware accelerator to be \"GPU\". Select \"Connect\" on the top right to instantiate your GPU session.\n",
38 | "\n",
39 | "If you are using CUDA, you can run the following command to confirm that CUDA is set up correctly, and check the version number."
40 | ]
41 | },
42 | {
43 | "cell_type": "code",
44 | "execution_count": 1,
45 | "metadata": {
46 | "colab": {
47 | "base_uri": "https://localhost:8080/"
48 | },
49 | "id": "o7vvnPntdgun",
50 | "outputId": "fb05a739-0a5a-4447-b21a-bf21d5cfb537"
51 | },
52 | "outputs": [
53 | {
54 | "name": "stdout",
55 | "output_type": "stream",
56 | "text": [
57 | "Fri Feb 23 18:19:58 2024 \n",
58 | "+---------------------------------------------------------------------------------------+\n",
59 | "| NVIDIA-SMI 535.104.05 Driver Version: 535.104.05 CUDA Version: 12.2 |\n",
60 | "|-----------------------------------------+----------------------+----------------------+\n",
61 | "| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |\n",
62 | "| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |\n",
63 | "| | | MIG M. |\n",
64 | "|=========================================+======================+======================|\n",
65 | "| 0 Tesla T4 Off | 00000000:00:04.0 Off | 0 |\n",
66 | "| N/A 46C P8 9W / 70W | 0MiB / 15360MiB | 0% Default |\n",
67 | "| | | N/A |\n",
68 | "+-----------------------------------------+----------------------+----------------------+\n",
69 | " \n",
70 | "+---------------------------------------------------------------------------------------+\n",
71 | "| Processes: |\n",
72 | "| GPU GI CI PID Type Process name GPU Memory |\n",
73 | "| ID ID Usage |\n",
74 | "|=======================================================================================|\n",
75 | "| No running processes found |\n",
76 | "+---------------------------------------------------------------------------------------+\n"
77 | ]
78 | }
79 | ],
80 | "source": [
81 | "!nvidia-smi"
82 | ]
83 | },
84 | {
85 | "cell_type": "markdown",
86 | "metadata": {
87 | "id": "qZfQBQExMV-f"
88 | },
89 | "source": [
90 | "Next, let's download the MLC-AI and mlc-llm nightly build packages. Go to https://mlc.ai/package/ and replace the command below with the one that is appropriate for your hardware and OS."
91 | ]
92 | },
93 | {
94 | "cell_type": "code",
95 | "execution_count": null,
96 | "metadata": {
97 | "id": "yiPuqenodnB8"
98 | },
99 | "outputs": [],
100 | "source": [
101 | "!pip install --pre mlc-ai-nightly-cu122 mlc-llm-nightly-cu122 -f https://mlc.ai/wheels"
102 | ]
103 | },
104 | {
105 | "cell_type": "markdown",
106 | "metadata": {
107 | "id": "qtRRsOPHM3SE"
108 | },
109 | "source": [
110 | "**Google Colab**: If you are using Colab, you may see the red warnings such as \"You must restart the runtime in order to use newly installed versions.\" For our purpose, simply restart session, and run the next cell after restart.\n",
111 | "\n",
112 | "Let's confirm we have installed the packages successfully!"
113 | ]
114 | },
115 | {
116 | "cell_type": "code",
117 | "execution_count": null,
118 | "metadata": {
119 | "colab": {
120 | "base_uri": "https://localhost:8080/"
121 | },
122 | "id": "ktNZi8B6M4Md",
123 | "outputId": "908711b7-eaa8-4a1a-88a8-147cb32c58c9"
124 | },
125 | "outputs": [],
126 | "source": [
127 | "!python -c \"import tvm; print('tvm installed properly!')\"\n",
128 | "!python -c \"import mlc_llm; print('mlc_llm installed properly!')\""
129 | ]
130 | },
131 | {
132 | "cell_type": "markdown",
133 | "metadata": {
134 | "id": "EIEtjOVvM9LJ"
135 | },
136 | "source": [
137 | "## Running Gemma with MLC-LLM\n",
138 | "\n",
139 | "Then we can clone gemma weights converted to MLC format from huggingface.\n",
140 | "\n",
141 | "This is the only thing you need. Afterwards, our JIT (just-in-time) compilation will take care of everything for you!\n",
142 | "\n",
143 | "First time running may require more time as we need to compile the model. But afterwards we cache it to `/pathto/.cache/mlc_llm/`, so future runs are faster.\n",
144 | "\n",
145 | "Alternatively, you could also use the following\n",
146 | "\n",
147 | "```python\n",
148 | "!python -m mlc_llm compile gemma-7b-it-q4f16_2-MLC -o gemma-7b-it-q4f16_2-q4f16_2-cuda.so\n",
149 | "\n",
150 | "cm = ChatModule(\"./gemma-7b-it-q4f16_2-MLC\", model_lib_path=\"gemma-7b-it-q4f16_2-q4f16_2-cuda.so\")\n",
151 | "```"
152 | ]
153 | },
154 | {
155 | "cell_type": "code",
156 | "execution_count": 4,
157 | "metadata": {
158 | "colab": {
159 | "base_uri": "https://localhost:8080/"
160 | },
161 | "id": "MHsFX5cwNZQN",
162 | "outputId": "e5ab61f0-37b6-46a8-9ff5-d5782be6495f"
163 | },
164 | "outputs": [
165 | {
166 | "name": "stdout",
167 | "output_type": "stream",
168 | "text": [
169 | "Git LFS initialized.\n"
170 | ]
171 | }
172 | ],
173 | "source": [
174 | "!git lfs install"
175 | ]
176 | },
177 | {
178 | "cell_type": "code",
179 | "execution_count": 5,
180 | "metadata": {
181 | "colab": {
182 | "base_uri": "https://localhost:8080/"
183 | },
184 | "id": "isA1NfGFNadt",
185 | "outputId": "26b1a8a6-bcf4-4b8b-a0eb-42e6816d1773"
186 | },
187 | "outputs": [
188 | {
189 | "name": "stdout",
190 | "output_type": "stream",
191 | "text": [
192 | "Cloning into 'gemma-7b-it-q4f16_2-MLC'...\n",
193 | "remote: Enumerating objects: 113, done.\u001b[K\n",
194 | "remote: Counting objects: 100% (110/110), done.\u001b[K\n",
195 | "remote: Compressing objects: 100% (110/110), done.\u001b[K\n",
196 | "remote: Total 113 (delta 0), reused 0 (delta 0), pack-reused 3\u001b[K\n",
197 | "Receiving objects: 100% (113/113), 33.40 KiB | 6.68 MiB/s, done.\n",
198 | "Filtering content: 100% (103/103), 5.54 GiB | 62.53 MiB/s, done.\n"
199 | ]
200 | }
201 | ],
202 | "source": [
203 | "# This is gemma 7b with 4-bit quantization\n",
204 | "# Any other quantizations/models have the same steps: https://huggingface.co/mlc-ai\n",
205 | "!git clone https://huggingface.co/mlc-ai/gemma-7b-it-q4f16_2-MLC"
206 | ]
207 | },
208 | {
209 | "cell_type": "code",
210 | "execution_count": 6,
211 | "metadata": {
212 | "id": "MbxdMhcgfGvk"
213 | },
214 | "outputs": [],
215 | "source": [
216 | "from mlc_llm import ChatModule\n",
217 | "from mlc_llm.callback import StreamToStdout"
218 | ]
219 | },
220 | {
221 | "cell_type": "code",
222 | "execution_count": 7,
223 | "metadata": {
224 | "id": "QAjm3lTJmsiy"
225 | },
226 | "outputs": [],
227 | "source": [
228 | "cm = ChatModule(\"./gemma-7b-it-q4f16_2-MLC\")"
229 | ]
230 | },
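{
"cell_type": "markdown",
"metadata": {},
"source": [
"`ChatModule` also accepts a `device` argument if you want to select the target device explicitly rather than relying on the default. A minimal sketch (the `\"cuda:0\"` device string is an assumption; adjust it to your hardware):\n",
"\n",
"```python\n",
"# Hypothetical variant: pin the chat module to the first CUDA GPU\n",
"cm = ChatModule(\"./gemma-7b-it-q4f16_2-MLC\", device=\"cuda:0\")\n",
"```"
]
},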
231 | {
232 | "cell_type": "code",
233 | "execution_count": 8,
234 | "metadata": {
235 | "colab": {
236 | "base_uri": "https://localhost:8080/"
237 | },
238 | "id": "BEHtDLG9nTx1",
239 | "outputId": "1c7a3037-afd5-406e-ca98-6282a14b2719"
240 | },
241 | "outputs": [
242 | {
243 | "name": "stdout",
244 | "output_type": "stream",
245 | "text": [
246 | "Sure, here's a quick overview of five states in the US:\n",
247 | "\n",
248 | "**1. California:**\n",
249 | "- Capital: Sacramento\n",
250 | "- Largest city: Los Angeles\n",
251 | "- Known for: Golden Gate Bridge, Hollywood, Silicon Valley, and its diverse population.\n",
252 | "\n",
253 | "**2. New York:**\n",
254 | "- Capital: Albany\n",
255 | "- Largest city: New York City\n",
256 | "- Known for: Empire State Building, Times Square, Niagara Falls, and its rich history.\n",
257 | "\n",
258 | "**3. Texas:**\n",
259 | "- Capital: Austin\n",
260 | "- Largest city: Dallas\n",
261 | "- Known for: Its large size, diverse culture, and its strong economy.\n",
262 | "\n",
263 | "**4. Florida:**\n",
264 | "- Capital: Tallahassee\n",
265 | "- Largest city: Jacksonville\n",
266 | "- Known for: Its beautiful beaches, warm climate, and its history as a major naval power.\n",
267 | "\n",
268 | "**5. Alaska:**\n",
269 | "- Capital: Juneau\n",
270 | "- Largest city: Anchorage\n",
271 | "- Known for: Its breathtaking natural beauty, including towering mountains, glaciers, and fjords.\n"
272 | ]
273 | }
274 | ],
275 | "source": [
276 | "output = cm.generate(\n",
277 | " prompt=\"Tell me about 5 states in the US\",\n",
278 | " progress_callback=StreamToStdout(callback_interval=2),\n",
279 | ")"
280 | ]
281 | },
282 | {
283 | "cell_type": "code",
284 | "execution_count": 9,
285 | "metadata": {
286 | "colab": {
287 | "base_uri": "https://localhost:8080/"
288 | },
289 | "id": "ElwvKxHSQZe-",
290 | "outputId": "0709daf6-623d-42dd-c790-9b330afc035e"
291 | },
292 | "outputs": [
293 | {
294 | "name": "stdout",
295 | "output_type": "stream",
296 | "text": [
297 | "**Sure, here are two more states:**\n",
298 | "\n",
299 | "**6. Nevada:**\n",
300 | "- Capital: Carson City\n",
301 | "- Largest city: Las Vegas\n",
302 | "- Known for: Its casinos, its desert landscapes, and its history as a frontier town.\n",
303 | "\n",
304 | "**7. Idaho:**\n",
305 | "- Capital: Boise\n",
306 | "- Largest city: Boise\n",
307 | "- Known for: Its scenic mountains, its salmon fishing, and its rich Native American heritage.\n"
308 | ]
309 | }
310 | ],
311 | "source": [
312 | "output = cm.generate(\n",
313 | " prompt=\"Two more please\",\n",
314 | " progress_callback=StreamToStdout(callback_interval=2),\n",
315 | ")"
316 | ]
317 | },
318 | {
319 | "cell_type": "code",
320 | "execution_count": null,
321 | "metadata": {
322 | "id": "hvbzb39ZrAVO"
323 | },
324 | "outputs": [],
325 | "source": [
326 | "cm.reset_chat()"
327 | ]
328 | }
329 | ],
330 | "metadata": {
331 | "accelerator": "GPU",
332 | "colab": {
333 | "gpuType": "T4",
334 | "provenance": []
335 | },
336 | "kernelspec": {
337 | "display_name": "Python 3",
338 | "name": "python3"
339 | },
340 | "language_info": {
341 | "name": "python",
342 | "version": "3.11.6"
343 | }
344 | },
345 | "nbformat": 4,
346 | "nbformat_minor": 0
347 | }
348 |
--------------------------------------------------------------------------------
/mlc-llm/tutorial_chat_module_getting_started.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "id": "Cm85Ap3zDmYB"
7 | },
8 | "source": [
9 | "# Getting Started with MLC-LLM using the Llama 2 Model\n",
10 | "\n",
11 | "Here's a quick overview of how to get started with the MLC-LLM `ChatModule` in Python. In this tutorial, we will chat with the [Llama2](https://ai.meta.com/llama/) model. For the easiest setup, we recommend trying this out in a Google Colab notebook. Click the button below to get started!\n",
12 | "\n",
13 | "\n",
14 | "
\n",
15 | ""
16 | ]
17 | },
18 | {
19 | "cell_type": "markdown",
20 | "metadata": {
21 | "id": "1ttPt-hNDmYC"
22 | },
23 | "source": [
24 | "## Environment Setup\n",
25 | "\n",
26 | "Let's set up your environment, so you can successfully run the `ChatModule`. First, let's set up the Conda environment which we will be running this notebook in (not required if running in Google Colab).\n",
27 | "\n",
28 | "```bash\n",
29 | "conda create --name mlc-llm python=3.10\n",
30 | "conda activate mlc-llm\n",
31 | "```\n",
32 | "\n",
33 | "**Google Colab:** If you are running this in a Google Colab notebook, be sure to change your runtime to GPU by going to Runtime > Change runtime type and setting the Hardware accelerator to be \"GPU\". Select \"Connect\" on the top right to instantiate your GPU session.\n",
34 | "\n",
35 | "If you are using CUDA, you can run the following command to confirm that CUDA is set up correctly, and check the version number."
36 | ]
37 | },
38 | {
39 | "cell_type": "code",
40 | "execution_count": null,
41 | "metadata": {
42 | "id": "KK25HZsIDmYC"
43 | },
44 | "outputs": [],
45 | "source": [
46 | "!nvidia-smi"
47 | ]
48 | },
49 | {
50 | "cell_type": "markdown",
51 | "metadata": {
52 | "id": "EWOtpjJMDmYE"
53 | },
54 | "source": [
55 | "Next, let's download the MLC-AI and mlc-llm nightly build packages. Go to https://mlc.ai/package/ and replace the command below with the one that is appropriate for your hardware and OS.\n",
56 | "\n",
57 | "**Google Colab**: If you are using Colab, you may see the red warnings such as \"You must restart the runtime in order to use newly installed versions.\" For our purpose, we can disregard them, the notebook will still run correctly."
58 | ]
59 | },
60 | {
61 | "cell_type": "code",
62 | "execution_count": null,
63 | "metadata": {
64 | "id": "PgW-5OAADmYE"
65 | },
66 | "outputs": [],
67 | "source": [
68 | "!pip install --pre --force-reinstall mlc-ai-nightly-cu118 mlc-llm-nightly-cu118 -f https://mlc.ai/wheels"
69 | ]
70 | },
71 | {
72 | "cell_type": "markdown",
73 | "metadata": {
74 | "id": "FwsWd1WbDmYE"
75 | },
76 | "source": [
77 | "Next, let's download the model weights for the Llama2 model and the prebuilt model libraries from Github. In order to download the large weights, we'll have to use `git lfs`."
78 | ]
79 | },
80 | {
81 | "cell_type": "markdown",
82 | "metadata": {
83 | "id": "ppvAhErV3gjq"
84 | },
85 | "source": [
86 | "Note: If you are NOT running in **Google Colab** you may need to run this line `!conda install git git-lfs` to install `git` and `git-lfs` before running the following cell to fully install `git lfs`."
87 | ]
88 | },
89 | {
90 | "cell_type": "code",
91 | "execution_count": null,
92 | "metadata": {
93 | "id": "V0GjINnMDmYF"
94 | },
95 | "outputs": [],
96 | "source": [
97 | "!git lfs install"
98 | ]
99 | },
100 | {
101 | "cell_type": "markdown",
102 | "metadata": {
103 | "id": "yYwjsCOK7Jij"
104 | },
105 | "source": [
106 | "These commands will download many prebuilt libraries as well as the chat configuration for Llama-2-7b that `mlc_llm` needs, which may take a long time. If in **Google Colab** you can verify that the files are being downloaded by clicking on the folder icon on the left and navigating to the `dist` and then `prebuilt` folders which should be updating as the files are being downloaded."
107 | ]
108 | },
109 | {
110 | "cell_type": "code",
111 | "execution_count": null,
112 | "metadata": {
113 | "id": "FSAe7Ew_DmYF"
114 | },
115 | "outputs": [],
116 | "source": [
117 | "!mkdir -p dist\n",
118 | "!git clone https://github.com/mlc-ai/binary-mlc-llm-libs.git dist/prebuilt_libs"
119 | ]
120 | },
121 | {
122 | "cell_type": "code",
123 | "execution_count": null,
124 | "metadata": {
125 | "id": "BDbi6H3MDmYF"
126 | },
127 | "outputs": [],
128 | "source": [
129 | "!cd dist && git clone https://huggingface.co/mlc-ai/Llama-2-7b-chat-hf-q4f16_1-MLC"
130 | ]
131 | },
132 | {
133 | "cell_type": "code",
134 | "execution_count": null,
135 | "metadata": {},
136 | "outputs": [],
137 | "source": [
138 | "# Need to restart runtime since notebooks cannot find the module right after installing\n",
139 | "# Simply run this cell, then run the next cells after runtime finishes restarting\n",
140 | "exit()"
141 | ]
142 | },
143 | {
144 | "cell_type": "markdown",
145 | "metadata": {
146 | "id": "76Ru5__tDmYF"
147 | },
148 | "source": [
149 | "## Let's Chat!\n",
150 | "\n",
151 | "Before we can chat with the model, we must first import a library and instantiate a `ChatModule` instance. The `ChatModule` must be initialized with the appropriate model name."
152 | ]
153 | },
154 | {
155 | "cell_type": "code",
156 | "execution_count": null,
157 | "metadata": {
158 | "id": "AJAt6oW7DmYF"
159 | },
160 | "outputs": [],
161 | "source": [
162 | "from mlc_llm import ChatModule\n",
163 | "from mlc_llm.callback import StreamToStdout\n",
164 | "\n",
165 | "cm = ChatModule(\n",
166 | " model=\"dist/Llama-2-7b-chat-hf-q4f16_1-MLC\",\n",
167 | " model_lib_path=\"dist/prebuilt_libs/Llama-2-7b-chat-hf/Llama-2-7b-chat-hf-q4f16_1-cuda.so\"\n",
168 | ")"
169 | ]
170 | },
171 | {
172 | "cell_type": "markdown",
173 | "metadata": {
174 | "id": "c9m5sxyXDmYF"
175 | },
176 | "source": [
177 | "For other platforms/backends, change the file in `model_lib_path` to:\n",
178 | "\n",
179 | "- Vulkan on Linux: `Llama-2-7b-chat-hf-q4f16_1-vulkan.so`\n",
180 | "- Metal on macOS: `Llama-2-7b-chat-hf-q4f16_1-metal.so`\n",
181 | "- Other platforms: `Llama-2-7b-chat-hf-q4f16_1-{backend}.{suffix}`"
182 | ]
183 | },
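{
"cell_type": "markdown",
"metadata": {},
"source": [
"For example, a minimal sketch of the same setup on macOS using the Metal library from the list above (paths assumed to match the clone commands earlier in this notebook):\n",
"\n",
"```python\n",
"# Same weights as before, but with the Metal model library instead of CUDA\n",
"cm = ChatModule(\n",
"    model=\"dist/Llama-2-7b-chat-hf-q4f16_1-MLC\",\n",
"    model_lib_path=\"dist/prebuilt_libs/Llama-2-7b-chat-hf/Llama-2-7b-chat-hf-q4f16_1-metal.so\"\n",
")\n",
"```"
]
},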
184 | {
185 | "cell_type": "markdown",
186 | "metadata": {
187 | "id": "zEaVXnnJDmYF"
188 | },
189 | "source": [
190 | "That is all what needed to set up the `ChatModule`. You can now chat with the model by entering any prompt you'd like. Try it out below!"
191 | ]
192 | },
193 | {
194 | "cell_type": "code",
195 | "execution_count": null,
196 | "metadata": {
197 | "id": "TNmg9N_NDmYF"
198 | },
199 | "outputs": [],
200 | "source": [
201 | "output = cm.generate(\n",
202 | " prompt=\"When was Python released?\",\n",
203 | " progress_callback=StreamToStdout(callback_interval=2),\n",
204 | ")"
205 | ]
206 | },
207 | {
208 | "cell_type": "markdown",
209 | "metadata": {},
210 | "source": [
211 | "You can also repeat running the code block below for multiple rounds to interact with the model in a chat style."
212 | ]
213 | },
214 | {
215 | "cell_type": "code",
216 | "execution_count": null,
217 | "metadata": {},
218 | "outputs": [],
219 | "source": [
220 | "prompt = input(\"Prompt: \")\n",
221 | "output = cm.generate(prompt=prompt, progress_callback=StreamToStdout(callback_interval=2))"
222 | ]
223 | },
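{
"cell_type": "markdown",
"metadata": {},
"source": [
"If you prefer not to re-run the cell by hand, here is a minimal sketch of a chat loop (the `\"exit\"` sentinel is just an illustrative assumption):\n",
"\n",
"```python\n",
"# Keep chatting until the user types \"exit\"\n",
"while True:\n",
"    prompt = input(\"Prompt: \")\n",
"    if prompt.strip().lower() == \"exit\":\n",
"        break\n",
"    cm.generate(prompt=prompt, progress_callback=StreamToStdout(callback_interval=2))\n",
"```"
]
},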
224 | {
225 | "cell_type": "code",
226 | "execution_count": null,
227 | "metadata": {},
228 | "outputs": [],
229 | "source": [
230 | "output = cm.generate(\n",
231 | " prompt=\"Please summarize your response in three sentences.\",\n",
232 | " progress_callback=StreamToStdout(callback_interval=2),\n",
233 | ")"
234 | ]
235 | },
236 | {
237 | "cell_type": "markdown",
238 | "metadata": {
239 | "id": "I4bOyUk7DmYF"
240 | },
241 | "source": [
242 | "To check the generation speed of the chat bot, you can print the statistics."
243 | ]
244 | },
245 | {
246 | "cell_type": "code",
247 | "execution_count": null,
248 | "metadata": {
249 | "id": "PPbPj6vpDmYF"
250 | },
251 | "outputs": [],
252 | "source": [
253 | "print(cm.stats())"
254 | ]
255 | },
256 | {
257 | "cell_type": "markdown",
258 | "metadata": {
259 | "id": "XAb-XZPnDmYF"
260 | },
261 | "source": [
262 | "By default, the `ChatModule` will keep a history of your chat. You can reset the chat history by running the following."
263 | ]
264 | },
265 | {
266 | "cell_type": "code",
267 | "execution_count": null,
268 | "metadata": {
269 | "id": "iKpKgVxNDmYF"
270 | },
271 | "outputs": [],
272 | "source": [
273 | "cm.reset_chat()"
274 | ]
275 | },
276 | {
277 | "cell_type": "markdown",
278 | "metadata": {},
279 | "source": [
280 | "### Benchmark Performance\n",
281 | "\n",
282 | "To benchmark the performance, we can use the `benchmark_generate` method of ChatModule. It takes an input prompt and the number of tokens to generate, ignores the system prompt and model stop criterion, generates tokens in a language model way and stops until finishing generating the desired number of tokens. After calling `benchmark_generate`, we can use `stats` to check the performance."
283 | ]
284 | },
285 | {
286 | "cell_type": "code",
287 | "execution_count": null,
288 | "metadata": {},
289 | "outputs": [],
290 | "source": [
291 | "print(cm.benchmark_generate(prompt=\"What is benchmark?\", generate_length=512))\n",
292 | "cm.stats()"
293 | ]
294 | }
295 | ],
296 | "metadata": {
297 | "accelerator": "GPU",
298 | "colab": {
299 | "gpuType": "T4",
300 | "provenance": []
301 | },
302 | "kernelspec": {
303 | "display_name": "Python 3",
304 | "name": "python3"
305 | },
306 | "language_info": {
307 | "codemirror_mode": {
308 | "name": "ipython",
309 | "version": 3
310 | },
311 | "file_extension": ".py",
312 | "mimetype": "text/x-python",
313 | "name": "python",
314 | "nbconvert_exporter": "python",
315 | "pygments_lexer": "ipython3",
316 | "version": "3.10.8"
317 | },
318 | "orig_nbformat": 4
319 | },
320 | "nbformat": 4,
321 | "nbformat_minor": 0
322 | }
323 |
--------------------------------------------------------------------------------
/mlc-llm/tutorial_mlc_xgrammar_structured_generation.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "id": "4IwhlCjVtpYj"
7 | },
8 | "source": [
9 | "# MLC-LLM Structured Generation with XGrammar\n",
10 | "\n",
11 | "Here's a quick overview of how to generate structured text with XGrammar in MLC LLM in Python.\n",
12 | "In this tutorial, we will be chatting with the Llama3.2 model.\n",
13 | "For the easiest setup, we recommend trying this out in a Google Colab notebook. Click the button below to get started!\n",
14 | "\n",
15 | "Structured generation of LLMs greatly improves the abilities of LLMs,\n",
16 | "going beyond the basic chat or plain text generation.\n",
17 | "With controllable structured generation, LLMs become able to serve as standard tools and can be better integrated into other applications in production.\n",
18 | "MLCEngine offers state-of-the-art structured generation with XGrammar integration.\n",
19 | "Importantly, the structured generation support is built into the engine, which means it can be used across all the API platforms that MLCEngine supports.\n",
20 | "\n",
21 | "Learn more about\n",
22 | "* MLC LLM: https://mlc.ai/mlc-llm/docs.\n",
23 | "* XGrammar: https://xgrammar.mlc.ai/docs"
24 | ]
25 | },
26 | {
27 | "cell_type": "markdown",
28 | "metadata": {
29 | "id": "YsvAL7SSt9Lo"
30 | },
31 | "source": [
32 | "Click the button below to get started!\n",
33 | "\n",
34 | "\n",
35 | "
\n",
36 | ""
37 | ]
38 | },
39 | {
40 | "cell_type": "markdown",
41 | "metadata": {
42 | "id": "8kkADAMCCLi-"
43 | },
44 | "source": [
45 | "## Install MLC LLM"
46 | ]
47 | },
48 | {
49 | "cell_type": "markdown",
50 | "metadata": {
51 | "id": "Y2EwuS6TCO61"
52 | },
53 | "source": [
54 | "We will start from setting up the environment. First, let us create a new Conda environment, in which we will run the rest of the notebook.\n",
55 | "\n",
56 | "```\n",
57 | "conda create --name mlc-llm python=3.11\n",
58 | "conda activate mlc-llm\n",
59 | "```"
60 | ]
61 | },
62 | {
63 | "cell_type": "markdown",
64 | "metadata": {
65 | "id": "ojEeEmsqCTPG"
66 | },
67 | "source": [
68 | "**Google Colab**\n",
69 | "\n",
70 | "- If you are running this in a Google Colab notebook, you would not need to create a conda environment.\n",
71 | "- However, be sure to change your runtime to GPU by going to `Runtime` > `Change runtime type` and setting the Hardware accelerator to be \"GPU\"."
72 | ]
73 | },
74 | {
75 | "cell_type": "markdown",
76 | "metadata": {
77 | "id": "S_rX53bGChPn"
78 | },
79 | "source": [
80 | "If you are using CUDA, you can run the following command to confirm that CUDA is set up correctly, and check the driver version number as well as what GPUs are currently available for use."
81 | ]
82 | },
83 | {
84 | "cell_type": "code",
85 | "execution_count": 1,
86 | "metadata": {
87 | "id": "CRPeCflbCij6",
88 | "colab": {
89 | "base_uri": "https://localhost:8080/"
90 | },
91 | "outputId": "e661ee68-a30c-4800-fc30-fef1d85d557d"
92 | },
93 | "outputs": [
94 | {
95 | "output_type": "stream",
96 | "name": "stdout",
97 | "text": [
98 | "Fri Nov 22 01:44:26 2024 \n",
99 | "+---------------------------------------------------------------------------------------+\n",
100 | "| NVIDIA-SMI 535.104.05 Driver Version: 535.104.05 CUDA Version: 12.2 |\n",
101 | "|-----------------------------------------+----------------------+----------------------+\n",
102 | "| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |\n",
103 | "| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |\n",
104 | "| | | MIG M. |\n",
105 | "|=========================================+======================+======================|\n",
106 | "| 0 Tesla T4 Off | 00000000:00:04.0 Off | 0 |\n",
107 | "| N/A 61C P8 10W / 70W | 0MiB / 15360MiB | 0% Default |\n",
108 | "| | | N/A |\n",
109 | "+-----------------------------------------+----------------------+----------------------+\n",
110 | " \n",
111 | "+---------------------------------------------------------------------------------------+\n",
112 | "| Processes: |\n",
113 | "| GPU GI CI PID Type Process name GPU Memory |\n",
114 | "| ID ID Usage |\n",
115 | "|=======================================================================================|\n",
116 | "| No running processes found |\n",
117 | "+---------------------------------------------------------------------------------------+\n"
118 | ]
119 | }
120 | ],
121 | "source": [
122 | "!nvidia-smi"
123 | ]
124 | },
125 | {
126 | "cell_type": "markdown",
127 | "metadata": {
128 | "id": "PQfVfTAYC1M-"
129 | },
130 | "source": [
131 | "Next, let's download the MLC-AI and mlc-llm nightly build packages. If you are running in a Colab environment, then you can just run the following command. Otherwise, go to https://llm.mlc.ai/docs/install/mlc_llm.html and replace the command below with the one that is appropriate for your hardware and OS."
132 | ]
133 | },
134 | {
135 | "cell_type": "markdown",
136 | "metadata": {
137 | "id": "vi-udt4tC5c9"
138 | },
139 | "source": [
140 | "**Google Colab**: If you are using Colab, you may see the red warnings such as \"You must restart the runtime in order to use newly installed versions.\" For our purpose, we can disregard them, the notebook will still run correctly."
141 | ]
142 | },
143 | {
144 | "cell_type": "code",
145 | "execution_count": 2,
146 | "metadata": {
147 | "id": "ah9tYaCRCkKS",
148 | "colab": {
149 | "base_uri": "https://localhost:8080/"
150 | },
151 | "outputId": "5556bc7a-5dd2-4ecd-d0d9-b008cccfe8a1"
152 | },
153 | "outputs": [
154 | {
155 | "output_type": "stream",
156 | "name": "stdout",
157 | "text": [
158 | "Looking in links: https://mlc.ai/wheels\n",
159 | "Collecting mlc-ai-nightly-cu123\n",
160 | " Downloading https://github.com/mlc-ai/package/releases/download/v0.9.dev0/mlc_ai_nightly_cu123-0.18.dev249-cp310-cp310-manylinux_2_28_x86_64.whl (1026.5 MB)\n",
161 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.0/1.0 GB\u001b[0m \u001b[31m987.8 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
162 | "\u001b[?25hCollecting mlc-llm-nightly-cu123\n",
163 | " Downloading https://github.com/mlc-ai/package/releases/download/v0.9.dev0/mlc_llm_nightly_cu123-0.18.dev71-cp310-cp310-manylinux_2_28_x86_64.whl (177.1 MB)\n",
164 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m177.1/177.1 MB\u001b[0m \u001b[31m5.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
165 | "\u001b[?25hRequirement already satisfied: attrs in /usr/local/lib/python3.10/dist-packages (from mlc-ai-nightly-cu123) (24.2.0)\n",
166 | "Requirement already satisfied: cloudpickle in /usr/local/lib/python3.10/dist-packages (from mlc-ai-nightly-cu123) (3.1.0)\n",
167 | "Requirement already satisfied: decorator in /usr/local/lib/python3.10/dist-packages (from mlc-ai-nightly-cu123) (4.4.2)\n",
168 | "Requirement already satisfied: ml-dtypes in /usr/local/lib/python3.10/dist-packages (from mlc-ai-nightly-cu123) (0.4.1)\n",
169 | "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from mlc-ai-nightly-cu123) (1.26.4)\n",
170 | "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from mlc-ai-nightly-cu123) (24.2)\n",
171 | "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from mlc-ai-nightly-cu123) (5.9.5)\n",
172 | "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from mlc-ai-nightly-cu123) (1.13.1)\n",
173 | "Requirement already satisfied: tornado in /usr/local/lib/python3.10/dist-packages (from mlc-ai-nightly-cu123) (6.3.3)\n",
174 | "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from mlc-ai-nightly-cu123) (4.12.2)\n",
175 | "Collecting fastapi (from mlc-llm-nightly-cu123)\n",
176 | " Downloading fastapi-0.115.5-py3-none-any.whl.metadata (27 kB)\n",
177 | "Collecting uvicorn (from mlc-llm-nightly-cu123)\n",
178 | " Downloading uvicorn-0.32.1-py3-none-any.whl.metadata (6.6 kB)\n",
179 | "Collecting shortuuid (from mlc-llm-nightly-cu123)\n",
180 | " Downloading shortuuid-1.0.13-py3-none-any.whl.metadata (5.8 kB)\n",
181 | "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from mlc-llm-nightly-cu123) (2.5.1+cu121)\n",
182 | "Requirement already satisfied: safetensors in /usr/local/lib/python3.10/dist-packages (from mlc-llm-nightly-cu123) (0.4.5)\n",
183 | "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from mlc-llm-nightly-cu123) (2.32.3)\n",
184 | "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from mlc-llm-nightly-cu123) (4.66.6)\n",
185 | "Requirement already satisfied: sentencepiece in /usr/local/lib/python3.10/dist-packages (from mlc-llm-nightly-cu123) (0.2.0)\n",
186 | "Collecting tiktoken (from mlc-llm-nightly-cu123)\n",
187 | " Downloading tiktoken-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)\n",
188 | "Requirement already satisfied: prompt-toolkit in /usr/local/lib/python3.10/dist-packages (from mlc-llm-nightly-cu123) (3.0.48)\n",
189 | "Requirement already satisfied: openai in /usr/local/lib/python3.10/dist-packages (from mlc-llm-nightly-cu123) (1.54.4)\n",
190 | "Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (from mlc-llm-nightly-cu123) (4.46.2)\n",
191 | "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from mlc-llm-nightly-cu123) (2.2.2)\n",
192 | "Collecting datasets (from mlc-llm-nightly-cu123)\n",
193 | " Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)\n",
194 | "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from datasets->mlc-llm-nightly-cu123) (3.16.1)\n",
195 | "Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets->mlc-llm-nightly-cu123) (17.0.0)\n",
196 | "Collecting dill<0.3.9,>=0.3.0 (from datasets->mlc-llm-nightly-cu123)\n",
197 | " Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)\n",
198 | "Collecting xxhash (from datasets->mlc-llm-nightly-cu123)\n",
199 | " Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)\n",
200 | "Collecting multiprocess<0.70.17 (from datasets->mlc-llm-nightly-cu123)\n",
201 | " Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)\n",
202 | "Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets->mlc-llm-nightly-cu123)\n",
203 | " Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)\n",
204 | "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets->mlc-llm-nightly-cu123) (3.11.2)\n",
205 | "Requirement already satisfied: huggingface-hub>=0.23.0 in /usr/local/lib/python3.10/dist-packages (from datasets->mlc-llm-nightly-cu123) (0.26.2)\n",
206 | "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from datasets->mlc-llm-nightly-cu123) (6.0.2)\n",
207 | "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->mlc-llm-nightly-cu123) (3.4.0)\n",
208 | "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->mlc-llm-nightly-cu123) (3.10)\n",
209 | "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->mlc-llm-nightly-cu123) (2.2.3)\n",
210 | "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->mlc-llm-nightly-cu123) (2024.8.30)\n",
211 | "Collecting starlette<0.42.0,>=0.40.0 (from fastapi->mlc-llm-nightly-cu123)\n",
212 | " Downloading starlette-0.41.3-py3-none-any.whl.metadata (6.0 kB)\n",
213 | "Requirement already satisfied: pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4 in /usr/local/lib/python3.10/dist-packages (from fastapi->mlc-llm-nightly-cu123) (2.9.2)\n",
214 | "Requirement already satisfied: anyio<5,>=3.5.0 in /usr/local/lib/python3.10/dist-packages (from openai->mlc-llm-nightly-cu123) (3.7.1)\n",
215 | "Requirement already satisfied: distro<2,>=1.7.0 in /usr/local/lib/python3.10/dist-packages (from openai->mlc-llm-nightly-cu123) (1.9.0)\n",
216 | "Requirement already satisfied: httpx<1,>=0.23.0 in /usr/local/lib/python3.10/dist-packages (from openai->mlc-llm-nightly-cu123) (0.27.2)\n",
217 | "Requirement already satisfied: jiter<1,>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from openai->mlc-llm-nightly-cu123) (0.7.1)\n",
218 | "Requirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from openai->mlc-llm-nightly-cu123) (1.3.1)\n",
219 | "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas->mlc-llm-nightly-cu123) (2.8.2)\n",
220 | "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->mlc-llm-nightly-cu123) (2024.2)\n",
221 | "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.10/dist-packages (from pandas->mlc-llm-nightly-cu123) (2024.2)\n",
222 | "Requirement already satisfied: wcwidth in /usr/local/lib/python3.10/dist-packages (from prompt-toolkit->mlc-llm-nightly-cu123) (0.2.13)\n",
223 | "Requirement already satisfied: regex>=2022.1.18 in /usr/local/lib/python3.10/dist-packages (from tiktoken->mlc-llm-nightly-cu123) (2024.9.11)\n",
224 | "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->mlc-llm-nightly-cu123) (3.4.2)\n",
225 | "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->mlc-llm-nightly-cu123) (3.1.4)\n",
226 | "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.10/dist-packages (from torch->mlc-llm-nightly-cu123) (1.13.1)\n",
227 | "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy==1.13.1->torch->mlc-llm-nightly-cu123) (1.3.0)\n",
228 | "Requirement already satisfied: tokenizers<0.21,>=0.20 in /usr/local/lib/python3.10/dist-packages (from transformers->mlc-llm-nightly-cu123) (0.20.3)\n",
229 | "Requirement already satisfied: click>=7.0 in /usr/local/lib/python3.10/dist-packages (from uvicorn->mlc-llm-nightly-cu123) (8.1.7)\n",
230 | "Requirement already satisfied: h11>=0.8 in /usr/local/lib/python3.10/dist-packages (from uvicorn->mlc-llm-nightly-cu123) (0.14.0)\n",
231 | "Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<5,>=3.5.0->openai->mlc-llm-nightly-cu123) (1.2.2)\n",
232 | "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->mlc-llm-nightly-cu123) (2.4.3)\n",
233 | "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->mlc-llm-nightly-cu123) (1.3.1)\n",
234 | "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->mlc-llm-nightly-cu123) (1.5.0)\n",
235 | "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->mlc-llm-nightly-cu123) (6.1.0)\n",
236 | "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->mlc-llm-nightly-cu123) (0.2.0)\n",
237 | "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->mlc-llm-nightly-cu123) (1.17.2)\n",
238 | "Requirement already satisfied: async-timeout<6.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->mlc-llm-nightly-cu123) (4.0.3)\n",
239 | "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.10/dist-packages (from httpx<1,>=0.23.0->openai->mlc-llm-nightly-cu123) (1.0.7)\n",
240 | "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.10/dist-packages (from pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4->fastapi->mlc-llm-nightly-cu123) (0.7.0)\n",
241 | "Requirement already satisfied: pydantic-core==2.23.4 in /usr/local/lib/python3.10/dist-packages (from pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4->fastapi->mlc-llm-nightly-cu123) (2.23.4)\n",
242 | "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas->mlc-llm-nightly-cu123) (1.16.0)\n",
243 | "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->mlc-llm-nightly-cu123) (3.0.2)\n",
244 | "Downloading datasets-3.1.0-py3-none-any.whl (480 kB)\n",
245 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m480.6/480.6 kB\u001b[0m \u001b[31m16.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
246 | "\u001b[?25hDownloading fastapi-0.115.5-py3-none-any.whl (94 kB)\n",
247 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m94.9/94.9 kB\u001b[0m \u001b[31m8.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
248 | "\u001b[?25hDownloading shortuuid-1.0.13-py3-none-any.whl (10 kB)\n",
249 | "Downloading tiktoken-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)\n",
250 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m48.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
251 | "\u001b[?25hDownloading uvicorn-0.32.1-py3-none-any.whl (63 kB)\n",
252 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m63.8/63.8 kB\u001b[0m \u001b[31m5.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
253 | "\u001b[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)\n",
254 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m11.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
255 | "\u001b[?25hDownloading fsspec-2024.9.0-py3-none-any.whl (179 kB)\n",
256 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m179.3/179.3 kB\u001b[0m \u001b[31m15.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
257 | "\u001b[?25hDownloading multiprocess-0.70.16-py310-none-any.whl (134 kB)\n",
258 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m13.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
259 | "\u001b[?25hDownloading starlette-0.41.3-py3-none-any.whl (73 kB)\n",
260 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m73.2/73.2 kB\u001b[0m \u001b[31m7.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
261 | "\u001b[?25hDownloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n",
262 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m15.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
263 | "\u001b[?25hInstalling collected packages: xxhash, uvicorn, shortuuid, fsspec, dill, tiktoken, starlette, multiprocess, mlc-ai-nightly-cu123, fastapi, datasets, mlc-llm-nightly-cu123\n",
264 | " Attempting uninstall: fsspec\n",
265 | " Found existing installation: fsspec 2024.10.0\n",
266 | " Uninstalling fsspec-2024.10.0:\n",
267 | " Successfully uninstalled fsspec-2024.10.0\n",
268 | "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
269 | "gcsfs 2024.10.0 requires fsspec==2024.10.0, but you have fsspec 2024.9.0 which is incompatible.\u001b[0m\u001b[31m\n",
270 | "\u001b[0mSuccessfully installed datasets-3.1.0 dill-0.3.8 fastapi-0.115.5 fsspec-2024.9.0 mlc-ai-nightly-cu123-0.18.dev249 mlc-llm-nightly-cu123-0.18.dev71 multiprocess-0.70.16 shortuuid-1.0.13 starlette-0.41.3 tiktoken-0.8.0 uvicorn-0.32.1 xxhash-3.5.0\n"
271 | ]
272 | }
273 | ],
274 | "source": [
275 | "!pip install --pre mlc-ai-nightly-cu123 mlc-llm-nightly-cu123 -f https://mlc.ai/wheels"
276 | ]
277 | },
278 | {
279 | "cell_type": "markdown",
280 | "metadata": {
281 | "id": "nZGVNJE-DJ9E"
282 | },
283 | "source": [
284 | "Let's confirm we have installed the packages successfully!"
285 | ]
286 | },
287 | {
288 | "cell_type": "code",
289 | "execution_count": 3,
290 | "metadata": {
291 | "id": "5Y6LszJgC7SQ",
292 | "colab": {
293 | "base_uri": "https://localhost:8080/"
294 | },
295 | "outputId": "8bb25fb7-e9d2-4fbf-d0eb-a09a41071cdc"
296 | },
297 | "outputs": [
298 | {
299 | "output_type": "stream",
300 | "name": "stdout",
301 | "text": [
302 | "tvm installed properly!\n",
303 | "mlc_llm installed properly!\n"
304 | ]
305 | }
306 | ],
307 | "source": [
308 | "!python -c \"import tvm; print('tvm installed properly!')\"\n",
309 | "!python -c \"import mlc_llm; print('mlc_llm installed properly!')\""
310 | ]
311 | },
312 | {
313 | "cell_type": "markdown",
314 | "metadata": {
315 | "id": "lGfnrRa9DMw1"
316 | },
317 | "source": [
318 | "## General JSON Text Generation"
319 | ]
320 | },
321 | {
322 | "cell_type": "markdown",
323 | "metadata": {
324 | "id": "pVYkLb0eDjMi"
325 | },
326 | "source": [
327 | "MLC LLM supports two levels of structured generation mode: general JSON response and schema customization. The general JSON mode constrains the response to conform to JSON grammar. To use the general JSON mode, pass argument `response_format={\"type\": \"json_object\"}` to chat completion. Below is a request example with JSON mode:\n"
328 | ]
329 | },
330 | {
331 | "cell_type": "markdown",
332 | "metadata": {
333 | "id": "Pg7daEvlD5UB"
334 | },
335 | "source": [
336 | "Note: If you are NOT running in **Google Colab** you may need to run this line `!conda install git git-lfs` to install `git` and `git-lfs` before running the following cell."
337 | ]
338 | },
339 | {
340 | "cell_type": "code",
341 | "execution_count": 4,
342 | "metadata": {
343 | "id": "FDFbw1KPDLu1",
344 | "colab": {
345 | "base_uri": "https://localhost:8080/"
346 | },
347 | "outputId": "e50809ec-89ec-46b8-c761-69d1d040fd9c"
348 | },
349 | "outputs": [
350 | {
351 | "output_type": "stream",
352 | "name": "stdout",
353 | "text": [
354 | "Git LFS initialized.\n"
355 | ]
356 | }
357 | ],
358 | "source": [
359 | "!git lfs install"
360 | ]
361 | },
362 | {
363 | "cell_type": "code",
364 | "execution_count": 5,
365 | "metadata": {
366 | "id": "bYqaVjmND7Px",
367 | "colab": {
368 | "base_uri": "https://localhost:8080/"
369 | },
370 | "outputId": "46a18ae6-fa5b-45ef-ac6a-23246855de31"
371 | },
372 | "outputs": [
373 | {
374 | "output_type": "stream",
375 | "name": "stderr",
376 | "text": [
377 | "0it [00:00, ?it/s]\n",
378 | "100%|██████████| 58/58 [00:18<00:00, 3.11it/s]\n"
379 | ]
380 | },
381 | {
382 | "output_type": "stream",
383 | "name": "stdout",
384 | "text": [
385 | "{\"places\": [\n",
386 | " {\n",
387 | " \"name\": \"Grand Canyon\",\n",
388 | " \"location\": \"Arizona\",\n",
389 | " \"description\": \"One of the most iconic natural wonders in the United States, the Grand Canyon is a breathtaking example of erosion and geological history.\"\n",
390 | " },\n",
391 | " {\n",
392 | " \"name\": \"Statue of Liberty\",\n",
393 | " \"location\": \"New York/New Jersey\",\n",
394 | " \"description\": \"A symbol of freedom and democracy, the Statue of Liberty is a must-see attraction on Liberty Island in New York Harbor.\"\n",
395 | " },\n",
396 | " {\n",
397 | " \"name\": \"Golden Gate Bridge\",\n",
398 | " \"location\": \"California\",\n",
399 | " \"description\": \"An engineering marvel and iconic symbol of San Francisco, the Golden Gate Bridge is a must-see for its stunning views and rich history.\"\n",
400 | " }\n",
401 | "]}"
402 | ]
403 | }
404 | ],
405 | "source": [
406 | "from mlc_llm import MLCEngine\n",
407 | "\n",
408 | "# Create the MLCEngine. The model will be automatically downloaded.\n",
409 | "model = \"HF://mlc-ai/Llama-3.2-3B-Instruct-q4f16_1-MLC\"\n",
410 | "engine = MLCEngine(model)\n",
411 | "\n",
412 | "# Generate JSON text with MLCEngine, backed by XGrammar.\n",
413 | "prompt = \"List 3 must-see places of interest in United States in JSON.\"\n",
414 | "for chunk in engine.chat.completions.create(\n",
415 | " messages= [{\"role\": \"user\", \"content\": prompt}],\n",
416 | " response_format={\"type\": \"json_object\"},\n",
417 | " stream=True,\n",
418 | "):\n",
419 | " print(chunk.choices[0].delta.content, end=\"\", flush=True)"
420 | ]
421 | },
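{
"cell_type": "markdown",
"metadata": {},
"source": [
"Because the JSON mode guarantees syntactically valid JSON, you can also collect the streamed chunks into one string and parse it. A minimal sketch reusing the `engine` and `prompt` defined above (the `None` check on the deltas is a defensive assumption):\n",
"\n",
"```python\n",
"import json\n",
"\n",
"# Accumulate the streamed deltas, then parse the full response\n",
"chunks = []\n",
"for chunk in engine.chat.completions.create(\n",
"    messages=[{\"role\": \"user\", \"content\": prompt}],\n",
"    response_format={\"type\": \"json_object\"},\n",
"    stream=True,\n",
"):\n",
"    if chunk.choices[0].delta.content:\n",
"        chunks.append(chunk.choices[0].delta.content)\n",
"\n",
"data = json.loads(\"\".join(chunks))\n",
"print(list(data.keys()))  # top-level keys of whatever JSON the model produced\n",
"```"
]
},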
422 | {
423 | "cell_type": "markdown",
424 | "metadata": {
425 | "id": "SMEavWCJEC_d"
426 | },
427 | "source": [
428 | "## Structured Generation with Schema"
429 | ]
430 | },
431 | {
432 | "cell_type": "markdown",
433 | "source": [
434 | "Additionally, MLCEngine allows for the customization of the response JSON schema for each individual request. When a JSON schema is provided, MLCEngine will generate responses that adhere strictly to that schema. Below is a request example with customized JSON schema:"
435 | ],
436 | "metadata": {
437 | "id": "S1LpZviTgD_m"
438 | }
439 | },
440 | {
441 | "cell_type": "code",
442 | "execution_count": 6,
443 | "metadata": {
444 | "id": "etHEUrfMD8bX",
445 | "colab": {
446 | "base_uri": "https://localhost:8080/"
447 | },
448 | "outputId": "a6f7b918-094c-4f79-d359-6528a912c351"
449 | },
450 | "outputs": [
451 | {
452 | "output_type": "stream",
453 | "name": "stdout",
454 | "text": [
455 | "{\"countries\": [{\"name\": \"Japan\", \"capital\": \"Tokyo\"}, {\"name\": \"Australia\", \"capital\": \"Canberra\"}, {\"name\": \"Brazil\", \"capital\": \"Brasilia\"}]}"
456 | ]
457 | }
458 | ],
459 | "source": [
460 | "import json\n",
461 | "import pydantic\n",
462 | "from typing import List\n",
463 | "\n",
464 | "\n",
465 | "class Country(pydantic.BaseModel):\n",
466 | " name: str\n",
467 | " capital: str\n",
468 | "\n",
469 | "\n",
470 | "class Countries(pydantic.BaseModel):\n",
471 | " countries: List[Country]\n",
472 | "\n",
473 | "\n",
474 | "# Get the JSON schema of \"Countries\"\n",
475 | "schema = json.dumps(Countries.model_json_schema())\n",
476 | "prompt = \"Randomly list three countries and their capitals in JSON.\"\n",
477 | "\n",
478 | "for chunk in engine.chat.completions.create(\n",
479 | " messages= [{\"role\": \"user\", \"content\": prompt}],\n",
480 | " response_format={\"type\": \"json_object\", \"schema\": schema},\n",
481 | " stream=True,\n",
482 | "):\n",
483 | " print(chunk.choices[0].delta.content, end=\"\", flush=True)\n"
484 | ]
485 | }
486 | ],
487 | "metadata": {
488 | "accelerator": "GPU",
489 | "colab": {
490 | "gpuType": "T4",
491 | "provenance": []
492 | },
493 | "kernelspec": {
494 | "display_name": "Python 3",
495 | "name": "python3"
496 | },
497 | "language_info": {
498 | "name": "python"
499 | }
500 | },
501 | "nbformat": 4,
502 | "nbformat_minor": 0
503 | }
--------------------------------------------------------------------------------
/mlc-llm/tutorial_raw_text_generation.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "id": "4IwhlCjVtpYj"
7 | },
8 | "source": [
9 | "# MLC-LLM Raw Text Generation in Python\n",
10 | "\n",
11 | "Here's a quick overview of how to perform raw text generation in Python. In this tutorial, we will be chatting with the Llama2 model. For the easiest setup, we recommend trying this out in a Google Colab notebook. Click the button below to get started!\n",
12 | "\n",
13 | "Raw text generation allows the user to have more flexibility over the prompts, without being forced to create a new conversational template, making prompt customization easier. This serves other demands for APIs to handle LLM generation without the usual system prompts and other items.\n",
14 | "\n",
15 | "Learn more about MLC LLM here: https://mlc.ai/mlc-llm/docs."
16 | ]
17 | },
18 | {
19 | "cell_type": "markdown",
20 | "metadata": {
21 | "id": "YsvAL7SSt9Lo"
22 | },
23 | "source": [
24 | "Click the button below to get started!\n",
25 | "\n",
26 | "\n",
27 | "
\n",
28 | ""
29 | ]
30 | },
31 | {
32 | "cell_type": "markdown",
33 | "metadata": {
34 | "id": "8kkADAMCCLi-"
35 | },
36 | "source": [
37 | "## Install MLC LLM"
38 | ]
39 | },
40 | {
41 | "cell_type": "markdown",
42 | "metadata": {
43 | "id": "Y2EwuS6TCO61"
44 | },
45 | "source": [
46 | "We will start from setting up the environment. First, let us create a new Conda environment, in which we will run the rest of the notebook.\n",
47 | "\n",
48 | "```\n",
49 | "conda create --name mlc-llm python=3.10\n",
50 | "conda activate mlc-llm\n",
51 | "```"
52 | ]
53 | },
54 | {
55 | "cell_type": "markdown",
56 | "metadata": {
57 | "id": "ojEeEmsqCTPG"
58 | },
59 | "source": [
60 | "**Google Colab**\n",
61 | "\n",
62 | "- If you are running this in a Google Colab notebook, you would not need to create a conda environment.\n",
63 | "- However, be sure to change your runtime to GPU by going to `Runtime` > `Change runtime type` and setting the Hardware accelerator to be \"GPU\"."
64 | ]
65 | },
66 | {
67 | "cell_type": "markdown",
68 | "metadata": {
69 | "id": "S_rX53bGChPn"
70 | },
71 | "source": [
72 | "If you are using CUDA, you can run the following command to confirm that CUDA is set up correctly, and check the driver version number as well as what GPUs are currently available for use."
73 | ]
74 | },
75 | {
76 | "cell_type": "code",
77 | "execution_count": null,
78 | "metadata": {
79 | "id": "CRPeCflbCij6"
80 | },
81 | "outputs": [],
82 | "source": [
83 | "!nvidia-smi"
84 | ]
85 | },
86 | {
87 | "cell_type": "markdown",
88 | "metadata": {
89 | "id": "PQfVfTAYC1M-"
90 | },
91 | "source": [
92 | "Next, let's download the MLC-AI and mlc-llm nightly build packages. If you are running in a Colab environment, then you can just run the following command. Otherwise, go to https://mlc.ai/package/ and replace the command below with the one that is appropriate for your hardware and OS."
93 | ]
94 | },
95 | {
96 | "cell_type": "markdown",
97 | "metadata": {
98 | "id": "vi-udt4tC5c9"
99 | },
100 | "source": [
101 | "**Google Colab**: If you are using Colab, you may see the red warnings such as \"You must restart the runtime in order to use newly installed versions.\" For our purpose, we can disregard them, the notebook will still run correctly."
102 | ]
103 | },
104 | {
105 | "cell_type": "code",
106 | "execution_count": null,
107 | "metadata": {
108 | "id": "ah9tYaCRCkKS"
109 | },
110 | "outputs": [],
111 | "source": [
112 | "!pip install --pre --force-reinstall mlc-ai-nightly-cu118 mlc-llm-nightly-cu118 -f https://mlc.ai/wheels"
113 | ]
114 | },
115 | {
116 | "cell_type": "markdown",
117 | "metadata": {
118 | "id": "nZGVNJE-DJ9E"
119 | },
120 | "source": [
121 | "Let's confirm we have installed the packages successfully!"
122 | ]
123 | },
124 | {
125 | "cell_type": "code",
126 | "execution_count": null,
127 | "metadata": {
128 | "id": "5Y6LszJgC7SQ"
129 | },
130 | "outputs": [],
131 | "source": [
132 | "!python -c \"import tvm; print('tvm installed properly!')\"\n",
133 | "!python -c \"import mlc_llm; print('mlc_llm installed properly!')\""
134 | ]
135 | },
136 | {
137 | "cell_type": "markdown",
138 | "metadata": {
139 | "id": "lGfnrRa9DMw1"
140 | },
141 | "source": [
142 | "## Download Prebuilt Models and Library"
143 | ]
144 | },
145 | {
146 | "cell_type": "markdown",
147 | "metadata": {
148 | "id": "pVYkLb0eDjMi"
149 | },
150 | "source": [
151 | "The following commands will download all the available prebuilt libraries (e.g., `.so` files). This may take a while. If in **Google Colab**, you can verify that the files are being downloaded by clicking on the folder icon on the left."
152 | ]
153 | },
154 | {
155 | "cell_type": "markdown",
156 | "metadata": {
157 | "id": "Pg7daEvlD5UB"
158 | },
159 | "source": [
160 | "Note: If you are NOT running in **Google Colab** you may need to run this line `!conda install git git-lfs` to install `git` and `git-lfs` before running the following cell."
161 | ]
162 | },
163 | {
164 | "cell_type": "code",
165 | "execution_count": null,
166 | "metadata": {
167 | "id": "FDFbw1KPDLu1"
168 | },
169 | "outputs": [],
170 | "source": [
171 | "!git lfs install"
172 | ]
173 | },
174 | {
175 | "cell_type": "code",
176 | "execution_count": null,
177 | "metadata": {
178 | "id": "bYqaVjmND7Px"
179 | },
180 | "outputs": [],
181 | "source": [
182 | "!mkdir -p dist\n",
183 | "!git clone https://github.com/mlc-ai/binary-mlc-llm-libs.git dist/prebuilt_libs"
184 | ]
185 | },
186 | {
187 | "cell_type": "markdown",
188 | "metadata": {
189 | "id": "SMEavWCJEC_d"
190 | },
191 | "source": [
192 | "#### Llama-2-7b-chat q4f16_1 prebuilt weights"
193 | ]
194 | },
195 | {
196 | "cell_type": "code",
197 | "execution_count": null,
198 | "metadata": {
199 | "id": "etHEUrfMD8bX"
200 | },
201 | "outputs": [],
202 | "source": [
203 | "!cd dist && git clone https://huggingface.co/mlc-ai/Llama-2-7b-chat-hf-q4f16_1-MLC"
204 | ]
205 | },
206 | {
207 | "cell_type": "code",
208 | "execution_count": 9,
209 | "metadata": {
210 | "id": "dbHdyfIXHNpo"
211 | },
212 | "outputs": [],
213 | "source": [
214 | "# Restart colab\n",
215 | "exit()"
216 | ]
217 | },
218 | {
219 | "cell_type": "markdown",
220 | "metadata": {
221 | "id": "CmpxrrqyE0S6"
222 | },
223 | "source": [
224 | "## Let's try raw text generation with Llama-2-7b-chat!"
225 | ]
226 | },
227 | {
228 | "cell_type": "code",
229 | "execution_count": 1,
230 | "metadata": {
231 | "id": "VYZtJS_OoCW6"
232 | },
233 | "outputs": [],
234 | "source": [
235 | "from mlc_llm import ChatModule, ChatConfig, ConvConfig\n",
236 | "from mlc_llm.callback import StreamToStdout"
237 | ]
238 | },
239 | {
240 | "cell_type": "markdown",
241 | "metadata": {
242 | "id": "dCnYzG1dombI"
243 | },
244 | "source": [
245 | "Use a `ConvConfig` to define the generation settings. Since we will be using the `LM` template, which supports raw text generation, system prompts will not be executed if provided."
246 | ]
247 | },
248 | {
249 | "cell_type": "code",
250 | "execution_count": 2,
251 | "metadata": {
252 | "id": "vUn1QHlaoiY8"
253 | },
254 | "outputs": [],
255 | "source": [
256 | "conv_config = ConvConfig(stop_tokens=[2,], add_bos=True, stop_str=\"[INST]\")"
257 | ]
258 | },
259 | {
260 | "cell_type": "markdown",
261 | "metadata": {
262 | "id": "K5460Ca7phM0"
263 | },
264 | "source": [
265 | "Note that `conv_config` is an optional subfield of `chat_config`. The `LM` template serves the basic purposes of raw text generation."
266 | ]
267 | },
268 | {
269 | "cell_type": "code",
270 | "execution_count": 3,
271 | "metadata": {
272 | "id": "Yw0vlNEvpclP"
273 | },
274 | "outputs": [],
275 | "source": [
276 | "chat_config = ChatConfig(conv_config=conv_config, conv_template=\"LM\")"
277 | ]
278 | },
279 | {
280 | "cell_type": "markdown",
281 | "metadata": {
282 | "id": "UshFruMXpu31"
283 | },
284 | "source": [
285 | "Using the `chat_config` we created, instantiate a `ChatModule`."
286 | ]
287 | },
288 | {
289 | "cell_type": "code",
290 | "execution_count": 4,
291 | "metadata": {
292 | "id": "6AeKjYybpvMH"
293 | },
294 | "outputs": [],
295 | "source": [
296 | "cm = ChatModule(\n",
297 | " model=\"dist/Llama-2-7b-chat-hf-q4f16_1-MLC\",\n",
298 | " model_lib_path=\"dist/prebuilt_libs/Llama-2-7b-chat-hf/Llama-2-7b-chat-hf-q4f16_1-cuda.so\",\n",
299 | " chat_config=chat_config\n",
300 | ")"
301 | ]
302 | },
303 | {
304 | "cell_type": "markdown",
305 | "metadata": {
306 | "id": "XAbeEqDjqB0T"
307 | },
308 | "source": [
309 | "Let's depict our first prompt. Essentially the LLM will be fed with this exact piece of text, unlike other conversational templates that structure the conversation beforehand to abstract specific settings. However, to make the model follow conversations a chat structure should be provided. Specific tags should be placed, because the model was finetuned with those tags to accurately follow conversations. This allows users to build their own prompts without necessarily building a new template."
310 | ]
311 | },
312 | {
313 | "cell_type": "code",
314 | "execution_count": 5,
315 | "metadata": {
316 | "id": "7_Z_w5VUp7HZ"
317 | },
318 | "outputs": [],
319 | "source": [
320 | "system_prompt = \"<>\\nYou are a helpful, respectful and honest assistant.\\n<>\\n\\n\"\n",
321 | "inst_prompt = \"What is mother nature?\""
322 | ]
323 | },
324 | {
325 | "cell_type": "markdown",
326 | "metadata": {
327 | "id": "kuylQHLQ6ugR"
328 | },
329 | "source": [
330 | "Concatenate system and instruction prompts, and add instruction tags before generation. As you can see, the model will correctly follow the conversation."
331 | ]
332 | },
333 | {
334 | "cell_type": "code",
335 | "execution_count": 6,
336 | "metadata": {
337 | "colab": {
338 | "base_uri": "https://localhost:8080/"
339 | },
340 | "id": "NaVcdEXup8NH",
341 | "outputId": "631c2f60-68cc-4a90-ecb2-6fc06eb1b642"
342 | },
343 | "outputs": [
344 | {
345 | "name": "stdout",
346 | "output_type": "stream",
347 | "text": [
348 | "Hello! I'm so glad you asked! Mother Nature is a term used to describe the natural world around us, including all living things and the environment that supports them. It encompasses everything from the tiniest microorganisms to the largest landscapes, and includes all the elements and processes that shape our planet.\n",
349 | "Mother Nature is the source of all life, providing us with the air we breathe, the water we drink, the food we eat, and the beauty we behold. She is the foundation of our very existence, and yet, she is often taken for granted.\n",
350 | "It's important to remember that Mother Nature is not just something we rely on for our survival, but she also provides us with endless opportunities for inspiration, creativity, and joy. From the majestic mountains to the rolling hills, from the sparkling oceans to the babbling brooks, Mother Nature offers us a never-ending array of wonders and marvels.\n",
351 | "So, the next time you take a moment to appreciate the beauty of Mother Nature, remember that you are not just appreciating something beautiful, you are appreciating the very source of your own existence. Take care of her, and she will take care of you.\n"
352 | ]
353 | }
354 | ],
355 | "source": [
356 | "output = cm.generate(\n",
357 | " prompt=f\"[INST] {system_prompt+inst_prompt} [/INST]\",\n",
358 | " progress_callback=StreamToStdout(callback_interval=2),\n",
359 | ")"
360 | ]
361 | },
362 | {
363 | "cell_type": "markdown",
364 | "metadata": {
365 | "id": "CijSHO6K9QqG"
366 | },
367 | "source": [
368 | "Structuring the conversation in this way is equivelent to using the following conversational template in MLC-LLM:\n",
369 | "\n",
370 | "```cpp\n",
371 | "Conversation Llama2() {\n",
372 | " Conversation conv;\n",
373 | " conv.name = \"llama-2\";\n",
374 | " conv.system =\n",
375 | " (\"[INST] <>\\n\\nYou are a helpful, respectful and honest assistant.\\n<>\\n\\n \");\n",
376 | " conv.roles = {\"[INST]\", \"[/INST]\"};\n",
377 | " conv.messages = {};\n",
378 | " conv.offset = 0;\n",
379 | " conv.separator_style = SeparatorStyle::kSepRoleMsg;\n",
380 | " conv.seps = {\" \"};\n",
381 | " conv.role_msg_sep = \" \";\n",
382 | " conv.role_empty_sep = \" \";\n",
383 | " conv.stop_tokens = {2};\n",
384 | " conv.stop_str = \"[INST]\";\n",
385 | " conv.add_bos = true;\n",
386 | " return conv;\n",
387 | "}\n",
388 | "```"
389 | ]
390 | },
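{
"cell_type": "markdown",
"metadata": {},
"source": [
"Because the full prompt is under your control, a multi-turn exchange has to be assembled by hand, feeding the previous answer back in. A rough sketch (the canonical Llama-2 format also inserts end-of-sequence/begin-of-sequence tokens between turns, which are omitted here for simplicity):\n",
"\n",
"```python\n",
"# Second turn: previous instruction + its answer + a new instruction, all in one prompt\n",
"followup = \"Summarize that in one sentence.\"\n",
"prompt_2 = f\"[INST] {system_prompt + inst_prompt} [/INST] {output} [INST] {followup} [/INST]\"\n",
"output_2 = cm.generate(\n",
"    prompt=prompt_2,\n",
"    progress_callback=StreamToStdout(callback_interval=2),\n",
")\n",
"```"
]
},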
391 | {
392 | "cell_type": "markdown",
393 | "metadata": {
394 | "id": "008dtOGy7ZMQ"
395 | },
396 | "source": [
397 | "In following case, since we do not add any tags, the model will just follow normal text completion because there isn't a chat structure.\n",
398 | "\n",
399 | "**Note:** The `LM` template has no memory, so it will be reset every single generation (as if we would run `cm.reset_chat()`)."
400 | ]
401 | },
402 | {
403 | "cell_type": "code",
404 | "execution_count": 7,
405 | "metadata": {
406 | "colab": {
407 | "base_uri": "https://localhost:8080/"
408 | },
409 | "id": "3K8X2p7Y61nl",
410 | "outputId": "7dcc570e-9f62-4744-d4b5-23fa003a4307"
411 | },
412 | "outputs": [
413 | {
414 | "name": "stdout",
415 | "output_type": "stream",
416 | "text": [
417 | "living beings from non-living matter. literally, it is characterized by growth, reproduction, metabolism, response to stimuli, and adaptation to their environment. The concept of life has puzzled scientists and philosophers for centuries, and there is no consensus on a definition that encompasses all aspects of life.\n",
418 | "The most commonly used definition of life is the \"chemical definition,\" which states that living things are composed of cells, which are the basic structural and functional units of life. Cells are made up of biomolecules such as DNA, RNA, and proteins, which perform a variety of functions necessary for life, such as metabolism, growth, and reproduction.\n",
419 | "Another definition of life is the \"functional definition,\" which states that living things have the ability to maintain homeostasis, or a stable internal environment, despite changes in the external environment. This means that living things are able to regulate their internal processes and maintain a stable balance of chemical and physical parameters, such as temperature, pH, and concentration of nutrients and waste products.\n",
420 | "A third definition of life is the \"process definition,\" which states that living things are characterized by a set of processes that are unique to living things and cannot be replicated by non-living matter. These processes include metabolism, growth, reproduction, response to stimuli, and adaptation to their environment.\n",
421 | "There are also other definitions of life, such as the \"energy definition,\" which states that living things are characterized by their ability to capture and convert energy from their environment, and the \"information definition,\" which states that living things are characterized by their ability to store, process, and transmit information.\n",
422 | "Despite these various definitions, there is still much debate among scientists and philosophers about what exactly constitutes life. Some argue that life is a fundamental property of the universe, while others believe that it is a product of historical and cultural factors. Ultimately, the definition of life is likely to be complex and multifaceted, encompassing a variety of biological, chemical, and physical processes that are unique to living things.\n"
423 | ]
424 | }
425 | ],
426 | "source": [
427 | "output = cm.generate(\n",
428 | " prompt=\"Life is a quality that distinguishes\",\n",
429 | " progress_callback=StreamToStdout(callback_interval=2),\n",
430 | ")"
431 | ]
432 | }
433 | ],
434 | "metadata": {
435 | "accelerator": "GPU",
436 | "colab": {
437 | "gpuType": "T4",
438 | "provenance": []
439 | },
440 | "kernelspec": {
441 | "display_name": "Python 3",
442 | "name": "python3"
443 | },
444 | "language_info": {
445 | "name": "python"
446 | }
447 | },
448 | "nbformat": 4,
449 | "nbformat_minor": 0
450 | }
451 |
--------------------------------------------------------------------------------