Open console to see output
<br />
<br />
<label id="init-label"> </label>

<h3>Prompt</h3>
<label id="prompt-label"> </label>

<h3>Response</h3>
<label id="generate-label"> </label>
<br />
<script type="module" src="./get_started.js"></script>
</body>
</html>
--------------------------------------------------------------------------------
/examples/abort-reload/src/get_started.js:
--------------------------------------------------------------------------------
1 | import * as webllm from "@mlc-ai/web-llm";
2 |
3 |
4 | let engine;
5 |
6 | function setLabel(id, text) {
7 | const label = document.getElementById(id);
8 | if (label == null) {
9 | throw Error("Cannot find label " + id);
10 | }
11 | label.innerText = text;
12 | }
13 |
14 | async function main() {
15 | const initProgressCallback = (report) => {
16 | console.log(report.text);
17 | setLabel("init-label", report.text);
18 | };
19 | // Option 1: If we do not specify appConfig, we use `prebuiltAppConfig` defined in `config.ts`
20 | const selectedModel = "Llama-3.1-8B-Instruct-q4f32_1-MLC";
21 | engine = new webllm.MLCEngine({
22 | initProgressCallback,
23 | });
24 |   engine.reload(selectedModel); // intentionally not awaited; unload() below aborts this load midway
25 | }
26 | main();
27 | setTimeout(() => {
28 | console.log("calling unload");
29 | engine.unload().catch((err) => {
30 | console.log(err);
31 | });
32 | }, 5000);
33 |
--------------------------------------------------------------------------------
/examples/cache-usage/README.md:
--------------------------------------------------------------------------------
1 | # WebLLM Cache Usage
2 |
3 | WebLLM supports both the Cache API and IndexedDB, which you can specify via `AppConfig.useIndexedDBCache`.
4 | This folder provides an example of how the Cache API and the IndexedDB cache are used in WebLLM. We also
5 | demonstrate utility cache functions such as deleting models, checking whether a model is in the cache, etc.
6 |
7 | For more information about the two caches, see: https://developer.mozilla.org/en-US/docs/Web/API/Storage_API/Storage_quotas_and_eviction_criteria#what_technologies_store_data_in_the_browser.
8 |
9 | To inspect the downloaded artifacts in your browser, open the developer console, go to the Application tab,
10 | and you will find the artifacts under either `IndexedDB` or `Cache storage`.
11 |
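For reference, here is a minimal sketch of how these pieces fit together; it mirrors `src/cache_usage.ts` in this folder:

```typescript
import * as webllm from "@mlc-ai/web-llm";

async function demo() {
  // Pick the cache backend: true -> IndexedDB, false -> Cache API.
  const appConfig = webllm.prebuiltAppConfig;
  appConfig.useIndexedDBCache = true;

  const selectedModel = "phi-2-q4f16_1-MLC";
  const engine = await webllm.CreateMLCEngine(selectedModel, { appConfig });

  // Utility functions that work with whichever cache was selected above.
  console.log(await webllm.hasModelInCache(selectedModel, appConfig)); // true after loading
  await webllm.deleteModelAllInfoInCache(selectedModel, appConfig);
  console.log(await webllm.hasModelInCache(selectedModel, appConfig)); // false after deletion
}

demo();
```
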
12 | To run the example, you can do the following steps under this folder
13 |
14 | ```bash
15 | npm install
16 | npm start
17 | ```
18 |
19 | Note: if you would like to hack the WebLLM core package,
20 | you can change the web-llm dependency to `"file:../.."` and follow the build-from-source
21 | instructions in the project to build WebLLM locally. This option is only recommended
22 | if you would like to hack the WebLLM core package.
23 |
--------------------------------------------------------------------------------
/examples/cache-usage/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "cache-usage",
3 | "version": "0.1.0",
4 | "private": true,
5 | "scripts": {
6 | "start": "parcel src/cache_usage.html --port 8888",
7 | "build": "parcel build src/cache_usage.html --dist-dir lib"
8 | },
9 | "devDependencies": {
10 | "buffer": "^5.7.1",
11 | "parcel": "^2.8.3",
12 | "process": "^0.11.10",
13 | "tslib": "^2.3.1",
14 | "typescript": "^4.9.5",
15 | "url": "^0.11.3"
16 | },
17 | "dependencies": {
18 | "@mlc-ai/web-llm": "^0.2.79"
19 | }
20 | }
21 |
--------------------------------------------------------------------------------
/examples/cache-usage/src/cache_usage.html:
--------------------------------------------------------------------------------
<!doctype html>
<html>
  <body>
    <h2>WebLLM Test Page</h2>
    Open console to see output
    <br />
    <br />
    <label id="init-label"> </label>

    <h3>Prompt</h3>
    <label id="prompt-label"> </label>

    <h3>Response</h3>
    <label id="generate-label"> </label>
    <br />
    <script type="module" src="./cache_usage.ts"></script>
  </body>
</html>
--------------------------------------------------------------------------------
/examples/cache-usage/src/cache_usage.ts:
--------------------------------------------------------------------------------
1 | import * as webllm from "@mlc-ai/web-llm";
2 |
3 | function setLabel(id: string, text: string) {
4 | const label = document.getElementById(id);
5 | if (label == null) {
6 | throw Error("Cannot find label " + id);
7 | }
8 | label.innerText = text;
9 | }
10 |
11 | const initProgressCallback = (report: webllm.InitProgressReport) => {
12 | setLabel("init-label", report.text);
13 | };
14 |
15 | async function main() {
16 | const appConfig = webllm.prebuiltAppConfig;
17 |   // CHANGE THIS TO SEE THE EFFECTS OF BOTH; THE CODE BELOW DOES NOT NEED TO CHANGE
18 | appConfig.useIndexedDBCache = true;
19 |
20 | if (appConfig.useIndexedDBCache) {
21 | console.log("Using IndexedDB Cache");
22 | } else {
23 | console.log("Using Cache API");
24 | }
25 |
26 | // 1. This triggers downloading and caching the model with either Cache or IndexedDB Cache
27 | const selectedModel = "phi-2-q4f16_1-MLC";
28 | const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
29 | selectedModel,
30 | { initProgressCallback: initProgressCallback, appConfig: appConfig },
31 | );
32 |
33 | const request: webllm.ChatCompletionRequest = {
34 | stream: false,
35 | messages: [
36 | {
37 | role: "user",
38 | content: "Write an analogy between mathematics and a lighthouse.",
39 | },
40 | ],
41 | n: 1,
42 | };
43 | let reply = await engine.chat.completions.create(request);
44 | console.log(reply);
45 |
46 | // 2. Check whether model weights are cached
47 | let modelCached = await webllm.hasModelInCache(selectedModel, appConfig);
48 | console.log("hasModelInCache: ", modelCached);
49 | if (!modelCached) {
50 | throw Error("Expect hasModelInCache() to be true, but got: " + modelCached);
51 | }
52 |
53 | // 3. We reload, and we should see this time it is much faster because the weights are cached.
54 | console.log("Reload model start");
55 | await engine.reload(selectedModel);
56 | console.log("Reload model end");
57 | reply = await engine.chat.completions.create(request);
58 | console.log(reply);
59 |
60 |   // 4. Delete everything about this model from the cache.
61 |   // You can also delete only the model library wasm, only the model weights, or only the config file.
62 | await webllm.deleteModelAllInfoInCache(selectedModel, appConfig);
63 | modelCached = await webllm.hasModelInCache(selectedModel, appConfig);
64 | console.log("After deletion, hasModelInCache: ", modelCached);
65 | if (modelCached) {
66 | throw Error(
67 | "Expect hasModelInCache() to be false, but got: " + modelCached,
68 | );
69 | }
70 |
71 | // 5. If we reload, we should expect the model to start downloading again
72 | console.log("Reload model start");
73 | await engine.reload(selectedModel);
74 | console.log("Reload model end");
75 | reply = await engine.chat.completions.create(request);
76 | console.log(reply);
77 | }
78 |
79 | main();
80 |
--------------------------------------------------------------------------------
/examples/chrome-extension-webgpu-service-worker/README.md:
--------------------------------------------------------------------------------
1 | # WebLLM Chrome Extension using WebGPU Running on Service Worker
2 |
3 | 
4 |
5 | > [!WARNING]
6 | > Service worker support in WebGPU is enabled by default in [Chrome 124](https://chromiumdash.appspot.com/commit/8d78510e4aca5ac3cd8ee4a33e96b404eaa43246).
7 | > If you are using Chrome 123, go to `chrome://flags/#enable-experimental-web-platform-features`, enable the `#enable-experimental-web-platform-features` flag, and **relaunch the browser**.
8 |
9 | This example shows how we can create a Chrome extension using WebGPU and a service worker.
10 |
11 | - The project structure is as follows:
12 |   - `manifest.json`: A required file that lists important information about the structure and behavior of the extension. Here we use Manifest V3.
13 | - `popup.ts`: Script of the extension pop-up window.
14 | - `background.ts`: Script of the service worker. An extension service worker is loaded when it is needed, and unloaded when it goes dormant.
15 | - `content.js`: Content script that interacts with DOM.
16 | - Run
17 |
18 | ```bash
19 | npm install
20 | npm run build
21 | ```
22 |
23 | This will create a new directory at `./dist/`. To load the extension into Chrome, go to Extensions > Manage Extensions and select Load Unpacked. Add the `./dist/` directory. You can now pin the extension to your toolbar and use it to chat with your favorite model!
24 |
25 | **Note**: This example disables chatting using the contents of the active tab by default.
26 | To enable it, set `useContext` in `popup.ts` to `true`. More info about this feature can be found
27 | [here](https://github.com/mlc-ai/web-llm/pull/190).
28 | However, if the page content is too large (for example, exceeding the model's context window), it
29 | might run into issues. We recommend using `example.html` to test this feature.
30 |
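The popup talks to this service worker through WebLLM's extension engine. Below is a rough sketch of the popup side; the model id and prompt are illustrative, and `popup.ts` in this folder contains the full UI wiring:

```typescript
import * as webllm from "@mlc-ai/web-llm";

async function runInPopup() {
  // Connects to the service worker registered in background.ts
  // (which listens on the "web_llm_service_worker" port).
  const engine = await webllm.CreateExtensionServiceWorkerMLCEngine(
    "Llama-3.1-8B-Instruct-q4f32_1-MLC", // illustrative model id
    { initProgressCallback: (report) => console.log(report.text) },
  );

  const reply = await engine.chat.completions.create({
    messages: [{ role: "user", content: "Summarize this page in one sentence." }],
  });
  console.log(reply.choices[0].message.content);
}

runInPopup();
```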
--------------------------------------------------------------------------------
/examples/chrome-extension-webgpu-service-worker/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "chrome-extension",
3 | "version": "1.0.0",
4 | "description": "",
5 | "private": true,
6 | "scripts": {
7 | "build": "parcel build src/manifest.json --config @parcel/config-webextension"
8 | },
9 | "author": "",
10 | "license": "ISC",
11 | "devDependencies": {
12 | "@parcel/config-webextension": "^2.9.3",
13 | "@types/chrome": "^0.0.242",
14 | "buffer": "^6.0.3",
15 | "parcel": "^2.9.3",
16 | "process": "^0.11.10",
17 | "url": "^0.11.1"
18 | },
19 | "dependencies": {
20 | "@mlc-ai/web-llm": "^0.2.79",
21 | "progressbar.js": "^1.1.0"
22 | }
23 | }
24 |
--------------------------------------------------------------------------------
/examples/chrome-extension-webgpu-service-worker/src/background.ts:
--------------------------------------------------------------------------------
1 | import { ExtensionServiceWorkerMLCEngineHandler } from "@mlc-ai/web-llm";
2 |
3 | // Hookup an engine to a service worker handler
4 | let handler;
5 |
6 | chrome.runtime.onConnect.addListener(function (port) {
7 | console.assert(port.name === "web_llm_service_worker");
8 | if (handler === undefined) {
9 | handler = new ExtensionServiceWorkerMLCEngineHandler(port);
10 | } else {
11 | handler.setPort(port);
12 | }
13 | port.onMessage.addListener(handler.onmessage.bind(handler));
14 | });
15 |
--------------------------------------------------------------------------------
/examples/chrome-extension-webgpu-service-worker/src/content.js:
--------------------------------------------------------------------------------
1 | // Only the content script is able to access the DOM
2 | chrome.runtime.onConnect.addListener(function (port) {
3 | port.onMessage.addListener(function (msg) {
4 | port.postMessage({ contents: document.body.innerHTML });
5 | });
6 | });
7 |
--------------------------------------------------------------------------------
/examples/chrome-extension-webgpu-service-worker/src/example.html:
--------------------------------------------------------------------------------
1 | In the year 2154, humanity had colonized several planets in the distant reaches
2 | of the galaxy. The planet of Xylophia-IV was one of the most remote and
3 | inhospitable, with temperatures often dropping to -200 degrees Celsius. Despite
4 | these harsh conditions, a team of scientists had established a research station
5 | on the planet to study the unique geological formations and exotic flora and
6 | fauna. One day, while conducting a routine survey of the planet's surface, the
7 | team discovered a strange object buried deep in the ice. As they examined it
8 | closer, they realized it was a small, metallic capsule with a glowing blue
9 | symbol etched onto its surface. The team's leader, a brilliant scientist named
10 | Dr. Maria Rodriguez, was immediately intrigued by the capsule's mysterious
11 | origins. She ordered her team to bring it back to the research station for
12 | further analysis. After weeks of studying the capsule, the team finally cracked
13 | the code to the symbol etched onto its surface. It was a message from an alien
14 | race, warning Earth of an impending attack from an unknown threat. The team was
15 | shocked and dismayed by the news, but they knew they had to act quickly to warn
16 | the rest of humanity. They transmitted the message to the nearest space station,
17 | which relayed it to Earth's government. As the threat of attack loomed near, the
18 | team remained on high alert, ready to face whatever dangers lay ahead. They had
19 | uncovered a secret of the universe, and now they were determined to protect
20 | their planet and its inhabitants at all costs.
21 |
--------------------------------------------------------------------------------
/examples/chrome-extension-webgpu-service-worker/src/icons/icon-128.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlc-ai/web-llm/d8b25fed8e81d6f6b27cdc07e839c1c09cfaa43d/examples/chrome-extension-webgpu-service-worker/src/icons/icon-128.png
--------------------------------------------------------------------------------
/examples/chrome-extension-webgpu-service-worker/src/icons/icon-16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlc-ai/web-llm/d8b25fed8e81d6f6b27cdc07e839c1c09cfaa43d/examples/chrome-extension-webgpu-service-worker/src/icons/icon-16.png
--------------------------------------------------------------------------------
/examples/chrome-extension-webgpu-service-worker/src/icons/icon-32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlc-ai/web-llm/d8b25fed8e81d6f6b27cdc07e839c1c09cfaa43d/examples/chrome-extension-webgpu-service-worker/src/icons/icon-32.png
--------------------------------------------------------------------------------
/examples/chrome-extension-webgpu-service-worker/src/icons/icon-64.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlc-ai/web-llm/d8b25fed8e81d6f6b27cdc07e839c1c09cfaa43d/examples/chrome-extension-webgpu-service-worker/src/icons/icon-64.png
--------------------------------------------------------------------------------
/examples/chrome-extension-webgpu-service-worker/src/manifest.json:
--------------------------------------------------------------------------------
1 | {
2 | "manifest_version": 3,
3 | "name": "MLCBot",
4 | "version": "0.1.0",
5 | "description": "Chat with your browser",
6 | "icons": {
7 | "16": "icons/icon-16.png",
8 | "32": "icons/icon-32.png",
9 | "64": "icons/icon-64.png",
10 | "128": "icons/icon-128.png"
11 | },
12 | "content_security_policy": {
13 | "extension_pages": "style-src-elem 'self' https://cdnjs.cloudflare.com; font-src 'self' https://cdnjs.cloudflare.com; script-src 'self' 'wasm-unsafe-eval'; default-src 'self' data:; connect-src 'self' data: http://localhost:8000 https://huggingface.co https://cdn-lfs.huggingface.co https://cdn-lfs-us-1.huggingface.co https://raw.githubusercontent.com https://cdn-lfs-us-1.hf.co"
14 | },
15 | "action": {
16 | "default_title": "MLCBot",
17 | "default_popup": "popup.html"
18 | },
19 | "content_scripts": [
20 | {
21 | "matches": [""],
22 | "js": ["content.js"]
23 | }
24 | ],
25 | "background": {
26 | "service_worker": "background.ts",
27 | "type": "module"
28 | },
29 | "permissions": ["storage", "tabs", "webNavigation"]
30 | }
31 |
--------------------------------------------------------------------------------
/examples/chrome-extension-webgpu-service-worker/src/popup.css:
--------------------------------------------------------------------------------
1 | *,
2 | *::before,
3 | *::after {
4 | margin: 0;
5 | padding: 0;
6 | box-sizing: border-box;
7 | }
8 |
9 | html {
10 | font-family:
11 | -apple-system,
12 | BlinkMacSystemFont,
13 | Segoe UI,
14 | Helvetica,
15 | Arial,
16 | sans-serif;
17 | color: #222;
18 | }
19 |
20 | body {
21 | margin: 0;
22 | padding: 0.5rem;
23 | background-color: #778da9;
24 | width: 320px;
25 | font-size: small;
26 | }
27 |
28 | p {
29 | margin: 0;
30 | }
31 |
32 | /* LOADING BAR */
33 | #loadingContainer {
34 | margin-bottom: 15px;
35 | width: 300px;
36 | height: 8px;
37 | }
38 |
39 | /* INPUT AREA */
40 | #query-input {
41 | border: 1px solid #ccc;
42 | border-radius: 4px;
43 | }
44 |
45 | .input-container {
46 | display: flex;
47 | flex-direction: row;
48 | align-items: center;
49 | }
50 |
51 | .input-container input {
52 | width: 100%;
53 | outline: none;
54 | padding: 0.5rem;
55 | margin-right: 0.5rem;
56 | }
57 |
58 | /* SUBMIT BUTTON */
59 | .btn {
60 | background-color: #1b263b;
61 | color: white;
62 | font-size: small;
63 | cursor: pointer;
64 | border-radius: 4px;
65 | border: none;
66 | padding: 0.5rem;
67 | }
68 |
69 | .btn:hover {
70 | background-color: #d0d0d0;
71 | }
72 |
73 | .btn:disabled {
74 | background-color: #a7a7a7;
75 | color: rgb(255, 255, 255);
76 | cursor: default;
77 | }
78 |
79 | .btn img {
80 | width: 1rem;
81 | height: 1rem;
82 | }
83 |
84 | /* LOADING */
85 |
86 | .stage {
87 | display: flex;
88 | justify-content: center;
89 | align-items: center;
90 | position: relative;
91 | margin: 0 -5%;
92 | overflow: hidden;
93 | }
94 |
95 | #loading-indicator {
96 | display: none;
97 | color: white;
98 | margin-top: 0.5rem;
99 | }
100 |
101 | .dot-flashing {
102 | position: relative;
103 | width: 10px;
104 | height: 10px;
105 | border-radius: 5px;
106 | background-color: #1b263b;
107 | color: #1b263b;
108 | animation: dot-flashing 0.4s infinite linear alternate;
109 | animation-delay: 0.2s;
110 | }
111 |
112 | .dot-flashing::before,
113 | .dot-flashing::after {
114 | content: "";
115 | display: inline-block;
116 | position: absolute;
117 | top: 0;
118 | }
119 |
120 | .dot-flashing::before {
121 | left: -15px;
122 | width: 10px;
123 | height: 10px;
124 | border-radius: 5px;
125 | background-color: #1b263b;
126 | color: #1b263b;
127 | animation: dot-flashing 0.4s infinite alternate;
128 | animation-delay: 0s;
129 | }
130 |
131 | .dot-flashing::after {
132 | left: 15px;
133 | width: 10px;
134 | height: 10px;
135 | border-radius: 5px;
136 | background-color: #1b263b;
137 | color: #1b263b;
138 | animation: dot-flashing 0.4s infinite alternate;
139 | animation-delay: 0.4s;
140 | }
141 |
142 | @keyframes dot-flashing {
143 | 0% {
144 | background-color: #1b263b;
145 | }
146 |
147 | 50%,
148 | 100% {
149 | background-color: #415a77;
150 | }
151 | }
152 |
153 | /* ANSWERS */
154 | #queriesAnswersContainer {
155 | display: block;
156 | color: white;
157 | margin-top: 0.5rem;
158 | }
159 |
160 | #answer {
161 | color: #333333;
162 | }
163 |
164 | #answerWrapper {
165 | display: none;
166 | background-color: #ffd166;
167 | border-radius: 8px;
168 | padding: 0.5rem;
169 | margin-top: 0.5rem;
170 | }
171 |
172 | .queriesAnswers {
173 | border-radius: 8px;
174 | background-color: #ffd166;
175 | padding: 0.5rem;
176 | color: #333333;
177 | }
178 |
179 | #lastQuery {
180 | color: rgb(188, 188, 188);
181 | }
182 |
183 | #lastAnswer {
184 | color: white;
185 | margin-top: 0.5rem;
186 | }
187 |
188 | #lastRequest {
189 | padding: 0.5rem;
190 | margin-top: 0.5rem;
191 | background-color: #333333;
192 | border-radius: 4px;
193 | }
194 |
195 | /* ANSWER OPTIONS */
196 | .timeStamp {
197 | color: #9a8c98;
198 | }
199 |
200 | .copyRow {
201 | display: flex;
202 | flex-direction: row;
203 | align-items: end;
204 | justify-content: space-between;
205 | color: #a7a7a7;
206 | margin-top: 0.5rem;
207 | }
208 |
209 | .copyText {
210 | display: none;
211 | color: #a7a7a7;
212 | margin-right: 0.5rem;
213 | }
214 |
215 | .copyButton {
216 | color: #415a77;
217 | background-color: transparent;
218 | border: none;
219 | cursor: pointer;
220 | padding: 0;
221 | margin-left: 0.5rem;
222 | }
223 |
224 | .copyButton:hover {
225 | color: #5e80a7;
226 | background-color: transparent;
227 | }
228 |
229 | .removeButton {
230 | color: #415a77;
231 | background-color: transparent;
232 | border: none;
233 | cursor: pointer;
234 | padding: 0;
235 | }
236 |
237 | .removeButton:hover {
238 | color: #5e80a7;
239 | background-color: transparent;
240 | }
241 |
--------------------------------------------------------------------------------
/examples/chrome-extension-webgpu-service-worker/src/popup.html:
--------------------------------------------------------------------------------
<!doctype html>
<html>
  <head>
    <meta charset="utf-8" />
    <title>Chatbot</title>
    <link rel="stylesheet" href="./popup.css" />
    <script type="module" src="./popup.ts"></script>
  </head>
  <body>
    <!-- Popup UI (see popup.css): loading bar (#loadingContainer), query input
         (#query-input) with a submit button, loading indicator, and the answers
         area (#answerWrapper, #queriesAnswersContainer). -->
  </body>
</html>
--------------------------------------------------------------------------------
/examples/chrome-extension/README.md:
--------------------------------------------------------------------------------
1 | # WebLLM Chrome Extension
2 |
3 | 
4 |
5 | To run the extension, do the following steps under this folder
6 |
7 | ```bash
8 | npm install
9 | npm run build
10 | ```
11 |
12 | This will create a new directory at `chrome-extension/dist/`. To load the extension into Chrome, go to Extensions > Manage Extensions and select Load Unpacked. Add the `chrome-extension/dist/` directory. You can now pin the extension to your toolbar and use the drop-down menu to chat with your favorite model!
13 |
--------------------------------------------------------------------------------
/examples/chrome-extension/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "chrome-extension",
3 | "version": "1.0.1",
4 | "description": "",
5 | "private": true,
6 | "scripts": {
7 | "build": "parcel build src/manifest.json --config @parcel/config-webextension"
8 | },
9 | "author": "",
10 | "license": "ISC",
11 | "devDependencies": {
12 | "@parcel/config-webextension": "^2.9.3",
13 | "@types/chrome": "^0.0.242",
14 | "buffer": "^6.0.3",
15 | "parcel": "^2.9.3",
16 | "process": "^0.11.10",
17 | "url": "^0.11.1"
18 | },
19 | "dependencies": {
20 | "@mlc-ai/web-llm": "^0.2.79",
21 | "progressbar.js": "^1.1.0"
22 | }
23 | }
24 |
--------------------------------------------------------------------------------
/examples/chrome-extension/src/content.js:
--------------------------------------------------------------------------------
1 | // Only the content script is able to access the DOM
2 | chrome.runtime.onConnect.addListener(function (port) {
3 | port.onMessage.addListener(function (msg) {
4 | port.postMessage({ contents: document.body.innerText });
5 | });
6 | });
7 |
--------------------------------------------------------------------------------
/examples/chrome-extension/src/example.html:
--------------------------------------------------------------------------------
1 | In the year 2154, humanity had colonized several planets in the distant reaches
2 | of the galaxy. The planet of Xylophia-IV was one of the most remote and
3 | inhospitable, with temperatures often dropping to -200 degrees Celsius. Despite
4 | these harsh conditions, a team of scientists had established a research station
5 | on the planet to study the unique geological formations and exotic flora and
6 | fauna. One day, while conducting a routine survey of the planet's surface, the
7 | team discovered a strange object buried deep in the ice. As they examined it
8 | closer, they realized it was a small, metallic capsule with a glowing blue
9 | symbol etched onto its surface. The team's leader, a brilliant scientist named
10 | Dr. Maria Rodriguez, was immediately intrigued by the capsule's mysterious
11 | origins. She ordered her team to bring it back to the research station for
12 | further analysis. After weeks of studying the capsule, the team finally cracked
13 | the code to the symbol etched onto its surface. It was a message from an alien
14 | race, warning Earth of an impending attack from an unknown threat. The team was
15 | shocked and dismayed by the news, but they knew they had to act quickly to warn
16 | the rest of humanity. They transmitted the message to the nearest space station,
17 | which relayed it to Earth's government. As the threat of attack loomed near, the
18 | team remained on high alert, ready to face whatever dangers lay ahead. They had
19 | uncovered a secret of the universe, and now they were determined to protect
20 | their planet and its inhabitants at all costs.
21 |
--------------------------------------------------------------------------------
/examples/chrome-extension/src/icons/icon-128.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlc-ai/web-llm/d8b25fed8e81d6f6b27cdc07e839c1c09cfaa43d/examples/chrome-extension/src/icons/icon-128.png
--------------------------------------------------------------------------------
/examples/chrome-extension/src/icons/icon-16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlc-ai/web-llm/d8b25fed8e81d6f6b27cdc07e839c1c09cfaa43d/examples/chrome-extension/src/icons/icon-16.png
--------------------------------------------------------------------------------
/examples/chrome-extension/src/icons/icon-32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlc-ai/web-llm/d8b25fed8e81d6f6b27cdc07e839c1c09cfaa43d/examples/chrome-extension/src/icons/icon-32.png
--------------------------------------------------------------------------------
/examples/chrome-extension/src/icons/icon-64.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlc-ai/web-llm/d8b25fed8e81d6f6b27cdc07e839c1c09cfaa43d/examples/chrome-extension/src/icons/icon-64.png
--------------------------------------------------------------------------------
/examples/chrome-extension/src/manifest.json:
--------------------------------------------------------------------------------
1 | {
2 | "manifest_version": 3,
3 | "name": "MLCBot",
4 | "version": "0.1.1",
5 | "description": "Chat with your browser",
6 | "icons": {
7 | "16": "icons/icon-16.png",
8 | "32": "icons/icon-32.png",
9 | "64": "icons/icon-64.png",
10 | "128": "icons/icon-128.png"
11 | },
12 | "content_security_policy": {
13 | "extension_pages": "style-src-elem 'self' https://cdnjs.cloudflare.com; font-src 'self' https://cdnjs.cloudflare.com; script-src 'self' 'wasm-unsafe-eval'; default-src 'self' data:; connect-src 'self' data: http://localhost:8000 https://huggingface.co https://cdn-lfs.huggingface.co https://cdn-lfs-us-1.huggingface.co https://raw.githubusercontent.com https://cdn-lfs-us-1.hf.co"
14 | },
15 | "action": {
16 | "default_title": "MLCBot",
17 | "default_popup": "popup.html"
18 | },
19 | "content_scripts": [
20 | {
21 | "matches": [""],
22 | "js": ["content.js"]
23 | }
24 | ],
25 | "permissions": ["storage", "tabs", "webNavigation", "activeTab", "scripting"],
26 | "host_permissions": ["http://*/", "https://*/"]
27 | }
28 |
--------------------------------------------------------------------------------
/examples/chrome-extension/src/manifest_v2.json:
--------------------------------------------------------------------------------
1 | {
2 | "manifest_version": 2,
3 | "name": "MLCBot",
4 | "version": "0.1.0",
5 | "description": "Chat with your browser",
6 | "icons": {
7 | "16": "icons/icon-16.png",
8 | "32": "icons/icon-32.png",
9 | "64": "icons/icon-64.png",
10 | "128": "icons/icon-128.png"
11 | },
12 | "content_security_policy": "style-src-elem 'self' https://cdnjs.cloudflare.com; font-src 'self' https://cdnjs.cloudflare.com; script-src 'self' 'unsafe-eval' 'wasm-unsafe-eval'; default-src 'self' data:; connect-src 'self' data: http://localhost:8000 https://huggingface.co https://cdn-lfs.huggingface.co https://raw.githubusercontent.com https://cdn-lfs-us-1.hf.co",
13 | "browser_action": {
14 | "default_popup": "popup.html"
15 | },
16 | "content_scripts": [
17 | {
18 | "matches": [""],
19 | "js": ["content.js"]
20 | }
21 | ],
22 | "permissions": ["storage", "tabs", "webNavigation", "activeTab"]
23 | }
24 |
--------------------------------------------------------------------------------
/examples/chrome-extension/src/popup.css:
--------------------------------------------------------------------------------
1 | *,
2 | *::before,
3 | *::after {
4 | margin: 0;
5 | padding: 0;
6 | box-sizing: border-box;
7 | }
8 |
9 | html {
10 | font-family:
11 | -apple-system,
12 | BlinkMacSystemFont,
13 | Segoe UI,
14 | Helvetica,
15 | Arial,
16 | sans-serif;
17 | color: #222;
18 | }
19 |
20 | body {
21 | margin: 0;
22 | padding: 0.5rem;
23 | background-color: #778da9;
24 | width: 335px;
25 | font-size: small;
26 | }
27 |
28 | p {
29 | margin: 0;
30 | }
31 |
32 | /* LOADING BAR */
33 | #loadingContainer {
34 | margin-bottom: 15px;
35 | width: 315px;
36 | height: 8px;
37 | }
38 |
39 | /* INPUT AREA */
40 | #query-input {
41 | border: 1px solid #ccc;
42 | border-radius: 4px;
43 | }
44 |
45 | .input-container {
46 | display: flex;
47 | flex-direction: row;
48 | align-items: center;
49 | }
50 |
51 | .input-container input {
52 | width: 100%;
53 | outline: none;
54 | padding: 0.5rem;
55 | margin-right: 0.5rem;
56 | }
57 |
58 | /* BUTTON */
59 | .btn {
60 | background-color: #1b263b;
61 | color: white;
62 | font-size: small;
63 | cursor: pointer;
64 | border-radius: 4px;
65 | border: none;
66 | padding: 0.5rem;
67 | }
68 |
69 | .btn:hover {
70 | background-color: #d0d0d0;
71 | }
72 |
73 | .btn:disabled {
74 | background-color: #a7a7a7;
75 | color: rgb(255, 255, 255);
76 | cursor: default;
77 | }
78 |
79 | .btn img {
80 | width: 1rem;
81 | height: 1rem;
82 | }
83 |
84 | /* LOADING */
85 |
86 | .stage {
87 | display: flex;
88 | justify-content: center;
89 | align-items: center;
90 | position: relative;
91 | margin: 0 -5%;
92 | overflow: hidden;
93 | }
94 |
95 | #loading-indicator {
96 | display: none;
97 | color: white;
98 | margin-top: 0.5rem;
99 | }
100 |
101 | .dot-flashing {
102 | position: relative;
103 | width: 10px;
104 | height: 10px;
105 | border-radius: 5px;
106 | background-color: #1b263b;
107 | color: #1b263b;
108 | animation: dot-flashing 0.4s infinite linear alternate;
109 | animation-delay: 0.2s;
110 | }
111 |
112 | .dot-flashing::before,
113 | .dot-flashing::after {
114 | content: "";
115 | display: inline-block;
116 | position: absolute;
117 | top: 0;
118 | }
119 |
120 | .dot-flashing::before {
121 | left: -15px;
122 | width: 10px;
123 | height: 10px;
124 | border-radius: 5px;
125 | background-color: #1b263b;
126 | color: #1b263b;
127 | animation: dot-flashing 0.4s infinite alternate;
128 | animation-delay: 0s;
129 | }
130 |
131 | .dot-flashing::after {
132 | left: 15px;
133 | width: 10px;
134 | height: 10px;
135 | border-radius: 5px;
136 | background-color: #1b263b;
137 | color: #1b263b;
138 | animation: dot-flashing 0.4s infinite alternate;
139 | animation-delay: 0.4s;
140 | }
141 |
142 | @keyframes dot-flashing {
143 | 0% {
144 | background-color: #1b263b;
145 | }
146 |
147 | 50%,
148 | 100% {
149 | background-color: #415a77;
150 | }
151 | }
152 |
153 | /* ANSWERS */
154 | #queriesAnswersContainer {
155 | display: block;
156 | color: white;
157 | margin-top: 0.5rem;
158 | }
159 |
160 | #answer {
161 | color: #333333;
162 | }
163 |
164 | #answerWrapper {
165 | display: none;
166 | background-color: #ffd166;
167 | border-radius: 8px;
168 | padding: 0.5rem;
169 | margin-top: 0.5rem;
170 | }
171 |
172 | .queriesAnswers {
173 | border-radius: 8px;
174 | background-color: #ffd166;
175 | padding: 0.5rem;
176 | color: #333333;
177 | }
178 |
179 | #lastQuery {
180 | color: rgb(188, 188, 188);
181 | }
182 |
183 | #lastAnswer {
184 | color: white;
185 | margin-top: 0.5rem;
186 | }
187 |
188 | #lastRequest {
189 | padding: 0.5rem;
190 | margin-top: 0.5rem;
191 | background-color: #333333;
192 | border-radius: 4px;
193 | }
194 |
195 | /* ANSWER OPTIONS */
196 | .timeStamp {
197 | color: #9a8c98;
198 | }
199 |
200 | .copyRow {
201 | display: flex;
202 | flex-direction: row;
203 | align-items: end;
204 | justify-content: space-between;
205 | color: #a7a7a7;
206 | margin-top: 0.5rem;
207 | }
208 |
209 | .copyText {
210 | display: none;
211 | color: #a7a7a7;
212 | margin-right: 0.5rem;
213 | }
214 |
215 | .copyButton {
216 | color: #415a77;
217 | background-color: transparent;
218 | border: none;
219 | cursor: pointer;
220 | padding: 0;
221 | margin-left: 0.5rem;
222 | }
223 |
224 | .copyButton:hover {
225 | color: #5e80a7;
226 | background-color: transparent;
227 | }
228 |
229 | .removeButton {
230 | color: #415a77;
231 | background-color: transparent;
232 | border: none;
233 | cursor: pointer;
234 | padding: 0;
235 | }
236 |
237 | .removeButton:hover {
238 | color: #5e80a7;
239 | background-color: transparent;
240 | }
241 |
--------------------------------------------------------------------------------
/examples/chrome-extension/src/popup.html:
--------------------------------------------------------------------------------
<!doctype html>
<html>
  <head>
    <meta charset="utf-8" />
    <title>Chatbot</title>
    <link rel="stylesheet" href="./popup.css" />
    <script type="module" src="./popup.ts"></script>
  </head>
  <body>
    <p>Initializing model...</p>
    <!-- Popup UI (see popup.css): loading bar (#loadingContainer), query input
         (#query-input) with a submit button, loading indicator, and the answers
         area (#answerWrapper, #queriesAnswersContainer). -->
  </body>
</html>
--------------------------------------------------------------------------------
/examples/embeddings/README.md:
--------------------------------------------------------------------------------
1 | # WebLLM Embeddings Example
2 |
3 | This folder provides a minimum demo to show the WebLLM embedding API in a webapp setting.
4 | To try it out, you can do the following steps under this folder
5 |
6 | ```bash
7 | npm install
8 | npm start
9 | ```
10 |
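A minimal sketch of the embedding flow is shown below; the model id is illustrative (pick an embedding model from `webllm.prebuiltAppConfig`), and `src/embeddings.ts` contains the full example:

```typescript
import * as webllm from "@mlc-ai/web-llm";

async function main() {
  // Load an embedding model (the id below is illustrative).
  const engine = await webllm.CreateMLCEngine("snowflake-arctic-embed-m-q0f32-MLC-b4", {
    initProgressCallback: (report) => console.log(report.text),
  });

  // OpenAI-style embeddings API: one vector per input string.
  const reply = await engine.embeddings.create({
    input: ["What is the capital of Canada?", "Ottawa is the capital of Canada."],
  });
  console.log(reply.data.map((d) => d.embedding.length)); // embedding dimensionality per input
}

main();
```
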
11 | Note: if you would like to hack the WebLLM core package,
12 | you can change the web-llm dependency to `"file:../.."` and follow the build-from-source
13 | instructions in the project to build WebLLM locally. This option is only recommended
14 | if you would like to hack the WebLLM core package.
15 |
--------------------------------------------------------------------------------
/examples/embeddings/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "embeddings-example",
3 | "version": "0.1.0",
4 | "private": true,
5 | "scripts": {
6 | "start": "parcel src/embeddings.html --port 8885",
7 | "build": "parcel build src/embeddings.html --dist-dir lib"
8 | },
9 | "devDependencies": {
10 | "buffer": "^5.7.1",
11 | "parcel": "^2.8.3",
12 | "process": "^0.11.10",
13 | "tslib": "^2.3.1",
14 | "typescript": "^4.9.5",
15 | "url": "^0.11.3"
16 | },
17 | "dependencies": {
18 | "@mlc-ai/web-llm": "^0.2.79",
19 | "langchain": "0.2.15"
20 | }
21 | }
22 |
--------------------------------------------------------------------------------
/examples/embeddings/src/embeddings.html:
--------------------------------------------------------------------------------
<!doctype html>
<html>
  <body>
    <h2>WebLLM Test Page</h2>
    Open console to see output
    <br />
    <br />
    <label id="init-label"> </label>

    <h3>Prompt</h3>
    <label id="prompt-label"> </label>

    <h3>Response</h3>
    <label id="generate-label"> </label>
    <br />
    <script type="module" src="./embeddings.ts"></script>
  </body>
</html>
--------------------------------------------------------------------------------
/examples/function-calling/README.md:
--------------------------------------------------------------------------------
1 | ### OpenAI API Demos - Function calling
2 |
3 | This folder contains two main ways of using function calling with WebLLM.
4 |
5 | `function-calling-manual` demonstrates how you can use function calling with Llama 3.1 and Hermes 2
6 | without using the `tools`, `tool_choice`, and `tool_call` fields. This is the most flexible way: you can follow
7 | the instructions given by the model releaser and iterate on top of that. However, you need to do the parsing on your own, and the format differs for each model. For instance, Hermes 2 models use `<tool_call>` and `</tool_call>` to wrap a tool call, which may be very different from other models' formats.
8 |
9 | `function-calling-openai` conforms to the OpenAI function-calling usage, leveraging the `tools`, `tool_choice`, and `tool_call`
10 | fields. This is easier to use, but sacrifices flexibility since we use a pre-defined system prompt
11 | for it.
12 |
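As a rough sketch of the manual route, the snippet below builds the tool description into the system prompt and parses the `<tool_call>` wrapper itself; the prompt wording and parsing follow the Hermes 2 convention and are illustrative, so see `function-calling-manual` for the full example:

```typescript
import * as webllm from "@mlc-ai/web-llm";

async function manualFunctionCalling() {
  const engine = await webllm.CreateMLCEngine("Hermes-2-Pro-Llama-3-8B-q4f16_1-MLC");

  // Describe the available tool in the system prompt, following the model's own convention.
  const systemPrompt =
    "You are a function-calling AI. You may call the following tool by replying with " +
    '<tool_call>{"name": ..., "arguments": ...}</tool_call>:\n' +
    '{"name": "get_current_weather", "parameters": {"location": "string"}}';

  const reply = await engine.chat.completions.create({
    messages: [
      { role: "system", content: systemPrompt },
      { role: "user", content: "What is the weather in Tokyo?" },
    ],
  });

  // Parse the model-specific wrapper ourselves.
  const text = reply.choices[0].message.content ?? "";
  const match = text.match(/<tool_call>([\s\S]*?)<\/tool_call>/);
  if (match) {
    const toolCall = JSON.parse(match[1]);
    console.log("Model wants to call:", toolCall.name, toolCall.arguments);
  }
}

manualFunctionCalling();
```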
--------------------------------------------------------------------------------
/examples/function-calling/function-calling-manual/README.md:
--------------------------------------------------------------------------------
1 | ### Demos - Function calling
2 |
3 | Run `npm install` first, followed by `npm start`.
4 |
5 | Note: if you would like to hack the WebLLM core package,
6 | you can change the web-llm dependency to `"file:../../.."` and follow the build-from-source
7 | instructions in the project to build WebLLM locally. This option is only recommended
8 | if you would like to hack the WebLLM core package.
9 |
--------------------------------------------------------------------------------
/examples/function-calling/function-calling-manual/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "openai-api",
3 | "version": "0.1.0",
4 | "private": true,
5 | "scripts": {
6 | "start": "parcel src/function_calling_manual.html --port 8888",
7 | "build": "parcel build src/function_calling_manual.html --dist-dir lib"
8 | },
9 | "devDependencies": {
10 | "buffer": "^5.7.1",
11 | "parcel": "^2.8.3",
12 | "process": "^0.11.10",
13 | "tslib": "^2.3.1",
14 | "typescript": "^4.9.5",
15 | "url": "^0.11.3"
16 | },
17 | "dependencies": {
18 | "@mlc-ai/web-llm": "^0.2.79"
19 | }
20 | }
21 |
--------------------------------------------------------------------------------
/examples/function-calling/function-calling-manual/src/function_calling_manual.html:
--------------------------------------------------------------------------------
<!doctype html>
<html>
  <body>
    <h2>WebLLM Test Page</h2>
    Open console to see output
    <br />
    <br />
    <label id="init-label"> </label>
    <br />
    <br />
    <label id="generate-label"> </label>
    <br />
    <script type="module" src="./function_calling_manual.ts"></script>
  </body>
</html>
--------------------------------------------------------------------------------
/examples/function-calling/function-calling-openai/README.md:
--------------------------------------------------------------------------------
1 | ### Demos - Function calling
2 |
3 | Run `npm install` first, followed by `npm start`.
4 |
5 | Note: if you would like to hack the WebLLM core package,
6 | you can change the web-llm dependency to `"file:../../.."` and follow the build-from-source
7 | instructions in the project to build WebLLM locally. This option is only recommended
8 | if you would like to hack the WebLLM core package.
9 |
--------------------------------------------------------------------------------
/examples/function-calling/function-calling-openai/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "openai-api",
3 | "version": "0.1.0",
4 | "private": true,
5 | "scripts": {
6 | "start": "parcel src/function_calling_openai.html --port 8888",
7 | "build": "parcel build src/function_calling_openai.html --dist-dir lib"
8 | },
9 | "devDependencies": {
10 | "buffer": "^5.7.1",
11 | "parcel": "^2.8.3",
12 | "process": "^0.11.10",
13 | "tslib": "^2.3.1",
14 | "typescript": "^4.9.5",
15 | "url": "^0.11.3"
16 | },
17 | "dependencies": {
18 | "@mlc-ai/web-llm": "^0.2.79"
19 | }
20 | }
21 |
--------------------------------------------------------------------------------
/examples/function-calling/function-calling-openai/src/function_calling_openai.html:
--------------------------------------------------------------------------------
<!doctype html>
<html>
  <body>
    <h2>WebLLM Test Page</h2>
    Open console to see output
    <br />
    <br />
    <label id="init-label"> </label>
    <br />
    <br />
    <label id="generate-label"> </label>
    <br />
    <script type="module" src="./function_calling_openai.ts"></script>
  </body>
</html>
--------------------------------------------------------------------------------
/examples/function-calling/function-calling-openai/src/function_calling_openai.ts:
--------------------------------------------------------------------------------
1 | import * as webllm from "@mlc-ai/web-llm";
2 |
3 | function setLabel(id: string, text: string) {
4 | const label = document.getElementById(id);
5 | if (label == null) {
6 | throw Error("Cannot find label " + id);
7 | }
8 | label.innerText = text;
9 | }
10 |
11 | async function main() {
12 | const initProgressCallback = (report: webllm.InitProgressReport) => {
13 | setLabel("init-label", report.text);
14 | };
15 | const selectedModel = "Hermes-2-Pro-Llama-3-8B-q4f16_1-MLC";
16 | const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
17 | selectedModel,
18 | { initProgressCallback: initProgressCallback },
19 | );
20 |
21 |   const tools: Array<webllm.ChatCompletionTool> = [
22 | {
23 | type: "function",
24 | function: {
25 | name: "get_current_weather",
26 | description: "Get the current weather in a given location",
27 | parameters: {
28 | type: "object",
29 | properties: {
30 | location: {
31 | type: "string",
32 | description: "The city and state, e.g. San Francisco, CA",
33 | },
34 | unit: { type: "string", enum: ["celsius", "fahrenheit"] },
35 | },
36 | required: ["location"],
37 | },
38 | },
39 | },
40 | ];
41 |
42 | const request: webllm.ChatCompletionRequest = {
43 | stream: true, // works with stream as well, where the last chunk returns tool_calls
44 | stream_options: { include_usage: true },
45 | messages: [
46 | {
47 | role: "user",
48 | content:
49 | "What is the current weather in celsius in Pittsburgh and Tokyo?",
50 | },
51 | ],
52 | tool_choice: "auto",
53 | tools: tools,
54 | };
55 |
56 | if (!request.stream) {
57 | const reply0 = await engine.chat.completions.create(request);
58 | console.log(reply0.choices[0]);
59 | console.log(reply0.usage);
60 | } else {
61 | // If streaming, the last chunk returns tool calls
62 | const asyncChunkGenerator = await engine.chat.completions.create(request);
63 | let message = "";
64 | let lastChunk: webllm.ChatCompletionChunk | undefined;
65 | let usageChunk: webllm.ChatCompletionChunk | undefined;
66 | for await (const chunk of asyncChunkGenerator) {
67 | console.log(chunk);
68 | message += chunk.choices[0]?.delta?.content || "";
69 | setLabel("generate-label", message);
70 | if (!chunk.usage) {
71 | lastChunk = chunk;
72 | }
73 | usageChunk = chunk;
74 | }
75 | console.log(lastChunk!.choices[0].delta);
76 | console.log(usageChunk!.usage);
77 | }
78 | }
79 |
80 | main();
81 |
--------------------------------------------------------------------------------
/examples/get-started-web-worker/README.md:
--------------------------------------------------------------------------------
1 | # WebLLM Get Started with WebWorker
2 |
3 | This folder provides a minimum demo to show the WebLLM API using a
4 | [Web Worker](https://developer.mozilla.org/en-US/docs/Web/API/Web_Workers_API/Using_web_workers).
5 | The main benefit of a web worker is that all ML workloads run on a separate thread and, as a
6 | result, are less likely to block the UI.
7 |
8 | To try it out, you can do the following steps under this folder
9 |
10 | ```bash
11 | npm install
12 | npm start
13 | ```
14 |
15 | Note: if you would like to hack the WebLLM core package,
16 | you can change the web-llm dependency to `"file:../.."` and follow the build-from-source
17 | instructions in the project to build WebLLM locally. This option is only recommended
18 | if you would like to hack the WebLLM core package.
19 |
--------------------------------------------------------------------------------
/examples/get-started-web-worker/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "get-started-web-worker",
3 | "version": "0.1.0",
4 | "private": true,
5 | "scripts": {
6 | "start": "parcel src/get_started.html --port 8885",
7 | "build": "parcel build src/get_started.html --dist-dir lib"
8 | },
9 | "devDependencies": {
10 | "buffer": "^6.0.3",
11 | "parcel": "^2.8.3",
12 | "process": "^0.11.10",
13 | "tslib": "^2.3.1",
14 | "typescript": "^4.9.5",
15 | "url": "^0.11.3"
16 | },
17 | "dependencies": {
18 | "@mlc-ai/web-llm": "^0.2.79"
19 | }
20 | }
21 |
--------------------------------------------------------------------------------
/examples/get-started-web-worker/src/get_started.html:
--------------------------------------------------------------------------------
<!doctype html>
<html>
  <body>
    <h2>WebLLM Test Page</h2>
    Open console to see output
    <br />
    <br />
    <label id="init-label"> </label>

    <h3>Prompt</h3>
    <label id="prompt-label"> </label>

    <h3>Response</h3>
    <label id="generate-label"> </label>
    <br />
    <script type="module" src="./main.ts"></script>
  </body>
</html>
--------------------------------------------------------------------------------
/examples/get-started-web-worker/src/main.ts:
--------------------------------------------------------------------------------
1 | import * as webllm from "@mlc-ai/web-llm";
2 |
3 | function setLabel(id: string, text: string) {
4 | const label = document.getElementById(id);
5 | if (label == null) {
6 | throw Error("Cannot find label " + id);
7 | }
8 | label.innerText = text;
9 | }
10 |
11 | // There are two demonstrations, pick one to run
12 |
13 | /**
14 | * Chat completion (OpenAI style) without streaming, where we get the entire response at once.
15 | */
16 | async function mainNonStreaming() {
17 | const initProgressCallback = (report: webllm.InitProgressReport) => {
18 | setLabel("init-label", report.text);
19 | };
20 | const selectedModel = "Llama-3.1-8B-Instruct-q4f32_1-MLC";
21 |
22 | const engine: webllm.MLCEngineInterface =
23 | await webllm.CreateWebWorkerMLCEngine(
24 | new Worker(new URL("./worker.ts", import.meta.url), { type: "module" }),
25 | selectedModel,
26 | { initProgressCallback: initProgressCallback },
27 | );
28 |
29 | const request: webllm.ChatCompletionRequest = {
30 | messages: [
31 | {
32 | role: "system",
33 | content:
34 | "You are a helpful, respectful and honest assistant. " +
35 | "Be as happy as you can when speaking please. ",
36 | },
37 | { role: "user", content: "Provide me three US states." },
38 | { role: "assistant", content: "California, New York, Pennsylvania." },
39 | { role: "user", content: "Two more please!" },
40 | ],
41 | n: 3,
42 | temperature: 1.5,
43 | max_tokens: 256,
44 | };
45 |
46 | const reply0 = await engine.chat.completions.create(request);
47 | console.log(reply0);
48 |
49 | console.log(reply0.usage);
50 | }
51 |
52 | /**
53 | * Chat completion (OpenAI style) with streaming, where delta is sent while generating response.
54 | */
55 | async function mainStreaming() {
56 | const initProgressCallback = (report: webllm.InitProgressReport) => {
57 | setLabel("init-label", report.text);
58 | };
59 | const selectedModel = "Llama-3.1-8B-Instruct-q4f32_1-MLC";
60 |
61 | const engine: webllm.MLCEngineInterface =
62 | await webllm.CreateWebWorkerMLCEngine(
63 | new Worker(new URL("./worker.ts", import.meta.url), { type: "module" }),
64 | selectedModel,
65 | { initProgressCallback: initProgressCallback },
66 | );
67 |
68 | const request: webllm.ChatCompletionRequest = {
69 | stream: true,
70 | stream_options: { include_usage: true },
71 | messages: [
72 | {
73 | role: "system",
74 | content:
75 | "You are a helpful, respectful and honest assistant. " +
76 | "Be as happy as you can when speaking please. ",
77 | },
78 | { role: "user", content: "Provide me three US states." },
79 | { role: "assistant", content: "California, New York, Pennsylvania." },
80 | { role: "user", content: "Two more please!" },
81 | ],
82 | temperature: 1.5,
83 | max_tokens: 256,
84 | };
85 |
86 | const asyncChunkGenerator = await engine.chat.completions.create(request);
87 | let message = "";
88 | for await (const chunk of asyncChunkGenerator) {
89 | console.log(chunk);
90 | message += chunk.choices[0]?.delta?.content || "";
91 | setLabel("generate-label", message);
92 | if (chunk.usage) {
93 | console.log(chunk.usage); // only last chunk has usage
94 | }
95 | // engine.interruptGenerate(); // works with interrupt as well
96 | }
97 | console.log("Final message:\n", await engine.getMessage()); // the concatenated message
98 | }
99 |
100 | // Run one of the functions below
101 | // mainNonStreaming();
102 | mainStreaming();
103 |
--------------------------------------------------------------------------------
/examples/get-started-web-worker/src/worker.ts:
--------------------------------------------------------------------------------
1 | import { WebWorkerMLCEngineHandler } from "@mlc-ai/web-llm";
2 |
3 | // Hookup an engine to a worker handler
4 | const handler = new WebWorkerMLCEngineHandler();
5 | self.onmessage = (msg: MessageEvent) => {
6 | handler.onmessage(msg);
7 | };
8 |
--------------------------------------------------------------------------------
/examples/get-started/README.md:
--------------------------------------------------------------------------------
1 | # WebLLM Get Started App
2 |
3 | This folder provides a minimum demo to show the WebLLM API in a webapp setting.
4 | To try it out, you can do the following steps under this folder
5 |
6 | ```bash
7 | npm install
8 | npm start
9 | ```
10 |
11 | Note: if you would like to hack the WebLLM core package,
12 | you can change the web-llm dependency to `"file:../.."` and follow the build-from-source
13 | instructions in the project to build WebLLM locally. This option is only recommended
14 | if you would like to hack the WebLLM core package.
15 |
--------------------------------------------------------------------------------
/examples/get-started/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "get-started",
3 | "version": "0.1.0",
4 | "private": true,
5 | "scripts": {
6 | "start": "parcel src/get_started.html --port 8888",
7 | "build": "parcel build src/get_started.html --dist-dir lib"
8 | },
9 | "devDependencies": {
10 | "buffer": "^5.7.1",
11 | "parcel": "^2.8.3",
12 | "process": "^0.11.10",
13 | "tslib": "^2.3.1",
14 | "typescript": "^4.9.5",
15 | "url": "^0.11.3"
16 | },
17 | "dependencies": {
18 | "@mlc-ai/web-llm": "^0.2.79"
19 | }
20 | }
21 |
--------------------------------------------------------------------------------
/examples/get-started/src/get_started.html:
--------------------------------------------------------------------------------
<!doctype html>
<html>
  <body>
    <h2>WebLLM Test Page</h2>
    Open console to see output
    <br />
    <br />
    <label id="init-label"> </label>

    <h3>Prompt</h3>
    <label id="prompt-label"> </label>

    <h3>Response</h3>
    <label id="generate-label"> </label>
    <br />
    <script type="module" src="./get_started.ts"></script>
  </body>
</html>
--------------------------------------------------------------------------------
/examples/get-started/src/get_started.ts:
--------------------------------------------------------------------------------
1 | import * as webllm from "@mlc-ai/web-llm";
2 |
3 | function setLabel(id: string, text: string) {
4 | const label = document.getElementById(id);
5 | if (label == null) {
6 | throw Error("Cannot find label " + id);
7 | }
8 | label.innerText = text;
9 | }
10 |
11 | async function main() {
12 | const initProgressCallback = (report: webllm.InitProgressReport) => {
13 | setLabel("init-label", report.text);
14 | };
15 | // Option 1: If we do not specify appConfig, we use `prebuiltAppConfig` defined in `config.ts`
16 | const selectedModel = "Llama-3.1-8B-Instruct-q4f32_1-MLC";
17 | const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
18 | selectedModel,
19 | {
20 | initProgressCallback: initProgressCallback,
21 | logLevel: "INFO", // specify the log level
22 | },
23 | // customize kv cache, use either context_window_size or sliding_window_size (with attention sink)
24 | {
25 | context_window_size: 2048,
26 | // sliding_window_size: 1024,
27 | // attention_sink_size: 4,
28 | },
29 | );
30 |
31 | // Option 2: Specify your own model other than the prebuilt ones
32 | // const appConfig: webllm.AppConfig = {
33 | // model_list: [
34 | // {
35 | // model: "https://huggingface.co/mlc-ai/Llama-3.1-8B-Instruct-q4f32_1-MLC",
36 | // model_id: "Llama-3.1-8B-Instruct-q4f32_1-MLC",
37 | // model_lib:
38 | // webllm.modelLibURLPrefix +
39 | // webllm.modelVersion +
40 | // "/Llama-3_1-8B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm",
41 | // overrides: {
42 | // context_window_size: 2048,
43 | // },
44 | // },
45 | // ],
46 | // };
47 | // const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
48 | // selectedModel,
49 | // { appConfig: appConfig, initProgressCallback: initProgressCallback },
50 | // );
51 |
52 | // Option 3: Instantiate MLCEngine() and call reload() separately
53 | // const engine: webllm.MLCEngineInterface = new webllm.MLCEngine({
54 | // appConfig: appConfig, // if do not specify, we use webllm.prebuiltAppConfig
55 | // initProgressCallback: initProgressCallback,
56 | // });
57 | // await engine.reload(selectedModel);
58 |
59 | const reply0 = await engine.chat.completions.create({
60 | messages: [{ role: "user", content: "List three US states." }],
61 | // below configurations are all optional
62 | n: 3,
63 | temperature: 1.5,
64 | max_tokens: 256,
65 | // 46510 and 7188 are "California", and 8421 and 51325 are "Texas" in Llama-3.1-8B-Instruct
66 |     // so we would have a higher chance of seeing "Texas", but never "California", in the answer
67 | logit_bias: {
68 | "46510": -100,
69 | "7188": -100,
70 | "8421": 5,
71 | "51325": 5,
72 | },
73 | logprobs: true,
74 | top_logprobs: 2,
75 | });
76 | console.log(reply0);
77 | console.log(reply0.usage);
78 |
79 | // To change model, either create a new engine via `CreateMLCEngine()`, or call `engine.reload(modelId)`
80 | }
81 |
82 | main();
83 |
--------------------------------------------------------------------------------
/examples/json-mode/README.md:
--------------------------------------------------------------------------------
1 | ### OpenAI API Demos
2 |
3 | Run `npm install` first, followed by `npm start`.
4 |
5 | Note: if you would like to hack the WebLLM core package,
6 | you can change the web-llm dependency to `"file:../.."` and follow the build-from-source
7 | instructions in the project to build WebLLM locally. This option is only recommended
8 | if you would like to hack the WebLLM core package.
9 |
--------------------------------------------------------------------------------
/examples/json-mode/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "openai-api",
3 | "version": "0.1.0",
4 | "private": true,
5 | "scripts": {
6 | "start": "parcel src/json_mode.html --port 8888",
7 | "build": "parcel build src/json_mode.html --dist-dir lib"
8 | },
9 | "devDependencies": {
10 | "buffer": "^5.7.1",
11 | "parcel": "^2.8.3",
12 | "process": "^0.11.10",
13 | "tslib": "^2.3.1",
14 | "typescript": "^4.9.5",
15 | "url": "^0.11.3"
16 | },
17 | "dependencies": {
18 | "@mlc-ai/web-llm": "^0.2.79"
19 | }
20 | }
21 |
--------------------------------------------------------------------------------
/examples/json-mode/src/json_mode.html:
--------------------------------------------------------------------------------
<!doctype html>
<html>
  <body>
    <h2>WebLLM Test Page</h2>
    Open console to see output.
    <br />
    <br />
    <label id="init-label"> </label>
    <br />
    <br />
    <label id="generate-label"> </label>
    <br />
    <script type="module" src="./json_mode.ts"></script>
  </body>
</html>
--------------------------------------------------------------------------------
/examples/json-mode/src/json_mode.ts:
--------------------------------------------------------------------------------
1 | import * as webllm from "@mlc-ai/web-llm";
2 |
3 | function setLabel(id: string, text: string) {
4 | const label = document.getElementById(id);
5 | if (label == null) {
6 | throw Error("Cannot find label " + id);
7 | }
8 | label.innerText = text;
9 | }
10 |
11 | async function main() {
12 | const initProgressCallback = (report: webllm.InitProgressReport) => {
13 | setLabel("init-label", report.text);
14 | };
15 | // Pick any one of these models to start trying -- most models in WebLLM support grammar
16 | const selectedModel = "Llama-3.2-3B-Instruct-q4f16_1-MLC";
17 | // const selectedModel = "Qwen2.5-1.5B-Instruct-q4f16_1-MLC";
18 | // const selectedModel = "Phi-3.5-mini-instruct-q4f16_1-MLC";
19 | const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
20 | selectedModel,
21 | { initProgressCallback: initProgressCallback },
22 | );
23 | // Note that you'd need to prompt the model to answer in JSON either in
24 |   // the user's message or in the system prompt
25 | const request: webllm.ChatCompletionRequest = {
26 | stream: false, // works with streaming, logprobs, top_logprobs as well
27 | messages: [
28 | {
29 | role: "user",
30 | content: "Write a short JSON file introducing yourself.",
31 | },
32 | ],
33 | n: 2,
34 | max_tokens: 128,
35 | response_format: { type: "json_object" } as webllm.ResponseFormat,
36 | };
37 |
38 | const reply0 = await engine.chatCompletion(request);
39 | console.log(reply0);
40 | console.log("First reply's last choice:\n" + (await engine.getMessage()));
41 | console.log(reply0.usage);
42 | }
43 |
44 | main();
45 |
--------------------------------------------------------------------------------
/examples/json-schema/README.md:
--------------------------------------------------------------------------------
1 | ### OpenAI API Demos
2 |
3 | Run `npm install` first, followed by `npm start`.
4 |
5 | Note: if you would like to hack the WebLLM core package,
6 | you can change the web-llm dependency to `"file:../.."` and follow the build-from-source
7 | instructions in the project to build WebLLM locally. This option is only recommended
8 | if you would like to hack the WebLLM core package.
9 |
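10 | The core of this example is constraining generation with a JSON schema passed through
11 | `response_format`. Below is a minimal sketch of that idea; the schema contents, the prompt, and
12 | the use of the `schema` field are illustrative assumptions, so consult the full source under
13 | `src/` in this folder for the exact usage.
14 | 
15 | ```ts
16 | import * as webllm from "@mlc-ai/web-llm";
17 | 
18 | async function sketch() {
19 |   const engine = await webllm.CreateMLCEngine("Llama-3.2-3B-Instruct-q4f16_1-MLC");
20 |   // Hypothetical schema: an object with one required string field "name".
21 |   const schema = JSON.stringify({
22 |     type: "object",
23 |     properties: { name: { type: "string" } },
24 |     required: ["name"],
25 |   });
26 |   const reply = await engine.chatCompletion({
27 |     messages: [{ role: "user", content: "Introduce yourself in JSON." }],
28 |     max_tokens: 64,
29 |     // Assumption: ResponseFormat accepts the stringified schema via `schema`.
30 |     response_format: { type: "json_object", schema: schema } as webllm.ResponseFormat,
31 |   });
32 |   console.log(reply.choices[0].message.content);
33 | }
34 | 
35 | sketch();
36 | ```
37 | 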
--------------------------------------------------------------------------------
/examples/json-schema/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "openai-api",
3 | "version": "0.1.0",
4 | "private": true,
5 | "scripts": {
6 | "start": "parcel src/json_schema.html --port 8885",
7 | "build": "parcel build src/json_schema.html --dist-dir lib"
8 | },
9 | "devDependencies": {
10 | "buffer": "^5.7.1",
11 | "parcel": "^2.8.3",
12 | "process": "^0.11.10",
13 | "tslib": "^2.3.1",
14 | "typescript": "^4.9.5",
15 | "url": "^0.11.3"
16 | },
17 | "dependencies": {
18 | "@mlc-ai/web-llm": "^0.2.79"
19 | }
20 | }
21 |
--------------------------------------------------------------------------------
/examples/json-schema/src/json_schema.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
6 |
7 |
8 |
WebLLM Test Page
9 | Open console to see output.
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
--------------------------------------------------------------------------------
/examples/logit-processor/README.md:
--------------------------------------------------------------------------------
1 | # WebLLM Logit Processor and Low-Level API Example
2 |
3 | This folder demonstrates the usage of `LogitProcessor`, which lets you manipulate the raw logits
4 | before a token is sampled (e.g. setting certain tokens' logits to `Infinity` or `-Infinity`).
5 | We show how to use it both with and without a web worker, toggled via `USE_WEB_WORKER`
6 | in `logit_processor.ts` (see `worker.ts` for how `LogitProcessor` is registered there).
7 | 
8 | We also demonstrate the low-level API `forwardTokensAndSample()`, which, unlike `chat.completions.create()`,
9 | does not assume autoregressive chatting and therefore gives more fine-grained control.
10 | 
11 | See `my_logit_processor.ts` for how to customize your own logit processor. There we manually set
12 | the logit of token 0 to `100.0`, large enough that token 0 should always be sampled, which is
13 | indeed what the console log shows. We also demonstrate that a `LogitProcessor` can be stateful,
14 | and that its state can be cleared with `LogitProcessor.resetState()`.
15 |
16 | To try it out, you can do the following steps under this folder
17 |
18 | ```bash
19 | npm install
20 | npm start
21 | ```
22 |
23 | Note: if you would like to hack on the WebLLM core package, you can change the web-llm dependency to `"file:../.."` and follow the build-from-source instructions in the project to build WebLLM locally. This option is only recommended for developing the WebLLM core package itself.
24 |
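25 | As a further illustration of the masking use case mentioned above (forcing certain tokens in or
26 | out of the sample), below is a minimal sketch of a `LogitProcessor` that only ever allows a fixed
27 | set of token ids by setting every other logit to `-Infinity`. The class name and the allowed ids
28 | are hypothetical; `src/my_logit_processor.ts` is the processor actually used by this example.
29 | 
30 | ```ts
31 | import * as webllm from "@mlc-ai/web-llm";
32 | 
33 | // Masks every token id not in `allowed` by setting its logit to -Infinity,
34 | // so only the allowed ids can ever be sampled.
35 | export class MaskingLogitProcessor implements webllm.LogitProcessor {
36 |   constructor(private allowed: Set<number>) {}
37 | 
38 |   processLogits(logits: Float32Array): Float32Array {
39 |     for (let i = 0; i < logits.length; i++) {
40 |       if (!this.allowed.has(i)) {
41 |         logits[i] = Number.NEGATIVE_INFINITY;
42 |       }
43 |     }
44 |     return logits;
45 |   }
46 | 
47 |   processSampledToken(token: number): void {
48 |     // Stateless: nothing to record.
49 |   }
50 | 
51 |   resetState(): void {
52 |     // Stateless: nothing to reset.
53 |   }
54 | }
55 | ```
56 | 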
--------------------------------------------------------------------------------
/examples/logit-processor/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "logit-processor",
3 | "version": "0.1.0",
4 | "private": true,
5 | "scripts": {
6 | "start": "parcel src/logit_processor.html --port 8885",
7 | "build": "parcel build src/logit_processor.html --dist-dir lib"
8 | },
9 | "devDependencies": {
10 | "buffer": "^5.7.1",
11 | "parcel": "^2.8.3",
12 | "process": "^0.11.10",
13 | "tslib": "^2.3.1",
14 | "typescript": "^4.9.5",
15 | "url": "^0.11.3"
16 | },
17 | "dependencies": {
18 | "@mlc-ai/web-llm": "^0.2.79"
19 | }
20 | }
21 |
--------------------------------------------------------------------------------
/examples/logit-processor/src/logit_processor.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
6 |
7 |
8 |
WebLLM Logit Processor Test Page
9 | Open console to see the effect of your logit processor.
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
--------------------------------------------------------------------------------
/examples/logit-processor/src/logit_processor.ts:
--------------------------------------------------------------------------------
1 | import * as webllm from "@mlc-ai/web-llm";
2 | import { MyLogitProcessor } from "./my_logit_processor";
3 |
4 | const USE_WEB_WORKER = true; // Toggle this to use Logit Processor without a web worker
5 | const AUTOREGRESS_LIMIT = 32; // How many tokens to generate for this test
6 |
7 | function setLabel(id: string, text: string) {
8 | const label = document.getElementById(id);
9 | if (label == null) {
10 | throw Error("Cannot find label " + id);
11 | }
12 | label.innerText = text;
13 | }
14 |
15 | async function main() {
16 | const initProgressCallback = (report: webllm.InitProgressReport) => {
17 | setLabel("init-label", report.text);
18 | };
19 | // Instantiate myLogitProcessor and register it in the logitProcessorRegistry
20 | const myLogitProcessor = new MyLogitProcessor();
21 | const logitProcessorRegistry = new Map<string, webllm.LogitProcessor>();
22 | logitProcessorRegistry.set("phi-2-q4f32_1-MLC", myLogitProcessor);
23 |
24 | let engine: webllm.MLCEngineInterface;
25 |
26 | // Depending on whether we use a web worker, the code is slightly different
27 | if (USE_WEB_WORKER) {
28 | // see worker.ts on how LogitProcessor plays a role there
29 | engine = await webllm.CreateWebWorkerMLCEngine(
30 | new Worker(new URL("./worker.ts", import.meta.url), { type: "module" }),
31 | "phi-2-q4f32_1-MLC",
32 | { initProgressCallback: initProgressCallback },
33 | );
34 | } else {
35 | engine = await webllm.CreateMLCEngine("phi-2-q4f32_1-MLC", {
36 | initProgressCallback: initProgressCallback,
37 | logitProcessorRegistry: logitProcessorRegistry,
38 | });
39 | }
40 |
41 | // Below we demonstrate the usage of a low-level API `forwardTokensAndSample()`
42 | const prompt: Array<number> = [42];
43 | let nextToken = await engine.forwardTokensAndSample(
44 | prompt,
45 | /*isPrefill=*/ true,
46 | );
47 | console.log(nextToken);
48 |
49 | let counter = prompt.length;
50 | while (counter < AUTOREGRESS_LIMIT) {
51 | counter += 1;
52 | nextToken = await engine.forwardTokensAndSample(
53 | [nextToken],
54 | /*isPrefill=*/ false,
55 | );
56 | console.log(nextToken);
57 | }
58 |
59 | // By calling `engine.resetChat()`, we trigger MyLogitProcessor.resetState()
60 | engine.resetChat();
61 | counter = prompt.length;
62 | nextToken = await engine.forwardTokensAndSample(prompt, /*isPrefill=*/ true);
63 | console.log(nextToken);
64 | while (counter < AUTOREGRESS_LIMIT) {
65 | counter += 1;
66 | nextToken = await engine.forwardTokensAndSample(
67 | [nextToken],
68 | /*isPrefill=*/ false,
69 | );
70 | console.log(nextToken);
71 | }
72 |
73 | // `forwardTokensAndSample()` is compatible with runtime stats, so we can print them here.
74 | console.log(await engine.runtimeStatsText());
75 | }
76 |
77 | main();
78 |
--------------------------------------------------------------------------------
/examples/logit-processor/src/my_logit_processor.ts:
--------------------------------------------------------------------------------
1 | import * as webllm from "@mlc-ai/web-llm";
2 |
3 | // Define a custom LogitProcessor that forces token 0 to always be sampled
4 | export class MyLogitProcessor implements webllm.LogitProcessor {
5 | private tokenSequence: Array<number> = [];
6 |
7 | processLogits(logits: Float32Array): Float32Array {
8 | logits[0] = 100.0; // should be enough so that we always sample token 0 below
9 | return logits;
10 | }
11 |
12 | processSampledToken(token: number): void {
13 | this.tokenSequence.push(token);
14 | console.log("processSampledToken: " + this.tokenSequence.length);
15 | }
16 |
17 | resetState(): void {
18 | this.tokenSequence = [];
19 | console.log("resetState");
20 | }
21 | }
22 |
--------------------------------------------------------------------------------
/examples/logit-processor/src/worker.ts:
--------------------------------------------------------------------------------
1 | // Serve the chat workload through a web worker
2 | import * as webllm from "@mlc-ai/web-llm";
3 | import { MyLogitProcessor } from "./my_logit_processor";
4 |
5 | console.log("Use web worker for logit processor");
6 |
7 | const myLogitProcessor = new MyLogitProcessor();
8 | const logitProcessorRegistry = new Map<string, webllm.LogitProcessor>();
9 | logitProcessorRegistry.set("phi-2-q4f32_1-MLC", myLogitProcessor);
10 |
11 | const handler = new webllm.WebWorkerMLCEngineHandler();
12 | handler.setLogitProcessorRegistry(logitProcessorRegistry);
13 | self.onmessage = (msg: MessageEvent) => {
14 | handler.onmessage(msg);
15 | };
16 |
--------------------------------------------------------------------------------
/examples/multi-models/README.md:
--------------------------------------------------------------------------------
1 | # WebLLM Multi-Model Example
2 | 
3 | This folder provides a minimal demo of using the WebLLM API with multiple models in a web app.
4 | To try it out, you can do the following steps under this folder
5 |
6 | ```bash
7 | npm install
8 | npm start
9 | ```
10 |
11 | Note: if you would like to hack on the WebLLM core package,
12 | you can change the web-llm dependency to `"file:../.."` and follow the build-from-source
13 | instructions in the project to build WebLLM locally. This option is only recommended
14 | for developing the WebLLM core package itself.
15 |
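16 | The demo drives two prompts against two different models (see the two prompt/response pairs in
17 | `src/multi_models.html`). The sketch below shows the general shape; it assumes the engine accepts
18 | a list of model ids and that each request picks a model via the `model` field, so consult the
19 | source under `src/` in this folder for the exact usage.
20 | 
21 | ```ts
22 | import * as webllm from "@mlc-ai/web-llm";
23 | 
24 | async function sketch() {
25 |   const modelA = "Llama-3.2-3B-Instruct-q4f16_1-MLC";
26 |   const modelB = "Qwen2.5-1.5B-Instruct-q4f16_1-MLC";
27 |   // Assumption: passing an array of model ids loads both models into one engine.
28 |   const engine = await webllm.CreateMLCEngine([modelA, modelB]);
29 | 
30 |   // Assumption: the `model` field routes each request to one of the loaded models.
31 |   const replyA = await engine.chatCompletion({
32 |     messages: [{ role: "user", content: "Tell me a short joke." }],
33 |     model: modelA,
34 |   });
35 |   const replyB = await engine.chatCompletion({
36 |     messages: [{ role: "user", content: "Explain WebGPU in one sentence." }],
37 |     model: modelB,
38 |   });
39 |   console.log(replyA.choices[0].message.content);
40 |   console.log(replyB.choices[0].message.content);
41 | }
42 | 
43 | sketch();
44 | ```
45 | 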
--------------------------------------------------------------------------------
/examples/multi-models/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "get-started",
3 | "version": "0.1.0",
4 | "private": true,
5 | "scripts": {
6 | "start": "parcel src/multi_models.html --port 8888",
7 | "build": "parcel build src/multi_models.html --dist-dir lib"
8 | },
9 | "devDependencies": {
10 | "buffer": "^5.7.1",
11 | "parcel": "^2.8.3",
12 | "process": "^0.11.10",
13 | "tslib": "^2.3.1",
14 | "typescript": "^4.9.5",
15 | "url": "^0.11.3"
16 | },
17 | "dependencies": {
18 | "@mlc-ai/web-llm": "^0.2.79"
19 | }
20 | }
21 |
--------------------------------------------------------------------------------
/examples/multi-models/src/multi_models.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
6 |
7 |
WebLLM Test Page
8 | Open console to see output
9 |
10 |
11 |
12 |
13 |
Prompt 1
14 |
15 |
16 |
Response from model 1
17 |
18 |
19 |
20 |
Prompt 2
21 |
22 |
23 |
Response from model 2
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
--------------------------------------------------------------------------------
/examples/multi-models/src/worker.ts:
--------------------------------------------------------------------------------
1 | import { WebWorkerMLCEngineHandler } from "@mlc-ai/web-llm";
2 |
3 | // Hook up the engine to a worker handler
4 | const handler = new WebWorkerMLCEngineHandler();
5 | self.onmessage = (msg: MessageEvent) => {
6 | handler.onmessage(msg);
7 | };
8 |
--------------------------------------------------------------------------------
/examples/multi-round-chat/README.md:
--------------------------------------------------------------------------------
1 | ### OpenAI API Demos
2 |
3 | Run `npm install` first, followed by `npm start`.
4 |
5 | Note: if you would like to hack on the WebLLM core package,
6 | you can change the web-llm dependency to `"file:../.."` and follow the build-from-source
7 | instructions in the project to build WebLLM locally. This option is only recommended
8 | for developing the WebLLM core package itself.
9 |
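10 | The key idea of multi-round chat with an OpenAI-style API is to keep appending both the user and
11 | assistant messages to one `messages` array, so each new request carries the full conversation
12 | history. Below is a minimal sketch of that pattern; the model id and the exported message type
13 | name (`ChatCompletionMessageParam`) are assumptions taken from the other examples, so see the
14 | source under `src/` in this folder for the exact usage.
15 | 
16 | ```ts
17 | import * as webllm from "@mlc-ai/web-llm";
18 | 
19 | async function sketch() {
20 |   const engine = await webllm.CreateMLCEngine("Llama-3.2-3B-Instruct-q4f16_1-MLC");
21 | 
22 |   // The whole conversation lives in this array; each round appends to it.
23 |   const messages: webllm.ChatCompletionMessageParam[] = [
24 |     { role: "user", content: "Name three primary colors." },
25 |   ];
26 | 
27 |   // Round 1
28 |   const reply1 = await engine.chatCompletion({ messages, max_tokens: 64 });
29 |   messages.push({ role: "assistant", content: reply1.choices[0].message.content || "" });
30 | 
31 |   // Round 2: the follow-up relies on the history appended above.
32 |   messages.push({ role: "user", content: "Which of those is your favorite, and why?" });
33 |   const reply2 = await engine.chatCompletion({ messages, max_tokens: 64 });
34 |   console.log(reply2.choices[0].message.content);
35 | }
36 | 
37 | sketch();
38 | ```
39 | 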
--------------------------------------------------------------------------------
/examples/multi-round-chat/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "openai-api",
3 | "version": "0.1.0",
4 | "private": true,
5 | "scripts": {
6 | "start": "parcel src/multi_round_chat.html --port 8888",
7 | "build": "parcel build src/multi_round_chat.html --dist-dir lib"
8 | },
9 | "devDependencies": {
10 | "buffer": "^5.7.1",
11 | "parcel": "^2.8.3",
12 | "process": "^0.11.10",
13 | "tslib": "^2.3.1",
14 | "typescript": "^4.9.5",
15 | "url": "^0.11.3"
16 | },
17 | "dependencies": {
18 | "@mlc-ai/web-llm": "^0.2.79"
19 | }
20 | }
21 |
--------------------------------------------------------------------------------
/examples/multi-round-chat/src/multi_round_chat.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
6 |
7 |
8 |