├── .eslintignore ├── .eslintrc.cjs ├── .github └── workflows │ ├── build-site.yaml │ └── linter.yaml ├── .gitignore ├── .gitmodules ├── .husky └── pre-commit ├── .lintstagedrc.json ├── .prettierignore ├── .prettierrc ├── 3rdparty └── .gitkeep ├── LICENSE ├── README.md ├── SECURITY.md ├── cleanup-index-js.sh ├── docs ├── Makefile ├── README.md ├── _static │ └── img │ │ └── mlc-logo-with-text-landscape.svg ├── conf.py ├── developer │ ├── add_models.rst │ └── building_from_source.rst ├── index.rst ├── make.bat ├── requirements.txt └── user │ ├── advanced_usage.rst │ ├── api_reference.rst │ ├── basic_usage.rst │ └── get_started.rst ├── examples ├── .gitignore ├── README.md ├── abort-reload │ ├── README.md │ ├── package.json │ └── src │ │ ├── get_started.html │ │ └── get_started.js ├── cache-usage │ ├── README.md │ ├── package.json │ └── src │ │ ├── cache_usage.html │ │ └── cache_usage.ts ├── chrome-extension-webgpu-service-worker │ ├── README.md │ ├── package.json │ └── src │ │ ├── background.ts │ │ ├── content.js │ │ ├── example.html │ │ ├── icons │ │ ├── icon-128.png │ │ ├── icon-16.png │ │ ├── icon-32.png │ │ └── icon-64.png │ │ ├── manifest.json │ │ ├── popup.css │ │ ├── popup.html │ │ └── popup.ts ├── chrome-extension │ ├── README.md │ ├── package.json │ └── src │ │ ├── content.js │ │ ├── example.html │ │ ├── icons │ │ ├── icon-128.png │ │ ├── icon-16.png │ │ ├── icon-32.png │ │ └── icon-64.png │ │ ├── manifest.json │ │ ├── manifest_v2.json │ │ ├── popup.css │ │ ├── popup.html │ │ └── popup.ts ├── embeddings │ ├── README.md │ ├── package.json │ └── src │ │ ├── embeddings.html │ │ └── embeddings.ts ├── function-calling │ ├── README.md │ ├── function-calling-manual │ │ ├── README.md │ │ ├── package.json │ │ └── src │ │ │ ├── function_calling_manual.html │ │ │ └── function_calling_manual.ts │ └── function-calling-openai │ │ ├── README.md │ │ ├── package.json │ │ └── src │ │ ├── function_calling_openai.html │ │ └── function_calling_openai.ts ├── get-started-web-worker │ ├── README.md │ ├── package.json │ └── src │ │ ├── get_started.html │ │ ├── main.ts │ │ └── worker.ts ├── get-started │ ├── README.md │ ├── package.json │ └── src │ │ ├── get_started.html │ │ └── get_started.ts ├── json-mode │ ├── README.md │ ├── package.json │ └── src │ │ ├── json_mode.html │ │ └── json_mode.ts ├── json-schema │ ├── README.md │ ├── package.json │ └── src │ │ ├── json_schema.html │ │ └── json_schema.ts ├── logit-processor │ ├── README.md │ ├── package.json │ └── src │ │ ├── logit_processor.html │ │ ├── logit_processor.ts │ │ ├── my_logit_processor.ts │ │ └── worker.ts ├── multi-models │ ├── README.md │ ├── package.json │ └── src │ │ ├── main.ts │ │ ├── multi_models.html │ │ └── worker.ts ├── multi-round-chat │ ├── README.md │ ├── package.json │ └── src │ │ ├── multi_round_chat.html │ │ └── multi_round_chat.ts ├── next-simple-chat │ ├── .gitignore │ ├── README.md │ ├── next.config.js │ ├── package.json │ ├── postcss.config.js │ ├── public │ │ ├── favicon.ico │ │ ├── next.svg │ │ └── vercel.svg │ ├── src │ │ ├── pages │ │ │ ├── _app.tsx │ │ │ ├── _document.tsx │ │ │ ├── api │ │ │ │ └── hello.ts │ │ │ └── index.tsx │ │ ├── styles │ │ │ └── globals.css │ │ └── utils │ │ │ ├── chat_component.tsx │ │ │ └── chat_ui.ts │ ├── tailwind.config.js │ └── tsconfig.json ├── qwen3 │ ├── README.md │ ├── package.json │ └── src │ │ ├── qwen3_example.html │ │ └── qwen3_example.ts ├── seed-to-reproduce │ ├── README.md │ ├── package.json │ └── src │ │ ├── seed.html │ │ └── seed.ts ├── service-worker │ ├── README.md │ ├── 
package.json │ └── src │ │ ├── index.html │ │ ├── main.ts │ │ └── sw.ts ├── simple-chat-js │ ├── index.css │ ├── index.html │ └── index.js ├── simple-chat-ts │ ├── .gitignore │ ├── README.md │ ├── package.json │ └── src │ │ ├── gh-config.js │ │ ├── img │ │ ├── plane.png │ │ └── reset.png │ │ ├── llm_chat.css │ │ ├── llm_chat.html │ │ ├── simple_chat.ts │ │ └── worker.ts ├── simple-chat-upload │ ├── .gitignore │ ├── README.md │ ├── package.json │ └── src │ │ ├── gh-config.js │ │ ├── img │ │ ├── plane.png │ │ └── reset.png │ │ ├── llm_chat.css │ │ ├── llm_chat.html │ │ ├── simple_chat.ts │ │ └── worker.ts ├── streaming │ ├── README.md │ ├── package.json │ └── src │ │ ├── streaming.html │ │ └── streaming.ts ├── text-completion │ ├── README.md │ ├── package.json │ └── src │ │ ├── text_completion.html │ │ └── text_completion.ts └── vision-model │ ├── README.md │ ├── package.json │ └── src │ ├── utils.ts │ ├── vision_model.html │ ├── vision_model.ts │ └── worker.ts ├── jest.config.cjs ├── licenses └── license.openai_node.txt ├── package-lock.json ├── package.json ├── rollup.config.js ├── scripts ├── gh_deploy_site.sh ├── local_deploy_site.sh ├── prep_deps.sh └── serve_mlc_llm_dist.sh ├── site ├── .gitignore ├── _config.yml ├── _includes │ ├── arrow.svg │ ├── github.svg │ ├── head.html │ └── hero.html ├── assets │ ├── css │ │ └── hero.scss │ ├── img │ │ ├── fig │ │ │ ├── Pittsburgh.png │ │ │ └── web-llm.svg │ │ └── logo │ │ │ ├── catalyst.svg │ │ │ ├── cmuscs.png │ │ │ ├── mlc-logo-with-text-landscape.png │ │ │ ├── mlc-logo-with-text-landscape.svg │ │ │ ├── octoml.png │ │ │ ├── sjtu.png │ │ │ └── uw.jpg │ └── video │ │ ├── Code.mp4 │ │ ├── Code.webm │ │ ├── Pittsburgh.mp4 │ │ └── Pittsburgh.webm └── index.md ├── src ├── cache_util.ts ├── config.ts ├── conversation.ts ├── embedding.ts ├── engine.ts ├── error.ts ├── extension_service_worker.ts ├── index.ts ├── llm_chat.ts ├── message.ts ├── openai_api_protocols │ ├── chat_completion.ts │ ├── completion.ts │ ├── embedding.ts │ └── index.ts ├── service_worker.ts ├── support.ts ├── types.ts ├── utils.ts └── web_worker.ts ├── tests ├── constants.ts ├── conversation.test.ts ├── function_calling.test.ts ├── generation_config.test.ts ├── multi_round_chat.test.ts ├── openai_chat_completion.test.ts ├── openai_completion.test.ts ├── openai_embeddings.test.ts └── util.test.ts ├── tsconfig.json └── utils ├── .gitignore └── vram_requirements ├── .gitignore ├── README.md ├── package.json └── src ├── gh-config.js ├── vram_requirements.html └── vram_requirements.ts /.eslintignore: -------------------------------------------------------------------------------- 1 | dist 2 | debug 3 | lib 4 | build 5 | node_modules 6 | 3rdparty 7 | .eslintrc.cjs 8 | **/.next -------------------------------------------------------------------------------- /.eslintrc.cjs: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | extends: ['eslint:recommended', 'plugin:@typescript-eslint/recommended', 'plugin:prettier/recommended'], 3 | parser: '@typescript-eslint/parser', 4 | plugins: ['@typescript-eslint'], 5 | root: true, 6 | rules: { 7 | "@typescript-eslint/no-explicit-any": "off", 8 | "@typescript-eslint/no-empty-function": "off", 9 | "@typescript-eslint/no-non-null-assertion": "off", 10 | }, 11 | overrides: [ 12 | { 13 | "files": ["examples/**/*.js", "examples/**/*.ts"], 14 | "rules": { 15 | "no-undef": "off", 16 | "@typescript-eslint/no-unused-vars": "off" 17 | } 18 | } 19 | ] 20 | }; 21 | 
-------------------------------------------------------------------------------- /.github/workflows/build-site.yaml: -------------------------------------------------------------------------------- 1 | name: Build site and push to gh-pages 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | build: 10 | name: Build site 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - uses: actions/checkout@v2 15 | 16 | - name: Configuring build Environment 17 | run: | 18 | sudo apt-get update 19 | python -m pip install -U pip 20 | 21 | - name: Setup Ruby 22 | uses: ruby/setup-ruby@v1 23 | with: 24 | ruby-version: '3.0' 25 | 26 | - name: Installing dependencies 27 | run: | 28 | python -m pip install -r docs/requirements.txt 29 | gem install jekyll jekyll-remote-theme jekyll-sass-converter 30 | 31 | - name: Build and deploy site 32 | if: github.ref == 'refs/heads/main' 33 | run: | 34 | git remote set-url origin https://x-access-token:${{ secrets.MLC_GITHUB_TOKEN }}@github.com/$GITHUB_REPOSITORY 35 | git config --global user.email "mlc-gh-actions-bot@nomail" 36 | git config --global user.name "mlc-gh-actions-bot" 37 | 38 | ./scripts/gh_deploy_site.sh -------------------------------------------------------------------------------- /.github/workflows/linter.yaml: -------------------------------------------------------------------------------- 1 | name: Linter 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | 11 | jobs: 12 | lint: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - name: Checkout code 16 | uses: actions/checkout@v3 17 | 18 | - name: Set up Node.js 19 | uses: actions/setup-node@v3 20 | with: 21 | node-version: '16' 22 | 23 | - name: Install dependencies 24 | run: npm install 25 | 26 | - name: Run lint 27 | run: npm run lint 28 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlc-ai/web-llm/d8b25fed8e81d6f6b27cdc07e839c1c09cfaa43d/.gitmodules -------------------------------------------------------------------------------- /.husky/pre-commit: -------------------------------------------------------------------------------- 1 | npx lint-staged 2 | -------------------------------------------------------------------------------- /.lintstagedrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "./**/*.{js,ts,jsx,tsx,json}": ["eslint --fix", "prettier --write"] 3 | } 4 | -------------------------------------------------------------------------------- /.prettierignore: -------------------------------------------------------------------------------- 1 | dist 2 | debug 3 | lib 4 | build 5 | node_modules 6 | 3rdparty 7 | .eslintrc.cjs 8 | **/.next -------------------------------------------------------------------------------- /.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "trailingComma": "all" 3 | } 4 | -------------------------------------------------------------------------------- /3rdparty/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlc-ai/web-llm/d8b25fed8e81d6f6b27cdc07e839c1c09cfaa43d/3rdparty/.gitkeep -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 
| ## Reporting a Vulnerability 4 | 5 | For security concerns or vulnerability reports, please send email to mlc-llm-private@googlegroups.com. 6 | -------------------------------------------------------------------------------- /cleanup-index-js.sh: -------------------------------------------------------------------------------- 1 | # Remove instances of string "const{createRequire:createRequire}=await import('module');" 2 | # This is required to allow background workers packaged with Parcel for the chrome extension 3 | # to run the `ChatModule`. 4 | sed -e s/"const{createRequire:createRequire}=await import('module');"//g -i .backup lib/index.js 5 | sed -e s/"const{createRequire:createRequire}=await import('module');"//g -i .backup lib/index.js.map 6 | 7 | # Replace string "new (require('u' + 'rl').URL)('file:' + __filename).href" with "MLC_DUMMY_PATH" 8 | # This is required for building nextJS projects -- its compile time would complain about `require()` 9 | # See https://github.com/mlc-ai/web-llm/issues/383 and the fixing PR's description for more. 10 | sed -e s/"new (require('u' + 'rl').URL)('file:' + __filename).href"/"\"MLC_DUMMY_PATH\""/g -i .backup lib/index.js 11 | # Replace with \"MLC_DUMMY_PATH\" 12 | sed -e s/"new (require('u' + 'rl').URL)('file:' + __filename).href"/'\\\"MLC_DUMMY_PATH\\\"'/g -i .backup lib/index.js.map 13 | 14 | # Replace "import require$$3 from 'perf_hooks';" with a string "const require$$3 = "MLC_DUMMY_REQUIRE_VAR"" 15 | # This is to prevent `perf_hooks` not found error 16 | # For more see https://github.com/mlc-ai/web-llm/issues/258 and https://github.com/mlc-ai/web-llm/issues/127 17 | sed -e s/"import require\$\$3 from 'perf_hooks';"/"const require\$\$3 = \"MLC_DUMMY_REQUIRE_VAR\""/g -i .backup lib/index.js 18 | # Similarly replace `const performanceNode = require(\"perf_hooks\")` with `const performanceNode = \"MLC_DUMMY_REQUIRE_VAR\"` 19 | sed -e s/'require(\\\"perf_hooks\\\")'/'\\\"MLC_DUMMY_REQUIRE_VAR\\\"'/g -i .backup lib/index.js.map 20 | 21 | # Below is added when we include dependency @mlc-ai/web-runtime, rather than using local tvm_home 22 | # Replace "import require$$4 from 'ws'" with a string "const require$$3 = "MLC_DUMMY_REQUIRE_VAR"" 23 | # This is to prevent error `Cannot find module 'ws'` 24 | sed -e s/"import require\$\$4 from 'ws';"/"const require\$\$4 = \"MLC_DUMMY_REQUIRE_VAR\""/g -i .backup lib/index.js 25 | # Similarly replace `const WebSocket = require(\"ws\")` with `const WebSocket = \"MLC_DUMMY_REQUIRE_VAR\"` 26 | sed -e s/'require(\\\"ws\\\")'/'\\\"MLC_DUMMY_REQUIRE_VAR\\\"'/g -i .backup lib/index.js.map 27 | 28 | # Cleanup backup files 29 | rm lib/index.js.backup 30 | rm lib/index.js.map.backup 31 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= python -m sphinx 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # WebLLM Documentation 2 | 3 | The documentation was built upon [Sphinx](https://www.sphinx-doc.org/en/master/). 4 | 5 | ## Dependencies 6 | 7 | Run the following command in this directory to install dependencies first: 8 | 9 | ```bash 10 | pip3 install -r requirements.txt 11 | ``` 12 | 13 | ## Build the Documentation 14 | 15 | Then you can build the documentation by running: 16 | 17 | ```bash 18 | make html 19 | ``` 20 | 21 | ## View the Documentation 22 | 23 | Run the following command to start a simple HTTP server: 24 | 25 | ```bash 26 | cd _build/html 27 | python3 -m http.server 28 | ``` 29 | 30 | Then you can view the documentation in your browser at `http://localhost:8000` (the port can be customized by appending ` -p PORT_NUMBER` in the python command above). 31 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | import sys 4 | 5 | import tlcpack_sphinx_addon 6 | 7 | # -- General configuration ------------------------------------------------ 8 | 9 | sys.path.insert(0, os.path.abspath("../python")) 10 | sys.path.insert(0, os.path.abspath("../")) 11 | autodoc_mock_imports = ["torch"] 12 | 13 | # General information about the project. 14 | project = "web-llm" 15 | author = "WebLLM Contributors" 16 | copyright = "2023, %s" % author 17 | 18 | # Version information. 19 | 20 | version = "0.2.79" 21 | release = "0.2.79" 22 | 23 | extensions = [ 24 | "sphinx_tabs.tabs", 25 | "sphinx_toolbox.collapse", 26 | "sphinxcontrib.httpdomain", 27 | "sphinx.ext.autodoc", 28 | "sphinx.ext.napoleon", 29 | "sphinx_reredirects", 30 | ] 31 | 32 | redirects = {"get_started/try_out": "../index.html#getting-started"} 33 | 34 | source_suffix = [".rst"] 35 | 36 | language = "en" 37 | 38 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] 39 | 40 | # The name of the Pygments (syntax highlighting) style to use. 41 | pygments_style = "sphinx" 42 | 43 | # A list of ignored prefixes for module index sorting. 44 | # If true, `todo` and `todoList` produce output, else they produce nothing. 
45 | todo_include_todos = False 46 | 47 | # -- Options for HTML output ---------------------------------------------- 48 | 49 | # The theme is set by the make target 50 | import sphinx_rtd_theme 51 | 52 | html_theme = "sphinx_rtd_theme" 53 | html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] 54 | 55 | templates_path = [] 56 | 57 | html_static_path = [] 58 | 59 | footer_copyright = "© 2023 MLC LLM" 60 | footer_note = " " 61 | 62 | html_logo = "_static/img/mlc-logo-with-text-landscape.svg" 63 | 64 | html_theme_options = { 65 | "logo_only": True, 66 | } 67 | 68 | header_links = [ 69 | ("Home", "https://webllm.mlc.ai/"), 70 | ("GitHub", "https://github.com/mlc-ai/web-llm"), 71 | ("Discord", "https://discord.gg/9Xpy2HGBuD"), 72 | ] 73 | 74 | header_dropdown = { 75 | "name": "Other Resources", 76 | "items": [ 77 | ("WebLLM Chat", "https://chat.webllm.ai/"), 78 | ("MLC Course", "https://mlc.ai/"), 79 | ("MLC Blog", "https://blog.mlc.ai/"), 80 | ("MLC LLM", "https://llm.mlc.ai/"), 81 | ], 82 | } 83 | 84 | html_context = { 85 | "footer_copyright": footer_copyright, 86 | "footer_note": footer_note, 87 | "header_links": header_links, 88 | "header_dropdown": header_dropdown, 89 | "display_github": True, 90 | "github_user": "mlc-ai", 91 | "github_repo": "web-llm", 92 | "github_version": "main/docs/", 93 | "theme_vcs_pageview_mode": "edit", 94 | # "header_logo": "/path/to/logo", 95 | # "header_logo_link": "", 96 | # "version_selecter": "", 97 | } 98 | 99 | 100 | # add additional overrides 101 | templates_path += [tlcpack_sphinx_addon.get_templates_path()] 102 | html_static_path += [tlcpack_sphinx_addon.get_static_path()] 103 | -------------------------------------------------------------------------------- /docs/developer/add_models.rst: -------------------------------------------------------------------------------- 1 | Adding Models 2 | ============= 3 | 4 | WebLLM allows you to compile custom language models using `MLC LLM `_ and then serve compiled model through WebLLM. 5 | 6 | For instructions of how to compile and add custom models to WebLLM, check the `MLC LLM documentation here `_. -------------------------------------------------------------------------------- /docs/developer/building_from_source.rst: -------------------------------------------------------------------------------- 1 | Building From Source 2 | ==================== 3 | 4 | Clone the Repository 5 | --------------------- 6 | .. code-block:: bash 7 | 8 | git clone https://github.com/mlc-ai/web-llm.git 9 | cd web-llm 10 | 11 | Install Dependencies 12 | --------------------- 13 | .. code-block:: bash 14 | 15 | npm install 16 | 17 | Build the Project 18 | ----------------- 19 | .. code-block:: bash 20 | 21 | npm run build 22 | 23 | Test Changes 24 | ------------ 25 | 26 | To test you changes, you can reuse any existing example or create a new example for your new functionality to test. 27 | 28 | Then, to test the effects of your code change in an example, inside ``examples//package.json``, change from ``"@mlc-ai/web-llm": "^0.2.xx"`` to ``"@mlc-ai/web-llm": ../...`` to let it reference you local code. 29 | 30 | .. 
code-block:: bash 31 | 32 | cd examples/ 33 | # Modify the package.json 34 | npm install 35 | npm start 36 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | 👋 Welcome to WebLLM 2 | ==================== 3 | 4 | `GitHub `_ | `WebLLM Chat `_ | `NPM `_ | `Discord `_ 5 | 6 | WebLLM is a high-performance in-browser language model inference engine that brings large language models (LLMs) to web browsers with hardware acceleration. With WebGPU support, it allows developers to build AI-powered applications directly within the browser environment, removing the need for server-side processing and ensuring privacy. 7 | 8 | It provides a specialized runtime for the web backend of MLCEngine, leverages 9 | `WebGPU `_ for local acceleration, offers OpenAI-compatible API, 10 | and provides built-in support for web workers to separate heavy computation from the UI flow. 11 | 12 | Key Features 13 | ------------ 14 | - 🌐 In-Browser Inference: Run LLMs directly in the browser 15 | - 🚀 WebGPU Acceleration: Leverage hardware acceleration for optimal performance 16 | - 🔄 OpenAI API Compatibility: Seamless integration with standard AI workflows 17 | - 📦 Multiple Model Support: Works with Llama, Phi, Gemma, Mistral, and more 18 | 19 | Start exploring WebLLM by `chatting with WebLLM Chat `_, and start building webapps with high-performance local LLM inference with the following guides and tutorials. 20 | 21 | .. toctree:: 22 | :maxdepth: 2 23 | :caption: User Guide 24 | 25 | user/get_started.rst 26 | user/basic_usage.rst 27 | user/advanced_usage.rst 28 | user/api_reference.rst 29 | 30 | .. toctree:: 31 | :maxdepth: 2 32 | :caption: Developer Guide 33 | 34 | developer/building_from_source.rst 35 | developer/add_models.rst 36 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 
21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 |
-------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx-tabs == 3.4.1 2 | sphinx-rtd-theme 3 | sphinx == 5.2.3 4 | sphinx-toolbox == 3.4.0 5 | tlcpack-sphinx-addon==0.2.2 6 | sphinxcontrib_httpdomain==1.8.1 7 | sphinxcontrib-napoleon==0.7 8 | sphinx-reredirects==0.1.2 9 |
-------------------------------------------------------------------------------- /docs/user/basic_usage.rst: -------------------------------------------------------------------------------- 1 | Basic Usage 2 | ================ 3 | 4 | Model Records in WebLLM 5 | ----------------------- 6 | 7 | Each model available in WebLLM is registered as an instance of 8 | ``ModelRecord`` and can be accessed at 9 | `webllm.prebuiltAppConfig.model_list `__. 10 |
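For example, you can enumerate the prebuilt model IDs before choosing one to load. This is a minimal sketch; it only assumes the ``model_id`` field of each record, which the examples in this repository also rely on:

.. code-block:: typescript

   import * as webllm from "@mlc-ai/web-llm";

   // prebuiltAppConfig ships with the package and holds the registered ModelRecords
   const modelIds = webllm.prebuiltAppConfig.model_list.map(
     (record) => record.model_id,
   );
   console.log("Available models:", modelIds);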
11 | Creating an MLCEngine 12 | --------------------- 13 | 14 | WebLLM APIs are exposed through the ``MLCEngine`` interface. You can create an ``MLCEngine`` instance and load a model by calling the ``CreateMLCEngine()`` factory function. 15 | 16 | (Note that loading a model requires downloading its weights, which can take a significant amount of time on the very first run before anything is cached. You should properly handle this asynchronous call.) 17 | 18 | ``MLCEngine`` can be instantiated in two ways: 19 | 1. Using the factory function ``CreateMLCEngine``. 20 | 2. Instantiating the ``MLCEngine`` class directly and using ``reload()`` to load models. 21 | 22 | .. code-block:: typescript 23 | 24 | import { CreateMLCEngine, MLCEngine } from "@mlc-ai/web-llm"; 25 | 26 | // Initialize with a progress callback 27 | const initProgressCallback = (progress) => { 28 | console.log("Model loading progress:", progress); 29 | }; 30 | 31 | // Using CreateMLCEngine 32 | const engine = await CreateMLCEngine("Llama-3.1-8B-Instruct", { initProgressCallback }); 33 | 34 | // Direct instantiation 35 | const engineInstance = new MLCEngine({ initProgressCallback }); 36 | await engineInstance.reload("Llama-3.1-8B-Instruct"); 37 | 38 | Under the hood, the ``CreateMLCEngine`` factory function first creates an engine instance (synchronous) and then loads the model (asynchronous). You can also perform the two steps separately in your application. 39 | 40 | .. code-block:: typescript 41 | 42 | import { MLCEngine } from "@mlc-ai/web-llm"; 43 | 44 | // This is a synchronous call that returns immediately 45 | const engine = new MLCEngine({ 46 | initProgressCallback: initProgressCallback 47 | }); 48 | 49 | // This is an asynchronous call and can take a long time to finish 50 | await engine.reload(selectedModel); 51 | 52 | 53 | Chat Completion 54 | --------------- 55 | 56 | Chat completions can be invoked using OpenAI-style chat APIs through the ``engine.chat.completions`` interface of an initialized ``MLCEngine``. For the full list of parameters and their descriptions, check :ref:`api-reference`. 57 | 58 | (Note: Since the model is determined at ``MLCEngine`` initialization time, the ``model`` parameter is not supported and will be **ignored**. Instead, call ``CreateMLCEngine(model)`` or ``engine.reload(model)`` to reinitialize the engine with a specific model.) 59 | 60 | .. code-block:: typescript 61 | 62 | const messages = [ 63 | { role: "system", content: "You are a helpful AI assistant." }, 64 | { role: "user", content: "Hello!" } 65 | ]; 66 | 67 | const reply = await engine.chat.completions.create({ 68 | messages, 69 | }); 70 | 71 | console.log(reply.choices[0].message); 72 | console.log(reply.usage); 73 | 74 | 75 | Streaming Chat Completion 76 | ------------------------- 77 | 78 | Streaming chat completion can be enabled by passing the ``stream: true`` parameter to the ``engine.chat.completions.create()`` call configuration. Check :ref:`api-reference` for the full list of parameters. 79 | 80 | .. code-block:: typescript 81 | 82 | const messages = [ 83 | { role: "system", content: "You are a helpful AI assistant." }, 84 | { role: "user", content: "Hello!" }, 85 | ] 86 | 87 | // Chunks is an AsyncGenerator object 88 | const chunks = await engine.chat.completions.create({ 89 | messages, 90 | temperature: 1, 91 | stream: true, // <-- Enable streaming 92 | stream_options: { include_usage: true }, 93 | }); 94 | 95 | let reply = ""; 96 | for await (const chunk of chunks) { 97 | reply += chunk.choices[0]?.delta.content || ""; 98 | console.log(reply); 99 | if (chunk.usage) { 100 | console.log(chunk.usage); // only last chunk has usage 101 | } 102 | } 103 | 104 | const fullReply = await engine.getMessage(); 105 | console.log(fullReply); 106 | 107 | 108 | Chatbot Examples 109 | ---------------- 110 | 111 | Learn how to use WebLLM to integrate large language models into your applications and generate chat completions through this simple chatbot example: 112 | 113 | - `Example in JSFiddle `_ 114 | - `Example in CodePen `_ 115 | 116 | For a more advanced example of a larger project, check `WebLLM Chat `_. 117 | 118 | More examples for different use cases are available in the examples folder. 119 | 120 | 121 |
-------------------------------------------------------------------------------- /docs/user/get_started.rst: -------------------------------------------------------------------------------- 1 | Getting Started with WebLLM 2 | =========================== 3 | 4 | This guide will help you set up WebLLM in your project, install the necessary dependencies, and verify your setup. 5 | 6 | 7 | WebLLM Chat 8 | ----------- 9 | 10 | If you want to experience AI Chat supported by local LLM inference and understand how WebLLM works, try out `WebLLM Chat `__, which provides a great example 11 | of integrating WebLLM into a full web application. 12 | 13 | A WebGPU-compatible browser is needed to run WebLLM-powered web applications. 14 | You can download the latest Google Chrome and use `WebGPU Report `__ 15 | to verify the functionality of WebGPU on your browser. 16 | 17 | Installation 18 | ------------ 19 | 20 | WebLLM offers a minimalist and modular interface to access the chatbot in the browser. The package is designed in a modular way to hook into any of your UI components. 21 | 22 | WebLLM is available as an `npm package `_ and is also CDN-delivered. Therefore, you can install WebLLM using Node.js package managers like npm, yarn, or pnpm, or directly import the package via CDN. 23 | 24 | Using Package Managers 25 | ^^^^^^^^^^^^^^^^^^^^^^ 26 | Install WebLLM via your preferred package manager: 27 | 28 |
.. code-block:: bash 29 | 30 | # npm 31 | npm install @mlc-ai/web-llm 32 | # yarn 33 | yarn add @mlc-ai/web-llm 34 | # pnpm 35 | pnpm install @mlc-ai/web-llm 36 | 37 | Import WebLLM into your project: 38 | 39 | .. code-block:: javascript 40 | 41 | // Import everything 42 | import * as webllm from "@mlc-ai/web-llm"; 43 | 44 | // Or only import what you need 45 | import { CreateMLCEngine } from "@mlc-ai/web-llm"; 46 | 47 | Using CDN 48 | ^^^^^^^^^ 49 | Thanks to `jsdelivr.com `_, WebLLM can be imported directly through a URL and works out of the box on cloud development platforms like `jsfiddle.net `_, `Codepen.io `_, and `Scribbler `_: 50 | 51 | .. code-block:: javascript 52 | 53 | import * as webllm from "https://esm.run/@mlc-ai/web-llm"; 54 | 55 | This method is especially useful for online environments like CodePen, JSFiddle, or local experiments. 56 | 57 | Verifying Installation 58 | ^^^^^^^^^^^^^^^^^^^^^^ 59 | Run the following script to verify the installation: 60 | 61 | .. code-block:: javascript 62 | 63 | import { CreateMLCEngine } from "@mlc-ai/web-llm"; 64 | console.log("WebLLM loaded successfully!"); 65 | 66 | 67 | Online IDE Sandbox 68 | ------------------ 69 | 70 | Instead of setting up WebLLM locally, you can also try it out in online JavaScript IDE sandboxes like: 71 | 72 | - `Example in JSFiddle `_ 73 | - `Example in CodePen `_ 74 | 75 | 76 |
-------------------------------------------------------------------------------- /examples/.gitignore: -------------------------------------------------------------------------------- 1 | package-lock.json 2 |
-------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | # Awesome WebLLM 2 | 3 | This page contains a curated list of examples, tutorials, and blogs about WebLLM use cases. 4 | Please send a pull request if you find things that belong here. 5 | 6 | ## Example Projects 7 | 8 | Note that all examples below run in-browser and use WebGPU as a backend. 9 | 10 | #### Project List 11 | 12 | - [get-started](get-started): minimal get-started example with chat completion. 13 | 14 | [![Open in JSFiddle](https://img.shields.io/badge/open-JSFiddle-blue?logo=jsfiddle&logoColor=white)](https://jsfiddle.net/neetnestor/yac9gbwf/) 15 | [![Open in Codepen](https://img.shields.io/badge/open-codepen-gainsboro?logo=codepen)](https://codepen.io/neetnestor/pen/NWVdgey) 16 | 17 | - [simple-chat-js](simple-chat-js): a minimal and complete chat bot app in vanilla JavaScript. 18 | 19 | [![Open in JSFiddle](https://img.shields.io/badge/open-JSFiddle-blue?logo=jsfiddle&logoColor=white)](https://jsfiddle.net/neetnestor/4nmgvsa2/) 20 | [![Open in Codepen](https://img.shields.io/badge/open-codepen-gainsboro?logo=codepen)](https://codepen.io/neetnestor/pen/vYwgZaG) 21 | 22 | - [simple-chat-ts](simple-chat-ts): a minimal and complete chat bot app in TypeScript. 23 | - [get-started-web-worker](get-started-web-worker): same as get-started, but using a web worker. 24 | - [next-simple-chat](next-simple-chat): a minimal and complete chat bot app with [Next.js](https://nextjs.org/).
25 | - [multi-round-chat](multi-round-chat): while APIs are functional, we internally optimize so that multi round chat usage can reuse KV cache 26 | - [text-completion](text-completion): demonstrates API `engine.completions.create()`, which is pure text completion with no conversation, as opposed to `engine.chat.completions.create()` 27 | - [embeddings](embeddings): demonstrates API `engine.embeddings.create()`, integration with `EmbeddingsInterface` and `MemoryVectorStore` of [Langchain.js](js.langchain.com), and RAG with Langchain.js using WebLLM for both LLM and Embedding in a single engine 28 | - [multi-models](multi-models): demonstrates loading multiple models in a single engine concurrently 29 | 30 | #### Advanced OpenAI API Capabilities 31 | 32 | These examples demonstrate various capabilities via WebLLM's OpenAI-like API. 33 | 34 | - [streaming](streaming): return output as chunks in real-time in the form of an AsyncGenerator 35 | - [json-mode](json-mode): efficiently ensure output is in json format, see [OpenAI Reference](https://platform.openai.com/docs/guides/text-generation/chat-completions-api) for more. 36 | - [json-schema](json-schema): besides guaranteeing output to be in JSON, ensure output to adhere to a specific JSON schema specified the user 37 | - [seed-to-reproduce](seed-to-reproduce): use seeding to ensure reproducible output with fields `seed`. 38 | - [function-calling](function-calling) (WIP): function calling with fields `tools` and `tool_choice` (with preliminary support). 39 | - [vision-model](vision-model): process request with image input using Vision Language Model (e.g. Phi3.5-vision) 40 | 41 | #### Chrome Extension 42 | 43 | - [chrome-extension](chrome-extension): chrome extension that does not have a persistent background 44 | - [chrome-extension-webgpu-service-worker](chrome-extension-webgpu-service-worker): chrome extension using service worker, hence having a persistent background 45 | 46 | #### Others 47 | 48 | - [logit-processor](logit-processor): while `logit_bias` is supported, we additionally support stateful logit processing where users can specify their own rules. We also expose low-level API `forwardTokensAndSample()`. 49 | - [cache-usage](cache-usage): demonstrates how WebLLM supports both the [Cache API](https://developer.mozilla.org/en-US/docs/Web/API/Cache) and [IndexedDB cache](https://developer.mozilla.org/en-US/docs/Web/API/IndexedDB_API), and 50 | users can pick with `appConfig.useIndexedDBCache`. Also demonstrates various cache utils such as checking 51 | whether a model is cached, deleting a model's weights from cache, deleting a model library wasm from cache, etc. 52 | - [simple-chat-upload](simple-chat-upload): demonstrates how to upload local models to WebLLM instead of downloading via a URL link 53 | 54 | ## Demo Spaces 55 | 56 | - [web-llm-embed](https://huggingface.co/spaces/matthoffner/web-llm-embed): document chat prototype using react-llm with transformers.js embeddings 57 | - [DeVinci](https://x6occ-biaaa-aaaai-acqzq-cai.icp0.io/): AI chat app based on WebLLM and hosted on decentralized cloud platform 58 | -------------------------------------------------------------------------------- /examples/abort-reload/README.md: -------------------------------------------------------------------------------- 1 | # WebLLM Get Started App 2 | 3 | This folder provides a demo for cancelling model fetching after calling `engine.reload()`. 
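The core pattern, condensed from `src/get_started.js`, is to start `engine.reload()` without awaiting it and then call `engine.unload()` to abort the in-flight download:

```ts
import { MLCEngine } from "@mlc-ai/web-llm";

const engine = new MLCEngine({
  initProgressCallback: (report) => console.log(report.text),
});

// Start fetching/loading the model (intentionally not awaited)
engine.reload("Llama-3.1-8B-Instruct-q4f32_1-MLC");

// Later, abort the in-flight reload by unloading the engine
setTimeout(() => {
  engine.unload().catch((err) => console.log(err));
}, 5000);
```

To run the full demo: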
4 | 5 | ```bash 6 | npm install 7 | npm start 8 | ``` 9 | 10 | Note: the following is only needed if you would like to hack on the WebLLM core package. 11 | You can change the web-llm dependency to `"file:../.."` and follow the build-from-source 12 | instructions in the project to build WebLLM locally. 13 | 14 |
-------------------------------------------------------------------------------- /examples/abort-reload/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "get-started", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "start": "parcel src/get_started.html --port 8887", 7 | "build": "parcel build src/get_started.html --dist-dir lib" 8 | }, 9 | "devDependencies": { 10 | "buffer": "^5.7.1", 11 | "parcel": "^2.8.3", 12 | "process": "^0.11.10", 13 | "tslib": "^2.3.1", 14 | "typescript": "^4.9.5", 15 | "url": "^0.11.3" 16 | }, 17 | "dependencies": { 18 | "@mlc-ai/web-llm": "^0.2.79" 19 | } 20 | } 21 |
-------------------------------------------------------------------------------- /examples/abort-reload/src/get_started.html: --------------------------------------------------------------------------------

[HTML markup lost in extraction — `get_started.html` is a minimal test page titled "WebLLM Test Page" with the note "Open console to see output", label placeholders for initialization progress, Prompt, and Response, and a script tag that loads `./get_started.js`.]
19 | 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /examples/abort-reload/src/get_started.js: -------------------------------------------------------------------------------- 1 | import * as webllm from "@mlc-ai/web-llm"; 2 | import { error } from "loglevel"; 3 | 4 | let engine; 5 | 6 | function setLabel(id, text) { 7 | const label = document.getElementById(id); 8 | if (label == null) { 9 | throw Error("Cannot find label " + id); 10 | } 11 | label.innerText = text; 12 | } 13 | 14 | async function main() { 15 | const initProgressCallback = (report) => { 16 | console.log(report.text); 17 | setLabel("init-label", report.text); 18 | }; 19 | // Option 1: If we do not specify appConfig, we use `prebuiltAppConfig` defined in `config.ts` 20 | const selectedModel = "Llama-3.1-8B-Instruct-q4f32_1-MLC"; 21 | engine = new webllm.MLCEngine({ 22 | initProgressCallback, 23 | }); 24 | engine.reload(selectedModel); 25 | } 26 | main(); 27 | setTimeout(() => { 28 | console.log("calling unload"); 29 | engine.unload().catch((err) => { 30 | console.log(err); 31 | }); 32 | }, 5000); 33 | -------------------------------------------------------------------------------- /examples/cache-usage/README.md: -------------------------------------------------------------------------------- 1 | # WebLLM Cache Usage 2 | 3 | WebLLM supports both the Cache API and IndexedDB, which you can specify via `AppConfig.useIndexedDBCache`. 4 | This folder provides an example on how Cache and IndexedDB Cache are used in WebLLM. We also 5 | demonstrate the utility cache functions such as deleting models, checking if models are in cache, etc. 6 | 7 | For more information about the two caches, see: https://developer.mozilla.org/en-US/docs/Web/API/Storage_API/Storage_quotas_and_eviction_criteria#what_technologies_store_data_in_the_browser. 8 | 9 | To inspect the downloaded artifacts in your browser, open up developer console, go to application, 10 | and you will find the artifacts under either `IndexedDB` or `Cache storage`. 11 | 12 | To run the exapmle, you can do the following steps under this folder 13 | 14 | ```bash 15 | npm install 16 | npm start 17 | ``` 18 | 19 | Note if you would like to hack WebLLM core package. 20 | You can change web-llm dependencies as `"file:../.."`, and follow the build from source 21 | instruction in the project to build webllm locally. This option is only recommended 22 | if you would like to hack WebLLM core package. 23 | -------------------------------------------------------------------------------- /examples/cache-usage/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "cache-usage", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "start": "parcel src/cache_usage.html --port 8888", 7 | "build": "parcel build src/cache_usage.html --dist-dir lib" 8 | }, 9 | "devDependencies": { 10 | "buffer": "^5.7.1", 11 | "parcel": "^2.8.3", 12 | "process": "^0.11.10", 13 | "tslib": "^2.3.1", 14 | "typescript": "^4.9.5", 15 | "url": "^0.11.3" 16 | }, 17 | "dependencies": { 18 | "@mlc-ai/web-llm": "^0.2.79" 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /examples/cache-usage/src/cache_usage.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 | 8 |

[HTML markup lost in extraction — `cache_usage.html` is a minimal test page titled "WebLLM Test Page" with the note "Open console to see output", label placeholders for initialization progress, Prompt, and Response, and a script tag that loads the `cache_usage.ts` entry script.]
20 | 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /examples/cache-usage/src/cache_usage.ts: -------------------------------------------------------------------------------- 1 | import * as webllm from "@mlc-ai/web-llm"; 2 | 3 | function setLabel(id: string, text: string) { 4 | const label = document.getElementById(id); 5 | if (label == null) { 6 | throw Error("Cannot find label " + id); 7 | } 8 | label.innerText = text; 9 | } 10 | 11 | const initProgressCallback = (report: webllm.InitProgressReport) => { 12 | setLabel("init-label", report.text); 13 | }; 14 | 15 | async function main() { 16 | const appConfig = webllm.prebuiltAppConfig; 17 | // CHANGE THIS TO SEE EFFECTS OF BOTH, CODE BELOW DO NOT NEED TO CHANGE 18 | appConfig.useIndexedDBCache = true; 19 | 20 | if (appConfig.useIndexedDBCache) { 21 | console.log("Using IndexedDB Cache"); 22 | } else { 23 | console.log("Using Cache API"); 24 | } 25 | 26 | // 1. This triggers downloading and caching the model with either Cache or IndexedDB Cache 27 | const selectedModel = "phi-2-q4f16_1-MLC"; 28 | const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine( 29 | selectedModel, 30 | { initProgressCallback: initProgressCallback, appConfig: appConfig }, 31 | ); 32 | 33 | const request: webllm.ChatCompletionRequest = { 34 | stream: false, 35 | messages: [ 36 | { 37 | role: "user", 38 | content: "Write an analogy between mathematics and a lighthouse.", 39 | }, 40 | ], 41 | n: 1, 42 | }; 43 | let reply = await engine.chat.completions.create(request); 44 | console.log(reply); 45 | 46 | // 2. Check whether model weights are cached 47 | let modelCached = await webllm.hasModelInCache(selectedModel, appConfig); 48 | console.log("hasModelInCache: ", modelCached); 49 | if (!modelCached) { 50 | throw Error("Expect hasModelInCache() to be true, but got: " + modelCached); 51 | } 52 | 53 | // 3. We reload, and we should see this time it is much faster because the weights are cached. 54 | console.log("Reload model start"); 55 | await engine.reload(selectedModel); 56 | console.log("Reload model end"); 57 | reply = await engine.chat.completions.create(request); 58 | console.log(reply); 59 | 60 | // 4. Delete every thing about this model from cache 61 | // You can also delete only the model library wasm, only the model weights, or only the config file 62 | await webllm.deleteModelAllInfoInCache(selectedModel, appConfig); 63 | modelCached = await webllm.hasModelInCache(selectedModel, appConfig); 64 | console.log("After deletion, hasModelInCache: ", modelCached); 65 | if (modelCached) { 66 | throw Error( 67 | "Expect hasModelInCache() to be false, but got: " + modelCached, 68 | ); 69 | } 70 | 71 | // 5. 
If we reload, we should expect the model to start downloading again 72 | console.log("Reload model start"); 73 | await engine.reload(selectedModel); 74 | console.log("Reload model end"); 75 | reply = await engine.chat.completions.create(request); 76 | console.log(reply); 77 | } 78 | 79 | main(); 80 | -------------------------------------------------------------------------------- /examples/chrome-extension-webgpu-service-worker/README.md: -------------------------------------------------------------------------------- 1 | # WebLLM Chrome Extension using WebGPU Running on Service Worker 2 | 3 | ![Chrome Extension](https://github.com/mlc-ai/mlc-llm/assets/11940172/0d94cc73-eff1-4128-a6e4-70dc879f04e0) 4 | 5 | > [!WARNING] 6 | > Service worker support in WebGPU is enabled by default in [Chrome 124](https://chromiumdash.appspot.com/commit/8d78510e4aca5ac3cd8ee4a33e96b404eaa43246). 7 | > If you are using Chrome 123, go to `chrome://flags/#enable-experimental-web-platform-features`, enable the `#enable-experimental-web-platform-features` flag, and **relaunch the browser**. 8 | 9 | This example shows how we can create a Chrome extension using WebGPU and service worker. 10 | 11 | - The project structure is as follows: 12 | - `manifest.json`: A required file that lists important information about the structure and behavior of that extension. Here we are using manifest V3. 13 | - `popup.ts`: Script of the extension pop-up window. 14 | - `background.ts`: Script of the service worker. An extension service worker is loaded when it is needed, and unloaded when it goes dormant. 15 | - `content.js`: Content script that interacts with DOM. 16 | - Run 17 | 18 | ```bash 19 | npm install 20 | npm run build 21 | ``` 22 | 23 | This will create a new directory at `./dist/`. To load the extension into Chrome, go to Extensions > Manage Extensions and select Load Unpacked. Add the `./dist/` directory. You can now pin the extension to your toolbar and use it to chat with your favorite model! 24 | 25 | **Note**: This example disables chatting using the contents of the active tab by default. 26 | To enable it, set `useContext` in `popup.ts` to `true`. More info about this feature can be found 27 | [here](https://github.com/mlc-ai/web-llm/pull/190). 28 | However, if the web content is too large, it might run into issues. We recommend using `example.html` to 29 | test this feature. 
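On the popup side, the UI does not run the model itself; it talks to the engine living in the service worker. A minimal sketch of that wiring is shown below — it assumes the client-side factory `CreateExtensionServiceWorkerMLCEngine`, the counterpart of the `ExtensionServiceWorkerMLCEngineHandler` used in `background.ts`; see `popup.ts` for the actual implementation.

```ts
import { CreateExtensionServiceWorkerMLCEngine } from "@mlc-ai/web-llm";

// Assumed counterpart of ExtensionServiceWorkerMLCEngineHandler (see background.ts);
// popup.ts contains the real wiring.
const engine = await CreateExtensionServiceWorkerMLCEngine(
  "Llama-3.1-8B-Instruct-q4f32_1-MLC",
  { initProgressCallback: (report) => console.log(report.text) },
);

const reply = await engine.chat.completions.create({
  messages: [{ role: "user", content: "Summarize the current page." }],
});
console.log(reply.choices[0].message.content);
```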
30 | -------------------------------------------------------------------------------- /examples/chrome-extension-webgpu-service-worker/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "chrome-extension", 3 | "version": "1.0.0", 4 | "description": "", 5 | "private": true, 6 | "scripts": { 7 | "build": "parcel build src/manifest.json --config @parcel/config-webextension" 8 | }, 9 | "author": "", 10 | "license": "ISC", 11 | "devDependencies": { 12 | "@parcel/config-webextension": "^2.9.3", 13 | "@types/chrome": "^0.0.242", 14 | "buffer": "^6.0.3", 15 | "parcel": "^2.9.3", 16 | "process": "^0.11.10", 17 | "url": "^0.11.1" 18 | }, 19 | "dependencies": { 20 | "@mlc-ai/web-llm": "^0.2.79", 21 | "progressbar.js": "^1.1.0" 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /examples/chrome-extension-webgpu-service-worker/src/background.ts: -------------------------------------------------------------------------------- 1 | import { ExtensionServiceWorkerMLCEngineHandler } from "@mlc-ai/web-llm"; 2 | 3 | // Hookup an engine to a service worker handler 4 | let handler; 5 | 6 | chrome.runtime.onConnect.addListener(function (port) { 7 | console.assert(port.name === "web_llm_service_worker"); 8 | if (handler === undefined) { 9 | handler = new ExtensionServiceWorkerMLCEngineHandler(port); 10 | } else { 11 | handler.setPort(port); 12 | } 13 | port.onMessage.addListener(handler.onmessage.bind(handler)); 14 | }); 15 | -------------------------------------------------------------------------------- /examples/chrome-extension-webgpu-service-worker/src/content.js: -------------------------------------------------------------------------------- 1 | // Only the content script is able to access the DOM 2 | chrome.runtime.onConnect.addListener(function (port) { 3 | port.onMessage.addListener(function (msg) { 4 | port.postMessage({ contents: document.body.innerHTML }); 5 | }); 6 | }); 7 | -------------------------------------------------------------------------------- /examples/chrome-extension-webgpu-service-worker/src/example.html: -------------------------------------------------------------------------------- 1 | In the year 2154, humanity had colonized several planets in the distant reaches 2 | of the galaxy. The planet of Xylophia-IV was one of the most remote and 3 | inhospitable, with temperatures often dropping to -200 degrees Celsius. Despite 4 | these harsh conditions, a team of scientists had established a research station 5 | on the planet to study the unique geological formations and exotic flora and 6 | fauna. One day, while conducting a routine survey of the planet's surface, the 7 | team discovered an strange object buried deep in the ice. As they examined it 8 | closer, they realized it was a small, metallic capsule with a glowing blue 9 | symbol etched onto its surface. The team's leader, a brilliant scientist named 10 | Dr. Maria Rodriguez, was immediately intrigued by the capsule's mysterious 11 | origins. She ordered her team to bring it back to the research station for 12 | further analysis. After weeks of studying the capsule, the team finally cracked 13 | the code to the symbol etched onto its surface. It was a message from an alien 14 | race, warning Earth of an impending attack from an unknown threat. The team was 15 | shocked and dismayed by the news, but they knew they had to act quickly to warn 16 | the rest of humanity. 
They transmitted the message to the nearest space station, 17 | which relayed it to Earth's government. As the threat of attack loomed near, the 18 | team remained on high alert, ready to face whatever dangers lay ahead. They had 19 | uncovered a secrets of the universe, and now they were determined to protect 20 | their planet and its inhabitants at all costs. 21 | -------------------------------------------------------------------------------- /examples/chrome-extension-webgpu-service-worker/src/icons/icon-128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlc-ai/web-llm/d8b25fed8e81d6f6b27cdc07e839c1c09cfaa43d/examples/chrome-extension-webgpu-service-worker/src/icons/icon-128.png -------------------------------------------------------------------------------- /examples/chrome-extension-webgpu-service-worker/src/icons/icon-16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlc-ai/web-llm/d8b25fed8e81d6f6b27cdc07e839c1c09cfaa43d/examples/chrome-extension-webgpu-service-worker/src/icons/icon-16.png -------------------------------------------------------------------------------- /examples/chrome-extension-webgpu-service-worker/src/icons/icon-32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlc-ai/web-llm/d8b25fed8e81d6f6b27cdc07e839c1c09cfaa43d/examples/chrome-extension-webgpu-service-worker/src/icons/icon-32.png -------------------------------------------------------------------------------- /examples/chrome-extension-webgpu-service-worker/src/icons/icon-64.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlc-ai/web-llm/d8b25fed8e81d6f6b27cdc07e839c1c09cfaa43d/examples/chrome-extension-webgpu-service-worker/src/icons/icon-64.png -------------------------------------------------------------------------------- /examples/chrome-extension-webgpu-service-worker/src/manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "manifest_version": 3, 3 | "name": "MLCBot", 4 | "version": "0.1.0", 5 | "description": "Chat with your browser", 6 | "icons": { 7 | "16": "icons/icon-16.png", 8 | "32": "icons/icon-32.png", 9 | "64": "icons/icon-64.png", 10 | "128": "icons/icon-128.png" 11 | }, 12 | "content_security_policy": { 13 | "extension_pages": "style-src-elem 'self' https://cdnjs.cloudflare.com; font-src 'self' https://cdnjs.cloudflare.com; script-src 'self' 'wasm-unsafe-eval'; default-src 'self' data:; connect-src 'self' data: http://localhost:8000 https://huggingface.co https://cdn-lfs.huggingface.co https://cdn-lfs-us-1.huggingface.co https://raw.githubusercontent.com https://cdn-lfs-us-1.hf.co" 14 | }, 15 | "action": { 16 | "default_title": "MLCBot", 17 | "default_popup": "popup.html" 18 | }, 19 | "content_scripts": [ 20 | { 21 | "matches": [""], 22 | "js": ["content.js"] 23 | } 24 | ], 25 | "background": { 26 | "service_worker": "background.ts", 27 | "type": "module" 28 | }, 29 | "permissions": ["storage", "tabs", "webNavigation"] 30 | } 31 | -------------------------------------------------------------------------------- /examples/chrome-extension-webgpu-service-worker/src/popup.css: -------------------------------------------------------------------------------- 1 | *, 2 | *::before, 3 | *::after { 4 | margin: 0; 5 | padding: 0; 6 | box-sizing: border-box; 7 | } 
8 | 9 | html { 10 | font-family: 11 | -apple-system, 12 | BlinkMacSystemFont, 13 | Segoe UI, 14 | Helvetica, 15 | Arial, 16 | sans-serif; 17 | color: #222; 18 | } 19 | 20 | body { 21 | margin: 0; 22 | padding: 0.5rem; 23 | background-color: #778da9; 24 | width: 320px; 25 | font-size: small; 26 | } 27 | 28 | p { 29 | margin: 0; 30 | } 31 | 32 | /* LOADING BAR */ 33 | #loadingContainer { 34 | margin-bottom: 15px; 35 | width: 300px; 36 | height: 8px; 37 | } 38 | 39 | /* INPUT AREA */ 40 | #query-input { 41 | border: 1px solid #ccc; 42 | border-radius: 4px; 43 | } 44 | 45 | .input-container { 46 | display: flex; 47 | flex-direction: row; 48 | align-items: center; 49 | } 50 | 51 | .input-container input { 52 | width: 100%; 53 | outline: none; 54 | padding: 0.5rem; 55 | margin-right: 0.5rem; 56 | } 57 | 58 | /* SUBMIT BUTTON */ 59 | .btn { 60 | background-color: #1b263b; 61 | color: white; 62 | font-size: small; 63 | cursor: pointer; 64 | border-radius: 4px; 65 | border: none; 66 | padding: 0.5rem; 67 | } 68 | 69 | .btn:hover { 70 | background-color: #d0d0d0; 71 | } 72 | 73 | .btn:disabled { 74 | background-color: #a7a7a7; 75 | color: rgb(255, 255, 255); 76 | cursor: default; 77 | } 78 | 79 | .btn img { 80 | width: 1rem; 81 | height: 1rem; 82 | } 83 | 84 | /* LOADING */ 85 | 86 | .stage { 87 | display: flex; 88 | justify-content: center; 89 | align-items: center; 90 | position: relative; 91 | margin: 0 -5%; 92 | overflow: hidden; 93 | } 94 | 95 | #loading-indicator { 96 | display: none; 97 | color: white; 98 | margin-top: 0.5rem; 99 | } 100 | 101 | .dot-flashing { 102 | position: relative; 103 | width: 10px; 104 | height: 10px; 105 | border-radius: 5px; 106 | background-color: #1b263b; 107 | color: #1b263b; 108 | animation: dot-flashing 0.4s infinite linear alternate; 109 | animation-delay: 0.2s; 110 | } 111 | 112 | .dot-flashing::before, 113 | .dot-flashing::after { 114 | content: ""; 115 | display: inline-block; 116 | position: absolute; 117 | top: 0; 118 | } 119 | 120 | .dot-flashing::before { 121 | left: -15px; 122 | width: 10px; 123 | height: 10px; 124 | border-radius: 5px; 125 | background-color: #1b263b; 126 | color: #1b263b; 127 | animation: dot-flashing 0.4s infinite alternate; 128 | animation-delay: 0s; 129 | } 130 | 131 | .dot-flashing::after { 132 | left: 15px; 133 | width: 10px; 134 | height: 10px; 135 | border-radius: 5px; 136 | background-color: #1b263b; 137 | color: #1b263b; 138 | animation: dot-flashing 0.4s infinite alternate; 139 | animation-delay: 0.4s; 140 | } 141 | 142 | @keyframes dot-flashing { 143 | 0% { 144 | background-color: #1b263b; 145 | } 146 | 147 | 50%, 148 | 100% { 149 | background-color: #415a77; 150 | } 151 | } 152 | 153 | /* ANSWERS */ 154 | #queriesAnswersContainer { 155 | display: block; 156 | color: white; 157 | margin-top: 0.5rem; 158 | } 159 | 160 | #answer { 161 | color: #333333; 162 | } 163 | 164 | #answerWrapper { 165 | display: none; 166 | background-color: #ffd166; 167 | border-radius: 8px; 168 | padding: 0.5rem; 169 | margin-top: 0.5rem; 170 | } 171 | 172 | .queriesAnswers { 173 | border-radius: 8px; 174 | background-color: #ffd166; 175 | padding: 0.5rem; 176 | color: #333333; 177 | } 178 | 179 | #lastQuery { 180 | color: rgb(188, 188, 188); 181 | } 182 | 183 | #lastAnswer { 184 | color: white; 185 | margin-top: 0.5rem; 186 | } 187 | 188 | #lastRequest { 189 | padding: 0.5rem; 190 | margin-top: 0.5rem; 191 | background-color: #333333; 192 | border-radius: 4px; 193 | } 194 | 195 | /* ANSWER OPTIONS */ 196 | .timeStamp { 197 | color: #9a8c98; 198 | } 
199 | 200 | .copyRow { 201 | display: flex; 202 | flex-direction: row; 203 | align-items: end; 204 | justify-content: space-between; 205 | color: #a7a7a7; 206 | margin-top: 0.5rem; 207 | } 208 | 209 | .copyText { 210 | display: none; 211 | color: #a7a7a7; 212 | margin-right: 0.5rem; 213 | } 214 | 215 | .copyButton { 216 | color: #415a77; 217 | background-color: transparent; 218 | border: none; 219 | cursor: pointer; 220 | padding: 0; 221 | margin-left: 0.5rem; 222 | } 223 | 224 | .copyButton:hover { 225 | color: #5e80a7; 226 | background-color: transparent; 227 | } 228 | 229 | .removeButton { 230 | color: #415a77; 231 | background-color: transparent; 232 | border: none; 233 | cursor: pointer; 234 | padding: 0; 235 | } 236 | 237 | .removeButton:hover { 238 | color: #5e80a7; 239 | background-color: transparent; 240 | } 241 | -------------------------------------------------------------------------------- /examples/chrome-extension-webgpu-service-worker/src/popup.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Chatbot 6 | 7 | 11 | 12 | 13 |
14 | 15 |
16 | 21 | 24 |
25 | 26 |
27 |
28 |
29 | 30 |
31 |
32 |
33 | 34 | 41 |
42 |
43 | 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /examples/chrome-extension/README.md: -------------------------------------------------------------------------------- 1 | # WebLLM Chrome Extension 2 | 3 | ![Chrome Extension](https://github.com/mlc-ai/mlc-llm/assets/11940172/0d94cc73-eff1-4128-a6e4-70dc879f04e0) 4 | 5 | To run the extension, do the following steps under this folder 6 | 7 | ```bash 8 | npm install 9 | npm run build 10 | ``` 11 | 12 | This will create a new directory at `chrome-extension/dist/`. To load the extension into Chrome, go to Extensions > Manage Extensions and select Load Unpacked. Add the `chrome-extension/dist/` directory. You can now pin the extension to your toolbar and use the drop-down menu to chat with your favorite model! 13 | -------------------------------------------------------------------------------- /examples/chrome-extension/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "chrome-extension", 3 | "version": "1.0.1", 4 | "description": "", 5 | "private": true, 6 | "scripts": { 7 | "build": "parcel build src/manifest.json --config @parcel/config-webextension" 8 | }, 9 | "author": "", 10 | "license": "ISC", 11 | "devDependencies": { 12 | "@parcel/config-webextension": "^2.9.3", 13 | "@types/chrome": "^0.0.242", 14 | "buffer": "^6.0.3", 15 | "parcel": "^2.9.3", 16 | "process": "^0.11.10", 17 | "url": "^0.11.1" 18 | }, 19 | "dependencies": { 20 | "@mlc-ai/web-llm": "^0.2.79", 21 | "progressbar.js": "^1.1.0" 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /examples/chrome-extension/src/content.js: -------------------------------------------------------------------------------- 1 | // Only the content script is able to access the DOM 2 | chrome.runtime.onConnect.addListener(function (port) { 3 | port.onMessage.addListener(function (msg) { 4 | port.postMessage({ contents: document.body.innerText }); 5 | }); 6 | }); 7 | -------------------------------------------------------------------------------- /examples/chrome-extension/src/example.html: -------------------------------------------------------------------------------- 1 | In the year 2154, humanity had colonized several planets in the distant reaches 2 | of the galaxy. The planet of Xylophia-IV was one of the most remote and 3 | inhospitable, with temperatures often dropping to -200 degrees Celsius. Despite 4 | these harsh conditions, a team of scientists had established a research station 5 | on the planet to study the unique geological formations and exotic flora and 6 | fauna. One day, while conducting a routine survey of the planet's surface, the 7 | team discovered an strange object buried deep in the ice. As they examined it 8 | closer, they realized it was a small, metallic capsule with a glowing blue 9 | symbol etched onto its surface. The team's leader, a brilliant scientist named 10 | Dr. Maria Rodriguez, was immediately intrigued by the capsule's mysterious 11 | origins. She ordered her team to bring it back to the research station for 12 | further analysis. After weeks of studying the capsule, the team finally cracked 13 | the code to the symbol etched onto its surface. It was a message from an alien 14 | race, warning Earth of an impending attack from an unknown threat. The team was 15 | shocked and dismayed by the news, but they knew they had to act quickly to warn 16 | the rest of humanity. 
They transmitted the message to the nearest space station, 17 | which relayed it to Earth's government. As the threat of attack loomed near, the 18 | team remained on high alert, ready to face whatever dangers lay ahead. They had 19 | uncovered a secrets of the universe, and now they were determined to protect 20 | their planet and its inhabitants at all costs. 21 | -------------------------------------------------------------------------------- /examples/chrome-extension/src/icons/icon-128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlc-ai/web-llm/d8b25fed8e81d6f6b27cdc07e839c1c09cfaa43d/examples/chrome-extension/src/icons/icon-128.png -------------------------------------------------------------------------------- /examples/chrome-extension/src/icons/icon-16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlc-ai/web-llm/d8b25fed8e81d6f6b27cdc07e839c1c09cfaa43d/examples/chrome-extension/src/icons/icon-16.png -------------------------------------------------------------------------------- /examples/chrome-extension/src/icons/icon-32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlc-ai/web-llm/d8b25fed8e81d6f6b27cdc07e839c1c09cfaa43d/examples/chrome-extension/src/icons/icon-32.png -------------------------------------------------------------------------------- /examples/chrome-extension/src/icons/icon-64.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlc-ai/web-llm/d8b25fed8e81d6f6b27cdc07e839c1c09cfaa43d/examples/chrome-extension/src/icons/icon-64.png -------------------------------------------------------------------------------- /examples/chrome-extension/src/manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "manifest_version": 3, 3 | "name": "MLCBot", 4 | "version": "0.1.1", 5 | "description": "Chat with your browser", 6 | "icons": { 7 | "16": "icons/icon-16.png", 8 | "32": "icons/icon-32.png", 9 | "64": "icons/icon-64.png", 10 | "128": "icons/icon-128.png" 11 | }, 12 | "content_security_policy": { 13 | "extension_pages": "style-src-elem 'self' https://cdnjs.cloudflare.com; font-src 'self' https://cdnjs.cloudflare.com; script-src 'self' 'wasm-unsafe-eval'; default-src 'self' data:; connect-src 'self' data: http://localhost:8000 https://huggingface.co https://cdn-lfs.huggingface.co https://cdn-lfs-us-1.huggingface.co https://raw.githubusercontent.com https://cdn-lfs-us-1.hf.co" 14 | }, 15 | "action": { 16 | "default_title": "MLCBot", 17 | "default_popup": "popup.html" 18 | }, 19 | "content_scripts": [ 20 | { 21 | "matches": [""], 22 | "js": ["content.js"] 23 | } 24 | ], 25 | "permissions": ["storage", "tabs", "webNavigation", "activeTab", "scripting"], 26 | "host_permissions": ["http://*/", "https://*/"] 27 | } 28 | -------------------------------------------------------------------------------- /examples/chrome-extension/src/manifest_v2.json: -------------------------------------------------------------------------------- 1 | { 2 | "manifest_version": 2, 3 | "name": "MLCBot", 4 | "version": "0.1.0", 5 | "description": "Chat with your browser", 6 | "icons": { 7 | "16": "icons/icon-16.png", 8 | "32": "icons/icon-32.png", 9 | "64": "icons/icon-64.png", 10 | "128": "icons/icon-128.png" 11 | }, 12 | "content_security_policy": "style-src-elem 
'self' https://cdnjs.cloudflare.com; font-src 'self' https://cdnjs.cloudflare.com; script-src 'self' 'unsafe-eval' 'wasm-unsafe-eval'; default-src 'self' data:; connect-src 'self' data: http://localhost:8000 https://huggingface.co https://cdn-lfs.huggingface.co https://raw.githubusercontent.com https://cdn-lfs-us-1.hf.co", 13 | "browser_action": { 14 | "default_popup": "popup.html" 15 | }, 16 | "content_scripts": [ 17 | { 18 | "matches": [""], 19 | "js": ["content.js"] 20 | } 21 | ], 22 | "permissions": ["storage", "tabs", "webNavigation", "activeTab"] 23 | } 24 | -------------------------------------------------------------------------------- /examples/chrome-extension/src/popup.css: -------------------------------------------------------------------------------- 1 | *, 2 | *::before, 3 | *::after { 4 | margin: 0; 5 | padding: 0; 6 | box-sizing: border-box; 7 | } 8 | 9 | html { 10 | font-family: 11 | -apple-system, 12 | BlinkMacSystemFont, 13 | Segoe UI, 14 | Helvetica, 15 | Arial, 16 | sans-serif; 17 | color: #222; 18 | } 19 | 20 | body { 21 | margin: 0; 22 | padding: 0.5rem; 23 | background-color: #778da9; 24 | width: 335px; 25 | font-size: small; 26 | } 27 | 28 | p { 29 | margin: 0; 30 | } 31 | 32 | /* LOADING BAR */ 33 | #loadingContainer { 34 | margin-bottom: 15px; 35 | width: 315px; 36 | height: 8px; 37 | } 38 | 39 | /* INPUT AREA */ 40 | #query-input { 41 | border: 1px solid #ccc; 42 | border-radius: 4px; 43 | } 44 | 45 | .input-container { 46 | display: flex; 47 | flex-direction: row; 48 | align-items: center; 49 | } 50 | 51 | .input-container input { 52 | width: 100%; 53 | outline: none; 54 | padding: 0.5rem; 55 | margin-right: 0.5rem; 56 | } 57 | 58 | /* BUTTON */ 59 | .btn { 60 | background-color: #1b263b; 61 | color: white; 62 | font-size: small; 63 | cursor: pointer; 64 | border-radius: 4px; 65 | border: none; 66 | padding: 0.5rem; 67 | } 68 | 69 | .btn:hover { 70 | background-color: #d0d0d0; 71 | } 72 | 73 | .btn:disabled { 74 | background-color: #a7a7a7; 75 | color: rgb(255, 255, 255); 76 | cursor: default; 77 | } 78 | 79 | .btn img { 80 | width: 1rem; 81 | height: 1rem; 82 | } 83 | 84 | /* LOADING */ 85 | 86 | .stage { 87 | display: flex; 88 | justify-content: center; 89 | align-items: center; 90 | position: relative; 91 | margin: 0 -5%; 92 | overflow: hidden; 93 | } 94 | 95 | #loading-indicator { 96 | display: none; 97 | color: white; 98 | margin-top: 0.5rem; 99 | } 100 | 101 | .dot-flashing { 102 | position: relative; 103 | width: 10px; 104 | height: 10px; 105 | border-radius: 5px; 106 | background-color: #1b263b; 107 | color: #1b263b; 108 | animation: dot-flashing 0.4s infinite linear alternate; 109 | animation-delay: 0.2s; 110 | } 111 | 112 | .dot-flashing::before, 113 | .dot-flashing::after { 114 | content: ""; 115 | display: inline-block; 116 | position: absolute; 117 | top: 0; 118 | } 119 | 120 | .dot-flashing::before { 121 | left: -15px; 122 | width: 10px; 123 | height: 10px; 124 | border-radius: 5px; 125 | background-color: #1b263b; 126 | color: #1b263b; 127 | animation: dot-flashing 0.4s infinite alternate; 128 | animation-delay: 0s; 129 | } 130 | 131 | .dot-flashing::after { 132 | left: 15px; 133 | width: 10px; 134 | height: 10px; 135 | border-radius: 5px; 136 | background-color: #1b263b; 137 | color: #1b263b; 138 | animation: dot-flashing 0.4s infinite alternate; 139 | animation-delay: 0.4s; 140 | } 141 | 142 | @keyframes dot-flashing { 143 | 0% { 144 | background-color: #1b263b; 145 | } 146 | 147 | 50%, 148 | 100% { 149 | background-color: #415a77; 150 | } 151 
| } 152 | 153 | /* ANSWERS */ 154 | #queriesAnswersContainer { 155 | display: block; 156 | color: white; 157 | margin-top: 0.5rem; 158 | } 159 | 160 | #answer { 161 | color: #333333; 162 | } 163 | 164 | #answerWrapper { 165 | display: none; 166 | background-color: #ffd166; 167 | border-radius: 8px; 168 | padding: 0.5rem; 169 | margin-top: 0.5rem; 170 | } 171 | 172 | .queriesAnswers { 173 | border-radius: 8px; 174 | background-color: #ffd166; 175 | padding: 0.5rem; 176 | color: #333333; 177 | } 178 | 179 | #lastQuery { 180 | color: rgb(188, 188, 188); 181 | } 182 | 183 | #lastAnswer { 184 | color: white; 185 | margin-top: 0.5rem; 186 | } 187 | 188 | #lastRequest { 189 | padding: 0.5rem; 190 | margin-top: 0.5rem; 191 | background-color: #333333; 192 | border-radius: 4px; 193 | } 194 | 195 | /* ANSWER OPTIONS */ 196 | .timeStamp { 197 | color: #9a8c98; 198 | } 199 | 200 | .copyRow { 201 | display: flex; 202 | flex-direction: row; 203 | align-items: end; 204 | justify-content: space-between; 205 | color: #a7a7a7; 206 | margin-top: 0.5rem; 207 | } 208 | 209 | .copyText { 210 | display: none; 211 | color: #a7a7a7; 212 | margin-right: 0.5rem; 213 | } 214 | 215 | .copyButton { 216 | color: #415a77; 217 | background-color: transparent; 218 | border: none; 219 | cursor: pointer; 220 | padding: 0; 221 | margin-left: 0.5rem; 222 | } 223 | 224 | .copyButton:hover { 225 | color: #5e80a7; 226 | background-color: transparent; 227 | } 228 | 229 | .removeButton { 230 | color: #415a77; 231 | background-color: transparent; 232 | border: none; 233 | cursor: pointer; 234 | padding: 0; 235 | } 236 | 237 | .removeButton:hover { 238 | color: #5e80a7; 239 | background-color: transparent; 240 | } 241 | -------------------------------------------------------------------------------- /examples/chrome-extension/src/popup.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Chatbot 6 | 7 | 11 | 12 | 13 | 14 |
15 |

Initializing model...

16 |
17 |
18 |

19 |
20 | 25 | 28 |
29 | 30 |
31 |
32 |
33 | 34 |
35 |
36 |
37 | 38 | 45 |
46 |
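The chrome-extension example's `popup.ts` is not reproduced in this section; the `content.js` above simply answers any port message with `document.body.innerText`. A minimal sketch of the popup side of that handshake, assuming illustrative function and port names rather than the repository's actual `popup.ts`:

```ts
// Hypothetical popup-side counterpart to the content script shown above.
// It connects to the content script in the active tab and asks for the page
// text, which can then be folded into the prompt sent to the WebLLM engine.
function fetchPageContents(): Promise<string> {
  return new Promise((resolve) => {
    chrome.tabs.query({ currentWindow: true, active: true }, (tabs) => {
      // Opening a port fires chrome.runtime.onConnect in content.js.
      const port = chrome.tabs.connect(tabs[0].id!, { name: "channelName" });
      port.onMessage.addListener((msg) => resolve(msg.contents));
      // content.js replies to any message with { contents: document.body.innerText }.
      port.postMessage({});
    });
  });
}
```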
47 | 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /examples/embeddings/README.md: -------------------------------------------------------------------------------- 1 | # WebLLM Get Started App 2 | 3 | This folder provides a minimum demo to show WebLLM API in a webapp setting. 4 | To try it out, you can do the following steps under this folder 5 | 6 | ```bash 7 | npm install 8 | npm start 9 | ``` 10 | 11 | Note if you would like to hack WebLLM core package, 12 | you can change web-llm dependencies as `"file:../.."`, and follow the build from source 13 | instruction in the project to build webllm locally. This option is only recommended 14 | if you would like to hack WebLLM core package. 15 | -------------------------------------------------------------------------------- /examples/embeddings/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "embeddings-example", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "start": "parcel src/embeddings.html --port 8885", 7 | "build": "parcel build src/embeddings.html --dist-dir lib" 8 | }, 9 | "devDependencies": { 10 | "buffer": "^5.7.1", 11 | "parcel": "^2.8.3", 12 | "process": "^0.11.10", 13 | "tslib": "^2.3.1", 14 | "typescript": "^4.9.5", 15 | "url": "^0.11.3" 16 | }, 17 | "dependencies": { 18 | "@mlc-ai/web-llm": "^0.2.79", 19 | "langchain": "0.2.15" 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /examples/embeddings/src/embeddings.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 |

WebLLM Test Page

8 | Open console to see output 9 |
10 |
11 | 12 | 13 |

Prompt

14 | 15 | 16 |

Response

17 | 18 |
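The embeddings example's TypeScript source (which also exercises the `langchain` dependency declared above) is not reproduced in this section. A minimal sketch of the OpenAI-style embeddings call the demo centers on — the model id is an assumption; any embedding model from the prebuilt list should work:

```ts
import * as webllm from "@mlc-ai/web-llm";

// Hedged sketch: request embeddings through WebLLM's OpenAI-style API surface.
async function embeddingsSketch() {
  const engine = await webllm.CreateMLCEngine(
    "snowflake-arctic-embed-m-q0f32-MLC-b4", // illustrative embedding model id
  );
  const reply = await engine.embeddings.create({
    input: ["The quick brown fox", "jumps over the lazy dog"],
  });
  // One embedding vector per input string.
  console.log(reply.data.map((d) => d.embedding.length));
}
```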
19 | 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /examples/function-calling/README.md: -------------------------------------------------------------------------------- 1 | ### OpenAI API Demos - Function calling 2 | 3 | This folder contains two main ways of using function calling with WebLLM. 4 | 5 | `function-calling-manual` demonstrates how you can use function calling with Llama3.1 and Hermes2 6 | without using the `tools`, `tool_choice`, and `tool_call` fields. This is the most flexible way and you can follow 7 | the instruction given by the model releaser and iterate yourself on top of that. However, you need to do parsing on your own, which differs for each model. For instance, Hermes2 models use `` and `` to wrap around a tool call, which may be very different from other models' format. 8 | 9 | `function-calling-openai` conforms to the OpenAI function calling usage, leveraging `tools`, `tool_choice`, and `tool_call` 10 | fields. This is more usable, but sacrifices the flexibility since we have pre-defined system prompt 11 | for this. 12 | -------------------------------------------------------------------------------- /examples/function-calling/function-calling-manual/README.md: -------------------------------------------------------------------------------- 1 | ### Demos - Function calling 2 | 3 | Run `npm install` first, followed by `npm start`. 4 | 5 | Note if you would like to hack WebLLM core package, 6 | you can change web-llm dependencies as `"file:../../.."`, and follow the build from source 7 | instruction in the project to build webllm locally. This option is only recommended 8 | if you would like to hack WebLLM core package. 9 | -------------------------------------------------------------------------------- /examples/function-calling/function-calling-manual/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "openai-api", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "start": "parcel src/function_calling_manual.html --port 8888", 7 | "build": "parcel build src/function_calling_manual.html --dist-dir lib" 8 | }, 9 | "devDependencies": { 10 | "buffer": "^5.7.1", 11 | "parcel": "^2.8.3", 12 | "process": "^0.11.10", 13 | "tslib": "^2.3.1", 14 | "typescript": "^4.9.5", 15 | "url": "^0.11.3" 16 | }, 17 | "dependencies": { 18 | "@mlc-ai/web-llm": "^0.2.79" 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /examples/function-calling/function-calling-manual/src/function_calling_manual.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 | 8 |

WebLLM Test Page

9 | Open console to see output 10 |
11 |
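The function-calling README above notes that the manual approach leaves response parsing to the caller and that the format differs per model; Hermes-2-Pro, for instance, wraps each call in `<tool_call>...</tool_call>` tags around a JSON object. Since `function_calling_manual.ts` is not reproduced in this section, here is a rough, hedged sketch of such a parser (the helper name and regex are illustrative, not the repository's code):

```ts
// Hedged sketch: extract Hermes-2-Pro-style tool calls from a raw completion,
// assuming each call is emitted as <tool_call>{"name": ..., "arguments": ...}</tool_call>.
interface ParsedToolCall {
  name: string;
  arguments: Record<string, unknown>;
}

function parseHermesToolCalls(reply: string): ParsedToolCall[] {
  const calls: ParsedToolCall[] = [];
  const pattern = /<tool_call>([\s\S]*?)<\/tool_call>/g;
  for (const match of reply.matchAll(pattern)) {
    try {
      calls.push(JSON.parse(match[1].trim()));
    } catch {
      // Ignore fragments that are not valid JSON.
    }
  }
  return calls;
}
```

Other models use different wrappers, which is exactly the flexibility-versus-convenience trade-off the README describes between the manual and the OpenAI-style approach.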
12 | 13 | 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /examples/function-calling/function-calling-openai/README.md: -------------------------------------------------------------------------------- 1 | ### Demos - Function calling 2 | 3 | Run `npm install` first, followed by `npm start`. 4 | 5 | Note if you would like to hack WebLLM core package, 6 | you can change web-llm dependencies as `"file:../../.."`, and follow the build from source 7 | instruction in the project to build webllm locally. This option is only recommended 8 | if you would like to hack WebLLM core package. 9 | -------------------------------------------------------------------------------- /examples/function-calling/function-calling-openai/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "openai-api", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "start": "parcel src/function_calling_openai.html --port 8888", 7 | "build": "parcel build src/function_calling_openai.html --dist-dir lib" 8 | }, 9 | "devDependencies": { 10 | "buffer": "^5.7.1", 11 | "parcel": "^2.8.3", 12 | "process": "^0.11.10", 13 | "tslib": "^2.3.1", 14 | "typescript": "^4.9.5", 15 | "url": "^0.11.3" 16 | }, 17 | "dependencies": { 18 | "@mlc-ai/web-llm": "^0.2.79" 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /examples/function-calling/function-calling-openai/src/function_calling_openai.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 | 8 |

WebLLM Test Page

9 | Open console to see output 10 |
11 |
12 | 13 | 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /examples/function-calling/function-calling-openai/src/function_calling_openai.ts: -------------------------------------------------------------------------------- 1 | import * as webllm from "@mlc-ai/web-llm"; 2 | 3 | function setLabel(id: string, text: string) { 4 | const label = document.getElementById(id); 5 | if (label == null) { 6 | throw Error("Cannot find label " + id); 7 | } 8 | label.innerText = text; 9 | } 10 | 11 | async function main() { 12 | const initProgressCallback = (report: webllm.InitProgressReport) => { 13 | setLabel("init-label", report.text); 14 | }; 15 | const selectedModel = "Hermes-2-Pro-Llama-3-8B-q4f16_1-MLC"; 16 | const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine( 17 | selectedModel, 18 | { initProgressCallback: initProgressCallback }, 19 | ); 20 | 21 | const tools: Array = [ 22 | { 23 | type: "function", 24 | function: { 25 | name: "get_current_weather", 26 | description: "Get the current weather in a given location", 27 | parameters: { 28 | type: "object", 29 | properties: { 30 | location: { 31 | type: "string", 32 | description: "The city and state, e.g. San Francisco, CA", 33 | }, 34 | unit: { type: "string", enum: ["celsius", "fahrenheit"] }, 35 | }, 36 | required: ["location"], 37 | }, 38 | }, 39 | }, 40 | ]; 41 | 42 | const request: webllm.ChatCompletionRequest = { 43 | stream: true, // works with stream as well, where the last chunk returns tool_calls 44 | stream_options: { include_usage: true }, 45 | messages: [ 46 | { 47 | role: "user", 48 | content: 49 | "What is the current weather in celsius in Pittsburgh and Tokyo?", 50 | }, 51 | ], 52 | tool_choice: "auto", 53 | tools: tools, 54 | }; 55 | 56 | if (!request.stream) { 57 | const reply0 = await engine.chat.completions.create(request); 58 | console.log(reply0.choices[0]); 59 | console.log(reply0.usage); 60 | } else { 61 | // If streaming, the last chunk returns tool calls 62 | const asyncChunkGenerator = await engine.chat.completions.create(request); 63 | let message = ""; 64 | let lastChunk: webllm.ChatCompletionChunk | undefined; 65 | let usageChunk: webllm.ChatCompletionChunk | undefined; 66 | for await (const chunk of asyncChunkGenerator) { 67 | console.log(chunk); 68 | message += chunk.choices[0]?.delta?.content || ""; 69 | setLabel("generate-label", message); 70 | if (!chunk.usage) { 71 | lastChunk = chunk; 72 | } 73 | usageChunk = chunk; 74 | } 75 | console.log(lastChunk!.choices[0].delta); 76 | console.log(usageChunk!.usage); 77 | } 78 | } 79 | 80 | main(); 81 | -------------------------------------------------------------------------------- /examples/get-started-web-worker/README.md: -------------------------------------------------------------------------------- 1 | # WebLLM Get Started with WebWorker 2 | 3 | This folder provides a minimum demo to show WebLLM API using 4 | [WebWorker](https://developer.mozilla.org/en-US/docs/Web/API/Web_Workers_API/Using_web_workers). 5 | The main benefit of web worker is that all ML workloads runs on a separate thread as a result 6 | will less likely block the UI. 7 | 8 | To try it out, you can do the following steps under this folder 9 | 10 | ```bash 11 | npm install 12 | npm start 13 | ``` 14 | 15 | Note if you would like to hack WebLLM core package. 16 | You can change web-llm dependencies as `"file:../.."`, and follow the build from source 17 | instruction in the project to build webllm locally. 
This option is only recommended 18 | if you would like to hack WebLLM core package. 19 | -------------------------------------------------------------------------------- /examples/get-started-web-worker/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "get-started-web-worker", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "start": "parcel src/get_started.html --port 8885", 7 | "build": "parcel build src/get_started.html --dist-dir lib" 8 | }, 9 | "devDependencies": { 10 | "buffer": "^6.0.3", 11 | "parcel": "^2.8.3", 12 | "process": "^0.11.10", 13 | "tslib": "^2.3.1", 14 | "typescript": "^4.9.5", 15 | "url": "^0.11.3" 16 | }, 17 | "dependencies": { 18 | "@mlc-ai/web-llm": "^0.2.79" 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /examples/get-started-web-worker/src/get_started.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 |

WebLLM Test Page

8 | Open console to see output 9 |
10 |
11 | 12 | 13 |

Prompt

14 | 15 | 16 |

Response

17 | 18 |
19 | 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /examples/get-started-web-worker/src/main.ts: -------------------------------------------------------------------------------- 1 | import * as webllm from "@mlc-ai/web-llm"; 2 | 3 | function setLabel(id: string, text: string) { 4 | const label = document.getElementById(id); 5 | if (label == null) { 6 | throw Error("Cannot find label " + id); 7 | } 8 | label.innerText = text; 9 | } 10 | 11 | // There are two demonstrations, pick one to run 12 | 13 | /** 14 | * Chat completion (OpenAI style) without streaming, where we get the entire response at once. 15 | */ 16 | async function mainNonStreaming() { 17 | const initProgressCallback = (report: webllm.InitProgressReport) => { 18 | setLabel("init-label", report.text); 19 | }; 20 | const selectedModel = "Llama-3.1-8B-Instruct-q4f32_1-MLC"; 21 | 22 | const engine: webllm.MLCEngineInterface = 23 | await webllm.CreateWebWorkerMLCEngine( 24 | new Worker(new URL("./worker.ts", import.meta.url), { type: "module" }), 25 | selectedModel, 26 | { initProgressCallback: initProgressCallback }, 27 | ); 28 | 29 | const request: webllm.ChatCompletionRequest = { 30 | messages: [ 31 | { 32 | role: "system", 33 | content: 34 | "You are a helpful, respectful and honest assistant. " + 35 | "Be as happy as you can when speaking please. ", 36 | }, 37 | { role: "user", content: "Provide me three US states." }, 38 | { role: "assistant", content: "California, New York, Pennsylvania." }, 39 | { role: "user", content: "Two more please!" }, 40 | ], 41 | n: 3, 42 | temperature: 1.5, 43 | max_tokens: 256, 44 | }; 45 | 46 | const reply0 = await engine.chat.completions.create(request); 47 | console.log(reply0); 48 | 49 | console.log(reply0.usage); 50 | } 51 | 52 | /** 53 | * Chat completion (OpenAI style) with streaming, where delta is sent while generating response. 54 | */ 55 | async function mainStreaming() { 56 | const initProgressCallback = (report: webllm.InitProgressReport) => { 57 | setLabel("init-label", report.text); 58 | }; 59 | const selectedModel = "Llama-3.1-8B-Instruct-q4f32_1-MLC"; 60 | 61 | const engine: webllm.MLCEngineInterface = 62 | await webllm.CreateWebWorkerMLCEngine( 63 | new Worker(new URL("./worker.ts", import.meta.url), { type: "module" }), 64 | selectedModel, 65 | { initProgressCallback: initProgressCallback }, 66 | ); 67 | 68 | const request: webllm.ChatCompletionRequest = { 69 | stream: true, 70 | stream_options: { include_usage: true }, 71 | messages: [ 72 | { 73 | role: "system", 74 | content: 75 | "You are a helpful, respectful and honest assistant. " + 76 | "Be as happy as you can when speaking please. ", 77 | }, 78 | { role: "user", content: "Provide me three US states." }, 79 | { role: "assistant", content: "California, New York, Pennsylvania." }, 80 | { role: "user", content: "Two more please!" 
}, 81 | ], 82 | temperature: 1.5, 83 | max_tokens: 256, 84 | }; 85 | 86 | const asyncChunkGenerator = await engine.chat.completions.create(request); 87 | let message = ""; 88 | for await (const chunk of asyncChunkGenerator) { 89 | console.log(chunk); 90 | message += chunk.choices[0]?.delta?.content || ""; 91 | setLabel("generate-label", message); 92 | if (chunk.usage) { 93 | console.log(chunk.usage); // only last chunk has usage 94 | } 95 | // engine.interruptGenerate(); // works with interrupt as well 96 | } 97 | console.log("Final message:\n", await engine.getMessage()); // the concatenated message 98 | } 99 | 100 | // Run one of the function below 101 | // mainNonStreaming(); 102 | mainStreaming(); 103 | -------------------------------------------------------------------------------- /examples/get-started-web-worker/src/worker.ts: -------------------------------------------------------------------------------- 1 | import { WebWorkerMLCEngineHandler } from "@mlc-ai/web-llm"; 2 | 3 | // Hookup an engine to a worker handler 4 | const handler = new WebWorkerMLCEngineHandler(); 5 | self.onmessage = (msg: MessageEvent) => { 6 | handler.onmessage(msg); 7 | }; 8 | -------------------------------------------------------------------------------- /examples/get-started/README.md: -------------------------------------------------------------------------------- 1 | # WebLLM Get Started App 2 | 3 | This folder provides a minimum demo to show WebLLM API in a webapp setting. 4 | To try it out, you can do the following steps under this folder 5 | 6 | ```bash 7 | npm install 8 | npm start 9 | ``` 10 | 11 | Note if you would like to hack WebLLM core package. 12 | You can change web-llm dependencies as `"file:../.."`, and follow the build from source 13 | instruction in the project to build webllm locally. This option is only recommended 14 | if you would like to hack WebLLM core package. 15 | -------------------------------------------------------------------------------- /examples/get-started/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "get-started", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "start": "parcel src/get_started.html --port 8888", 7 | "build": "parcel build src/get_started.html --dist-dir lib" 8 | }, 9 | "devDependencies": { 10 | "buffer": "^5.7.1", 11 | "parcel": "^2.8.3", 12 | "process": "^0.11.10", 13 | "tslib": "^2.3.1", 14 | "typescript": "^4.9.5", 15 | "url": "^0.11.3" 16 | }, 17 | "dependencies": { 18 | "@mlc-ai/web-llm": "^0.2.79" 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /examples/get-started/src/get_started.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 |

WebLLM Test Page

8 | Open console to see output 9 |
10 |
11 | 12 | 13 |

Prompt

14 | 15 | 16 |

Response

17 | 18 |
19 | 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /examples/get-started/src/get_started.ts: -------------------------------------------------------------------------------- 1 | import * as webllm from "@mlc-ai/web-llm"; 2 | 3 | function setLabel(id: string, text: string) { 4 | const label = document.getElementById(id); 5 | if (label == null) { 6 | throw Error("Cannot find label " + id); 7 | } 8 | label.innerText = text; 9 | } 10 | 11 | async function main() { 12 | const initProgressCallback = (report: webllm.InitProgressReport) => { 13 | setLabel("init-label", report.text); 14 | }; 15 | // Option 1: If we do not specify appConfig, we use `prebuiltAppConfig` defined in `config.ts` 16 | const selectedModel = "Llama-3.1-8B-Instruct-q4f32_1-MLC"; 17 | const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine( 18 | selectedModel, 19 | { 20 | initProgressCallback: initProgressCallback, 21 | logLevel: "INFO", // specify the log level 22 | }, 23 | // customize kv cache, use either context_window_size or sliding_window_size (with attention sink) 24 | { 25 | context_window_size: 2048, 26 | // sliding_window_size: 1024, 27 | // attention_sink_size: 4, 28 | }, 29 | ); 30 | 31 | // Option 2: Specify your own model other than the prebuilt ones 32 | // const appConfig: webllm.AppConfig = { 33 | // model_list: [ 34 | // { 35 | // model: "https://huggingface.co/mlc-ai/Llama-3.1-8B-Instruct-q4f32_1-MLC", 36 | // model_id: "Llama-3.1-8B-Instruct-q4f32_1-MLC", 37 | // model_lib: 38 | // webllm.modelLibURLPrefix + 39 | // webllm.modelVersion + 40 | // "/Llama-3_1-8B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm", 41 | // overrides: { 42 | // context_window_size: 2048, 43 | // }, 44 | // }, 45 | // ], 46 | // }; 47 | // const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine( 48 | // selectedModel, 49 | // { appConfig: appConfig, initProgressCallback: initProgressCallback }, 50 | // ); 51 | 52 | // Option 3: Instantiate MLCEngine() and call reload() separately 53 | // const engine: webllm.MLCEngineInterface = new webllm.MLCEngine({ 54 | // appConfig: appConfig, // if do not specify, we use webllm.prebuiltAppConfig 55 | // initProgressCallback: initProgressCallback, 56 | // }); 57 | // await engine.reload(selectedModel); 58 | 59 | const reply0 = await engine.chat.completions.create({ 60 | messages: [{ role: "user", content: "List three US states." }], 61 | // below configurations are all optional 62 | n: 3, 63 | temperature: 1.5, 64 | max_tokens: 256, 65 | // 46510 and 7188 are "California", and 8421 and 51325 are "Texas" in Llama-3.1-8B-Instruct 66 | // So we would have a higher chance of seeing the latter two, but never the first in the answer 67 | logit_bias: { 68 | "46510": -100, 69 | "7188": -100, 70 | "8421": 5, 71 | "51325": 5, 72 | }, 73 | logprobs: true, 74 | top_logprobs: 2, 75 | }); 76 | console.log(reply0); 77 | console.log(reply0.usage); 78 | 79 | // To change model, either create a new engine via `CreateMLCEngine()`, or call `engine.reload(modelId)` 80 | } 81 | 82 | main(); 83 | -------------------------------------------------------------------------------- /examples/json-mode/README.md: -------------------------------------------------------------------------------- 1 | ### OpenAI API Demos 2 | 3 | Run `npm install` first, followed by `npm start`. 
4 | 5 | Note if you would like to hack WebLLM core package, 6 | you can change web-llm dependencies as `"file:../.."`, and follow the build from source 7 | instruction in the project to build webllm locally. This option is only recommended 8 | if you would like to hack WebLLM core package. 9 | -------------------------------------------------------------------------------- /examples/json-mode/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "openai-api", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "start": "parcel src/json_mode.html --port 8888", 7 | "build": "parcel build src/json_mode.html --dist-dir lib" 8 | }, 9 | "devDependencies": { 10 | "buffer": "^5.7.1", 11 | "parcel": "^2.8.3", 12 | "process": "^0.11.10", 13 | "tslib": "^2.3.1", 14 | "typescript": "^4.9.5", 15 | "url": "^0.11.3" 16 | }, 17 | "dependencies": { 18 | "@mlc-ai/web-llm": "^0.2.79" 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /examples/json-mode/src/json_mode.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 | 8 |

WebLLM Test Page

9 | Open console to see output. 10 |
11 |
12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /examples/json-mode/src/json_mode.ts: -------------------------------------------------------------------------------- 1 | import * as webllm from "@mlc-ai/web-llm"; 2 | 3 | function setLabel(id: string, text: string) { 4 | const label = document.getElementById(id); 5 | if (label == null) { 6 | throw Error("Cannot find label " + id); 7 | } 8 | label.innerText = text; 9 | } 10 | 11 | async function main() { 12 | const initProgressCallback = (report: webllm.InitProgressReport) => { 13 | setLabel("init-label", report.text); 14 | }; 15 | // Pick any one of these models to start trying -- most models in WebLLM support grammar 16 | const selectedModel = "Llama-3.2-3B-Instruct-q4f16_1-MLC"; 17 | // const selectedModel = "Qwen2.5-1.5B-Instruct-q4f16_1-MLC"; 18 | // const selectedModel = "Phi-3.5-mini-instruct-q4f16_1-MLC"; 19 | const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine( 20 | selectedModel, 21 | { initProgressCallback: initProgressCallback }, 22 | ); 23 | // Note that you'd need to prompt the model to answer in JSON either in 24 | // user's message or the system prompt 25 | const request: webllm.ChatCompletionRequest = { 26 | stream: false, // works with streaming, logprobs, top_logprobs as well 27 | messages: [ 28 | { 29 | role: "user", 30 | content: "Write a short JSON file introducing yourself.", 31 | }, 32 | ], 33 | n: 2, 34 | max_tokens: 128, 35 | response_format: { type: "json_object" } as webllm.ResponseFormat, 36 | }; 37 | 38 | const reply0 = await engine.chatCompletion(request); 39 | console.log(reply0); 40 | console.log("First reply's last choice:\n" + (await engine.getMessage())); 41 | console.log(reply0.usage); 42 | } 43 | 44 | main(); 45 | -------------------------------------------------------------------------------- /examples/json-schema/README.md: -------------------------------------------------------------------------------- 1 | ### OpenAI API Demos 2 | 3 | Run `npm install` first, followed by `npm start`. 4 | 5 | Note if you would like to hack WebLLM core package, 6 | you can change web-llm dependencies as `"file:../.."`, and follow the build from source 7 | instruction in the project to build webllm locally. This option is only recommended 8 | if you would like to hack WebLLM core package. 9 | -------------------------------------------------------------------------------- /examples/json-schema/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "openai-api", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "start": "parcel src/json_schema.html --port 8885", 7 | "build": "parcel build src/json_schema.html --dist-dir lib" 8 | }, 9 | "devDependencies": { 10 | "buffer": "^5.7.1", 11 | "parcel": "^2.8.3", 12 | "process": "^0.11.10", 13 | "tslib": "^2.3.1", 14 | "typescript": "^4.9.5", 15 | "url": "^0.11.3" 16 | }, 17 | "dependencies": { 18 | "@mlc-ai/web-llm": "^0.2.79" 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /examples/json-schema/src/json_schema.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 | 8 |

WebLLM Test Page

9 | Open console to see output. 10 |
11 |
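The json-schema example's TypeScript source is not reproduced in this section. A minimal sketch of the kind of request it is built around, assuming (as the example's name suggests) that the `schema` field of `ResponseFormat` accepts a stringified JSON schema; the schema itself is made up for illustration:

```ts
import * as webllm from "@mlc-ai/web-llm";

// Hedged sketch: constrain generation to a JSON schema via response_format.
async function jsonSchemaSketch(engine: webllm.MLCEngineInterface) {
  // Illustrative schema; any valid JSON schema string would do.
  const schema = JSON.stringify({
    type: "object",
    properties: {
      name: { type: "string" },
      age: { type: "integer" },
      hobbies: { type: "array", items: { type: "string" } },
    },
    required: ["name", "age"],
  });
  const reply = await engine.chat.completions.create({
    messages: [
      { role: "user", content: "Introduce a fictional person as JSON." },
    ],
    max_tokens: 128,
    // Assumption: ResponseFormat takes an optional `schema` string next to `type`.
    response_format: {
      type: "json_object",
      schema: schema,
    } as webllm.ResponseFormat,
  });
  console.log(reply.choices[0].message.content);
}
```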
12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /examples/logit-processor/README.md: -------------------------------------------------------------------------------- 1 | # WebLLM Logit Processor and Low-Level API Example 2 | 3 | This folder explains the usage of `LogitProcessor`, demonstrating how it can be used to 4 | manipulate the raw logits before sampling the token (e.g. setting certain tokens to `inf` or `-inf`). 5 | We demonstrate how to use it with and without a web worker, which can be toggled with `USE_WEB_WORKER` 6 | in `logit_processor.ts` (see `worker.ts` on how `LogitProcessor` plays a role there). 7 | 8 | We also demonstrate the usage of a low-level API `forwardTokensAndSample()`, which, unlike `chat.completions.create()` 9 | that assumes autoregressive chatting, gives us more fine-grained control. 10 | 11 | See `my_logit_processor.ts` on how to customize your own logit processor. Here we make the logit 12 | of token 0 `100.0` manually, large enough that we should expect to always sample token 0, which 13 | is indeed the case if we observe the console log. We also demonstrate that a LogitProcessor can be 14 | stateful, and the state can also be cleared with `LogitProcessor.resetState()`. 15 | 16 | To try it out, you can do the following steps under this folder 17 | 18 | ```bash 19 | npm install 20 | npm start 21 | ``` 22 | 23 | Note if you would like to hack WebLLM core package, you can change web-llm dependencies as `"file:../.."`, and follow the build from source instruction in the project to build webllm locally. This option is only recommended if you would like to hack WebLLM core package. 24 | -------------------------------------------------------------------------------- /examples/logit-processor/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "logit-processor", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "start": "parcel src/logit_processor.html --port 8885", 7 | "build": "parcel build src/logit_processor.html --dist-dir lib" 8 | }, 9 | "devDependencies": { 10 | "buffer": "^5.7.1", 11 | "parcel": "^2.8.3", 12 | "process": "^0.11.10", 13 | "tslib": "^2.3.1", 14 | "typescript": "^4.9.5", 15 | "url": "^0.11.3" 16 | }, 17 | "dependencies": { 18 | "@mlc-ai/web-llm": "^0.2.79" 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /examples/logit-processor/src/logit_processor.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 | 8 |

WebLLM Logit Processor Test Page

9 | Open console to see the effect of your logit processor. 10 |
11 |
12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /examples/logit-processor/src/logit_processor.ts: -------------------------------------------------------------------------------- 1 | import * as webllm from "@mlc-ai/web-llm"; 2 | import { MyLogitProcessor } from "./my_logit_processor"; 3 | 4 | const USE_WEB_WORKER = true; // Toggle this to use Logit Processor without a web worker 5 | const AUTOREGRESS_LIMIT = 32; // How many tokens to generate for this test 6 | 7 | function setLabel(id: string, text: string) { 8 | const label = document.getElementById(id); 9 | if (label == null) { 10 | throw Error("Cannot find label " + id); 11 | } 12 | label.innerText = text; 13 | } 14 | 15 | async function main() { 16 | const initProgressCallback = (report: webllm.InitProgressReport) => { 17 | setLabel("init-label", report.text); 18 | }; 19 | // Instantiate myLogitProcessor, registering in the logitProcessorRegistry 20 | const myLogitProcessor = new MyLogitProcessor(); 21 | const logitProcessorRegistry = new Map(); 22 | logitProcessorRegistry.set("phi-2-q4f32_1-MLC", myLogitProcessor); 23 | 24 | let engine: webllm.MLCEngineInterface; 25 | 26 | // Depending on whether we use a web worker, the code is slightly different 27 | if (USE_WEB_WORKER) { 28 | // see worker.ts on how LogitProcessor plays a role there 29 | engine = await webllm.CreateWebWorkerMLCEngine( 30 | new Worker(new URL("./worker.ts", import.meta.url), { type: "module" }), 31 | "phi-2-q4f32_1-MLC", 32 | { initProgressCallback: initProgressCallback }, 33 | ); 34 | } else { 35 | engine = await webllm.CreateMLCEngine("phi-2-q4f32_1-MLC", { 36 | initProgressCallback: initProgressCallback, 37 | logitProcessorRegistry: logitProcessorRegistry, 38 | }); 39 | } 40 | 41 | // Below we demonstrate the usage of a low-level API `forwardTokensAndSample()` 42 | const prompt: Array = [42]; 43 | let nextToken = await engine.forwardTokensAndSample( 44 | prompt, 45 | /*isPrefill=*/ true, 46 | ); 47 | console.log(nextToken); 48 | 49 | let counter = prompt.length; 50 | while (counter < AUTOREGRESS_LIMIT) { 51 | counter += 1; 52 | nextToken = await engine.forwardTokensAndSample( 53 | [nextToken], 54 | /*isPrefill=*/ false, 55 | ); 56 | console.log(nextToken); 57 | } 58 | 59 | // By calling `engine.resetChat()`, we triggers MyLogitProcessor.resetState() 60 | engine.resetChat(); 61 | counter = prompt.length; 62 | nextToken = await engine.forwardTokensAndSample(prompt, /*isPrefill=*/ true); 63 | console.log(nextToken); 64 | while (counter < AUTOREGRESS_LIMIT) { 65 | counter += 1; 66 | nextToken = await engine.forwardTokensAndSample( 67 | [nextToken], 68 | /*isPrefill=*/ false, 69 | ); 70 | console.log(nextToken); 71 | } 72 | 73 | // `forwardTokensAndSample()` is made compatible with registering runtime stats. 
74 | console.log(await engine.runtimeStatsText()); 75 | } 76 | 77 | main(); 78 | -------------------------------------------------------------------------------- /examples/logit-processor/src/my_logit_processor.ts: -------------------------------------------------------------------------------- 1 | import * as webllm from "@mlc-ai/web-llm"; 2 | 3 | // Define LogitProcessor 4 | export class MyLogitProcessor implements webllm.LogitProcessor { 5 | private tokenSequence: Array = []; 6 | 7 | processLogits(logits: Float32Array): Float32Array { 8 | logits[0] = 100.0; // should be enough so that we always sample token 0 below 9 | return logits; 10 | } 11 | 12 | processSampledToken(token: number): void { 13 | this.tokenSequence.push(token); 14 | console.log("processSampledToken: " + this.tokenSequence.length); 15 | } 16 | 17 | resetState(): void { 18 | this.tokenSequence = []; 19 | console.log("resetState"); 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /examples/logit-processor/src/worker.ts: -------------------------------------------------------------------------------- 1 | // Serve the chat workload through web worker 2 | import * as webllm from "@mlc-ai/web-llm"; 3 | import { MyLogitProcessor } from "./my_logit_processor"; 4 | 5 | console.log("Use web worker for logit processor"); 6 | 7 | const myLogitProcessor = new MyLogitProcessor(); 8 | const logitProcessorRegistry = new Map(); 9 | logitProcessorRegistry.set("phi-2-q4f32_1-MLC", myLogitProcessor); 10 | 11 | const handler = new webllm.WebWorkerMLCEngineHandler(); 12 | handler.setLogitProcessorRegistry(logitProcessorRegistry); 13 | self.onmessage = (msg: MessageEvent) => { 14 | handler.onmessage(msg); 15 | }; 16 | -------------------------------------------------------------------------------- /examples/multi-models/README.md: -------------------------------------------------------------------------------- 1 | # WebLLM Get Started App 2 | 3 | This folder provides a minimum demo to show WebLLM API in a webapp setting. 4 | To try it out, you can do the following steps under this folder 5 | 6 | ```bash 7 | npm install 8 | npm start 9 | ``` 10 | 11 | Note if you would like to hack WebLLM core package. 12 | You can change web-llm dependencies as `"file:../.."`, and follow the build from source 13 | instruction in the project to build webllm locally. This option is only recommended 14 | if you would like to hack WebLLM core package. 15 | -------------------------------------------------------------------------------- /examples/multi-models/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "get-started", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "start": "parcel src/multi_models.html --port 8888", 7 | "build": "parcel build src/multi_models.html --dist-dir lib" 8 | }, 9 | "devDependencies": { 10 | "buffer": "^5.7.1", 11 | "parcel": "^2.8.3", 12 | "process": "^0.11.10", 13 | "tslib": "^2.3.1", 14 | "typescript": "^4.9.5", 15 | "url": "^0.11.3" 16 | }, 17 | "dependencies": { 18 | "@mlc-ai/web-llm": "^0.2.79" 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /examples/multi-models/src/multi_models.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 |

WebLLM Test Page

8 | Open console to see output 9 |
10 |
11 | 12 | 13 |

Prompt 1

14 | 15 | 16 |

Response from model 1

17 | 18 |
19 | 20 |

Prompt 2

21 | 22 | 23 |

Response from model 2

24 | 25 |
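The multi-models example's `main.ts` is not included in this section; the HTML above only shows the two prompt/response panes it drives. A rough sketch of what loading two models into one engine and routing requests between them might look like — the array-style loading signature, the per-request `model` field, and the model ids are all assumptions, not the repository's code:

```ts
import * as webllm from "@mlc-ai/web-llm";

// Hedged sketch: two models in one worker-backed engine, selected per request.
async function multiModelSketch() {
  const models = [
    "Llama-3.1-8B-Instruct-q4f32_1-MLC", // illustrative model ids
    "Phi-3.5-mini-instruct-q4f16_1-MLC",
  ];
  // Assumption: the create/reload APIs accept a list of model ids.
  const engine = await webllm.CreateWebWorkerMLCEngine(
    new Worker(new URL("./worker.ts", import.meta.url), { type: "module" }),
    models,
  );
  for (const model of models) {
    const reply = await engine.chat.completions.create({
      model, // assumption: selects which loaded model serves this request
      messages: [{ role: "user", content: "Name one US state." }],
      max_tokens: 32,
    });
    console.log(model, reply.choices[0].message.content);
  }
}
```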
26 | 27 | 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /examples/multi-models/src/worker.ts: -------------------------------------------------------------------------------- 1 | import { WebWorkerMLCEngineHandler } from "@mlc-ai/web-llm"; 2 | 3 | // Hookup an engine to a worker handler 4 | const handler = new WebWorkerMLCEngineHandler(); 5 | self.onmessage = (msg: MessageEvent) => { 6 | handler.onmessage(msg); 7 | }; 8 | -------------------------------------------------------------------------------- /examples/multi-round-chat/README.md: -------------------------------------------------------------------------------- 1 | ### OpenAI API Demos 2 | 3 | Run `npm install` first, followed by `npm start`. 4 | 5 | Note if you would like to hack WebLLM core package, 6 | you can change web-llm dependencies as `"file:../.."`, and follow the build from source 7 | instruction in the project to build webllm locally. This option is only recommended 8 | if you would like to hack WebLLM core package. 9 | -------------------------------------------------------------------------------- /examples/multi-round-chat/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "openai-api", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "start": "parcel src/multi_round_chat.html --port 8888", 7 | "build": "parcel build src/multi_round_chat.html --dist-dir lib" 8 | }, 9 | "devDependencies": { 10 | "buffer": "^5.7.1", 11 | "parcel": "^2.8.3", 12 | "process": "^0.11.10", 13 | "tslib": "^2.3.1", 14 | "typescript": "^4.9.5", 15 | "url": "^0.11.3" 16 | }, 17 | "dependencies": { 18 | "@mlc-ai/web-llm": "^0.2.79" 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /examples/multi-round-chat/src/multi_round_chat.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 | 8 |

WebLLM Test Page

9 | Open console to see output 10 |
11 |
12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /examples/multi-round-chat/src/multi_round_chat.ts: -------------------------------------------------------------------------------- 1 | import * as webllm from "@mlc-ai/web-llm"; 2 | 3 | function setLabel(id: string, text: string) { 4 | const label = document.getElementById(id); 5 | if (label == null) { 6 | throw Error("Cannot find label " + id); 7 | } 8 | label.innerText = text; 9 | } 10 | 11 | /** 12 | * We demonstrate multiround chatting. Though users are required to maintain chat history, internally 13 | * we compare provided `messages` with the internal chat history. If it matches, we will reuse KVs 14 | * and hence save computation -- essentially an implicit internal optimization. 15 | */ 16 | async function main() { 17 | const initProgressCallback = (report: webllm.InitProgressReport) => { 18 | setLabel("init-label", report.text); 19 | }; 20 | const selectedModel = "Llama-3.1-8B-Instruct-q4f32_1-MLC"; 21 | const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine( 22 | selectedModel, 23 | { initProgressCallback: initProgressCallback }, 24 | ); 25 | 26 | // Round 0 27 | const messages: webllm.ChatCompletionMessageParam[] = [ 28 | { 29 | role: "system", 30 | content: 31 | "You are a helpful, respectful and honest assistant. " + 32 | "Be as happy as you can when speaking please. ", 33 | }, 34 | { role: "user", content: "Provide me three US states." }, 35 | ]; 36 | 37 | const request0: webllm.ChatCompletionRequest = { 38 | stream: false, // can be streaming, same behavior 39 | messages: messages, 40 | }; 41 | 42 | const reply0 = await engine.chat.completions.create(request0); 43 | const replyMessage0 = await engine.getMessage(); 44 | console.log(reply0); 45 | console.log(replyMessage0); 46 | console.log(reply0.usage); 47 | 48 | // Round 1 49 | // Append generated response to messages 50 | messages.push({ role: "assistant", content: replyMessage0 }); 51 | // Append new user input 52 | messages.push({ role: "user", content: "Two more please!" }); 53 | // Below line would cause an internal reset (clear KV cache, etc.) since the history no longer 54 | // matches the new request 55 | // messages[0].content = "Another system prompt"; 56 | 57 | const request1: webllm.ChatCompletionRequest = { 58 | stream: false, // can be streaming, same behavior 59 | messages: messages, 60 | }; 61 | 62 | const reply1 = await engine.chat.completions.create(request1); 63 | const replyMessage1 = await engine.getMessage(); 64 | console.log(reply1); 65 | console.log(replyMessage1); 66 | console.log(reply1.usage); 67 | 68 | // If we used multiround chat, request1 should only prefill a small number of tokens 69 | const prefillTokens0 = reply0.usage?.prompt_tokens; 70 | const prefillTokens1 = reply1.usage?.prompt_tokens; 71 | console.log("Requset 0 prompt tokens: ", prefillTokens0); 72 | console.log("Requset 1 prompt tokens: ", prefillTokens1); 73 | if ( 74 | prefillTokens0 === undefined || 75 | prefillTokens1 === undefined || 76 | prefillTokens1 > prefillTokens0 77 | ) { 78 | throw Error("Multi-round chat is not triggered as expected."); 79 | } 80 | } 81 | 82 | main(); 83 | -------------------------------------------------------------------------------- /examples/next-simple-chat/.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 
2 | 3 | # dependencies 4 | /node_modules 5 | /.pnp 6 | .pnp.js 7 | 8 | # testing 9 | /coverage 10 | 11 | # next.js 12 | /.next/ 13 | /out/ 14 | 15 | # production 16 | /build 17 | 18 | # misc 19 | .DS_Store 20 | *.pem 21 | 22 | # debug 23 | npm-debug.log* 24 | yarn-debug.log* 25 | yarn-error.log* 26 | 27 | # local env files 28 | .env*.local 29 | 30 | # vercel 31 | .vercel 32 | 33 | # typescript 34 | *.tsbuildinfo 35 | next-env.d.ts 36 | -------------------------------------------------------------------------------- /examples/next-simple-chat/README.md: -------------------------------------------------------------------------------- 1 | This is a [Next.js](https://nextjs.org/) project using web-llm. 2 | 3 | ## Getting Started 4 | 5 | First, install web-llm from source. 6 | 7 | Then, run the development server: 8 | 9 | ```bash 10 | npm run dev 11 | # or 12 | yarn dev 13 | # or 14 | pnpm dev 15 | ``` 16 | 17 | Open [http://localhost:3000](http://localhost:3000) with your browser to see the result. 18 | -------------------------------------------------------------------------------- /examples/next-simple-chat/next.config.js: -------------------------------------------------------------------------------- 1 | /** @type {import('next').NextConfig} */ 2 | const nextConfig = { 3 | reactStrictMode: true, 4 | 5 | webpack: (config, { isServer }) => { 6 | // Fixes npm packages that depend on `fs` module 7 | if (!isServer) { 8 | config.resolve.fallback = { 9 | ...config.resolve.fallback, // if you miss it, all the other options in fallback, specified 10 | // by next.js will be dropped. Doesn't make much sense, but how it is 11 | fs: false, // the solution 12 | module: false, 13 | perf_hooks: false, 14 | }; 15 | } 16 | 17 | return config; 18 | }, 19 | }; 20 | 21 | module.exports = nextConfig; 22 | -------------------------------------------------------------------------------- /examples/next-simple-chat/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "next-simple-chat", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "dev": "next dev", 7 | "build": "next build", 8 | "start": "next start", 9 | "lint": "next lint" 10 | }, 11 | "dependencies": { 12 | "@mlc-ai/web-llm": "^0.2.79", 13 | "@types/node": "20.3.3", 14 | "@types/react": "18.2.14", 15 | "@types/react-dom": "18.2.6", 16 | "autoprefixer": "10.4.14", 17 | "eslint": "8.44.0", 18 | "eslint-config-next": "13.4.7", 19 | "next": "^13.5.6", 20 | "postcss": "8.4.24", 21 | "react": "18.2.0", 22 | "react-dom": "18.2.0", 23 | "tailwindcss": "3.3.2", 24 | "typescript": "5.1.6" 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /examples/next-simple-chat/postcss.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | plugins: { 3 | tailwindcss: {}, 4 | autoprefixer: {}, 5 | }, 6 | }; 7 | -------------------------------------------------------------------------------- /examples/next-simple-chat/public/next.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/next-simple-chat/public/vercel.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/next-simple-chat/src/pages/_app.tsx: 
-------------------------------------------------------------------------------- 1 | import "~/styles/globals.css"; 2 | import type { AppProps } from "next/app"; 3 | 4 | export default function App({ Component, pageProps }: AppProps) { 5 | return ; 6 | } 7 | -------------------------------------------------------------------------------- /examples/next-simple-chat/src/pages/_document.tsx: -------------------------------------------------------------------------------- 1 | import { Html, Head, Main, NextScript } from "next/document"; 2 | 3 | export default function Document() { 4 | return ( 5 | 6 | 7 | 8 |
9 | 10 | 11 | 12 | ); 13 | } 14 | -------------------------------------------------------------------------------- /examples/next-simple-chat/src/pages/api/hello.ts: -------------------------------------------------------------------------------- 1 | // Next.js API route support: https://nextjs.org/docs/api-routes/introduction 2 | import type { NextApiRequest, NextApiResponse } from "next"; 3 | 4 | type Data = { 5 | name: string; 6 | }; 7 | 8 | export default function handler( 9 | req: NextApiRequest, 10 | res: NextApiResponse, 11 | ) { 12 | res.status(200).json({ name: "John Doe" }); 13 | } 14 | -------------------------------------------------------------------------------- /examples/next-simple-chat/src/pages/index.tsx: -------------------------------------------------------------------------------- 1 | import Head from "next/head"; 2 | import ChatComponent from "~/utils/chat_component"; 3 | import { Inter } from "next/font/google"; 4 | 5 | const inter = Inter({ subsets: ["latin"] }); 6 | 7 | export default function Home() { 8 | return ( 9 | <> 10 | 11 | Example App 12 | 16 | 17 | 18 |
21 | 22 |
23 | 24 | ); 25 | } 26 | -------------------------------------------------------------------------------- /examples/next-simple-chat/src/styles/globals.css: -------------------------------------------------------------------------------- 1 | @tailwind base; 2 | @tailwind components; 3 | @tailwind utilities; 4 | 5 | :root { 6 | --foreground-rgb: 0, 0, 0; 7 | --background-start-rgb: 214, 219, 220; 8 | --background-end-rgb: 255, 255, 255; 9 | } 10 | 11 | @media (prefers-color-scheme: dark) { 12 | :root { 13 | --foreground-rgb: 255, 255, 255; 14 | --background-start-rgb: 0, 0, 0; 15 | --background-end-rgb: 0, 0, 0; 16 | } 17 | } 18 | 19 | body { 20 | color: rgb(var(--foreground-rgb)); 21 | background: linear-gradient( 22 | to bottom, 23 | transparent, 24 | rgb(var(--background-end-rgb)) 25 | ) 26 | rgb(var(--background-start-rgb)); 27 | } 28 | 29 | a { 30 | color: inherit; 31 | text-decoration: none; 32 | } 33 | 34 | * { 35 | box-sizing: border-box; 36 | } 37 | 38 | chatui-chat { 39 | height: 100; 40 | } 41 | 42 | .chatui { 43 | display: flex; 44 | flex-flow: column wrap; 45 | justify-content: space-between; 46 | width: 100%; 47 | max-width: 867px; 48 | margin: 25px 10px; 49 | height: 600px; 50 | border: 2px solid #ddd; 51 | border-radius: 5px; 52 | box-shadow: 0 15px 15px -5px rgba(0, 0, 0, 0.2); 53 | } 54 | 55 | s .chatui-header { 56 | display: flex; 57 | justify-content: space-between; 58 | padding: 10px; 59 | border-bottom: 2px solid #ddd; 60 | background: #eee; 61 | color: #666; 62 | } 63 | 64 | .chatui-chat { 65 | flex: 1; 66 | overflow-y: auto; 67 | padding: 10px; 68 | } 69 | 70 | .chatui-chat::-webkit-scrollbar { 71 | width: 6px; 72 | } 73 | 74 | .chatui-chat::-webkit-scrollbar-track { 75 | background: #ddd; 76 | } 77 | 78 | .chatui-chat::-webkit-scrollbar-thumb { 79 | background: #bdbdbd; 80 | } 81 | 82 | .msg { 83 | display: flex; 84 | align-items: flex-end; 85 | margin-bottom: 10px; 86 | } 87 | 88 | .msg:last-of-type { 89 | margin: 0; 90 | } 91 | 92 | .msg-bubble { 93 | max-width: 450px; 94 | padding: 15px; 95 | border-radius: 15px; 96 | background: #ececec; 97 | } 98 | 99 | .left-msg .msg-bubble { 100 | border-bottom-left-radius: 0; 101 | } 102 | 103 | .error-msg .msg-bubble { 104 | border-bottom-left-radius: 0; 105 | color: #f15959; 106 | } 107 | 108 | .init-msg .msg-bubble { 109 | border-bottom-left-radius: 0; 110 | } 111 | 112 | .right-msg { 113 | flex-direction: row-reverse; 114 | } 115 | 116 | .right-msg .msg-bubble { 117 | background: #579ffb; 118 | color: #fff; 119 | border-bottom-right-radius: 0; 120 | } 121 | 122 | .chatui-inputarea { 123 | display: flex; 124 | padding: 10px; 125 | border-top: 2px solid #ddd; 126 | background: #eee; 127 | } 128 | 129 | .chatui-inputarea * { 130 | padding: 10px; 131 | border: none; 132 | border-radius: 3px; 133 | font-size: 1em; 134 | } 135 | 136 | .chatui-input { 137 | flex: 1; 138 | background: #ddd; 139 | } 140 | 141 | .chatui-btn { 142 | margin-left: 10px; 143 | background: #579ffb; 144 | color: #fff; 145 | font-weight: bold; 146 | cursor: pointer; 147 | padding: 10px; 148 | } 149 | 150 | .chatui-btn:hover { 151 | background: #577bfb; 152 | } 153 | 154 | .chatui-chat { 155 | background-color: #fcfcfe; 156 | } 157 | -------------------------------------------------------------------------------- /examples/next-simple-chat/src/utils/chat_component.tsx: -------------------------------------------------------------------------------- 1 | import { useState } from "react"; 2 | import { MLCEngine } from "@mlc-ai/web-llm"; 3 | import ChatUI 
from "~/utils/chat_ui"; 4 | 5 | const ChatComponent = () => { 6 | const [messages, setMessages] = useState<{ kind: string; text: string }[]>( 7 | [], 8 | ); 9 | const [prompt, setPrompt] = useState(""); 10 | const [runtimeStats, setRuntimeStats] = useState(""); 11 | const [chat_ui] = useState(new ChatUI(new MLCEngine())); 12 | const updateMessage = (kind: string, text: string, append: boolean) => { 13 | if (kind == "init") { 14 | text = "[System Initalize] " + text; 15 | } 16 | const msgCopy = [...messages]; 17 | if (msgCopy.length == 0 || append) { 18 | setMessages([...msgCopy, { kind, text }]); 19 | } else { 20 | msgCopy[msgCopy.length - 1] = { kind, text }; 21 | setMessages([...msgCopy]); 22 | } 23 | }; 24 | return ( 25 |
26 | 36 | 37 |
38 |
39 | {messages.map((value, index) => ( 40 |
41 |
42 |
${value.text}
43 |
44 |
45 | ))} 46 |
47 | 48 |
49 | { 55 | if (event.key === "Enter") { 56 | chat_ui 57 | .onGenerate(prompt, updateMessage, setRuntimeStats) 58 | .catch((error) => console.log(error)); 59 | } 60 | }} 61 | value={prompt} 62 | onChange={(event) => setPrompt(event.target.value)} 63 | /> 64 | 74 |
75 |
76 | 77 |
78 | 90 | 91 |
92 |
93 | ); 94 | }; 95 | 96 | export default ChatComponent; 97 | -------------------------------------------------------------------------------- /examples/next-simple-chat/tailwind.config.js: -------------------------------------------------------------------------------- 1 | /** @type {import('tailwindcss').Config} */ 2 | module.exports = { 3 | content: [ 4 | "./src/pages/**/*.{js,ts,jsx,tsx,mdx}", 5 | "./src/components/**/*.{js,ts,jsx,tsx,mdx}", 6 | "./src/app/**/*.{js,ts,jsx,tsx,mdx}", 7 | ], 8 | theme: { 9 | extend: { 10 | backgroundImage: { 11 | "gradient-radial": "radial-gradient(var(--tw-gradient-stops))", 12 | "gradient-conic": 13 | "conic-gradient(from 180deg at 50% 50%, var(--tw-gradient-stops))", 14 | }, 15 | }, 16 | }, 17 | plugins: [], 18 | }; 19 | -------------------------------------------------------------------------------- /examples/next-simple-chat/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "es5", 4 | "lib": ["dom", "dom.iterable", "esnext"], 5 | "allowJs": true, 6 | "skipLibCheck": true, 7 | "strict": true, 8 | "forceConsistentCasingInFileNames": true, 9 | "noEmit": true, 10 | "esModuleInterop": true, 11 | "module": "esnext", 12 | "moduleResolution": "node", 13 | "resolveJsonModule": true, 14 | "isolatedModules": true, 15 | "jsx": "preserve", 16 | "incremental": true, 17 | "paths": { 18 | "~/*": ["./src/*"] 19 | } 20 | }, 21 | "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx"], 22 | "exclude": ["node_modules"] 23 | } 24 | -------------------------------------------------------------------------------- /examples/qwen3/README.md: -------------------------------------------------------------------------------- 1 | ### OpenAI API Demos w/ Qwen3 2 | 3 | Run `npm install` first, followed by `npm start`. 4 | 5 | Note if you would like to hack WebLLM core package, 6 | you can change web-llm dependencies as `"file:../.."`, and follow the build from source 7 | instruction in the project to build webllm locally. This option is only recommended 8 | if you would like to hack WebLLM core package. 9 | -------------------------------------------------------------------------------- /examples/qwen3/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "qwen3_example", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "start": "parcel src/qwen3_example.html --port 8883", 7 | "build": "parcel build src/qwen3_example.html --dist-dir lib" 8 | }, 9 | "devDependencies": { 10 | "buffer": "^5.7.1", 11 | "parcel": "^2.8.3", 12 | "process": "^0.11.10", 13 | "tslib": "^2.3.1", 14 | "typescript": "^4.9.5", 15 | "url": "^0.11.3" 16 | }, 17 | "dependencies": { 18 | "@mlc-ai/web-llm": "^0.2.79" 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /examples/qwen3/src/qwen3_example.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 | 8 |

WebLLM Test Page

9 | Open console to see output 10 |
11 |
12 | 13 |

Response

14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /examples/seed-to-reproduce/README.md: -------------------------------------------------------------------------------- 1 | ### OpenAI API Demos 2 | 3 | Run `npm install` first, followed by `npm start`. 4 | 5 | Note if you would like to hack WebLLM core package, 6 | you can change web-llm dependencies as `"file:../.."`, and follow the build from source 7 | instruction in the project to build webllm locally. This option is only recommended 8 | if you would like to hack WebLLM core package. 9 | -------------------------------------------------------------------------------- /examples/seed-to-reproduce/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "seed-to-reproduce", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "start": "parcel src/seed.html --port 8888", 7 | "build": "parcel build src/seed.html --dist-dir lib" 8 | }, 9 | "devDependencies": { 10 | "buffer": "^5.7.1", 11 | "parcel": "^2.8.3", 12 | "process": "^0.11.10", 13 | "tslib": "^2.3.1", 14 | "typescript": "^4.9.5", 15 | "url": "^0.11.3" 16 | }, 17 | "dependencies": { 18 | "@mlc-ai/web-llm": "^0.2.79" 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /examples/seed-to-reproduce/src/seed.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 | 8 |

WebLLM Test Page

9 | Open console to see output. We make two generations with the same seed; 10 | we should expect them to be identical. 11 |
12 |
13 | 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /examples/seed-to-reproduce/src/seed.ts: -------------------------------------------------------------------------------- 1 | import * as webllm from "@mlc-ai/web-llm"; 2 | 3 | function setLabel(id: string, text: string) { 4 | const label = document.getElementById(id); 5 | if (label == null) { 6 | throw Error("Cannot find label " + id); 7 | } 8 | label.innerText = text; 9 | } 10 | 11 | /** 12 | * We domnstrate the effect of seeding. The prompt is about writing a poem and we use a high 13 | * `temperature`, making the sampling distribution supposedly more random. However, we demonstrate 14 | * that with seeding, we should see the exact same result being generated across two trials. 15 | * With `n > 1`, all choices should also be exactly the same. 16 | */ 17 | async function main() { 18 | const initProgressCallback = (report: webllm.InitProgressReport) => { 19 | setLabel("init-label", report.text); 20 | }; 21 | const selectedModel = "Llama-3.1-8B-Instruct-q4f32_1-MLC"; 22 | const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine( 23 | selectedModel, 24 | { initProgressCallback: initProgressCallback }, 25 | ); 26 | 27 | const request: webllm.ChatCompletionRequest = { 28 | stream: false, // works with streaming as well 29 | messages: [ 30 | { role: "user", content: "Write a creative Haiku about Pittsburgh" }, 31 | ], 32 | n: 3, 33 | temperature: 1.2, // high temperature gives much more random results 34 | max_tokens: 128, // To save time; enough to demonstrate the effect 35 | seed: 42, 36 | }; 37 | 38 | const reply0 = await engine.chat.completions.create(request); 39 | console.log(reply0); 40 | console.log("First reply's last choice:\n" + (await engine.getMessage())); 41 | console.log(reply0.usage); 42 | 43 | const reply1 = await engine.chat.completions.create(request); 44 | console.log(reply1); 45 | console.log("Second reply's last choice:\n" + (await engine.getMessage())); 46 | 47 | // Rigorously check the generation results of each choice for the two requests 48 | for (const choice0 of reply0.choices) { 49 | const id = choice0.index; 50 | const choice1 = reply1.choices[id]; 51 | if (choice0.message.content !== choice1.message.content) { 52 | throw Error( 53 | "Chocie " + 54 | id + 55 | " of the two generations are different despite seeding", 56 | ); 57 | } 58 | } 59 | 60 | console.log(reply1.usage); 61 | } 62 | 63 | // Run one of the functions 64 | main(); 65 | -------------------------------------------------------------------------------- /examples/service-worker/README.md: -------------------------------------------------------------------------------- 1 | # WebLLM Service Worker Example 2 | 3 | This example shows how we can create a page with Web-LLM running in service worker. 
4 | 5 | ```bash 6 | npm install 7 | npm run build 8 | ``` 9 | -------------------------------------------------------------------------------- /examples/service-worker/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "web-llm-service-worker", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "start": "rm -rf .parcel-cache && parcel src/index.html --port 3000", 7 | "build": "rm -rf .parcel-cache && parcel build src/index.html --dist-dir lib" 8 | }, 9 | "devDependencies": { 10 | "buffer": "^6.0.3", 11 | "parcel": "^2.8.3", 12 | "process": "^0.11.10", 13 | "tslib": "^2.3.1", 14 | "typescript": "^4.9.5", 15 | "url": "^0.11.3" 16 | }, 17 | "dependencies": { 18 | "@mlc-ai/web-llm": "^0.2.79" 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /examples/service-worker/src/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 |

WebLLM Test Page

8 | Open console to see output 9 |
10 |
11 | 12 | 13 |

Prompt

14 | 15 | 16 |

Response

17 | 18 |
19 | 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /examples/service-worker/src/main.ts: -------------------------------------------------------------------------------- 1 | import * as webllm from "@mlc-ai/web-llm"; 2 | 3 | const registerServiceWorker = async () => { 4 | if ("serviceWorker" in navigator) { 5 | try { 6 | const registration = await navigator.serviceWorker.register( 7 | new URL("sw.ts", import.meta.url), 8 | { type: "module" }, 9 | ); 10 | if (registration.installing) { 11 | console.log("Service worker installing"); 12 | } else if (registration.waiting) { 13 | console.log("Service worker installed"); 14 | } else if (registration.active) { 15 | console.log("Service worker active"); 16 | } 17 | } catch (error) { 18 | console.error(`Registration failed with ${error}`); 19 | } 20 | } 21 | }; 22 | 23 | function setLabel(id: string, text: string) { 24 | const label = document.getElementById(id); 25 | if (label == null) { 26 | throw Error("Cannot find label " + id); 27 | } 28 | label.innerText = text; 29 | } 30 | 31 | // There are two demonstrations, pick one to run 32 | 33 | /** 34 | * Chat completion (OpenAI style) without streaming, where we get the entire response at once. 35 | */ 36 | async function mainNonStreaming() { 37 | const initProgressCallback = (report: webllm.InitProgressReport) => { 38 | setLabel("init-label", report.text); 39 | }; 40 | const selectedModel = "Llama-3.1-8B-Instruct-q4f32_1-MLC"; 41 | 42 | const engine: webllm.MLCEngineInterface = 43 | await webllm.CreateServiceWorkerMLCEngine(selectedModel, { 44 | initProgressCallback: initProgressCallback, 45 | }); 46 | 47 | const request: webllm.ChatCompletionRequest = { 48 | messages: [ 49 | { 50 | role: "system", 51 | content: 52 | "You are a helpful, respectful and honest assistant. " + 53 | "Be as happy as you can when speaking please. ", 54 | }, 55 | { role: "user", content: "Provide me three US states." }, 56 | { role: "assistant", content: "California, New York, Pennsylvania." }, 57 | { role: "user", content: "Two more please!" }, 58 | ], 59 | n: 3, 60 | temperature: 1.5, 61 | max_tokens: 256, 62 | }; 63 | 64 | const reply0 = await engine.chat.completions.create(request); 65 | console.log(reply0); 66 | setLabel("generate-label", reply0.choices[0].message.content || ""); 67 | 68 | console.log(reply0.usage); 69 | } 70 | 71 | /** 72 | * Chat completion (OpenAI style) with streaming, where delta is sent while generating response. 73 | */ 74 | async function mainStreaming() { 75 | const initProgressCallback = (report: webllm.InitProgressReport) => { 76 | setLabel("init-label", report.text); 77 | }; 78 | const selectedModel = "Llama-3.1-8B-Instruct-q4f32_1-MLC"; 79 | 80 | const engine: webllm.ServiceWorkerMLCEngine = 81 | await webllm.CreateServiceWorkerMLCEngine(selectedModel, { 82 | initProgressCallback: initProgressCallback, 83 | }); 84 | 85 | const request: webllm.ChatCompletionRequest = { 86 | stream: true, 87 | stream_options: { include_usage: true }, 88 | messages: [ 89 | { 90 | role: "system", 91 | content: 92 | "You are a helpful, respectful and honest assistant. " + 93 | "Be as happy as you can when speaking please. ", 94 | }, 95 | { role: "user", content: "Provide me three US states." }, 96 | { role: "assistant", content: "California, New York, Pennsylvania." }, 97 | { role: "user", content: "Two more please!" 
}, 98 | ], 99 | temperature: 1.5, 100 | max_tokens: 256, 101 | }; 102 | 103 | const asyncChunkGenerator = await engine.chat.completions.create(request); 104 | let message = ""; 105 | for await (const chunk of asyncChunkGenerator) { 106 | console.log(chunk); 107 | message += chunk.choices[0]?.delta?.content || ""; 108 | setLabel("generate-label", message); 109 | if (chunk.usage) { 110 | console.log(chunk.usage); // only last chunk has usage 111 | } 112 | // engine.interruptGenerate(); // works with interrupt as well 113 | } 114 | console.log("Final message:\n", await engine.getMessage()); // the concatenated message 115 | } 116 | 117 | registerServiceWorker(); 118 | // Run one of the function below 119 | // mainNonStreaming(); 120 | mainStreaming(); 121 | -------------------------------------------------------------------------------- /examples/service-worker/src/sw.ts: -------------------------------------------------------------------------------- 1 | import { ServiceWorkerMLCEngineHandler } from "@mlc-ai/web-llm"; 2 | 3 | let handler: ServiceWorkerMLCEngineHandler; 4 | 5 | self.addEventListener("activate", function (event) { 6 | handler = new ServiceWorkerMLCEngineHandler(); 7 | console.log("Web-LLM Service Worker Activated"); 8 | }); 9 | -------------------------------------------------------------------------------- /examples/simple-chat-js/index.css: -------------------------------------------------------------------------------- 1 | body, 2 | html { 3 | font-family: Arial, sans-serif; 4 | padding: 10px 20px; 5 | } 6 | 7 | .download-container { 8 | display: flex; 9 | justify-content: space-between; 10 | margin-bottom: 20px; 11 | } 12 | 13 | #download-status { 14 | border: solid 1px black; 15 | box-shadow: 16 | 0 10px 15px -3px rgba(0, 0, 0, 0.1), 17 | 0 4px 6px -2px rgba(0, 0, 0, 0.05); 18 | padding: 10px; 19 | } 20 | 21 | .chat-container { 22 | height: 400px; 23 | width: 100%; 24 | border: 2px solid black; 25 | display: flex; 26 | flex-direction: column; 27 | } 28 | 29 | .chat-box { 30 | overflow-y: scroll; 31 | background-color: #c3c3c3; 32 | border: 1px solid #ccc; 33 | padding: 5px; 34 | flex: 1 1; 35 | } 36 | 37 | .chat-stats { 38 | background-color: #d3eceb; 39 | flex: 0 0; 40 | padding: 10px; 41 | font-size: 0.75rem; 42 | } 43 | 44 | .message-container { 45 | width: 100%; 46 | display: flex; 47 | } 48 | 49 | .message { 50 | padding: 10px; 51 | margin: 10px 0; 52 | border-radius: 10px; 53 | width: fit-content; 54 | } 55 | 56 | .message-container.user { 57 | justify-content: end; 58 | } 59 | 60 | .message-container.assistant { 61 | justify-content: start; 62 | } 63 | 64 | .message-container.user .message { 65 | background: #007bff; 66 | color: #fff; 67 | } 68 | 69 | .message-container.assistant .message { 70 | background: #f1f0f0; 71 | color: #333; 72 | } 73 | 74 | .chat-input-container { 75 | min-height: 40px; 76 | flex: 0 0; 77 | display: flex; 78 | } 79 | 80 | #user-input { 81 | width: 70%; 82 | padding: 10px; 83 | border: 1px solid #ccc; 84 | } 85 | 86 | button { 87 | width: 25%; 88 | padding: 10px; 89 | border: none; 90 | background-color: #007bff; 91 | color: white; 92 | cursor: pointer; 93 | } 94 | 95 | button:disabled { 96 | background-color: lightgray; 97 | cursor: not-allowed; 98 | } 99 | 100 | button:hover:not(:disabled) { 101 | background-color: #0056b3; 102 | } 103 | 104 | .hidden { 105 | display: none; 106 | } 107 | -------------------------------------------------------------------------------- /examples/simple-chat-js/index.html: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Simple Chatbot 5 | 6 | 7 | 8 | 9 | 10 | 11 |

Step 1: Initialize WebLLM and Download Model

12 |
13 | 14 | 15 |
16 | 17 | 18 |

Step 2: Chat

19 |
20 |
21 | 22 |
23 | 24 | 25 |
26 |
27 | 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /examples/simple-chat-js/index.js: -------------------------------------------------------------------------------- 1 | import * as webllm from "https://esm.run/@mlc-ai/web-llm"; 2 | 3 | /*************** WebLLM logic ***************/ 4 | const messages = [ 5 | { 6 | content: "You are a helpful AI agent helping users.", 7 | role: "system", 8 | }, 9 | ]; 10 | 11 | const availableModels = webllm.prebuiltAppConfig.model_list.map( 12 | (m) => m.model_id, 13 | ); 14 | let selectedModel = "Llama-3.1-8B-Instruct-q4f32_1-1k"; 15 | 16 | // Callback function for initializing progress 17 | function updateEngineInitProgressCallback(report) { 18 | console.log("initialize", report.progress); 19 | document.getElementById("download-status").textContent = report.text; 20 | } 21 | 22 | // Create engine instance 23 | const engine = new webllm.MLCEngine(); 24 | engine.setInitProgressCallback(updateEngineInitProgressCallback); 25 | 26 | async function initializeWebLLMEngine() { 27 | document.getElementById("download-status").classList.remove("hidden"); 28 | selectedModel = document.getElementById("model-selection").value; 29 | const config = { 30 | temperature: 1.0, 31 | top_p: 1, 32 | }; 33 | await engine.reload(selectedModel, config); 34 | } 35 | 36 | async function streamingGenerating(messages, onUpdate, onFinish, onError) { 37 | try { 38 | let curMessage = ""; 39 | let usage; 40 | const completion = await engine.chat.completions.create({ 41 | stream: true, 42 | messages, 43 | stream_options: { include_usage: true }, 44 | }); 45 | for await (const chunk of completion) { 46 | const curDelta = chunk.choices[0]?.delta.content; 47 | if (curDelta) { 48 | curMessage += curDelta; 49 | } 50 | if (chunk.usage) { 51 | usage = chunk.usage; 52 | } 53 | onUpdate(curMessage); 54 | } 55 | const finalMessage = await engine.getMessage(); 56 | onFinish(finalMessage, usage); 57 | } catch (err) { 58 | onError(err); 59 | } 60 | } 61 | 62 | /*************** UI logic ***************/ 63 | function onMessageSend() { 64 | const input = document.getElementById("user-input").value.trim(); 65 | const message = { 66 | content: input, 67 | role: "user", 68 | }; 69 | if (input.length === 0) { 70 | return; 71 | } 72 | document.getElementById("send").disabled = true; 73 | 74 | messages.push(message); 75 | appendMessage(message); 76 | 77 | document.getElementById("user-input").value = ""; 78 | document 79 | .getElementById("user-input") 80 | .setAttribute("placeholder", "Generating..."); 81 | 82 | const aiMessage = { 83 | content: "typing...", 84 | role: "assistant", 85 | }; 86 | appendMessage(aiMessage); 87 | 88 | const onFinishGenerating = (finalMessage, usage) => { 89 | updateLastMessage(finalMessage); 90 | document.getElementById("send").disabled = false; 91 | const usageText = 92 | `prompt_tokens: ${usage.prompt_tokens}, ` + 93 | `completion_tokens: ${usage.completion_tokens}, ` + 94 | `prefill: ${usage.extra.prefill_tokens_per_s.toFixed(4)} tokens/sec, ` + 95 | `decoding: ${usage.extra.decode_tokens_per_s.toFixed(4)} tokens/sec`; 96 | document.getElementById("chat-stats").classList.remove("hidden"); 97 | document.getElementById("chat-stats").textContent = usageText; 98 | }; 99 | 100 | streamingGenerating( 101 | messages, 102 | updateLastMessage, 103 | onFinishGenerating, 104 | console.error, 105 | ); 106 | } 107 | 108 | function appendMessage(message) { 109 | const chatBox = document.getElementById("chat-box"); 110 | const 
container = document.createElement("div"); 111 | container.classList.add("message-container"); 112 | const newMessage = document.createElement("div"); 113 | newMessage.classList.add("message"); 114 | newMessage.textContent = message.content; 115 | 116 | if (message.role === "user") { 117 | container.classList.add("user"); 118 | } else { 119 | container.classList.add("assistant"); 120 | } 121 | 122 | container.appendChild(newMessage); 123 | chatBox.appendChild(container); 124 | chatBox.scrollTop = chatBox.scrollHeight; // Scroll to the latest message 125 | } 126 | 127 | function updateLastMessage(content) { 128 | const messageDoms = document 129 | .getElementById("chat-box") 130 | .querySelectorAll(".message"); 131 | const lastMessageDom = messageDoms[messageDoms.length - 1]; 132 | lastMessageDom.textContent = content; 133 | } 134 | 135 | /*************** UI binding ***************/ 136 | availableModels.forEach((modelId) => { 137 | const option = document.createElement("option"); 138 | option.value = modelId; 139 | option.textContent = modelId; 140 | document.getElementById("model-selection").appendChild(option); 141 | }); 142 | document.getElementById("model-selection").value = selectedModel; 143 | document.getElementById("download").addEventListener("click", function () { 144 | initializeWebLLMEngine().then(() => { 145 | document.getElementById("send").disabled = false; 146 | }); 147 | }); 148 | document.getElementById("send").addEventListener("click", function () { 149 | onMessageSend(); 150 | }); 151 | -------------------------------------------------------------------------------- /examples/simple-chat-ts/.gitignore: -------------------------------------------------------------------------------- 1 | src/app-config.js 2 | -------------------------------------------------------------------------------- /examples/simple-chat-ts/README.md: -------------------------------------------------------------------------------- 1 | # SimpleChat 2 | 3 | This folder provides a complete implementation of a simple 4 | chat app based on WebLLM. To try it out, you can do the following steps 5 | under this folder 6 | 7 | ```bash 8 | npm install 9 | npm start 10 | ``` 11 | 12 | Note if you would like to hack WebLLM core package. 13 | You can change web-llm dependencies as `"file:../.."`, and follow the build from source 14 | instruction in the project to build webllm locally. This option is only recommended 15 | if you would like to hack WebLLM core package. 16 | 17 | Due to the differences in command-line tools between Unix/Linux and Windows systems, special adaptation is necessary for Windows. Unix/Linux systems natively support commands like `cp` for file operations, which are not directly available in Windows. To ensure cross-platform compatibility, we use a Node.js script for file copying in Windows. 18 | 19 | ### Steps for Windows Users 20 | 21 | 1. **Create a Node.js Script File**: 22 | 23 | - In the `examples\simple-chat` directory, create a file named `copy-config.js`. 24 | - Add the following code to handle file copying: 25 | ```javascript 26 | const fs = require("fs"); 27 | // Copy file 28 | fs.copyFileSync("src/gh-config.js", "src/app-config.js"); 29 | ``` 30 | 31 | 2. **Modify `package.json`**: 32 | 33 | - In the `scripts` section of your `package.json`, replace Unix-style `cp` commands with our new Node.js script. 
For example: 34 | ```json 35 | "scripts": { 36 | "start": "node copy-config.js && parcel src/llm_chat.html --port 8888", 37 | "mlc-local": "node copy-config.js && parcel src/llm_chat.html --port 8888", 38 | "build": "node copy-config.js && parcel build src/llm_chat.html --dist-dir lib --no-content-hash" 39 | }, 40 | ``` 41 | 42 | 3. **Run the Application**: 43 | - Save your changes and run `npm start` in CMD or PowerShell to start the application. 44 | -------------------------------------------------------------------------------- /examples/simple-chat-ts/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "simple-chat", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "start": "cp src/gh-config.js src/app-config.js && parcel src/llm_chat.html --port 8883", 7 | "build": "cp src/gh-config.js src/app-config.js && parcel build src/llm_chat.html --dist-dir lib --no-content-hash" 8 | }, 9 | "devDependencies": { 10 | "buffer": "^5.7.1", 11 | "parcel": "^2.8.3", 12 | "process": "^0.11.10", 13 | "tslib": "^2.3.1", 14 | "typescript": "^4.9.5", 15 | "url": "^0.11.3" 16 | }, 17 | "dependencies": { 18 | "@mlc-ai/web-llm": "^0.2.79" 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /examples/simple-chat-ts/src/gh-config.js: -------------------------------------------------------------------------------- 1 | import { prebuiltAppConfig } from "@mlc-ai/web-llm"; 2 | 3 | export default { 4 | model_list: prebuiltAppConfig.model_list, 5 | use_web_worker: true, 6 | }; 7 | -------------------------------------------------------------------------------- /examples/simple-chat-ts/src/img/plane.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlc-ai/web-llm/d8b25fed8e81d6f6b27cdc07e839c1c09cfaa43d/examples/simple-chat-ts/src/img/plane.png -------------------------------------------------------------------------------- /examples/simple-chat-ts/src/img/reset.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlc-ai/web-llm/d8b25fed8e81d6f6b27cdc07e839c1c09cfaa43d/examples/simple-chat-ts/src/img/reset.png -------------------------------------------------------------------------------- /examples/simple-chat-ts/src/llm_chat.css: -------------------------------------------------------------------------------- 1 | .chatui { 2 | display: flex; 3 | position: relative; 4 | flex-flow: column wrap; 5 | justify-content: space-between; 6 | width: 100%; 7 | max-width: 867px; 8 | margin: 25px 10px; 9 | height: 600px; 10 | border: 2px solid #ddd; 11 | border-radius: 5px; 12 | background-color: #1f2027; 13 | } 14 | 15 | .chatui-select-wrapper { 16 | display: flex; 17 | justify-content: center; 18 | background-color: #1f2027; 19 | padding: 10px 0; 20 | } 21 | 22 | #chatui-select { 23 | width: 350px; 24 | background-color: #1f2027; 25 | color: white; 26 | border: none; 27 | } 28 | 29 | #chatui-select:focus { 30 | outline: none; 31 | } 32 | 33 | #chatui-select::-webkit-scrollbar { 34 | display: none; 35 | } 36 | 37 | #chatui-select option { 38 | background-color: #1f2027; 39 | color: white; 40 | } 41 | 42 | #chatui-select option:hover { 43 | background-color: #474747; 44 | color: white; 45 | } 46 | 47 | s .chatui-header { 48 | display: flex; 49 | justify-content: space-between; 50 | padding: 10px; 51 | border-bottom: 2px solid #ddd; 52 | background: #eee; 53 | color: 
#666; 54 | } 55 | 56 | /* Used to remove tiny white lines in android devices; not sure if there is a better way */ 57 | *, 58 | *::before, 59 | *::after { 60 | box-sizing: content-box; 61 | } 62 | 63 | .chatui-chat { 64 | flex: 1; 65 | overflow-y: auto; 66 | padding: 10px; 67 | background-color: #1f2027; 68 | } 69 | 70 | .chatui-chat::-webkit-scrollbar { 71 | width: 6px; 72 | } 73 | 74 | .chatui-chat::-webkit-scrollbar-track { 75 | background: #1f2027; 76 | } 77 | 78 | .chatui-chat::-webkit-scrollbar-thumb { 79 | background: #888; 80 | } 81 | 82 | .chatui-chat::-webkit-scrollbar-thumb:hover { 83 | background: #555; 84 | } 85 | 86 | .msg { 87 | display: flex; 88 | align-items: flex-end; 89 | margin-bottom: 10px; 90 | } 91 | 92 | .msg:last-of-type { 93 | margin: 0; 94 | } 95 | 96 | .msg-bubble { 97 | background-color: #f0f0f0; 98 | border-radius: 8px; 99 | padding: 16px; 100 | margin: 5px auto; 101 | width: calc(100% - 20px); 102 | box-sizing: border-box; 103 | color: black; 104 | border: none; 105 | font-size: medium; 106 | margin-left: auto; 107 | margin-right: auto; 108 | } 109 | 110 | .left-msg .msg-bubble { 111 | background-color: #343541; 112 | color: #ececec; 113 | } 114 | 115 | .error-msg .msg-bubble { 116 | background-color: #343541; 117 | color: #f15959; 118 | } 119 | 120 | .init-msg .msg-bubble { 121 | background-color: #343541; 122 | color: #ececec; 123 | } 124 | 125 | .right-msg .msg-bubble { 126 | background-color: #444654; 127 | color: #ececec; 128 | } 129 | 130 | .chatui-inputarea { 131 | display: flex; 132 | padding: 10px; 133 | border-top: 2px solid transparent; 134 | background-color: #1f2027; 135 | } 136 | 137 | .chatui-inputarea * { 138 | padding: 10px; 139 | border: none; 140 | border-radius: 3px; 141 | font-size: 1em; 142 | color: white; 143 | background: rgba(0, 0, 0, 0.3); 144 | } 145 | 146 | .chatui-input { 147 | flex: 1; 148 | background-color: #40414f; 149 | color: white; 150 | } 151 | 152 | .chatui-reset-btn { 153 | margin-left: 10px; 154 | background-color: #40414f; 155 | color: #fff; 156 | font-weight: bold; 157 | cursor: pointer; 158 | background-image: url("img/reset.png"); 159 | background-repeat: no-repeat; 160 | background-position: center; 161 | width: 40px; 162 | background-repeat: no-repeat; 163 | background-position: center; 164 | background-size: 20px 20px; 165 | } 166 | 167 | .chatui-reset-btn:hover { 168 | background-color: #03a33e; 169 | } 170 | 171 | .chatui-send-btn { 172 | margin-left: 10px; 173 | background-color: #40414f; 174 | color: #fff; 175 | font-weight: bold; 176 | cursor: pointer; 177 | background-image: url("img/plane.png"); 178 | background-repeat: no-repeat; 179 | background-position: center; 180 | width: 40px; 181 | background-repeat: no-repeat; 182 | background-position: center; 183 | background-size: 20px 20px; 184 | } 185 | 186 | .chatui-send-btn:hover { 187 | background-color: #03a33e; 188 | } 189 | -------------------------------------------------------------------------------- /examples/simple-chat-ts/src/llm_chat.html: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 |
5 | 6 |
7 |
8 | 9 |
10 | 16 | 17 | 18 |
19 |
20 | 21 |
22 | 23 |
24 | 25 | 26 | -------------------------------------------------------------------------------- /examples/simple-chat-ts/src/worker.ts: -------------------------------------------------------------------------------- 1 | // Serve the engine workload through web worker 2 | import { WebWorkerMLCEngineHandler } from "@mlc-ai/web-llm"; 3 | 4 | const handler = new WebWorkerMLCEngineHandler(); 5 | self.onmessage = (msg: MessageEvent) => { 6 | handler.onmessage(msg); 7 | }; 8 | -------------------------------------------------------------------------------- /examples/simple-chat-upload/.gitignore: -------------------------------------------------------------------------------- 1 | src/app-config.js 2 | -------------------------------------------------------------------------------- /examples/simple-chat-upload/README.md: -------------------------------------------------------------------------------- 1 | # SimpleChat 2 | 3 | This folder provides a complete implementation of a simple 4 | chat app based on WebLLM. To try it out, you can do the following steps 5 | under this folder 6 | 7 | ```bash 8 | npm install 9 | npm start 10 | ``` 11 | 12 | Note if you would like to hack WebLLM core package. 13 | You can change web-llm dependencies as `"file:../.."`, and follow the build from source 14 | instruction in the project to build webllm locally. This option is only recommended 15 | if you would like to hack WebLLM core package. 16 | 17 | Due to the differences in command-line tools between Unix/Linux and Windows systems, special adaptation is necessary for Windows. Unix/Linux systems natively support commands like `cp` for file operations, which are not directly available in Windows. To ensure cross-platform compatibility, we use a Node.js script for file copying in Windows. 18 | 19 | ### Steps for Windows Users 20 | 21 | 1. **Create a Node.js Script File**: 22 | 23 | - In the `examples\simple-chat` directory, create a file named `copy-config.js`. 24 | - Add the following code to handle file copying: 25 | ```javascript 26 | const fs = require("fs"); 27 | // Copy file 28 | fs.copyFileSync("src/gh-config.js", "src/app-config.js"); 29 | ``` 30 | 31 | 2. **Modify `package.json`**: 32 | 33 | - In the `scripts` section of your `package.json`, replace Unix-style `cp` commands with our new Node.js script. For example: 34 | ```json 35 | "scripts": { 36 | "start": "node copy-config.js && parcel src/llm_chat.html --port 8888", 37 | "mlc-local": "node copy-config.js && parcel src/llm_chat.html --port 8888", 38 | "build": "node copy-config.js && parcel build src/llm_chat.html --dist-dir lib --no-content-hash" 39 | }, 40 | ``` 41 | 42 | 3. **Run the Application**: 43 | - Save your changes and run `npm start` in CMD or PowerShell to start the application. 
44 | -------------------------------------------------------------------------------- /examples/simple-chat-upload/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "simple-chat", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "start": "cp src/gh-config.js src/app-config.js && parcel src/llm_chat.html --port 8883", 7 | "build": "cp src/gh-config.js src/app-config.js && parcel build src/llm_chat.html --dist-dir lib --no-content-hash" 8 | }, 9 | "devDependencies": { 10 | "buffer": "^5.7.1", 11 | "parcel": "^2.8.3", 12 | "process": "^0.11.10", 13 | "tslib": "^2.3.1", 14 | "typescript": "^4.9.5", 15 | "url": "^0.11.3" 16 | }, 17 | "dependencies": { 18 | "@mlc-ai/web-llm": "^0.2.31" 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /examples/simple-chat-upload/src/gh-config.js: -------------------------------------------------------------------------------- 1 | import { prebuiltAppConfig } from "@mlc-ai/web-llm"; 2 | 3 | export default { 4 | model_list: prebuiltAppConfig.model_list, 5 | use_web_worker: true, 6 | }; 7 | -------------------------------------------------------------------------------- /examples/simple-chat-upload/src/img/plane.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlc-ai/web-llm/d8b25fed8e81d6f6b27cdc07e839c1c09cfaa43d/examples/simple-chat-upload/src/img/plane.png -------------------------------------------------------------------------------- /examples/simple-chat-upload/src/img/reset.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlc-ai/web-llm/d8b25fed8e81d6f6b27cdc07e839c1c09cfaa43d/examples/simple-chat-upload/src/img/reset.png -------------------------------------------------------------------------------- /examples/simple-chat-upload/src/llm_chat.css: -------------------------------------------------------------------------------- 1 | .chatui { 2 | display: flex; 3 | position: relative; 4 | flex-flow: column wrap; 5 | justify-content: space-between; 6 | width: 100%; 7 | max-width: 867px; 8 | margin: 25px 10px; 9 | height: 600px; 10 | border: 2px solid #ddd; 11 | border-radius: 5px; 12 | background-color: #1f2027; 13 | } 14 | 15 | .chatui-select-wrapper { 16 | display: flex; 17 | justify-content: center; 18 | background-color: #1f2027; 19 | padding: 10px 0; 20 | } 21 | 22 | #chatui-select { 23 | width: 350px; 24 | background-color: #1f2027; 25 | color: white; 26 | border: none; 27 | } 28 | 29 | #chatui-select:focus { 30 | outline: none; 31 | } 32 | 33 | #chatui-select::-webkit-scrollbar { 34 | display: none; 35 | } 36 | 37 | #chatui-select option { 38 | background-color: #1f2027; 39 | color: white; 40 | } 41 | 42 | #chatui-select option:hover { 43 | background-color: #474747; 44 | color: white; 45 | } 46 | 47 | s .chatui-header { 48 | display: flex; 49 | justify-content: space-between; 50 | padding: 10px; 51 | border-bottom: 2px solid #ddd; 52 | background: #eee; 53 | color: #666; 54 | } 55 | 56 | /* Used to remove tiny white lines in android devices; not sure if there is a better way */ 57 | *, 58 | *::before, 59 | *::after { 60 | box-sizing: content-box; 61 | } 62 | 63 | .chatui-chat { 64 | flex: 1; 65 | overflow-y: auto; 66 | padding: 10px; 67 | background-color: #1f2027; 68 | } 69 | 70 | .chatui-chat::-webkit-scrollbar { 71 | width: 6px; 72 | } 73 | 74 | 
.chatui-chat::-webkit-scrollbar-track { 75 | background: #1f2027; 76 | } 77 | 78 | .chatui-chat::-webkit-scrollbar-thumb { 79 | background: #888; 80 | } 81 | 82 | .chatui-chat::-webkit-scrollbar-thumb:hover { 83 | background: #555; 84 | } 85 | 86 | .msg { 87 | display: flex; 88 | align-items: flex-end; 89 | margin-bottom: 10px; 90 | } 91 | 92 | .msg:last-of-type { 93 | margin: 0; 94 | } 95 | 96 | .msg-bubble { 97 | background-color: #f0f0f0; 98 | border-radius: 8px; 99 | padding: 16px; 100 | margin: 5px auto; 101 | width: calc(100% - 20px); 102 | box-sizing: border-box; 103 | color: black; 104 | border: none; 105 | font-size: medium; 106 | margin-left: auto; 107 | margin-right: auto; 108 | } 109 | 110 | .left-msg .msg-bubble { 111 | background-color: #343541; 112 | color: #ececec; 113 | } 114 | 115 | .error-msg .msg-bubble { 116 | background-color: #343541; 117 | color: #f15959; 118 | } 119 | 120 | .init-msg .msg-bubble { 121 | background-color: #343541; 122 | color: #ececec; 123 | } 124 | 125 | .right-msg .msg-bubble { 126 | background-color: #444654; 127 | color: #ececec; 128 | } 129 | 130 | .chatui-inputarea { 131 | display: flex; 132 | padding: 10px; 133 | border-top: 2px solid transparent; 134 | background-color: #1f2027; 135 | } 136 | 137 | .chatui-inputarea * { 138 | padding: 10px; 139 | border: none; 140 | border-radius: 3px; 141 | font-size: 1em; 142 | color: white; 143 | background: rgba(0, 0, 0, 0.3); 144 | } 145 | 146 | .chatui-input { 147 | flex: 1; 148 | background-color: #40414f; 149 | color: white; 150 | } 151 | 152 | .chatui-reset-btn { 153 | margin-left: 10px; 154 | background-color: #40414f; 155 | color: #fff; 156 | font-weight: bold; 157 | cursor: pointer; 158 | background-image: url("img/reset.png"); 159 | background-repeat: no-repeat; 160 | background-position: center; 161 | width: 40px; 162 | background-repeat: no-repeat; 163 | background-position: center; 164 | background-size: 20px 20px; 165 | } 166 | 167 | .chatui-reset-btn:hover { 168 | background-color: #03a33e; 169 | } 170 | 171 | .chatui-send-btn { 172 | margin-left: 10px; 173 | background-color: #40414f; 174 | color: #fff; 175 | font-weight: bold; 176 | cursor: pointer; 177 | background-image: url("img/plane.png"); 178 | background-repeat: no-repeat; 179 | background-position: center; 180 | width: 40px; 181 | background-repeat: no-repeat; 182 | background-position: center; 183 | background-size: 20px 20px; 184 | } 185 | 186 | .chatui-send-btn:hover { 187 | background-color: #03a33e; 188 | } 189 | -------------------------------------------------------------------------------- /examples/simple-chat-upload/src/llm_chat.html: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 |
5 | 6 |
7 |
8 | 9 | 10 | /> 17 | 18 |
19 | 25 | 26 | 27 |
28 |
29 | 30 |
31 | 32 |
33 | 34 | 35 | -------------------------------------------------------------------------------- /examples/simple-chat-upload/src/worker.ts: -------------------------------------------------------------------------------- 1 | // Serve the engine workload through web worker 2 | import { WebWorkerMLCEngineHandler } from "@mlc-ai/web-llm"; 3 | 4 | const handler = new WebWorkerMLCEngineHandler(); 5 | self.onmessage = (msg: MessageEvent) => { 6 | handler.onmessage(msg); 7 | }; 8 | -------------------------------------------------------------------------------- /examples/streaming/README.md: -------------------------------------------------------------------------------- 1 | ### OpenAI API Demos 2 | 3 | Run `npm install` first, followed by `npm start`. 4 | 5 | Note if you would like to hack WebLLM core package, 6 | you can change web-llm dependencies as `"file:../.."`, and follow the build from source 7 | instruction in the project to build webllm locally. This option is only recommended 8 | if you would like to hack WebLLM core package. 9 | -------------------------------------------------------------------------------- /examples/streaming/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "streaming", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "start": "parcel src/streaming.html --port 8888", 7 | "build": "parcel build src/streaming.html --dist-dir lib" 8 | }, 9 | "devDependencies": { 10 | "buffer": "^5.7.1", 11 | "parcel": "^2.8.3", 12 | "process": "^0.11.10", 13 | "tslib": "^2.3.1", 14 | "typescript": "^4.9.5", 15 | "url": "^0.11.3" 16 | }, 17 | "dependencies": { 18 | "@mlc-ai/web-llm": "^0.2.79" 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /examples/streaming/src/streaming.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 | 8 |

WebLLM Test Page

9 | Open console to see output 10 |
11 |
12 | 13 |

Response

14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /examples/streaming/src/streaming.ts: -------------------------------------------------------------------------------- 1 | import * as webllm from "@mlc-ai/web-llm"; 2 | 3 | function setLabel(id: string, text: string) { 4 | const label = document.getElementById(id); 5 | if (label == null) { 6 | throw Error("Cannot find label " + id); 7 | } 8 | label.innerText = text; 9 | } 10 | 11 | /** 12 | * We demonstrate chat completion with streaming, where delta is sent while generating response. 13 | */ 14 | async function main() { 15 | const initProgressCallback = (report: webllm.InitProgressReport) => { 16 | setLabel("init-label", report.text); 17 | }; 18 | const selectedModel = "Llama-3.1-8B-Instruct-q4f32_1-MLC"; 19 | const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine( 20 | selectedModel, 21 | { initProgressCallback: initProgressCallback }, 22 | ); 23 | 24 | const request: webllm.ChatCompletionRequest = { 25 | stream: true, 26 | stream_options: { include_usage: true }, 27 | messages: [ 28 | { 29 | role: "system", 30 | content: 31 | "You are a pirate chatbot who always responds in pirate speak!", 32 | }, 33 | { role: "user", content: "Who are you?" }, 34 | ], 35 | logprobs: true, 36 | top_logprobs: 2, 37 | }; 38 | 39 | const asyncChunkGenerator = await engine.chat.completions.create(request); 40 | let message = ""; 41 | for await (const chunk of asyncChunkGenerator) { 42 | console.log(chunk); 43 | message += chunk.choices[0]?.delta?.content || ""; 44 | setLabel("generate-label", message); 45 | if (chunk.usage) { 46 | console.log(chunk.usage); // only last chunk has usage 47 | } 48 | // engine.interruptGenerate(); // works with interrupt as well 49 | } 50 | console.log("Final message:\n", await engine.getMessage()); // the concatenated message 51 | } 52 | 53 | main(); 54 | -------------------------------------------------------------------------------- /examples/text-completion/README.md: -------------------------------------------------------------------------------- 1 | # WebLLM Get Started App 2 | 3 | This folder provides a minimum demo to show WebLLM API in a webapp setting. 4 | To try it out, you can do the following steps under this folder 5 | 6 | ```bash 7 | npm install 8 | npm start 9 | ``` 10 | 11 | Note if you would like to hack WebLLM core package. 12 | You can change web-llm dependencies as `"file:../.."`, and follow the build from source 13 | instruction in the project to build webllm locally. This option is only recommended 14 | if you would like to hack WebLLM core package. 
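For reference, a minimal sketch of that dependency change (assuming the example sits two levels below the WebLLM repo root, as in this tree, so `"file:../.."` resolves to the local checkout): edit the example's `package.json` to point at the local package instead of the published release, then re-run `npm install`.

```json
{
  "dependencies": {
    "@mlc-ai/web-llm": "file:../.."
  }
}
```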
15 | -------------------------------------------------------------------------------- /examples/text-completion/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "text-completion", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "start": "parcel src/text_completion.html --port 8888", 7 | "build": "parcel build src/text_completion.html --dist-dir lib" 8 | }, 9 | "devDependencies": { 10 | "buffer": "^5.7.1", 11 | "parcel": "^2.8.3", 12 | "process": "^0.11.10", 13 | "tslib": "^2.3.1", 14 | "typescript": "^4.9.5", 15 | "url": "^0.11.3" 16 | }, 17 | "dependencies": { 18 | "@mlc-ai/web-llm": "^0.2.79" 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /examples/text-completion/src/text_completion.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 |

WebLLM Test Page

8 | Open console to see output 9 |
10 |
11 | 12 | 13 |

Prompt

14 | 15 | 16 |

Response

17 | 18 |
19 | 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /examples/text-completion/src/text_completion.ts: -------------------------------------------------------------------------------- 1 | import * as webllm from "@mlc-ai/web-llm"; 2 | 3 | function setLabel(id: string, text: string) { 4 | const label = document.getElementById(id); 5 | if (label == null) { 6 | throw Error("Cannot find label " + id); 7 | } 8 | label.innerText = text; 9 | } 10 | 11 | async function main() { 12 | const initProgressCallback = (report: webllm.InitProgressReport) => { 13 | setLabel("init-label", report.text); 14 | }; 15 | 16 | // Unlike "Llama-3.1-8B-Instruct-q4f32_1-MLC", this is a base model 17 | const selectedModel = "Llama-3.1-8B-q4f32_1-MLC"; 18 | 19 | const appConfig: webllm.AppConfig = { 20 | model_list: [ 21 | { 22 | model: "https://huggingface.co/mlc-ai/Llama-3.1-8B-q4f32_1-MLC", // a base model 23 | model_id: selectedModel, 24 | model_lib: 25 | webllm.modelLibURLPrefix + 26 | webllm.modelVersion + 27 | "/Llama-3_1-8B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm", 28 | overrides: { 29 | context_window_size: 2048, 30 | }, 31 | }, 32 | ], 33 | }; 34 | const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine( 35 | selectedModel, 36 | { 37 | appConfig: appConfig, 38 | initProgressCallback: initProgressCallback, 39 | logLevel: "INFO", 40 | }, 41 | ); 42 | 43 | const reply0 = await engine.completions.create({ 44 | prompt: "List 3 US states: ", 45 | // below configurations are all optional 46 | echo: true, 47 | n: 2, 48 | max_tokens: 64, 49 | logprobs: true, 50 | top_logprobs: 2, 51 | }); 52 | console.log(reply0); 53 | console.log(reply0.usage); 54 | 55 | // To change model, either create a new engine via `CreateMLCEngine()`, or call `engine.reload(modelId)` 56 | } 57 | 58 | main(); 59 | -------------------------------------------------------------------------------- /examples/vision-model/README.md: -------------------------------------------------------------------------------- 1 | # WebLLM Get Started App 2 | 3 | This folder provides a minimum demo to show WebLLM API in a webapp setting. 4 | To try it out, you can do the following steps under this folder 5 | 6 | ```bash 7 | npm install 8 | npm start 9 | ``` 10 | 11 | Note if you would like to hack WebLLM core package. 12 | You can change web-llm dependencies as `"file:../.."`, and follow the build from source 13 | instruction in the project to build webllm locally. This option is only recommended 14 | if you would like to hack WebLLM core package. 
15 | -------------------------------------------------------------------------------- /examples/vision-model/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "get-started", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "start": "parcel src/vision_model.html --port 8888", 7 | "build": "parcel build src/vision_model.html --dist-dir lib" 8 | }, 9 | "devDependencies": { 10 | "buffer": "^5.7.1", 11 | "parcel": "^2.8.3", 12 | "process": "^0.11.10", 13 | "tslib": "^2.3.1", 14 | "typescript": "^4.9.5", 15 | "url": "^0.11.3" 16 | }, 17 | "dependencies": { 18 | "@mlc-ai/web-llm": "^0.2.79" 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /examples/vision-model/src/utils.ts: -------------------------------------------------------------------------------- 1 | export function getImageDataFromURL(url: string): Promise { 2 | return new Promise((resolve, reject) => { 3 | // Converts img to any, and later `as CanvasImageSource`, otherwise build complains 4 | const img: any = new Image(); 5 | img.crossOrigin = "anonymous"; // Important for CORS 6 | img.onload = () => { 7 | const canvas: HTMLCanvasElement = document.createElement("canvas"); 8 | const ctx: CanvasRenderingContext2D = canvas.getContext("2d")!; 9 | canvas.width = img.width; 10 | canvas.height = img.height; 11 | ctx.drawImage(img as CanvasImageSource, 0, 0); 12 | 13 | const imageData = ctx.getImageData(0, 0, img.width, img.height); 14 | resolve(imageData); 15 | }; 16 | img.onerror = () => reject(new Error("Failed to load image")); 17 | img.src = url; 18 | }); 19 | } 20 | 21 | export async function imageURLToBase64(url: string): Promise { 22 | const imageData: ImageData = await getImageDataFromURL(url); 23 | const canvas = document.createElement("canvas"); 24 | const ctx = canvas.getContext("2d"); 25 | 26 | canvas.width = imageData.width; 27 | canvas.height = imageData.height; 28 | 29 | ctx!.putImageData(imageData, 0, 0); 30 | 31 | return canvas.toDataURL(); 32 | } 33 | -------------------------------------------------------------------------------- /examples/vision-model/src/vision_model.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 |

WebLLM Test Page

8 | Open console to see output 9 |
10 |
11 | 12 | 13 |

Prompt

14 | 15 | 16 |

Response

17 | 18 |
19 | 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /examples/vision-model/src/vision_model.ts: -------------------------------------------------------------------------------- 1 | import * as webllm from "@mlc-ai/web-llm"; 2 | import { imageURLToBase64 } from "./utils"; 3 | 4 | function setLabel(id: string, text: string) { 5 | const label = document.getElementById(id); 6 | if (label == null) { 7 | throw Error("Cannot find label " + id); 8 | } 9 | label.innerText = text; 10 | } 11 | 12 | const USE_WEB_WORKER = true; 13 | 14 | const proxyUrl = "https://cors-anywhere.herokuapp.com/"; 15 | const url_https_street = "https://www.ilankelman.org/stopsigns/australia.jpg"; 16 | const url_https_tree = "https://www.ilankelman.org/sunset.jpg"; 17 | const url_https_sea = 18 | "https://www.islandvulnerability.org/index/silhouette.jpg"; 19 | 20 | async function main() { 21 | // can feed request with either base64 or http url 22 | const url_base64_street = await imageURLToBase64(proxyUrl + url_https_street); 23 | 24 | const initProgressCallback = (report: webllm.InitProgressReport) => { 25 | setLabel("init-label", report.text); 26 | }; 27 | const selectedModel = "Phi-3.5-vision-instruct-q4f16_1-MLC"; 28 | 29 | const engineConfig: webllm.MLCEngineConfig = { 30 | initProgressCallback: initProgressCallback, 31 | logLevel: "INFO", // specify the log level 32 | }; 33 | const chatOpts = { 34 | context_window_size: 6144, 35 | }; 36 | 37 | const engine: webllm.MLCEngineInterface = USE_WEB_WORKER 38 | ? await webllm.CreateWebWorkerMLCEngine( 39 | new Worker(new URL("./worker.ts", import.meta.url), { 40 | type: "module", 41 | }), 42 | selectedModel, 43 | engineConfig, 44 | chatOpts, 45 | ) 46 | : await webllm.CreateMLCEngine(selectedModel, engineConfig, chatOpts); 47 | 48 | // 1. Prefill two images 49 | const messages: webllm.ChatCompletionMessageParam[] = [ 50 | { 51 | role: "user", 52 | content: [ 53 | { type: "text", text: "List the items in each image concisely." }, 54 | { 55 | type: "image_url", 56 | image_url: { 57 | url: url_base64_street, 58 | }, 59 | }, 60 | { 61 | type: "image_url", 62 | image_url: { 63 | url: proxyUrl + url_https_sea, 64 | }, 65 | }, 66 | ], 67 | }, 68 | ]; 69 | const request0: webllm.ChatCompletionRequest = { 70 | stream: false, // can be streaming, same behavior 71 | messages: messages, 72 | }; 73 | const reply0 = await engine.chat.completions.create(request0); 74 | const replyMessage0 = await engine.getMessage(); 75 | console.log(reply0); 76 | console.log(replyMessage0); 77 | console.log(reply0.usage); 78 | 79 | // 2. A follow up text-only question 80 | messages.push({ role: "assistant", content: replyMessage0 }); 81 | messages.push({ role: "user", content: "What is special about each image?" }); 82 | const request1: webllm.ChatCompletionRequest = { 83 | stream: false, // can be streaming, same behavior 84 | messages: messages, 85 | }; 86 | const reply1 = await engine.chat.completions.create(request1); 87 | const replyMessage1 = await engine.getMessage(); 88 | console.log(reply1); 89 | console.log(replyMessage1); 90 | console.log(reply1.usage); 91 | 92 | // 3. A follow up single-image question 93 | messages.push({ role: "assistant", content: replyMessage1 }); 94 | messages.push({ 95 | role: "user", 96 | content: [ 97 | { type: "text", text: "What about this image? Answer concisely." 
}, 98 | { 99 | type: "image_url", 100 | image_url: { url: proxyUrl + url_https_tree }, 101 | }, 102 | ], 103 | }); 104 | const request2: webllm.ChatCompletionRequest = { 105 | stream: false, // can be streaming, same behavior 106 | messages: messages, 107 | }; 108 | const reply2 = await engine.chat.completions.create(request2); 109 | const replyMessage2 = await engine.getMessage(); 110 | console.log(reply2); 111 | console.log(replyMessage2); 112 | console.log(reply2.usage); 113 | } 114 | 115 | main(); 116 | -------------------------------------------------------------------------------- /examples/vision-model/src/worker.ts: -------------------------------------------------------------------------------- 1 | import { WebWorkerMLCEngineHandler } from "@mlc-ai/web-llm"; 2 | 3 | const handler = new WebWorkerMLCEngineHandler(); 4 | 5 | self.onmessage = (msg: MessageEvent) => { 6 | handler.onmessage(msg); 7 | }; 8 | -------------------------------------------------------------------------------- /jest.config.cjs: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | preset: "ts-jest", 3 | testEnvironment: "node", 4 | }; 5 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@mlc-ai/web-llm", 3 | "version": "0.2.79", 4 | "description": "Hardware accelerated language model chats on browsers", 5 | "main": "lib/index.js", 6 | "types": "lib/index.d.ts", 7 | "type": "module", 8 | "scripts": { 9 | "build": "rollup -c && ./cleanup-index-js.sh", 10 | "lint": "npx eslint ./src/ ./tests/ ./examples/ && npx prettier ./src/ ./tests/ ./examples/ --check", 11 | "test": "yarn jest", 12 | "format": "prettier --write \"./src/\" \"./examples/\" \"./tests/\"", 13 | "prepare": "husky" 14 | }, 15 | "files": [ 16 | "lib" 17 | ], 18 | "repository": { 19 | "type": "git", 20 | "url": "git+https://github.com/mlc-ai/web-llm" 21 | }, 22 | "keywords": [ 23 | "llm", 24 | "large language model", 25 | "machine learning" 26 | ], 27 | "license": "Apache-2.0", 28 | "homepage": "https://github.com/mlc-ai/web-llm", 29 | "devDependencies": { 30 | "@mlc-ai/web-tokenizers": "^0.1.6", 31 | "@next/eslint-plugin-next": "^14.2.3", 32 | "@rollup/plugin-commonjs": "^20.0.0", 33 | "@rollup/plugin-node-resolve": "^13.0.4", 34 | "@types/chrome": "^0.0.266", 35 | "@types/jest": "^29.5.11", 36 | "@types/serviceworker": "^0.0.86", 37 | "@typescript-eslint/eslint-plugin": "^5.59.6", 38 | "@typescript-eslint/parser": "^5.59.6", 39 | "@webgpu/types": "^0.1.24", 40 | "buffer": "^5.7.1", 41 | "eslint": "^8.41.0", 42 | "eslint-config-prettier": "^9.1.0", 43 | "eslint-plugin-prettier": "^5.1.3", 44 | "husky": "^9.0.11", 45 | "jest": "^29.7.0", 46 | "prettier": "3.2.5", 47 | "process": "^0.11.10", 48 | "rollup": "^2.56.2", 49 | "rollup-plugin-ignore": "^1.0.10", 50 | "rollup-plugin-typescript2": "^0.34.1", 51 | "ts-jest": "^29.1.2", 52 | "tslib": "^2.3.1", 53 | "@mlc-ai/web-runtime": "0.18.0-dev2", 54 | "@mlc-ai/web-xgrammar": "0.1.0", 55 | "typescript": "^4.9.5" 56 | }, 57 | "dependencies": { 58 | "loglevel": "^1.9.1" 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /rollup.config.js: -------------------------------------------------------------------------------- 1 | import { nodeResolve } from '@rollup/plugin-node-resolve'; 2 | import ignore from "rollup-plugin-ignore"; 3 | import commonjs from 
'@rollup/plugin-commonjs'; 4 | import typescript from 'rollup-plugin-typescript2'; 5 | 6 | export default { 7 | input: 'src/index.ts', 8 | output: [ 9 | { 10 | file: 'lib/index.js', 11 | exports: 'named', 12 | format: 'es', 13 | sourcemap: true, 14 | globals: {'ws': 'ws', 15 | 'perf_hooks': 'perf_hooks'} 16 | } 17 | ], 18 | plugins: [ 19 | ignore(["fs", "path", "crypto"]), 20 | nodeResolve({ browser: true }), 21 | commonjs({ 22 | ignoreDynamicRequires: true, 23 | }), 24 | typescript({ 25 | rollupCommonJSResolveHack: false, 26 | clean: true 27 | }) 28 | ] 29 | }; 30 | -------------------------------------------------------------------------------- /scripts/gh_deploy_site.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -euxo pipefail 3 | 4 | export PYTHONPATH=$PWD/python 5 | cd docs && make html && cd .. 6 | cd site && jekyll b && cd .. 7 | rm -rf site/_site/docs 8 | cp -r docs/_build/html site/_site/docs 9 | 10 | git fetch 11 | git checkout -B gh-pages origin/gh-pages 12 | rm -rf docs .gitignore 13 | mkdir -p docs 14 | cp -rf site/_site/* docs 15 | touch docs/.nojekyll 16 | echo "webllm.mlc.ai" >> docs/CNAME 17 | 18 | DATE=`date` 19 | git add docs && git commit -am "Build at ${DATE}" 20 | git push origin gh-pages 21 | git checkout main && git submodule update 22 | echo "Finish deployment at ${DATE}" 23 | -------------------------------------------------------------------------------- /scripts/local_deploy_site.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -euxo pipefail 3 | 4 | cd examples/simple-chat 5 | rm -rf lib 6 | npm run build 7 | cd ../.. 8 | 9 | cp examples/simple-chat/lib/* site 10 | 11 | cd site && jekyll serve --host localhost --port 8888 12 | -------------------------------------------------------------------------------- /scripts/prep_deps.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # This file prepares all the necessary dependencies for the web build. 3 | set -euxo pipefail 4 | 5 | emcc --version 6 | npm --version 7 | 8 | TVM_SOURCE_DIR_SET="${TVM_SOURCE_DIR:-}" 9 | 10 | if [[ -z ${TVM_SOURCE_DIR_SET} ]]; then 11 | if [[ ! -d "3rdparty/tvm-unity" ]]; then 12 | echo "Do not find TVM_SOURCE_DIR env variable, cloning a version as source". 13 | git clone https://github.com/mlc-ai/relax 3rdparty/tvm-unity --recursive 14 | fi 15 | export TVM_SOURCE_DIR="${TVM_SOURCE_DIR:-3rdparty/tvm-unity}" 16 | fi 17 | 18 | cd ${TVM_SOURCE_DIR}/web && make && npm install && npm run build && cd - 19 | rm -rf tvm_home 20 | ln -s ${TVM_SOURCE_DIR} tvm_home 21 | npm install 22 | -------------------------------------------------------------------------------- /scripts/serve_mlc_llm_dist.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # This file prepares all the necessary dependencies for the web build. 3 | set -euxo pipefail 4 | 5 | npm --version 6 | 7 | MLC_LLM_HOME_SET="${MLC_LLM_HOME:-}" 8 | 9 | if [[ -z ${MLC_LLM_HOME_SET} ]]; then 10 | echo "Do not find MLC_LLM_HOME env variable, need to set this to work". 
11 | fi 12 | cd ${MLC_LLM_HOME}/dist 13 | echo "Serving ${MLC_LLM_HOME}/dist for local debugging purposes" 14 | npx http-server -p 8000 --cors 15 | cd - 16 | -------------------------------------------------------------------------------- /site/.gitignore: -------------------------------------------------------------------------------- 1 | dist 2 | llm-chat-config.json 3 | _includes/stable_diffusion.html 4 | _site 5 | llm_chat.* 6 | -------------------------------------------------------------------------------- /site/_config.yml: -------------------------------------------------------------------------------- 1 | name: "WebLLM" 2 | short_name: "WebLLM" 3 | 4 | url: https://webllm.mlc.ai 5 | 6 | exclude: [README.md, serve_local.sh] 7 | 8 | plugins: 9 | - jekyll-remote-theme 10 | 11 | remote_theme: mlc-ai/jekyll-theme-mlc 12 | 13 | # Colorize code snippets with the rogue module if we want to deploy on GH. 14 | highlighter: rouge 15 | 16 | markdown: kramdown 17 | 18 | # The path structure for blog posts. 19 | permalink: /blog/:year/:month/:day/:title.html 20 | 21 | # Number of news stories on the front page. 22 | front_page_news: 8 23 | 24 | # Base pathname for links. 25 | base: "" 26 | 27 | # make pages for the _projects folder 28 | collections: 29 | projects: 30 | output: true 31 | 32 | course_title: 33 | 34 | # Navigation bar links. 35 | navigation: 36 | - title: Home 37 | link: / 38 | - title: GitHub 39 | link: https://github.com/mlc-ai/web-llm 40 | -------------------------------------------------------------------------------- /site/_includes/arrow.svg: -------------------------------------------------------------------------------- 1 | 22 | -------------------------------------------------------------------------------- /site/_includes/github.svg: -------------------------------------------------------------------------------- 1 | 2 | 8 | 9 | -------------------------------------------------------------------------------- /site/_includes/head.html: -------------------------------------------------------------------------------- 1 | 2 | 6 | 10 | 11 | -------------------------------------------------------------------------------- /site/_includes/hero.html: -------------------------------------------------------------------------------- 1 |
38 | 39 | 73 | -------------------------------------------------------------------------------- /site/assets/img/fig/Pittsburgh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlc-ai/web-llm/d8b25fed8e81d6f6b27cdc07e839c1c09cfaa43d/site/assets/img/fig/Pittsburgh.png -------------------------------------------------------------------------------- /site/assets/img/logo/cmuscs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlc-ai/web-llm/d8b25fed8e81d6f6b27cdc07e839c1c09cfaa43d/site/assets/img/logo/cmuscs.png -------------------------------------------------------------------------------- /site/assets/img/logo/mlc-logo-with-text-landscape.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlc-ai/web-llm/d8b25fed8e81d6f6b27cdc07e839c1c09cfaa43d/site/assets/img/logo/mlc-logo-with-text-landscape.png -------------------------------------------------------------------------------- /site/assets/img/logo/octoml.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlc-ai/web-llm/d8b25fed8e81d6f6b27cdc07e839c1c09cfaa43d/site/assets/img/logo/octoml.png -------------------------------------------------------------------------------- /site/assets/img/logo/sjtu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlc-ai/web-llm/d8b25fed8e81d6f6b27cdc07e839c1c09cfaa43d/site/assets/img/logo/sjtu.png -------------------------------------------------------------------------------- /site/assets/img/logo/uw.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlc-ai/web-llm/d8b25fed8e81d6f6b27cdc07e839c1c09cfaa43d/site/assets/img/logo/uw.jpg -------------------------------------------------------------------------------- /site/assets/video/Code.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlc-ai/web-llm/d8b25fed8e81d6f6b27cdc07e839c1c09cfaa43d/site/assets/video/Code.mp4 -------------------------------------------------------------------------------- /site/assets/video/Code.webm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlc-ai/web-llm/d8b25fed8e81d6f6b27cdc07e839c1c09cfaa43d/site/assets/video/Code.webm -------------------------------------------------------------------------------- /site/assets/video/Pittsburgh.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlc-ai/web-llm/d8b25fed8e81d6f6b27cdc07e839c1c09cfaa43d/site/assets/video/Pittsburgh.mp4 -------------------------------------------------------------------------------- /site/assets/video/Pittsburgh.webm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlc-ai/web-llm/d8b25fed8e81d6f6b27cdc07e839c1c09cfaa43d/site/assets/video/Pittsburgh.webm -------------------------------------------------------------------------------- /site/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: Home 4 | notitle: true 5 | --- 6 | 7 | {% include hero.html %} 8 | 9 | ## Overview 10 | 11 | We have been seeing 
amazing progress in generative AI and LLMs recently. Thanks to open-source efforts like LLaMA, Alpaca, Vicuna and Dolly, we are starting to see an exciting future of building our own open-source language models and personal AI assistants. 12 | 13 | These models are usually big and compute-heavy. To build a chat service, we would need a large cluster to run an inference server, while clients send requests to the servers and retrieve the inference output. We also usually have to run on specific types of GPUs where popular deep-learning frameworks are readily available. 14 | 15 | This project is our step toward bringing more diversity to the ecosystem. Specifically, can we bake LLMs directly into the client side and run them inside a browser? If that can be realized, we could run personal AI models on the client, with the benefits of lower cost, better personalization and stronger privacy protection. The client side is also getting quite powerful. 16 | 17 | Wouldn't it be even more amazing if we could simply open a browser and bring AI natively to your browser tab? The ecosystem has reached some level of readiness, and this project provides an affirmative answer to that question. 18 | 19 | ## Key Features 20 | - **In-Browser Inference**: WebLLM is a high-performance, in-browser language model inference engine that leverages WebGPU for hardware acceleration, enabling powerful LLM operations directly within web browsers without server-side processing. 21 | 22 | - [**Full OpenAI API Compatibility**](https://github.com/mlc-ai/web-llm?tab=readme-ov-file#full-openai-compatibility): Seamlessly integrate your app with WebLLM using the OpenAI API, with functionality such as JSON mode, function calling, streaming, and more. 23 | 24 | - [**Extensive Model Support**](https://github.com/mlc-ai/web-llm?tab=readme-ov-file#built-in-models): WebLLM natively supports a range of models including Llama, Phi, Gemma, RedPajama, Mistral, Qwen (通义千问), and many others, making it versatile for various AI tasks. 25 | 26 | - [**Custom Model Integration**](https://github.com/mlc-ai/web-llm?tab=readme-ov-file#custom-models): Easily integrate and deploy custom models in MLC format, allowing you to adapt WebLLM to specific needs and scenarios and enhancing flexibility in model deployment. 27 | 28 | - **Plug-and-Play Integration**: Easily integrate WebLLM into your projects using package managers like NPM and Yarn, or directly via CDN, complete with comprehensive [examples](https://github.com/mlc-ai/web-llm/tree/main/examples) and a modular design for connecting with UI components. 29 | 30 | - **Streaming & Real-Time Interactions**: Supports streaming chat completions, allowing real-time output generation that enhances interactive applications like chatbots and virtual assistants. 31 | 32 | - **Web Worker & Service Worker Support**: Optimize UI performance and manage the lifecycle of models efficiently by offloading computations to separate worker threads or service workers. 33 | 34 | - **Chrome Extension Support**: Extend the functionality of web browsers through custom Chrome extensions using WebLLM, with examples available for building both basic and advanced extensions. 35 | 36 | ## Disclaimer 37 | 38 | The [demo site](https://chat.webllm.ai) is for research purposes only, subject to the model licenses of LLaMA, Vicuna and RedPajama. Please contact us if you find any potential violation.
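
## Quick Example

As a minimal sketch of the plug-and-play integration and OpenAI-style API described above: the snippet below assumes the prebuilt model id `Llama-3.1-8B-Instruct-q4f32_1-MLC` (any other id from the built-in model list works the same way) and simply logs results to the console.

```typescript
import * as webllm from "@mlc-ai/web-llm";

async function demo() {
  // Download (or load from cache) the model weights and initialize WebGPU.
  const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
    "Llama-3.1-8B-Instruct-q4f32_1-MLC",
    { initProgressCallback: (report) => console.log(report.text) },
  );

  // Non-streaming chat completion, mirroring the OpenAI request/response shape.
  const reply = await engine.chat.completions.create({
    messages: [{ role: "user", content: "What is WebGPU?" }],
  });
  console.log(reply.choices[0].message.content);

  // Streaming: chunks arrive as they are generated.
  const chunks = await engine.chat.completions.create({
    stream: true,
    messages: [{ role: "user", content: "Summarize that in one sentence." }],
  });
  for await (const chunk of chunks) {
    console.log(chunk.choices[0]?.delta?.content ?? "");
  }
}

demo();
```

The same calls work unchanged against `CreateWebWorkerMLCEngine`, backed by a small worker script that instantiates `WebWorkerMLCEngineHandler` (as the vision-model and get-started-web-worker examples in this repository do), which keeps model execution off the UI thread.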
39 | -------------------------------------------------------------------------------- /src/index.ts: -------------------------------------------------------------------------------- 1 | export { 2 | ModelRecord, 3 | AppConfig, 4 | ChatOptions, 5 | MLCEngineConfig, 6 | GenerationConfig, 7 | ModelType, 8 | prebuiltAppConfig, 9 | modelVersion, 10 | modelLibURLPrefix, 11 | functionCallingModelIds, 12 | } from "./config"; 13 | 14 | export { 15 | InitProgressCallback, 16 | InitProgressReport, 17 | MLCEngineInterface, 18 | LogitProcessor, 19 | LogLevel, 20 | } from "./types"; 21 | 22 | export { MLCEngine, CreateMLCEngine } from "./engine"; 23 | 24 | export { 25 | hasModelInCache, 26 | deleteChatConfigInCache, 27 | deleteModelAllInfoInCache, 28 | deleteModelWasmInCache, 29 | deleteModelInCache, 30 | } from "./cache_util"; 31 | 32 | export { 33 | WebWorkerMLCEngineHandler, 34 | WebWorkerMLCEngine, 35 | CreateWebWorkerMLCEngine, 36 | } from "./web_worker"; 37 | 38 | export { WorkerRequest, WorkerResponse, CustomRequestParams } from "./message"; 39 | 40 | export { 41 | ServiceWorkerMLCEngineHandler, 42 | ServiceWorkerMLCEngine, 43 | CreateServiceWorkerMLCEngine, 44 | } from "./service_worker"; 45 | 46 | export { 47 | ServiceWorkerMLCEngineHandler as ExtensionServiceWorkerMLCEngineHandler, 48 | ServiceWorkerMLCEngine as ExtensionServiceWorkerMLCEngine, 49 | CreateServiceWorkerMLCEngine as CreateExtensionServiceWorkerMLCEngine, 50 | } from "./extension_service_worker"; 51 | 52 | export * from "./openai_api_protocols/index"; 53 | -------------------------------------------------------------------------------- /src/message.ts: -------------------------------------------------------------------------------- 1 | import { AppConfig, ChatOptions } from "./config"; 2 | import { InitProgressReport, LogLevel } from "./types"; 3 | import { 4 | ChatCompletionRequestStreaming, 5 | ChatCompletionRequestNonStreaming, 6 | ChatCompletion, 7 | ChatCompletionChunk, 8 | CompletionCreateParamsNonStreaming, 9 | CompletionCreateParamsStreaming, 10 | Completion, 11 | EmbeddingCreateParams, 12 | CreateEmbeddingResponse, 13 | } from "./openai_api_protocols/index"; 14 | 15 | /** 16 | * Message kind used by worker 17 | */ 18 | type RequestKind = 19 | | "reload" 20 | | "runtimeStatsText" 21 | | "interruptGenerate" 22 | | "unload" 23 | | "resetChat" 24 | | "getMaxStorageBufferBindingSize" 25 | | "getGPUVendor" 26 | | "forwardTokensAndSample" 27 | | "chatCompletionNonStreaming" 28 | | "completionNonStreaming" 29 | | "embedding" 30 | | "getMessage" 31 | | "chatCompletionStreamInit" 32 | | "completionStreamInit" 33 | | "completionStreamNextChunk" 34 | | "customRequest" 35 | | "keepAlive" 36 | | "setLogLevel" 37 | | "setAppConfig"; 38 | 39 | // eslint-disable-next-line @typescript-eslint/no-unused-vars 40 | type ResponseKind = "return" | "throw" | "initProgressCallback"; 41 | 42 | export interface ReloadParams { 43 | modelId: string[]; 44 | chatOpts?: ChatOptions[]; 45 | } 46 | export interface ResetChatParams { 47 | keepStats: boolean; 48 | modelId?: string; 49 | } 50 | export interface GetMessageParams { 51 | modelId?: string; 52 | } 53 | export interface RuntimeStatsTextParams { 54 | modelId?: string; 55 | } 56 | export interface ForwardTokensAndSampleParams { 57 | inputIds: Array; 58 | isPrefill: boolean; 59 | modelId?: string; 60 | } 61 | 62 | // Notes on the following Params with modelId and chatOpts: 63 | // These fields are the model and chatOpts that the frontend engine expects the backend 64 | // to be loaded with. 
If not loaded due to web/service worker unexpectedly killed, 65 | // handler will call reload(). An engine can load multiple models, hence both are list. 66 | // TODO(webllm-team): should add appConfig here as well if rigorous. 67 | // Fore more, see https://github.com/mlc-ai/web-llm/pull/471 68 | 69 | // Note on the messages with selectedModelId: 70 | // This is the modelId this request uses. It is needed to identify which async generator 71 | // to instantiate / use, since an engine can load multiple models, thus the handler 72 | // needs to maintain multiple generators. 73 | export interface ChatCompletionNonStreamingParams { 74 | request: ChatCompletionRequestNonStreaming; 75 | modelId: string[]; 76 | chatOpts?: ChatOptions[]; 77 | } 78 | export interface ChatCompletionStreamInitParams { 79 | request: ChatCompletionRequestStreaming; 80 | selectedModelId: string; 81 | modelId: string[]; 82 | chatOpts?: ChatOptions[]; 83 | } 84 | export interface CompletionNonStreamingParams { 85 | request: CompletionCreateParamsNonStreaming; 86 | modelId: string[]; 87 | chatOpts?: ChatOptions[]; 88 | } 89 | export interface CompletionStreamInitParams { 90 | request: CompletionCreateParamsStreaming; 91 | selectedModelId: string; 92 | modelId: string[]; 93 | chatOpts?: ChatOptions[]; 94 | } 95 | export interface EmbeddingParams { 96 | request: EmbeddingCreateParams; 97 | modelId: string[]; 98 | chatOpts?: ChatOptions[]; 99 | } 100 | export interface CompletionStreamNextChunkParams { 101 | selectedModelId: string; 102 | } 103 | 104 | export interface CustomRequestParams { 105 | requestName: string; 106 | requestMessage: string; 107 | } 108 | export type MessageContent = 109 | | ReloadParams 110 | | ResetChatParams 111 | | GetMessageParams 112 | | RuntimeStatsTextParams 113 | | ForwardTokensAndSampleParams 114 | | ChatCompletionNonStreamingParams 115 | | ChatCompletionStreamInitParams 116 | | CompletionNonStreamingParams 117 | | CompletionStreamInitParams 118 | | EmbeddingParams 119 | | CompletionStreamNextChunkParams 120 | | CustomRequestParams 121 | | InitProgressReport 122 | | LogLevel 123 | | string 124 | | null 125 | | number 126 | | ChatCompletion 127 | | ChatCompletionChunk 128 | | CreateEmbeddingResponse 129 | | Completion 130 | | AppConfig 131 | | void; 132 | /** 133 | * The message used in exchange between worker 134 | * and the main thread. 
135 | */ 136 | 137 | export type WorkerRequest = { 138 | kind: RequestKind; 139 | uuid: string; 140 | content: MessageContent; 141 | }; 142 | 143 | type HeartbeatWorkerResponse = { 144 | kind: "heartbeat"; 145 | uuid: string; 146 | }; 147 | 148 | type OneTimeWorkerResponse = { 149 | kind: "return" | "throw"; 150 | uuid: string; 151 | content: MessageContent; 152 | }; 153 | 154 | type InitProgressWorkerResponse = { 155 | kind: "initProgressCallback"; 156 | uuid: string; 157 | content: InitProgressReport; 158 | }; 159 | 160 | export type WorkerResponse = 161 | | OneTimeWorkerResponse 162 | | InitProgressWorkerResponse 163 | | HeartbeatWorkerResponse; 164 | -------------------------------------------------------------------------------- /src/openai_api_protocols/index.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * The input to OpenAI API, directly adopted from openai-node with small tweaks: 3 | * https://github.com/openai/openai-node/blob/master/src/resources/chat/completions.ts 4 | * 5 | * Copyright 2024 OpenAI 6 | * 7 | * Licensed under the Apache License, Version 2.0 (the "License"); 8 | * you may not use this file except in compliance with the License. 9 | * You may obtain a copy of the License at 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | export { 19 | Chat, 20 | ChatCompletionRequestBase, 21 | ChatCompletionRequestNonStreaming, 22 | ChatCompletionRequestStreaming, 23 | ChatCompletionRequest, 24 | ChatCompletion, 25 | ChatCompletionChunk, 26 | ChatCompletionRequestUnsupportedFields, 27 | postInitAndCheckFields as postInitAndCheckFieldsChatCompletion, 28 | ChatCompletionContentPart, 29 | ChatCompletionContentPartText, 30 | ChatCompletionContentPartImage, 31 | ChatCompletionMessageToolCall, 32 | ChatCompletionRole, 33 | ChatCompletionSystemMessageParam, 34 | ChatCompletionUserMessageParam, 35 | ChatCompletionAssistantMessageParam, 36 | ChatCompletionToolMessageParam, 37 | ChatCompletionMessageParam, 38 | FunctionParameters, 39 | FunctionDefinition, 40 | ChatCompletionTool, 41 | ChatCompletionNamedToolChoice, 42 | ChatCompletionToolChoiceOption, 43 | TopLogprob, 44 | ChatCompletionTokenLogprob, 45 | ChatCompletionMessage, 46 | CompletionUsage, 47 | ResponseFormat, 48 | ChatCompletionFinishReason, 49 | } from "./chat_completion"; 50 | 51 | export { 52 | Completions, 53 | CompletionCreateParamsNonStreaming, 54 | CompletionCreateParamsStreaming, 55 | CompletionCreateParamsBase, 56 | CompletionCreateParams, 57 | Completion, 58 | CompletionChoice, 59 | postInitAndCheckFields as postInitAndCheckFieldsCompletion, 60 | } from "./completion"; 61 | 62 | export { 63 | Embeddings, 64 | Embedding, 65 | EmbeddingCreateParams, 66 | CreateEmbeddingResponse, 67 | postInitAndCheckFields as postInitAndCheckFieldsEmbedding, 68 | } from "./embedding"; 69 | -------------------------------------------------------------------------------- /tests/generation_config.test.ts: -------------------------------------------------------------------------------- 1 | import { 2 | GenerationConfig, 3 | postInitAndCheckGenerationConfigValues, 4 | } from "../src/config"; 5 | import { describe, expect, test } from 
"@jest/globals"; 6 | 7 | describe("Check generation config illegal values", () => { 8 | test("High-level unsupported fields", () => { 9 | expect(() => { 10 | const genConfig: GenerationConfig = { 11 | max_tokens: 0, 12 | }; 13 | postInitAndCheckGenerationConfigValues(genConfig); 14 | }).toThrow("Make sure `max_tokens` > 0"); 15 | }); 16 | 17 | test("logit_bias exceeds range", () => { 18 | expect(() => { 19 | const genConfig: GenerationConfig = { 20 | max_tokens: 10, 21 | logit_bias: { 22 | "1355": 155, 23 | }, 24 | }; 25 | postInitAndCheckGenerationConfigValues(genConfig); 26 | }).toThrow("Make sure -100 < logit_bias <= 100."); 27 | }); 28 | 29 | test("logit_bias invalid key", () => { 30 | expect(() => { 31 | const genConfig: GenerationConfig = { 32 | max_tokens: 10, 33 | logit_bias: { 34 | thisRaisesError: 50, 35 | }, 36 | }; 37 | postInitAndCheckGenerationConfigValues(genConfig); 38 | }).toThrow( 39 | "Make sure logit_bias's keys to be number represented in string.", 40 | ); 41 | }); 42 | 43 | test("top_logprobs out of range", () => { 44 | expect(() => { 45 | const genConfig: GenerationConfig = { 46 | logprobs: true, 47 | top_logprobs: 6, 48 | max_tokens: 10, 49 | }; 50 | postInitAndCheckGenerationConfigValues(genConfig); 51 | }).toThrow("Make sure 0 < top_logprobs <= 5."); 52 | }); 53 | 54 | test("top_logprobs set without setting logprobs", () => { 55 | expect(() => { 56 | const genConfig: GenerationConfig = { 57 | top_logprobs: 3, 58 | max_tokens: 10, 59 | }; 60 | postInitAndCheckGenerationConfigValues(genConfig); 61 | }).toThrow("top_logprobs requires logprobs to be true"); 62 | }); 63 | 64 | test("top_logprobs set though logprobs is false", () => { 65 | expect(() => { 66 | const genConfig: GenerationConfig = { 67 | logprobs: false, 68 | top_logprobs: 3, 69 | max_tokens: 10, 70 | }; 71 | postInitAndCheckGenerationConfigValues(genConfig); 72 | }).toThrow("top_logprobs requires logprobs to be true"); 73 | }); 74 | }); 75 | 76 | describe("Check generation post init", () => { 77 | test("Only set one of presence or frequency penalty", () => { 78 | const genConfig: GenerationConfig = { 79 | frequency_penalty: 1.5, 80 | }; 81 | postInitAndCheckGenerationConfigValues(genConfig); 82 | expect(genConfig.presence_penalty).toBe(0.0); 83 | }); 84 | 85 | test("Set logprobs without setting top_logprobs", () => { 86 | const genConfig: GenerationConfig = { 87 | logprobs: true, 88 | }; 89 | postInitAndCheckGenerationConfigValues(genConfig); 90 | expect(genConfig.top_logprobs).toBe(0); 91 | }); 92 | 93 | test("Set both logprobs and top_logprobs", () => { 94 | const genConfig: GenerationConfig = { 95 | logprobs: true, 96 | top_logprobs: 2, 97 | }; 98 | postInitAndCheckGenerationConfigValues(genConfig); 99 | expect(genConfig.top_logprobs).toBe(2); 100 | }); 101 | }); 102 | -------------------------------------------------------------------------------- /tests/openai_completion.test.ts: -------------------------------------------------------------------------------- 1 | import { getConversation } from "../src/conversation"; 2 | import { 3 | TextCompletionConversationError, 4 | TextCompletionConversationExpectsPrompt, 5 | } from "../src/error"; 6 | import { 7 | CompletionCreateParams, 8 | postInitAndCheckFields, 9 | } from "../src/openai_api_protocols/completion"; 10 | import { llama3_1ChatConfig } from "./constants"; 11 | import { describe, expect, test } from "@jest/globals"; 12 | 13 | describe("Conversation object with text completion", () => { 14 | test("Conversation checks ", () => { 15 | const conv = 
getConversation( 16 | llama3_1ChatConfig.conv_template, 17 | llama3_1ChatConfig.conv_config, 18 | /*isTextCompletion=*/ true, 19 | ); 20 | expect(() => { 21 | conv.getPromptArrayTextCompletion(); 22 | }).toThrow(new TextCompletionConversationExpectsPrompt()); 23 | expect(() => { 24 | conv.getPromptArray(); 25 | }).toThrow(new TextCompletionConversationError("getPromptArray")); 26 | 27 | conv.prompt = "Hi"; 28 | expect(conv.getPromptArrayTextCompletion()).toEqual(["Hi"]); 29 | 30 | conv.reset(); 31 | expect(conv.prompt === undefined).toEqual(true); 32 | }); 33 | }); 34 | 35 | describe("Check completion unsupported requests", () => { 36 | test("stream_options without stream specified", () => { 37 | expect(() => { 38 | const request: CompletionCreateParams = { 39 | prompt: "Hello, ", 40 | stream_options: { include_usage: true }, 41 | }; 42 | postInitAndCheckFields(request, "Llama-3.1-8B-Instruct-q4f32_1-MLC"); 43 | }).toThrow("Only specify stream_options when stream=True."); 44 | }); 45 | 46 | test("stream_options with stream=false", () => { 47 | expect(() => { 48 | const request: CompletionCreateParams = { 49 | stream: false, 50 | prompt: "Hello, ", 51 | stream_options: { include_usage: true }, 52 | }; 53 | postInitAndCheckFields(request, "Llama-3.1-8B-Instruct-q4f32_1-MLC"); 54 | }).toThrow("Only specify stream_options when stream=True."); 55 | }); 56 | 57 | test("High-level unsupported fields", () => { 58 | expect(() => { 59 | const request: CompletionCreateParams = { 60 | prompt: "Hello, ", 61 | suffix: "this is suffix", // this raises error 62 | }; 63 | postInitAndCheckFields(request, "Llama-3.1-8B-Instruct-q4f32_1-MLC"); 64 | }).toThrow( 65 | "The following fields in CompletionCreateParams are not yet supported", 66 | ); 67 | 68 | expect(() => { 69 | const request: CompletionCreateParams = { 70 | prompt: "Hello, ", 71 | best_of: 3, // this raises error 72 | }; 73 | postInitAndCheckFields(request, "Llama-3.1-8B-Instruct-q4f32_1-MLC"); 74 | }).toThrow( 75 | "The following fields in CompletionCreateParams are not yet supported", 76 | ); 77 | 78 | expect(() => { 79 | const request: CompletionCreateParams = { 80 | prompt: "Hello, ", 81 | user: "Bob", // this raises error 82 | }; 83 | postInitAndCheckFields(request, "Llama-3.1-8B-Instruct-q4f32_1-MLC"); 84 | }).toThrow( 85 | "The following fields in CompletionCreateParams are not yet supported", 86 | ); 87 | }); 88 | 89 | test("When streaming `n` needs to be 1", () => { 90 | expect(() => { 91 | const request: CompletionCreateParams = { 92 | stream: true, 93 | n: 2, 94 | prompt: "Hello, ", 95 | }; 96 | postInitAndCheckFields(request, "Llama-3.1-8B-Instruct-q4f32_1-MLC"); 97 | }).toThrow("When streaming, `n` cannot be > 1."); 98 | }); 99 | 100 | test("Non-integer seed", () => { 101 | expect(() => { 102 | const request: CompletionCreateParams = { 103 | prompt: "Hello, ", 104 | max_tokens: 10, 105 | seed: 42.2, // Note that Number.isInteger(42.0) is true 106 | }; 107 | postInitAndCheckFields(request, "Llama-3.1-8B-Instruct-q4f32_1-MLC"); 108 | }).toThrow("`seed` should be an integer, but got"); 109 | }); 110 | }); 111 | -------------------------------------------------------------------------------- /tests/openai_embeddings.test.ts: -------------------------------------------------------------------------------- 1 | import { 2 | EmbeddingInputEmptyError, 3 | EmbeddingUnsupportedEncodingFormatError, 4 | } from "../src/error"; 5 | import { 6 | EmbeddingCreateParams, 7 | postInitAndCheckFields, 8 | } from "../src/openai_api_protocols/embedding"; 9 | 
import { describe, expect, test } from "@jest/globals"; 10 | 11 | describe("Check embeddings supported requests", () => { 12 | test("Supported embedding request float", () => { 13 | const request: EmbeddingCreateParams = { 14 | input: ["Hello", "Hi"], 15 | encoding_format: "float", 16 | }; 17 | postInitAndCheckFields(request, "snowflake-arctic-embed-m-q0f32-MLC"); 18 | }); 19 | 20 | test("Supported embedding request, unspecified format", () => { 21 | const request: EmbeddingCreateParams = { 22 | input: ["Hello", "Hi"], 23 | }; 24 | postInitAndCheckFields(request, "snowflake-arctic-embed-m-q0f32-MLC"); 25 | }); 26 | 27 | test("Supported embedding request, single string", () => { 28 | const request: EmbeddingCreateParams = { 29 | input: "Hello", 30 | }; 31 | postInitAndCheckFields(request, "snowflake-arctic-embed-m-q0f32-MLC"); 32 | }); 33 | 34 | test("Supported embedding request, single token array", () => { 35 | const request: EmbeddingCreateParams = { 36 | input: [0, 1], 37 | }; 38 | postInitAndCheckFields(request, "snowflake-arctic-embed-m-q0f32-MLC"); 39 | }); 40 | 41 | test("Supported embedding request, array of token arrays", () => { 42 | const request: EmbeddingCreateParams = { 43 | input: [ 44 | [0, 1], 45 | [0, 1], 46 | ], 47 | }; 48 | postInitAndCheckFields(request, "snowflake-arctic-embed-m-q0f32-MLC"); 49 | }); 50 | }); 51 | 52 | describe("Invalid embedding input", () => { 53 | test("Empty string", () => { 54 | expect(() => { 55 | const request: EmbeddingCreateParams = { 56 | input: "", 57 | }; 58 | postInitAndCheckFields(request, "snowflake-arctic-embed-m-q0f32-MLC"); 59 | }).toThrow(new EmbeddingInputEmptyError()); 60 | }); 61 | 62 | test("Contains empty string", () => { 63 | expect(() => { 64 | const request: EmbeddingCreateParams = { 65 | input: ["Hi", "hello", ""], 66 | }; 67 | postInitAndCheckFields(request, "snowflake-arctic-embed-m-q0f32-MLC"); 68 | }).toThrow(new EmbeddingInputEmptyError()); 69 | }); 70 | 71 | test("Empty token array", () => { 72 | expect(() => { 73 | const request: EmbeddingCreateParams = { 74 | input: [], 75 | }; 76 | postInitAndCheckFields(request, "snowflake-arctic-embed-m-q0f32-MLC"); 77 | }).toThrow(new EmbeddingInputEmptyError()); 78 | }); 79 | 80 | test("Contains empty token array", () => { 81 | expect(() => { 82 | const request: EmbeddingCreateParams = { 83 | input: [[1, 2], [3], [], [4]], 84 | }; 85 | postInitAndCheckFields(request, "snowflake-arctic-embed-m-q0f32-MLC"); 86 | }).toThrow(new EmbeddingInputEmptyError()); 87 | }); 88 | }); 89 | 90 | describe("Check embeddings unsupported requests", () => { 91 | test("base64 encoding_format", () => { 92 | expect(() => { 93 | const request: EmbeddingCreateParams = { 94 | input: ["Hello", "Hi"], 95 | encoding_format: "base64", 96 | }; 97 | postInitAndCheckFields(request, "snowflake-arctic-embed-m-q0f32-MLC"); 98 | }).toThrow(new EmbeddingUnsupportedEncodingFormatError()); 99 | }); 100 | 101 | test("user", () => { 102 | expect(() => { 103 | const request: EmbeddingCreateParams = { 104 | input: ["Hello", "Hi"], 105 | encoding_format: "float", 106 | user: "Bob", 107 | }; 108 | postInitAndCheckFields(request, "snowflake-arctic-embed-m-q0f32-MLC"); 109 | }).toThrow("The following fields in"); 110 | }); 111 | 112 | test("dimensions", () => { 113 | expect(() => { 114 | const request: EmbeddingCreateParams = { 115 | input: ["Hello", "Hi"], 116 | encoding_format: "float", 117 | dimensions: 2048, 118 | }; 119 | postInitAndCheckFields(request, "snowflake-arctic-embed-m-q0f32-MLC"); 120 | }).toThrow("The 
following fields in"); 121 | }); 122 | }); 123 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "es6", 4 | "declaration": true, 5 | "outDir": "lib", 6 | "declarationMap": true, 7 | "sourceMap": true, 8 | "strict": true, 9 | "moduleResolution": "Node", 10 | "esModuleInterop": true, 11 | "lib": ["dom", "WebWorker"] 12 | }, 13 | "typeRoots": ["./node_modules/@webgpu/types", "./node_modules/@types"], 14 | "include": ["src"], 15 | "exclude": ["node_modules", "build", "dist", "rollup.config.cjs"] 16 | } 17 | -------------------------------------------------------------------------------- /utils/.gitignore: -------------------------------------------------------------------------------- 1 | package-lock.json 2 | -------------------------------------------------------------------------------- /utils/vram_requirements/.gitignore: -------------------------------------------------------------------------------- 1 | src/app-config.js 2 | -------------------------------------------------------------------------------- /utils/vram_requirements/README.md: -------------------------------------------------------------------------------- 1 | ### vRAM Requirements 2 | 3 | To check vRAM requirement for a model, add models to check in `gh-config.json`. 4 | 5 | Then run `npm install` followed by `npm start`. -------------------------------------------------------------------------------- /utils/vram_requirements/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "vram-requirements", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "start": "cp src/gh-config.js src/app-config.js && parcel src/vram_requirements.html --port 8885", 7 | "build": "cp src/gh-config.js src/app-config.js && parcel build src/vram_requirements.html --dist-dir lib" 8 | }, 9 | "devDependencies": { 10 | "buffer": "^5.7.1", 11 | "crypto-browserify": "^3.12.0", 12 | "events": "^3.3.0", 13 | "parcel": "^2.8.3", 14 | "path-browserify": "^1.0.1", 15 | "process": "^0.11.10", 16 | "stream-browserify": "^3.0.0", 17 | "tslib": "^2.3.1", 18 | "typescript": "^4.9.5", 19 | "url": "^0.11.3" 20 | }, 21 | "dependencies": { 22 | "@mlc-ai/web-llm": "^0.2.79", 23 | "@mlc-ai/web-runtime": "0.18.0-dev2" 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /utils/vram_requirements/src/gh-config.js: -------------------------------------------------------------------------------- 1 | import { prebuiltAppConfig } from "../../../lib/config"; 2 | 3 | export default { 4 | "model_list": prebuiltAppConfig.model_list, 5 | "use_web_worker": true 6 | } 7 | -------------------------------------------------------------------------------- /utils/vram_requirements/src/vram_requirements.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 | 8 |

<h2>vRAM Requirement Report</h2>
9 | Open console to see logs
10 | <br />
11 | <label id="report-label"> </label>
12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /utils/vram_requirements/src/vram_requirements.ts: -------------------------------------------------------------------------------- 1 | import ModelRecord from "@mlc-ai/web-llm"; 2 | import appConfig from "./app-config"; // Modify this to inspect vram requirement for models of choice 3 | import * as tvmjs from "@mlc-ai/web-runtime"; 4 | import log from "loglevel"; 5 | 6 | function setLabel(id: string, text: string) { 7 | const label = document.getElementById(id); 8 | if (label == null) { 9 | throw Error("Cannot find label " + id); 10 | } 11 | label.innerText = text; 12 | } 13 | 14 | interface AppConfig { 15 | model_list: Array; 16 | } 17 | 18 | const dtypeBytesMap = new Map([ 19 | ["uint32", 4], 20 | ["uint16", 2], 21 | ["float32", 4], 22 | ["float16", 4], 23 | ]); 24 | 25 | async function main() { 26 | const config: AppConfig = appConfig; 27 | let report = ""; 28 | for (let i = 0; i < config.model_list.length; ++i) { 29 | // 1. Read each model record 30 | const modelRecord: ModelRecord = config.model_list[i]; 31 | const model_id = modelRecord.model_id; 32 | // 2. Load the wasm 33 | const wasmUrl = modelRecord.model_lib; 34 | const wasmSource = await (await fetch(wasmUrl)).arrayBuffer(); 35 | report += `${model_id}: \n`; 36 | // 3. Initialize tvmjs instance and virtual machine using the wasm 37 | const tvm = await tvmjs.instantiate( 38 | new Uint8Array(wasmSource), 39 | tvmjs.createPolyfillWASI(), 40 | log.info, 41 | ); 42 | const gpuDetectOutput = await tvmjs.detectGPUDevice(); 43 | if (gpuDetectOutput == undefined) { 44 | throw Error("Cannot find WebGPU in the environment"); 45 | } 46 | tvm.initWebGPU(gpuDetectOutput.device); 47 | tvm.beginScope(); 48 | const vm = tvm.detachFromCurrentScope( 49 | tvm.createVirtualMachine(tvm.webgpu()), 50 | ); 51 | // 4. Get metadata from the vm 52 | let fgetMetadata: any; 53 | try { 54 | fgetMetadata = vm.getFunction("_metadata"); 55 | } catch (err) { 56 | log.error( 57 | "The wasm needs to have function `_metadata` to inspect vram requirement.", 58 | err, 59 | ); 60 | } 61 | const ret_value = fgetMetadata(); 62 | const metadataStr = tvm.detachFromCurrentScope(ret_value).toString(); 63 | const metadata = JSON.parse(metadataStr); 64 | // 5. Parse the vram requirement 65 | // 5.1. Get bytes for loading params 66 | let paramBytes = 0; 67 | metadata.params.forEach((param: any) => { 68 | if (Math.min(...param.shape) > 0) { 69 | // Possible to have shape -1 signifying a dynamic shape -- we disregard them 70 | const dtypeBytes = dtypeBytesMap.get(param.dtype); 71 | if (dtypeBytes === undefined) { 72 | throw Error( 73 | "Cannot find size of " + 74 | param.dtype + 75 | ", add it to `dtypeBytesMap`.", 76 | ); 77 | } 78 | const numParams = param.shape.reduce((a: number, b: number) => a * b); 79 | paramBytes += numParams * dtypeBytes; 80 | } else { 81 | log.info( 82 | `${model_id}'s ${param.name} has dynamic shape; excluded from vRAM calculation.`, 83 | ); 84 | } 85 | }); 86 | // 5.2. Get maximum bytes needed for temporary buffer across all functions 87 | let maxTempFuncBytes = 0; 88 | Object.entries(metadata.memory_usage).forEach(([funcName, funcBytes]) => { 89 | if (typeof funcBytes !== "number") { 90 | throw Error("`memory_usage` expects entry `funcName: funcBytes`."); 91 | } 92 | maxTempFuncBytes = Math.max(maxTempFuncBytes, funcBytes); 93 | }); 94 | // 5.3. Get kv cache bytes 95 | const kv_cache_bytes: number = metadata.kv_cache_bytes; 96 | // 5.4. 
Get total vRAM needed 97 | const totalBytes = paramBytes + maxTempFuncBytes + kv_cache_bytes; 98 | // 6. Report vRAM Requirement 99 | report += 100 | `totalBytes: ${(totalBytes / 1024 / 1024).toFixed(2)} MB\n` + 101 | `paramBytes: ${(paramBytes / 1024 / 1024).toFixed(2)} MB\n` + 102 | `maxTempFuncBytes: ${(maxTempFuncBytes / 1024 / 1024).toFixed(2)} MB\n` + 103 | `kv_cache_bytes: ${(kv_cache_bytes / 1024 / 1024).toFixed(2)} MB\n\n`; 104 | // 7. Dispose everything 105 | tvm.endScope(); 106 | vm.dispose(); 107 | tvm.dispose(); 108 | } 109 | setLabel("report-label", report); 110 | } 111 | 112 | main(); 113 | --------------------------------------------------------------------------------