├── .eslintignore ├── .eslintrc.cjs ├── .github └── workflows │ ├── build-site.yaml │ └── linter.yaml ├── .gitignore ├── .gitmodules ├── .husky └── pre-commit ├── .lintstagedrc.json ├── .prettierignore ├── .prettierrc ├── 3rdparty └── .gitkeep ├── LICENSE ├── README.md ├── SECURITY.md ├── cleanup-index-js.sh ├── docs ├── Makefile ├── README.md ├── _static │ └── img │ │ └── mlc-logo-with-text-landscape.svg ├── conf.py ├── developer │ ├── add_models.rst │ └── building_from_source.rst ├── index.rst ├── make.bat ├── requirements.txt └── user │ ├── advanced_usage.rst │ ├── api_reference.rst │ ├── basic_usage.rst │ └── get_started.rst ├── examples ├── .gitignore ├── README.md ├── abort-reload │ ├── README.md │ ├── package.json │ └── src │ │ ├── get_started.html │ │ └── get_started.js ├── cache-usage │ ├── README.md │ ├── package.json │ └── src │ │ ├── cache_usage.html │ │ └── cache_usage.ts ├── chrome-extension-webgpu-service-worker │ ├── README.md │ ├── package.json │ └── src │ │ ├── background.ts │ │ ├── content.js │ │ ├── example.html │ │ ├── icons │ │ ├── icon-128.png │ │ ├── icon-16.png │ │ ├── icon-32.png │ │ └── icon-64.png │ │ ├── manifest.json │ │ ├── popup.css │ │ ├── popup.html │ │ └── popup.ts ├── chrome-extension │ ├── README.md │ ├── package.json │ └── src │ │ ├── content.js │ │ ├── example.html │ │ ├── icons │ │ ├── icon-128.png │ │ ├── icon-16.png │ │ ├── icon-32.png │ │ └── icon-64.png │ │ ├── manifest.json │ │ ├── manifest_v2.json │ │ ├── popup.css │ │ ├── popup.html │ │ └── popup.ts ├── embeddings │ ├── README.md │ ├── package.json │ └── src │ │ ├── embeddings.html │ │ └── embeddings.ts ├── function-calling │ ├── README.md │ ├── function-calling-manual │ │ ├── README.md │ │ ├── package.json │ │ └── src │ │ │ ├── function_calling_manual.html │ │ │ └── function_calling_manual.ts │ └── function-calling-openai │ │ ├── README.md │ │ ├── package.json │ │ └── src │ │ ├── function_calling_openai.html │ │ └── function_calling_openai.ts ├── get-started-web-worker │ ├── README.md │ ├── package.json │ └── src │ │ ├── get_started.html │ │ ├── main.ts │ │ └── worker.ts ├── get-started │ ├── README.md │ ├── package.json │ └── src │ │ ├── get_started.html │ │ └── get_started.ts ├── json-mode │ ├── README.md │ ├── package.json │ └── src │ │ ├── json_mode.html │ │ └── json_mode.ts ├── json-schema │ ├── README.md │ ├── package.json │ └── src │ │ ├── json_schema.html │ │ └── json_schema.ts ├── logit-processor │ ├── README.md │ ├── package.json │ └── src │ │ ├── logit_processor.html │ │ ├── logit_processor.ts │ │ ├── my_logit_processor.ts │ │ └── worker.ts ├── multi-models │ ├── README.md │ ├── package.json │ └── src │ │ ├── main.ts │ │ ├── multi_models.html │ │ └── worker.ts ├── multi-round-chat │ ├── README.md │ ├── package.json │ └── src │ │ ├── multi_round_chat.html │ │ └── multi_round_chat.ts ├── next-simple-chat │ ├── .gitignore │ ├── README.md │ ├── next.config.js │ ├── package.json │ ├── postcss.config.js │ ├── public │ │ ├── favicon.ico │ │ ├── next.svg │ │ └── vercel.svg │ ├── src │ │ ├── pages │ │ │ ├── _app.tsx │ │ │ ├── _document.tsx │ │ │ ├── api │ │ │ │ └── hello.ts │ │ │ └── index.tsx │ │ ├── styles │ │ │ └── globals.css │ │ └── utils │ │ │ ├── chat_component.tsx │ │ │ └── chat_ui.ts │ ├── tailwind.config.js │ └── tsconfig.json ├── qwen3 │ ├── README.md │ ├── package.json │ └── src │ │ ├── qwen3_example.html │ │ └── qwen3_example.ts ├── seed-to-reproduce │ ├── README.md │ ├── package.json │ └── src │ │ ├── seed.html │ │ └── seed.ts ├── service-worker │ ├── README.md │ ├── 
package.json │ └── src │ │ ├── index.html │ │ ├── main.ts │ │ └── sw.ts ├── simple-chat-js │ ├── index.css │ ├── index.html │ └── index.js ├── simple-chat-ts │ ├── .gitignore │ ├── README.md │ ├── package.json │ └── src │ │ ├── gh-config.js │ │ ├── img │ │ ├── plane.png │ │ └── reset.png │ │ ├── llm_chat.css │ │ ├── llm_chat.html │ │ ├── simple_chat.ts │ │ └── worker.ts ├── simple-chat-upload │ ├── .gitignore │ ├── README.md │ ├── package.json │ └── src │ │ ├── gh-config.js │ │ ├── img │ │ ├── plane.png │ │ └── reset.png │ │ ├── llm_chat.css │ │ ├── llm_chat.html │ │ ├── simple_chat.ts │ │ └── worker.ts ├── streaming │ ├── README.md │ ├── package.json │ └── src │ │ ├── streaming.html │ │ └── streaming.ts ├── text-completion │ ├── README.md │ ├── package.json │ └── src │ │ ├── text_completion.html │ │ └── text_completion.ts └── vision-model │ ├── README.md │ ├── package.json │ └── src │ ├── utils.ts │ ├── vision_model.html │ ├── vision_model.ts │ └── worker.ts ├── jest.config.cjs ├── licenses └── license.openai_node.txt ├── package-lock.json ├── package.json ├── rollup.config.js ├── scripts ├── gh_deploy_site.sh ├── local_deploy_site.sh ├── prep_deps.sh └── serve_mlc_llm_dist.sh ├── site ├── .gitignore ├── _config.yml ├── _includes │ ├── arrow.svg │ ├── github.svg │ ├── head.html │ └── hero.html ├── assets │ ├── css │ │ └── hero.scss │ ├── img │ │ ├── fig │ │ │ ├── Pittsburgh.png │ │ │ └── web-llm.svg │ │ └── logo │ │ │ ├── catalyst.svg │ │ │ ├── cmuscs.png │ │ │ ├── mlc-logo-with-text-landscape.png │ │ │ ├── mlc-logo-with-text-landscape.svg │ │ │ ├── octoml.png │ │ │ ├── sjtu.png │ │ │ └── uw.jpg │ └── video │ │ ├── Code.mp4 │ │ ├── Code.webm │ │ ├── Pittsburgh.mp4 │ │ └── Pittsburgh.webm └── index.md ├── src ├── cache_util.ts ├── config.ts ├── conversation.ts ├── embedding.ts ├── engine.ts ├── error.ts ├── extension_service_worker.ts ├── index.ts ├── llm_chat.ts ├── message.ts ├── openai_api_protocols │ ├── chat_completion.ts │ ├── completion.ts │ ├── embedding.ts │ └── index.ts ├── service_worker.ts ├── support.ts ├── types.ts ├── utils.ts └── web_worker.ts ├── tests ├── constants.ts ├── conversation.test.ts ├── function_calling.test.ts ├── generation_config.test.ts ├── multi_round_chat.test.ts ├── openai_chat_completion.test.ts ├── openai_completion.test.ts ├── openai_embeddings.test.ts └── util.test.ts ├── tsconfig.json └── utils ├── .gitignore └── vram_requirements ├── .gitignore ├── README.md ├── package.json └── src ├── gh-config.js ├── vram_requirements.html └── vram_requirements.ts /.eslintignore: -------------------------------------------------------------------------------- 1 | dist 2 | debug 3 | lib 4 | build 5 | node_modules 6 | 3rdparty 7 | .eslintrc.cjs 8 | **/.next -------------------------------------------------------------------------------- /.eslintrc.cjs: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | extends: ['eslint:recommended', 'plugin:@typescript-eslint/recommended', 'plugin:prettier/recommended'], 3 | parser: '@typescript-eslint/parser', 4 | plugins: ['@typescript-eslint'], 5 | root: true, 6 | rules: { 7 | "@typescript-eslint/no-explicit-any": "off", 8 | "@typescript-eslint/no-empty-function": "off", 9 | "@typescript-eslint/no-non-null-assertion": "off", 10 | }, 11 | overrides: [ 12 | { 13 | "files": ["examples/**/*.js", "examples/**/*.ts"], 14 | "rules": { 15 | "no-undef": "off", 16 | "@typescript-eslint/no-unused-vars": "off" 17 | } 18 | } 19 | ] 20 | }; 21 | 
-------------------------------------------------------------------------------- /.github/workflows/build-site.yaml: -------------------------------------------------------------------------------- 1 | name: Build site and push to gh-pages 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | build: 10 | name: Build site 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - uses: actions/checkout@v2 15 | 16 | - name: Configuring build Environment 17 | run: | 18 | sudo apt-get update 19 | python -m pip install -U pip 20 | 21 | - name: Setup Ruby 22 | uses: ruby/setup-ruby@v1 23 | with: 24 | ruby-version: '3.0' 25 | 26 | - name: Installing dependencies 27 | run: | 28 | python -m pip install -r docs/requirements.txt 29 | gem install jekyll jekyll-remote-theme jekyll-sass-converter 30 | 31 | - name: Build and deploy site 32 | if: github.ref == 'refs/heads/main' 33 | run: | 34 | git remote set-url origin https://x-access-token:${{ secrets.MLC_GITHUB_TOKEN }}@github.com/$GITHUB_REPOSITORY 35 | git config --global user.email "mlc-gh-actions-bot@nomail" 36 | git config --global user.name "mlc-gh-actions-bot" 37 | 38 | ./scripts/gh_deploy_site.sh -------------------------------------------------------------------------------- /.github/workflows/linter.yaml: -------------------------------------------------------------------------------- 1 | name: Linter 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | 11 | jobs: 12 | lint: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - name: Checkout code 16 | uses: actions/checkout@v3 17 | 18 | - name: Set up Node.js 19 | uses: actions/setup-node@v3 20 | with: 21 | node-version: '16' 22 | 23 | - name: Install dependencies 24 | run: npm install 25 | 26 | - name: Run lint 27 | run: npm run lint 28 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlc-ai/web-llm/d8b25fed8e81d6f6b27cdc07e839c1c09cfaa43d/.gitmodules -------------------------------------------------------------------------------- /.husky/pre-commit: -------------------------------------------------------------------------------- 1 | npx lint-staged 2 | -------------------------------------------------------------------------------- /.lintstagedrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "./**/*.{js,ts,jsx,tsx,json}": ["eslint --fix", "prettier --write"] 3 | } 4 | -------------------------------------------------------------------------------- /.prettierignore: -------------------------------------------------------------------------------- 1 | dist 2 | debug 3 | lib 4 | build 5 | node_modules 6 | 3rdparty 7 | .eslintrc.cjs 8 | **/.next -------------------------------------------------------------------------------- /.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "trailingComma": "all" 3 | } 4 | -------------------------------------------------------------------------------- /3rdparty/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlc-ai/web-llm/d8b25fed8e81d6f6b27cdc07e839c1c09cfaa43d/3rdparty/.gitkeep -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 
| ## Reporting a Vulnerability 4 | 5 | For security concerns or vulnerability reports, please send email to mlc-llm-private@googlegroups.com. 6 | -------------------------------------------------------------------------------- /cleanup-index-js.sh: -------------------------------------------------------------------------------- 1 | # Remove instances of string "const{createRequire:createRequire}=await import('module');" 2 | # This is required to allow background workers packaged with Parcel for the chrome extension 3 | # to run the `ChatModule`. 4 | sed -e s/"const{createRequire:createRequire}=await import('module');"//g -i .backup lib/index.js 5 | sed -e s/"const{createRequire:createRequire}=await import('module');"//g -i .backup lib/index.js.map 6 | 7 | # Replace string "new (require('u' + 'rl').URL)('file:' + __filename).href" with "MLC_DUMMY_PATH" 8 | # This is required for building nextJS projects -- its compile time would complain about `require()` 9 | # See https://github.com/mlc-ai/web-llm/issues/383 and the fixing PR's description for more. 10 | sed -e s/"new (require('u' + 'rl').URL)('file:' + __filename).href"/"\"MLC_DUMMY_PATH\""/g -i .backup lib/index.js 11 | # Replace with \"MLC_DUMMY_PATH\" 12 | sed -e s/"new (require('u' + 'rl').URL)('file:' + __filename).href"/'\\\"MLC_DUMMY_PATH\\\"'/g -i .backup lib/index.js.map 13 | 14 | # Replace "import require$$3 from 'perf_hooks';" with a string "const require$$3 = "MLC_DUMMY_REQUIRE_VAR"" 15 | # This is to prevent `perf_hooks` not found error 16 | # For more see https://github.com/mlc-ai/web-llm/issues/258 and https://github.com/mlc-ai/web-llm/issues/127 17 | sed -e s/"import require\$\$3 from 'perf_hooks';"/"const require\$\$3 = \"MLC_DUMMY_REQUIRE_VAR\""/g -i .backup lib/index.js 18 | # Similarly replace `const performanceNode = require(\"perf_hooks\")` with `const performanceNode = \"MLC_DUMMY_REQUIRE_VAR\"` 19 | sed -e s/'require(\\\"perf_hooks\\\")'/'\\\"MLC_DUMMY_REQUIRE_VAR\\\"'/g -i .backup lib/index.js.map 20 | 21 | # Below is added when we include dependency @mlc-ai/web-runtime, rather than using local tvm_home 22 | # Replace "import require$$4 from 'ws'" with a string "const require$$3 = "MLC_DUMMY_REQUIRE_VAR"" 23 | # This is to prevent error `Cannot find module 'ws'` 24 | sed -e s/"import require\$\$4 from 'ws';"/"const require\$\$4 = \"MLC_DUMMY_REQUIRE_VAR\""/g -i .backup lib/index.js 25 | # Similarly replace `const WebSocket = require(\"ws\")` with `const WebSocket = \"MLC_DUMMY_REQUIRE_VAR\"` 26 | sed -e s/'require(\\\"ws\\\")'/'\\\"MLC_DUMMY_REQUIRE_VAR\\\"'/g -i .backup lib/index.js.map 27 | 28 | # Cleanup backup files 29 | rm lib/index.js.backup 30 | rm lib/index.js.map.backup 31 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= python -m sphinx 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # WebLLM Documentation 2 | 3 | The documentation was built upon [Sphinx](https://www.sphinx-doc.org/en/master/). 4 | 5 | ## Dependencies 6 | 7 | Run the following command in this directory to install dependencies first: 8 | 9 | ```bash 10 | pip3 install -r requirements.txt 11 | ``` 12 | 13 | ## Build the Documentation 14 | 15 | Then you can build the documentation by running: 16 | 17 | ```bash 18 | make html 19 | ``` 20 | 21 | ## View the Documentation 22 | 23 | Run the following command to start a simple HTTP server: 24 | 25 | ```bash 26 | cd _build/html 27 | python3 -m http.server 28 | ``` 29 | 30 | Then you can view the documentation in your browser at `http://localhost:8000` (the port can be customized by appending ` -p PORT_NUMBER` in the python command above). 31 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | import sys 4 | 5 | import tlcpack_sphinx_addon 6 | 7 | # -- General configuration ------------------------------------------------ 8 | 9 | sys.path.insert(0, os.path.abspath("../python")) 10 | sys.path.insert(0, os.path.abspath("../")) 11 | autodoc_mock_imports = ["torch"] 12 | 13 | # General information about the project. 14 | project = "web-llm" 15 | author = "WebLLM Contributors" 16 | copyright = "2023, %s" % author 17 | 18 | # Version information. 19 | 20 | version = "0.2.79" 21 | release = "0.2.79" 22 | 23 | extensions = [ 24 | "sphinx_tabs.tabs", 25 | "sphinx_toolbox.collapse", 26 | "sphinxcontrib.httpdomain", 27 | "sphinx.ext.autodoc", 28 | "sphinx.ext.napoleon", 29 | "sphinx_reredirects", 30 | ] 31 | 32 | redirects = {"get_started/try_out": "../index.html#getting-started"} 33 | 34 | source_suffix = [".rst"] 35 | 36 | language = "en" 37 | 38 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] 39 | 40 | # The name of the Pygments (syntax highlighting) style to use. 41 | pygments_style = "sphinx" 42 | 43 | # A list of ignored prefixes for module index sorting. 44 | # If true, `todo` and `todoList` produce output, else they produce nothing. 
45 | todo_include_todos = False 46 | 47 | # -- Options for HTML output ---------------------------------------------- 48 | 49 | # The theme is set by the make target 50 | import sphinx_rtd_theme 51 | 52 | html_theme = "sphinx_rtd_theme" 53 | html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] 54 | 55 | templates_path = [] 56 | 57 | html_static_path = [] 58 | 59 | footer_copyright = "© 2023 MLC LLM" 60 | footer_note = " " 61 | 62 | html_logo = "_static/img/mlc-logo-with-text-landscape.svg" 63 | 64 | html_theme_options = { 65 | "logo_only": True, 66 | } 67 | 68 | header_links = [ 69 | ("Home", "https://webllm.mlc.ai/"), 70 | ("GitHub", "https://github.com/mlc-ai/web-llm"), 71 | ("Discord", "https://discord.gg/9Xpy2HGBuD"), 72 | ] 73 | 74 | header_dropdown = { 75 | "name": "Other Resources", 76 | "items": [ 77 | ("WebLLM Chat", "https://chat.webllm.ai/"), 78 | ("MLC Course", "https://mlc.ai/"), 79 | ("MLC Blog", "https://blog.mlc.ai/"), 80 | ("MLC LLM", "https://llm.mlc.ai/"), 81 | ], 82 | } 83 | 84 | html_context = { 85 | "footer_copyright": footer_copyright, 86 | "footer_note": footer_note, 87 | "header_links": header_links, 88 | "header_dropdown": header_dropdown, 89 | "display_github": True, 90 | "github_user": "mlc-ai", 91 | "github_repo": "web-llm", 92 | "github_version": "main/docs/", 93 | "theme_vcs_pageview_mode": "edit", 94 | # "header_logo": "/path/to/logo", 95 | # "header_logo_link": "", 96 | # "version_selecter": "", 97 | } 98 | 99 | 100 | # add additional overrides 101 | templates_path += [tlcpack_sphinx_addon.get_templates_path()] 102 | html_static_path += [tlcpack_sphinx_addon.get_static_path()] 103 | -------------------------------------------------------------------------------- /docs/developer/add_models.rst: -------------------------------------------------------------------------------- 1 | Adding Models 2 | ============= 3 | 4 | WebLLM allows you to compile custom language models using `MLC LLM `_ and then serve compiled model through WebLLM. 5 | 6 | For instructions of how to compile and add custom models to WebLLM, check the `MLC LLM documentation here `_. -------------------------------------------------------------------------------- /docs/developer/building_from_source.rst: -------------------------------------------------------------------------------- 1 | Building From Source 2 | ==================== 3 | 4 | Clone the Repository 5 | --------------------- 6 | .. code-block:: bash 7 | 8 | git clone https://github.com/mlc-ai/web-llm.git 9 | cd web-llm 10 | 11 | Install Dependencies 12 | --------------------- 13 | .. code-block:: bash 14 | 15 | npm install 16 | 17 | Build the Project 18 | ----------------- 19 | .. code-block:: bash 20 | 21 | npm run build 22 | 23 | Test Changes 24 | ------------ 25 | 26 | To test you changes, you can reuse any existing example or create a new example for your new functionality to test. 27 | 28 | Then, to test the effects of your code change in an example, inside ``examples//package.json``, change from ``"@mlc-ai/web-llm": "^0.2.xx"`` to ``"@mlc-ai/web-llm": ../...`` to let it reference you local code. 29 | 30 | .. 
code-block:: bash 31 | 32 | cd examples/ 33 | # Modify the package.json 34 | npm install 35 | npm start 36 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | 👋 Welcome to WebLLM 2 | ==================== 3 | 4 | `GitHub `_ | `WebLLM Chat `_ | `NPM `_ | `Discord `_ 5 | 6 | WebLLM is a high-performance in-browser language model inference engine that brings large language models (LLMs) to web browsers with hardware acceleration. With WebGPU support, it allows developers to build AI-powered applications directly within the browser environment, removing the need for server-side processing and ensuring privacy. 7 | 8 | It provides a specialized runtime for the web backend of MLCEngine, leverages 9 | `WebGPU `_ for local acceleration, offers OpenAI-compatible API, 10 | and provides built-in support for web workers to separate heavy computation from the UI flow. 11 | 12 | Key Features 13 | ------------ 14 | - 🌐 In-Browser Inference: Run LLMs directly in the browser 15 | - 🚀 WebGPU Acceleration: Leverage hardware acceleration for optimal performance 16 | - 🔄 OpenAI API Compatibility: Seamless integration with standard AI workflows 17 | - 📦 Multiple Model Support: Works with Llama, Phi, Gemma, Mistral, and more 18 | 19 | Start exploring WebLLM by `chatting with WebLLM Chat `_, and start building webapps with high-performance local LLM inference with the following guides and tutorials. 20 | 21 | .. toctree:: 22 | :maxdepth: 2 23 | :caption: User Guide 24 | 25 | user/get_started.rst 26 | user/basic_usage.rst 27 | user/advanced_usage.rst 28 | user/api_reference.rst 29 | 30 | .. toctree:: 31 | :maxdepth: 2 32 | :caption: Developer Guide 33 | 34 | developer/building_from_source.rst 35 | developer/add_models.rst 36 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 
21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 |
-------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx-tabs == 3.4.1 2 | sphinx-rtd-theme 3 | sphinx == 5.2.3 4 | sphinx-toolbox == 3.4.0 5 | tlcpack-sphinx-addon==0.2.2 6 | sphinxcontrib_httpdomain==1.8.1 7 | sphinxcontrib-napoleon==0.7 8 | sphinx-reredirects==0.1.2 9 |
-------------------------------------------------------------------------------- /docs/user/basic_usage.rst: -------------------------------------------------------------------------------- 1 | Basic Usage 2 | ================ 3 | 4 | Model Records in WebLLM 5 | ----------------------- 6 | 7 | Each model available in WebLLM is registered as an instance of 8 | ``ModelRecord`` and can be accessed at 9 | `webllm.prebuiltAppConfig.model_list `__. 10 |
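For example, you can enumerate the prebuilt model IDs before choosing one to load. This is a minimal sketch; it only assumes the ``model_id`` field of each record, which the examples in this repository also rely on:

.. code-block:: typescript

   import * as webllm from "@mlc-ai/web-llm";

   // prebuiltAppConfig ships with the package and holds the registered ModelRecords
   const modelIds = webllm.prebuiltAppConfig.model_list.map(
     (record) => record.model_id,
   );
   console.log("Available models:", modelIds);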
11 | Creating an MLCEngine 12 | --------------------- 13 | 14 | WebLLM APIs are exposed through the ``MLCEngine`` interface. You can create an ``MLCEngine`` instance and load a model by calling the ``CreateMLCEngine()`` factory function. 15 | 16 | (Note that loading a model requires downloading its weights, which can take a significant amount of time on the very first run before anything is cached. You should properly handle this asynchronous call.) 17 | 18 | ``MLCEngine`` can be instantiated in two ways: 19 | 1. Using the factory function ``CreateMLCEngine``. 20 | 2. Instantiating the ``MLCEngine`` class directly and using ``reload()`` to load models. 21 | 22 | .. code-block:: typescript 23 | 24 | import { CreateMLCEngine, MLCEngine } from "@mlc-ai/web-llm"; 25 | 26 | // Initialize with a progress callback 27 | const initProgressCallback = (progress) => { 28 | console.log("Model loading progress:", progress); 29 | }; 30 | 31 | // Using CreateMLCEngine 32 | const engine = await CreateMLCEngine("Llama-3.1-8B-Instruct", { initProgressCallback }); 33 | 34 | // Direct instantiation 35 | const engineInstance = new MLCEngine({ initProgressCallback }); 36 | await engineInstance.reload("Llama-3.1-8B-Instruct"); 37 | 38 | Under the hood, the ``CreateMLCEngine`` factory function first creates an engine instance (synchronous) and then loads the model (asynchronous). You can also perform the two steps separately in your application. 39 | 40 | .. code-block:: typescript 41 | 42 | import { MLCEngine } from "@mlc-ai/web-llm"; 43 | 44 | // This is a synchronous call that returns immediately 45 | const engine = new MLCEngine({ 46 | initProgressCallback: initProgressCallback 47 | }); 48 | 49 | // This is an asynchronous call and can take a long time to finish 50 | await engine.reload(selectedModel); 51 | 52 | 53 | Chat Completion 54 | --------------- 55 | 56 | Chat completions can be invoked using OpenAI-style chat APIs through the ``engine.chat.completions`` interface of an initialized ``MLCEngine``. For the full list of parameters and their descriptions, check :ref:`api-reference`. 57 | 58 | (Note: Since the model is determined at ``MLCEngine`` initialization time, the ``model`` parameter is not supported and will be **ignored**. Instead, call ``CreateMLCEngine(model)`` or ``engine.reload(model)`` to reinitialize the engine with a specific model.) 59 | 60 | .. code-block:: typescript 61 | 62 | const messages = [ 63 | { role: "system", content: "You are a helpful AI assistant." }, 64 | { role: "user", content: "Hello!" } 65 | ]; 66 | 67 | const reply = await engine.chat.completions.create({ 68 | messages, 69 | }); 70 | 71 | console.log(reply.choices[0].message); 72 | console.log(reply.usage); 73 | 74 | 75 | Streaming Chat Completion 76 | ------------------------- 77 | 78 | Streaming chat completion can be enabled by passing the ``stream: true`` parameter to the ``engine.chat.completions.create()`` call configuration. Check :ref:`api-reference` for the full list of parameters. 79 | 80 | .. code-block:: typescript 81 | 82 | const messages = [ 83 | { role: "system", content: "You are a helpful AI assistant." }, 84 | { role: "user", content: "Hello!" }, 85 | ] 86 | 87 | // Chunks is an AsyncGenerator object 88 | const chunks = await engine.chat.completions.create({ 89 | messages, 90 | temperature: 1, 91 | stream: true, // <-- Enable streaming 92 | stream_options: { include_usage: true }, 93 | }); 94 | 95 | let reply = ""; 96 | for await (const chunk of chunks) { 97 | reply += chunk.choices[0]?.delta.content || ""; 98 | console.log(reply); 99 | if (chunk.usage) { 100 | console.log(chunk.usage); // only last chunk has usage 101 | } 102 | } 103 | 104 | const fullReply = await engine.getMessage(); 105 | console.log(fullReply); 106 | 107 | 108 | Chatbot Examples 109 | ---------------- 110 | 111 | Learn how to use WebLLM to integrate large language models into your applications and generate chat completions through this simple chatbot example: 112 | 113 | - `Example in JSFiddle `_ 114 | - `Example in CodePen `_ 115 | 116 | For a more advanced example of a larger project, check `WebLLM Chat `_. 117 | 118 | More examples for different use cases are available in the examples folder. 119 | 120 | 121 |
-------------------------------------------------------------------------------- /docs/user/get_started.rst: -------------------------------------------------------------------------------- 1 | Getting Started with WebLLM 2 | =========================== 3 | 4 | This guide will help you set up WebLLM in your project, install the necessary dependencies, and verify your setup. 5 | 6 | 7 | WebLLM Chat 8 | ----------- 9 | 10 | If you want to experience AI Chat supported by local LLM inference and understand how WebLLM works, try out `WebLLM Chat `__, which provides a great example 11 | of integrating WebLLM into a full web application. 12 | 13 | A WebGPU-compatible browser is needed to run WebLLM-powered web applications. 14 | You can download the latest Google Chrome and use `WebGPU Report `__ 15 | to verify the functionality of WebGPU on your browser. 16 | 17 | Installation 18 | ------------ 19 | 20 | WebLLM offers a minimalist and modular interface to access the chatbot in the browser. The package is designed in a modular way to hook into any of your UI components. 21 | 22 | WebLLM is available as an `npm package `_ and is also CDN-delivered. Therefore, you can install WebLLM using Node.js package managers like npm, yarn, or pnpm, or directly import the package via CDN. 23 | 24 | Using Package Managers 25 | ^^^^^^^^^^^^^^^^^^^^^^ 26 | Install WebLLM via your preferred package manager: 27 | 28 |
.. code-block:: bash 29 | 30 | # npm 31 | npm install @mlc-ai/web-llm 32 | # yarn 33 | yarn add @mlc-ai/web-llm 34 | # pnpm 35 | pnpm install @mlc-ai/web-llm 36 | 37 | Import WebLLM into your project: 38 | 39 | .. code-block:: javascript 40 | 41 | // Import everything 42 | import * as webllm from "@mlc-ai/web-llm"; 43 | 44 | // Or only import what you need 45 | import { CreateMLCEngine } from "@mlc-ai/web-llm"; 46 | 47 | Using CDN 48 | ^^^^^^^^^ 49 | Thanks to `jsdelivr.com `_, WebLLM can be imported directly through a URL and works out of the box on cloud development platforms like `jsfiddle.net `_, `Codepen.io `_, and `Scribbler `_: 50 | 51 | .. code-block:: javascript 52 | 53 | import * as webllm from "https://esm.run/@mlc-ai/web-llm"; 54 | 55 | This method is especially useful for online environments like CodePen, JSFiddle, or local experiments. 56 | 57 | Verifying Installation 58 | ^^^^^^^^^^^^^^^^^^^^^^ 59 | Run the following script to verify the installation: 60 | 61 | .. code-block:: javascript 62 | 63 | import { CreateMLCEngine } from "@mlc-ai/web-llm"; 64 | console.log("WebLLM loaded successfully!"); 65 | 66 | 67 | Online IDE Sandbox 68 | ------------------ 69 | 70 | Instead of setting up WebLLM locally, you can also try it out in online JavaScript IDE sandboxes like: 71 | 72 | - `Example in JSFiddle `_ 73 | - `Example in CodePen `_ 74 | 75 | 76 |
-------------------------------------------------------------------------------- /examples/.gitignore: -------------------------------------------------------------------------------- 1 | package-lock.json 2 |
-------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | # Awesome WebLLM 2 | 3 | This page contains a curated list of examples, tutorials, and blogs about WebLLM use cases. 4 | Please send a pull request if you find things that belong here. 5 | 6 | ## Example Projects 7 | 8 | Note that all examples below run in-browser and use WebGPU as a backend. 9 | 10 | #### Project List 11 | 12 | - [get-started](get-started): minimal get-started example with chat completion. 13 | 14 | [![Open in JSFiddle](https://img.shields.io/badge/open-JSFiddle-blue?logo=jsfiddle&logoColor=white)](https://jsfiddle.net/neetnestor/yac9gbwf/) 15 | [![Open in Codepen](https://img.shields.io/badge/open-codepen-gainsboro?logo=codepen)](https://codepen.io/neetnestor/pen/NWVdgey) 16 | 17 | - [simple-chat-js](simple-chat-js): a minimal and complete chat bot app in vanilla JavaScript. 18 | 19 | [![Open in JSFiddle](https://img.shields.io/badge/open-JSFiddle-blue?logo=jsfiddle&logoColor=white)](https://jsfiddle.net/neetnestor/4nmgvsa2/) 20 | [![Open in Codepen](https://img.shields.io/badge/open-codepen-gainsboro?logo=codepen)](https://codepen.io/neetnestor/pen/vYwgZaG) 21 | 22 | - [simple-chat-ts](simple-chat-ts): a minimal and complete chat bot app in TypeScript. 23 | - [get-started-web-worker](get-started-web-worker): same as get-started, but using a web worker. 24 | - [next-simple-chat](next-simple-chat): a minimal and complete chat bot app with [Next.js](https://nextjs.org/).
25 | - [multi-round-chat](multi-round-chat): while APIs are functional, we internally optimize so that multi round chat usage can reuse KV cache 26 | - [text-completion](text-completion): demonstrates API `engine.completions.create()`, which is pure text completion with no conversation, as opposed to `engine.chat.completions.create()` 27 | - [embeddings](embeddings): demonstrates API `engine.embeddings.create()`, integration with `EmbeddingsInterface` and `MemoryVectorStore` of [Langchain.js](js.langchain.com), and RAG with Langchain.js using WebLLM for both LLM and Embedding in a single engine 28 | - [multi-models](multi-models): demonstrates loading multiple models in a single engine concurrently 29 | 30 | #### Advanced OpenAI API Capabilities 31 | 32 | These examples demonstrate various capabilities via WebLLM's OpenAI-like API. 33 | 34 | - [streaming](streaming): return output as chunks in real-time in the form of an AsyncGenerator 35 | - [json-mode](json-mode): efficiently ensure output is in json format, see [OpenAI Reference](https://platform.openai.com/docs/guides/text-generation/chat-completions-api) for more. 36 | - [json-schema](json-schema): besides guaranteeing output to be in JSON, ensure output to adhere to a specific JSON schema specified the user 37 | - [seed-to-reproduce](seed-to-reproduce): use seeding to ensure reproducible output with fields `seed`. 38 | - [function-calling](function-calling) (WIP): function calling with fields `tools` and `tool_choice` (with preliminary support). 39 | - [vision-model](vision-model): process request with image input using Vision Language Model (e.g. Phi3.5-vision) 40 | 41 | #### Chrome Extension 42 | 43 | - [chrome-extension](chrome-extension): chrome extension that does not have a persistent background 44 | - [chrome-extension-webgpu-service-worker](chrome-extension-webgpu-service-worker): chrome extension using service worker, hence having a persistent background 45 | 46 | #### Others 47 | 48 | - [logit-processor](logit-processor): while `logit_bias` is supported, we additionally support stateful logit processing where users can specify their own rules. We also expose low-level API `forwardTokensAndSample()`. 49 | - [cache-usage](cache-usage): demonstrates how WebLLM supports both the [Cache API](https://developer.mozilla.org/en-US/docs/Web/API/Cache) and [IndexedDB cache](https://developer.mozilla.org/en-US/docs/Web/API/IndexedDB_API), and 50 | users can pick with `appConfig.useIndexedDBCache`. Also demonstrates various cache utils such as checking 51 | whether a model is cached, deleting a model's weights from cache, deleting a model library wasm from cache, etc. 52 | - [simple-chat-upload](simple-chat-upload): demonstrates how to upload local models to WebLLM instead of downloading via a URL link 53 | 54 | ## Demo Spaces 55 | 56 | - [web-llm-embed](https://huggingface.co/spaces/matthoffner/web-llm-embed): document chat prototype using react-llm with transformers.js embeddings 57 | - [DeVinci](https://x6occ-biaaa-aaaai-acqzq-cai.icp0.io/): AI chat app based on WebLLM and hosted on decentralized cloud platform 58 | -------------------------------------------------------------------------------- /examples/abort-reload/README.md: -------------------------------------------------------------------------------- 1 | # WebLLM Get Started App 2 | 3 | This folder provides a demo for cancelling model fetching after calling `engine.reload()`. 
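The core pattern, condensed from `src/get_started.js`, is to start `engine.reload()` without awaiting it and then call `engine.unload()` to abort the in-flight download:

```ts
import { MLCEngine } from "@mlc-ai/web-llm";

const engine = new MLCEngine({
  initProgressCallback: (report) => console.log(report.text),
});

// Start fetching/loading the model (intentionally not awaited)
engine.reload("Llama-3.1-8B-Instruct-q4f32_1-MLC");

// Later, abort the in-flight reload by unloading the engine
setTimeout(() => {
  engine.unload().catch((err) => console.log(err));
}, 5000);
```

To run the full demo: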
4 | 5 | ```bash 6 | npm install 7 | npm start 8 | ``` 9 | 10 | Note: the following is only needed if you would like to hack on the WebLLM core package. 11 | You can change the web-llm dependency to `"file:../.."` and follow the build-from-source 12 | instructions in the project to build WebLLM locally. 13 | 14 |
-------------------------------------------------------------------------------- /examples/abort-reload/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "get-started", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "start": "parcel src/get_started.html --port 8887", 7 | "build": "parcel build src/get_started.html --dist-dir lib" 8 | }, 9 | "devDependencies": { 10 | "buffer": "^5.7.1", 11 | "parcel": "^2.8.3", 12 | "process": "^0.11.10", 13 | "tslib": "^2.3.1", 14 | "typescript": "^4.9.5", 15 | "url": "^0.11.3" 16 | }, 17 | "dependencies": { 18 | "@mlc-ai/web-llm": "^0.2.79" 19 | } 20 | } 21 |
-------------------------------------------------------------------------------- /examples/abort-reload/src/get_started.html: --------------------------------------------------------------------------------

[HTML markup lost in extraction — `get_started.html` is a minimal test page titled "WebLLM Test Page" with the note "Open console to see output", label placeholders for initialization progress, Prompt, and Response, and a script tag that loads `./get_started.js`.]
19 | 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /examples/abort-reload/src/get_started.js: -------------------------------------------------------------------------------- 1 | import * as webllm from "@mlc-ai/web-llm"; 2 | import { error } from "loglevel"; 3 | 4 | let engine; 5 | 6 | function setLabel(id, text) { 7 | const label = document.getElementById(id); 8 | if (label == null) { 9 | throw Error("Cannot find label " + id); 10 | } 11 | label.innerText = text; 12 | } 13 | 14 | async function main() { 15 | const initProgressCallback = (report) => { 16 | console.log(report.text); 17 | setLabel("init-label", report.text); 18 | }; 19 | // Option 1: If we do not specify appConfig, we use `prebuiltAppConfig` defined in `config.ts` 20 | const selectedModel = "Llama-3.1-8B-Instruct-q4f32_1-MLC"; 21 | engine = new webllm.MLCEngine({ 22 | initProgressCallback, 23 | }); 24 | engine.reload(selectedModel); 25 | } 26 | main(); 27 | setTimeout(() => { 28 | console.log("calling unload"); 29 | engine.unload().catch((err) => { 30 | console.log(err); 31 | }); 32 | }, 5000); 33 | -------------------------------------------------------------------------------- /examples/cache-usage/README.md: -------------------------------------------------------------------------------- 1 | # WebLLM Cache Usage 2 | 3 | WebLLM supports both the Cache API and IndexedDB, which you can specify via `AppConfig.useIndexedDBCache`. 4 | This folder provides an example on how Cache and IndexedDB Cache are used in WebLLM. We also 5 | demonstrate the utility cache functions such as deleting models, checking if models are in cache, etc. 6 | 7 | For more information about the two caches, see: https://developer.mozilla.org/en-US/docs/Web/API/Storage_API/Storage_quotas_and_eviction_criteria#what_technologies_store_data_in_the_browser. 8 | 9 | To inspect the downloaded artifacts in your browser, open up developer console, go to application, 10 | and you will find the artifacts under either `IndexedDB` or `Cache storage`. 11 | 12 | To run the exapmle, you can do the following steps under this folder 13 | 14 | ```bash 15 | npm install 16 | npm start 17 | ``` 18 | 19 | Note if you would like to hack WebLLM core package. 20 | You can change web-llm dependencies as `"file:../.."`, and follow the build from source 21 | instruction in the project to build webllm locally. This option is only recommended 22 | if you would like to hack WebLLM core package. 23 | -------------------------------------------------------------------------------- /examples/cache-usage/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "cache-usage", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "start": "parcel src/cache_usage.html --port 8888", 7 | "build": "parcel build src/cache_usage.html --dist-dir lib" 8 | }, 9 | "devDependencies": { 10 | "buffer": "^5.7.1", 11 | "parcel": "^2.8.3", 12 | "process": "^0.11.10", 13 | "tslib": "^2.3.1", 14 | "typescript": "^4.9.5", 15 | "url": "^0.11.3" 16 | }, 17 | "dependencies": { 18 | "@mlc-ai/web-llm": "^0.2.79" 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /examples/cache-usage/src/cache_usage.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 | 8 |

[HTML markup lost in extraction — `cache_usage.html` is a minimal test page titled "WebLLM Test Page" with the note "Open console to see output", label placeholders for initialization progress, Prompt, and Response, and a script tag that loads the `cache_usage.ts` entry script.]
20 | 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /examples/cache-usage/src/cache_usage.ts: -------------------------------------------------------------------------------- 1 | import * as webllm from "@mlc-ai/web-llm"; 2 | 3 | function setLabel(id: string, text: string) { 4 | const label = document.getElementById(id); 5 | if (label == null) { 6 | throw Error("Cannot find label " + id); 7 | } 8 | label.innerText = text; 9 | } 10 | 11 | const initProgressCallback = (report: webllm.InitProgressReport) => { 12 | setLabel("init-label", report.text); 13 | }; 14 | 15 | async function main() { 16 | const appConfig = webllm.prebuiltAppConfig; 17 | // CHANGE THIS TO SEE EFFECTS OF BOTH, CODE BELOW DO NOT NEED TO CHANGE 18 | appConfig.useIndexedDBCache = true; 19 | 20 | if (appConfig.useIndexedDBCache) { 21 | console.log("Using IndexedDB Cache"); 22 | } else { 23 | console.log("Using Cache API"); 24 | } 25 | 26 | // 1. This triggers downloading and caching the model with either Cache or IndexedDB Cache 27 | const selectedModel = "phi-2-q4f16_1-MLC"; 28 | const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine( 29 | selectedModel, 30 | { initProgressCallback: initProgressCallback, appConfig: appConfig }, 31 | ); 32 | 33 | const request: webllm.ChatCompletionRequest = { 34 | stream: false, 35 | messages: [ 36 | { 37 | role: "user", 38 | content: "Write an analogy between mathematics and a lighthouse.", 39 | }, 40 | ], 41 | n: 1, 42 | }; 43 | let reply = await engine.chat.completions.create(request); 44 | console.log(reply); 45 | 46 | // 2. Check whether model weights are cached 47 | let modelCached = await webllm.hasModelInCache(selectedModel, appConfig); 48 | console.log("hasModelInCache: ", modelCached); 49 | if (!modelCached) { 50 | throw Error("Expect hasModelInCache() to be true, but got: " + modelCached); 51 | } 52 | 53 | // 3. We reload, and we should see this time it is much faster because the weights are cached. 54 | console.log("Reload model start"); 55 | await engine.reload(selectedModel); 56 | console.log("Reload model end"); 57 | reply = await engine.chat.completions.create(request); 58 | console.log(reply); 59 | 60 | // 4. Delete every thing about this model from cache 61 | // You can also delete only the model library wasm, only the model weights, or only the config file 62 | await webllm.deleteModelAllInfoInCache(selectedModel, appConfig); 63 | modelCached = await webllm.hasModelInCache(selectedModel, appConfig); 64 | console.log("After deletion, hasModelInCache: ", modelCached); 65 | if (modelCached) { 66 | throw Error( 67 | "Expect hasModelInCache() to be false, but got: " + modelCached, 68 | ); 69 | } 70 | 71 | // 5. 
If we reload, we should expect the model to start downloading again 72 | console.log("Reload model start"); 73 | await engine.reload(selectedModel); 74 | console.log("Reload model end"); 75 | reply = await engine.chat.completions.create(request); 76 | console.log(reply); 77 | } 78 | 79 | main(); 80 | -------------------------------------------------------------------------------- /examples/chrome-extension-webgpu-service-worker/README.md: -------------------------------------------------------------------------------- 1 | # WebLLM Chrome Extension using WebGPU Running on Service Worker 2 | 3 | ![Chrome Extension](https://github.com/mlc-ai/mlc-llm/assets/11940172/0d94cc73-eff1-4128-a6e4-70dc879f04e0) 4 | 5 | > [!WARNING] 6 | > Service worker support in WebGPU is enabled by default in [Chrome 124](https://chromiumdash.appspot.com/commit/8d78510e4aca5ac3cd8ee4a33e96b404eaa43246). 7 | > If you are using Chrome 123, go to `chrome://flags/#enable-experimental-web-platform-features`, enable the `#enable-experimental-web-platform-features` flag, and **relaunch the browser**. 8 | 9 | This example shows how we can create a Chrome extension using WebGPU and service worker. 10 | 11 | - The project structure is as follows: 12 | - `manifest.json`: A required file that lists important information about the structure and behavior of that extension. Here we are using manifest V3. 13 | - `popup.ts`: Script of the extension pop-up window. 14 | - `background.ts`: Script of the service worker. An extension service worker is loaded when it is needed, and unloaded when it goes dormant. 15 | - `content.js`: Content script that interacts with DOM. 16 | - Run 17 | 18 | ```bash 19 | npm install 20 | npm run build 21 | ``` 22 | 23 | This will create a new directory at `./dist/`. To load the extension into Chrome, go to Extensions > Manage Extensions and select Load Unpacked. Add the `./dist/` directory. You can now pin the extension to your toolbar and use it to chat with your favorite model! 24 | 25 | **Note**: This example disables chatting using the contents of the active tab by default. 26 | To enable it, set `useContext` in `popup.ts` to `true`. More info about this feature can be found 27 | [here](https://github.com/mlc-ai/web-llm/pull/190). 28 | However, if the web content is too large, it might run into issues. We recommend using `example.html` to 29 | test this feature. 
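On the popup side, the UI does not run the model itself; it talks to the engine living in the service worker. A minimal sketch of that wiring is shown below — it assumes the client-side factory `CreateExtensionServiceWorkerMLCEngine`, the counterpart of the `ExtensionServiceWorkerMLCEngineHandler` used in `background.ts`; see `popup.ts` for the actual implementation.

```ts
import { CreateExtensionServiceWorkerMLCEngine } from "@mlc-ai/web-llm";

// Assumed counterpart of ExtensionServiceWorkerMLCEngineHandler (see background.ts);
// popup.ts contains the real wiring.
const engine = await CreateExtensionServiceWorkerMLCEngine(
  "Llama-3.1-8B-Instruct-q4f32_1-MLC",
  { initProgressCallback: (report) => console.log(report.text) },
);

const reply = await engine.chat.completions.create({
  messages: [{ role: "user", content: "Summarize the current page." }],
});
console.log(reply.choices[0].message.content);
```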
30 | -------------------------------------------------------------------------------- /examples/chrome-extension-webgpu-service-worker/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "chrome-extension", 3 | "version": "1.0.0", 4 | "description": "", 5 | "private": true, 6 | "scripts": { 7 | "build": "parcel build src/manifest.json --config @parcel/config-webextension" 8 | }, 9 | "author": "", 10 | "license": "ISC", 11 | "devDependencies": { 12 | "@parcel/config-webextension": "^2.9.3", 13 | "@types/chrome": "^0.0.242", 14 | "buffer": "^6.0.3", 15 | "parcel": "^2.9.3", 16 | "process": "^0.11.10", 17 | "url": "^0.11.1" 18 | }, 19 | "dependencies": { 20 | "@mlc-ai/web-llm": "^0.2.79", 21 | "progressbar.js": "^1.1.0" 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /examples/chrome-extension-webgpu-service-worker/src/background.ts: -------------------------------------------------------------------------------- 1 | import { ExtensionServiceWorkerMLCEngineHandler } from "@mlc-ai/web-llm"; 2 | 3 | // Hookup an engine to a service worker handler 4 | let handler; 5 | 6 | chrome.runtime.onConnect.addListener(function (port) { 7 | console.assert(port.name === "web_llm_service_worker"); 8 | if (handler === undefined) { 9 | handler = new ExtensionServiceWorkerMLCEngineHandler(port); 10 | } else { 11 | handler.setPort(port); 12 | } 13 | port.onMessage.addListener(handler.onmessage.bind(handler)); 14 | }); 15 | -------------------------------------------------------------------------------- /examples/chrome-extension-webgpu-service-worker/src/content.js: -------------------------------------------------------------------------------- 1 | // Only the content script is able to access the DOM 2 | chrome.runtime.onConnect.addListener(function (port) { 3 | port.onMessage.addListener(function (msg) { 4 | port.postMessage({ contents: document.body.innerHTML }); 5 | }); 6 | }); 7 | -------------------------------------------------------------------------------- /examples/chrome-extension-webgpu-service-worker/src/example.html: -------------------------------------------------------------------------------- 1 | In the year 2154, humanity had colonized several planets in the distant reaches 2 | of the galaxy. The planet of Xylophia-IV was one of the most remote and 3 | inhospitable, with temperatures often dropping to -200 degrees Celsius. Despite 4 | these harsh conditions, a team of scientists had established a research station 5 | on the planet to study the unique geological formations and exotic flora and 6 | fauna. One day, while conducting a routine survey of the planet's surface, the 7 | team discovered an strange object buried deep in the ice. As they examined it 8 | closer, they realized it was a small, metallic capsule with a glowing blue 9 | symbol etched onto its surface. The team's leader, a brilliant scientist named 10 | Dr. Maria Rodriguez, was immediately intrigued by the capsule's mysterious 11 | origins. She ordered her team to bring it back to the research station for 12 | further analysis. After weeks of studying the capsule, the team finally cracked 13 | the code to the symbol etched onto its surface. It was a message from an alien 14 | race, warning Earth of an impending attack from an unknown threat. The team was 15 | shocked and dismayed by the news, but they knew they had to act quickly to warn 16 | the rest of humanity. 
They transmitted the message to the nearest space station, 17 | which relayed it to Earth's government. As the threat of attack loomed near, the 18 | team remained on high alert, ready to face whatever dangers lay ahead. They had 19 | uncovered a secrets of the universe, and now they were determined to protect 20 | their planet and its inhabitants at all costs. 21 | -------------------------------------------------------------------------------- /examples/chrome-extension-webgpu-service-worker/src/icons/icon-128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlc-ai/web-llm/d8b25fed8e81d6f6b27cdc07e839c1c09cfaa43d/examples/chrome-extension-webgpu-service-worker/src/icons/icon-128.png -------------------------------------------------------------------------------- /examples/chrome-extension-webgpu-service-worker/src/icons/icon-16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlc-ai/web-llm/d8b25fed8e81d6f6b27cdc07e839c1c09cfaa43d/examples/chrome-extension-webgpu-service-worker/src/icons/icon-16.png -------------------------------------------------------------------------------- /examples/chrome-extension-webgpu-service-worker/src/icons/icon-32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlc-ai/web-llm/d8b25fed8e81d6f6b27cdc07e839c1c09cfaa43d/examples/chrome-extension-webgpu-service-worker/src/icons/icon-32.png -------------------------------------------------------------------------------- /examples/chrome-extension-webgpu-service-worker/src/icons/icon-64.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlc-ai/web-llm/d8b25fed8e81d6f6b27cdc07e839c1c09cfaa43d/examples/chrome-extension-webgpu-service-worker/src/icons/icon-64.png -------------------------------------------------------------------------------- /examples/chrome-extension-webgpu-service-worker/src/manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "manifest_version": 3, 3 | "name": "MLCBot", 4 | "version": "0.1.0", 5 | "description": "Chat with your browser", 6 | "icons": { 7 | "16": "icons/icon-16.png", 8 | "32": "icons/icon-32.png", 9 | "64": "icons/icon-64.png", 10 | "128": "icons/icon-128.png" 11 | }, 12 | "content_security_policy": { 13 | "extension_pages": "style-src-elem 'self' https://cdnjs.cloudflare.com; font-src 'self' https://cdnjs.cloudflare.com; script-src 'self' 'wasm-unsafe-eval'; default-src 'self' data:; connect-src 'self' data: http://localhost:8000 https://huggingface.co https://cdn-lfs.huggingface.co https://cdn-lfs-us-1.huggingface.co https://raw.githubusercontent.com https://cdn-lfs-us-1.hf.co" 14 | }, 15 | "action": { 16 | "default_title": "MLCBot", 17 | "default_popup": "popup.html" 18 | }, 19 | "content_scripts": [ 20 | { 21 | "matches": [""], 22 | "js": ["content.js"] 23 | } 24 | ], 25 | "background": { 26 | "service_worker": "background.ts", 27 | "type": "module" 28 | }, 29 | "permissions": ["storage", "tabs", "webNavigation"] 30 | } 31 | -------------------------------------------------------------------------------- /examples/chrome-extension-webgpu-service-worker/src/popup.css: -------------------------------------------------------------------------------- 1 | *, 2 | *::before, 3 | *::after { 4 | margin: 0; 5 | padding: 0; 6 | box-sizing: border-box; 7 | } 
8 | 9 | html { 10 | font-family: 11 | -apple-system, 12 | BlinkMacSystemFont, 13 | Segoe UI, 14 | Helvetica, 15 | Arial, 16 | sans-serif; 17 | color: #222; 18 | } 19 | 20 | body { 21 | margin: 0; 22 | padding: 0.5rem; 23 | background-color: #778da9; 24 | width: 320px; 25 | font-size: small; 26 | } 27 | 28 | p { 29 | margin: 0; 30 | } 31 | 32 | /* LOADING BAR */ 33 | #loadingContainer { 34 | margin-bottom: 15px; 35 | width: 300px; 36 | height: 8px; 37 | } 38 | 39 | /* INPUT AREA */ 40 | #query-input { 41 | border: 1px solid #ccc; 42 | border-radius: 4px; 43 | } 44 | 45 | .input-container { 46 | display: flex; 47 | flex-direction: row; 48 | align-items: center; 49 | } 50 | 51 | .input-container input { 52 | width: 100%; 53 | outline: none; 54 | padding: 0.5rem; 55 | margin-right: 0.5rem; 56 | } 57 | 58 | /* SUBMIT BUTTON */ 59 | .btn { 60 | background-color: #1b263b; 61 | color: white; 62 | font-size: small; 63 | cursor: pointer; 64 | border-radius: 4px; 65 | border: none; 66 | padding: 0.5rem; 67 | } 68 | 69 | .btn:hover { 70 | background-color: #d0d0d0; 71 | } 72 | 73 | .btn:disabled { 74 | background-color: #a7a7a7; 75 | color: rgb(255, 255, 255); 76 | cursor: default; 77 | } 78 | 79 | .btn img { 80 | width: 1rem; 81 | height: 1rem; 82 | } 83 | 84 | /* LOADING */ 85 | 86 | .stage { 87 | display: flex; 88 | justify-content: center; 89 | align-items: center; 90 | position: relative; 91 | margin: 0 -5%; 92 | overflow: hidden; 93 | } 94 | 95 | #loading-indicator { 96 | display: none; 97 | color: white; 98 | margin-top: 0.5rem; 99 | } 100 | 101 | .dot-flashing { 102 | position: relative; 103 | width: 10px; 104 | height: 10px; 105 | border-radius: 5px; 106 | background-color: #1b263b; 107 | color: #1b263b; 108 | animation: dot-flashing 0.4s infinite linear alternate; 109 | animation-delay: 0.2s; 110 | } 111 | 112 | .dot-flashing::before, 113 | .dot-flashing::after { 114 | content: ""; 115 | display: inline-block; 116 | position: absolute; 117 | top: 0; 118 | } 119 | 120 | .dot-flashing::before { 121 | left: -15px; 122 | width: 10px; 123 | height: 10px; 124 | border-radius: 5px; 125 | background-color: #1b263b; 126 | color: #1b263b; 127 | animation: dot-flashing 0.4s infinite alternate; 128 | animation-delay: 0s; 129 | } 130 | 131 | .dot-flashing::after { 132 | left: 15px; 133 | width: 10px; 134 | height: 10px; 135 | border-radius: 5px; 136 | background-color: #1b263b; 137 | color: #1b263b; 138 | animation: dot-flashing 0.4s infinite alternate; 139 | animation-delay: 0.4s; 140 | } 141 | 142 | @keyframes dot-flashing { 143 | 0% { 144 | background-color: #1b263b; 145 | } 146 | 147 | 50%, 148 | 100% { 149 | background-color: #415a77; 150 | } 151 | } 152 | 153 | /* ANSWERS */ 154 | #queriesAnswersContainer { 155 | display: block; 156 | color: white; 157 | margin-top: 0.5rem; 158 | } 159 | 160 | #answer { 161 | color: #333333; 162 | } 163 | 164 | #answerWrapper { 165 | display: none; 166 | background-color: #ffd166; 167 | border-radius: 8px; 168 | padding: 0.5rem; 169 | margin-top: 0.5rem; 170 | } 171 | 172 | .queriesAnswers { 173 | border-radius: 8px; 174 | background-color: #ffd166; 175 | padding: 0.5rem; 176 | color: #333333; 177 | } 178 | 179 | #lastQuery { 180 | color: rgb(188, 188, 188); 181 | } 182 | 183 | #lastAnswer { 184 | color: white; 185 | margin-top: 0.5rem; 186 | } 187 | 188 | #lastRequest { 189 | padding: 0.5rem; 190 | margin-top: 0.5rem; 191 | background-color: #333333; 192 | border-radius: 4px; 193 | } 194 | 195 | /* ANSWER OPTIONS */ 196 | .timeStamp { 197 | color: #9a8c98; 198 | } 
199 | 200 | .copyRow { 201 | display: flex; 202 | flex-direction: row; 203 | align-items: end; 204 | justify-content: space-between; 205 | color: #a7a7a7; 206 | margin-top: 0.5rem; 207 | } 208 | 209 | .copyText { 210 | display: none; 211 | color: #a7a7a7; 212 | margin-right: 0.5rem; 213 | } 214 | 215 | .copyButton { 216 | color: #415a77; 217 | background-color: transparent; 218 | border: none; 219 | cursor: pointer; 220 | padding: 0; 221 | margin-left: 0.5rem; 222 | } 223 | 224 | .copyButton:hover { 225 | color: #5e80a7; 226 | background-color: transparent; 227 | } 228 | 229 | .removeButton { 230 | color: #415a77; 231 | background-color: transparent; 232 | border: none; 233 | cursor: pointer; 234 | padding: 0; 235 | } 236 | 237 | .removeButton:hover { 238 | color: #5e80a7; 239 | background-color: transparent; 240 | } 241 | -------------------------------------------------------------------------------- /examples/chrome-extension-webgpu-service-worker/src/popup.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Chatbot 6 | 7 | 11 | 12 | 13 |
14 | 15 |
16 | 21 | 24 |
25 | 26 |
27 |
28 |
29 | 30 |
31 |
32 |
33 | 34 | 41 |
42 |
43 | 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /examples/chrome-extension/README.md: -------------------------------------------------------------------------------- 1 | # WebLLM Chrome Extension 2 | 3 | ![Chrome Extension](https://github.com/mlc-ai/mlc-llm/assets/11940172/0d94cc73-eff1-4128-a6e4-70dc879f04e0) 4 | 5 | To run the extension, do the following steps under this folder 6 | 7 | ```bash 8 | npm install 9 | npm run build 10 | ``` 11 | 12 | This will create a new directory at `chrome-extension/dist/`. To load the extension into Chrome, go to Extensions > Manage Extensions and select Load Unpacked. Add the `chrome-extension/dist/` directory. You can now pin the extension to your toolbar and use the drop-down menu to chat with your favorite model! 13 | -------------------------------------------------------------------------------- /examples/chrome-extension/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "chrome-extension", 3 | "version": "1.0.1", 4 | "description": "", 5 | "private": true, 6 | "scripts": { 7 | "build": "parcel build src/manifest.json --config @parcel/config-webextension" 8 | }, 9 | "author": "", 10 | "license": "ISC", 11 | "devDependencies": { 12 | "@parcel/config-webextension": "^2.9.3", 13 | "@types/chrome": "^0.0.242", 14 | "buffer": "^6.0.3", 15 | "parcel": "^2.9.3", 16 | "process": "^0.11.10", 17 | "url": "^0.11.1" 18 | }, 19 | "dependencies": { 20 | "@mlc-ai/web-llm": "^0.2.79", 21 | "progressbar.js": "^1.1.0" 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /examples/chrome-extension/src/content.js: -------------------------------------------------------------------------------- 1 | // Only the content script is able to access the DOM 2 | chrome.runtime.onConnect.addListener(function (port) { 3 | port.onMessage.addListener(function (msg) { 4 | port.postMessage({ contents: document.body.innerText }); 5 | }); 6 | }); 7 | -------------------------------------------------------------------------------- /examples/chrome-extension/src/example.html: -------------------------------------------------------------------------------- 1 | In the year 2154, humanity had colonized several planets in the distant reaches 2 | of the galaxy. The planet of Xylophia-IV was one of the most remote and 3 | inhospitable, with temperatures often dropping to -200 degrees Celsius. Despite 4 | these harsh conditions, a team of scientists had established a research station 5 | on the planet to study the unique geological formations and exotic flora and 6 | fauna. One day, while conducting a routine survey of the planet's surface, the 7 | team discovered an strange object buried deep in the ice. As they examined it 8 | closer, they realized it was a small, metallic capsule with a glowing blue 9 | symbol etched onto its surface. The team's leader, a brilliant scientist named 10 | Dr. Maria Rodriguez, was immediately intrigued by the capsule's mysterious 11 | origins. She ordered her team to bring it back to the research station for 12 | further analysis. After weeks of studying the capsule, the team finally cracked 13 | the code to the symbol etched onto its surface. It was a message from an alien 14 | race, warning Earth of an impending attack from an unknown threat. The team was 15 | shocked and dismayed by the news, but they knew they had to act quickly to warn 16 | the rest of humanity. 
They transmitted the message to the nearest space station, 17 | which relayed it to Earth's government. As the threat of attack loomed near, the 18 | team remained on high alert, ready to face whatever dangers lay ahead. They had 19 | uncovered a secrets of the universe, and now they were determined to protect 20 | their planet and its inhabitants at all costs. 21 | -------------------------------------------------------------------------------- /examples/chrome-extension/src/icons/icon-128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlc-ai/web-llm/d8b25fed8e81d6f6b27cdc07e839c1c09cfaa43d/examples/chrome-extension/src/icons/icon-128.png -------------------------------------------------------------------------------- /examples/chrome-extension/src/icons/icon-16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlc-ai/web-llm/d8b25fed8e81d6f6b27cdc07e839c1c09cfaa43d/examples/chrome-extension/src/icons/icon-16.png -------------------------------------------------------------------------------- /examples/chrome-extension/src/icons/icon-32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlc-ai/web-llm/d8b25fed8e81d6f6b27cdc07e839c1c09cfaa43d/examples/chrome-extension/src/icons/icon-32.png -------------------------------------------------------------------------------- /examples/chrome-extension/src/icons/icon-64.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlc-ai/web-llm/d8b25fed8e81d6f6b27cdc07e839c1c09cfaa43d/examples/chrome-extension/src/icons/icon-64.png -------------------------------------------------------------------------------- /examples/chrome-extension/src/manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "manifest_version": 3, 3 | "name": "MLCBot", 4 | "version": "0.1.1", 5 | "description": "Chat with your browser", 6 | "icons": { 7 | "16": "icons/icon-16.png", 8 | "32": "icons/icon-32.png", 9 | "64": "icons/icon-64.png", 10 | "128": "icons/icon-128.png" 11 | }, 12 | "content_security_policy": { 13 | "extension_pages": "style-src-elem 'self' https://cdnjs.cloudflare.com; font-src 'self' https://cdnjs.cloudflare.com; script-src 'self' 'wasm-unsafe-eval'; default-src 'self' data:; connect-src 'self' data: http://localhost:8000 https://huggingface.co https://cdn-lfs.huggingface.co https://cdn-lfs-us-1.huggingface.co https://raw.githubusercontent.com https://cdn-lfs-us-1.hf.co" 14 | }, 15 | "action": { 16 | "default_title": "MLCBot", 17 | "default_popup": "popup.html" 18 | }, 19 | "content_scripts": [ 20 | { 21 | "matches": [""], 22 | "js": ["content.js"] 23 | } 24 | ], 25 | "permissions": ["storage", "tabs", "webNavigation", "activeTab", "scripting"], 26 | "host_permissions": ["http://*/", "https://*/"] 27 | } 28 | -------------------------------------------------------------------------------- /examples/chrome-extension/src/manifest_v2.json: -------------------------------------------------------------------------------- 1 | { 2 | "manifest_version": 2, 3 | "name": "MLCBot", 4 | "version": "0.1.0", 5 | "description": "Chat with your browser", 6 | "icons": { 7 | "16": "icons/icon-16.png", 8 | "32": "icons/icon-32.png", 9 | "64": "icons/icon-64.png", 10 | "128": "icons/icon-128.png" 11 | }, 12 | "content_security_policy": "style-src-elem 
'self' https://cdnjs.cloudflare.com; font-src 'self' https://cdnjs.cloudflare.com; script-src 'self' 'unsafe-eval' 'wasm-unsafe-eval'; default-src 'self' data:; connect-src 'self' data: http://localhost:8000 https://huggingface.co https://cdn-lfs.huggingface.co https://raw.githubusercontent.com https://cdn-lfs-us-1.hf.co", 13 | "browser_action": { 14 | "default_popup": "popup.html" 15 | }, 16 | "content_scripts": [ 17 | { 18 | "matches": [""], 19 | "js": ["content.js"] 20 | } 21 | ], 22 | "permissions": ["storage", "tabs", "webNavigation", "activeTab"] 23 | } 24 | -------------------------------------------------------------------------------- /examples/chrome-extension/src/popup.css: -------------------------------------------------------------------------------- 1 | *, 2 | *::before, 3 | *::after { 4 | margin: 0; 5 | padding: 0; 6 | box-sizing: border-box; 7 | } 8 | 9 | html { 10 | font-family: 11 | -apple-system, 12 | BlinkMacSystemFont, 13 | Segoe UI, 14 | Helvetica, 15 | Arial, 16 | sans-serif; 17 | color: #222; 18 | } 19 | 20 | body { 21 | margin: 0; 22 | padding: 0.5rem; 23 | background-color: #778da9; 24 | width: 335px; 25 | font-size: small; 26 | } 27 | 28 | p { 29 | margin: 0; 30 | } 31 | 32 | /* LOADING BAR */ 33 | #loadingContainer { 34 | margin-bottom: 15px; 35 | width: 315px; 36 | height: 8px; 37 | } 38 | 39 | /* INPUT AREA */ 40 | #query-input { 41 | border: 1px solid #ccc; 42 | border-radius: 4px; 43 | } 44 | 45 | .input-container { 46 | display: flex; 47 | flex-direction: row; 48 | align-items: center; 49 | } 50 | 51 | .input-container input { 52 | width: 100%; 53 | outline: none; 54 | padding: 0.5rem; 55 | margin-right: 0.5rem; 56 | } 57 | 58 | /* BUTTON */ 59 | .btn { 60 | background-color: #1b263b; 61 | color: white; 62 | font-size: small; 63 | cursor: pointer; 64 | border-radius: 4px; 65 | border: none; 66 | padding: 0.5rem; 67 | } 68 | 69 | .btn:hover { 70 | background-color: #d0d0d0; 71 | } 72 | 73 | .btn:disabled { 74 | background-color: #a7a7a7; 75 | color: rgb(255, 255, 255); 76 | cursor: default; 77 | } 78 | 79 | .btn img { 80 | width: 1rem; 81 | height: 1rem; 82 | } 83 | 84 | /* LOADING */ 85 | 86 | .stage { 87 | display: flex; 88 | justify-content: center; 89 | align-items: center; 90 | position: relative; 91 | margin: 0 -5%; 92 | overflow: hidden; 93 | } 94 | 95 | #loading-indicator { 96 | display: none; 97 | color: white; 98 | margin-top: 0.5rem; 99 | } 100 | 101 | .dot-flashing { 102 | position: relative; 103 | width: 10px; 104 | height: 10px; 105 | border-radius: 5px; 106 | background-color: #1b263b; 107 | color: #1b263b; 108 | animation: dot-flashing 0.4s infinite linear alternate; 109 | animation-delay: 0.2s; 110 | } 111 | 112 | .dot-flashing::before, 113 | .dot-flashing::after { 114 | content: ""; 115 | display: inline-block; 116 | position: absolute; 117 | top: 0; 118 | } 119 | 120 | .dot-flashing::before { 121 | left: -15px; 122 | width: 10px; 123 | height: 10px; 124 | border-radius: 5px; 125 | background-color: #1b263b; 126 | color: #1b263b; 127 | animation: dot-flashing 0.4s infinite alternate; 128 | animation-delay: 0s; 129 | } 130 | 131 | .dot-flashing::after { 132 | left: 15px; 133 | width: 10px; 134 | height: 10px; 135 | border-radius: 5px; 136 | background-color: #1b263b; 137 | color: #1b263b; 138 | animation: dot-flashing 0.4s infinite alternate; 139 | animation-delay: 0.4s; 140 | } 141 | 142 | @keyframes dot-flashing { 143 | 0% { 144 | background-color: #1b263b; 145 | } 146 | 147 | 50%, 148 | 100% { 149 | background-color: #415a77; 150 | } 151 
| } 152 | 153 | /* ANSWERS */ 154 | #queriesAnswersContainer { 155 | display: block; 156 | color: white; 157 | margin-top: 0.5rem; 158 | } 159 | 160 | #answer { 161 | color: #333333; 162 | } 163 | 164 | #answerWrapper { 165 | display: none; 166 | background-color: #ffd166; 167 | border-radius: 8px; 168 | padding: 0.5rem; 169 | margin-top: 0.5rem; 170 | } 171 | 172 | .queriesAnswers { 173 | border-radius: 8px; 174 | background-color: #ffd166; 175 | padding: 0.5rem; 176 | color: #333333; 177 | } 178 | 179 | #lastQuery { 180 | color: rgb(188, 188, 188); 181 | } 182 | 183 | #lastAnswer { 184 | color: white; 185 | margin-top: 0.5rem; 186 | } 187 | 188 | #lastRequest { 189 | padding: 0.5rem; 190 | margin-top: 0.5rem; 191 | background-color: #333333; 192 | border-radius: 4px; 193 | } 194 | 195 | /* ANSWER OPTIONS */ 196 | .timeStamp { 197 | color: #9a8c98; 198 | } 199 | 200 | .copyRow { 201 | display: flex; 202 | flex-direction: row; 203 | align-items: end; 204 | justify-content: space-between; 205 | color: #a7a7a7; 206 | margin-top: 0.5rem; 207 | } 208 | 209 | .copyText { 210 | display: none; 211 | color: #a7a7a7; 212 | margin-right: 0.5rem; 213 | } 214 | 215 | .copyButton { 216 | color: #415a77; 217 | background-color: transparent; 218 | border: none; 219 | cursor: pointer; 220 | padding: 0; 221 | margin-left: 0.5rem; 222 | } 223 | 224 | .copyButton:hover { 225 | color: #5e80a7; 226 | background-color: transparent; 227 | } 228 | 229 | .removeButton { 230 | color: #415a77; 231 | background-color: transparent; 232 | border: none; 233 | cursor: pointer; 234 | padding: 0; 235 | } 236 | 237 | .removeButton:hover { 238 | color: #5e80a7; 239 | background-color: transparent; 240 | } 241 | -------------------------------------------------------------------------------- /examples/chrome-extension/src/popup.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Chatbot 6 | 7 | 11 | 12 | 13 | 14 |
15 |

Initializing model...

16 |
17 |
18 |

19 |
20 | 25 | 28 |
29 | 30 |
31 |
32 |
33 | 34 |
35 |
36 |
37 | 38 | 45 |
46 |
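The chrome-extension example's `popup.ts` is not reproduced in this section; the `content.js` above simply answers any port message with `document.body.innerText`. A minimal sketch of the popup side of that handshake, assuming illustrative function and port names rather than the repository's actual `popup.ts`:

```ts
// Hypothetical popup-side counterpart to the content script shown above.
// It connects to the content script in the active tab and asks for the page
// text, which can then be folded into the prompt sent to the WebLLM engine.
function fetchPageContents(): Promise<string> {
  return new Promise((resolve) => {
    chrome.tabs.query({ currentWindow: true, active: true }, (tabs) => {
      // Opening a port fires chrome.runtime.onConnect in content.js.
      const port = chrome.tabs.connect(tabs[0].id!, { name: "channelName" });
      port.onMessage.addListener((msg) => resolve(msg.contents));
      // content.js replies to any message with { contents: document.body.innerText }.
      port.postMessage({});
    });
  });
}
```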
47 | 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /examples/embeddings/README.md: -------------------------------------------------------------------------------- 1 | # WebLLM Get Started App 2 | 3 | This folder provides a minimum demo to show WebLLM API in a webapp setting. 4 | To try it out, you can do the following steps under this folder 5 | 6 | ```bash 7 | npm install 8 | npm start 9 | ``` 10 | 11 | Note if you would like to hack WebLLM core package, 12 | you can change web-llm dependencies as `"file:../.."`, and follow the build from source 13 | instruction in the project to build webllm locally. This option is only recommended 14 | if you would like to hack WebLLM core package. 15 | -------------------------------------------------------------------------------- /examples/embeddings/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "embeddings-example", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "start": "parcel src/embeddings.html --port 8885", 7 | "build": "parcel build src/embeddings.html --dist-dir lib" 8 | }, 9 | "devDependencies": { 10 | "buffer": "^5.7.1", 11 | "parcel": "^2.8.3", 12 | "process": "^0.11.10", 13 | "tslib": "^2.3.1", 14 | "typescript": "^4.9.5", 15 | "url": "^0.11.3" 16 | }, 17 | "dependencies": { 18 | "@mlc-ai/web-llm": "^0.2.79", 19 | "langchain": "0.2.15" 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /examples/embeddings/src/embeddings.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 |

WebLLM Test Page

8 | Open console to see output 9 |
10 |
11 | 12 | 13 |

Prompt

14 | 15 | 16 |

Response

17 | 18 |
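The embeddings example's TypeScript source (which also exercises the `langchain` dependency declared above) is not reproduced in this section. A minimal sketch of the OpenAI-style embeddings call the demo centers on — the model id is an assumption; any embedding model from the prebuilt list should work:

```ts
import * as webllm from "@mlc-ai/web-llm";

// Hedged sketch: request embeddings through WebLLM's OpenAI-style API surface.
async function embeddingsSketch() {
  const engine = await webllm.CreateMLCEngine(
    "snowflake-arctic-embed-m-q0f32-MLC-b4", // illustrative embedding model id
  );
  const reply = await engine.embeddings.create({
    input: ["The quick brown fox", "jumps over the lazy dog"],
  });
  // One embedding vector per input string.
  console.log(reply.data.map((d) => d.embedding.length));
}
```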
19 | 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /examples/function-calling/README.md: -------------------------------------------------------------------------------- 1 | ### OpenAI API Demos - Function calling 2 | 3 | This folder contains two main ways of using function calling with WebLLM. 4 | 5 | `function-calling-manual` demonstrates how you can use function calling with Llama3.1 and Hermes2 6 | without using the `tools`, `tool_choice`, and `tool_call` fields. This is the most flexible way and you can follow 7 | the instruction given by the model releaser and iterate yourself on top of that. However, you need to do parsing on your own, which differs for each model. For instance, Hermes2 models use `` and `` to wrap around a tool call, which may be very different from other models' format. 8 | 9 | `function-calling-openai` conforms to the OpenAI function calling usage, leveraging `tools`, `tool_choice`, and `tool_call` 10 | fields. This is more usable, but sacrifices the flexibility since we have pre-defined system prompt 11 | for this. 12 | -------------------------------------------------------------------------------- /examples/function-calling/function-calling-manual/README.md: -------------------------------------------------------------------------------- 1 | ### Demos - Function calling 2 | 3 | Run `npm install` first, followed by `npm start`. 4 | 5 | Note if you would like to hack WebLLM core package, 6 | you can change web-llm dependencies as `"file:../../.."`, and follow the build from source 7 | instruction in the project to build webllm locally. This option is only recommended 8 | if you would like to hack WebLLM core package. 9 | -------------------------------------------------------------------------------- /examples/function-calling/function-calling-manual/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "openai-api", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "start": "parcel src/function_calling_manual.html --port 8888", 7 | "build": "parcel build src/function_calling_manual.html --dist-dir lib" 8 | }, 9 | "devDependencies": { 10 | "buffer": "^5.7.1", 11 | "parcel": "^2.8.3", 12 | "process": "^0.11.10", 13 | "tslib": "^2.3.1", 14 | "typescript": "^4.9.5", 15 | "url": "^0.11.3" 16 | }, 17 | "dependencies": { 18 | "@mlc-ai/web-llm": "^0.2.79" 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /examples/function-calling/function-calling-manual/src/function_calling_manual.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 | 8 |

WebLLM Test Page

9 | Open console to see output 10 |
11 |
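The function-calling README above notes that the manual approach leaves response parsing to the caller and that the format differs per model; Hermes-2-Pro, for instance, wraps each call in `<tool_call>...</tool_call>` tags around a JSON object. Since `function_calling_manual.ts` is not reproduced in this section, here is a rough, hedged sketch of such a parser (the helper name and regex are illustrative, not the repository's code):

```ts
// Hedged sketch: extract Hermes-2-Pro-style tool calls from a raw completion,
// assuming each call is emitted as <tool_call>{"name": ..., "arguments": ...}</tool_call>.
interface ParsedToolCall {
  name: string;
  arguments: Record<string, unknown>;
}

function parseHermesToolCalls(reply: string): ParsedToolCall[] {
  const calls: ParsedToolCall[] = [];
  const pattern = /<tool_call>([\s\S]*?)<\/tool_call>/g;
  for (const match of reply.matchAll(pattern)) {
    try {
      calls.push(JSON.parse(match[1].trim()));
    } catch {
      // Ignore fragments that are not valid JSON.
    }
  }
  return calls;
}
```

Other models use different wrappers, which is exactly the flexibility-versus-convenience trade-off the README describes between the manual and the OpenAI-style approach.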
12 | 13 | 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /examples/function-calling/function-calling-openai/README.md: -------------------------------------------------------------------------------- 1 | ### Demos - Function calling 2 | 3 | Run `npm install` first, followed by `npm start`. 4 | 5 | Note if you would like to hack WebLLM core package, 6 | you can change web-llm dependencies as `"file:../../.."`, and follow the build from source 7 | instruction in the project to build webllm locally. This option is only recommended 8 | if you would like to hack WebLLM core package. 9 | -------------------------------------------------------------------------------- /examples/function-calling/function-calling-openai/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "openai-api", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "start": "parcel src/function_calling_openai.html --port 8888", 7 | "build": "parcel build src/function_calling_openai.html --dist-dir lib" 8 | }, 9 | "devDependencies": { 10 | "buffer": "^5.7.1", 11 | "parcel": "^2.8.3", 12 | "process": "^0.11.10", 13 | "tslib": "^2.3.1", 14 | "typescript": "^4.9.5", 15 | "url": "^0.11.3" 16 | }, 17 | "dependencies": { 18 | "@mlc-ai/web-llm": "^0.2.79" 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /examples/function-calling/function-calling-openai/src/function_calling_openai.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 | 8 |

WebLLM Test Page

9 | Open console to see output 10 |
11 |
12 | 13 | 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /examples/function-calling/function-calling-openai/src/function_calling_openai.ts: -------------------------------------------------------------------------------- 1 | import * as webllm from "@mlc-ai/web-llm"; 2 | 3 | function setLabel(id: string, text: string) { 4 | const label = document.getElementById(id); 5 | if (label == null) { 6 | throw Error("Cannot find label " + id); 7 | } 8 | label.innerText = text; 9 | } 10 | 11 | async function main() { 12 | const initProgressCallback = (report: webllm.InitProgressReport) => { 13 | setLabel("init-label", report.text); 14 | }; 15 | const selectedModel = "Hermes-2-Pro-Llama-3-8B-q4f16_1-MLC"; 16 | const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine( 17 | selectedModel, 18 | { initProgressCallback: initProgressCallback }, 19 | ); 20 | 21 | const tools: Array = [ 22 | { 23 | type: "function", 24 | function: { 25 | name: "get_current_weather", 26 | description: "Get the current weather in a given location", 27 | parameters: { 28 | type: "object", 29 | properties: { 30 | location: { 31 | type: "string", 32 | description: "The city and state, e.g. San Francisco, CA", 33 | }, 34 | unit: { type: "string", enum: ["celsius", "fahrenheit"] }, 35 | }, 36 | required: ["location"], 37 | }, 38 | }, 39 | }, 40 | ]; 41 | 42 | const request: webllm.ChatCompletionRequest = { 43 | stream: true, // works with stream as well, where the last chunk returns tool_calls 44 | stream_options: { include_usage: true }, 45 | messages: [ 46 | { 47 | role: "user", 48 | content: 49 | "What is the current weather in celsius in Pittsburgh and Tokyo?", 50 | }, 51 | ], 52 | tool_choice: "auto", 53 | tools: tools, 54 | }; 55 | 56 | if (!request.stream) { 57 | const reply0 = await engine.chat.completions.create(request); 58 | console.log(reply0.choices[0]); 59 | console.log(reply0.usage); 60 | } else { 61 | // If streaming, the last chunk returns tool calls 62 | const asyncChunkGenerator = await engine.chat.completions.create(request); 63 | let message = ""; 64 | let lastChunk: webllm.ChatCompletionChunk | undefined; 65 | let usageChunk: webllm.ChatCompletionChunk | undefined; 66 | for await (const chunk of asyncChunkGenerator) { 67 | console.log(chunk); 68 | message += chunk.choices[0]?.delta?.content || ""; 69 | setLabel("generate-label", message); 70 | if (!chunk.usage) { 71 | lastChunk = chunk; 72 | } 73 | usageChunk = chunk; 74 | } 75 | console.log(lastChunk!.choices[0].delta); 76 | console.log(usageChunk!.usage); 77 | } 78 | } 79 | 80 | main(); 81 | -------------------------------------------------------------------------------- /examples/get-started-web-worker/README.md: -------------------------------------------------------------------------------- 1 | # WebLLM Get Started with WebWorker 2 | 3 | This folder provides a minimum demo to show WebLLM API using 4 | [WebWorker](https://developer.mozilla.org/en-US/docs/Web/API/Web_Workers_API/Using_web_workers). 5 | The main benefit of web worker is that all ML workloads runs on a separate thread as a result 6 | will less likely block the UI. 7 | 8 | To try it out, you can do the following steps under this folder 9 | 10 | ```bash 11 | npm install 12 | npm start 13 | ``` 14 | 15 | Note if you would like to hack WebLLM core package. 16 | You can change web-llm dependencies as `"file:../.."`, and follow the build from source 17 | instruction in the project to build webllm locally. 
This option is only recommended 18 | if you would like to hack WebLLM core package. 19 | -------------------------------------------------------------------------------- /examples/get-started-web-worker/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "get-started-web-worker", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "start": "parcel src/get_started.html --port 8885", 7 | "build": "parcel build src/get_started.html --dist-dir lib" 8 | }, 9 | "devDependencies": { 10 | "buffer": "^6.0.3", 11 | "parcel": "^2.8.3", 12 | "process": "^0.11.10", 13 | "tslib": "^2.3.1", 14 | "typescript": "^4.9.5", 15 | "url": "^0.11.3" 16 | }, 17 | "dependencies": { 18 | "@mlc-ai/web-llm": "^0.2.79" 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /examples/get-started-web-worker/src/get_started.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 |

WebLLM Test Page

8 | Open console to see output 9 |
10 |
11 | 12 | 13 |

Prompt

14 | 15 | 16 |

Response

17 | 18 |
19 | 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /examples/get-started-web-worker/src/main.ts: -------------------------------------------------------------------------------- 1 | import * as webllm from "@mlc-ai/web-llm"; 2 | 3 | function setLabel(id: string, text: string) { 4 | const label = document.getElementById(id); 5 | if (label == null) { 6 | throw Error("Cannot find label " + id); 7 | } 8 | label.innerText = text; 9 | } 10 | 11 | // There are two demonstrations, pick one to run 12 | 13 | /** 14 | * Chat completion (OpenAI style) without streaming, where we get the entire response at once. 15 | */ 16 | async function mainNonStreaming() { 17 | const initProgressCallback = (report: webllm.InitProgressReport) => { 18 | setLabel("init-label", report.text); 19 | }; 20 | const selectedModel = "Llama-3.1-8B-Instruct-q4f32_1-MLC"; 21 | 22 | const engine: webllm.MLCEngineInterface = 23 | await webllm.CreateWebWorkerMLCEngine( 24 | new Worker(new URL("./worker.ts", import.meta.url), { type: "module" }), 25 | selectedModel, 26 | { initProgressCallback: initProgressCallback }, 27 | ); 28 | 29 | const request: webllm.ChatCompletionRequest = { 30 | messages: [ 31 | { 32 | role: "system", 33 | content: 34 | "You are a helpful, respectful and honest assistant. " + 35 | "Be as happy as you can when speaking please. ", 36 | }, 37 | { role: "user", content: "Provide me three US states." }, 38 | { role: "assistant", content: "California, New York, Pennsylvania." }, 39 | { role: "user", content: "Two more please!" }, 40 | ], 41 | n: 3, 42 | temperature: 1.5, 43 | max_tokens: 256, 44 | }; 45 | 46 | const reply0 = await engine.chat.completions.create(request); 47 | console.log(reply0); 48 | 49 | console.log(reply0.usage); 50 | } 51 | 52 | /** 53 | * Chat completion (OpenAI style) with streaming, where delta is sent while generating response. 54 | */ 55 | async function mainStreaming() { 56 | const initProgressCallback = (report: webllm.InitProgressReport) => { 57 | setLabel("init-label", report.text); 58 | }; 59 | const selectedModel = "Llama-3.1-8B-Instruct-q4f32_1-MLC"; 60 | 61 | const engine: webllm.MLCEngineInterface = 62 | await webllm.CreateWebWorkerMLCEngine( 63 | new Worker(new URL("./worker.ts", import.meta.url), { type: "module" }), 64 | selectedModel, 65 | { initProgressCallback: initProgressCallback }, 66 | ); 67 | 68 | const request: webllm.ChatCompletionRequest = { 69 | stream: true, 70 | stream_options: { include_usage: true }, 71 | messages: [ 72 | { 73 | role: "system", 74 | content: 75 | "You are a helpful, respectful and honest assistant. " + 76 | "Be as happy as you can when speaking please. ", 77 | }, 78 | { role: "user", content: "Provide me three US states." }, 79 | { role: "assistant", content: "California, New York, Pennsylvania." }, 80 | { role: "user", content: "Two more please!" 
}, 81 | ], 82 | temperature: 1.5, 83 | max_tokens: 256, 84 | }; 85 | 86 | const asyncChunkGenerator = await engine.chat.completions.create(request); 87 | let message = ""; 88 | for await (const chunk of asyncChunkGenerator) { 89 | console.log(chunk); 90 | message += chunk.choices[0]?.delta?.content || ""; 91 | setLabel("generate-label", message); 92 | if (chunk.usage) { 93 | console.log(chunk.usage); // only last chunk has usage 94 | } 95 | // engine.interruptGenerate(); // works with interrupt as well 96 | } 97 | console.log("Final message:\n", await engine.getMessage()); // the concatenated message 98 | } 99 | 100 | // Run one of the function below 101 | // mainNonStreaming(); 102 | mainStreaming(); 103 | -------------------------------------------------------------------------------- /examples/get-started-web-worker/src/worker.ts: -------------------------------------------------------------------------------- 1 | import { WebWorkerMLCEngineHandler } from "@mlc-ai/web-llm"; 2 | 3 | // Hookup an engine to a worker handler 4 | const handler = new WebWorkerMLCEngineHandler(); 5 | self.onmessage = (msg: MessageEvent) => { 6 | handler.onmessage(msg); 7 | }; 8 | -------------------------------------------------------------------------------- /examples/get-started/README.md: -------------------------------------------------------------------------------- 1 | # WebLLM Get Started App 2 | 3 | This folder provides a minimum demo to show WebLLM API in a webapp setting. 4 | To try it out, you can do the following steps under this folder 5 | 6 | ```bash 7 | npm install 8 | npm start 9 | ``` 10 | 11 | Note if you would like to hack WebLLM core package. 12 | You can change web-llm dependencies as `"file:../.."`, and follow the build from source 13 | instruction in the project to build webllm locally. This option is only recommended 14 | if you would like to hack WebLLM core package. 15 | -------------------------------------------------------------------------------- /examples/get-started/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "get-started", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "start": "parcel src/get_started.html --port 8888", 7 | "build": "parcel build src/get_started.html --dist-dir lib" 8 | }, 9 | "devDependencies": { 10 | "buffer": "^5.7.1", 11 | "parcel": "^2.8.3", 12 | "process": "^0.11.10", 13 | "tslib": "^2.3.1", 14 | "typescript": "^4.9.5", 15 | "url": "^0.11.3" 16 | }, 17 | "dependencies": { 18 | "@mlc-ai/web-llm": "^0.2.79" 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /examples/get-started/src/get_started.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 |

WebLLM Test Page

8 | Open console to see output 9 |
10 |
11 | 12 | 13 |

Prompt

14 | 15 | 16 |

Response

17 | 18 |
19 | 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /examples/get-started/src/get_started.ts: -------------------------------------------------------------------------------- 1 | import * as webllm from "@mlc-ai/web-llm"; 2 | 3 | function setLabel(id: string, text: string) { 4 | const label = document.getElementById(id); 5 | if (label == null) { 6 | throw Error("Cannot find label " + id); 7 | } 8 | label.innerText = text; 9 | } 10 | 11 | async function main() { 12 | const initProgressCallback = (report: webllm.InitProgressReport) => { 13 | setLabel("init-label", report.text); 14 | }; 15 | // Option 1: If we do not specify appConfig, we use `prebuiltAppConfig` defined in `config.ts` 16 | const selectedModel = "Llama-3.1-8B-Instruct-q4f32_1-MLC"; 17 | const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine( 18 | selectedModel, 19 | { 20 | initProgressCallback: initProgressCallback, 21 | logLevel: "INFO", // specify the log level 22 | }, 23 | // customize kv cache, use either context_window_size or sliding_window_size (with attention sink) 24 | { 25 | context_window_size: 2048, 26 | // sliding_window_size: 1024, 27 | // attention_sink_size: 4, 28 | }, 29 | ); 30 | 31 | // Option 2: Specify your own model other than the prebuilt ones 32 | // const appConfig: webllm.AppConfig = { 33 | // model_list: [ 34 | // { 35 | // model: "https://huggingface.co/mlc-ai/Llama-3.1-8B-Instruct-q4f32_1-MLC", 36 | // model_id: "Llama-3.1-8B-Instruct-q4f32_1-MLC", 37 | // model_lib: 38 | // webllm.modelLibURLPrefix + 39 | // webllm.modelVersion + 40 | // "/Llama-3_1-8B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm", 41 | // overrides: { 42 | // context_window_size: 2048, 43 | // }, 44 | // }, 45 | // ], 46 | // }; 47 | // const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine( 48 | // selectedModel, 49 | // { appConfig: appConfig, initProgressCallback: initProgressCallback }, 50 | // ); 51 | 52 | // Option 3: Instantiate MLCEngine() and call reload() separately 53 | // const engine: webllm.MLCEngineInterface = new webllm.MLCEngine({ 54 | // appConfig: appConfig, // if do not specify, we use webllm.prebuiltAppConfig 55 | // initProgressCallback: initProgressCallback, 56 | // }); 57 | // await engine.reload(selectedModel); 58 | 59 | const reply0 = await engine.chat.completions.create({ 60 | messages: [{ role: "user", content: "List three US states." }], 61 | // below configurations are all optional 62 | n: 3, 63 | temperature: 1.5, 64 | max_tokens: 256, 65 | // 46510 and 7188 are "California", and 8421 and 51325 are "Texas" in Llama-3.1-8B-Instruct 66 | // So we would have a higher chance of seeing the latter two, but never the first in the answer 67 | logit_bias: { 68 | "46510": -100, 69 | "7188": -100, 70 | "8421": 5, 71 | "51325": 5, 72 | }, 73 | logprobs: true, 74 | top_logprobs: 2, 75 | }); 76 | console.log(reply0); 77 | console.log(reply0.usage); 78 | 79 | // To change model, either create a new engine via `CreateMLCEngine()`, or call `engine.reload(modelId)` 80 | } 81 | 82 | main(); 83 | -------------------------------------------------------------------------------- /examples/json-mode/README.md: -------------------------------------------------------------------------------- 1 | ### OpenAI API Demos 2 | 3 | Run `npm install` first, followed by `npm start`. 
4 | 5 | Note if you would like to hack WebLLM core package, 6 | you can change web-llm dependencies as `"file:../.."`, and follow the build from source 7 | instruction in the project to build webllm locally. This option is only recommended 8 | if you would like to hack WebLLM core package. 9 | -------------------------------------------------------------------------------- /examples/json-mode/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "openai-api", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "start": "parcel src/json_mode.html --port 8888", 7 | "build": "parcel build src/json_mode.html --dist-dir lib" 8 | }, 9 | "devDependencies": { 10 | "buffer": "^5.7.1", 11 | "parcel": "^2.8.3", 12 | "process": "^0.11.10", 13 | "tslib": "^2.3.1", 14 | "typescript": "^4.9.5", 15 | "url": "^0.11.3" 16 | }, 17 | "dependencies": { 18 | "@mlc-ai/web-llm": "^0.2.79" 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /examples/json-mode/src/json_mode.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 | 8 |

WebLLM Test Page

9 | Open console to see output. 10 |
11 |
12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /examples/json-mode/src/json_mode.ts: -------------------------------------------------------------------------------- 1 | import * as webllm from "@mlc-ai/web-llm"; 2 | 3 | function setLabel(id: string, text: string) { 4 | const label = document.getElementById(id); 5 | if (label == null) { 6 | throw Error("Cannot find label " + id); 7 | } 8 | label.innerText = text; 9 | } 10 | 11 | async function main() { 12 | const initProgressCallback = (report: webllm.InitProgressReport) => { 13 | setLabel("init-label", report.text); 14 | }; 15 | // Pick any one of these models to start trying -- most models in WebLLM support grammar 16 | const selectedModel = "Llama-3.2-3B-Instruct-q4f16_1-MLC"; 17 | // const selectedModel = "Qwen2.5-1.5B-Instruct-q4f16_1-MLC"; 18 | // const selectedModel = "Phi-3.5-mini-instruct-q4f16_1-MLC"; 19 | const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine( 20 | selectedModel, 21 | { initProgressCallback: initProgressCallback }, 22 | ); 23 | // Note that you'd need to prompt the model to answer in JSON either in 24 | // user's message or the system prompt 25 | const request: webllm.ChatCompletionRequest = { 26 | stream: false, // works with streaming, logprobs, top_logprobs as well 27 | messages: [ 28 | { 29 | role: "user", 30 | content: "Write a short JSON file introducing yourself.", 31 | }, 32 | ], 33 | n: 2, 34 | max_tokens: 128, 35 | response_format: { type: "json_object" } as webllm.ResponseFormat, 36 | }; 37 | 38 | const reply0 = await engine.chatCompletion(request); 39 | console.log(reply0); 40 | console.log("First reply's last choice:\n" + (await engine.getMessage())); 41 | console.log(reply0.usage); 42 | } 43 | 44 | main(); 45 | -------------------------------------------------------------------------------- /examples/json-schema/README.md: -------------------------------------------------------------------------------- 1 | ### OpenAI API Demos 2 | 3 | Run `npm install` first, followed by `npm start`. 4 | 5 | Note if you would like to hack WebLLM core package, 6 | you can change web-llm dependencies as `"file:../.."`, and follow the build from source 7 | instruction in the project to build webllm locally. This option is only recommended 8 | if you would like to hack WebLLM core package. 9 | -------------------------------------------------------------------------------- /examples/json-schema/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "openai-api", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "start": "parcel src/json_schema.html --port 8885", 7 | "build": "parcel build src/json_schema.html --dist-dir lib" 8 | }, 9 | "devDependencies": { 10 | "buffer": "^5.7.1", 11 | "parcel": "^2.8.3", 12 | "process": "^0.11.10", 13 | "tslib": "^2.3.1", 14 | "typescript": "^4.9.5", 15 | "url": "^0.11.3" 16 | }, 17 | "dependencies": { 18 | "@mlc-ai/web-llm": "^0.2.79" 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /examples/json-schema/src/json_schema.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 | 8 |

WebLLM Test Page

9 | Open console to see output. 10 |
11 |
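The json-schema example's TypeScript source is not reproduced in this section. A minimal sketch of the kind of request it is built around, assuming (as the example's name suggests) that the `schema` field of `ResponseFormat` accepts a stringified JSON schema; the schema itself is made up for illustration:

```ts
import * as webllm from "@mlc-ai/web-llm";

// Hedged sketch: constrain generation to a JSON schema via response_format.
async function jsonSchemaSketch(engine: webllm.MLCEngineInterface) {
  // Illustrative schema; any valid JSON schema string would do.
  const schema = JSON.stringify({
    type: "object",
    properties: {
      name: { type: "string" },
      age: { type: "integer" },
      hobbies: { type: "array", items: { type: "string" } },
    },
    required: ["name", "age"],
  });
  const reply = await engine.chat.completions.create({
    messages: [
      { role: "user", content: "Introduce a fictional person as JSON." },
    ],
    max_tokens: 128,
    // Assumption: ResponseFormat takes an optional `schema` string next to `type`.
    response_format: {
      type: "json_object",
      schema: schema,
    } as webllm.ResponseFormat,
  });
  console.log(reply.choices[0].message.content);
}
```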
12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /examples/logit-processor/README.md: -------------------------------------------------------------------------------- 1 | # WebLLM Logit Processor and Low-Level API Example 2 | 3 | This folder explains the usage of `LogitProcessor`, demonstrating how it can be used to 4 | manipulate the raw logits before sampling the token (e.g. setting certain tokens to `inf` or `-inf`). 5 | We demonstrate how to use it with and without a web worker, which can be toggled with `USE_WEB_WORKER` 6 | in `logit_processor.ts` (see `worker.ts` on how `LogitProcessor` plays a role there). 7 | 8 | We also demonstrate the usage of a low-level API `forwardTokensAndSample()`, which, unlike `chat.completions.create()` 9 | that assumes autoregressive chatting, gives us more fine-grained control. 10 | 11 | See `my_logit_processor.ts` on how to customize your own logit processor. Here we make the logit 12 | of token 0 `100.0` manually, large enough that we should expect to always sample token 0, which 13 | is indeed the case if we observe the console log. We also demonstrate that a LogitProcessor can be 14 | stateful, and the state can also be cleared with `LogitProcessor.resetState()`. 15 | 16 | To try it out, you can do the following steps under this folder 17 | 18 | ```bash 19 | npm install 20 | npm start 21 | ``` 22 | 23 | Note if you would like to hack WebLLM core package, you can change web-llm dependencies as `"file:../.."`, and follow the build from source instruction in the project to build webllm locally. This option is only recommended if you would like to hack WebLLM core package. 24 | -------------------------------------------------------------------------------- /examples/logit-processor/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "logit-processor", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "start": "parcel src/logit_processor.html --port 8885", 7 | "build": "parcel build src/logit_processor.html --dist-dir lib" 8 | }, 9 | "devDependencies": { 10 | "buffer": "^5.7.1", 11 | "parcel": "^2.8.3", 12 | "process": "^0.11.10", 13 | "tslib": "^2.3.1", 14 | "typescript": "^4.9.5", 15 | "url": "^0.11.3" 16 | }, 17 | "dependencies": { 18 | "@mlc-ai/web-llm": "^0.2.79" 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /examples/logit-processor/src/logit_processor.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 | 8 |

WebLLM Logit Processor Test Page

9 | Open console to see the effect of your logit processor. 10 |
11 |
12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /examples/logit-processor/src/logit_processor.ts: -------------------------------------------------------------------------------- 1 | import * as webllm from "@mlc-ai/web-llm"; 2 | import { MyLogitProcessor } from "./my_logit_processor"; 3 | 4 | const USE_WEB_WORKER = true; // Toggle this to use Logit Processor without a web worker 5 | const AUTOREGRESS_LIMIT = 32; // How many tokens to generate for this test 6 | 7 | function setLabel(id: string, text: string) { 8 | const label = document.getElementById(id); 9 | if (label == null) { 10 | throw Error("Cannot find label " + id); 11 | } 12 | label.innerText = text; 13 | } 14 | 15 | async function main() { 16 | const initProgressCallback = (report: webllm.InitProgressReport) => { 17 | setLabel("init-label", report.text); 18 | }; 19 | // Instantiate myLogitProcessor, registering in the logitProcessorRegistry 20 | const myLogitProcessor = new MyLogitProcessor(); 21 | const logitProcessorRegistry = new Map(); 22 | logitProcessorRegistry.set("phi-2-q4f32_1-MLC", myLogitProcessor); 23 | 24 | let engine: webllm.MLCEngineInterface; 25 | 26 | // Depending on whether we use a web worker, the code is slightly different 27 | if (USE_WEB_WORKER) { 28 | // see worker.ts on how LogitProcessor plays a role there 29 | engine = await webllm.CreateWebWorkerMLCEngine( 30 | new Worker(new URL("./worker.ts", import.meta.url), { type: "module" }), 31 | "phi-2-q4f32_1-MLC", 32 | { initProgressCallback: initProgressCallback }, 33 | ); 34 | } else { 35 | engine = await webllm.CreateMLCEngine("phi-2-q4f32_1-MLC", { 36 | initProgressCallback: initProgressCallback, 37 | logitProcessorRegistry: logitProcessorRegistry, 38 | }); 39 | } 40 | 41 | // Below we demonstrate the usage of a low-level API `forwardTokensAndSample()` 42 | const prompt: Array = [42]; 43 | let nextToken = await engine.forwardTokensAndSample( 44 | prompt, 45 | /*isPrefill=*/ true, 46 | ); 47 | console.log(nextToken); 48 | 49 | let counter = prompt.length; 50 | while (counter < AUTOREGRESS_LIMIT) { 51 | counter += 1; 52 | nextToken = await engine.forwardTokensAndSample( 53 | [nextToken], 54 | /*isPrefill=*/ false, 55 | ); 56 | console.log(nextToken); 57 | } 58 | 59 | // By calling `engine.resetChat()`, we triggers MyLogitProcessor.resetState() 60 | engine.resetChat(); 61 | counter = prompt.length; 62 | nextToken = await engine.forwardTokensAndSample(prompt, /*isPrefill=*/ true); 63 | console.log(nextToken); 64 | while (counter < AUTOREGRESS_LIMIT) { 65 | counter += 1; 66 | nextToken = await engine.forwardTokensAndSample( 67 | [nextToken], 68 | /*isPrefill=*/ false, 69 | ); 70 | console.log(nextToken); 71 | } 72 | 73 | // `forwardTokensAndSample()` is made compatible with registering runtime stats. 
74 | console.log(await engine.runtimeStatsText()); 75 | } 76 | 77 | main(); 78 | -------------------------------------------------------------------------------- /examples/logit-processor/src/my_logit_processor.ts: -------------------------------------------------------------------------------- 1 | import * as webllm from "@mlc-ai/web-llm"; 2 | 3 | // Define LogitProcessor 4 | export class MyLogitProcessor implements webllm.LogitProcessor { 5 | private tokenSequence: Array = []; 6 | 7 | processLogits(logits: Float32Array): Float32Array { 8 | logits[0] = 100.0; // should be enough so that we always sample token 0 below 9 | return logits; 10 | } 11 | 12 | processSampledToken(token: number): void { 13 | this.tokenSequence.push(token); 14 | console.log("processSampledToken: " + this.tokenSequence.length); 15 | } 16 | 17 | resetState(): void { 18 | this.tokenSequence = []; 19 | console.log("resetState"); 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /examples/logit-processor/src/worker.ts: -------------------------------------------------------------------------------- 1 | // Serve the chat workload through web worker 2 | import * as webllm from "@mlc-ai/web-llm"; 3 | import { MyLogitProcessor } from "./my_logit_processor"; 4 | 5 | console.log("Use web worker for logit processor"); 6 | 7 | const myLogitProcessor = new MyLogitProcessor(); 8 | const logitProcessorRegistry = new Map(); 9 | logitProcessorRegistry.set("phi-2-q4f32_1-MLC", myLogitProcessor); 10 | 11 | const handler = new webllm.WebWorkerMLCEngineHandler(); 12 | handler.setLogitProcessorRegistry(logitProcessorRegistry); 13 | self.onmessage = (msg: MessageEvent) => { 14 | handler.onmessage(msg); 15 | }; 16 | -------------------------------------------------------------------------------- /examples/multi-models/README.md: -------------------------------------------------------------------------------- 1 | # WebLLM Get Started App 2 | 3 | This folder provides a minimum demo to show WebLLM API in a webapp setting. 4 | To try it out, you can do the following steps under this folder 5 | 6 | ```bash 7 | npm install 8 | npm start 9 | ``` 10 | 11 | Note if you would like to hack WebLLM core package. 12 | You can change web-llm dependencies as `"file:../.."`, and follow the build from source 13 | instruction in the project to build webllm locally. This option is only recommended 14 | if you would like to hack WebLLM core package. 15 | -------------------------------------------------------------------------------- /examples/multi-models/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "get-started", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "start": "parcel src/multi_models.html --port 8888", 7 | "build": "parcel build src/multi_models.html --dist-dir lib" 8 | }, 9 | "devDependencies": { 10 | "buffer": "^5.7.1", 11 | "parcel": "^2.8.3", 12 | "process": "^0.11.10", 13 | "tslib": "^2.3.1", 14 | "typescript": "^4.9.5", 15 | "url": "^0.11.3" 16 | }, 17 | "dependencies": { 18 | "@mlc-ai/web-llm": "^0.2.79" 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /examples/multi-models/src/multi_models.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 |

WebLLM Test Page

8 | Open console to see output 9 |
10 |
11 | 12 | 13 |

Prompt 1

14 | 15 | 16 |

Response from model 1

17 | 18 |
19 | 20 |

Prompt 2

21 | 22 | 23 |

Response from model 2

24 | 25 |
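The multi-models example's `main.ts` is not included in this section; the HTML above only shows the two prompt/response panes it drives. A rough sketch of what loading two models into one engine and routing requests between them might look like — the array-style loading signature, the per-request `model` field, and the model ids are all assumptions, not the repository's code:

```ts
import * as webllm from "@mlc-ai/web-llm";

// Hedged sketch: two models in one worker-backed engine, selected per request.
async function multiModelSketch() {
  const models = [
    "Llama-3.1-8B-Instruct-q4f32_1-MLC", // illustrative model ids
    "Phi-3.5-mini-instruct-q4f16_1-MLC",
  ];
  // Assumption: the create/reload APIs accept a list of model ids.
  const engine = await webllm.CreateWebWorkerMLCEngine(
    new Worker(new URL("./worker.ts", import.meta.url), { type: "module" }),
    models,
  );
  for (const model of models) {
    const reply = await engine.chat.completions.create({
      model, // assumption: selects which loaded model serves this request
      messages: [{ role: "user", content: "Name one US state." }],
      max_tokens: 32,
    });
    console.log(model, reply.choices[0].message.content);
  }
}
```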
26 | 27 | 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /examples/multi-models/src/worker.ts: -------------------------------------------------------------------------------- 1 | import { WebWorkerMLCEngineHandler } from "@mlc-ai/web-llm"; 2 | 3 | // Hookup an engine to a worker handler 4 | const handler = new WebWorkerMLCEngineHandler(); 5 | self.onmessage = (msg: MessageEvent) => { 6 | handler.onmessage(msg); 7 | }; 8 | -------------------------------------------------------------------------------- /examples/multi-round-chat/README.md: -------------------------------------------------------------------------------- 1 | ### OpenAI API Demos 2 | 3 | Run `npm install` first, followed by `npm start`. 4 | 5 | Note if you would like to hack WebLLM core package, 6 | you can change web-llm dependencies as `"file:../.."`, and follow the build from source 7 | instruction in the project to build webllm locally. This option is only recommended 8 | if you would like to hack WebLLM core package. 9 | -------------------------------------------------------------------------------- /examples/multi-round-chat/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "openai-api", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "start": "parcel src/multi_round_chat.html --port 8888", 7 | "build": "parcel build src/multi_round_chat.html --dist-dir lib" 8 | }, 9 | "devDependencies": { 10 | "buffer": "^5.7.1", 11 | "parcel": "^2.8.3", 12 | "process": "^0.11.10", 13 | "tslib": "^2.3.1", 14 | "typescript": "^4.9.5", 15 | "url": "^0.11.3" 16 | }, 17 | "dependencies": { 18 | "@mlc-ai/web-llm": "^0.2.79" 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /examples/multi-round-chat/src/multi_round_chat.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 | 8 |

WebLLM Test Page

9 | Open console to see output 10 |
11 |
12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /examples/multi-round-chat/src/multi_round_chat.ts: -------------------------------------------------------------------------------- 1 | import * as webllm from "@mlc-ai/web-llm"; 2 | 3 | function setLabel(id: string, text: string) { 4 | const label = document.getElementById(id); 5 | if (label == null) { 6 | throw Error("Cannot find label " + id); 7 | } 8 | label.innerText = text; 9 | } 10 | 11 | /** 12 | * We demonstrate multiround chatting. Though users are required to maintain chat history, internally 13 | * we compare provided `messages` with the internal chat history. If it matches, we will reuse KVs 14 | * and hence save computation -- essentially an implicit internal optimization. 15 | */ 16 | async function main() { 17 | const initProgressCallback = (report: webllm.InitProgressReport) => { 18 | setLabel("init-label", report.text); 19 | }; 20 | const selectedModel = "Llama-3.1-8B-Instruct-q4f32_1-MLC"; 21 | const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine( 22 | selectedModel, 23 | { initProgressCallback: initProgressCallback }, 24 | ); 25 | 26 | // Round 0 27 | const messages: webllm.ChatCompletionMessageParam[] = [ 28 | { 29 | role: "system", 30 | content: 31 | "You are a helpful, respectful and honest assistant. " + 32 | "Be as happy as you can when speaking please. ", 33 | }, 34 | { role: "user", content: "Provide me three US states." }, 35 | ]; 36 | 37 | const request0: webllm.ChatCompletionRequest = { 38 | stream: false, // can be streaming, same behavior 39 | messages: messages, 40 | }; 41 | 42 | const reply0 = await engine.chat.completions.create(request0); 43 | const replyMessage0 = await engine.getMessage(); 44 | console.log(reply0); 45 | console.log(replyMessage0); 46 | console.log(reply0.usage); 47 | 48 | // Round 1 49 | // Append generated response to messages 50 | messages.push({ role: "assistant", content: replyMessage0 }); 51 | // Append new user input 52 | messages.push({ role: "user", content: "Two more please!" }); 53 | // Below line would cause an internal reset (clear KV cache, etc.) since the history no longer 54 | // matches the new request 55 | // messages[0].content = "Another system prompt"; 56 | 57 | const request1: webllm.ChatCompletionRequest = { 58 | stream: false, // can be streaming, same behavior 59 | messages: messages, 60 | }; 61 | 62 | const reply1 = await engine.chat.completions.create(request1); 63 | const replyMessage1 = await engine.getMessage(); 64 | console.log(reply1); 65 | console.log(replyMessage1); 66 | console.log(reply1.usage); 67 | 68 | // If we used multiround chat, request1 should only prefill a small number of tokens 69 | const prefillTokens0 = reply0.usage?.prompt_tokens; 70 | const prefillTokens1 = reply1.usage?.prompt_tokens; 71 | console.log("Requset 0 prompt tokens: ", prefillTokens0); 72 | console.log("Requset 1 prompt tokens: ", prefillTokens1); 73 | if ( 74 | prefillTokens0 === undefined || 75 | prefillTokens1 === undefined || 76 | prefillTokens1 > prefillTokens0 77 | ) { 78 | throw Error("Multi-round chat is not triggered as expected."); 79 | } 80 | } 81 | 82 | main(); 83 | -------------------------------------------------------------------------------- /examples/next-simple-chat/.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 
2 | 3 | # dependencies 4 | /node_modules 5 | /.pnp 6 | .pnp.js 7 | 8 | # testing 9 | /coverage 10 | 11 | # next.js 12 | /.next/ 13 | /out/ 14 | 15 | # production 16 | /build 17 | 18 | # misc 19 | .DS_Store 20 | *.pem 21 | 22 | # debug 23 | npm-debug.log* 24 | yarn-debug.log* 25 | yarn-error.log* 26 | 27 | # local env files 28 | .env*.local 29 | 30 | # vercel 31 | .vercel 32 | 33 | # typescript 34 | *.tsbuildinfo 35 | next-env.d.ts 36 | -------------------------------------------------------------------------------- /examples/next-simple-chat/README.md: -------------------------------------------------------------------------------- 1 | This is a [Next.js](https://nextjs.org/) project using web-llm. 2 | 3 | ## Getting Started 4 | 5 | First, install web-llm from source. 6 | 7 | Then, run the development server: 8 | 9 | ```bash 10 | npm run dev 11 | # or 12 | yarn dev 13 | # or 14 | pnpm dev 15 | ``` 16 | 17 | Open [http://localhost:3000](http://localhost:3000) with your browser to see the result. 18 | -------------------------------------------------------------------------------- /examples/next-simple-chat/next.config.js: -------------------------------------------------------------------------------- 1 | /** @type {import('next').NextConfig} */ 2 | const nextConfig = { 3 | reactStrictMode: true, 4 | 5 | webpack: (config, { isServer }) => { 6 | // Fixes npm packages that depend on `fs` module 7 | if (!isServer) { 8 | config.resolve.fallback = { 9 | ...config.resolve.fallback, // if you miss it, all the other options in fallback, specified 10 | // by next.js will be dropped. Doesn't make much sense, but how it is 11 | fs: false, // the solution 12 | module: false, 13 | perf_hooks: false, 14 | }; 15 | } 16 | 17 | return config; 18 | }, 19 | }; 20 | 21 | module.exports = nextConfig; 22 | -------------------------------------------------------------------------------- /examples/next-simple-chat/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "next-simple-chat", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "dev": "next dev", 7 | "build": "next build", 8 | "start": "next start", 9 | "lint": "next lint" 10 | }, 11 | "dependencies": { 12 | "@mlc-ai/web-llm": "^0.2.79", 13 | "@types/node": "20.3.3", 14 | "@types/react": "18.2.14", 15 | "@types/react-dom": "18.2.6", 16 | "autoprefixer": "10.4.14", 17 | "eslint": "8.44.0", 18 | "eslint-config-next": "13.4.7", 19 | "next": "^13.5.6", 20 | "postcss": "8.4.24", 21 | "react": "18.2.0", 22 | "react-dom": "18.2.0", 23 | "tailwindcss": "3.3.2", 24 | "typescript": "5.1.6" 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /examples/next-simple-chat/postcss.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | plugins: { 3 | tailwindcss: {}, 4 | autoprefixer: {}, 5 | }, 6 | }; 7 | -------------------------------------------------------------------------------- /examples/next-simple-chat/public/next.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/next-simple-chat/public/vercel.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/next-simple-chat/src/pages/_app.tsx: 
-------------------------------------------------------------------------------- 1 | import "~/styles/globals.css"; 2 | import type { AppProps } from "next/app"; 3 | 4 | export default function App({ Component, pageProps }: AppProps) { 5 | return ; 6 | } 7 | -------------------------------------------------------------------------------- /examples/next-simple-chat/src/pages/_document.tsx: -------------------------------------------------------------------------------- 1 | import { Html, Head, Main, NextScript } from "next/document"; 2 | 3 | export default function Document() { 4 | return ( 5 | 6 | 7 | 8 |
9 | 10 | 11 | 12 | ); 13 | } 14 | -------------------------------------------------------------------------------- /examples/next-simple-chat/src/pages/api/hello.ts: -------------------------------------------------------------------------------- 1 | // Next.js API route support: https://nextjs.org/docs/api-routes/introduction 2 | import type { NextApiRequest, NextApiResponse } from "next"; 3 | 4 | type Data = { 5 | name: string; 6 | }; 7 | 8 | export default function handler( 9 | req: NextApiRequest, 10 | res: NextApiResponse, 11 | ) { 12 | res.status(200).json({ name: "John Doe" }); 13 | } 14 | -------------------------------------------------------------------------------- /examples/next-simple-chat/src/pages/index.tsx: -------------------------------------------------------------------------------- 1 | import Head from "next/head"; 2 | import ChatComponent from "~/utils/chat_component"; 3 | import { Inter } from "next/font/google"; 4 | 5 | const inter = Inter({ subsets: ["latin"] }); 6 | 7 | export default function Home() { 8 | return ( 9 | <> 10 | 11 | Example App 12 | 16 | 17 | 18 |
21 | 22 |
23 | 24 | ); 25 | } 26 | -------------------------------------------------------------------------------- /examples/next-simple-chat/src/styles/globals.css: -------------------------------------------------------------------------------- 1 | @tailwind base; 2 | @tailwind components; 3 | @tailwind utilities; 4 | 5 | :root { 6 | --foreground-rgb: 0, 0, 0; 7 | --background-start-rgb: 214, 219, 220; 8 | --background-end-rgb: 255, 255, 255; 9 | } 10 | 11 | @media (prefers-color-scheme: dark) { 12 | :root { 13 | --foreground-rgb: 255, 255, 255; 14 | --background-start-rgb: 0, 0, 0; 15 | --background-end-rgb: 0, 0, 0; 16 | } 17 | } 18 | 19 | body { 20 | color: rgb(var(--foreground-rgb)); 21 | background: linear-gradient( 22 | to bottom, 23 | transparent, 24 | rgb(var(--background-end-rgb)) 25 | ) 26 | rgb(var(--background-start-rgb)); 27 | } 28 | 29 | a { 30 | color: inherit; 31 | text-decoration: none; 32 | } 33 | 34 | * { 35 | box-sizing: border-box; 36 | } 37 | 38 | chatui-chat { 39 | height: 100; 40 | } 41 | 42 | .chatui { 43 | display: flex; 44 | flex-flow: column wrap; 45 | justify-content: space-between; 46 | width: 100%; 47 | max-width: 867px; 48 | margin: 25px 10px; 49 | height: 600px; 50 | border: 2px solid #ddd; 51 | border-radius: 5px; 52 | box-shadow: 0 15px 15px -5px rgba(0, 0, 0, 0.2); 53 | } 54 | 55 | s .chatui-header { 56 | display: flex; 57 | justify-content: space-between; 58 | padding: 10px; 59 | border-bottom: 2px solid #ddd; 60 | background: #eee; 61 | color: #666; 62 | } 63 | 64 | .chatui-chat { 65 | flex: 1; 66 | overflow-y: auto; 67 | padding: 10px; 68 | } 69 | 70 | .chatui-chat::-webkit-scrollbar { 71 | width: 6px; 72 | } 73 | 74 | .chatui-chat::-webkit-scrollbar-track { 75 | background: #ddd; 76 | } 77 | 78 | .chatui-chat::-webkit-scrollbar-thumb { 79 | background: #bdbdbd; 80 | } 81 | 82 | .msg { 83 | display: flex; 84 | align-items: flex-end; 85 | margin-bottom: 10px; 86 | } 87 | 88 | .msg:last-of-type { 89 | margin: 0; 90 | } 91 | 92 | .msg-bubble { 93 | max-width: 450px; 94 | padding: 15px; 95 | border-radius: 15px; 96 | background: #ececec; 97 | } 98 | 99 | .left-msg .msg-bubble { 100 | border-bottom-left-radius: 0; 101 | } 102 | 103 | .error-msg .msg-bubble { 104 | border-bottom-left-radius: 0; 105 | color: #f15959; 106 | } 107 | 108 | .init-msg .msg-bubble { 109 | border-bottom-left-radius: 0; 110 | } 111 | 112 | .right-msg { 113 | flex-direction: row-reverse; 114 | } 115 | 116 | .right-msg .msg-bubble { 117 | background: #579ffb; 118 | color: #fff; 119 | border-bottom-right-radius: 0; 120 | } 121 | 122 | .chatui-inputarea { 123 | display: flex; 124 | padding: 10px; 125 | border-top: 2px solid #ddd; 126 | background: #eee; 127 | } 128 | 129 | .chatui-inputarea * { 130 | padding: 10px; 131 | border: none; 132 | border-radius: 3px; 133 | font-size: 1em; 134 | } 135 | 136 | .chatui-input { 137 | flex: 1; 138 | background: #ddd; 139 | } 140 | 141 | .chatui-btn { 142 | margin-left: 10px; 143 | background: #579ffb; 144 | color: #fff; 145 | font-weight: bold; 146 | cursor: pointer; 147 | padding: 10px; 148 | } 149 | 150 | .chatui-btn:hover { 151 | background: #577bfb; 152 | } 153 | 154 | .chatui-chat { 155 | background-color: #fcfcfe; 156 | } 157 | -------------------------------------------------------------------------------- /examples/next-simple-chat/src/utils/chat_component.tsx: -------------------------------------------------------------------------------- 1 | import { useState } from "react"; 2 | import { MLCEngine } from "@mlc-ai/web-llm"; 3 | import ChatUI 
from "~/utils/chat_ui"; 4 | 5 | const ChatComponent = () => { 6 | const [messages, setMessages] = useState<{ kind: string; text: string }[]>( 7 | [], 8 | ); 9 | const [prompt, setPrompt] = useState(""); 10 | const [runtimeStats, setRuntimeStats] = useState(""); 11 | const [chat_ui] = useState(new ChatUI(new MLCEngine())); 12 | const updateMessage = (kind: string, text: string, append: boolean) => { 13 | if (kind == "init") { 14 | text = "[System Initalize] " + text; 15 | } 16 | const msgCopy = [...messages]; 17 | if (msgCopy.length == 0 || append) { 18 | setMessages([...msgCopy, { kind, text }]); 19 | } else { 20 | msgCopy[msgCopy.length - 1] = { kind, text }; 21 | setMessages([...msgCopy]); 22 | } 23 | }; 24 | return ( 25 |
26 | 36 | 37 |
38 |
39 | {messages.map((value, index) => ( 40 |
41 |
42 |
${value.text}
43 |
44 |
45 | ))} 46 |
47 | 48 |
49 | { 55 | if (event.key === "Enter") { 56 | chat_ui 57 | .onGenerate(prompt, updateMessage, setRuntimeStats) 58 | .catch((error) => console.log(error)); 59 | } 60 | }} 61 | value={prompt} 62 | onChange={(event) => setPrompt(event.target.value)} 63 | /> 64 | 74 |
75 |
76 | 77 |
78 | 90 | 91 |
92 |
93 | ); 94 | }; 95 | 96 | export default ChatComponent; 97 | -------------------------------------------------------------------------------- /examples/next-simple-chat/tailwind.config.js: -------------------------------------------------------------------------------- 1 | /** @type {import('tailwindcss').Config} */ 2 | module.exports = { 3 | content: [ 4 | "./src/pages/**/*.{js,ts,jsx,tsx,mdx}", 5 | "./src/components/**/*.{js,ts,jsx,tsx,mdx}", 6 | "./src/app/**/*.{js,ts,jsx,tsx,mdx}", 7 | ], 8 | theme: { 9 | extend: { 10 | backgroundImage: { 11 | "gradient-radial": "radial-gradient(var(--tw-gradient-stops))", 12 | "gradient-conic": 13 | "conic-gradient(from 180deg at 50% 50%, var(--tw-gradient-stops))", 14 | }, 15 | }, 16 | }, 17 | plugins: [], 18 | }; 19 | -------------------------------------------------------------------------------- /examples/next-simple-chat/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "es5", 4 | "lib": ["dom", "dom.iterable", "esnext"], 5 | "allowJs": true, 6 | "skipLibCheck": true, 7 | "strict": true, 8 | "forceConsistentCasingInFileNames": true, 9 | "noEmit": true, 10 | "esModuleInterop": true, 11 | "module": "esnext", 12 | "moduleResolution": "node", 13 | "resolveJsonModule": true, 14 | "isolatedModules": true, 15 | "jsx": "preserve", 16 | "incremental": true, 17 | "paths": { 18 | "~/*": ["./src/*"] 19 | } 20 | }, 21 | "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx"], 22 | "exclude": ["node_modules"] 23 | } 24 | -------------------------------------------------------------------------------- /examples/qwen3/README.md: -------------------------------------------------------------------------------- 1 | ### OpenAI API Demos w/ Qwen3 2 | 3 | Run `npm install` first, followed by `npm start`. 4 | 5 | Note if you would like to hack WebLLM core package, 6 | you can change web-llm dependencies as `"file:../.."`, and follow the build from source 7 | instruction in the project to build webllm locally. This option is only recommended 8 | if you would like to hack WebLLM core package. 9 | -------------------------------------------------------------------------------- /examples/qwen3/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "qwen3_example", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "start": "parcel src/qwen3_example.html --port 8883", 7 | "build": "parcel build src/qwen3_example.html --dist-dir lib" 8 | }, 9 | "devDependencies": { 10 | "buffer": "^5.7.1", 11 | "parcel": "^2.8.3", 12 | "process": "^0.11.10", 13 | "tslib": "^2.3.1", 14 | "typescript": "^4.9.5", 15 | "url": "^0.11.3" 16 | }, 17 | "dependencies": { 18 | "@mlc-ai/web-llm": "^0.2.79" 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /examples/qwen3/src/qwen3_example.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 | 8 |

WebLLM Test Page

9 | Open console to see output 10 |
11 |
12 | 13 |

Response

14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /examples/seed-to-reproduce/README.md: -------------------------------------------------------------------------------- 1 | ### OpenAI API Demos 2 | 3 | Run `npm install` first, followed by `npm start`. 4 | 5 | Note if you would like to hack WebLLM core package, 6 | you can change web-llm dependencies as `"file:../.."`, and follow the build from source 7 | instruction in the project to build webllm locally. This option is only recommended 8 | if you would like to hack WebLLM core package. 9 | -------------------------------------------------------------------------------- /examples/seed-to-reproduce/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "seed-to-reproduce", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "start": "parcel src/seed.html --port 8888", 7 | "build": "parcel build src/seed.html --dist-dir lib" 8 | }, 9 | "devDependencies": { 10 | "buffer": "^5.7.1", 11 | "parcel": "^2.8.3", 12 | "process": "^0.11.10", 13 | "tslib": "^2.3.1", 14 | "typescript": "^4.9.5", 15 | "url": "^0.11.3" 16 | }, 17 | "dependencies": { 18 | "@mlc-ai/web-llm": "^0.2.79" 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /examples/seed-to-reproduce/src/seed.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 | 8 |

WebLLM Test Page

9 | Open console to see output. We make two generations with the same seed; 10 | we should expect them to be identical. 11 |
12 |
13 | 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /examples/seed-to-reproduce/src/seed.ts: -------------------------------------------------------------------------------- 1 | import * as webllm from "@mlc-ai/web-llm"; 2 | 3 | function setLabel(id: string, text: string) { 4 | const label = document.getElementById(id); 5 | if (label == null) { 6 | throw Error("Cannot find label " + id); 7 | } 8 | label.innerText = text; 9 | } 10 | 11 | /** 12 | * We domnstrate the effect of seeding. The prompt is about writing a poem and we use a high 13 | * `temperature`, making the sampling distribution supposedly more random. However, we demonstrate 14 | * that with seeding, we should see the exact same result being generated across two trials. 15 | * With `n > 1`, all choices should also be exactly the same. 16 | */ 17 | async function main() { 18 | const initProgressCallback = (report: webllm.InitProgressReport) => { 19 | setLabel("init-label", report.text); 20 | }; 21 | const selectedModel = "Llama-3.1-8B-Instruct-q4f32_1-MLC"; 22 | const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine( 23 | selectedModel, 24 | { initProgressCallback: initProgressCallback }, 25 | ); 26 | 27 | const request: webllm.ChatCompletionRequest = { 28 | stream: false, // works with streaming as well 29 | messages: [ 30 | { role: "user", content: "Write a creative Haiku about Pittsburgh" }, 31 | ], 32 | n: 3, 33 | temperature: 1.2, // high temperature gives much more random results 34 | max_tokens: 128, // To save time; enough to demonstrate the effect 35 | seed: 42, 36 | }; 37 | 38 | const reply0 = await engine.chat.completions.create(request); 39 | console.log(reply0); 40 | console.log("First reply's last choice:\n" + (await engine.getMessage())); 41 | console.log(reply0.usage); 42 | 43 | const reply1 = await engine.chat.completions.create(request); 44 | console.log(reply1); 45 | console.log("Second reply's last choice:\n" + (await engine.getMessage())); 46 | 47 | // Rigorously check the generation results of each choice for the two requests 48 | for (const choice0 of reply0.choices) { 49 | const id = choice0.index; 50 | const choice1 = reply1.choices[id]; 51 | if (choice0.message.content !== choice1.message.content) { 52 | throw Error( 53 | "Chocie " + 54 | id + 55 | " of the two generations are different despite seeding", 56 | ); 57 | } 58 | } 59 | 60 | console.log(reply1.usage); 61 | } 62 | 63 | // Run one of the functions 64 | main(); 65 | -------------------------------------------------------------------------------- /examples/service-worker/README.md: -------------------------------------------------------------------------------- 1 | # WebLLM Service Worker Example 2 | 3 | This example shows how we can create a page with Web-LLM running in service worker. 
4 | 5 | ```bash 6 | npm install 7 | npm run build 8 | ``` 9 | -------------------------------------------------------------------------------- /examples/service-worker/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "web-llm-service-worker", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "start": "rm -rf .parcel-cache && parcel src/index.html --port 3000", 7 | "build": "rm -rf .parcel-cache && parcel build src/index.html --dist-dir lib" 8 | }, 9 | "devDependencies": { 10 | "buffer": "^6.0.3", 11 | "parcel": "^2.8.3", 12 | "process": "^0.11.10", 13 | "tslib": "^2.3.1", 14 | "typescript": "^4.9.5", 15 | "url": "^0.11.3" 16 | }, 17 | "dependencies": { 18 | "@mlc-ai/web-llm": "^0.2.79" 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /examples/service-worker/src/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 |

WebLLM Test Page

8 | Open console to see output 9 |
10 |
11 | 12 | 13 |

Prompt

14 | 15 | 16 |

Response

17 | 18 |
19 | 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /examples/service-worker/src/main.ts: -------------------------------------------------------------------------------- 1 | import * as webllm from "@mlc-ai/web-llm"; 2 | 3 | const registerServiceWorker = async () => { 4 | if ("serviceWorker" in navigator) { 5 | try { 6 | const registration = await navigator.serviceWorker.register( 7 | new URL("sw.ts", import.meta.url), 8 | { type: "module" }, 9 | ); 10 | if (registration.installing) { 11 | console.log("Service worker installing"); 12 | } else if (registration.waiting) { 13 | console.log("Service worker installed"); 14 | } else if (registration.active) { 15 | console.log("Service worker active"); 16 | } 17 | } catch (error) { 18 | console.error(`Registration failed with ${error}`); 19 | } 20 | } 21 | }; 22 | 23 | function setLabel(id: string, text: string) { 24 | const label = document.getElementById(id); 25 | if (label == null) { 26 | throw Error("Cannot find label " + id); 27 | } 28 | label.innerText = text; 29 | } 30 | 31 | // There are two demonstrations, pick one to run 32 | 33 | /** 34 | * Chat completion (OpenAI style) without streaming, where we get the entire response at once. 35 | */ 36 | async function mainNonStreaming() { 37 | const initProgressCallback = (report: webllm.InitProgressReport) => { 38 | setLabel("init-label", report.text); 39 | }; 40 | const selectedModel = "Llama-3.1-8B-Instruct-q4f32_1-MLC"; 41 | 42 | const engine: webllm.MLCEngineInterface = 43 | await webllm.CreateServiceWorkerMLCEngine(selectedModel, { 44 | initProgressCallback: initProgressCallback, 45 | }); 46 | 47 | const request: webllm.ChatCompletionRequest = { 48 | messages: [ 49 | { 50 | role: "system", 51 | content: 52 | "You are a helpful, respectful and honest assistant. " + 53 | "Be as happy as you can when speaking please. ", 54 | }, 55 | { role: "user", content: "Provide me three US states." }, 56 | { role: "assistant", content: "California, New York, Pennsylvania." }, 57 | { role: "user", content: "Two more please!" }, 58 | ], 59 | n: 3, 60 | temperature: 1.5, 61 | max_tokens: 256, 62 | }; 63 | 64 | const reply0 = await engine.chat.completions.create(request); 65 | console.log(reply0); 66 | setLabel("generate-label", reply0.choices[0].message.content || ""); 67 | 68 | console.log(reply0.usage); 69 | } 70 | 71 | /** 72 | * Chat completion (OpenAI style) with streaming, where delta is sent while generating response. 73 | */ 74 | async function mainStreaming() { 75 | const initProgressCallback = (report: webllm.InitProgressReport) => { 76 | setLabel("init-label", report.text); 77 | }; 78 | const selectedModel = "Llama-3.1-8B-Instruct-q4f32_1-MLC"; 79 | 80 | const engine: webllm.ServiceWorkerMLCEngine = 81 | await webllm.CreateServiceWorkerMLCEngine(selectedModel, { 82 | initProgressCallback: initProgressCallback, 83 | }); 84 | 85 | const request: webllm.ChatCompletionRequest = { 86 | stream: true, 87 | stream_options: { include_usage: true }, 88 | messages: [ 89 | { 90 | role: "system", 91 | content: 92 | "You are a helpful, respectful and honest assistant. " + 93 | "Be as happy as you can when speaking please. ", 94 | }, 95 | { role: "user", content: "Provide me three US states." }, 96 | { role: "assistant", content: "California, New York, Pennsylvania." }, 97 | { role: "user", content: "Two more please!" 
}, 98 | ], 99 | temperature: 1.5, 100 | max_tokens: 256, 101 | }; 102 | 103 | const asyncChunkGenerator = await engine.chat.completions.create(request); 104 | let message = ""; 105 | for await (const chunk of asyncChunkGenerator) { 106 | console.log(chunk); 107 | message += chunk.choices[0]?.delta?.content || ""; 108 | setLabel("generate-label", message); 109 | if (chunk.usage) { 110 | console.log(chunk.usage); // only last chunk has usage 111 | } 112 | // engine.interruptGenerate(); // works with interrupt as well 113 | } 114 | console.log("Final message:\n", await engine.getMessage()); // the concatenated message 115 | } 116 | 117 | registerServiceWorker(); 118 | // Run one of the function below 119 | // mainNonStreaming(); 120 | mainStreaming(); 121 | -------------------------------------------------------------------------------- /examples/service-worker/src/sw.ts: -------------------------------------------------------------------------------- 1 | import { ServiceWorkerMLCEngineHandler } from "@mlc-ai/web-llm"; 2 | 3 | let handler: ServiceWorkerMLCEngineHandler; 4 | 5 | self.addEventListener("activate", function (event) { 6 | handler = new ServiceWorkerMLCEngineHandler(); 7 | console.log("Web-LLM Service Worker Activated"); 8 | }); 9 | -------------------------------------------------------------------------------- /examples/simple-chat-js/index.css: -------------------------------------------------------------------------------- 1 | body, 2 | html { 3 | font-family: Arial, sans-serif; 4 | padding: 10px 20px; 5 | } 6 | 7 | .download-container { 8 | display: flex; 9 | justify-content: space-between; 10 | margin-bottom: 20px; 11 | } 12 | 13 | #download-status { 14 | border: solid 1px black; 15 | box-shadow: 16 | 0 10px 15px -3px rgba(0, 0, 0, 0.1), 17 | 0 4px 6px -2px rgba(0, 0, 0, 0.05); 18 | padding: 10px; 19 | } 20 | 21 | .chat-container { 22 | height: 400px; 23 | width: 100%; 24 | border: 2px solid black; 25 | display: flex; 26 | flex-direction: column; 27 | } 28 | 29 | .chat-box { 30 | overflow-y: scroll; 31 | background-color: #c3c3c3; 32 | border: 1px solid #ccc; 33 | padding: 5px; 34 | flex: 1 1; 35 | } 36 | 37 | .chat-stats { 38 | background-color: #d3eceb; 39 | flex: 0 0; 40 | padding: 10px; 41 | font-size: 0.75rem; 42 | } 43 | 44 | .message-container { 45 | width: 100%; 46 | display: flex; 47 | } 48 | 49 | .message { 50 | padding: 10px; 51 | margin: 10px 0; 52 | border-radius: 10px; 53 | width: fit-content; 54 | } 55 | 56 | .message-container.user { 57 | justify-content: end; 58 | } 59 | 60 | .message-container.assistant { 61 | justify-content: start; 62 | } 63 | 64 | .message-container.user .message { 65 | background: #007bff; 66 | color: #fff; 67 | } 68 | 69 | .message-container.assistant .message { 70 | background: #f1f0f0; 71 | color: #333; 72 | } 73 | 74 | .chat-input-container { 75 | min-height: 40px; 76 | flex: 0 0; 77 | display: flex; 78 | } 79 | 80 | #user-input { 81 | width: 70%; 82 | padding: 10px; 83 | border: 1px solid #ccc; 84 | } 85 | 86 | button { 87 | width: 25%; 88 | padding: 10px; 89 | border: none; 90 | background-color: #007bff; 91 | color: white; 92 | cursor: pointer; 93 | } 94 | 95 | button:disabled { 96 | background-color: lightgray; 97 | cursor: not-allowed; 98 | } 99 | 100 | button:hover:not(:disabled) { 101 | background-color: #0056b3; 102 | } 103 | 104 | .hidden { 105 | display: none; 106 | } 107 | -------------------------------------------------------------------------------- /examples/simple-chat-js/index.html: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Simple Chatbot 5 | 6 | 7 | 8 | 9 | 10 | 11 |

Step 1: Initialize WebLLM and Download Model

12 |
13 | 14 | 15 |
16 | 17 | 18 |

Step 2: Chat

19 |
20 |
21 | 22 |
23 | 24 | 25 |
26 |
27 | 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /examples/simple-chat-js/index.js: -------------------------------------------------------------------------------- 1 | import * as webllm from "https://esm.run/@mlc-ai/web-llm"; 2 | 3 | /*************** WebLLM logic ***************/ 4 | const messages = [ 5 | { 6 | content: "You are a helpful AI agent helping users.", 7 | role: "system", 8 | }, 9 | ]; 10 | 11 | const availableModels = webllm.prebuiltAppConfig.model_list.map( 12 | (m) => m.model_id, 13 | ); 14 | let selectedModel = "Llama-3.1-8B-Instruct-q4f32_1-1k"; 15 | 16 | // Callback function for initializing progress 17 | function updateEngineInitProgressCallback(report) { 18 | console.log("initialize", report.progress); 19 | document.getElementById("download-status").textContent = report.text; 20 | } 21 | 22 | // Create engine instance 23 | const engine = new webllm.MLCEngine(); 24 | engine.setInitProgressCallback(updateEngineInitProgressCallback); 25 | 26 | async function initializeWebLLMEngine() { 27 | document.getElementById("download-status").classList.remove("hidden"); 28 | selectedModel = document.getElementById("model-selection").value; 29 | const config = { 30 | temperature: 1.0, 31 | top_p: 1, 32 | }; 33 | await engine.reload(selectedModel, config); 34 | } 35 | 36 | async function streamingGenerating(messages, onUpdate, onFinish, onError) { 37 | try { 38 | let curMessage = ""; 39 | let usage; 40 | const completion = await engine.chat.completions.create({ 41 | stream: true, 42 | messages, 43 | stream_options: { include_usage: true }, 44 | }); 45 | for await (const chunk of completion) { 46 | const curDelta = chunk.choices[0]?.delta.content; 47 | if (curDelta) { 48 | curMessage += curDelta; 49 | } 50 | if (chunk.usage) { 51 | usage = chunk.usage; 52 | } 53 | onUpdate(curMessage); 54 | } 55 | const finalMessage = await engine.getMessage(); 56 | onFinish(finalMessage, usage); 57 | } catch (err) { 58 | onError(err); 59 | } 60 | } 61 | 62 | /*************** UI logic ***************/ 63 | function onMessageSend() { 64 | const input = document.getElementById("user-input").value.trim(); 65 | const message = { 66 | content: input, 67 | role: "user", 68 | }; 69 | if (input.length === 0) { 70 | return; 71 | } 72 | document.getElementById("send").disabled = true; 73 | 74 | messages.push(message); 75 | appendMessage(message); 76 | 77 | document.getElementById("user-input").value = ""; 78 | document 79 | .getElementById("user-input") 80 | .setAttribute("placeholder", "Generating..."); 81 | 82 | const aiMessage = { 83 | content: "typing...", 84 | role: "assistant", 85 | }; 86 | appendMessage(aiMessage); 87 | 88 | const onFinishGenerating = (finalMessage, usage) => { 89 | updateLastMessage(finalMessage); 90 | document.getElementById("send").disabled = false; 91 | const usageText = 92 | `prompt_tokens: ${usage.prompt_tokens}, ` + 93 | `completion_tokens: ${usage.completion_tokens}, ` + 94 | `prefill: ${usage.extra.prefill_tokens_per_s.toFixed(4)} tokens/sec, ` + 95 | `decoding: ${usage.extra.decode_tokens_per_s.toFixed(4)} tokens/sec`; 96 | document.getElementById("chat-stats").classList.remove("hidden"); 97 | document.getElementById("chat-stats").textContent = usageText; 98 | }; 99 | 100 | streamingGenerating( 101 | messages, 102 | updateLastMessage, 103 | onFinishGenerating, 104 | console.error, 105 | ); 106 | } 107 | 108 | function appendMessage(message) { 109 | const chatBox = document.getElementById("chat-box"); 110 | const 
container = document.createElement("div"); 111 | container.classList.add("message-container"); 112 | const newMessage = document.createElement("div"); 113 | newMessage.classList.add("message"); 114 | newMessage.textContent = message.content; 115 | 116 | if (message.role === "user") { 117 | container.classList.add("user"); 118 | } else { 119 | container.classList.add("assistant"); 120 | } 121 | 122 | container.appendChild(newMessage); 123 | chatBox.appendChild(container); 124 | chatBox.scrollTop = chatBox.scrollHeight; // Scroll to the latest message 125 | } 126 | 127 | function updateLastMessage(content) { 128 | const messageDoms = document 129 | .getElementById("chat-box") 130 | .querySelectorAll(".message"); 131 | const lastMessageDom = messageDoms[messageDoms.length - 1]; 132 | lastMessageDom.textContent = content; 133 | } 134 | 135 | /*************** UI binding ***************/ 136 | availableModels.forEach((modelId) => { 137 | const option = document.createElement("option"); 138 | option.value = modelId; 139 | option.textContent = modelId; 140 | document.getElementById("model-selection").appendChild(option); 141 | }); 142 | document.getElementById("model-selection").value = selectedModel; 143 | document.getElementById("download").addEventListener("click", function () { 144 | initializeWebLLMEngine().then(() => { 145 | document.getElementById("send").disabled = false; 146 | }); 147 | }); 148 | document.getElementById("send").addEventListener("click", function () { 149 | onMessageSend(); 150 | }); 151 | -------------------------------------------------------------------------------- /examples/simple-chat-ts/.gitignore: -------------------------------------------------------------------------------- 1 | src/app-config.js 2 | -------------------------------------------------------------------------------- /examples/simple-chat-ts/README.md: -------------------------------------------------------------------------------- 1 | # SimpleChat 2 | 3 | This folder provides a complete implementation of a simple 4 | chat app based on WebLLM. To try it out, you can do the following steps 5 | under this folder 6 | 7 | ```bash 8 | npm install 9 | npm start 10 | ``` 11 | 12 | Note if you would like to hack WebLLM core package. 13 | You can change web-llm dependencies as `"file:../.."`, and follow the build from source 14 | instruction in the project to build webllm locally. This option is only recommended 15 | if you would like to hack WebLLM core package. 16 | 17 | Due to the differences in command-line tools between Unix/Linux and Windows systems, special adaptation is necessary for Windows. Unix/Linux systems natively support commands like `cp` for file operations, which are not directly available in Windows. To ensure cross-platform compatibility, we use a Node.js script for file copying in Windows. 18 | 19 | ### Steps for Windows Users 20 | 21 | 1. **Create a Node.js Script File**: 22 | 23 | - In the `examples\simple-chat` directory, create a file named `copy-config.js`. 24 | - Add the following code to handle file copying: 25 | ```javascript 26 | const fs = require("fs"); 27 | // Copy file 28 | fs.copyFileSync("src/gh-config.js", "src/app-config.js"); 29 | ``` 30 | 31 | 2. **Modify `package.json`**: 32 | 33 | - In the `scripts` section of your `package.json`, replace Unix-style `cp` commands with our new Node.js script. 
For example: 34 | ```json 35 | "scripts": { 36 | "start": "node copy-config.js && parcel src/llm_chat.html --port 8888", 37 | "mlc-local": "node copy-config.js && parcel src/llm_chat.html --port 8888", 38 | "build": "node copy-config.js && parcel build src/llm_chat.html --dist-dir lib --no-content-hash" 39 | }, 40 | ``` 41 | 42 | 3. **Run the Application**: 43 | - Save your changes and run `npm start` in CMD or PowerShell to start the application. 44 | -------------------------------------------------------------------------------- /examples/simple-chat-ts/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "simple-chat", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "start": "cp src/gh-config.js src/app-config.js && parcel src/llm_chat.html --port 8883", 7 | "build": "cp src/gh-config.js src/app-config.js && parcel build src/llm_chat.html --dist-dir lib --no-content-hash" 8 | }, 9 | "devDependencies": { 10 | "buffer": "^5.7.1", 11 | "parcel": "^2.8.3", 12 | "process": "^0.11.10", 13 | "tslib": "^2.3.1", 14 | "typescript": "^4.9.5", 15 | "url": "^0.11.3" 16 | }, 17 | "dependencies": { 18 | "@mlc-ai/web-llm": "^0.2.79" 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /examples/simple-chat-ts/src/gh-config.js: -------------------------------------------------------------------------------- 1 | import { prebuiltAppConfig } from "@mlc-ai/web-llm"; 2 | 3 | export default { 4 | model_list: prebuiltAppConfig.model_list, 5 | use_web_worker: true, 6 | }; 7 | -------------------------------------------------------------------------------- /examples/simple-chat-ts/src/img/plane.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlc-ai/web-llm/d8b25fed8e81d6f6b27cdc07e839c1c09cfaa43d/examples/simple-chat-ts/src/img/plane.png -------------------------------------------------------------------------------- /examples/simple-chat-ts/src/img/reset.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlc-ai/web-llm/d8b25fed8e81d6f6b27cdc07e839c1c09cfaa43d/examples/simple-chat-ts/src/img/reset.png -------------------------------------------------------------------------------- /examples/simple-chat-ts/src/llm_chat.css: -------------------------------------------------------------------------------- 1 | .chatui { 2 | display: flex; 3 | position: relative; 4 | flex-flow: column wrap; 5 | justify-content: space-between; 6 | width: 100%; 7 | max-width: 867px; 8 | margin: 25px 10px; 9 | height: 600px; 10 | border: 2px solid #ddd; 11 | border-radius: 5px; 12 | background-color: #1f2027; 13 | } 14 | 15 | .chatui-select-wrapper { 16 | display: flex; 17 | justify-content: center; 18 | background-color: #1f2027; 19 | padding: 10px 0; 20 | } 21 | 22 | #chatui-select { 23 | width: 350px; 24 | background-color: #1f2027; 25 | color: white; 26 | border: none; 27 | } 28 | 29 | #chatui-select:focus { 30 | outline: none; 31 | } 32 | 33 | #chatui-select::-webkit-scrollbar { 34 | display: none; 35 | } 36 | 37 | #chatui-select option { 38 | background-color: #1f2027; 39 | color: white; 40 | } 41 | 42 | #chatui-select option:hover { 43 | background-color: #474747; 44 | color: white; 45 | } 46 | 47 | s .chatui-header { 48 | display: flex; 49 | justify-content: space-between; 50 | padding: 10px; 51 | border-bottom: 2px solid #ddd; 52 | background: #eee; 53 | color: 
#666; 54 | } 55 | 56 | /* Used to remove tiny white lines in android devices; not sure if there is a better way */ 57 | *, 58 | *::before, 59 | *::after { 60 | box-sizing: content-box; 61 | } 62 | 63 | .chatui-chat { 64 | flex: 1; 65 | overflow-y: auto; 66 | padding: 10px; 67 | background-color: #1f2027; 68 | } 69 | 70 | .chatui-chat::-webkit-scrollbar { 71 | width: 6px; 72 | } 73 | 74 | .chatui-chat::-webkit-scrollbar-track { 75 | background: #1f2027; 76 | } 77 | 78 | .chatui-chat::-webkit-scrollbar-thumb { 79 | background: #888; 80 | } 81 | 82 | .chatui-chat::-webkit-scrollbar-thumb:hover { 83 | background: #555; 84 | } 85 | 86 | .msg { 87 | display: flex; 88 | align-items: flex-end; 89 | margin-bottom: 10px; 90 | } 91 | 92 | .msg:last-of-type { 93 | margin: 0; 94 | } 95 | 96 | .msg-bubble { 97 | background-color: #f0f0f0; 98 | border-radius: 8px; 99 | padding: 16px; 100 | margin: 5px auto; 101 | width: calc(100% - 20px); 102 | box-sizing: border-box; 103 | color: black; 104 | border: none; 105 | font-size: medium; 106 | margin-left: auto; 107 | margin-right: auto; 108 | } 109 | 110 | .left-msg .msg-bubble { 111 | background-color: #343541; 112 | color: #ececec; 113 | } 114 | 115 | .error-msg .msg-bubble { 116 | background-color: #343541; 117 | color: #f15959; 118 | } 119 | 120 | .init-msg .msg-bubble { 121 | background-color: #343541; 122 | color: #ececec; 123 | } 124 | 125 | .right-msg .msg-bubble { 126 | background-color: #444654; 127 | color: #ececec; 128 | } 129 | 130 | .chatui-inputarea { 131 | display: flex; 132 | padding: 10px; 133 | border-top: 2px solid transparent; 134 | background-color: #1f2027; 135 | } 136 | 137 | .chatui-inputarea * { 138 | padding: 10px; 139 | border: none; 140 | border-radius: 3px; 141 | font-size: 1em; 142 | color: white; 143 | background: rgba(0, 0, 0, 0.3); 144 | } 145 | 146 | .chatui-input { 147 | flex: 1; 148 | background-color: #40414f; 149 | color: white; 150 | } 151 | 152 | .chatui-reset-btn { 153 | margin-left: 10px; 154 | background-color: #40414f; 155 | color: #fff; 156 | font-weight: bold; 157 | cursor: pointer; 158 | background-image: url("img/reset.png"); 159 | background-repeat: no-repeat; 160 | background-position: center; 161 | width: 40px; 162 | background-repeat: no-repeat; 163 | background-position: center; 164 | background-size: 20px 20px; 165 | } 166 | 167 | .chatui-reset-btn:hover { 168 | background-color: #03a33e; 169 | } 170 | 171 | .chatui-send-btn { 172 | margin-left: 10px; 173 | background-color: #40414f; 174 | color: #fff; 175 | font-weight: bold; 176 | cursor: pointer; 177 | background-image: url("img/plane.png"); 178 | background-repeat: no-repeat; 179 | background-position: center; 180 | width: 40px; 181 | background-repeat: no-repeat; 182 | background-position: center; 183 | background-size: 20px 20px; 184 | } 185 | 186 | .chatui-send-btn:hover { 187 | background-color: #03a33e; 188 | } 189 | -------------------------------------------------------------------------------- /examples/simple-chat-ts/src/llm_chat.html: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 |
5 | 6 |
7 |
8 | 9 |
10 | 16 | 17 | 18 |
19 |
20 | 21 |
22 | 23 |
24 | 25 | 26 | -------------------------------------------------------------------------------- /examples/simple-chat-ts/src/worker.ts: -------------------------------------------------------------------------------- 1 | // Serve the engine workload through web worker 2 | import { WebWorkerMLCEngineHandler } from "@mlc-ai/web-llm"; 3 | 4 | const handler = new WebWorkerMLCEngineHandler(); 5 | self.onmessage = (msg: MessageEvent) => { 6 | handler.onmessage(msg); 7 | }; 8 | -------------------------------------------------------------------------------- /examples/simple-chat-upload/.gitignore: -------------------------------------------------------------------------------- 1 | src/app-config.js 2 | -------------------------------------------------------------------------------- /examples/simple-chat-upload/README.md: -------------------------------------------------------------------------------- 1 | # SimpleChat 2 | 3 | This folder provides a complete implementation of a simple 4 | chat app based on WebLLM. To try it out, you can do the following steps 5 | under this folder 6 | 7 | ```bash 8 | npm install 9 | npm start 10 | ``` 11 | 12 | Note if you would like to hack WebLLM core package. 13 | You can change web-llm dependencies as `"file:../.."`, and follow the build from source 14 | instruction in the project to build webllm locally. This option is only recommended 15 | if you would like to hack WebLLM core package. 16 | 17 | Due to the differences in command-line tools between Unix/Linux and Windows systems, special adaptation is necessary for Windows. Unix/Linux systems natively support commands like `cp` for file operations, which are not directly available in Windows. To ensure cross-platform compatibility, we use a Node.js script for file copying in Windows. 18 | 19 | ### Steps for Windows Users 20 | 21 | 1. **Create a Node.js Script File**: 22 | 23 | - In the `examples\simple-chat` directory, create a file named `copy-config.js`. 24 | - Add the following code to handle file copying: 25 | ```javascript 26 | const fs = require("fs"); 27 | // Copy file 28 | fs.copyFileSync("src/gh-config.js", "src/app-config.js"); 29 | ``` 30 | 31 | 2. **Modify `package.json`**: 32 | 33 | - In the `scripts` section of your `package.json`, replace Unix-style `cp` commands with our new Node.js script. For example: 34 | ```json 35 | "scripts": { 36 | "start": "node copy-config.js && parcel src/llm_chat.html --port 8888", 37 | "mlc-local": "node copy-config.js && parcel src/llm_chat.html --port 8888", 38 | "build": "node copy-config.js && parcel build src/llm_chat.html --dist-dir lib --no-content-hash" 39 | }, 40 | ``` 41 | 42 | 3. **Run the Application**: 43 | - Save your changes and run `npm start` in CMD or PowerShell to start the application. 
44 | -------------------------------------------------------------------------------- /examples/simple-chat-upload/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "simple-chat", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "start": "cp src/gh-config.js src/app-config.js && parcel src/llm_chat.html --port 8883", 7 | "build": "cp src/gh-config.js src/app-config.js && parcel build src/llm_chat.html --dist-dir lib --no-content-hash" 8 | }, 9 | "devDependencies": { 10 | "buffer": "^5.7.1", 11 | "parcel": "^2.8.3", 12 | "process": "^0.11.10", 13 | "tslib": "^2.3.1", 14 | "typescript": "^4.9.5", 15 | "url": "^0.11.3" 16 | }, 17 | "dependencies": { 18 | "@mlc-ai/web-llm": "^0.2.31" 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /examples/simple-chat-upload/src/gh-config.js: -------------------------------------------------------------------------------- 1 | import { prebuiltAppConfig } from "@mlc-ai/web-llm"; 2 | 3 | export default { 4 | model_list: prebuiltAppConfig.model_list, 5 | use_web_worker: true, 6 | }; 7 | -------------------------------------------------------------------------------- /examples/simple-chat-upload/src/img/plane.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlc-ai/web-llm/d8b25fed8e81d6f6b27cdc07e839c1c09cfaa43d/examples/simple-chat-upload/src/img/plane.png -------------------------------------------------------------------------------- /examples/simple-chat-upload/src/img/reset.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlc-ai/web-llm/d8b25fed8e81d6f6b27cdc07e839c1c09cfaa43d/examples/simple-chat-upload/src/img/reset.png -------------------------------------------------------------------------------- /examples/simple-chat-upload/src/llm_chat.css: -------------------------------------------------------------------------------- 1 | .chatui { 2 | display: flex; 3 | position: relative; 4 | flex-flow: column wrap; 5 | justify-content: space-between; 6 | width: 100%; 7 | max-width: 867px; 8 | margin: 25px 10px; 9 | height: 600px; 10 | border: 2px solid #ddd; 11 | border-radius: 5px; 12 | background-color: #1f2027; 13 | } 14 | 15 | .chatui-select-wrapper { 16 | display: flex; 17 | justify-content: center; 18 | background-color: #1f2027; 19 | padding: 10px 0; 20 | } 21 | 22 | #chatui-select { 23 | width: 350px; 24 | background-color: #1f2027; 25 | color: white; 26 | border: none; 27 | } 28 | 29 | #chatui-select:focus { 30 | outline: none; 31 | } 32 | 33 | #chatui-select::-webkit-scrollbar { 34 | display: none; 35 | } 36 | 37 | #chatui-select option { 38 | background-color: #1f2027; 39 | color: white; 40 | } 41 | 42 | #chatui-select option:hover { 43 | background-color: #474747; 44 | color: white; 45 | } 46 | 47 | s .chatui-header { 48 | display: flex; 49 | justify-content: space-between; 50 | padding: 10px; 51 | border-bottom: 2px solid #ddd; 52 | background: #eee; 53 | color: #666; 54 | } 55 | 56 | /* Used to remove tiny white lines in android devices; not sure if there is a better way */ 57 | *, 58 | *::before, 59 | *::after { 60 | box-sizing: content-box; 61 | } 62 | 63 | .chatui-chat { 64 | flex: 1; 65 | overflow-y: auto; 66 | padding: 10px; 67 | background-color: #1f2027; 68 | } 69 | 70 | .chatui-chat::-webkit-scrollbar { 71 | width: 6px; 72 | } 73 | 74 | 
.chatui-chat::-webkit-scrollbar-track { 75 | background: #1f2027; 76 | } 77 | 78 | .chatui-chat::-webkit-scrollbar-thumb { 79 | background: #888; 80 | } 81 | 82 | .chatui-chat::-webkit-scrollbar-thumb:hover { 83 | background: #555; 84 | } 85 | 86 | .msg { 87 | display: flex; 88 | align-items: flex-end; 89 | margin-bottom: 10px; 90 | } 91 | 92 | .msg:last-of-type { 93 | margin: 0; 94 | } 95 | 96 | .msg-bubble { 97 | background-color: #f0f0f0; 98 | border-radius: 8px; 99 | padding: 16px; 100 | margin: 5px auto; 101 | width: calc(100% - 20px); 102 | box-sizing: border-box; 103 | color: black; 104 | border: none; 105 | font-size: medium; 106 | margin-left: auto; 107 | margin-right: auto; 108 | } 109 | 110 | .left-msg .msg-bubble { 111 | background-color: #343541; 112 | color: #ececec; 113 | } 114 | 115 | .error-msg .msg-bubble { 116 | background-color: #343541; 117 | color: #f15959; 118 | } 119 | 120 | .init-msg .msg-bubble { 121 | background-color: #343541; 122 | color: #ececec; 123 | } 124 | 125 | .right-msg .msg-bubble { 126 | background-color: #444654; 127 | color: #ececec; 128 | } 129 | 130 | .chatui-inputarea { 131 | display: flex; 132 | padding: 10px; 133 | border-top: 2px solid transparent; 134 | background-color: #1f2027; 135 | } 136 | 137 | .chatui-inputarea * { 138 | padding: 10px; 139 | border: none; 140 | border-radius: 3px; 141 | font-size: 1em; 142 | color: white; 143 | background: rgba(0, 0, 0, 0.3); 144 | } 145 | 146 | .chatui-input { 147 | flex: 1; 148 | background-color: #40414f; 149 | color: white; 150 | } 151 | 152 | .chatui-reset-btn { 153 | margin-left: 10px; 154 | background-color: #40414f; 155 | color: #fff; 156 | font-weight: bold; 157 | cursor: pointer; 158 | background-image: url("img/reset.png"); 159 | background-repeat: no-repeat; 160 | background-position: center; 161 | width: 40px; 162 | background-repeat: no-repeat; 163 | background-position: center; 164 | background-size: 20px 20px; 165 | } 166 | 167 | .chatui-reset-btn:hover { 168 | background-color: #03a33e; 169 | } 170 | 171 | .chatui-send-btn { 172 | margin-left: 10px; 173 | background-color: #40414f; 174 | color: #fff; 175 | font-weight: bold; 176 | cursor: pointer; 177 | background-image: url("img/plane.png"); 178 | background-repeat: no-repeat; 179 | background-position: center; 180 | width: 40px; 181 | background-repeat: no-repeat; 182 | background-position: center; 183 | background-size: 20px 20px; 184 | } 185 | 186 | .chatui-send-btn:hover { 187 | background-color: #03a33e; 188 | } 189 | -------------------------------------------------------------------------------- /examples/simple-chat-upload/src/llm_chat.html: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 |
5 | 6 |
7 |
8 | 9 | 10 | /> 17 | 18 |
19 | 25 | 26 | 27 |
28 |
29 | 30 |
31 | 32 |
33 | 34 | 35 | -------------------------------------------------------------------------------- /examples/simple-chat-upload/src/worker.ts: -------------------------------------------------------------------------------- 1 | // Serve the engine workload through web worker 2 | import { WebWorkerMLCEngineHandler } from "@mlc-ai/web-llm"; 3 | 4 | const handler = new WebWorkerMLCEngineHandler(); 5 | self.onmessage = (msg: MessageEvent) => { 6 | handler.onmessage(msg); 7 | }; 8 | -------------------------------------------------------------------------------- /examples/streaming/README.md: -------------------------------------------------------------------------------- 1 | ### OpenAI API Demos 2 | 3 | Run `npm install` first, followed by `npm start`. 4 | 5 | Note if you would like to hack WebLLM core package, 6 | you can change web-llm dependencies as `"file:../.."`, and follow the build from source 7 | instruction in the project to build webllm locally. This option is only recommended 8 | if you would like to hack WebLLM core package. 9 | -------------------------------------------------------------------------------- /examples/streaming/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "streaming", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "start": "parcel src/streaming.html --port 8888", 7 | "build": "parcel build src/streaming.html --dist-dir lib" 8 | }, 9 | "devDependencies": { 10 | "buffer": "^5.7.1", 11 | "parcel": "^2.8.3", 12 | "process": "^0.11.10", 13 | "tslib": "^2.3.1", 14 | "typescript": "^4.9.5", 15 | "url": "^0.11.3" 16 | }, 17 | "dependencies": { 18 | "@mlc-ai/web-llm": "^0.2.79" 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /examples/streaming/src/streaming.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 | 8 |

WebLLM Test Page

9 | Open console to see output 10 |
11 |
12 | 13 |

Response

14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /examples/streaming/src/streaming.ts: -------------------------------------------------------------------------------- 1 | import * as webllm from "@mlc-ai/web-llm"; 2 | 3 | function setLabel(id: string, text: string) { 4 | const label = document.getElementById(id); 5 | if (label == null) { 6 | throw Error("Cannot find label " + id); 7 | } 8 | label.innerText = text; 9 | } 10 | 11 | /** 12 | * We demonstrate chat completion with streaming, where delta is sent while generating response. 13 | */ 14 | async function main() { 15 | const initProgressCallback = (report: webllm.InitProgressReport) => { 16 | setLabel("init-label", report.text); 17 | }; 18 | const selectedModel = "Llama-3.1-8B-Instruct-q4f32_1-MLC"; 19 | const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine( 20 | selectedModel, 21 | { initProgressCallback: initProgressCallback }, 22 | ); 23 | 24 | const request: webllm.ChatCompletionRequest = { 25 | stream: true, 26 | stream_options: { include_usage: true }, 27 | messages: [ 28 | { 29 | role: "system", 30 | content: 31 | "You are a pirate chatbot who always responds in pirate speak!", 32 | }, 33 | { role: "user", content: "Who are you?" }, 34 | ], 35 | logprobs: true, 36 | top_logprobs: 2, 37 | }; 38 | 39 | const asyncChunkGenerator = await engine.chat.completions.create(request); 40 | let message = ""; 41 | for await (const chunk of asyncChunkGenerator) { 42 | console.log(chunk); 43 | message += chunk.choices[0]?.delta?.content || ""; 44 | setLabel("generate-label", message); 45 | if (chunk.usage) { 46 | console.log(chunk.usage); // only last chunk has usage 47 | } 48 | // engine.interruptGenerate(); // works with interrupt as well 49 | } 50 | console.log("Final message:\n", await engine.getMessage()); // the concatenated message 51 | } 52 | 53 | main(); 54 | -------------------------------------------------------------------------------- /examples/text-completion/README.md: -------------------------------------------------------------------------------- 1 | # WebLLM Get Started App 2 | 3 | This folder provides a minimum demo to show WebLLM API in a webapp setting. 4 | To try it out, you can do the following steps under this folder 5 | 6 | ```bash 7 | npm install 8 | npm start 9 | ``` 10 | 11 | Note if you would like to hack WebLLM core package. 12 | You can change web-llm dependencies as `"file:../.."`, and follow the build from source 13 | instruction in the project to build webllm locally. This option is only recommended 14 | if you would like to hack WebLLM core package. 
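For reference, a minimal sketch of that dependency change (assuming the example sits two levels below the WebLLM repo root, as in this tree, so `"file:../.."` resolves to the local checkout): edit the example's `package.json` to point at the local package instead of the published release, then re-run `npm install`.

```json
{
  "dependencies": {
    "@mlc-ai/web-llm": "file:../.."
  }
}
```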
15 | -------------------------------------------------------------------------------- /examples/text-completion/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "text-completion", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "start": "parcel src/text_completion.html --port 8888", 7 | "build": "parcel build src/text_completion.html --dist-dir lib" 8 | }, 9 | "devDependencies": { 10 | "buffer": "^5.7.1", 11 | "parcel": "^2.8.3", 12 | "process": "^0.11.10", 13 | "tslib": "^2.3.1", 14 | "typescript": "^4.9.5", 15 | "url": "^0.11.3" 16 | }, 17 | "dependencies": { 18 | "@mlc-ai/web-llm": "^0.2.79" 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /examples/text-completion/src/text_completion.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 |

WebLLM Test Page

8 | Open console to see output 9 |
10 |
11 | 12 | 13 |

Prompt

14 | 15 | 16 |

Response

17 | 18 |
19 | 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /examples/text-completion/src/text_completion.ts: -------------------------------------------------------------------------------- 1 | import * as webllm from "@mlc-ai/web-llm"; 2 | 3 | function setLabel(id: string, text: string) { 4 | const label = document.getElementById(id); 5 | if (label == null) { 6 | throw Error("Cannot find label " + id); 7 | } 8 | label.innerText = text; 9 | } 10 | 11 | async function main() { 12 | const initProgressCallback = (report: webllm.InitProgressReport) => { 13 | setLabel("init-label", report.text); 14 | }; 15 | 16 | // Unlike "Llama-3.1-8B-Instruct-q4f32_1-MLC", this is a base model 17 | const selectedModel = "Llama-3.1-8B-q4f32_1-MLC"; 18 | 19 | const appConfig: webllm.AppConfig = { 20 | model_list: [ 21 | { 22 | model: "https://huggingface.co/mlc-ai/Llama-3.1-8B-q4f32_1-MLC", // a base model 23 | model_id: selectedModel, 24 | model_lib: 25 | webllm.modelLibURLPrefix + 26 | webllm.modelVersion + 27 | "/Llama-3_1-8B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm", 28 | overrides: { 29 | context_window_size: 2048, 30 | }, 31 | }, 32 | ], 33 | }; 34 | const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine( 35 | selectedModel, 36 | { 37 | appConfig: appConfig, 38 | initProgressCallback: initProgressCallback, 39 | logLevel: "INFO", 40 | }, 41 | ); 42 | 43 | const reply0 = await engine.completions.create({ 44 | prompt: "List 3 US states: ", 45 | // below configurations are all optional 46 | echo: true, 47 | n: 2, 48 | max_tokens: 64, 49 | logprobs: true, 50 | top_logprobs: 2, 51 | }); 52 | console.log(reply0); 53 | console.log(reply0.usage); 54 | 55 | // To change model, either create a new engine via `CreateMLCEngine()`, or call `engine.reload(modelId)` 56 | } 57 | 58 | main(); 59 | -------------------------------------------------------------------------------- /examples/vision-model/README.md: -------------------------------------------------------------------------------- 1 | # WebLLM Get Started App 2 | 3 | This folder provides a minimum demo to show WebLLM API in a webapp setting. 4 | To try it out, you can do the following steps under this folder 5 | 6 | ```bash 7 | npm install 8 | npm start 9 | ``` 10 | 11 | Note if you would like to hack WebLLM core package. 12 | You can change web-llm dependencies as `"file:../.."`, and follow the build from source 13 | instruction in the project to build webllm locally. This option is only recommended 14 | if you would like to hack WebLLM core package. 
15 | -------------------------------------------------------------------------------- /examples/vision-model/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "get-started", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "start": "parcel src/vision_model.html --port 8888", 7 | "build": "parcel build src/vision_model.html --dist-dir lib" 8 | }, 9 | "devDependencies": { 10 | "buffer": "^5.7.1", 11 | "parcel": "^2.8.3", 12 | "process": "^0.11.10", 13 | "tslib": "^2.3.1", 14 | "typescript": "^4.9.5", 15 | "url": "^0.11.3" 16 | }, 17 | "dependencies": { 18 | "@mlc-ai/web-llm": "^0.2.79" 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /examples/vision-model/src/utils.ts: -------------------------------------------------------------------------------- 1 | export function getImageDataFromURL(url: string): Promise { 2 | return new Promise((resolve, reject) => { 3 | // Converts img to any, and later `as CanvasImageSource`, otherwise build complains 4 | const img: any = new Image(); 5 | img.crossOrigin = "anonymous"; // Important for CORS 6 | img.onload = () => { 7 | const canvas: HTMLCanvasElement = document.createElement("canvas"); 8 | const ctx: CanvasRenderingContext2D = canvas.getContext("2d")!; 9 | canvas.width = img.width; 10 | canvas.height = img.height; 11 | ctx.drawImage(img as CanvasImageSource, 0, 0); 12 | 13 | const imageData = ctx.getImageData(0, 0, img.width, img.height); 14 | resolve(imageData); 15 | }; 16 | img.onerror = () => reject(new Error("Failed to load image")); 17 | img.src = url; 18 | }); 19 | } 20 | 21 | export async function imageURLToBase64(url: string): Promise { 22 | const imageData: ImageData = await getImageDataFromURL(url); 23 | const canvas = document.createElement("canvas"); 24 | const ctx = canvas.getContext("2d"); 25 | 26 | canvas.width = imageData.width; 27 | canvas.height = imageData.height; 28 | 29 | ctx!.putImageData(imageData, 0, 0); 30 | 31 | return canvas.toDataURL(); 32 | } 33 | -------------------------------------------------------------------------------- /examples/vision-model/src/vision_model.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 |

WebLLM Test Page

8 | Open console to see output 9 |
10 |
11 | 12 | 13 |

Prompt

14 | 15 | 16 |

Response

17 | 18 |
19 | 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /examples/vision-model/src/vision_model.ts: -------------------------------------------------------------------------------- 1 | import * as webllm from "@mlc-ai/web-llm"; 2 | import { imageURLToBase64 } from "./utils"; 3 | 4 | function setLabel(id: string, text: string) { 5 | const label = document.getElementById(id); 6 | if (label == null) { 7 | throw Error("Cannot find label " + id); 8 | } 9 | label.innerText = text; 10 | } 11 | 12 | const USE_WEB_WORKER = true; 13 | 14 | const proxyUrl = "https://cors-anywhere.herokuapp.com/"; 15 | const url_https_street = "https://www.ilankelman.org/stopsigns/australia.jpg"; 16 | const url_https_tree = "https://www.ilankelman.org/sunset.jpg"; 17 | const url_https_sea = 18 | "https://www.islandvulnerability.org/index/silhouette.jpg"; 19 | 20 | async function main() { 21 | // can feed request with either base64 or http url 22 | const url_base64_street = await imageURLToBase64(proxyUrl + url_https_street); 23 | 24 | const initProgressCallback = (report: webllm.InitProgressReport) => { 25 | setLabel("init-label", report.text); 26 | }; 27 | const selectedModel = "Phi-3.5-vision-instruct-q4f16_1-MLC"; 28 | 29 | const engineConfig: webllm.MLCEngineConfig = { 30 | initProgressCallback: initProgressCallback, 31 | logLevel: "INFO", // specify the log level 32 | }; 33 | const chatOpts = { 34 | context_window_size: 6144, 35 | }; 36 | 37 | const engine: webllm.MLCEngineInterface = USE_WEB_WORKER 38 | ? await webllm.CreateWebWorkerMLCEngine( 39 | new Worker(new URL("./worker.ts", import.meta.url), { 40 | type: "module", 41 | }), 42 | selectedModel, 43 | engineConfig, 44 | chatOpts, 45 | ) 46 | : await webllm.CreateMLCEngine(selectedModel, engineConfig, chatOpts); 47 | 48 | // 1. Prefill two images 49 | const messages: webllm.ChatCompletionMessageParam[] = [ 50 | { 51 | role: "user", 52 | content: [ 53 | { type: "text", text: "List the items in each image concisely." }, 54 | { 55 | type: "image_url", 56 | image_url: { 57 | url: url_base64_street, 58 | }, 59 | }, 60 | { 61 | type: "image_url", 62 | image_url: { 63 | url: proxyUrl + url_https_sea, 64 | }, 65 | }, 66 | ], 67 | }, 68 | ]; 69 | const request0: webllm.ChatCompletionRequest = { 70 | stream: false, // can be streaming, same behavior 71 | messages: messages, 72 | }; 73 | const reply0 = await engine.chat.completions.create(request0); 74 | const replyMessage0 = await engine.getMessage(); 75 | console.log(reply0); 76 | console.log(replyMessage0); 77 | console.log(reply0.usage); 78 | 79 | // 2. A follow up text-only question 80 | messages.push({ role: "assistant", content: replyMessage0 }); 81 | messages.push({ role: "user", content: "What is special about each image?" }); 82 | const request1: webllm.ChatCompletionRequest = { 83 | stream: false, // can be streaming, same behavior 84 | messages: messages, 85 | }; 86 | const reply1 = await engine.chat.completions.create(request1); 87 | const replyMessage1 = await engine.getMessage(); 88 | console.log(reply1); 89 | console.log(replyMessage1); 90 | console.log(reply1.usage); 91 | 92 | // 3. A follow up single-image question 93 | messages.push({ role: "assistant", content: replyMessage1 }); 94 | messages.push({ 95 | role: "user", 96 | content: [ 97 | { type: "text", text: "What about this image? Answer concisely." 
}, 98 | { 99 | type: "image_url", 100 | image_url: { url: proxyUrl + url_https_tree }, 101 | }, 102 | ], 103 | }); 104 | const request2: webllm.ChatCompletionRequest = { 105 | stream: false, // can be streaming, same behavior 106 | messages: messages, 107 | }; 108 | const reply2 = await engine.chat.completions.create(request2); 109 | const replyMessage2 = await engine.getMessage(); 110 | console.log(reply2); 111 | console.log(replyMessage2); 112 | console.log(reply2.usage); 113 | } 114 | 115 | main(); 116 | -------------------------------------------------------------------------------- /examples/vision-model/src/worker.ts: -------------------------------------------------------------------------------- 1 | import { WebWorkerMLCEngineHandler } from "@mlc-ai/web-llm"; 2 | 3 | const handler = new WebWorkerMLCEngineHandler(); 4 | 5 | self.onmessage = (msg: MessageEvent) => { 6 | handler.onmessage(msg); 7 | }; 8 | -------------------------------------------------------------------------------- /jest.config.cjs: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | preset: "ts-jest", 3 | testEnvironment: "node", 4 | }; 5 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@mlc-ai/web-llm", 3 | "version": "0.2.79", 4 | "description": "Hardware accelerated language model chats on browsers", 5 | "main": "lib/index.js", 6 | "types": "lib/index.d.ts", 7 | "type": "module", 8 | "scripts": { 9 | "build": "rollup -c && ./cleanup-index-js.sh", 10 | "lint": "npx eslint ./src/ ./tests/ ./examples/ && npx prettier ./src/ ./tests/ ./examples/ --check", 11 | "test": "yarn jest", 12 | "format": "prettier --write \"./src/\" \"./examples/\" \"./tests/\"", 13 | "prepare": "husky" 14 | }, 15 | "files": [ 16 | "lib" 17 | ], 18 | "repository": { 19 | "type": "git", 20 | "url": "git+https://github.com/mlc-ai/web-llm" 21 | }, 22 | "keywords": [ 23 | "llm", 24 | "large language model", 25 | "machine learning" 26 | ], 27 | "license": "Apache-2.0", 28 | "homepage": "https://github.com/mlc-ai/web-llm", 29 | "devDependencies": { 30 | "@mlc-ai/web-tokenizers": "^0.1.6", 31 | "@next/eslint-plugin-next": "^14.2.3", 32 | "@rollup/plugin-commonjs": "^20.0.0", 33 | "@rollup/plugin-node-resolve": "^13.0.4", 34 | "@types/chrome": "^0.0.266", 35 | "@types/jest": "^29.5.11", 36 | "@types/serviceworker": "^0.0.86", 37 | "@typescript-eslint/eslint-plugin": "^5.59.6", 38 | "@typescript-eslint/parser": "^5.59.6", 39 | "@webgpu/types": "^0.1.24", 40 | "buffer": "^5.7.1", 41 | "eslint": "^8.41.0", 42 | "eslint-config-prettier": "^9.1.0", 43 | "eslint-plugin-prettier": "^5.1.3", 44 | "husky": "^9.0.11", 45 | "jest": "^29.7.0", 46 | "prettier": "3.2.5", 47 | "process": "^0.11.10", 48 | "rollup": "^2.56.2", 49 | "rollup-plugin-ignore": "^1.0.10", 50 | "rollup-plugin-typescript2": "^0.34.1", 51 | "ts-jest": "^29.1.2", 52 | "tslib": "^2.3.1", 53 | "@mlc-ai/web-runtime": "0.18.0-dev2", 54 | "@mlc-ai/web-xgrammar": "0.1.0", 55 | "typescript": "^4.9.5" 56 | }, 57 | "dependencies": { 58 | "loglevel": "^1.9.1" 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /rollup.config.js: -------------------------------------------------------------------------------- 1 | import { nodeResolve } from '@rollup/plugin-node-resolve'; 2 | import ignore from "rollup-plugin-ignore"; 3 | import commonjs from 
'@rollup/plugin-commonjs'; 4 | import typescript from 'rollup-plugin-typescript2'; 5 | 6 | export default { 7 | input: 'src/index.ts', 8 | output: [ 9 | { 10 | file: 'lib/index.js', 11 | exports: 'named', 12 | format: 'es', 13 | sourcemap: true, 14 | globals: {'ws': 'ws', 15 | 'perf_hooks': 'perf_hooks'} 16 | } 17 | ], 18 | plugins: [ 19 | ignore(["fs", "path", "crypto"]), 20 | nodeResolve({ browser: true }), 21 | commonjs({ 22 | ignoreDynamicRequires: true, 23 | }), 24 | typescript({ 25 | rollupCommonJSResolveHack: false, 26 | clean: true 27 | }) 28 | ] 29 | }; 30 | -------------------------------------------------------------------------------- /scripts/gh_deploy_site.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -euxo pipefail 3 | 4 | export PYTHONPATH=$PWD/python 5 | cd docs && make html && cd .. 6 | cd site && jekyll b && cd .. 7 | rm -rf site/_site/docs 8 | cp -r docs/_build/html site/_site/docs 9 | 10 | git fetch 11 | git checkout -B gh-pages origin/gh-pages 12 | rm -rf docs .gitignore 13 | mkdir -p docs 14 | cp -rf site/_site/* docs 15 | touch docs/.nojekyll 16 | echo "webllm.mlc.ai" >> docs/CNAME 17 | 18 | DATE=`date` 19 | git add docs && git commit -am "Build at ${DATE}" 20 | git push origin gh-pages 21 | git checkout main && git submodule update 22 | echo "Finish deployment at ${DATE}" 23 | -------------------------------------------------------------------------------- /scripts/local_deploy_site.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -euxo pipefail 3 | 4 | cd examples/simple-chat 5 | rm -rf lib 6 | npm run build 7 | cd ../.. 8 | 9 | cp examples/simple-chat/lib/* site 10 | 11 | cd site && jekyll serve --host localhost --port 8888 12 | -------------------------------------------------------------------------------- /scripts/prep_deps.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # This file prepares all the necessary dependencies for the web build. 3 | set -euxo pipefail 4 | 5 | emcc --version 6 | npm --version 7 | 8 | TVM_SOURCE_DIR_SET="${TVM_SOURCE_DIR:-}" 9 | 10 | if [[ -z ${TVM_SOURCE_DIR_SET} ]]; then 11 | if [[ ! -d "3rdparty/tvm-unity" ]]; then 12 | echo "Do not find TVM_SOURCE_DIR env variable, cloning a version as source". 13 | git clone https://github.com/mlc-ai/relax 3rdparty/tvm-unity --recursive 14 | fi 15 | export TVM_SOURCE_DIR="${TVM_SOURCE_DIR:-3rdparty/tvm-unity}" 16 | fi 17 | 18 | cd ${TVM_SOURCE_DIR}/web && make && npm install && npm run build && cd - 19 | rm -rf tvm_home 20 | ln -s ${TVM_SOURCE_DIR} tvm_home 21 | npm install 22 | -------------------------------------------------------------------------------- /scripts/serve_mlc_llm_dist.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # This file prepares all the necessary dependencies for the web build. 3 | set -euxo pipefail 4 | 5 | npm --version 6 | 7 | MLC_LLM_HOME_SET="${MLC_LLM_HOME:-}" 8 | 9 | if [[ -z ${MLC_LLM_HOME_SET} ]]; then 10 | echo "Do not find MLC_LLM_HOME env variable, need to set this to work". 
11 | fi 12 | cd ${MLC_LLM_HOME}/dist 13 | echo "Serving ${MLC_LLM_HOME}/dist for local debugging purposes" 14 | npx http-server -p 8000 --cors 15 | cd - 16 | -------------------------------------------------------------------------------- /site/.gitignore: -------------------------------------------------------------------------------- 1 | dist 2 | llm-chat-config.json 3 | _includes/stable_diffusion.html 4 | _site 5 | llm_chat.* 6 | -------------------------------------------------------------------------------- /site/_config.yml: -------------------------------------------------------------------------------- 1 | name: "WebLLM" 2 | short_name: "WebLLM" 3 | 4 | url: https://webllm.mlc.ai 5 | 6 | exclude: [README.md, serve_local.sh] 7 | 8 | plugins: 9 | - jekyll-remote-theme 10 | 11 | remote_theme: mlc-ai/jekyll-theme-mlc 12 | 13 | # Colorize code snippets with the rogue module if we want to deploy on GH. 14 | highlighter: rouge 15 | 16 | markdown: kramdown 17 | 18 | # The path structure for blog posts. 19 | permalink: /blog/:year/:month/:day/:title.html 20 | 21 | # Number of news stories on the front page. 22 | front_page_news: 8 23 | 24 | # Base pathname for links. 25 | base: "" 26 | 27 | # make pages for the _projects folder 28 | collections: 29 | projects: 30 | output: true 31 | 32 | course_title: 33 | 34 | # Navigation bar links. 35 | navigation: 36 | - title: Home 37 | link: / 38 | - title: GitHub 39 | link: https://github.com/mlc-ai/web-llm 40 | -------------------------------------------------------------------------------- /site/_includes/arrow.svg: -------------------------------------------------------------------------------- 1 | 22 | -------------------------------------------------------------------------------- /site/_includes/github.svg: -------------------------------------------------------------------------------- 1 | 2 | 8 | 9 | -------------------------------------------------------------------------------- /site/_includes/head.html: -------------------------------------------------------------------------------- 1 | 2 | 6 | 10 | 11 | -------------------------------------------------------------------------------- /site/_includes/hero.html: -------------------------------------------------------------------------------- 1 |
38 | 39 | 73 | -------------------------------------------------------------------------------- /site/assets/img/fig/Pittsburgh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlc-ai/web-llm/d8b25fed8e81d6f6b27cdc07e839c1c09cfaa43d/site/assets/img/fig/Pittsburgh.png -------------------------------------------------------------------------------- /site/assets/img/logo/cmuscs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlc-ai/web-llm/d8b25fed8e81d6f6b27cdc07e839c1c09cfaa43d/site/assets/img/logo/cmuscs.png -------------------------------------------------------------------------------- /site/assets/img/logo/mlc-logo-with-text-landscape.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlc-ai/web-llm/d8b25fed8e81d6f6b27cdc07e839c1c09cfaa43d/site/assets/img/logo/mlc-logo-with-text-landscape.png -------------------------------------------------------------------------------- /site/assets/img/logo/octoml.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlc-ai/web-llm/d8b25fed8e81d6f6b27cdc07e839c1c09cfaa43d/site/assets/img/logo/octoml.png -------------------------------------------------------------------------------- /site/assets/img/logo/sjtu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlc-ai/web-llm/d8b25fed8e81d6f6b27cdc07e839c1c09cfaa43d/site/assets/img/logo/sjtu.png -------------------------------------------------------------------------------- /site/assets/img/logo/uw.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlc-ai/web-llm/d8b25fed8e81d6f6b27cdc07e839c1c09cfaa43d/site/assets/img/logo/uw.jpg -------------------------------------------------------------------------------- /site/assets/video/Code.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlc-ai/web-llm/d8b25fed8e81d6f6b27cdc07e839c1c09cfaa43d/site/assets/video/Code.mp4 -------------------------------------------------------------------------------- /site/assets/video/Code.webm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlc-ai/web-llm/d8b25fed8e81d6f6b27cdc07e839c1c09cfaa43d/site/assets/video/Code.webm -------------------------------------------------------------------------------- /site/assets/video/Pittsburgh.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlc-ai/web-llm/d8b25fed8e81d6f6b27cdc07e839c1c09cfaa43d/site/assets/video/Pittsburgh.mp4 -------------------------------------------------------------------------------- /site/assets/video/Pittsburgh.webm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlc-ai/web-llm/d8b25fed8e81d6f6b27cdc07e839c1c09cfaa43d/site/assets/video/Pittsburgh.webm -------------------------------------------------------------------------------- /site/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: Home 4 | notitle: true 5 | --- 6 | 7 | {% include hero.html %} 8 | 9 | ## Overview 10 | 11 | We have been seeing 
amazing progress in generative AI and LLMs recently. Thanks to open-source efforts like LLaMA, Alpaca, Vicuna and Dolly, we are starting to see an exciting future of building our own open-source language models and personal AI assistants. 12 | 13 | These models are usually big and compute-heavy. To build a chat service, we would need a large cluster to run an inference server, while clients send requests to the servers and retrieve the inference output. We also usually have to run on specific types of GPUs where popular deep-learning frameworks are readily available. 14 | 15 | This project is our step toward bringing more diversity to the ecosystem. Specifically, can we bake LLMs directly into the client side and run them inside a browser? If that can be realized, we could run personal AI models on the client, with the benefits of lower cost, better personalization and stronger privacy protection. The client side is also getting quite powerful. 16 | 17 | Wouldn't it be even more amazing if we could simply open a browser and bring AI natively to your browser tab? The ecosystem has reached some level of readiness, and this project provides an affirmative answer to that question. 18 | 19 | ## Key Features 20 | - **In-Browser Inference**: WebLLM is a high-performance, in-browser language model inference engine that leverages WebGPU for hardware acceleration, enabling powerful LLM operations directly within web browsers without server-side processing. 21 | 22 | - [**Full OpenAI API Compatibility**](https://github.com/mlc-ai/web-llm?tab=readme-ov-file#full-openai-compatibility): Seamlessly integrate your app with WebLLM using the OpenAI API, with functionality such as JSON mode, function calling, streaming, and more. 23 | 24 | - [**Extensive Model Support**](https://github.com/mlc-ai/web-llm?tab=readme-ov-file#built-in-models): WebLLM natively supports a range of models including Llama, Phi, Gemma, RedPajama, Mistral, Qwen (通义千问), and many others, making it versatile for various AI tasks. 25 | 26 | - [**Custom Model Integration**](https://github.com/mlc-ai/web-llm?tab=readme-ov-file#custom-models): Easily integrate and deploy custom models in MLC format, allowing you to adapt WebLLM to specific needs and scenarios and enhancing flexibility in model deployment. 27 | 28 | - **Plug-and-Play Integration**: Easily integrate WebLLM into your projects using package managers like NPM and Yarn, or directly via CDN, complete with comprehensive [examples](https://github.com/mlc-ai/web-llm/tree/main/examples) and a modular design for connecting with UI components. 29 | 30 | - **Streaming & Real-Time Interactions**: Supports streaming chat completions, allowing real-time output generation that enhances interactive applications like chatbots and virtual assistants. 31 | 32 | - **Web Worker & Service Worker Support**: Optimize UI performance and manage the lifecycle of models efficiently by offloading computations to separate worker threads or service workers. 33 | 34 | - **Chrome Extension Support**: Extend the functionality of web browsers through custom Chrome extensions using WebLLM, with examples available for building both basic and advanced extensions. 35 | 36 | ## Disclaimer 37 | 38 | The [demo site](https://chat.webllm.ai) is for research purposes only, subject to the model licenses of LLaMA, Vicuna and RedPajama. Please contact us if you find any potential violation.
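
## Quick Example

As a minimal sketch of the plug-and-play integration and OpenAI-style API described above: the snippet below assumes the prebuilt model id `Llama-3.1-8B-Instruct-q4f32_1-MLC` (any other id from the built-in model list works the same way) and simply logs results to the console.

```typescript
import * as webllm from "@mlc-ai/web-llm";

async function demo() {
  // Download (or load from cache) the model weights and initialize WebGPU.
  const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
    "Llama-3.1-8B-Instruct-q4f32_1-MLC",
    { initProgressCallback: (report) => console.log(report.text) },
  );

  // Non-streaming chat completion, mirroring the OpenAI request/response shape.
  const reply = await engine.chat.completions.create({
    messages: [{ role: "user", content: "What is WebGPU?" }],
  });
  console.log(reply.choices[0].message.content);

  // Streaming: chunks arrive as they are generated.
  const chunks = await engine.chat.completions.create({
    stream: true,
    messages: [{ role: "user", content: "Summarize that in one sentence." }],
  });
  for await (const chunk of chunks) {
    console.log(chunk.choices[0]?.delta?.content ?? "");
  }
}

demo();
```

The same calls work unchanged against `CreateWebWorkerMLCEngine`, backed by a small worker script that instantiates `WebWorkerMLCEngineHandler` (as the vision-model and get-started-web-worker examples in this repository do), which keeps model execution off the UI thread.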
39 | -------------------------------------------------------------------------------- /src/index.ts: -------------------------------------------------------------------------------- 1 | export { 2 | ModelRecord, 3 | AppConfig, 4 | ChatOptions, 5 | MLCEngineConfig, 6 | GenerationConfig, 7 | ModelType, 8 | prebuiltAppConfig, 9 | modelVersion, 10 | modelLibURLPrefix, 11 | functionCallingModelIds, 12 | } from "./config"; 13 | 14 | export { 15 | InitProgressCallback, 16 | InitProgressReport, 17 | MLCEngineInterface, 18 | LogitProcessor, 19 | LogLevel, 20 | } from "./types"; 21 | 22 | export { MLCEngine, CreateMLCEngine } from "./engine"; 23 | 24 | export { 25 | hasModelInCache, 26 | deleteChatConfigInCache, 27 | deleteModelAllInfoInCache, 28 | deleteModelWasmInCache, 29 | deleteModelInCache, 30 | } from "./cache_util"; 31 | 32 | export { 33 | WebWorkerMLCEngineHandler, 34 | WebWorkerMLCEngine, 35 | CreateWebWorkerMLCEngine, 36 | } from "./web_worker"; 37 | 38 | export { WorkerRequest, WorkerResponse, CustomRequestParams } from "./message"; 39 | 40 | export { 41 | ServiceWorkerMLCEngineHandler, 42 | ServiceWorkerMLCEngine, 43 | CreateServiceWorkerMLCEngine, 44 | } from "./service_worker"; 45 | 46 | export { 47 | ServiceWorkerMLCEngineHandler as ExtensionServiceWorkerMLCEngineHandler, 48 | ServiceWorkerMLCEngine as ExtensionServiceWorkerMLCEngine, 49 | CreateServiceWorkerMLCEngine as CreateExtensionServiceWorkerMLCEngine, 50 | } from "./extension_service_worker"; 51 | 52 | export * from "./openai_api_protocols/index"; 53 | -------------------------------------------------------------------------------- /src/message.ts: -------------------------------------------------------------------------------- 1 | import { AppConfig, ChatOptions } from "./config"; 2 | import { InitProgressReport, LogLevel } from "./types"; 3 | import { 4 | ChatCompletionRequestStreaming, 5 | ChatCompletionRequestNonStreaming, 6 | ChatCompletion, 7 | ChatCompletionChunk, 8 | CompletionCreateParamsNonStreaming, 9 | CompletionCreateParamsStreaming, 10 | Completion, 11 | EmbeddingCreateParams, 12 | CreateEmbeddingResponse, 13 | } from "./openai_api_protocols/index"; 14 | 15 | /** 16 | * Message kind used by worker 17 | */ 18 | type RequestKind = 19 | | "reload" 20 | | "runtimeStatsText" 21 | | "interruptGenerate" 22 | | "unload" 23 | | "resetChat" 24 | | "getMaxStorageBufferBindingSize" 25 | | "getGPUVendor" 26 | | "forwardTokensAndSample" 27 | | "chatCompletionNonStreaming" 28 | | "completionNonStreaming" 29 | | "embedding" 30 | | "getMessage" 31 | | "chatCompletionStreamInit" 32 | | "completionStreamInit" 33 | | "completionStreamNextChunk" 34 | | "customRequest" 35 | | "keepAlive" 36 | | "setLogLevel" 37 | | "setAppConfig"; 38 | 39 | // eslint-disable-next-line @typescript-eslint/no-unused-vars 40 | type ResponseKind = "return" | "throw" | "initProgressCallback"; 41 | 42 | export interface ReloadParams { 43 | modelId: string[]; 44 | chatOpts?: ChatOptions[]; 45 | } 46 | export interface ResetChatParams { 47 | keepStats: boolean; 48 | modelId?: string; 49 | } 50 | export interface GetMessageParams { 51 | modelId?: string; 52 | } 53 | export interface RuntimeStatsTextParams { 54 | modelId?: string; 55 | } 56 | export interface ForwardTokensAndSampleParams { 57 | inputIds: Array; 58 | isPrefill: boolean; 59 | modelId?: string; 60 | } 61 | 62 | // Notes on the following Params with modelId and chatOpts: 63 | // These fields are the model and chatOpts that the frontend engine expects the backend 64 | // to be loaded with. 
If not loaded due to web/service worker unexpectedly killed, 65 | // handler will call reload(). An engine can load multiple models, hence both are list. 66 | // TODO(webllm-team): should add appConfig here as well if rigorous. 67 | // Fore more, see https://github.com/mlc-ai/web-llm/pull/471 68 | 69 | // Note on the messages with selectedModelId: 70 | // This is the modelId this request uses. It is needed to identify which async generator 71 | // to instantiate / use, since an engine can load multiple models, thus the handler 72 | // needs to maintain multiple generators. 73 | export interface ChatCompletionNonStreamingParams { 74 | request: ChatCompletionRequestNonStreaming; 75 | modelId: string[]; 76 | chatOpts?: ChatOptions[]; 77 | } 78 | export interface ChatCompletionStreamInitParams { 79 | request: ChatCompletionRequestStreaming; 80 | selectedModelId: string; 81 | modelId: string[]; 82 | chatOpts?: ChatOptions[]; 83 | } 84 | export interface CompletionNonStreamingParams { 85 | request: CompletionCreateParamsNonStreaming; 86 | modelId: string[]; 87 | chatOpts?: ChatOptions[]; 88 | } 89 | export interface CompletionStreamInitParams { 90 | request: CompletionCreateParamsStreaming; 91 | selectedModelId: string; 92 | modelId: string[]; 93 | chatOpts?: ChatOptions[]; 94 | } 95 | export interface EmbeddingParams { 96 | request: EmbeddingCreateParams; 97 | modelId: string[]; 98 | chatOpts?: ChatOptions[]; 99 | } 100 | export interface CompletionStreamNextChunkParams { 101 | selectedModelId: string; 102 | } 103 | 104 | export interface CustomRequestParams { 105 | requestName: string; 106 | requestMessage: string; 107 | } 108 | export type MessageContent = 109 | | ReloadParams 110 | | ResetChatParams 111 | | GetMessageParams 112 | | RuntimeStatsTextParams 113 | | ForwardTokensAndSampleParams 114 | | ChatCompletionNonStreamingParams 115 | | ChatCompletionStreamInitParams 116 | | CompletionNonStreamingParams 117 | | CompletionStreamInitParams 118 | | EmbeddingParams 119 | | CompletionStreamNextChunkParams 120 | | CustomRequestParams 121 | | InitProgressReport 122 | | LogLevel 123 | | string 124 | | null 125 | | number 126 | | ChatCompletion 127 | | ChatCompletionChunk 128 | | CreateEmbeddingResponse 129 | | Completion 130 | | AppConfig 131 | | void; 132 | /** 133 | * The message used in exchange between worker 134 | * and the main thread. 
135 | */ 136 | 137 | export type WorkerRequest = { 138 | kind: RequestKind; 139 | uuid: string; 140 | content: MessageContent; 141 | }; 142 | 143 | type HeartbeatWorkerResponse = { 144 | kind: "heartbeat"; 145 | uuid: string; 146 | }; 147 | 148 | type OneTimeWorkerResponse = { 149 | kind: "return" | "throw"; 150 | uuid: string; 151 | content: MessageContent; 152 | }; 153 | 154 | type InitProgressWorkerResponse = { 155 | kind: "initProgressCallback"; 156 | uuid: string; 157 | content: InitProgressReport; 158 | }; 159 | 160 | export type WorkerResponse = 161 | | OneTimeWorkerResponse 162 | | InitProgressWorkerResponse 163 | | HeartbeatWorkerResponse; 164 | -------------------------------------------------------------------------------- /src/openai_api_protocols/index.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * The input to OpenAI API, directly adopted from openai-node with small tweaks: 3 | * https://github.com/openai/openai-node/blob/master/src/resources/chat/completions.ts 4 | * 5 | * Copyright 2024 OpenAI 6 | * 7 | * Licensed under the Apache License, Version 2.0 (the "License"); 8 | * you may not use this file except in compliance with the License. 9 | * You may obtain a copy of the License at 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | export { 19 | Chat, 20 | ChatCompletionRequestBase, 21 | ChatCompletionRequestNonStreaming, 22 | ChatCompletionRequestStreaming, 23 | ChatCompletionRequest, 24 | ChatCompletion, 25 | ChatCompletionChunk, 26 | ChatCompletionRequestUnsupportedFields, 27 | postInitAndCheckFields as postInitAndCheckFieldsChatCompletion, 28 | ChatCompletionContentPart, 29 | ChatCompletionContentPartText, 30 | ChatCompletionContentPartImage, 31 | ChatCompletionMessageToolCall, 32 | ChatCompletionRole, 33 | ChatCompletionSystemMessageParam, 34 | ChatCompletionUserMessageParam, 35 | ChatCompletionAssistantMessageParam, 36 | ChatCompletionToolMessageParam, 37 | ChatCompletionMessageParam, 38 | FunctionParameters, 39 | FunctionDefinition, 40 | ChatCompletionTool, 41 | ChatCompletionNamedToolChoice, 42 | ChatCompletionToolChoiceOption, 43 | TopLogprob, 44 | ChatCompletionTokenLogprob, 45 | ChatCompletionMessage, 46 | CompletionUsage, 47 | ResponseFormat, 48 | ChatCompletionFinishReason, 49 | } from "./chat_completion"; 50 | 51 | export { 52 | Completions, 53 | CompletionCreateParamsNonStreaming, 54 | CompletionCreateParamsStreaming, 55 | CompletionCreateParamsBase, 56 | CompletionCreateParams, 57 | Completion, 58 | CompletionChoice, 59 | postInitAndCheckFields as postInitAndCheckFieldsCompletion, 60 | } from "./completion"; 61 | 62 | export { 63 | Embeddings, 64 | Embedding, 65 | EmbeddingCreateParams, 66 | CreateEmbeddingResponse, 67 | postInitAndCheckFields as postInitAndCheckFieldsEmbedding, 68 | } from "./embedding"; 69 | -------------------------------------------------------------------------------- /tests/generation_config.test.ts: -------------------------------------------------------------------------------- 1 | import { 2 | GenerationConfig, 3 | postInitAndCheckGenerationConfigValues, 4 | } from "../src/config"; 5 | import { describe, expect, test } from 
"@jest/globals"; 6 | 7 | describe("Check generation config illegal values", () => { 8 | test("High-level unsupported fields", () => { 9 | expect(() => { 10 | const genConfig: GenerationConfig = { 11 | max_tokens: 0, 12 | }; 13 | postInitAndCheckGenerationConfigValues(genConfig); 14 | }).toThrow("Make sure `max_tokens` > 0"); 15 | }); 16 | 17 | test("logit_bias exceeds range", () => { 18 | expect(() => { 19 | const genConfig: GenerationConfig = { 20 | max_tokens: 10, 21 | logit_bias: { 22 | "1355": 155, 23 | }, 24 | }; 25 | postInitAndCheckGenerationConfigValues(genConfig); 26 | }).toThrow("Make sure -100 < logit_bias <= 100."); 27 | }); 28 | 29 | test("logit_bias invalid key", () => { 30 | expect(() => { 31 | const genConfig: GenerationConfig = { 32 | max_tokens: 10, 33 | logit_bias: { 34 | thisRaisesError: 50, 35 | }, 36 | }; 37 | postInitAndCheckGenerationConfigValues(genConfig); 38 | }).toThrow( 39 | "Make sure logit_bias's keys to be number represented in string.", 40 | ); 41 | }); 42 | 43 | test("top_logprobs out of range", () => { 44 | expect(() => { 45 | const genConfig: GenerationConfig = { 46 | logprobs: true, 47 | top_logprobs: 6, 48 | max_tokens: 10, 49 | }; 50 | postInitAndCheckGenerationConfigValues(genConfig); 51 | }).toThrow("Make sure 0 < top_logprobs <= 5."); 52 | }); 53 | 54 | test("top_logprobs set without setting logprobs", () => { 55 | expect(() => { 56 | const genConfig: GenerationConfig = { 57 | top_logprobs: 3, 58 | max_tokens: 10, 59 | }; 60 | postInitAndCheckGenerationConfigValues(genConfig); 61 | }).toThrow("top_logprobs requires logprobs to be true"); 62 | }); 63 | 64 | test("top_logprobs set though logprobs is false", () => { 65 | expect(() => { 66 | const genConfig: GenerationConfig = { 67 | logprobs: false, 68 | top_logprobs: 3, 69 | max_tokens: 10, 70 | }; 71 | postInitAndCheckGenerationConfigValues(genConfig); 72 | }).toThrow("top_logprobs requires logprobs to be true"); 73 | }); 74 | }); 75 | 76 | describe("Check generation post init", () => { 77 | test("Only set one of presence or frequency penalty", () => { 78 | const genConfig: GenerationConfig = { 79 | frequency_penalty: 1.5, 80 | }; 81 | postInitAndCheckGenerationConfigValues(genConfig); 82 | expect(genConfig.presence_penalty).toBe(0.0); 83 | }); 84 | 85 | test("Set logprobs without setting top_logprobs", () => { 86 | const genConfig: GenerationConfig = { 87 | logprobs: true, 88 | }; 89 | postInitAndCheckGenerationConfigValues(genConfig); 90 | expect(genConfig.top_logprobs).toBe(0); 91 | }); 92 | 93 | test("Set both logprobs and top_logprobs", () => { 94 | const genConfig: GenerationConfig = { 95 | logprobs: true, 96 | top_logprobs: 2, 97 | }; 98 | postInitAndCheckGenerationConfigValues(genConfig); 99 | expect(genConfig.top_logprobs).toBe(2); 100 | }); 101 | }); 102 | -------------------------------------------------------------------------------- /tests/openai_completion.test.ts: -------------------------------------------------------------------------------- 1 | import { getConversation } from "../src/conversation"; 2 | import { 3 | TextCompletionConversationError, 4 | TextCompletionConversationExpectsPrompt, 5 | } from "../src/error"; 6 | import { 7 | CompletionCreateParams, 8 | postInitAndCheckFields, 9 | } from "../src/openai_api_protocols/completion"; 10 | import { llama3_1ChatConfig } from "./constants"; 11 | import { describe, expect, test } from "@jest/globals"; 12 | 13 | describe("Conversation object with text completion", () => { 14 | test("Conversation checks ", () => { 15 | const conv = 
getConversation( 16 | llama3_1ChatConfig.conv_template, 17 | llama3_1ChatConfig.conv_config, 18 | /*isTextCompletion=*/ true, 19 | ); 20 | expect(() => { 21 | conv.getPromptArrayTextCompletion(); 22 | }).toThrow(new TextCompletionConversationExpectsPrompt()); 23 | expect(() => { 24 | conv.getPromptArray(); 25 | }).toThrow(new TextCompletionConversationError("getPromptArray")); 26 | 27 | conv.prompt = "Hi"; 28 | expect(conv.getPromptArrayTextCompletion()).toEqual(["Hi"]); 29 | 30 | conv.reset(); 31 | expect(conv.prompt === undefined).toEqual(true); 32 | }); 33 | }); 34 | 35 | describe("Check completion unsupported requests", () => { 36 | test("stream_options without stream specified", () => { 37 | expect(() => { 38 | const request: CompletionCreateParams = { 39 | prompt: "Hello, ", 40 | stream_options: { include_usage: true }, 41 | }; 42 | postInitAndCheckFields(request, "Llama-3.1-8B-Instruct-q4f32_1-MLC"); 43 | }).toThrow("Only specify stream_options when stream=True."); 44 | }); 45 | 46 | test("stream_options with stream=false", () => { 47 | expect(() => { 48 | const request: CompletionCreateParams = { 49 | stream: false, 50 | prompt: "Hello, ", 51 | stream_options: { include_usage: true }, 52 | }; 53 | postInitAndCheckFields(request, "Llama-3.1-8B-Instruct-q4f32_1-MLC"); 54 | }).toThrow("Only specify stream_options when stream=True."); 55 | }); 56 | 57 | test("High-level unsupported fields", () => { 58 | expect(() => { 59 | const request: CompletionCreateParams = { 60 | prompt: "Hello, ", 61 | suffix: "this is suffix", // this raises error 62 | }; 63 | postInitAndCheckFields(request, "Llama-3.1-8B-Instruct-q4f32_1-MLC"); 64 | }).toThrow( 65 | "The following fields in CompletionCreateParams are not yet supported", 66 | ); 67 | 68 | expect(() => { 69 | const request: CompletionCreateParams = { 70 | prompt: "Hello, ", 71 | best_of: 3, // this raises error 72 | }; 73 | postInitAndCheckFields(request, "Llama-3.1-8B-Instruct-q4f32_1-MLC"); 74 | }).toThrow( 75 | "The following fields in CompletionCreateParams are not yet supported", 76 | ); 77 | 78 | expect(() => { 79 | const request: CompletionCreateParams = { 80 | prompt: "Hello, ", 81 | user: "Bob", // this raises error 82 | }; 83 | postInitAndCheckFields(request, "Llama-3.1-8B-Instruct-q4f32_1-MLC"); 84 | }).toThrow( 85 | "The following fields in CompletionCreateParams are not yet supported", 86 | ); 87 | }); 88 | 89 | test("When streaming `n` needs to be 1", () => { 90 | expect(() => { 91 | const request: CompletionCreateParams = { 92 | stream: true, 93 | n: 2, 94 | prompt: "Hello, ", 95 | }; 96 | postInitAndCheckFields(request, "Llama-3.1-8B-Instruct-q4f32_1-MLC"); 97 | }).toThrow("When streaming, `n` cannot be > 1."); 98 | }); 99 | 100 | test("Non-integer seed", () => { 101 | expect(() => { 102 | const request: CompletionCreateParams = { 103 | prompt: "Hello, ", 104 | max_tokens: 10, 105 | seed: 42.2, // Note that Number.isInteger(42.0) is true 106 | }; 107 | postInitAndCheckFields(request, "Llama-3.1-8B-Instruct-q4f32_1-MLC"); 108 | }).toThrow("`seed` should be an integer, but got"); 109 | }); 110 | }); 111 | -------------------------------------------------------------------------------- /tests/openai_embeddings.test.ts: -------------------------------------------------------------------------------- 1 | import { 2 | EmbeddingInputEmptyError, 3 | EmbeddingUnsupportedEncodingFormatError, 4 | } from "../src/error"; 5 | import { 6 | EmbeddingCreateParams, 7 | postInitAndCheckFields, 8 | } from "../src/openai_api_protocols/embedding"; 9 | 
import { describe, expect, test } from "@jest/globals"; 10 | 11 | describe("Check embeddings supported requests", () => { 12 | test("Supported embedding request float", () => { 13 | const request: EmbeddingCreateParams = { 14 | input: ["Hello", "Hi"], 15 | encoding_format: "float", 16 | }; 17 | postInitAndCheckFields(request, "snowflake-arctic-embed-m-q0f32-MLC"); 18 | }); 19 | 20 | test("Supported embedding request, unspecified format", () => { 21 | const request: EmbeddingCreateParams = { 22 | input: ["Hello", "Hi"], 23 | }; 24 | postInitAndCheckFields(request, "snowflake-arctic-embed-m-q0f32-MLC"); 25 | }); 26 | 27 | test("Supported embedding request, single string", () => { 28 | const request: EmbeddingCreateParams = { 29 | input: "Hello", 30 | }; 31 | postInitAndCheckFields(request, "snowflake-arctic-embed-m-q0f32-MLC"); 32 | }); 33 | 34 | test("Supported embedding request, single token array", () => { 35 | const request: EmbeddingCreateParams = { 36 | input: [0, 1], 37 | }; 38 | postInitAndCheckFields(request, "snowflake-arctic-embed-m-q0f32-MLC"); 39 | }); 40 | 41 | test("Supported embedding request, array of token arrays", () => { 42 | const request: EmbeddingCreateParams = { 43 | input: [ 44 | [0, 1], 45 | [0, 1], 46 | ], 47 | }; 48 | postInitAndCheckFields(request, "snowflake-arctic-embed-m-q0f32-MLC"); 49 | }); 50 | }); 51 | 52 | describe("Invalid embedding input", () => { 53 | test("Empty string", () => { 54 | expect(() => { 55 | const request: EmbeddingCreateParams = { 56 | input: "", 57 | }; 58 | postInitAndCheckFields(request, "snowflake-arctic-embed-m-q0f32-MLC"); 59 | }).toThrow(new EmbeddingInputEmptyError()); 60 | }); 61 | 62 | test("Contains empty string", () => { 63 | expect(() => { 64 | const request: EmbeddingCreateParams = { 65 | input: ["Hi", "hello", ""], 66 | }; 67 | postInitAndCheckFields(request, "snowflake-arctic-embed-m-q0f32-MLC"); 68 | }).toThrow(new EmbeddingInputEmptyError()); 69 | }); 70 | 71 | test("Empty token array", () => { 72 | expect(() => { 73 | const request: EmbeddingCreateParams = { 74 | input: [], 75 | }; 76 | postInitAndCheckFields(request, "snowflake-arctic-embed-m-q0f32-MLC"); 77 | }).toThrow(new EmbeddingInputEmptyError()); 78 | }); 79 | 80 | test("Contains empty token array", () => { 81 | expect(() => { 82 | const request: EmbeddingCreateParams = { 83 | input: [[1, 2], [3], [], [4]], 84 | }; 85 | postInitAndCheckFields(request, "snowflake-arctic-embed-m-q0f32-MLC"); 86 | }).toThrow(new EmbeddingInputEmptyError()); 87 | }); 88 | }); 89 | 90 | describe("Check embeddings unsupported requests", () => { 91 | test("base64 encoding_format", () => { 92 | expect(() => { 93 | const request: EmbeddingCreateParams = { 94 | input: ["Hello", "Hi"], 95 | encoding_format: "base64", 96 | }; 97 | postInitAndCheckFields(request, "snowflake-arctic-embed-m-q0f32-MLC"); 98 | }).toThrow(new EmbeddingUnsupportedEncodingFormatError()); 99 | }); 100 | 101 | test("user", () => { 102 | expect(() => { 103 | const request: EmbeddingCreateParams = { 104 | input: ["Hello", "Hi"], 105 | encoding_format: "float", 106 | user: "Bob", 107 | }; 108 | postInitAndCheckFields(request, "snowflake-arctic-embed-m-q0f32-MLC"); 109 | }).toThrow("The following fields in"); 110 | }); 111 | 112 | test("dimensions", () => { 113 | expect(() => { 114 | const request: EmbeddingCreateParams = { 115 | input: ["Hello", "Hi"], 116 | encoding_format: "float", 117 | dimensions: 2048, 118 | }; 119 | postInitAndCheckFields(request, "snowflake-arctic-embed-m-q0f32-MLC"); 120 | }).toThrow("The 
following fields in"); 121 | }); 122 | }); 123 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "es6", 4 | "declaration": true, 5 | "outDir": "lib", 6 | "declarationMap": true, 7 | "sourceMap": true, 8 | "strict": true, 9 | "moduleResolution": "Node", 10 | "esModuleInterop": true, 11 | "lib": ["dom", "WebWorker"] 12 | }, 13 | "typeRoots": ["./node_modules/@webgpu/types", "./node_modules/@types"], 14 | "include": ["src"], 15 | "exclude": ["node_modules", "build", "dist", "rollup.config.cjs"] 16 | } 17 | -------------------------------------------------------------------------------- /utils/.gitignore: -------------------------------------------------------------------------------- 1 | package-lock.json 2 | -------------------------------------------------------------------------------- /utils/vram_requirements/.gitignore: -------------------------------------------------------------------------------- 1 | src/app-config.js 2 | -------------------------------------------------------------------------------- /utils/vram_requirements/README.md: -------------------------------------------------------------------------------- 1 | ### vRAM Requirements 2 | 3 | To check vRAM requirement for a model, add models to check in `gh-config.json`. 4 | 5 | Then run `npm install` followed by `npm start`. -------------------------------------------------------------------------------- /utils/vram_requirements/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "vram-requirements", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "start": "cp src/gh-config.js src/app-config.js && parcel src/vram_requirements.html --port 8885", 7 | "build": "cp src/gh-config.js src/app-config.js && parcel build src/vram_requirements.html --dist-dir lib" 8 | }, 9 | "devDependencies": { 10 | "buffer": "^5.7.1", 11 | "crypto-browserify": "^3.12.0", 12 | "events": "^3.3.0", 13 | "parcel": "^2.8.3", 14 | "path-browserify": "^1.0.1", 15 | "process": "^0.11.10", 16 | "stream-browserify": "^3.0.0", 17 | "tslib": "^2.3.1", 18 | "typescript": "^4.9.5", 19 | "url": "^0.11.3" 20 | }, 21 | "dependencies": { 22 | "@mlc-ai/web-llm": "^0.2.79", 23 | "@mlc-ai/web-runtime": "0.18.0-dev2" 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /utils/vram_requirements/src/gh-config.js: -------------------------------------------------------------------------------- 1 | import { prebuiltAppConfig } from "../../../lib/config"; 2 | 3 | export default { 4 | "model_list": prebuiltAppConfig.model_list, 5 | "use_web_worker": true 6 | } 7 | -------------------------------------------------------------------------------- /utils/vram_requirements/src/vram_requirements.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 | 8 |

<h2>vRAM Requirement Report</h2>
9 | Open console to see logs
10 | <br />
11 | <label id="report-label"> </label>
12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /utils/vram_requirements/src/vram_requirements.ts: -------------------------------------------------------------------------------- 1 | import ModelRecord from "@mlc-ai/web-llm"; 2 | import appConfig from "./app-config"; // Modify this to inspect vram requirement for models of choice 3 | import * as tvmjs from "@mlc-ai/web-runtime"; 4 | import log from "loglevel"; 5 | 6 | function setLabel(id: string, text: string) { 7 | const label = document.getElementById(id); 8 | if (label == null) { 9 | throw Error("Cannot find label " + id); 10 | } 11 | label.innerText = text; 12 | } 13 | 14 | interface AppConfig { 15 | model_list: Array; 16 | } 17 | 18 | const dtypeBytesMap = new Map([ 19 | ["uint32", 4], 20 | ["uint16", 2], 21 | ["float32", 4], 22 | ["float16", 4], 23 | ]); 24 | 25 | async function main() { 26 | const config: AppConfig = appConfig; 27 | let report = ""; 28 | for (let i = 0; i < config.model_list.length; ++i) { 29 | // 1. Read each model record 30 | const modelRecord: ModelRecord = config.model_list[i]; 31 | const model_id = modelRecord.model_id; 32 | // 2. Load the wasm 33 | const wasmUrl = modelRecord.model_lib; 34 | const wasmSource = await (await fetch(wasmUrl)).arrayBuffer(); 35 | report += `${model_id}: \n`; 36 | // 3. Initialize tvmjs instance and virtual machine using the wasm 37 | const tvm = await tvmjs.instantiate( 38 | new Uint8Array(wasmSource), 39 | tvmjs.createPolyfillWASI(), 40 | log.info, 41 | ); 42 | const gpuDetectOutput = await tvmjs.detectGPUDevice(); 43 | if (gpuDetectOutput == undefined) { 44 | throw Error("Cannot find WebGPU in the environment"); 45 | } 46 | tvm.initWebGPU(gpuDetectOutput.device); 47 | tvm.beginScope(); 48 | const vm = tvm.detachFromCurrentScope( 49 | tvm.createVirtualMachine(tvm.webgpu()), 50 | ); 51 | // 4. Get metadata from the vm 52 | let fgetMetadata: any; 53 | try { 54 | fgetMetadata = vm.getFunction("_metadata"); 55 | } catch (err) { 56 | log.error( 57 | "The wasm needs to have function `_metadata` to inspect vram requirement.", 58 | err, 59 | ); 60 | } 61 | const ret_value = fgetMetadata(); 62 | const metadataStr = tvm.detachFromCurrentScope(ret_value).toString(); 63 | const metadata = JSON.parse(metadataStr); 64 | // 5. Parse the vram requirement 65 | // 5.1. Get bytes for loading params 66 | let paramBytes = 0; 67 | metadata.params.forEach((param: any) => { 68 | if (Math.min(...param.shape) > 0) { 69 | // Possible to have shape -1 signifying a dynamic shape -- we disregard them 70 | const dtypeBytes = dtypeBytesMap.get(param.dtype); 71 | if (dtypeBytes === undefined) { 72 | throw Error( 73 | "Cannot find size of " + 74 | param.dtype + 75 | ", add it to `dtypeBytesMap`.", 76 | ); 77 | } 78 | const numParams = param.shape.reduce((a: number, b: number) => a * b); 79 | paramBytes += numParams * dtypeBytes; 80 | } else { 81 | log.info( 82 | `${model_id}'s ${param.name} has dynamic shape; excluded from vRAM calculation.`, 83 | ); 84 | } 85 | }); 86 | // 5.2. Get maximum bytes needed for temporary buffer across all functions 87 | let maxTempFuncBytes = 0; 88 | Object.entries(metadata.memory_usage).forEach(([funcName, funcBytes]) => { 89 | if (typeof funcBytes !== "number") { 90 | throw Error("`memory_usage` expects entry `funcName: funcBytes`."); 91 | } 92 | maxTempFuncBytes = Math.max(maxTempFuncBytes, funcBytes); 93 | }); 94 | // 5.3. Get kv cache bytes 95 | const kv_cache_bytes: number = metadata.kv_cache_bytes; 96 | // 5.4. 
Get total vRAM needed 97 | const totalBytes = paramBytes + maxTempFuncBytes + kv_cache_bytes; 98 | // 6. Report vRAM Requirement 99 | report += 100 | `totalBytes: ${(totalBytes / 1024 / 1024).toFixed(2)} MB\n` + 101 | `paramBytes: ${(paramBytes / 1024 / 1024).toFixed(2)} MB\n` + 102 | `maxTempFuncBytes: ${(maxTempFuncBytes / 1024 / 1024).toFixed(2)} MB\n` + 103 | `kv_cache_bytes: ${(kv_cache_bytes / 1024 / 1024).toFixed(2)} MB\n\n`; 104 | // 7. Dispose everything 105 | tvm.endScope(); 106 | vm.dispose(); 107 | tvm.dispose(); 108 | } 109 | setLabel("report-label", report); 110 | } 111 | 112 | main(); 113 | --------------------------------------------------------------------------------