├── LICENSE
├── README.md
├── index.js
├── index.html
└── .gitignore

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2024 AI Anytime

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# On-device-LLM-Inference-using-Mediapipe
On-device LLM inference using the MediaPipe LLM Inference API.

# LLM Task Sample Setup Guide

This guide provides step-by-step instructions for setting up and running a sample LLM task on your local machine. Before you begin, make sure you have Python installed (3.x, or 2.x if you need the legacy server command below) and a modern web browser, preferably Chrome.

## Setup Instructions

- Create a new folder on your device named `llm_task`.
- Copy the `index.html` and `index.js` files into the `llm_task` folder. These provide the task's web interface.
- Download the Gemma 2B model (the TensorFlow Lite `2b-it-gpu-int4` or `2b-it-gpu-int8` variant) into the `llm_task` folder. Alternatively, you can convert an external LLM (such as Phi-2, Falcon, or StableLM) for the GPU backend, which is currently the only backend supported in the browser.
- Open `index.js` in a text editor and update the `modelFileName` variable with the name of your model file.
- Run a local HTTP server from within the `llm_task` folder by executing `python3 -m http.server 8000` (or `python -m SimpleHTTPServer 8000` on legacy Python 2) in your terminal. A server is required because the page loads ES modules and fetches the model file, both of which browsers block over `file://` URLs.
- Open a web browser and go to `http://localhost:8000`. The web interface for your LLM task will appear, and the button on the page becomes enabled once the model has loaded (roughly ten seconds, depending on your hardware), indicating the task is ready. A quick way to verify the model file is being served is shown below.

Enjoy exploring the capabilities of your large language model with this simple setup!
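As a sanity check that the server can actually reach the model file, you can paste something like the following into the browser console at `http://localhost:8000`. This is a minimal sketch; the file name is only an example and must match the model you downloaded:

```js
// Hedged sanity check: confirms the model file is served before the
// LLM task tries to fetch it. Adjust the name to your actual model file.
const modelFile = 'gemma-2b-it-gpu-int4.bin';
fetch(modelFile, {method: 'HEAD'})
    .then((resp) => {
      if (!resp.ok) throw new Error(`HTTP ${resp.status}`);
      console.log('Model file reachable:',
          resp.headers.get('content-length'), 'bytes');
    })
    .catch((err) => console.error('Model file not reachable:', err));
```

If this logs an error, check that the `.bin` file sits in the same `llm_task` folder the server was started from.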
--------------------------------------------------------------------------------
/index.js:
--------------------------------------------------------------------------------
import {FilesetResolver, LlmInference} from 'https://cdn.jsdelivr.net/npm/@mediapipe/tasks-genai';

const input = document.getElementById('input');
const output = document.getElementById('output');
const submit = document.getElementById('submit');

const modelFileName = 'gemma-2b-it-gpu-int4.bin';  // Update to match your model file.

/**
 * Displays newly generated partial results in the output text box.
 */
function displayPartialResults(partialResults, complete) {
  output.textContent += partialResults;

  if (complete) {
    if (!output.textContent) {
      output.textContent = 'Result is empty';
    }
    submit.disabled = false;
  }
}

/**
 * Main function to run LLM inference.
 */
async function runDemo() {
  const genaiFileset = await FilesetResolver.forGenAiTasks(
      'https://cdn.jsdelivr.net/npm/@mediapipe/tasks-genai/wasm');
  let llmInference;

  submit.onclick = () => {
    output.textContent = '';
    submit.disabled = true;
    llmInference.generateResponse(input.value, displayPartialResults);
  };

  submit.value = 'Loading the model...';
  LlmInference
      .createFromOptions(genaiFileset, {
        baseOptions: {modelAssetPath: modelFileName},
        // maxTokens: 512,  // The maximum number of tokens (input tokens +
        //                  // output tokens) the model handles.
        // randomSeed: 1,   // The random seed used during text generation.
        // topK: 1,         // The number of tokens the model considers at
        //                  // each step of generation. Limits predictions to
        //                  // the top k most-probable tokens. Setting
        //                  // randomSeed is required for this to take effect.
        // temperature: 1.0, // The amount of randomness introduced during
        //                   // generation. Setting randomSeed is required for
        //                   // this to take effect.
      })
      .then((llm) => {
        llmInference = llm;
        submit.disabled = false;
        submit.value = 'Get Response';
      })
      .catch(() => {
        alert('Failed to initialize the task.');
      });
}

runDemo();
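The commented-out options above control sampling. Here is a minimal sketch of enabling them, assuming the same CDN build of `@mediapipe/tasks-genai`; the values are illustrative, not recommendations. It also shows that `generateResponse` resolves with the full text when called without a progress callback, as an alternative to the streaming style used in `index.js`:

```js
// Illustrative sketch (run as a module script): creating the task with
// explicit sampling options. Values are examples only; tune for your model.
import {FilesetResolver, LlmInference} from 'https://cdn.jsdelivr.net/npm/@mediapipe/tasks-genai';

const genaiFileset = await FilesetResolver.forGenAiTasks(
    'https://cdn.jsdelivr.net/npm/@mediapipe/tasks-genai/wasm');

const llm = await LlmInference.createFromOptions(genaiFileset, {
  baseOptions: {modelAssetPath: 'gemma-2b-it-gpu-int4.bin'},
  maxTokens: 512,    // Budget for input + output tokens combined.
  topK: 40,          // Sample from the 40 most probable tokens.
  temperature: 0.8,  // Lower values make output more deterministic.
  randomSeed: 1,     // Required for topK/temperature to take effect.
});

// Without a progress callback, await the full response instead of streaming.
const text = await llm.generateResponse('Write one sentence about browsers.');
console.log(text);
```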
--------------------------------------------------------------------------------
/index.html:
--------------------------------------------------------------------------------
<!-- The markup was stripped from the original dump; this is a minimal
     reconstruction providing the three element IDs that index.js expects
     (input, submit, output) and loading index.js as a module. -->
<!DOCTYPE html>
<html>
<body>
  Input:<br />
  <textarea id="input" style="height: 300px; width: 600px"></textarea><br />
  <input type="button" id="submit" value="Get Response" disabled /><br />
  <br />
  Result:<br />
  <textarea id="output" style="height: 300px; width: 600px"></textarea>
  <script type="module" src="index.js"></script>
</body>
</html>
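One final note: the in-browser task runs on the GPU backend, so if initialization fails with the generic alert, it is worth checking WebGPU availability first. A small sketch using the standard WebGPU API (the `checkWebGpu` helper is hypothetical, not part of the sample):

```js
// Hypothetical helper: verifies WebGPU support before loading the model,
// since the in-browser LLM task relies on the GPU backend.
async function checkWebGpu() {
  if (!('gpu' in navigator)) return false;  // WebGPU API missing entirely.
  const adapter = await navigator.gpu.requestAdapter();
  return adapter !== null;  // null means no usable GPU adapter was found.
}

checkWebGpu().then((ok) => {
  if (!ok) {
    alert('WebGPU is unavailable; try a recent version of Chrome.');
  }
});
```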