├── LICENSE
├── README.md
├── index.js
├── index.html
└── .gitignore

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2024 AI Anytime

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# On-device-LLM-Inference-using-Mediapipe
On-device LLM inference using the MediaPipe LLM Inference API.

# LLM Task Sample Setup Guide

This guide provides step-by-step instructions for setting up and running a sample LLM task on your local machine. Before you begin, make sure you have Python installed (3.x, or 2.x if you need the legacy server command below) and a modern web browser, preferably Chrome.

## Setup Instructions

- Create a new folder on your device named `llm_task`.
- Copy the `index.html` and `index.js` files into the `llm_task` folder. These provide the task's web interface.
- Download the Gemma 2B model (the TensorFlow Lite `2b-it-gpu-int4` or `2b-it-gpu-int8` variant) into the `llm_task` folder. Alternatively, you can convert an external LLM (such as Phi-2, Falcon, or StableLM) for the GPU backend, which is currently the only backend supported in the browser.
- Open `index.js` in a text editor and update the `modelFileName` variable with the name of your model file.
- Run a local HTTP server from within the `llm_task` folder by executing `python3 -m http.server 8000` (or `python -m SimpleHTTPServer 8000` on legacy Python 2) in your terminal. A server is required because the page loads ES modules and fetches the model file, both of which browsers block over `file://` URLs.
- Open a web browser and go to `http://localhost:8000`. The web interface for your LLM task will appear, and the button on the page becomes enabled once the model has loaded (roughly ten seconds, depending on your hardware), indicating the task is ready. A quick way to verify the model file is being served is shown below.

Enjoy exploring the capabilities of your large language model with this simple setup!
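As a sanity check that the server can actually reach the model file, you can paste something like the following into the browser console at `http://localhost:8000`. This is a minimal sketch; the file name is only an example and must match the model you downloaded:

```js
// Hedged sanity check: confirms the model file is served before the
// LLM task tries to fetch it. Adjust the name to your actual model file.
const modelFile = 'gemma-2b-it-gpu-int4.bin';
fetch(modelFile, {method: 'HEAD'})
    .then((resp) => {
      if (!resp.ok) throw new Error(`HTTP ${resp.status}`);
      console.log('Model file reachable:',
          resp.headers.get('content-length'), 'bytes');
    })
    .catch((err) => console.error('Model file not reachable:', err));
```

If this logs an error, check that the `.bin` file sits in the same `llm_task` folder the server was started from.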
--------------------------------------------------------------------------------
/index.js:
--------------------------------------------------------------------------------
import {FilesetResolver, LlmInference} from 'https://cdn.jsdelivr.net/npm/@mediapipe/tasks-genai';

const input = document.getElementById('input');
const output = document.getElementById('output');
const submit = document.getElementById('submit');

const modelFileName = 'gemma-2b-it-gpu-int4.bin';  // Update to match your model file.

/**
 * Displays newly generated partial results in the output text box.
 */
function displayPartialResults(partialResults, complete) {
  output.textContent += partialResults;

  if (complete) {
    if (!output.textContent) {
      output.textContent = 'Result is empty';
    }
    submit.disabled = false;
  }
}

/**
 * Main function to run LLM inference.
 */
async function runDemo() {
  const genaiFileset = await FilesetResolver.forGenAiTasks(
      'https://cdn.jsdelivr.net/npm/@mediapipe/tasks-genai/wasm');
  let llmInference;

  submit.onclick = () => {
    output.textContent = '';
    submit.disabled = true;
    llmInference.generateResponse(input.value, displayPartialResults);
  };

  submit.value = 'Loading the model...';
  LlmInference
      .createFromOptions(genaiFileset, {
        baseOptions: {modelAssetPath: modelFileName},
        // maxTokens: 512,  // The maximum number of tokens (input tokens +
        //                  // output tokens) the model handles.
        // randomSeed: 1,   // The random seed used during text generation.
        // topK: 1,         // The number of tokens the model considers at
        //                  // each step of generation. Limits predictions to
        //                  // the top k most-probable tokens. Setting
        //                  // randomSeed is required for this to take effect.
        // temperature: 1.0, // The amount of randomness introduced during
        //                   // generation. Setting randomSeed is required for
        //                   // this to take effect.
      })
      .then((llm) => {
        llmInference = llm;
        submit.disabled = false;
        submit.value = 'Get Response';
      })
      .catch(() => {
        alert('Failed to initialize the task.');
      });
}

runDemo();
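The commented-out options above control sampling. Here is a minimal sketch of enabling them, assuming the same CDN build of `@mediapipe/tasks-genai`; the values are illustrative, not recommendations. It also shows that `generateResponse` resolves with the full text when called without a progress callback, as an alternative to the streaming style used in `index.js`:

```js
// Illustrative sketch (run as a module script): creating the task with
// explicit sampling options. Values are examples only; tune for your model.
import {FilesetResolver, LlmInference} from 'https://cdn.jsdelivr.net/npm/@mediapipe/tasks-genai';

const genaiFileset = await FilesetResolver.forGenAiTasks(
    'https://cdn.jsdelivr.net/npm/@mediapipe/tasks-genai/wasm');

const llm = await LlmInference.createFromOptions(genaiFileset, {
  baseOptions: {modelAssetPath: 'gemma-2b-it-gpu-int4.bin'},
  maxTokens: 512,    // Budget for input + output tokens combined.
  topK: 40,          // Sample from the 40 most probable tokens.
  temperature: 0.8,  // Lower values make output more deterministic.
  randomSeed: 1,     // Required for topK/temperature to take effect.
});

// Without a progress callback, await the full response instead of streaming.
const text = await llm.generateResponse('Write one sentence about browsers.');
console.log(text);
```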
--------------------------------------------------------------------------------
/index.html:
--------------------------------------------------------------------------------
<!-- The markup was stripped from the original dump; this is a minimal
     reconstruction providing the three element IDs that index.js expects
     (input, submit, output) and loading index.js as a module. -->
<!DOCTYPE html>
<html>
<body>
  Input:<br />
  <textarea id="input" style="height: 300px; width: 600px"></textarea><br />
  <input type="button" id="submit" value="Get Response" disabled /><br />
  <br />
  Result:<br />
  <textarea id="output" style="height: 300px; width: 600px"></textarea>
  <script type="module" src="index.js"></script>
</body>
</html>
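One final note: the in-browser task runs on the GPU backend, so if initialization fails with the generic alert, it is worth checking WebGPU availability first. A small sketch using the standard WebGPU API (the `checkWebGpu` helper is hypothetical, not part of the sample):

```js
// Hypothetical helper: verifies WebGPU support before loading the model,
// since the in-browser LLM task relies on the GPU backend.
async function checkWebGpu() {
  if (!('gpu' in navigator)) return false;  // WebGPU API missing entirely.
  const adapter = await navigator.gpu.requestAdapter();
  return adapter !== null;  // null means no usable GPU adapter was found.
}

checkWebGpu().then((ok) => {
  if (!ok) {
    alert('WebGPU is unavailable; try a recent version of Chrome.');
  }
});
```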