├── .github ├── FUNDING.yml └── workflows │ └── deploy.yml ├── .gitignore ├── LICENSE ├── README.md ├── eslint.config.js ├── index.html ├── package-lock.json ├── package.json ├── public └── vite.svg ├── src ├── App.tsx ├── assets │ ├── icons │ │ ├── CheckmarkIcon.tsx │ │ ├── CircleSpinner.tsx │ │ ├── CopyButtonIcon.tsx │ │ ├── CopyIcon.tsx │ │ ├── GitHubIcon.tsx │ │ ├── LoadingSpinner.tsx │ │ ├── MicrophoneIcon.tsx │ │ ├── MoonIcon.tsx │ │ ├── ResetIcon.tsx │ │ ├── SendIcon.tsx │ │ ├── StopIcon.tsx │ │ └── SunIcon.tsx │ └── react.svg ├── components │ ├── AudioManager.tsx │ ├── AudioRecorder.tsx │ ├── AudioSection.tsx │ ├── DarkModeToggle.tsx │ ├── LatexOutput.tsx │ ├── LatexRenderer.tsx │ ├── LoadingIndicator.tsx │ ├── ModelLoader.tsx │ ├── ModelSelectionPanel.tsx │ ├── ModelSelector.tsx │ ├── Progress.tsx │ └── Transcript.tsx ├── hooks │ ├── useConversation.ts │ ├── useLLMEngine.ts │ ├── useTranscriber.ts │ └── useWorker.ts ├── index.css ├── main.tsx ├── utils │ ├── AudioUtils.ts │ ├── BlobFix.ts │ └── Constants.ts ├── vite-env.d.ts └── whisper-worker.js ├── tsconfig.app.json ├── tsconfig.json ├── tsconfig.node.json └── vite.config.ts /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: thomasmckanna 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry 12 | polar: # Replace with a single Polar 
username 13 | buy_me_a_coffee: # Replace with a single Buy Me a Coffee username 14 | thanks_dev: # Replace with a single thanks.dev username 15 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 16 | -------------------------------------------------------------------------------- /.github/workflows/deploy.yml: -------------------------------------------------------------------------------- 1 | name: Deploy to GitHub Pages 2 | 3 | on: 4 | push: 5 | branches: ["main"] 6 | workflow_dispatch: 7 | 8 | permissions: 9 | contents: read 10 | pages: write 11 | id-token: write 12 | 13 | concurrency: 14 | group: "pages" 15 | cancel-in-progress: false 16 | 17 | jobs: 18 | build: 19 | runs-on: ubuntu-latest 20 | steps: 21 | - name: Checkout 22 | uses: actions/checkout@v4 23 | 24 | - name: Setup Node 25 | uses: actions/setup-node@v4 26 | with: 27 | node-version: "20" 28 | cache: "npm" 29 | 30 | - name: Install dependencies 31 | run: npm ci 32 | 33 | - name: Build 34 | run: | 35 | # Build with the correct base path for GitHub Pages 36 | npm run build -- --base=/ 37 | 38 | - name: Create CNAME file 39 | run: echo "latex.thomasmckanna.com" > dist/CNAME 40 | 41 | - name: Setup Pages 42 | uses: actions/configure-pages@v4 43 | 44 | - name: Upload artifact 45 | uses: actions/upload-pages-artifact@v3 46 | with: 47 | path: "./dist" 48 | 49 | deploy: 50 | environment: 51 | name: github-pages 52 | url: ${{ steps.deployment.outputs.page_url }} 53 | runs-on: ubuntu-latest 54 | needs: build 55 | steps: 56 | - name: Deploy to GitHub Pages 57 | id: deployment 58 | uses: actions/deploy-pages@v4 59 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .aider* 2 | 3 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 
4 | 5 | # dependencies 6 | /node_modules 7 | /.pnp 8 | .pnp.js 9 | 10 | # testing 11 | /coverage 12 | 13 | # next.js 14 | /.next/ 15 | /out/ 16 | 17 | # production 18 | /build 19 | 20 | # misc 21 | .DS_Store 22 | *.pem 23 | 24 | # debug 25 | npm-debug.log* 26 | yarn-debug.log* 27 | yarn-error.log* 28 | .pnpm-debug.log* 29 | 30 | # local env files 31 | .env*.local 32 | 33 | # vercel 34 | .vercel 35 | 36 | # typescript 37 | *.tsbuildinfo 38 | next-env.d.ts 39 | dev 40 | 41 | .vscode 42 | .idea 43 | 44 | # docker-compose env files 45 | .env 46 | 47 | *.key 48 | *.key.pub 49 | 50 | # service worker generated files 51 | public/sw.js 52 | public/workbox-*.js 53 | public/workbox-*.js.map 54 | public/worker-*.js 55 | 56 | # Logs 57 | logs 58 | *.log 59 | npm-debug.log* 60 | yarn-debug.log* 61 | yarn-error.log* 62 | pnpm-debug.log* 63 | lerna-debug.log* 64 | 65 | node_modules 66 | dist 67 | dist-ssr 68 | *.local 69 | 70 | # Editor directories and files 71 | .vscode/* 72 | !.vscode/extensions.json 73 | .idea 74 | .DS_Store 75 | *.suo 76 | *.ntvs* 77 | *.njsproj 78 | *.sln 79 | *.sw? 80 | 81 | reference/ 82 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Thomas McKanna 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Speech to LaTeX 2 | 3 | 4 | 5 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) 6 | [![Built with WebLLM](https://img.shields.io/badge/Built%20with-WebLLM-blue)](https://github.com/mlc-ai/web-llm) 7 | [![Powered by Whisper](https://img.shields.io/badge/Powered%20by-Whisper-orange)](https://github.com/xenova/whisper-web) 8 | [![Rendered with MathJax](https://img.shields.io/badge/Rendered%20with-MathJax-green)](https://www.mathjax.org/) 9 | 10 | https://github.com/user-attachments/assets/595ae5fd-17d8-49bc-b811-ad450c06a02a 11 | 12 | ## [🚀 Try it now!](https://thomas-mckanna.github.io/speech-to-latex/) 13 | 14 | ## Overview 15 | 16 | Speech to LaTeX is a powerful web application that converts spoken mathematics into LaTeX expressions, running entirely in your browser. No server required! Simply speak your mathematical expressions, and watch as they're transformed into beautifully formatted LaTeX. 
17 | 18 | ## Features 19 | 20 | - 🎤 **Voice to LaTeX** - Dictate mathematical expressions naturally 21 | - 💻 **100% Client-side** - All processing happens in your browser 22 | - 🔒 **Privacy-focused** - No data leaves your device 23 | - 🌐 **Works offline** - Once loaded, no internet connection needed 24 | - ⚡ **Real-time conversion** - See results as you speak 25 | - 🎛️ **Customizable models** - Choose from different Whisper and LLM models 26 | 27 | ## How It Works 28 | 29 | Speech to LaTeX combines three powerful technologies: 30 | 31 | 1. **[Whisper Web](https://github.com/xenova/whisper-web)** - Transcribes your speech to text 32 | 2. **[WebLLM](https://github.com/mlc-ai/web-llm)** - Converts transcribed text to LaTeX expressions 33 | 3. **[MathJax](https://www.mathjax.org/)** - Renders LaTeX expressions beautifully in the browser 34 | 35 | ## Getting Started 36 | 37 | 1. Visit the [Speech to LaTeX app](https://thomas-mckanna.github.io/speech-to-latex/) 38 | 2. Allow microphone access when prompted 39 | 3. Click the microphone button and start speaking your mathematical expression 40 | 4. Watch as your speech is converted to LaTeX in real-time 41 | 5. Copy the generated LaTeX code or view the rendered expression 42 | 43 | ## Example Expressions 44 | 45 | Try saying: 46 | - "The quadratic formula is x equals negative b plus or minus the square root of b squared minus 4ac all over 2a" 47 | - "The integral from 0 to infinity of e to the negative x squared dx equals square root of pi over 2" 48 | - "The sum from n equals 1 to infinity of 1 over n squared equals pi squared over 6" 49 | 50 | ## Local Development 51 | 52 | ```bash 53 | # Clone the repository 54 | git clone https://github.com/Thomas-McKanna/speech-to-latex.git 55 | 56 | # Navigate to the project directory 57 | cd speech-to-latex 58 | 59 | # Install dependencies 60 | npm install 61 | 62 | # Start the development server 63 | npm run dev 64 | ``` 65 | 66 | ## Contributing 67 | 68 | Contributions are welcome! 
Please feel free to submit a Pull Request. 69 | 70 | ## Acknowledgements 71 | 72 | This project wouldn't be possible without these amazing open-source projects: 73 | 74 | - [WebLLM](https://github.com/mlc-ai/web-llm) - For running LLMs directly in the browser 75 | - [Whisper Web](https://github.com/xenova/whisper-web) - For browser-based speech recognition 76 | - [MathJax](https://www.mathjax.org/) - For rendering LaTeX expressions 77 | 78 | ## License 79 | 80 | This project is licensed under the MIT License - see the LICENSE file for details. 81 | -------------------------------------------------------------------------------- /eslint.config.js: -------------------------------------------------------------------------------- 1 | import js from '@eslint/js' 2 | import globals from 'globals' 3 | import reactHooks from 'eslint-plugin-react-hooks' 4 | import reactRefresh from 'eslint-plugin-react-refresh' 5 | import tseslint from 'typescript-eslint' 6 | 7 | export default tseslint.config( 8 | { ignores: ['dist'] }, 9 | { 10 | extends: [js.configs.recommended, ...tseslint.configs.recommended], 11 | files: ['**/*.{ts,tsx}'], 12 | languageOptions: { 13 | ecmaVersion: 2020, 14 | globals: globals.browser, 15 | }, 16 | plugins: { 17 | 'react-hooks': reactHooks, 18 | 'react-refresh': reactRefresh, 19 | }, 20 | rules: { 21 | ...reactHooks.configs.recommended.rules, 22 | 'react-refresh/only-export-components': [ 23 | 'warn', 24 | { allowConstantExport: true }, 25 | ], 26 | }, 27 | }, 28 | ) 29 | -------------------------------------------------------------------------------- /index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Speech to LaTeX 8 | 9 | 10 | 11 |
12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "speech-to-latex", 3 | "private": true, 4 | "version": "0.0.0", 5 | "type": "module", 6 | "scripts": { 7 | "dev": "vite", 8 | "build": "vite build", 9 | "lint": "eslint .", 10 | "preview": "vite preview" 11 | }, 12 | "dependencies": { 13 | "@mlc-ai/web-llm": "^0.2.78", 14 | "@tailwindcss/vite": "^4.1.4", 15 | "@xenova/transformers": "^2.17.2", 16 | "better-react-mathjax": "^2.3.0", 17 | "react": "^19.0.0", 18 | "react-dom": "^19.0.0", 19 | "tailwindcss": "^4.1.4" 20 | }, 21 | "devDependencies": { 22 | "@eslint/js": "^9.22.0", 23 | "@types/react": "^19.0.10", 24 | "@types/react-dom": "^19.0.4", 25 | "@vitejs/plugin-react": "^4.3.4", 26 | "eslint": "^9.22.0", 27 | "eslint-plugin-react-hooks": "^5.2.0", 28 | "eslint-plugin-react-refresh": "^0.4.19", 29 | "globals": "^16.0.0", 30 | "typescript": "~5.7.2", 31 | "typescript-eslint": "^8.26.1", 32 | "vite": "^6.3.0" 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /public/vite.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/App.tsx: -------------------------------------------------------------------------------- 1 | import { useState, useEffect } from "react"; 2 | import { useTranscriber } from "./hooks/useTranscriber"; 3 | import { ModelSelectionPanel } from "./components/ModelSelectionPanel"; 4 | import { LatexOutput } from "./components/LatexOutput"; 5 | import { LoadingIndicator } from "./components/LoadingIndicator"; 6 | import { ModelLoader } from "./components/ModelLoader"; 7 | import { AudioSection } from "./components/AudioSection"; 8 | import { useLLMEngine } from "./hooks/useLLMEngine"; 9 | import { useConversation } from 
"./hooks/useConversation"; 10 | import { DarkModeToggle } from "./components/DarkModeToggle"; 11 | import { GitHubIcon } from "./assets/icons/GitHubIcon"; 12 | 13 | function App() { 14 | const [whisperModel, setWhisperModel] = useState("Xenova/whisper-tiny"); 15 | const [isWhisperModelLoading, setIsWhisperModelLoading] = useState(false); 16 | const [llmModel, setLlmModel] = useState("Llama-3.1-8B-Instruct-q4f32_1-MLC"); 17 | 18 | const transcriber = useTranscriber(); 19 | const { engine, loadingStatus, modelLoaded, isChangingModel } = 20 | useLLMEngine(llmModel); 21 | const { 22 | latexOutput, 23 | isLoading, 24 | hasPreviousExpression, 25 | sendToLLM, 26 | resetConversation, 27 | } = useConversation(engine); 28 | 29 | // Set the whisper model when it changes in the dropdown 30 | useEffect(() => { 31 | const loadModel = async () => { 32 | setIsWhisperModelLoading(true); 33 | await transcriber.setModel(whisperModel); 34 | setIsWhisperModelLoading(false); 35 | }; 36 | 37 | loadModel(); 38 | }, [whisperModel]); 39 | 40 | return ( 41 |
42 |
43 |
44 |

45 | Speech to LaTeX 46 |

47 | 48 |
49 | 50 | 51 | 52 | {modelLoaded && ( 53 |
54 | {/* LaTeX Rendered Output */} 55 | 62 | 63 | {/* Audio Recording Section */} 64 | 70 | 71 | {/* Model Selection Panel */} 72 |
73 | 81 |
82 |
83 | )} 84 |
85 | 86 | {/* GitHub Link */} 87 | 98 |
99 | ); 100 | } 101 | 102 | export default App; 103 | -------------------------------------------------------------------------------- /src/assets/icons/CheckmarkIcon.tsx: -------------------------------------------------------------------------------- 1 | interface CheckmarkIconProps { 2 | className?: string; 3 | } 4 | 5 | export function CheckmarkIcon({ className = "" }: CheckmarkIconProps) { 6 | return ( 7 | 17 | 18 | 19 | ); 20 | } 21 | -------------------------------------------------------------------------------- /src/assets/icons/CircleSpinner.tsx: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | 3 | export const CircleSpinner: React.FC> = (props) => ( 4 | 22 | ); 23 | -------------------------------------------------------------------------------- /src/assets/icons/CopyButtonIcon.tsx: -------------------------------------------------------------------------------- 1 | interface CopyButtonIconProps { 2 | className?: string; 3 | } 4 | 5 | export function CopyButtonIcon({ className = "" }: CopyButtonIconProps) { 6 | return ( 7 | 17 | 18 | 19 | 20 | ); 21 | } 22 | -------------------------------------------------------------------------------- /src/assets/icons/CopyIcon.tsx: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | 3 | interface CopyIconProps { 4 | className?: string; 5 | } 6 | 7 | export const CopyIcon: React.FC = ({ className = "" }) => { 8 | return ( 9 | 21 | 22 | 23 | 24 | 25 | ); 26 | }; 27 | -------------------------------------------------------------------------------- /src/assets/icons/GitHubIcon.tsx: -------------------------------------------------------------------------------- 1 | interface GitHubIconProps { 2 | className?: string; 3 | } 4 | 5 | export function GitHubIcon({ className = "" }: GitHubIconProps) { 6 | return ( 7 | 13 | 14 | 15 | ); 16 | } 17 | 
-------------------------------------------------------------------------------- /src/assets/icons/LoadingSpinner.tsx: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | 3 | export const LoadingSpinner: React.FC> = (props) => ( 4 | 11 | 19 | 24 | 25 | ); 26 | -------------------------------------------------------------------------------- /src/assets/icons/MicrophoneIcon.tsx: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | 3 | export const MicrophoneIcon: React.FC> = (props) => ( 4 | 13 | 18 | 19 | ); 20 | -------------------------------------------------------------------------------- /src/assets/icons/MoonIcon.tsx: -------------------------------------------------------------------------------- 1 | interface MoonIconProps { 2 | className?: string; 3 | } 4 | 5 | export function MoonIcon({ className = "" }: MoonIconProps) { 6 | return ( 7 | 13 | 14 | 15 | ); 16 | } 17 | -------------------------------------------------------------------------------- /src/assets/icons/ResetIcon.tsx: -------------------------------------------------------------------------------- 1 | interface ResetIconProps { 2 | className?: string; 3 | } 4 | 5 | export function ResetIcon({ className = "" }: ResetIconProps) { 6 | return ( 7 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | ); 25 | } 26 | -------------------------------------------------------------------------------- /src/assets/icons/SendIcon.tsx: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | 3 | export const SendIcon: React.FC> = (props) => ( 4 | 12 | 18 | 19 | ); 20 | -------------------------------------------------------------------------------- /src/assets/icons/StopIcon.tsx: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | 3 | export const StopIcon: React.FC> = (props) 
=> ( 4 | 12 | 13 | 14 | ); 15 | -------------------------------------------------------------------------------- /src/assets/icons/SunIcon.tsx: -------------------------------------------------------------------------------- 1 | interface SunIconProps { 2 | className?: string; 3 | } 4 | 5 | export function SunIcon({ className = "" }: SunIconProps) { 6 | return ( 7 | 13 | 18 | 19 | ); 20 | } 21 | -------------------------------------------------------------------------------- /src/assets/react.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/components/AudioManager.tsx: -------------------------------------------------------------------------------- 1 | import { useState, useEffect } from "react"; 2 | import Constants from "../utils/Constants"; 3 | import { Transcriber } from "../hooks/useTranscriber"; 4 | import Progress from "./Progress"; 5 | import AudioRecorder from "./AudioRecorder"; 6 | 7 | export function AudioManager({ 8 | transcriber, 9 | onTranscriptReady, 10 | isModelLoading, 11 | }: { 12 | transcriber: Transcriber; 13 | onTranscriptReady?: (text: string) => void; 14 | isModelLoading?: boolean; 15 | }) { 16 | const [isRecording, setIsRecording] = useState(false); 17 | 18 | const processAudioRecording = async (data: Blob) => { 19 | if (!data) return; 20 | 21 | const fileReader = new FileReader(); 22 | 23 | fileReader.onloadend = async () => { 24 | try { 25 | const audioCTX = new AudioContext({ 26 | sampleRate: Constants.SAMPLING_RATE, 27 | }); 28 | const arrayBuffer = fileReader.result as ArrayBuffer; 29 | const decoded = await audioCTX.decodeAudioData(arrayBuffer); 30 | 31 | // Start transcription immediately 32 | transcriber.start(decoded); 33 | } catch (error) { 34 | console.error("Error processing audio:", error); 35 | } 36 | }; 37 | 38 | fileReader.readAsArrayBuffer(data); 39 | }; 40 | 41 | // When transcription is complete, 
pass the text to parent component 42 | useEffect(() => { 43 | if (onTranscriptReady && transcriber.output && !transcriber.isBusy) { 44 | const text = transcriber.output.chunks 45 | .map((chunk) => chunk.text) 46 | .join(" ") 47 | .trim(); 48 | 49 | if (text) { 50 | onTranscriptReady(text); 51 | } 52 | } 53 | }, [transcriber.output, transcriber.isBusy]); 54 | 55 | return ( 56 |
57 |
58 |
59 |
60 | { 63 | setIsRecording(!isRecording); 64 | if (blob) { 65 | transcriber.onInputChange(); 66 | processAudioRecording(blob); 67 | } 68 | }} 69 | disabled={isModelLoading || transcriber.isModelLoading} 70 | /> 71 |
72 | 73 | {(isModelLoading || 74 | transcriber.isModelLoading || 75 | transcriber.isBusy) && ( 76 |
77 | {isModelLoading || transcriber.isModelLoading 78 | ? "Loading model..." 79 | : transcriber.isBusy 80 | ? "Transcribing..." 81 | : ""} 82 |
83 | )} 84 |
85 |
86 | 87 | {transcriber.progressItems.length > 0 && ( 88 |
89 | 92 | {transcriber.progressItems.map((data) => ( 93 |
94 | 95 |
96 | ))} 97 |
98 | )} 99 |
100 | ); 101 | } 102 | -------------------------------------------------------------------------------- /src/components/AudioRecorder.tsx: -------------------------------------------------------------------------------- 1 | import { useState, useEffect, useRef } from "react"; 2 | import { MicrophoneIcon } from "../assets/icons/MicrophoneIcon"; 3 | import { StopIcon } from "../assets/icons/StopIcon"; 4 | import { formatAudioTimestamp } from "../utils/AudioUtils"; 5 | import { webmFixDuration } from "../utils/BlobFix"; 6 | 7 | function getMimeType() { 8 | const types = [ 9 | "audio/webm", 10 | "audio/mp4", 11 | "audio/ogg", 12 | "audio/wav", 13 | "audio/aac", 14 | ]; 15 | for (let i = 0; i < types.length; i++) { 16 | if (MediaRecorder.isTypeSupported(types[i])) { 17 | return types[i]; 18 | } 19 | } 20 | return undefined; 21 | } 22 | 23 | export default function AudioRecorder(props: { 24 | isRecording: boolean; 25 | onRecordingToggle: (blob?: Blob) => void; 26 | disabled?: boolean; 27 | }) { 28 | const [duration, setDuration] = useState(0); 29 | const { isRecording } = props; 30 | 31 | const streamRef = useRef(null); 32 | const mediaRecorderRef = useRef(null); 33 | const chunksRef = useRef([]); 34 | const startTimeRef = useRef(0); 35 | 36 | // Initialize or stop the media recorder when isRecording changes 37 | useEffect(() => { 38 | const startRecording = async () => { 39 | startTimeRef.current = Date.now(); 40 | chunksRef.current = []; 41 | 42 | try { 43 | if (!streamRef.current) { 44 | streamRef.current = await navigator.mediaDevices.getUserMedia({ 45 | audio: true, 46 | }); 47 | } 48 | 49 | const mimeType = getMimeType(); 50 | const mediaRecorder = new MediaRecorder(streamRef.current, { 51 | mimeType, 52 | }); 53 | 54 | mediaRecorderRef.current = mediaRecorder; 55 | 56 | mediaRecorder.addEventListener("dataavailable", async (event) => { 57 | console.log( 58 | "MediaRecorder data available:", 59 | event.data.size, 60 | "bytes" 61 | ); 62 | if (event.data.size > 0) { 
63 | chunksRef.current.push(event.data); 64 | } 65 | }); 66 | 67 | mediaRecorder.addEventListener("stop", async () => { 68 | console.log("MediaRecorder stopped"); 69 | const recordingDuration = Date.now() - startTimeRef.current; 70 | console.log("Recording duration:", recordingDuration, "ms"); 71 | 72 | if (chunksRef.current.length === 0) { 73 | console.error("No audio data recorded"); 74 | props.onRecordingToggle(); 75 | return; 76 | } 77 | 78 | // Received a stop event 79 | let blob = new Blob(chunksRef.current, { type: mimeType }); 80 | console.log("Created blob:", blob.size, "bytes, type:", blob.type); 81 | 82 | if (mimeType === "audio/webm") { 83 | try { 84 | blob = await webmFixDuration(blob, recordingDuration, blob.type); 85 | console.log("Fixed webm duration"); 86 | } catch (error) { 87 | console.error("Error fixing webm duration:", error); 88 | } 89 | } 90 | 91 | props.onRecordingToggle(blob); 92 | }); 93 | 94 | mediaRecorder.start(1000); // Collect data every second for better reliability 95 | setDuration(0); 96 | } catch (error) { 97 | console.error("Error accessing microphone:", error); 98 | props.onRecordingToggle(); // Toggle back to not recording 99 | } 100 | }; 101 | 102 | const stopRecording = () => { 103 | if ( 104 | mediaRecorderRef.current && 105 | mediaRecorderRef.current.state === "recording" 106 | ) { 107 | mediaRecorderRef.current.stop(); // set state to inactive 108 | setDuration(0); 109 | } 110 | }; 111 | 112 | if (isRecording) { 113 | startRecording(); 114 | } else if (mediaRecorderRef.current) { 115 | stopRecording(); 116 | } 117 | 118 | return () => { 119 | if (!isRecording && streamRef.current) { 120 | streamRef.current.getTracks().forEach((track) => track.stop()); 121 | streamRef.current = null; 122 | } 123 | }; 124 | }, [isRecording]); 125 | 126 | // Timer effect for recording duration 127 | useEffect(() => { 128 | if (isRecording) { 129 | const timer = setInterval(() => { 130 | setDuration((prevDuration) => prevDuration + 1); 131 | 
}, 1000); 132 | 133 | return () => { 134 | clearInterval(timer); 135 | }; 136 | } 137 | }, [isRecording]); 138 | 139 | return ( 140 | 154 | ); 155 | } 156 | -------------------------------------------------------------------------------- /src/components/AudioSection.tsx: -------------------------------------------------------------------------------- 1 | import { AudioManager } from "./AudioManager"; 2 | import { Transcriber } from "../hooks/useTranscriber"; 3 | 4 | interface AudioSectionProps { 5 | transcriber: Transcriber; 6 | onTranscriptReady: (text: string) => void; 7 | isWhisperModelLoading: boolean; 8 | hasPreviousExpression: boolean; 9 | } 10 | 11 | export function AudioSection({ 12 | transcriber, 13 | onTranscriptReady, 14 | isWhisperModelLoading, 15 | hasPreviousExpression 16 | }: AudioSectionProps) { 17 | return ( 18 |
19 |

20 | {hasPreviousExpression ? "Modify Expression" : "Dictate Math Expression"} 21 |

22 | 27 |
28 | ); 29 | } 30 | -------------------------------------------------------------------------------- /src/components/DarkModeToggle.tsx: -------------------------------------------------------------------------------- 1 | import { useState, useEffect } from "react"; 2 | import { SunIcon } from "../assets/icons/SunIcon"; 3 | import { MoonIcon } from "../assets/icons/MoonIcon"; 4 | 5 | export function DarkModeToggle() { 6 | const [darkMode, setDarkMode] = useState(false); 7 | 8 | // On component mount, check if user has a preference 9 | useEffect(() => { 10 | // Check if user has a preference in localStorage 11 | const savedPreference = localStorage.getItem("darkMode"); 12 | 13 | // If they have a preference, use it 14 | if (savedPreference !== null) { 15 | setDarkMode(savedPreference === "true"); 16 | } 17 | // Otherwise check system preference 18 | else if (window.matchMedia && window.matchMedia('(prefers-color-scheme: dark)').matches) { 19 | setDarkMode(true); 20 | } 21 | }, []); 22 | 23 | // Update the document when darkMode changes 24 | useEffect(() => { 25 | if (darkMode) { 26 | document.documentElement.classList.add('dark'); 27 | document.documentElement.style.setProperty('color-scheme', 'dark'); 28 | document.documentElement.style.setProperty('--bg-primary', 'var(--color-dark-bg-primary)'); 29 | document.documentElement.style.setProperty('--bg-secondary', 'var(--color-dark-bg-secondary)'); 30 | document.documentElement.style.setProperty('--text-primary', 'var(--color-dark-text-primary)'); 31 | document.documentElement.style.setProperty('--text-secondary', 'var(--color-dark-text-secondary)'); 32 | document.documentElement.style.setProperty('--border-color', 'var(--color-dark-border)'); 33 | } else { 34 | document.documentElement.classList.remove('dark'); 35 | document.documentElement.style.setProperty('color-scheme', 'light'); 36 | document.documentElement.style.setProperty('--bg-primary', 'white'); 37 | 
document.documentElement.style.setProperty('--bg-secondary', 'oklch(0.97 0.01 240)'); 38 | document.documentElement.style.setProperty('--text-primary', 'oklch(0.2 0.01 240)'); 39 | document.documentElement.style.setProperty('--text-secondary', 'oklch(0.4 0.01 240)'); 40 | document.documentElement.style.setProperty('--border-color', 'oklch(0.85 0.01 240)'); 41 | } 42 | localStorage.setItem("darkMode", darkMode.toString()); 43 | }, [darkMode]); 44 | 45 | return ( 46 | 57 | ); 58 | } 59 | -------------------------------------------------------------------------------- /src/components/LatexOutput.tsx: -------------------------------------------------------------------------------- 1 | import { ResetIcon } from "../assets/icons/ResetIcon"; 2 | import { CopyButtonIcon } from "../assets/icons/CopyButtonIcon"; 3 | import { CheckmarkIcon } from "../assets/icons/CheckmarkIcon"; 4 | import { LoadingSpinner } from "../assets/icons/LoadingSpinner"; 5 | import LatexRenderer from "./LatexRenderer"; 6 | import { useState } from "react"; 7 | 8 | interface LatexOutputProps { 9 | latexOutput: string; 10 | hasPreviousExpression: boolean; 11 | onReset: () => void; 12 | isLoading?: boolean; 13 | isChangingModel?: boolean; 14 | } 15 | 16 | export function LatexOutput({ 17 | latexOutput, 18 | hasPreviousExpression, 19 | onReset, 20 | isLoading = false, 21 | isChangingModel = false, 22 | }: LatexOutputProps) { 23 | const [copied, setCopied] = useState(false); 24 | 25 | const handleCopy = () => { 26 | navigator.clipboard.writeText(latexOutput); 27 | setCopied(true); 28 | 29 | // Reset after 1.5 seconds 30 | setTimeout(() => { 31 | setCopied(false); 32 | }, 1500); 33 | }; 34 | 35 | return ( 36 |
37 |
38 |

39 | LaTeX Expression 40 |

41 | {hasPreviousExpression && ( 42 | 50 | )} 51 |
52 | 53 | {/* Rendered LaTeX */} 54 |
55 |
56 | {isLoading || isChangingModel ? ( 57 |
58 |
59 | 60 | 61 | {isChangingModel 62 | ? "Loading LLM model..." 63 | : "Converting speech to LaTeX..."} 64 | 65 |
66 |
67 | ) : latexOutput ? ( 68 | 69 | ) : ( 70 |

71 | LaTeX rendering will appear here... 72 |

73 | )} 74 |
75 |
76 | 77 | {/* Raw LaTeX Code */} 78 | {latexOutput && ( 79 |
80 | 95 |
96 |
 97 |               {latexOutput}
 98 |             
99 |
100 |
101 | )} 102 |
103 | ); 104 | } 105 | -------------------------------------------------------------------------------- /src/components/LatexRenderer.tsx: -------------------------------------------------------------------------------- 1 | import { FC } from "react"; 2 | import { MathJax, MathJaxContext } from "better-react-mathjax"; 3 | 4 | const config = { 5 | loader: { load: ["[tex]/html"] }, 6 | tex: { 7 | packages: { "[+]": ["html"] }, 8 | inlineMath: [ 9 | ["$", "$"], 10 | ["\\(", "\\)"], 11 | ], 12 | }, 13 | chtml: { 14 | scale: 2.0, 15 | }, 16 | }; 17 | 18 | interface LatexRendererProps { 19 | latex: string; 20 | } 21 | 22 | const LatexRenderer: FC = ({ latex }) => { 23 | if (!latex) return null; 24 | 25 | try { 26 | // Extract the LaTeX content from any delimiters 27 | let cleanLatex = latex; 28 | 29 | // Remove any existing delimiters 30 | cleanLatex = cleanLatex 31 | .replace(/^\\\(|\\\)$/g, "") // Remove inline delimiters \( \) 32 | .replace(/^\\\[|\\\]$/g, "") // Remove display delimiters \[ \] 33 | .replace(/^\$|\$$/g, "") // Remove $ delimiters 34 | .replace(/^\$\$|\$\$$/g, "") // Remove $$ delimiters 35 | .trim(); 36 | 37 | // Format with proper delimiters for MathJax - always use inline mode 38 | const formattedLatex = `$${cleanLatex}$`; 39 | 40 | return ( 41 |
42 | 43 | 44 | {formattedLatex} 45 | 46 | 47 |
48 | ); 49 | } catch (error) { 50 | console.error("Error rendering LaTeX:", error); 51 | return ( 52 |
53 | Error rendering LaTeX: {(error as Error).message} 54 |
55 | ); 56 | } 57 | }; 58 | 59 | export default LatexRenderer; 60 | -------------------------------------------------------------------------------- /src/components/LoadingIndicator.tsx: -------------------------------------------------------------------------------- 1 | import { LoadingSpinner } from "../assets/icons/LoadingSpinner"; 2 | 3 | interface LoadingIndicatorProps { 4 | isLoading: boolean; 5 | isChangingModel: boolean; 6 | } 7 | 8 | export function LoadingIndicator({ isLoading, isChangingModel }: LoadingIndicatorProps) { 9 | if (!isLoading && !isChangingModel) return null; 10 | 11 | return ( 12 |
13 |
14 | 15 | {isChangingModel 16 | ? "Loading LLM model..." 17 | : "Converting speech to LaTeX..."} 18 |
19 |
20 | ); 21 | } 22 | -------------------------------------------------------------------------------- /src/components/ModelLoader.tsx: -------------------------------------------------------------------------------- 1 | interface ModelLoaderProps { 2 | loadingStatus: string; 3 | modelLoaded: boolean; 4 | } 5 | 6 | export function ModelLoader({ loadingStatus, modelLoaded }: ModelLoaderProps) { 7 | if (modelLoaded) return null; 8 | 9 | return ( 10 |
11 |
12 | {loadingStatus} 13 |
14 |
15 |
26 |
27 |
28 | ); 29 | } 30 | -------------------------------------------------------------------------------- /src/components/ModelSelectionPanel.tsx: -------------------------------------------------------------------------------- 1 | import { ModelSelector } from "./ModelSelector"; 2 | import { DEFAULT_MODELS } from "../utils/Constants"; 3 | 4 | interface ModelSelectionPanelProps { 5 | llmModel: string; 6 | setLlmModel: (model: string) => void; 7 | whisperModel: string; 8 | setWhisperModel: (model: string) => void; 9 | isChangingLLMModel: boolean; 10 | isWhisperModelLoading: boolean; 11 | } 12 | 13 | export function ModelSelectionPanel({ 14 | llmModel, 15 | setLlmModel, 16 | whisperModel, 17 | setWhisperModel, 18 | isChangingLLMModel, 19 | isWhisperModelLoading 20 | }: ModelSelectionPanelProps) { 21 | return ( 22 |
23 |

Model Selection

24 |
25 |
26 | 32 | {/* Group models by family */} 33 | {Object.entries( 34 | DEFAULT_MODELS.reduce((acc, model) => { 35 | const family = model.family; 36 | if (!acc[family]) { 37 | acc[family] = []; 38 | } 39 | acc[family].push(model); 40 | return acc; 41 | }, {} as Record) 42 | ).map(([family, models]) => ( 43 | 44 | {models.map((modelOption) => ( 45 | 52 | ))} 53 | 54 | ))} 55 | 56 |
57 |
58 | 64 | 65 | 66 | 67 | 68 | 69 |
70 |
71 |
72 | ); 73 | } 74 | -------------------------------------------------------------------------------- /src/components/ModelSelector.tsx: -------------------------------------------------------------------------------- 1 | import { ReactNode } from "react"; 2 | 3 | interface ModelSelectorProps { 4 | label: string; 5 | model: string; 6 | onChange: (model: string) => void; 7 | disabled?: boolean; 8 | children: ReactNode; 9 | } 10 | 11 | export function ModelSelector({ 12 | label, 13 | model, 14 | onChange, 15 | disabled = false, 16 | children, 17 | }: ModelSelectorProps) { 18 | return ( 19 |
20 | 21 | 29 |
30 | ); 31 | } 32 | -------------------------------------------------------------------------------- /src/components/Progress.tsx: -------------------------------------------------------------------------------- 1 | export default function Progress({ 2 | text, 3 | percentage, 4 | }: { 5 | text: string; 6 | percentage: number; 7 | }) { 8 | percentage = percentage ?? 0; 9 | return ( 10 |
11 |
15 | {text} ({`${percentage.toFixed(2)}%`}) 16 |
17 |
18 | ); 19 | } 20 | -------------------------------------------------------------------------------- /src/components/Transcript.tsx: -------------------------------------------------------------------------------- 1 | import { useRef, useEffect } from "react"; 2 | 3 | import { TranscriberData } from "../hooks/useTranscriber"; 4 | import { formatAudioTimestamp } from "../utils/AudioUtils"; 5 | 6 | interface Props { 7 | transcribedData: TranscriberData | undefined; 8 | } 9 | 10 | export default function Transcript({ transcribedData }: Props) { 11 | const divRef = useRef(null); 12 | 13 | const saveBlob = (blob: Blob, filename: string) => { 14 | const url = URL.createObjectURL(blob); 15 | const link = document.createElement("a"); 16 | link.href = url; 17 | link.download = filename; 18 | link.click(); 19 | URL.revokeObjectURL(url); 20 | }; 21 | 22 | const exportTXT = () => { 23 | const chunks = transcribedData?.chunks ?? []; 24 | const text = chunks 25 | .map((chunk) => chunk.text) 26 | .join("") 27 | .trim(); 28 | 29 | const blob = new Blob([text], { type: "text/plain" }); 30 | saveBlob(blob, "transcript.txt"); 31 | }; 32 | 33 | // Scroll to the bottom when the component updates 34 | useEffect(() => { 35 | if (divRef.current) { 36 | divRef.current.scrollTop = divRef.current.scrollHeight; 37 | } 38 | }); 39 | 40 | if (!transcribedData?.chunks?.length) { 41 | return null; 42 | } 43 | 44 | return ( 45 |
46 |

Transcription

47 | 48 |
52 | {transcribedData.chunks.map((chunk, i) => ( 53 |
57 |
58 | {formatAudioTimestamp(chunk.timestamp[0])} 59 |
60 |
{chunk.text}
61 |
62 | ))} 63 |
64 | 65 | {!transcribedData.isBusy && 66 | { 67 | /* Removed buttons as they're no longer needed */ 68 | }} 69 |
70 | ); 71 | } 72 | -------------------------------------------------------------------------------- /src/hooks/useConversation.ts: -------------------------------------------------------------------------------- 1 | import { useState } from "react"; 2 | import * as webllm from "@mlc-ai/web-llm"; 3 | 4 | export interface ChatMessage { 5 | role: "system" | "user" | "assistant"; 6 | content: string; 7 | } 8 | 9 | export interface ConversationState { 10 | latexOutput: string; 11 | isLoading: boolean; 12 | hasPreviousExpression: boolean; 13 | conversationHistory: ChatMessage[]; 14 | sendToLLM: (text: string) => Promise; 15 | resetConversation: () => void; 16 | } 17 | 18 | export function useConversation(engine: webllm.MLCEngine | null): ConversationState { 19 | const [latexOutput, setLatexOutput] = useState(""); 20 | const [isLoading, setIsLoading] = useState(false); 21 | const [hasPreviousExpression, setHasPreviousExpression] = useState(false); 22 | const [conversationHistory, setConversationHistory] = useState([ 23 | { 24 | role: "system", 25 | content: `You are a LaTeX expression generator. Convert the user's spoken math description into a valid LaTeX expression. Return ONLY the LaTeX code without any explanations, markdown formatting, or backticks. Do not include any text before or after the LaTeX expression. Do not include any $ symbols.`, 26 | }, 27 | ]); 28 | 29 | // Send text to LLM 30 | const sendToLLM = async (text: string) => { 31 | if (!text.trim() || !engine) return; 32 | 33 | setIsLoading(true); 34 | 35 | try { 36 | console.log("Sending to LLM:", text); 37 | 38 | // Create the user message based on whether we're creating a new expression or modifying an existing one 39 | const userMessage: ChatMessage = { 40 | role: "user", 41 | content: hasPreviousExpression 42 | ? 
`Modify the previous LaTeX expression based on this instruction: "${text.trim()}"` 43 | : `Convert this math description to LaTeX: "${text.trim()}"`, 44 | }; 45 | 46 | // Add the user message to conversation history 47 | const updatedHistory = [...conversationHistory, userMessage]; 48 | setConversationHistory(updatedHistory); 49 | 50 | // Use streaming for better user experience 51 | const chunks = await engine.chat.completions.create({ 52 | messages: updatedHistory, 53 | temperature: 0.3, 54 | stream: true, 55 | }); 56 | 57 | let fullResponse = ""; 58 | for await (const chunk of chunks) { 59 | const content = chunk.choices[0]?.delta.content || ""; 60 | fullResponse += content; 61 | setLatexOutput(fullResponse); 62 | } 63 | 64 | // Add the assistant's response to the conversation history 65 | setConversationHistory([ 66 | ...updatedHistory, 67 | { role: "assistant", content: fullResponse }, 68 | ]); 69 | 70 | // Now we have a previous expression 71 | setHasPreviousExpression(true); 72 | 73 | console.log("LaTeX generation complete"); 74 | } catch (error) { 75 | console.error("Error generating LaTeX:", error); 76 | } finally { 77 | setIsLoading(false); 78 | } 79 | }; 80 | 81 | // Reset the conversation and latex output 82 | const resetConversation = () => { 83 | setLatexOutput(""); 84 | setHasPreviousExpression(false); 85 | setConversationHistory([ 86 | { 87 | role: "system", 88 | content: `You are a LaTeX expression generator. Convert the user's spoken math description into a valid LaTeX expression. Return ONLY the LaTeX code without any explanations, markdown formatting, or backticks. Do not include any text before or after the LaTeX expression. 
Do not include any $ symbols.`, 89 | }, 90 | ]); 91 | }; 92 | 93 | return { 94 | latexOutput, 95 | isLoading, 96 | hasPreviousExpression, 97 | conversationHistory, 98 | sendToLLM, 99 | resetConversation 100 | }; 101 | } 102 | -------------------------------------------------------------------------------- /src/hooks/useLLMEngine.ts: -------------------------------------------------------------------------------- 1 | import { useState, useEffect } from "react"; 2 | import * as webllm from "@mlc-ai/web-llm"; 3 | import { DEFAULT_MODELS } from "../utils/Constants"; 4 | 5 | export interface LLMEngineState { 6 | engine: webllm.MLCEngine | null; 7 | loadingStatus: string; 8 | modelLoaded: boolean; 9 | isChangingModel: boolean; 10 | } 11 | 12 | export function useLLMEngine(modelName: string): LLMEngineState { 13 | const [engine, setEngine] = useState(null); 14 | const [loadingStatus, setLoadingStatus] = useState("Loading model..."); 15 | const [modelLoaded, setModelLoaded] = useState(false); 16 | const [isChangingModel, setIsChangingModel] = useState(false); 17 | 18 | useEffect(() => { 19 | async function initializeEngine() { 20 | try { 21 | setLoadingStatus("Initializing WebLLM engine..."); 22 | setModelLoaded(false); 23 | setIsChangingModel(true); 24 | 25 | // Find the selected model in DEFAULT_MODELS 26 | const selectedModel = DEFAULT_MODELS.find( 27 | (model) => model.name === modelName 28 | ); 29 | 30 | if (!selectedModel) { 31 | throw new Error(`Model ${modelName} not found in available models`); 32 | } 33 | 34 | console.log("Loading model:", selectedModel.name); 35 | 36 | // Track the highest progress value we've seen 37 | let highestProgress = 0; 38 | 39 | // Use the model directly without custom appConfig 40 | const newEngine = await webllm.CreateMLCEngine(selectedModel.name, { 41 | initProgressCallback: (progress) => { 42 | // Only update if the new progress is higher than what we've seen before 43 | if (progress.progress > highestProgress) { 44 | 
highestProgress = progress.progress; 45 | setLoadingStatus( 46 | `Loading model (only slow first time): ${Math.round( 47 | highestProgress * 100 48 | )}%` 49 | ); 50 | } else { 51 | // If progress seems to go backward, just report the stage without percentage 52 | setLoadingStatus(`${progress.text || "Processing..."}`); 53 | } 54 | }, 55 | }); 56 | 57 | setEngine(newEngine); 58 | setModelLoaded(true); 59 | setLoadingStatus("Model loaded successfully!"); 60 | } catch (error) { 61 | console.error("Failed to initialize WebLLM engine:", error); 62 | setLoadingStatus(`Error loading model: ${error}`); 63 | } finally { 64 | setIsChangingModel(false); 65 | } 66 | } 67 | 68 | initializeEngine(); 69 | }, [modelName]); 70 | 71 | return { 72 | engine, 73 | loadingStatus, 74 | modelLoaded, 75 | isChangingModel, 76 | }; 77 | } 78 | -------------------------------------------------------------------------------- /src/hooks/useTranscriber.ts: -------------------------------------------------------------------------------- 1 | import { useCallback, useMemo, useState } from "react"; 2 | import { useWorker } from "./useWorker"; 3 | import Constants from "../utils/Constants"; 4 | 5 | interface ProgressItem { 6 | file: string; 7 | loaded: number; 8 | progress: number; 9 | total: number; 10 | name: string; 11 | status: string; 12 | } 13 | 14 | interface TranscriberUpdateData { 15 | data: [ 16 | string, 17 | { chunks: { text: string; timestamp: [number, number | null] }[] }, 18 | ]; 19 | text: string; 20 | } 21 | 22 | interface TranscriberCompleteData { 23 | data: { 24 | text: string; 25 | chunks: { text: string; timestamp: [number, number | null] }[]; 26 | }; 27 | } 28 | 29 | export interface TranscriberData { 30 | isBusy: boolean; 31 | text: string; 32 | chunks: { text: string; timestamp: [number, number | null] }[]; 33 | } 34 | 35 | export interface Transcriber { 36 | onInputChange: () => void; 37 | isBusy: boolean; 38 | isModelLoading: boolean; 39 | progressItems: ProgressItem[]; 40 
| start: (audioData: AudioBuffer | undefined) => void; 41 | output?: TranscriberData; 42 | model: string; 43 | setModel: (model: string) => void; 44 | multilingual: boolean; 45 | setMultilingual: (model: boolean) => void; 46 | quantized: boolean; 47 | setQuantized: (model: boolean) => void; 48 | subtask: string; 49 | setSubtask: (subtask: string) => void; 50 | language?: string; 51 | setLanguage: (language: string) => void; 52 | } 53 | 54 | export function useTranscriber(): Transcriber { 55 | const [transcript, setTranscript] = useState( 56 | undefined, 57 | ); 58 | const [isBusy, setIsBusy] = useState(false); 59 | const [isModelLoading, setIsModelLoading] = useState(false); 60 | 61 | const [progressItems, setProgressItems] = useState([]); 62 | 63 | const webWorker = useWorker((event) => { 64 | const message = event.data; 65 | // Update the state with the result 66 | switch (message.status) { 67 | case "progress": 68 | // Model file progress: update one of the progress items. 69 | setProgressItems((prev) => 70 | prev.map((item) => { 71 | if (item.file === message.file) { 72 | return { ...item, progress: message.progress }; 73 | } 74 | return item; 75 | }), 76 | ); 77 | break; 78 | case "update": 79 | // Received partial update 80 | // console.log("update", message); 81 | // eslint-disable-next-line no-case-declarations 82 | const updateMessage = message as TranscriberUpdateData; 83 | setTranscript({ 84 | isBusy: true, 85 | text: updateMessage.data[0], 86 | chunks: updateMessage.data[1].chunks, 87 | }); 88 | break; 89 | case "complete": 90 | // Received complete transcript 91 | // console.log("complete", message); 92 | // eslint-disable-next-line no-case-declarations 93 | const completeMessage = message as TranscriberCompleteData; 94 | setTranscript({ 95 | isBusy: false, 96 | text: completeMessage.data.text, 97 | chunks: completeMessage.data.chunks, 98 | }); 99 | setIsBusy(false); 100 | break; 101 | 102 | case "initiate": 103 | // Model file start load: add a new 
progress item to the list. 104 | setIsModelLoading(true); 105 | setProgressItems((prev) => [...prev, message]); 106 | break; 107 | case "ready": 108 | setIsModelLoading(false); 109 | break; 110 | case "error": 111 | setIsBusy(false); 112 | alert( 113 | `${message.data.message} This is most likely because you are using Safari on an M1/M2 Mac. Please try again from Chrome, Firefox, or Edge.\n\nIf this is not the case, please file a bug report.`, 114 | ); 115 | break; 116 | case "done": 117 | // Model file loaded: remove the progress item from the list. 118 | setProgressItems((prev) => 119 | prev.filter((item) => item.file !== message.file), 120 | ); 121 | break; 122 | 123 | default: 124 | // initiate/download/done 125 | break; 126 | } 127 | }); 128 | 129 | const [model, setModel] = useState(Constants.DEFAULT_MODEL); 130 | const [subtask, setSubtask] = useState(Constants.DEFAULT_SUBTASK); 131 | const [quantized, setQuantized] = useState( 132 | Constants.DEFAULT_QUANTIZED, 133 | ); 134 | const [multilingual, setMultilingual] = useState( 135 | Constants.DEFAULT_MULTILINGUAL, 136 | ); 137 | const [language, setLanguage] = useState( 138 | Constants.DEFAULT_LANGUAGE, 139 | ); 140 | 141 | const onInputChange = useCallback(() => { 142 | setTranscript(undefined); 143 | }, []); 144 | 145 | const postRequest = useCallback( 146 | async (audioData: AudioBuffer | undefined) => { 147 | if (audioData) { 148 | setTranscript(undefined); 149 | setIsBusy(true); 150 | 151 | let audio; 152 | if (audioData.numberOfChannels === 2) { 153 | const SCALING_FACTOR = Math.sqrt(2); 154 | 155 | let left = audioData.getChannelData(0); 156 | let right = audioData.getChannelData(1); 157 | 158 | audio = new Float32Array(left.length); 159 | for (let i = 0; i < audioData.length; ++i) { 160 | audio[i] = SCALING_FACTOR * (left[i] + right[i]) / 2; 161 | } 162 | } else { 163 | // If the audio is not stereo, we can just use the first channel: 164 | audio = audioData.getChannelData(0); 165 | } 166 | 167 | 
webWorker.postMessage({ 168 | audio, 169 | model, 170 | multilingual, 171 | quantized, 172 | subtask: multilingual ? subtask : null, 173 | language: 174 | multilingual && language !== "auto" ? language : null, 175 | }); 176 | } 177 | }, 178 | [webWorker, model, multilingual, quantized, subtask, language], 179 | ); 180 | 181 | const transcriber = useMemo(() => { 182 | return { 183 | onInputChange, 184 | isBusy, 185 | isModelLoading, 186 | progressItems, 187 | start: postRequest, 188 | output: transcript, 189 | model, 190 | setModel, 191 | multilingual, 192 | setMultilingual, 193 | quantized, 194 | setQuantized, 195 | subtask, 196 | setSubtask, 197 | language, 198 | setLanguage, 199 | }; 200 | }, [ 201 | isBusy, 202 | isModelLoading, 203 | progressItems, 204 | postRequest, 205 | transcript, 206 | model, 207 | multilingual, 208 | quantized, 209 | subtask, 210 | language, 211 | ]); 212 | 213 | return transcriber; 214 | } 215 | -------------------------------------------------------------------------------- /src/hooks/useWorker.ts: -------------------------------------------------------------------------------- 1 | import { useState } from "react"; 2 | 3 | export interface MessageEventHandler { 4 | (event: MessageEvent): void; 5 | } 6 | 7 | export function useWorker(messageEventHandler: MessageEventHandler): Worker { 8 | // Create new worker once and never again 9 | const [worker] = useState(() => createWorker(messageEventHandler)); 10 | return worker; 11 | } 12 | 13 | function createWorker(messageEventHandler: MessageEventHandler): Worker { 14 | const worker = new Worker(new URL("../whisper-worker.js", import.meta.url), { 15 | type: "module", 16 | }); 17 | // Listen for messages from the Web Worker 18 | worker.addEventListener("message", messageEventHandler); 19 | return worker; 20 | } 21 | -------------------------------------------------------------------------------- /src/index.css: -------------------------------------------------------------------------------- 1 | 
@import "tailwindcss"; 2 | 3 | @theme { 4 | /* Dark mode colors */ 5 | --color-dark-bg-primary: oklch(0.15 0.02 240); 6 | --color-dark-bg-secondary: oklch(0.2 0.02 240); 7 | --color-dark-text-primary: oklch(0.95 0.02 240); 8 | --color-dark-text-secondary: oklch(0.85 0.02 240); 9 | --color-dark-border: oklch(0.3 0.02 240); 10 | 11 | /* Light mode accent colors that work well in both modes */ 12 | --color-accent-blue: oklch(0.65 0.2 250); 13 | --color-accent-blue-light: oklch(0.85 0.1 250); 14 | --color-accent-green: oklch(0.65 0.2 150); 15 | --color-accent-green-light: oklch(0.85 0.1 150); 16 | --color-accent-purple: oklch(0.6 0.18 300); 17 | --color-accent-purple-light: oklch(0.8 0.1 300); 18 | --color-accent-teal: oklch(0.7 0.15 200); 19 | --color-accent-teal-light: oklch(0.85 0.08 200); 20 | } 21 | 22 | :root { 23 | font-family: "Inter", system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", 24 | Roboto, Oxygen, Ubuntu, Cantarell, sans-serif; 25 | line-height: 1.5; 26 | font-weight: 400; 27 | 28 | /* Default light mode variables */ 29 | --bg-primary: white; 30 | --bg-secondary: oklch(0.97 0.01 240); 31 | --text-primary: oklch(0.2 0.01 240); 32 | --text-secondary: oklch(0.4 0.01 240); 33 | --border-color: oklch(0.85 0.01 240); 34 | 35 | font-synthesis: none; 36 | text-rendering: optimizeLegibility; 37 | -webkit-font-smoothing: antialiased; 38 | -moz-osx-font-smoothing: grayscale; 39 | } 40 | 41 | a { 42 | font-weight: 500; 43 | color: var(--color-accent-blue); 44 | text-decoration: inherit; 45 | } 46 | a:hover { 47 | opacity: 0.8; 48 | } 49 | 50 | body { 51 | margin: 0; 52 | display: flex; 53 | justify-content: center; 54 | min-width: 320px; 55 | min-height: 100vh; 56 | background-color: var(--bg-primary); 57 | color: var(--text-primary); 58 | /* Remove transition for instant mode switching */ 59 | overflow-x: hidden; 60 | } 61 | 62 | #root { 63 | width: 100%; 64 | display: flex; 65 | justify-content: center; 66 | } 67 | 68 | h1 { 69 | font-size: 2.5em; 70 | 
line-height: 1.1; 71 | } 72 | 73 | @media (prefers-color-scheme: dark) { 74 | :root { 75 | color-scheme: dark; 76 | --bg-primary: var(--color-dark-bg-primary); 77 | --bg-secondary: var(--color-dark-bg-secondary); 78 | --text-primary: var(--color-dark-text-primary); 79 | --text-secondary: var(--color-dark-text-secondary); 80 | --border-color: var(--color-dark-border); 81 | } 82 | } 83 | 84 | :root.dark { 85 | color-scheme: dark; 86 | --bg-primary: var(--color-dark-bg-primary); 87 | --bg-secondary: var(--color-dark-bg-secondary); 88 | --text-primary: var(--color-dark-text-primary); 89 | --text-secondary: var(--color-dark-text-secondary); 90 | --border-color: var(--color-dark-border); 91 | } 92 | -------------------------------------------------------------------------------- /src/main.tsx: -------------------------------------------------------------------------------- 1 | import { StrictMode } from 'react' 2 | import { createRoot } from 'react-dom/client' 3 | import './index.css' 4 | import App from './App.tsx' 5 | 6 | createRoot(document.getElementById('root')!).render( 7 | 8 | 9 | , 10 | ) 11 | -------------------------------------------------------------------------------- /src/utils/AudioUtils.ts: -------------------------------------------------------------------------------- 1 | function padTime(time: number) { 2 | return String(time).padStart(2, "0"); 3 | } 4 | 5 | export function formatAudioTimestamp(time: number) { 6 | const hours = (time / (60 * 60)) | 0; 7 | time -= hours * (60 * 60); 8 | const minutes = (time / 60) | 0; 9 | time -= minutes * 60; 10 | const seconds = time | 0; 11 | return `${hours ? 
padTime(hours) + ":" : ""}${padTime(minutes)}:${padTime( 12 | seconds 13 | )}`; 14 | } 15 | -------------------------------------------------------------------------------- /src/utils/BlobFix.ts: -------------------------------------------------------------------------------- 1 | /* 2 | * There is a bug where `navigator.mediaDevices.getUserMedia` + `MediaRecorder` 3 | * creates WEBM files without duration metadata. See: 4 | * - https://bugs.chromium.org/p/chromium/issues/detail?id=642012 5 | * - https://stackoverflow.com/a/39971175/13989043 6 | * 7 | * This file contains a function that fixes the duration metadata of a WEBM file. 8 | * - Answer found: https://stackoverflow.com/a/75218309/13989043 9 | * - Code adapted from: https://github.com/mat-sz/webm-fix-duration 10 | * (forked from https://github.com/yusitnikov/fix-webm-duration) 11 | */ 12 | 13 | /* 14 | * This is the list of possible WEBM file sections by their IDs. 15 | * Possible types: Container, Binary, Uint, Int, String, Float, Date 16 | */ 17 | interface Section { 18 | name: string; 19 | type: string; 20 | } 21 | 22 | const sections: Record = { 23 | 0xa45dfa3: { name: "EBML", type: "Container" }, 24 | 0x286: { name: "EBMLVersion", type: "Uint" }, 25 | 0x2f7: { name: "EBMLReadVersion", type: "Uint" }, 26 | 0x2f2: { name: "EBMLMaxIDLength", type: "Uint" }, 27 | 0x2f3: { name: "EBMLMaxSizeLength", type: "Uint" }, 28 | 0x282: { name: "DocType", type: "String" }, 29 | 0x287: { name: "DocTypeVersion", type: "Uint" }, 30 | 0x285: { name: "DocTypeReadVersion", type: "Uint" }, 31 | 0x6c: { name: "Void", type: "Binary" }, 32 | 0x3f: { name: "CRC-32", type: "Binary" }, 33 | 0xb538667: { name: "SignatureSlot", type: "Container" }, 34 | 0x3e8a: { name: "SignatureAlgo", type: "Uint" }, 35 | 0x3e9a: { name: "SignatureHash", type: "Uint" }, 36 | 0x3ea5: { name: "SignaturePublicKey", type: "Binary" }, 37 | 0x3eb5: { name: "Signature", type: "Binary" }, 38 | 0x3e5b: { name: "SignatureElements", type: "Container" }, 39 | 
0x3e7b: { name: "SignatureElementList", type: "Container" }, 40 | 0x2532: { name: "SignedElement", type: "Binary" }, 41 | 0x8538067: { name: "Segment", type: "Container" }, 42 | 0x14d9b74: { name: "SeekHead", type: "Container" }, 43 | 0xdbb: { name: "Seek", type: "Container" }, 44 | 0x13ab: { name: "SeekID", type: "Binary" }, 45 | 0x13ac: { name: "SeekPosition", type: "Uint" }, 46 | 0x549a966: { name: "Info", type: "Container" }, 47 | 0x33a4: { name: "SegmentUID", type: "Binary" }, 48 | 0x3384: { name: "SegmentFilename", type: "String" }, 49 | 0x1cb923: { name: "PrevUID", type: "Binary" }, 50 | 0x1c83ab: { name: "PrevFilename", type: "String" }, 51 | 0x1eb923: { name: "NextUID", type: "Binary" }, 52 | 0x1e83bb: { name: "NextFilename", type: "String" }, 53 | 0x444: { name: "SegmentFamily", type: "Binary" }, 54 | 0x2924: { name: "ChapterTranslate", type: "Container" }, 55 | 0x29fc: { name: "ChapterTranslateEditionUID", type: "Uint" }, 56 | 0x29bf: { name: "ChapterTranslateCodec", type: "Uint" }, 57 | 0x29a5: { name: "ChapterTranslateID", type: "Binary" }, 58 | 0xad7b1: { name: "TimecodeScale", type: "Uint" }, 59 | 0x489: { name: "Duration", type: "Float" }, 60 | 0x461: { name: "DateUTC", type: "Date" }, 61 | 0x3ba9: { name: "Title", type: "String" }, 62 | 0xd80: { name: "MuxingApp", type: "String" }, 63 | 0x1741: { name: "WritingApp", type: "String" }, 64 | // 0xf43b675: { name: 'Cluster', type: 'Container' }, 65 | 0x67: { name: "Timecode", type: "Uint" }, 66 | 0x1854: { name: "SilentTracks", type: "Container" }, 67 | 0x18d7: { name: "SilentTrackNumber", type: "Uint" }, 68 | 0x27: { name: "Position", type: "Uint" }, 69 | 0x2b: { name: "PrevSize", type: "Uint" }, 70 | 0x23: { name: "SimpleBlock", type: "Binary" }, 71 | 0x20: { name: "BlockGroup", type: "Container" }, 72 | 0x21: { name: "Block", type: "Binary" }, 73 | 0x22: { name: "BlockVirtual", type: "Binary" }, 74 | 0x35a1: { name: "BlockAdditions", type: "Container" }, 75 | 0x26: { name: "BlockMore", type: 
"Container" }, 76 | 0x6e: { name: "BlockAddID", type: "Uint" }, 77 | 0x25: { name: "BlockAdditional", type: "Binary" }, 78 | 0x1b: { name: "BlockDuration", type: "Uint" }, 79 | 0x7a: { name: "ReferencePriority", type: "Uint" }, 80 | 0x7b: { name: "ReferenceBlock", type: "Int" }, 81 | 0x7d: { name: "ReferenceVirtual", type: "Int" }, 82 | 0x24: { name: "CodecState", type: "Binary" }, 83 | 0x35a2: { name: "DiscardPadding", type: "Int" }, 84 | 0xe: { name: "Slices", type: "Container" }, 85 | 0x68: { name: "TimeSlice", type: "Container" }, 86 | 0x4c: { name: "LaceNumber", type: "Uint" }, 87 | 0x4d: { name: "FrameNumber", type: "Uint" }, 88 | 0x4b: { name: "BlockAdditionID", type: "Uint" }, 89 | 0x4e: { name: "Delay", type: "Uint" }, 90 | 0x4f: { name: "SliceDuration", type: "Uint" }, 91 | 0x48: { name: "ReferenceFrame", type: "Container" }, 92 | 0x49: { name: "ReferenceOffset", type: "Uint" }, 93 | 0x4a: { name: "ReferenceTimeCode", type: "Uint" }, 94 | 0x2f: { name: "EncryptedBlock", type: "Binary" }, 95 | 0x654ae6b: { name: "Tracks", type: "Container" }, 96 | 0x2e: { name: "TrackEntry", type: "Container" }, 97 | 0x57: { name: "TrackNumber", type: "Uint" }, 98 | 0x33c5: { name: "TrackUID", type: "Uint" }, 99 | 0x3: { name: "TrackType", type: "Uint" }, 100 | 0x39: { name: "FlagEnabled", type: "Uint" }, 101 | 0x8: { name: "FlagDefault", type: "Uint" }, 102 | 0x15aa: { name: "FlagForced", type: "Uint" }, 103 | 0x1c: { name: "FlagLacing", type: "Uint" }, 104 | 0x2de7: { name: "MinCache", type: "Uint" }, 105 | 0x2df8: { name: "MaxCache", type: "Uint" }, 106 | 0x3e383: { name: "DefaultDuration", type: "Uint" }, 107 | 0x34e7a: { name: "DefaultDecodedFieldDuration", type: "Uint" }, 108 | 0x3314f: { name: "TrackTimecodeScale", type: "Float" }, 109 | 0x137f: { name: "TrackOffset", type: "Int" }, 110 | 0x15ee: { name: "MaxBlockAdditionID", type: "Uint" }, 111 | 0x136e: { name: "Name", type: "String" }, 112 | 0x2b59c: { name: "Language", type: "String" }, 113 | 0x6: { name: 
"CodecID", type: "String" }, 114 | 0x23a2: { name: "CodecPrivate", type: "Binary" }, 115 | 0x58688: { name: "CodecName", type: "String" }, 116 | 0x3446: { name: "AttachmentLink", type: "Uint" }, 117 | 0x1a9697: { name: "CodecSettings", type: "String" }, 118 | 0x1b4040: { name: "CodecInfoURL", type: "String" }, 119 | 0x6b240: { name: "CodecDownloadURL", type: "String" }, 120 | 0x2a: { name: "CodecDecodeAll", type: "Uint" }, 121 | 0x2fab: { name: "TrackOverlay", type: "Uint" }, 122 | 0x16aa: { name: "CodecDelay", type: "Uint" }, 123 | 0x16bb: { name: "SeekPreRoll", type: "Uint" }, 124 | 0x2624: { name: "TrackTranslate", type: "Container" }, 125 | 0x26fc: { name: "TrackTranslateEditionUID", type: "Uint" }, 126 | 0x26bf: { name: "TrackTranslateCodec", type: "Uint" }, 127 | 0x26a5: { name: "TrackTranslateTrackID", type: "Binary" }, 128 | 0x60: { name: "Video", type: "Container" }, 129 | 0x1a: { name: "FlagInterlaced", type: "Uint" }, 130 | 0x13b8: { name: "StereoMode", type: "Uint" }, 131 | 0x13c0: { name: "AlphaMode", type: "Uint" }, 132 | 0x13b9: { name: "OldStereoMode", type: "Uint" }, 133 | 0x30: { name: "PixelWidth", type: "Uint" }, 134 | 0x3a: { name: "PixelHeight", type: "Uint" }, 135 | 0x14aa: { name: "PixelCropBottom", type: "Uint" }, 136 | 0x14bb: { name: "PixelCropTop", type: "Uint" }, 137 | 0x14cc: { name: "PixelCropLeft", type: "Uint" }, 138 | 0x14dd: { name: "PixelCropRight", type: "Uint" }, 139 | 0x14b0: { name: "DisplayWidth", type: "Uint" }, 140 | 0x14ba: { name: "DisplayHeight", type: "Uint" }, 141 | 0x14b2: { name: "DisplayUnit", type: "Uint" }, 142 | 0x14b3: { name: "AspectRatioType", type: "Uint" }, 143 | 0xeb524: { name: "ColourSpace", type: "Binary" }, 144 | 0xfb523: { name: "GammaValue", type: "Float" }, 145 | 0x383e3: { name: "FrameRate", type: "Float" }, 146 | 0x61: { name: "Audio", type: "Container" }, 147 | 0x35: { name: "SamplingFrequency", type: "Float" }, 148 | 0x38b5: { name: "OutputSamplingFrequency", type: "Float" }, 149 | 0x1f: { name: 
"Channels", type: "Uint" }, 150 | 0x3d7b: { name: "ChannelPositions", type: "Binary" }, 151 | 0x2264: { name: "BitDepth", type: "Uint" }, 152 | 0x62: { name: "TrackOperation", type: "Container" }, 153 | 0x63: { name: "TrackCombinePlanes", type: "Container" }, 154 | 0x64: { name: "TrackPlane", type: "Container" }, 155 | 0x65: { name: "TrackPlaneUID", type: "Uint" }, 156 | 0x66: { name: "TrackPlaneType", type: "Uint" }, 157 | 0x69: { name: "TrackJoinBlocks", type: "Container" }, 158 | 0x6d: { name: "TrackJoinUID", type: "Uint" }, 159 | 0x40: { name: "TrickTrackUID", type: "Uint" }, 160 | 0x41: { name: "TrickTrackSegmentUID", type: "Binary" }, 161 | 0x46: { name: "TrickTrackFlag", type: "Uint" }, 162 | 0x47: { name: "TrickMasterTrackUID", type: "Uint" }, 163 | 0x44: { name: "TrickMasterTrackSegmentUID", type: "Binary" }, 164 | 0x2d80: { name: "ContentEncodings", type: "Container" }, 165 | 0x2240: { name: "ContentEncoding", type: "Container" }, 166 | 0x1031: { name: "ContentEncodingOrder", type: "Uint" }, 167 | 0x1032: { name: "ContentEncodingScope", type: "Uint" }, 168 | 0x1033: { name: "ContentEncodingType", type: "Uint" }, 169 | 0x1034: { name: "ContentCompression", type: "Container" }, 170 | 0x254: { name: "ContentCompAlgo", type: "Uint" }, 171 | 0x255: { name: "ContentCompSettings", type: "Binary" }, 172 | 0x1035: { name: "ContentEncryption", type: "Container" }, 173 | 0x7e1: { name: "ContentEncAlgo", type: "Uint" }, 174 | 0x7e2: { name: "ContentEncKeyID", type: "Binary" }, 175 | 0x7e3: { name: "ContentSignature", type: "Binary" }, 176 | 0x7e4: { name: "ContentSigKeyID", type: "Binary" }, 177 | 0x7e5: { name: "ContentSigAlgo", type: "Uint" }, 178 | 0x7e6: { name: "ContentSigHashAlgo", type: "Uint" }, 179 | 0xc53bb6b: { name: "Cues", type: "Container" }, 180 | 0x3b: { name: "CuePoint", type: "Container" }, 181 | 0x33: { name: "CueTime", type: "Uint" }, 182 | 0x37: { name: "CueTrackPositions", type: "Container" }, 183 | 0x77: { name: "CueTrack", type: "Uint" }, 184 
| 0x71: { name: "CueClusterPosition", type: "Uint" }, 185 | 0x70: { name: "CueRelativePosition", type: "Uint" }, 186 | 0x32: { name: "CueDuration", type: "Uint" }, 187 | 0x1378: { name: "CueBlockNumber", type: "Uint" }, 188 | 0x6a: { name: "CueCodecState", type: "Uint" }, 189 | 0x5b: { name: "CueReference", type: "Container" }, 190 | 0x16: { name: "CueRefTime", type: "Uint" }, 191 | 0x17: { name: "CueRefCluster", type: "Uint" }, 192 | 0x135f: { name: "CueRefNumber", type: "Uint" }, 193 | 0x6b: { name: "CueRefCodecState", type: "Uint" }, 194 | 0x941a469: { name: "Attachments", type: "Container" }, 195 | 0x21a7: { name: "AttachedFile", type: "Container" }, 196 | 0x67e: { name: "FileDescription", type: "String" }, 197 | 0x66e: { name: "FileName", type: "String" }, 198 | 0x660: { name: "FileMimeType", type: "String" }, 199 | 0x65c: { name: "FileData", type: "Binary" }, 200 | 0x6ae: { name: "FileUID", type: "Uint" }, 201 | 0x675: { name: "FileReferral", type: "Binary" }, 202 | 0x661: { name: "FileUsedStartTime", type: "Uint" }, 203 | 0x662: { name: "FileUsedEndTime", type: "Uint" }, 204 | 0x43a770: { name: "Chapters", type: "Container" }, 205 | 0x5b9: { name: "EditionEntry", type: "Container" }, 206 | 0x5bc: { name: "EditionUID", type: "Uint" }, 207 | 0x5bd: { name: "EditionFlagHidden", type: "Uint" }, 208 | 0x5db: { name: "EditionFlagDefault", type: "Uint" }, 209 | 0x5dd: { name: "EditionFlagOrdered", type: "Uint" }, 210 | 0x36: { name: "ChapterAtom", type: "Container" }, 211 | 0x33c4: { name: "ChapterUID", type: "Uint" }, 212 | 0x1654: { name: "ChapterStringUID", type: "String" }, 213 | 0x11: { name: "ChapterTimeStart", type: "Uint" }, 214 | 0x12: { name: "ChapterTimeEnd", type: "Uint" }, 215 | 0x18: { name: "ChapterFlagHidden", type: "Uint" }, 216 | 0x598: { name: "ChapterFlagEnabled", type: "Uint" }, 217 | 0x2e67: { name: "ChapterSegmentUID", type: "Binary" }, 218 | 0x2ebc: { name: "ChapterSegmentEditionUID", type: "Uint" }, 219 | 0x23c3: { name: 
"ChapterPhysicalEquiv", type: "Uint" }, 220 | 0xf: { name: "ChapterTrack", type: "Container" }, 221 | 0x9: { name: "ChapterTrackNumber", type: "Uint" }, 222 | 0x0: { name: "ChapterDisplay", type: "Container" }, 223 | 0x5: { name: "ChapString", type: "String" }, 224 | 0x37c: { name: "ChapLanguage", type: "String" }, 225 | 0x37e: { name: "ChapCountry", type: "String" }, 226 | 0x2944: { name: "ChapProcess", type: "Container" }, 227 | 0x2955: { name: "ChapProcessCodecID", type: "Uint" }, 228 | 0x50d: { name: "ChapProcessPrivate", type: "Binary" }, 229 | 0x2911: { name: "ChapProcessCommand", type: "Container" }, 230 | 0x2922: { name: "ChapProcessTime", type: "Uint" }, 231 | 0x2933: { name: "ChapProcessData", type: "Binary" }, 232 | 0x254c367: { name: "Tags", type: "Container" }, 233 | 0x3373: { name: "Tag", type: "Container" }, 234 | 0x23c0: { name: "Targets", type: "Container" }, 235 | 0x28ca: { name: "TargetTypeValue", type: "Uint" }, 236 | 0x23ca: { name: "TargetType", type: "String" }, 237 | 0x23c5: { name: "TagTrackUID", type: "Uint" }, 238 | 0x23c9: { name: "TagEditionUID", type: "Uint" }, 239 | 0x23c4: { name: "TagChapterUID", type: "Uint" }, 240 | 0x23c6: { name: "TagAttachmentUID", type: "Uint" }, 241 | 0x27c8: { name: "SimpleTag", type: "Container" }, 242 | 0x5a3: { name: "TagName", type: "String" }, 243 | 0x47a: { name: "TagLanguage", type: "String" }, 244 | 0x484: { name: "TagDefault", type: "Uint" }, 245 | 0x487: { name: "TagString", type: "String" }, 246 | 0x485: { name: "TagBinary", type: "Binary" }, 247 | }; 248 | 249 | class WebmBase { 250 | source?: Uint8Array; 251 | data?: T; 252 | 253 | constructor(private name = "Unknown", private type = "Unknown") {} 254 | 255 | updateBySource() {} 256 | 257 | setSource(source: Uint8Array) { 258 | this.source = source; 259 | this.updateBySource(); 260 | } 261 | 262 | updateByData() {} 263 | 264 | setData(data: T) { 265 | this.data = data; 266 | this.updateByData(); 267 | } 268 | } 269 | 270 | class WebmUint extends 
WebmBase { 271 | constructor(name: string, type: string) { 272 | super(name, type || "Uint"); 273 | } 274 | 275 | updateBySource() { 276 | // use hex representation of a number instead of number value 277 | this.data = ""; 278 | for (let i = 0; i < this.source!.length; i++) { 279 | const hex = this.source![i].toString(16); 280 | this.data += padHex(hex); 281 | } 282 | } 283 | 284 | updateByData() { 285 | const length = this.data!.length / 2; 286 | this.source = new Uint8Array(length); 287 | for (let i = 0; i < length; i++) { 288 | const hex = this.data!.substr(i * 2, 2); 289 | this.source[i] = parseInt(hex, 16); 290 | } 291 | } 292 | 293 | getValue() { 294 | return parseInt(this.data!, 16); 295 | } 296 | 297 | setValue(value: number) { 298 | this.setData(padHex(value.toString(16))); 299 | } 300 | } 301 | 302 | function padHex(hex: string) { 303 | return hex.length % 2 === 1 ? "0" + hex : hex; 304 | } 305 | 306 | class WebmFloat extends WebmBase { 307 | constructor(name: string, type: string) { 308 | super(name, type || "Float"); 309 | } 310 | 311 | getFloatArrayType() { 312 | return this.source && this.source.length === 4 313 | ? Float32Array 314 | : Float64Array; 315 | } 316 | updateBySource() { 317 | const byteArray = this.source!.reverse(); 318 | const floatArrayType = this.getFloatArrayType(); 319 | const floatArray = new floatArrayType(byteArray.buffer); 320 | this.data! 
= floatArray[0]; 321 | } 322 | updateByData() { 323 | const floatArrayType = this.getFloatArrayType(); 324 | const floatArray = new floatArrayType([this.data!]); 325 | const byteArray = new Uint8Array(floatArray.buffer); 326 | this.source = byteArray.reverse(); 327 | } 328 | getValue() { 329 | return this.data; 330 | } 331 | setValue(value: number) { 332 | this.setData(value); 333 | } 334 | } 335 | 336 | interface ContainerData { 337 | id: number; 338 | idHex?: string; 339 | data: WebmBase; 340 | } 341 | 342 | class WebmContainer extends WebmBase { 343 | offset: number = 0; 344 | data: ContainerData[] = []; 345 | 346 | constructor(name: string, type: string) { 347 | super(name, type || "Container"); 348 | } 349 | 350 | readByte() { 351 | return this.source![this.offset++]; 352 | } 353 | readUint() { 354 | const firstByte = this.readByte(); 355 | const bytes = 8 - firstByte.toString(2).length; 356 | let value = firstByte - (1 << (7 - bytes)); 357 | for (let i = 0; i < bytes; i++) { 358 | // don't use bit operators to support x86 359 | value *= 256; 360 | value += this.readByte(); 361 | } 362 | return value; 363 | } 364 | updateBySource() { 365 | let end: number | undefined = undefined; 366 | this.data = []; 367 | for ( 368 | this.offset = 0; 369 | this.offset < this.source!.length; 370 | this.offset = end 371 | ) { 372 | const id = this.readUint(); 373 | const len = this.readUint(); 374 | end = Math.min(this.offset + len, this.source!.length); 375 | const data = this.source!.slice(this.offset, end); 376 | 377 | const info = sections[id] || { name: "Unknown", type: "Unknown" }; 378 | let ctr: any = WebmBase; 379 | switch (info.type) { 380 | case "Container": 381 | ctr = WebmContainer; 382 | break; 383 | case "Uint": 384 | ctr = WebmUint; 385 | break; 386 | case "Float": 387 | ctr = WebmFloat; 388 | break; 389 | } 390 | const section = new ctr(info.name, info.type); 391 | section.setSource(data); 392 | this.data.push({ 393 | id: id, 394 | idHex: id.toString(16), 395 | 
data: section, 396 | }); 397 | } 398 | } 399 | writeUint(x: number, draft = false) { 400 | for ( 401 | var bytes = 1, flag = 0x80; 402 | x >= flag && bytes < 8; 403 | bytes++, flag *= 0x80 404 | ) {} 405 | 406 | if (!draft) { 407 | let value = flag + x; 408 | for (let i = bytes - 1; i >= 0; i--) { 409 | // don't use bit operators to support x86 410 | const c = value % 256; 411 | this.source![this.offset! + i] = c; 412 | value = (value - c) / 256; 413 | } 414 | } 415 | 416 | this.offset += bytes; 417 | } 418 | 419 | writeSections(draft = false) { 420 | this.offset = 0; 421 | for (let i = 0; i < this.data.length; i++) { 422 | const section = this.data[i], 423 | content = section.data.source, 424 | contentLength = content!.length; 425 | this.writeUint(section.id, draft); 426 | this.writeUint(contentLength, draft); 427 | if (!draft) { 428 | this.source!.set(content!, this.offset); 429 | } 430 | this.offset += contentLength; 431 | } 432 | return this.offset; 433 | } 434 | 435 | updateByData() { 436 | // run without accessing this.source to determine total length - need to know it to create Uint8Array 437 | const length = this.writeSections(true); 438 | this.source = new Uint8Array(length); 439 | // now really write data 440 | this.writeSections(); 441 | } 442 | 443 | getSectionById(id: number) { 444 | for (let i = 0; i < this.data.length; i++) { 445 | const section = this.data[i]; 446 | if (section.id === id) { 447 | return section.data; 448 | } 449 | } 450 | 451 | return undefined; 452 | } 453 | } 454 | 455 | class WebmFile extends WebmContainer { 456 | constructor(source: Uint8Array) { 457 | super("File", "File"); 458 | this.setSource(source); 459 | } 460 | 461 | fixDuration(duration: number) { 462 | const segmentSection = this.getSectionById(0x8538067) as WebmContainer; 463 | if (!segmentSection) { 464 | return false; 465 | } 466 | 467 | const infoSection = segmentSection.getSectionById( 468 | 0x549a966, 469 | ) as WebmContainer; 470 | if (!infoSection) { 471 | 
return false; 472 | } 473 | 474 | const timeScaleSection = infoSection.getSectionById( 475 | 0xad7b1, 476 | ) as WebmFloat; 477 | if (!timeScaleSection) { 478 | return false; 479 | } 480 | 481 | let durationSection = infoSection.getSectionById(0x489) as WebmFloat; 482 | if (durationSection) { 483 | if (durationSection.getValue()! <= 0) { 484 | durationSection.setValue(duration); 485 | } else { 486 | return false; 487 | } 488 | } else { 489 | // append Duration section 490 | durationSection = new WebmFloat("Duration", "Float"); 491 | durationSection.setValue(duration); 492 | infoSection.data.push({ 493 | id: 0x489, 494 | data: durationSection, 495 | }); 496 | } 497 | 498 | // set default time scale to 1 millisecond (1000000 nanoseconds) 499 | timeScaleSection.setValue(1000000); 500 | infoSection.updateByData(); 501 | segmentSection.updateByData(); 502 | this.updateByData(); 503 | 504 | return true; 505 | } 506 | 507 | toBlob(type = "video/webm") { 508 | return new Blob([this.source!.buffer], { type }); 509 | } 510 | } 511 | 512 | /** 513 | * Fixes duration on MediaRecorder output. 514 | * @param blob Input Blob with incorrect duration. 515 | * @param duration Correct duration (in milliseconds). 516 | * @param type Output blob mimetype (default: video/webm). 
517 | * @returns 518 | */ 519 | export const webmFixDuration = ( 520 | blob: Blob, 521 | duration: number, 522 | type = "video/webm", 523 | ): Promise => { 524 | return new Promise((resolve, reject) => { 525 | try { 526 | const reader = new FileReader(); 527 | 528 | reader.addEventListener("loadend", () => { 529 | try { 530 | const result = reader.result as ArrayBuffer; 531 | const file = new WebmFile(new Uint8Array(result)); 532 | if (file.fixDuration(duration)) { 533 | resolve(file.toBlob(type)); 534 | } else { 535 | resolve(blob); 536 | } 537 | } catch (ex) { 538 | reject(ex); 539 | } 540 | }); 541 | 542 | reader.addEventListener("error", () => reject()); 543 | 544 | reader.readAsArrayBuffer(blob); 545 | } catch (ex) { 546 | reject(ex); 547 | } 548 | }); 549 | }; 550 | -------------------------------------------------------------------------------- /src/utils/Constants.ts: -------------------------------------------------------------------------------- 1 | export default { 2 | SAMPLING_RATE: 16000, 3 | DEFAULT_MODEL: "Xenova/whisper-tiny", 4 | DEFAULT_SUBTASK: "transcribe", 5 | DEFAULT_LANGUAGE: "english", 6 | DEFAULT_QUANTIZED: false, 7 | DEFAULT_MULTILINGUAL: false, 8 | }; 9 | 10 | import { prebuiltAppConfig } from "@mlc-ai/web-llm"; 11 | 12 | export const OWNER = "mlc-ai"; 13 | export const REPO = "web-llm-chat"; 14 | export const WEBLLM_HOME_URL = "https://webllm.mlc.ai"; 15 | export const REPO_URL = `https://github.com/${OWNER}/${REPO}`; 16 | export const ISSUE_URL = `https://github.com/${OWNER}/${REPO}/issues`; 17 | 18 | export interface ModelRecord { 19 | name: string; 20 | display_name: string; 21 | provider?: string; 22 | size?: string; 23 | quantization?: string; 24 | family: ModelFamily; 25 | recommended_config?: { 26 | temperature?: number; 27 | context_window_size?: number; 28 | top_p?: number; 29 | presence_penalty?: number; 30 | frequency_penalty?: number; 31 | }; 32 | } 33 | 34 | export enum Path { 35 | Home = "/", 36 | Chat = "/chat", 37 | 
Settings = "/settings", 38 | Templates = "/templates", 39 | } 40 | 41 | export enum ApiPath { 42 | Cors = "", 43 | } 44 | 45 | export enum SlotID { 46 | AppBody = "app-body", 47 | CustomModel = "custom-model", 48 | } 49 | 50 | export enum FileName { 51 | Templates = "templates.json", 52 | Prompts = "prompts.json", 53 | } 54 | 55 | export enum StoreKey { 56 | Chat = "chat-next-web-store", 57 | Access = "access-control", 58 | Config = "app-config", 59 | Templates = "templates-store", 60 | Prompt = "prompt-store", 61 | Update = "chat-update", 62 | Sync = "sync", 63 | } 64 | 65 | export const DEFAULT_SIDEBAR_WIDTH = 320; 66 | export const MAX_SIDEBAR_WIDTH = 500; 67 | export const MIN_SIDEBAR_WIDTH = 260; 68 | export const NARROW_SIDEBAR_WIDTH = 100; 69 | 70 | export const ACCESS_CODE_PREFIX = "nk-"; 71 | 72 | export const LAST_INPUT_KEY = "last-input"; 73 | export const UNFINISHED_INPUT = (name: string) => "unfinished-input-" + name; 74 | 75 | export const STORAGE_KEY = "chatgpt-next-web"; 76 | 77 | export const REQUEST_TIMEOUT_MS = 60000; 78 | 79 | export const EXPORT_MESSAGE_CLASS_NAME = "export-markdown"; 80 | 81 | export const DEFAULT_INPUT_TEMPLATE = `{{input}}`; // input / time / model / lang 82 | 83 | export const DEFAULT_SYSTEM_TEMPLATE = ` 84 | You are an AI large language model assistant trained by {{provider}}. 85 | You are currently engaging with users on WebLLM Chat, an open-source AI Chatbot UI developed by MLC.ai (Machine Learning Compilation). 86 | Model display_name: {{model}} 87 | The current date and time is {{time}}. 
88 | Latex inline format: \\(x^2\\) 89 | Latex block format: $$e=mc^2$$ 90 | `; 91 | 92 | export enum ModelFamily { 93 | LLAMA = "llama", 94 | PHI = "phi", 95 | MISTRAL = "mistral", 96 | GEMMA = "gemma", 97 | QWEN = "qwen", 98 | SMOL_LM = "smollm", 99 | WIZARD_MATH = "wizardmath", 100 | STABLE_LM = "stablelm", 101 | REDPAJAMA = "redpajama", 102 | DEEPSEEK = "DeepSeek", 103 | } 104 | 105 | const DEFAULT_MODEL_BASES: ModelRecord[] = [ 106 | // Phi-3.5 Vision 107 | { 108 | name: "Phi-3.5-vision-instruct-q4f32_1-MLC", 109 | display_name: "Phi", 110 | provider: "Microsoft", 111 | family: ModelFamily.PHI, 112 | recommended_config: { 113 | temperature: 1, 114 | presence_penalty: 0, 115 | frequency_penalty: 0, 116 | top_p: 1, 117 | }, 118 | }, 119 | { 120 | name: "Phi-3.5-vision-instruct-q4f16_1-MLC", 121 | display_name: "Phi", 122 | provider: "Microsoft", 123 | family: ModelFamily.PHI, 124 | recommended_config: { 125 | temperature: 1, 126 | presence_penalty: 0, 127 | frequency_penalty: 0, 128 | top_p: 1, 129 | }, 130 | }, 131 | // Llama-3.2 132 | { 133 | name: "Llama-3.2-1B-Instruct-q4f32_1-MLC", 134 | display_name: "Llama", 135 | provider: "Meta", 136 | family: ModelFamily.LLAMA, 137 | recommended_config: { 138 | temperature: 0.6, 139 | presence_penalty: 0, 140 | frequency_penalty: 0, 141 | top_p: 0.9, 142 | }, 143 | }, 144 | { 145 | name: "Llama-3.2-1B-Instruct-q4f16_1-MLC", 146 | display_name: "Llama", 147 | provider: "Meta", 148 | family: ModelFamily.LLAMA, 149 | recommended_config: { 150 | temperature: 0.6, 151 | presence_penalty: 0, 152 | frequency_penalty: 0, 153 | top_p: 0.9, 154 | }, 155 | }, 156 | { 157 | name: "Llama-3.2-1B-Instruct-q0f32-MLC", 158 | display_name: "Llama", 159 | provider: "Meta", 160 | family: ModelFamily.LLAMA, 161 | recommended_config: { 162 | temperature: 0.6, 163 | presence_penalty: 0, 164 | frequency_penalty: 0, 165 | top_p: 0.9, 166 | }, 167 | }, 168 | { 169 | name: "Llama-3.2-1B-Instruct-q0f16-MLC", 170 | display_name: "Llama", 171 | 
provider: "Meta", 172 | family: ModelFamily.LLAMA, 173 | recommended_config: { 174 | temperature: 0.6, 175 | presence_penalty: 0, 176 | frequency_penalty: 0, 177 | top_p: 0.9, 178 | }, 179 | }, 180 | { 181 | name: "Llama-3.2-3B-Instruct-q4f32_1-MLC", 182 | display_name: "Llama", 183 | provider: "Meta", 184 | family: ModelFamily.LLAMA, 185 | recommended_config: { 186 | temperature: 0.6, 187 | presence_penalty: 0, 188 | frequency_penalty: 0, 189 | top_p: 0.9, 190 | }, 191 | }, 192 | { 193 | name: "Llama-3.2-3B-Instruct-q4f16_1-MLC", 194 | display_name: "Llama", 195 | provider: "Meta", 196 | family: ModelFamily.LLAMA, 197 | recommended_config: { 198 | temperature: 0.6, 199 | presence_penalty: 0, 200 | frequency_penalty: 0, 201 | top_p: 0.9, 202 | }, 203 | }, 204 | // Llama-3.1 8B 205 | { 206 | name: "Llama-3.1-8B-Instruct-q4f32_1-MLC-1k", 207 | display_name: "Llama", 208 | provider: "Meta", 209 | family: ModelFamily.LLAMA, 210 | recommended_config: { 211 | temperature: 0.6, 212 | presence_penalty: 0, 213 | frequency_penalty: 0, 214 | top_p: 0.9, 215 | }, 216 | }, 217 | { 218 | name: "Llama-3.1-8B-Instruct-q4f16_1-MLC-1k", 219 | display_name: "Llama", 220 | provider: "Meta", 221 | family: ModelFamily.LLAMA, 222 | recommended_config: { 223 | temperature: 0.6, 224 | presence_penalty: 0, 225 | frequency_penalty: 0, 226 | top_p: 0.9, 227 | }, 228 | }, 229 | { 230 | name: "Llama-3.1-8B-Instruct-q4f32_1-MLC", 231 | display_name: "Llama", 232 | provider: "Meta", 233 | family: ModelFamily.LLAMA, 234 | recommended_config: { 235 | temperature: 0.6, 236 | presence_penalty: 0, 237 | frequency_penalty: 0, 238 | top_p: 0.9, 239 | }, 240 | }, 241 | { 242 | name: "Llama-3.1-8B-Instruct-q4f16_1-MLC", 243 | display_name: "Llama", 244 | provider: "Meta", 245 | family: ModelFamily.LLAMA, 246 | recommended_config: { 247 | temperature: 0.6, 248 | presence_penalty: 0, 249 | frequency_penalty: 0, 250 | top_p: 0.9, 251 | }, 252 | }, 253 | // Deepseek 254 | { 255 | name: 
"DeepSeek-R1-Distill-Qwen-7B-q4f16_1-MLC", 256 | display_name: "DeepSeek", 257 | provider: "DeepSeek", 258 | family: ModelFamily.DEEPSEEK, 259 | recommended_config: { 260 | temperature: 1, 261 | presence_penalty: 0, 262 | frequency_penalty: 0, 263 | top_p: 1, 264 | }, 265 | }, 266 | { 267 | name: "DeepSeek-R1-Distill-Qwen-7B-q4f32_1-MLC", 268 | display_name: "DeepSeek", 269 | provider: "DeepSeek", 270 | family: ModelFamily.DEEPSEEK, 271 | recommended_config: { 272 | temperature: 1, 273 | presence_penalty: 0, 274 | frequency_penalty: 0, 275 | top_p: 1, 276 | }, 277 | }, 278 | { 279 | name: "DeepSeek-R1-Distill-Llama-8B-q4f32_1-MLC", 280 | display_name: "DeepSeek", 281 | provider: "DeepSeek", 282 | family: ModelFamily.DEEPSEEK, 283 | recommended_config: { 284 | temperature: 1, 285 | presence_penalty: 0, 286 | frequency_penalty: 0, 287 | top_p: 1, 288 | }, 289 | }, 290 | { 291 | name: "DeepSeek-R1-Distill-Llama-8B-q4f16_1-MLC", 292 | display_name: "DeepSeek", 293 | provider: "DeepSeek", 294 | family: ModelFamily.DEEPSEEK, 295 | recommended_config: { 296 | temperature: 1, 297 | presence_penalty: 0, 298 | frequency_penalty: 0, 299 | top_p: 1, 300 | }, 301 | }, 302 | // Hermes 303 | { 304 | name: "Hermes-3-Llama-3.2-3B-q4f32_1-MLC", 305 | display_name: "Hermes", 306 | provider: "NousResearch", 307 | family: ModelFamily.LLAMA, 308 | recommended_config: { 309 | temperature: 0.6, 310 | presence_penalty: 0, 311 | frequency_penalty: 0, 312 | top_p: 0.9, 313 | }, 314 | }, 315 | { 316 | name: "Hermes-3-Llama-3.2-3B-q4f16_1-MLC", 317 | display_name: "Hermes", 318 | provider: "NousResearch", 319 | family: ModelFamily.LLAMA, 320 | recommended_config: { 321 | temperature: 0.6, 322 | presence_penalty: 0, 323 | frequency_penalty: 0, 324 | top_p: 0.9, 325 | }, 326 | }, 327 | { 328 | name: "Hermes-3-Llama-3.1-8B-q4f32_1-MLC", 329 | display_name: "Hermes", 330 | provider: "NousResearch", 331 | family: ModelFamily.LLAMA, 332 | recommended_config: { 333 | temperature: 0.6, 334 | 
presence_penalty: 0, 335 | frequency_penalty: 0, 336 | top_p: 0.9, 337 | }, 338 | }, 339 | { 340 | name: "Hermes-3-Llama-3.1-8B-q4f16_1-MLC", 341 | display_name: "Hermes", 342 | provider: "NousResearch", 343 | family: ModelFamily.LLAMA, 344 | recommended_config: { 345 | temperature: 0.6, 346 | presence_penalty: 0, 347 | frequency_penalty: 0, 348 | top_p: 0.9, 349 | }, 350 | }, 351 | { 352 | name: "Hermes-2-Pro-Mistral-7B-q4f16_1-MLC", 353 | display_name: "Hermes", 354 | provider: "NousResearch", 355 | family: ModelFamily.MISTRAL, 356 | recommended_config: { 357 | temperature: 0.7, 358 | presence_penalty: 0, 359 | frequency_penalty: 0, 360 | top_p: 0.95, 361 | }, 362 | }, 363 | { 364 | name: "Hermes-2-Pro-Llama-3-8B-q4f16_1-MLC", 365 | display_name: "Hermes", 366 | provider: "NousResearch", 367 | family: ModelFamily.LLAMA, 368 | recommended_config: { 369 | temperature: 1, 370 | presence_penalty: 0, 371 | frequency_penalty: 0, 372 | top_p: 1, 373 | }, 374 | }, 375 | { 376 | name: "Hermes-2-Pro-Llama-3-8B-q4f32_1-MLC", 377 | display_name: "Hermes", 378 | provider: "NousResearch", 379 | family: ModelFamily.LLAMA, 380 | recommended_config: { 381 | temperature: 1, 382 | presence_penalty: 0, 383 | frequency_penalty: 0, 384 | top_p: 1, 385 | }, 386 | }, 387 | // Phi 388 | { 389 | name: "Phi-3.5-mini-instruct-q4f16_1-MLC", 390 | display_name: "Phi", 391 | provider: "Microsoft", 392 | family: ModelFamily.PHI, 393 | recommended_config: { 394 | temperature: 1, 395 | presence_penalty: 0, 396 | frequency_penalty: 0, 397 | top_p: 1, 398 | }, 399 | }, 400 | { 401 | name: "Phi-3.5-mini-instruct-q4f32_1-MLC", 402 | display_name: "Phi", 403 | provider: "Microsoft", 404 | family: ModelFamily.PHI, 405 | recommended_config: { 406 | temperature: 1, 407 | presence_penalty: 0, 408 | frequency_penalty: 0, 409 | top_p: 1, 410 | }, 411 | }, 412 | { 413 | name: "Phi-3.5-mini-instruct-q4f16_1-MLC-1k", 414 | display_name: "Phi", 415 | provider: "Microsoft", 416 | family: ModelFamily.PHI, 417 | 
recommended_config: { 418 | temperature: 1, 419 | presence_penalty: 0, 420 | frequency_penalty: 0, 421 | top_p: 1, 422 | }, 423 | }, 424 | { 425 | name: "Phi-3.5-mini-instruct-q4f32_1-MLC-1k", 426 | display_name: "Phi", 427 | provider: "Microsoft", 428 | family: ModelFamily.PHI, 429 | recommended_config: { 430 | temperature: 1, 431 | presence_penalty: 0, 432 | frequency_penalty: 0, 433 | top_p: 1, 434 | }, 435 | }, 436 | { 437 | name: "Mistral-7B-Instruct-v0.3-q4f16_1-MLC", 438 | display_name: "Mistral", 439 | provider: "Mistral AI", 440 | family: ModelFamily.MISTRAL, 441 | recommended_config: { 442 | temperature: 1, 443 | presence_penalty: 0, 444 | frequency_penalty: 0, 445 | top_p: 1, 446 | }, 447 | }, 448 | { 449 | name: "Mistral-7B-Instruct-v0.3-q4f32_1-MLC", 450 | display_name: "Mistral", 451 | provider: "Mistral AI", 452 | family: ModelFamily.MISTRAL, 453 | recommended_config: { 454 | temperature: 1, 455 | presence_penalty: 0, 456 | frequency_penalty: 0, 457 | top_p: 1, 458 | }, 459 | }, 460 | { 461 | name: "Mistral-7B-Instruct-v0.2-q4f16_1-MLC", 462 | display_name: "Mistral", 463 | provider: "Mistral AI", 464 | family: ModelFamily.MISTRAL, 465 | recommended_config: { 466 | temperature: 0.7, 467 | top_p: 0.95, 468 | }, 469 | }, 470 | { 471 | name: "OpenHermes-2.5-Mistral-7B-q4f16_1-MLC", 472 | display_name: "OpenHermes", 473 | provider: "NousResearch", 474 | family: ModelFamily.MISTRAL, 475 | recommended_config: { 476 | temperature: 0.7, 477 | top_p: 0.95, 478 | }, 479 | }, 480 | { 481 | name: "NeuralHermes-2.5-Mistral-7B-q4f16_1-MLC", 482 | display_name: "NeuralHermes", 483 | provider: "Maxime Labonne", 484 | family: ModelFamily.MISTRAL, 485 | recommended_config: { 486 | temperature: 0.7, 487 | top_p: 0.95, 488 | }, 489 | }, 490 | { 491 | name: "WizardMath-7B-V1.1-q4f16_1-MLC", 492 | display_name: "WizardMath", 493 | provider: "WizardLM", 494 | family: ModelFamily.WIZARD_MATH, 495 | recommended_config: { 496 | temperature: 0.7, 497 | top_p: 0.95, 498 | }, 
499 | }, 500 | // SmolLM2 501 | { 502 | name: "SmolLM2-1.7B-Instruct-q4f16_1-MLC", 503 | display_name: "SmolLM", 504 | provider: "HuggingFaceTB", 505 | family: ModelFamily.SMOL_LM, 506 | recommended_config: { 507 | temperature: 1, 508 | presence_penalty: 0, 509 | frequency_penalty: 0, 510 | top_p: 1, 511 | }, 512 | }, 513 | { 514 | name: "SmolLM2-1.7B-Instruct-q4f32_1-MLC", 515 | display_name: "SmolLM", 516 | provider: "HuggingFaceTB", 517 | family: ModelFamily.SMOL_LM, 518 | recommended_config: { 519 | temperature: 1, 520 | presence_penalty: 0, 521 | frequency_penalty: 0, 522 | top_p: 1, 523 | }, 524 | }, 525 | { 526 | name: "SmolLM2-360M-Instruct-q0f16-MLC", 527 | display_name: "SmolLM", 528 | provider: "HuggingFaceTB", 529 | family: ModelFamily.SMOL_LM, 530 | recommended_config: { 531 | temperature: 1, 532 | presence_penalty: 0, 533 | frequency_penalty: 0, 534 | top_p: 1, 535 | }, 536 | }, 537 | { 538 | name: "SmolLM2-360M-Instruct-q0f32-MLC", 539 | display_name: "SmolLM", 540 | provider: "HuggingFaceTB", 541 | family: ModelFamily.SMOL_LM, 542 | recommended_config: { 543 | temperature: 1, 544 | presence_penalty: 0, 545 | frequency_penalty: 0, 546 | top_p: 1, 547 | }, 548 | }, 549 | { 550 | name: "SmolLM2-360M-Instruct-q4f16_1-MLC", 551 | display_name: "SmolLM", 552 | provider: "HuggingFaceTB", 553 | family: ModelFamily.SMOL_LM, 554 | recommended_config: { 555 | temperature: 1, 556 | presence_penalty: 0, 557 | frequency_penalty: 0, 558 | top_p: 1, 559 | }, 560 | }, 561 | { 562 | name: "SmolLM2-360M-Instruct-q4f32_1-MLC", 563 | display_name: "SmolLM", 564 | provider: "HuggingFaceTB", 565 | family: ModelFamily.SMOL_LM, 566 | recommended_config: { 567 | temperature: 1, 568 | presence_penalty: 0, 569 | frequency_penalty: 0, 570 | top_p: 1, 571 | }, 572 | }, 573 | { 574 | name: "SmolLM2-135M-Instruct-q0f16-MLC", 575 | display_name: "SmolLM", 576 | provider: "HuggingFaceTB", 577 | family: ModelFamily.SMOL_LM, 578 | recommended_config: { 579 | temperature: 1, 580 | 
presence_penalty: 0, 581 | frequency_penalty: 0, 582 | top_p: 1, 583 | }, 584 | }, 585 | { 586 | name: "SmolLM2-135M-Instruct-q0f32-MLC", 587 | display_name: "SmolLM", 588 | provider: "HuggingFaceTB", 589 | family: ModelFamily.SMOL_LM, 590 | recommended_config: { 591 | temperature: 1, 592 | presence_penalty: 0, 593 | frequency_penalty: 0, 594 | top_p: 1, 595 | }, 596 | }, 597 | { 598 | name: "Qwen2.5-0.5B-Instruct-q4f16_1-MLC", 599 | display_name: "Qwen", 600 | provider: "Alibaba", 601 | family: ModelFamily.QWEN, 602 | recommended_config: { 603 | temperature: 0.7, 604 | presence_penalty: 0, 605 | frequency_penalty: 0, 606 | top_p: 0.8, 607 | }, 608 | }, 609 | { 610 | name: "Qwen2.5-0.5B-Instruct-q4f32_1-MLC", 611 | display_name: "Qwen", 612 | provider: "Alibaba", 613 | family: ModelFamily.QWEN, 614 | recommended_config: { 615 | temperature: 0.7, 616 | presence_penalty: 0, 617 | frequency_penalty: 0, 618 | top_p: 0.8, 619 | }, 620 | }, 621 | { 622 | name: "Qwen2.5-0.5B-Instruct-q0f16-MLC", 623 | display_name: "Qwen", 624 | provider: "Alibaba", 625 | family: ModelFamily.QWEN, 626 | recommended_config: { 627 | temperature: 0.7, 628 | presence_penalty: 0, 629 | frequency_penalty: 0, 630 | top_p: 0.8, 631 | }, 632 | }, 633 | { 634 | name: "Qwen2.5-0.5B-Instruct-q0f32-MLC", 635 | display_name: "Qwen", 636 | provider: "Alibaba", 637 | family: ModelFamily.QWEN, 638 | recommended_config: { 639 | temperature: 0.7, 640 | presence_penalty: 0, 641 | frequency_penalty: 0, 642 | top_p: 0.8, 643 | }, 644 | }, 645 | { 646 | name: "Qwen2.5-1.5B-Instruct-q4f16_1-MLC", 647 | display_name: "Qwen", 648 | provider: "Alibaba", 649 | family: ModelFamily.QWEN, 650 | recommended_config: { 651 | temperature: 0.7, 652 | presence_penalty: 0, 653 | frequency_penalty: 0, 654 | top_p: 0.8, 655 | }, 656 | }, 657 | { 658 | name: "Qwen2.5-1.5B-Instruct-q4f32_1-MLC", 659 | display_name: "Qwen", 660 | provider: "Alibaba", 661 | family: ModelFamily.QWEN, 662 | recommended_config: { 663 | temperature: 
0.7, 664 | presence_penalty: 0, 665 | frequency_penalty: 0, 666 | top_p: 0.8, 667 | }, 668 | }, 669 | { 670 | name: "Qwen2.5-3B-Instruct-q4f16_1-MLC", 671 | display_name: "Qwen", 672 | provider: "Alibaba", 673 | family: ModelFamily.QWEN, 674 | recommended_config: { 675 | temperature: 0.7, 676 | presence_penalty: 0, 677 | frequency_penalty: 0, 678 | top_p: 0.8, 679 | }, 680 | }, 681 | { 682 | name: "Qwen2.5-3B-Instruct-q4f32_1-MLC", 683 | display_name: "Qwen", 684 | provider: "Alibaba", 685 | family: ModelFamily.QWEN, 686 | recommended_config: { 687 | temperature: 0.7, 688 | presence_penalty: 0, 689 | frequency_penalty: 0, 690 | top_p: 0.8, 691 | }, 692 | }, 693 | { 694 | name: "Qwen2.5-7B-Instruct-q4f16_1-MLC", 695 | display_name: "Qwen", 696 | provider: "Alibaba", 697 | family: ModelFamily.QWEN, 698 | recommended_config: { 699 | temperature: 0.7, 700 | presence_penalty: 0, 701 | frequency_penalty: 0, 702 | top_p: 0.8, 703 | }, 704 | }, 705 | { 706 | name: "Qwen2.5-7B-Instruct-q4f32_1-MLC", 707 | display_name: "Qwen", 708 | provider: "Alibaba", 709 | family: ModelFamily.QWEN, 710 | recommended_config: { 711 | temperature: 0.7, 712 | presence_penalty: 0, 713 | frequency_penalty: 0, 714 | top_p: 0.8, 715 | }, 716 | }, 717 | // Qwen2.5-Coder 718 | { 719 | name: "Qwen2.5-Coder-0.5B-Instruct-q4f16_1-MLC", 720 | display_name: "Qwen", 721 | provider: "Alibaba", 722 | family: ModelFamily.QWEN, 723 | recommended_config: { 724 | temperature: 0.7, 725 | presence_penalty: 0, 726 | frequency_penalty: 0, 727 | top_p: 0.8, 728 | }, 729 | }, 730 | { 731 | name: "Qwen2.5-Coder-0.5B-Instruct-q4f32_1-MLC", 732 | display_name: "Qwen", 733 | provider: "Alibaba", 734 | family: ModelFamily.QWEN, 735 | recommended_config: { 736 | temperature: 0.7, 737 | presence_penalty: 0, 738 | frequency_penalty: 0, 739 | top_p: 0.8, 740 | }, 741 | }, 742 | { 743 | name: "Qwen2.5-Coder-0.5B-Instruct-q0f16-MLC", 744 | display_name: "Qwen", 745 | provider: "Alibaba", 746 | family: ModelFamily.QWEN, 747 | 
recommended_config: { 748 | temperature: 0.7, 749 | presence_penalty: 0, 750 | frequency_penalty: 0, 751 | top_p: 0.8, 752 | }, 753 | }, 754 | { 755 | name: "Qwen2.5-Coder-0.5B-Instruct-q0f32-MLC", 756 | display_name: "Qwen", 757 | provider: "Alibaba", 758 | family: ModelFamily.QWEN, 759 | recommended_config: { 760 | temperature: 0.7, 761 | presence_penalty: 0, 762 | frequency_penalty: 0, 763 | top_p: 0.8, 764 | }, 765 | }, 766 | { 767 | name: "Qwen2.5-Coder-1.5B-Instruct-q4f16_1-MLC", 768 | display_name: "Qwen", 769 | provider: "Alibaba", 770 | family: ModelFamily.QWEN, 771 | recommended_config: { 772 | temperature: 1.0, 773 | presence_penalty: 0, 774 | frequency_penalty: 0, 775 | top_p: 1.0, 776 | }, 777 | }, 778 | { 779 | name: "Qwen2.5-Coder-1.5B-Instruct-q4f32_1-MLC", 780 | display_name: "Qwen", 781 | provider: "Alibaba", 782 | family: ModelFamily.QWEN, 783 | recommended_config: { 784 | temperature: 1.0, 785 | presence_penalty: 0, 786 | frequency_penalty: 0, 787 | top_p: 1.0, 788 | }, 789 | }, 790 | { 791 | name: "Qwen2.5-Coder-3B-Instruct-q4f16_1-MLC", 792 | display_name: "Qwen", 793 | provider: "Alibaba", 794 | family: ModelFamily.QWEN, 795 | recommended_config: { 796 | temperature: 0.7, 797 | presence_penalty: 0, 798 | frequency_penalty: 0, 799 | top_p: 0.8, 800 | }, 801 | }, 802 | { 803 | name: "Qwen2.5-Coder-3B-Instruct-q4f32_1-MLC", 804 | display_name: "Qwen", 805 | provider: "Alibaba", 806 | family: ModelFamily.QWEN, 807 | recommended_config: { 808 | temperature: 0.7, 809 | presence_penalty: 0, 810 | frequency_penalty: 0, 811 | top_p: 0.8, 812 | }, 813 | }, 814 | { 815 | name: "Qwen2.5-Coder-7B-Instruct-q4f16_1-MLC", 816 | display_name: "Qwen", 817 | provider: "Alibaba", 818 | family: ModelFamily.QWEN, 819 | recommended_config: { 820 | temperature: 1.0, 821 | presence_penalty: 0, 822 | frequency_penalty: 0, 823 | top_p: 1.0, 824 | }, 825 | }, 826 | { 827 | name: "Qwen2.5-Coder-7B-Instruct-q4f32_1-MLC", 828 | display_name: "Qwen", 829 | provider: 
"Alibaba", 830 | family: ModelFamily.QWEN, 831 | recommended_config: { 832 | temperature: 1.0, 833 | presence_penalty: 0, 834 | frequency_penalty: 0, 835 | top_p: 1.0, 836 | }, 837 | }, 838 | // Qwen2-Math 839 | { 840 | name: "Qwen2-Math-1.5B-Instruct-q4f16_1-MLC", 841 | display_name: "Qwen", 842 | provider: "Alibaba", 843 | family: ModelFamily.QWEN, 844 | recommended_config: { 845 | temperature: 1.0, 846 | presence_penalty: 0, 847 | frequency_penalty: 0, 848 | top_p: 0.8, 849 | }, 850 | }, 851 | { 852 | name: "Qwen2-Math-1.5B-Instruct-q4f32_1-MLC", 853 | display_name: "Qwen", 854 | provider: "Alibaba", 855 | family: ModelFamily.QWEN, 856 | recommended_config: { 857 | temperature: 1.0, 858 | presence_penalty: 0, 859 | frequency_penalty: 0, 860 | top_p: 0.8, 861 | }, 862 | }, 863 | { 864 | name: "Qwen2-Math-7B-Instruct-q4f16_1-MLC", 865 | display_name: "Qwen", 866 | provider: "Alibaba", 867 | family: ModelFamily.QWEN, 868 | recommended_config: { 869 | temperature: 0.7, 870 | presence_penalty: 0, 871 | frequency_penalty: 0, 872 | top_p: 0.8, 873 | }, 874 | }, 875 | { 876 | name: "Qwen2-Math-7B-Instruct-q4f32_1-MLC", 877 | display_name: "Qwen", 878 | provider: "Alibaba", 879 | family: ModelFamily.QWEN, 880 | recommended_config: { 881 | temperature: 0.7, 882 | presence_penalty: 0, 883 | frequency_penalty: 0, 884 | top_p: 0.8, 885 | }, 886 | }, 887 | { 888 | name: "gemma-2-2b-it-q4f16_1-MLC", 889 | display_name: "Gemma", 890 | provider: "Google", 891 | family: ModelFamily.GEMMA, 892 | recommended_config: { 893 | temperature: 0.7, 894 | presence_penalty: 0, 895 | frequency_penalty: 1, 896 | top_p: 0.95, 897 | }, 898 | }, 899 | { 900 | name: "gemma-2-2b-it-q4f32_1-MLC", 901 | display_name: "Gemma", 902 | provider: "Google", 903 | family: ModelFamily.GEMMA, 904 | recommended_config: { 905 | temperature: 0.7, 906 | presence_penalty: 0, 907 | frequency_penalty: 1, 908 | top_p: 0.95, 909 | }, 910 | }, 911 | { 912 | name: "gemma-2-2b-it-q4f16_1-MLC-1k", 913 | display_name: 
"Gemma", 914 | provider: "Google", 915 | family: ModelFamily.GEMMA, 916 | recommended_config: { 917 | temperature: 0.7, 918 | presence_penalty: 0, 919 | frequency_penalty: 1, 920 | top_p: 0.95, 921 | }, 922 | }, 923 | { 924 | name: "gemma-2-2b-it-q4f32_1-MLC-1k", 925 | display_name: "Gemma", 926 | provider: "Google", 927 | family: ModelFamily.GEMMA, 928 | recommended_config: { 929 | temperature: 0.7, 930 | presence_penalty: 0, 931 | frequency_penalty: 1, 932 | top_p: 0.95, 933 | }, 934 | }, 935 | { 936 | name: "gemma-2-9b-it-q4f16_1-MLC", 937 | display_name: "Gemma", 938 | provider: "Google", 939 | family: ModelFamily.GEMMA, 940 | recommended_config: { 941 | temperature: 0.7, 942 | presence_penalty: 0, 943 | frequency_penalty: 1, 944 | top_p: 0.95, 945 | }, 946 | }, 947 | { 948 | name: "gemma-2-9b-it-q4f32_1-MLC", 949 | display_name: "Gemma", 950 | provider: "Google", 951 | family: ModelFamily.GEMMA, 952 | recommended_config: { 953 | temperature: 0.7, 954 | presence_penalty: 0, 955 | frequency_penalty: 1, 956 | top_p: 0.95, 957 | }, 958 | }, 959 | { 960 | name: "gemma-2-2b-jpn-it-q4f16_1-MLC", 961 | display_name: "Gemma", 962 | provider: "Google", 963 | family: ModelFamily.GEMMA, 964 | recommended_config: { 965 | temperature: 0.7, 966 | presence_penalty: 0, 967 | frequency_penalty: 1, 968 | top_p: 0.9, 969 | }, 970 | }, 971 | { 972 | name: "gemma-2-2b-jpn-it-q4f32_1-MLC", 973 | display_name: "Gemma", 974 | provider: "Google", 975 | family: ModelFamily.GEMMA, 976 | recommended_config: { 977 | temperature: 0.7, 978 | presence_penalty: 0, 979 | frequency_penalty: 1, 980 | top_p: 0.9, 981 | }, 982 | }, 983 | { 984 | name: "stablelm-2-zephyr-1_6b-q4f16_1-MLC", 985 | display_name: "StableLM", 986 | provider: "Hugging Face", 987 | family: ModelFamily.STABLE_LM, 988 | recommended_config: { 989 | temperature: 0.7, 990 | presence_penalty: 0, 991 | frequency_penalty: 0, 992 | top_p: 0.95, 993 | }, 994 | }, 995 | { 996 | name: "stablelm-2-zephyr-1_6b-q4f32_1-MLC", 997 | 
display_name: "StableLM", 998 | provider: "Hugging Face", 999 | family: ModelFamily.STABLE_LM, 1000 | recommended_config: { 1001 | temperature: 0.7, 1002 | presence_penalty: 0, 1003 | frequency_penalty: 0, 1004 | top_p: 0.95, 1005 | }, 1006 | }, 1007 | { 1008 | name: "stablelm-2-zephyr-1_6b-q4f16_1-MLC-1k", 1009 | display_name: "StableLM", 1010 | provider: "Hugging Face", 1011 | family: ModelFamily.STABLE_LM, 1012 | recommended_config: { 1013 | temperature: 0.7, 1014 | presence_penalty: 0, 1015 | frequency_penalty: 0, 1016 | top_p: 0.95, 1017 | }, 1018 | }, 1019 | { 1020 | name: "stablelm-2-zephyr-1_6b-q4f32_1-MLC-1k", 1021 | display_name: "StableLM", 1022 | provider: "Hugging Face", 1023 | family: ModelFamily.STABLE_LM, 1024 | recommended_config: { 1025 | temperature: 0.7, 1026 | presence_penalty: 0, 1027 | frequency_penalty: 0, 1028 | top_p: 0.95, 1029 | }, 1030 | }, 1031 | { 1032 | name: "RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC", 1033 | display_name: "RedPajama", 1034 | provider: "Together", 1035 | family: ModelFamily.REDPAJAMA, 1036 | recommended_config: { 1037 | temperature: 0.7, 1038 | top_p: 0.95, 1039 | }, 1040 | }, 1041 | { 1042 | name: "RedPajama-INCITE-Chat-3B-v1-q4f32_1-MLC", 1043 | display_name: "RedPajama", 1044 | provider: "Together", 1045 | family: ModelFamily.REDPAJAMA, 1046 | recommended_config: { 1047 | temperature: 0.7, 1048 | top_p: 0.95, 1049 | }, 1050 | }, 1051 | { 1052 | name: "RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC-1k", 1053 | display_name: "RedPajama", 1054 | provider: "Together", 1055 | family: ModelFamily.REDPAJAMA, 1056 | recommended_config: { 1057 | temperature: 0.7, 1058 | top_p: 0.95, 1059 | }, 1060 | }, 1061 | { 1062 | name: "RedPajama-INCITE-Chat-3B-v1-q4f32_1-MLC-1k", 1063 | display_name: "RedPajama", 1064 | provider: "Together", 1065 | family: ModelFamily.REDPAJAMA, 1066 | recommended_config: { 1067 | temperature: 0.7, 1068 | top_p: 0.95, 1069 | }, 1070 | }, 1071 | { 1072 | name: "TinyLlama-1.1B-Chat-v1.0-q4f16_1-MLC", 1073 | 
display_name: "TinyLlama", 1074 | provider: "Zhang Peiyuan", 1075 | family: ModelFamily.LLAMA, 1076 | recommended_config: { 1077 | temperature: 1, 1078 | presence_penalty: 0, 1079 | frequency_penalty: 0, 1080 | top_p: 1, 1081 | }, 1082 | }, 1083 | { 1084 | name: "TinyLlama-1.1B-Chat-v1.0-q4f32_1-MLC", 1085 | display_name: "TinyLlama", 1086 | provider: "Zhang Peiyuan", 1087 | family: ModelFamily.LLAMA, 1088 | recommended_config: { 1089 | temperature: 1, 1090 | presence_penalty: 0, 1091 | frequency_penalty: 0, 1092 | top_p: 1, 1093 | }, 1094 | }, 1095 | { 1096 | name: "TinyLlama-1.1B-Chat-v1.0-q4f16_1-MLC-1k", 1097 | display_name: "TinyLlama", 1098 | provider: "Zhang Peiyuan", 1099 | family: ModelFamily.LLAMA, 1100 | recommended_config: { 1101 | temperature: 1, 1102 | presence_penalty: 0, 1103 | frequency_penalty: 0, 1104 | top_p: 1, 1105 | }, 1106 | }, 1107 | { 1108 | name: "TinyLlama-1.1B-Chat-v1.0-q4f32_1-MLC-1k", 1109 | display_name: "TinyLlama", 1110 | provider: "Zhang Peiyuan", 1111 | family: ModelFamily.LLAMA, 1112 | recommended_config: { 1113 | temperature: 1, 1114 | presence_penalty: 0, 1115 | frequency_penalty: 0, 1116 | top_p: 1, 1117 | }, 1118 | }, 1119 | { 1120 | name: "Llama-3.1-70B-Instruct-q3f16_1-MLC", 1121 | display_name: "Llama", 1122 | provider: "Meta", 1123 | family: ModelFamily.LLAMA, 1124 | recommended_config: { 1125 | temperature: 0.6, 1126 | presence_penalty: 0, 1127 | frequency_penalty: 0, 1128 | top_p: 0.9, 1129 | }, 1130 | }, 1131 | { 1132 | name: "Qwen2-0.5B-Instruct-q4f16_1-MLC", 1133 | display_name: "Qwen", 1134 | provider: "Alibaba", 1135 | family: ModelFamily.QWEN, 1136 | recommended_config: { 1137 | temperature: 0.7, 1138 | presence_penalty: 0, 1139 | frequency_penalty: 0, 1140 | top_p: 0.8, 1141 | }, 1142 | }, 1143 | { 1144 | name: "Qwen2-0.5B-Instruct-q0f16-MLC", 1145 | display_name: "Qwen", 1146 | provider: "Alibaba", 1147 | family: ModelFamily.QWEN, 1148 | recommended_config: { 1149 | temperature: 0.7, 1150 | presence_penalty: 0, 
1151 | frequency_penalty: 0, 1152 | top_p: 0.8, 1153 | }, 1154 | }, 1155 | { 1156 | name: "Qwen2-0.5B-Instruct-q0f32-MLC", 1157 | display_name: "Qwen", 1158 | provider: "Alibaba", 1159 | family: ModelFamily.QWEN, 1160 | recommended_config: { 1161 | temperature: 0.7, 1162 | presence_penalty: 0, 1163 | frequency_penalty: 0, 1164 | top_p: 0.8, 1165 | }, 1166 | }, 1167 | { 1168 | name: "Qwen2-1.5B-Instruct-q4f16_1-MLC", 1169 | display_name: "Qwen", 1170 | provider: "Alibaba", 1171 | family: ModelFamily.QWEN, 1172 | recommended_config: { 1173 | temperature: 0.7, 1174 | presence_penalty: 0, 1175 | frequency_penalty: 0, 1176 | top_p: 0.8, 1177 | }, 1178 | }, 1179 | { 1180 | name: "Qwen2-1.5B-Instruct-q4f32_1-MLC", 1181 | display_name: "Qwen", 1182 | provider: "Alibaba", 1183 | family: ModelFamily.QWEN, 1184 | recommended_config: { 1185 | temperature: 0.7, 1186 | presence_penalty: 0, 1187 | frequency_penalty: 0, 1188 | top_p: 0.8, 1189 | }, 1190 | }, 1191 | { 1192 | name: "Qwen2-7B-Instruct-q4f16_1-MLC", 1193 | display_name: "Qwen", 1194 | provider: "Alibaba", 1195 | family: ModelFamily.QWEN, 1196 | recommended_config: { 1197 | temperature: 0.7, 1198 | presence_penalty: 0, 1199 | frequency_penalty: 0, 1200 | top_p: 0.8, 1201 | }, 1202 | }, 1203 | { 1204 | name: "Qwen2-7B-Instruct-q4f32_1-MLC", 1205 | display_name: "Qwen", 1206 | provider: "Alibaba", 1207 | family: ModelFamily.QWEN, 1208 | recommended_config: { 1209 | temperature: 0.7, 1210 | presence_penalty: 0, 1211 | frequency_penalty: 0, 1212 | top_p: 0.8, 1213 | }, 1214 | }, 1215 | { 1216 | name: "Llama-3-8B-Instruct-q4f32_1-MLC-1k", 1217 | display_name: "Llama", 1218 | provider: "Meta", 1219 | family: ModelFamily.LLAMA, 1220 | recommended_config: { 1221 | temperature: 0.6, 1222 | presence_penalty: 0, 1223 | frequency_penalty: 0, 1224 | top_p: 0.9, 1225 | }, 1226 | }, 1227 | { 1228 | name: "Llama-3-8B-Instruct-q4f16_1-MLC-1k", 1229 | display_name: "Llama", 1230 | provider: "Meta", 1231 | family: ModelFamily.LLAMA, 1232 
| recommended_config: { 1233 | temperature: 0.6, 1234 | presence_penalty: 0, 1235 | frequency_penalty: 0, 1236 | top_p: 0.9, 1237 | }, 1238 | }, 1239 | { 1240 | name: "Llama-3-8B-Instruct-q4f32_1-MLC", 1241 | display_name: "Llama", 1242 | provider: "Meta", 1243 | family: ModelFamily.LLAMA, 1244 | recommended_config: { 1245 | temperature: 0.6, 1246 | presence_penalty: 0, 1247 | frequency_penalty: 0, 1248 | top_p: 0.9, 1249 | }, 1250 | }, 1251 | { 1252 | name: "Llama-3-8B-Instruct-q4f16_1-MLC", 1253 | display_name: "Llama", 1254 | provider: "Meta", 1255 | family: ModelFamily.LLAMA, 1256 | recommended_config: { 1257 | temperature: 0.6, 1258 | presence_penalty: 0, 1259 | frequency_penalty: 0, 1260 | top_p: 0.9, 1261 | }, 1262 | }, 1263 | { 1264 | name: "Llama-3-70B-Instruct-q3f16_1-MLC", 1265 | display_name: "Llama", 1266 | provider: "Meta", 1267 | family: ModelFamily.LLAMA, 1268 | recommended_config: { 1269 | temperature: 0.7, 1270 | presence_penalty: 0, 1271 | frequency_penalty: 0, 1272 | top_p: 0.95, 1273 | }, 1274 | }, 1275 | // Phi3-mini-instruct 1276 | { 1277 | name: "Phi-3-mini-4k-instruct-q4f16_1-MLC", 1278 | display_name: "Phi 3", 1279 | provider: "Microsoft", 1280 | family: ModelFamily.PHI, 1281 | recommended_config: { 1282 | temperature: 0.7, 1283 | presence_penalty: 0, 1284 | frequency_penalty: 0, 1285 | top_p: 1, 1286 | }, 1287 | }, 1288 | { 1289 | name: "Phi-3-mini-4k-instruct-q4f32_1-MLC", 1290 | display_name: "Phi 3", 1291 | provider: "Microsoft", 1292 | family: ModelFamily.PHI, 1293 | recommended_config: { 1294 | temperature: 0.7, 1295 | presence_penalty: 0, 1296 | frequency_penalty: 0, 1297 | top_p: 1, 1298 | }, 1299 | }, 1300 | { 1301 | name: "Phi-3-mini-4k-instruct-q4f16_1-MLC-1k", 1302 | display_name: "Phi 3", 1303 | provider: "Microsoft", 1304 | family: ModelFamily.PHI, 1305 | recommended_config: { 1306 | temperature: 0.7, 1307 | presence_penalty: 0, 1308 | frequency_penalty: 0, 1309 | top_p: 1, 1310 | }, 1311 | }, 1312 | { 1313 | name: 
"Phi-3-mini-4k-instruct-q4f32_1-MLC-1k", 1314 | display_name: "Phi 3", 1315 | provider: "Microsoft", 1316 | family: ModelFamily.PHI, 1317 | recommended_config: { 1318 | temperature: 0.7, 1319 | presence_penalty: 0, 1320 | frequency_penalty: 0, 1321 | top_p: 1, 1322 | }, 1323 | }, 1324 | { 1325 | name: "Llama-2-7b-chat-hf-q4f32_1-MLC-1k", 1326 | display_name: "Llama", 1327 | provider: "Meta", 1328 | family: ModelFamily.LLAMA, 1329 | recommended_config: { 1330 | temperature: 0.6, 1331 | top_p: 0.9, 1332 | }, 1333 | }, 1334 | { 1335 | name: "Llama-2-7b-chat-hf-q4f16_1-MLC-1k", 1336 | display_name: "Llama", 1337 | provider: "Meta", 1338 | family: ModelFamily.LLAMA, 1339 | recommended_config: { 1340 | temperature: 0.6, 1341 | top_p: 0.9, 1342 | }, 1343 | }, 1344 | { 1345 | name: "Llama-2-7b-chat-hf-q4f32_1-MLC", 1346 | display_name: "Llama", 1347 | provider: "Meta", 1348 | family: ModelFamily.LLAMA, 1349 | recommended_config: { 1350 | temperature: 0.6, 1351 | top_p: 0.9, 1352 | }, 1353 | }, 1354 | { 1355 | name: "Llama-2-7b-chat-hf-q4f16_1-MLC", 1356 | display_name: "Llama", 1357 | provider: "Meta", 1358 | family: ModelFamily.LLAMA, 1359 | recommended_config: { 1360 | temperature: 0.6, 1361 | top_p: 0.9, 1362 | }, 1363 | }, 1364 | { 1365 | name: "Llama-2-13b-chat-hf-q4f16_1-MLC", 1366 | display_name: "Llama", 1367 | provider: "Meta", 1368 | family: ModelFamily.LLAMA, 1369 | recommended_config: { 1370 | temperature: 0.6, 1371 | top_p: 0.9, 1372 | }, 1373 | }, 1374 | { 1375 | name: "phi-2-q4f16_1-MLC", 1376 | display_name: "Phi", 1377 | provider: "Microsoft", 1378 | family: ModelFamily.PHI, 1379 | recommended_config: { 1380 | temperature: 0.7, 1381 | top_p: 0.95, 1382 | }, 1383 | }, 1384 | { 1385 | name: "phi-2-q4f32_1-MLC", 1386 | display_name: "Phi", 1387 | provider: "Microsoft", 1388 | family: ModelFamily.PHI, 1389 | recommended_config: { 1390 | temperature: 0.7, 1391 | top_p: 0.95, 1392 | }, 1393 | }, 1394 | { 1395 | name: "phi-2-q4f16_1-MLC-1k", 1396 | display_name: 
"Phi", 1397 | provider: "Microsoft", 1398 | family: ModelFamily.PHI, 1399 | recommended_config: { 1400 | temperature: 0.7, 1401 | top_p: 0.95, 1402 | }, 1403 | }, 1404 | { 1405 | name: "phi-2-q4f32_1-MLC-1k", 1406 | display_name: "Phi", 1407 | provider: "Microsoft", 1408 | family: ModelFamily.PHI, 1409 | recommended_config: { 1410 | temperature: 0.7, 1411 | top_p: 0.95, 1412 | }, 1413 | }, 1414 | { 1415 | name: "phi-1_5-q4f16_1-MLC", 1416 | display_name: "Phi", 1417 | provider: "Microsoft", 1418 | family: ModelFamily.PHI, 1419 | recommended_config: { 1420 | temperature: 0.7, 1421 | top_p: 0.95, 1422 | }, 1423 | }, 1424 | { 1425 | name: "phi-1_5-q4f32_1-MLC", 1426 | display_name: "Phi", 1427 | provider: "Microsoft", 1428 | family: ModelFamily.PHI, 1429 | recommended_config: { 1430 | temperature: 0.7, 1431 | top_p: 0.95, 1432 | }, 1433 | }, 1434 | { 1435 | name: "phi-1_5-q4f16_1-MLC-1k", 1436 | display_name: "Phi", 1437 | provider: "Microsoft", 1438 | family: ModelFamily.PHI, 1439 | recommended_config: { 1440 | temperature: 0.7, 1441 | top_p: 0.95, 1442 | }, 1443 | }, 1444 | { 1445 | name: "phi-1_5-q4f32_1-MLC-1k", 1446 | display_name: "Phi", 1447 | provider: "Microsoft", 1448 | family: ModelFamily.PHI, 1449 | recommended_config: { 1450 | temperature: 0.7, 1451 | top_p: 0.95, 1452 | }, 1453 | }, 1454 | { 1455 | name: "TinyLlama-1.1B-Chat-v0.4-q4f16_1-MLC", 1456 | display_name: "TinyLlama", 1457 | provider: "Zhang Peiyuan", 1458 | family: ModelFamily.LLAMA, 1459 | recommended_config: { 1460 | temperature: 0.7, 1461 | top_p: 0.95, 1462 | }, 1463 | }, 1464 | { 1465 | name: "TinyLlama-1.1B-Chat-v0.4-q4f32_1-MLC", 1466 | display_name: "TinyLlama", 1467 | provider: "Zhang Peiyuan", 1468 | family: ModelFamily.LLAMA, 1469 | recommended_config: { 1470 | temperature: 0.7, 1471 | top_p: 0.95, 1472 | }, 1473 | }, 1474 | { 1475 | name: "TinyLlama-1.1B-Chat-v0.4-q4f16_1-MLC-1k", 1476 | display_name: "TinyLlama", 1477 | provider: "Zhang Peiyuan", 1478 | family: ModelFamily.LLAMA, 
1479 | recommended_config: { 1480 | temperature: 0.7, 1481 | top_p: 0.95, 1482 | }, 1483 | }, 1484 | { 1485 | name: "TinyLlama-1.1B-Chat-v0.4-q4f32_1-MLC-1k", 1486 | display_name: "TinyLlama", 1487 | provider: "Zhang Peiyuan", 1488 | family: ModelFamily.LLAMA, 1489 | recommended_config: { 1490 | temperature: 0.7, 1491 | top_p: 0.95, 1492 | }, 1493 | }, 1494 | ]; 1495 | 1496 | // Get the parameter-count size token (e.g. "7B", "1.5B", "360M") from a model id, or undefined if no size token is present 1497 | export function getSize(model_id: string): string | undefined { 1498 | const sizeRegex = /-(\d+(\.\d+)?[BKM])-?/; // B/K/M suffixes; "M" is needed for million-parameter ids such as SmolLM2-360M ("-MLC" and lowercase "-1k" context markers still do not match) 1499 | const match = model_id.match(sizeRegex); 1500 | if (match) { 1501 | return match[1]; 1502 | } 1503 | return undefined; 1504 | } 1505 | 1506 | // Get the quantization token (e.g. "q4f16_1") from a model id, or undefined if none is present 1507 | export function getQuantization(model_id: string): string | undefined { 1508 | const quantizationRegex = /-(q[0-9]f[0-9]+(?:_[0-9])?)-/; 1509 | const match = model_id.match(quantizationRegex); 1510 | if (match) { 1511 | return match[1]; 1512 | } 1513 | return undefined; 1514 | } 1515 | // DEFAULT_MODEL_BASES restricted to ids prebuilt by the current WebLLM version, annotated with size/quantization. NOTE(review): model_list.map(...).includes(...) rebuilds the id array for every model; a precomputed Set would avoid the repeated scan — confirm before changing. 1516 | export const DEFAULT_MODELS: ModelRecord[] = DEFAULT_MODEL_BASES.filter( 1517 | (model) => { 1518 | if ( 1519 | !prebuiltAppConfig.model_list.map((m) => m.model_id).includes(model.name) 1520 | ) { 1521 | console.warn( 1522 | `Model ${model.name} not supported by current WebLLM version.` 1523 | ); 1524 | return false; 1525 | } 1526 | return true; 1527 | } 1528 | ).map((model) => ({ 1529 | ...model, 1530 | size: getSize(model.name), 1531 | quantization: getQuantization(model.name), 1532 | })); 1533 | -------------------------------------------------------------------------------- /src/vite-env.d.ts: -------------------------------------------------------------------------------- 1 | /// 2 | -------------------------------------------------------------------------------- /src/whisper-worker.js: -------------------------------------------------------------------------------- 1 | /* eslint-disable camelcase */ 2 | import { pipeline, env } from "@xenova/transformers";
3 | 4 | // Disable local models 5 | env.allowLocalModels = false; 6 | 7 | // Define model factories 8 | // Ensures only one model is created of each type 9 | class PipelineFactory { 10 | static task = null; 11 | static model = null; 12 | static quantized = null; 13 | static instance = null; 14 | 15 | constructor(tokenizer, model, quantized) { 16 | this.tokenizer = tokenizer; 17 | this.model = model; 18 | this.quantized = quantized; 19 | } 20 | // Lazily build the pipeline on first use; later calls return the cached (promised) instance. 21 | static async getInstance(progress_callback = null) { 22 | if (this.instance === null) { 23 | this.instance = pipeline(this.task, this.model, { 24 | quantized: this.quantized, 25 | progress_callback, 26 | 27 | // For medium models, we need to load the `no_attentions` revision to avoid running out of memory 28 | revision: this.model.includes("/whisper-medium") ? "no_attentions" : "main" 29 | }); 30 | } 31 | 32 | return this.instance; 33 | } 34 | } 35 | // Worker entry point: run transcription for the posted request and report the final result to the main thread. 36 | self.addEventListener("message", async (event) => { 37 | const message = event.data; 38 | 39 | // Do some work... 40 | // TODO use message data 41 | let transcript = await transcribe( 42 | message.audio, 43 | message.model, 44 | message.multilingual, 45 | message.quantized, 46 | message.subtask, 47 | message.language, 48 | ); 49 | if (transcript === null) return; 50 | 51 | // Send the result back to the main thread 52 | self.postMessage({ 53 | status: "complete", 54 | task: "automatic-speech-recognition", 55 | data: transcript, 56 | }); 57 | }); 58 | 59 | class AutomaticSpeechRecognitionPipelineFactory extends PipelineFactory { 60 | static task = "automatic-speech-recognition"; 61 | static model = null; 62 | static quantized = null; 63 | } 64 | 65 | const transcribe = async ( 66 | audio, 67 | model, 68 | multilingual, 69 | quantized, 70 | subtask, 71 | language, 72 | ) => { 73 | // Transcribe `audio`, streaming partial transcripts to the main thread; resolves null if generation fails (the error is posted instead). 74 | const isDistilWhisper = model.startsWith("distil-whisper/"); 75 | 76 | let modelName = model; 77 | if (!isDistilWhisper && !multilingual) { 78 | modelName += ".en" 79 | } 80 | 81 | const p = AutomaticSpeechRecognitionPipelineFactory; 82 | if (p.model !== modelName || p.quantized !== quantized) { 83 | // Invalidate model if different 84 | p.model = modelName; 85 | p.quantized = quantized; 86 | 87 | if (p.instance !== null) { 88 | (await p.getInstance()).dispose(); 89 | p.instance = null; 90 | } 91 | } 92 | 93 | // Load transcriber model 94 | let transcriber = await p.getInstance((data) => { 95 | self.postMessage(data); 96 | }); 97 | // Timestamp granularity fed to _decode_asr below — presumably seconds per token position; confirm against transformers.js docs. 98 | const time_precision = 99 | transcriber.processor.feature_extractor.config.chunk_length / 100 | transcriber.model.config.max_source_positions; 101 | 102 | // Storage for chunks to be processed. Initialise with an empty chunk. 103 | let chunks_to_process = [ 104 | { 105 | tokens: [], 106 | finalised: false, 107 | }, 108 | ]; 109 | 110 | // TODO: Storage for fully-processed and merged chunks 111 | // let decoded_chunks = []; 112 | // Finalise the in-progress chunk and, unless it was the last one, open a fresh empty chunk for the next window. 113 | function chunk_callback(chunk) { 114 | let last = chunks_to_process[chunks_to_process.length - 1]; 115 | 116 | // Overwrite last chunk with new info 117 | Object.assign(last, chunk); 118 | last.finalised = true; 119 | 120 | // Create an empty chunk after, if it not the last chunk 121 | if (!chunk.is_last) { 122 | chunks_to_process.push({ 123 | tokens: [], 124 | finalised: false, 125 | }); 126 | } 127 | } 128 | 129 | // Inject custom callback function to handle merging of chunks 130 | function callback_function(item) { 131 | let last = chunks_to_process[chunks_to_process.length - 1]; 132 | 133 | // Update tokens of last chunk 134 | last.tokens = [...item[0].output_token_ids]; 135 | 136 | // Merge text chunks 137 | // TODO optimise so we don't have to decode all chunks every time 138 | let data = transcriber.tokenizer._decode_asr(chunks_to_process, { 139 | time_precision: time_precision, 140 | return_timestamps: true, 141 | force_full_sequences: false, 142 | }); 143 | 144 | self.postMessage({ 145 | status: "update", 146 | task: "automatic-speech-recognition", 147 | data: data, 148 | }); 149 | } 150 | 151 | // Actually run transcription 152 | let output = await transcriber(audio, { 153 | // Greedy 154 | top_k: 0, 155 | do_sample: false, 156 | 157 | // Sliding window 158 | chunk_length_s: isDistilWhisper ? 20 : 30, 159 | stride_length_s: isDistilWhisper ? 3 : 5, 160 | 161 | // Language and task 162 | language: language, 163 | task: subtask, 164 | 165 | // Return timestamps 166 | return_timestamps: true, 167 | force_full_sequences: false, 168 | 169 | // Callback functions 170 | callback_function: callback_function, // after each generation step 171 | chunk_callback: chunk_callback, // after each chunk is processed 172 | }).catch((error) => { 173 | self.postMessage({ 174 | status: "error", 175 | task: "automatic-speech-recognition", 176 | data: error, 177 | }); 178 | return null; 179 | }); 180 | 181 | return output; 182 | }; 183 | -------------------------------------------------------------------------------- /tsconfig.app.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.app.tsbuildinfo", 4 | "target": "ES2020", 5 | "useDefineForClassFields": true, 6 | "lib": ["ES2020", "DOM", "DOM.Iterable"], 7 | "module": "ESNext", 8 | "skipLibCheck": true, 9 | 10 | /* Bundler mode */ 11 | "moduleResolution": "bundler", 12 | "allowImportingTsExtensions": true, 13 | "isolatedModules": true, 14 | "moduleDetection": "force", 15 | "noEmit": true, 16 | "jsx": "react-jsx", 17 | 18 | /* Linting */ 19 | "strict": true, 20 | "noUnusedLocals": true, 21 | "noUnusedParameters": true, 22 | "noFallthroughCasesInSwitch": true, 23 | "noUncheckedSideEffectImports": true 24 | }, 25 | "include": ["src"] 26 | } 27 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "files": [], 3 | "references": [ 4 | { "path": "./tsconfig.app.json" }, 5 | { "path": "./tsconfig.node.json" } 6 | ] 7 | } 8 | -------------------------------------------------------------------------------- /tsconfig.node.json: -------------------------------------------------------------------------------- 1 | { 2 |
"compilerOptions": {
 3 |     "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.node.tsbuildinfo",
 4 |     "target": "ES2022",
 5 |     "lib": ["ES2023"],
 6 |     "module": "ESNext",
 7 |     "skipLibCheck": true,
 8 | 
 9 |     /* Bundler mode */
10 |     "moduleResolution": "bundler",
11 |     "allowImportingTsExtensions": true,
12 |     "isolatedModules": true,
13 |     "moduleDetection": "force",
14 |     "noEmit": true,
15 | 
16 |     /* Linting */
17 |     "strict": true,
18 |     "noUnusedLocals": true,
19 |     "noUnusedParameters": true,
20 |     "noFallthroughCasesInSwitch": true,
21 |     "noUncheckedSideEffectImports": true
22 |   },
23 |   "include": ["vite.config.ts"] // type-checks only the node-side Vite config
24 | }
25 | 
--------------------------------------------------------------------------------
/vite.config.ts:
--------------------------------------------------------------------------------
1 | import { defineConfig } from "vite";
2 | import react from "@vitejs/plugin-react";
3 | import tailwindcss from "@tailwindcss/vite";
4 | 
5 | // https://vite.dev/config/
6 | export default defineConfig({
7 |   plugins: [react(), tailwindcss()], // React + Tailwind CSS v4 via first-party Vite plugins
8 | });
9 | 
--------------------------------------------------------------------------------