├── .github
├── FUNDING.yml
└── workflows
│ └── deploy.yml
├── .gitignore
├── LICENSE
├── README.md
├── eslint.config.js
├── index.html
├── package-lock.json
├── package.json
├── public
└── vite.svg
├── src
├── App.tsx
├── assets
│ ├── icons
│ │ ├── CheckmarkIcon.tsx
│ │ ├── CircleSpinner.tsx
│ │ ├── CopyButtonIcon.tsx
│ │ ├── CopyIcon.tsx
│ │ ├── GitHubIcon.tsx
│ │ ├── LoadingSpinner.tsx
│ │ ├── MicrophoneIcon.tsx
│ │ ├── MoonIcon.tsx
│ │ ├── ResetIcon.tsx
│ │ ├── SendIcon.tsx
│ │ ├── StopIcon.tsx
│ │ └── SunIcon.tsx
│ └── react.svg
├── components
│ ├── AudioManager.tsx
│ ├── AudioRecorder.tsx
│ ├── AudioSection.tsx
│ ├── DarkModeToggle.tsx
│ ├── LatexOutput.tsx
│ ├── LatexRenderer.tsx
│ ├── LoadingIndicator.tsx
│ ├── ModelLoader.tsx
│ ├── ModelSelectionPanel.tsx
│ ├── ModelSelector.tsx
│ ├── Progress.tsx
│ └── Transcript.tsx
├── hooks
│ ├── useConversation.ts
│ ├── useLLMEngine.ts
│ ├── useTranscriber.ts
│ └── useWorker.ts
├── index.css
├── main.tsx
├── utils
│ ├── AudioUtils.ts
│ ├── BlobFix.ts
│ └── Constants.ts
├── vite-env.d.ts
└── whisper-worker.js
├── tsconfig.app.json
├── tsconfig.json
├── tsconfig.node.json
└── vite.config.ts
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | # These are supported funding model platforms
2 |
3 | github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
4 | patreon: # Replace with a single Patreon username
5 | open_collective: # Replace with a single Open Collective username
6 | ko_fi: thomasmckanna
7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
9 | liberapay: # Replace with a single Liberapay username
10 | issuehunt: # Replace with a single IssueHunt username
11 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
12 | polar: # Replace with a single Polar username
13 | buy_me_a_coffee: # Replace with a single Buy Me a Coffee username
14 | thanks_dev: # Replace with a single thanks.dev username
15 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
16 |
--------------------------------------------------------------------------------
/.github/workflows/deploy.yml:
--------------------------------------------------------------------------------
1 | name: Deploy to GitHub Pages
2 |
3 | on:
4 | push:
5 | branches: ["main"] # every push to main triggers a production deploy
6 | workflow_dispatch: # also allow manual runs from the Actions tab
7 |
8 | permissions:
9 | contents: read # checkout needs read access only
10 | pages: write # required to publish to GitHub Pages
11 | id-token: write # OIDC token used by actions/deploy-pages to authenticate
12 |
13 | concurrency:
14 | group: "pages" # one Pages pipeline at a time
15 | cancel-in-progress: false # let an in-flight production deploy finish; queue new ones
16 |
17 | jobs:
18 | build:
19 | runs-on: ubuntu-latest
20 | steps:
21 | - name: Checkout
22 | uses: actions/checkout@v4
23 |
24 | - name: Setup Node
25 | uses: actions/setup-node@v4
26 | with:
27 | node-version: "20"
28 | cache: "npm" # cache ~/.npm keyed on package-lock.json
29 |
30 | - name: Install dependencies
31 | run: npm ci # reproducible install from the lockfile
32 |
33 | - name: Build
34 | run: |
35 | # Build with the correct base path for GitHub Pages
36 | npm run build -- --base=/
37 |
38 | - name: Create CNAME file
39 | run: echo "latex.thomasmckanna.com" > dist/CNAME # custom domain for Pages
40 |
41 | - name: Setup Pages
42 | uses: actions/configure-pages@v4
43 |
44 | - name: Upload artifact
45 | uses: actions/upload-pages-artifact@v3
46 | with:
47 | path: "./dist" # publish the Vite build output
48 |
49 | deploy:
50 | environment:
51 | name: github-pages
52 | url: ${{ steps.deployment.outputs.page_url }} # surfaced in the run summary
53 | runs-on: ubuntu-latest
54 | needs: build # deploy only after a successful build
55 | steps:
56 | - name: Deploy to GitHub Pages
57 | id: deployment
58 | uses: actions/deploy-pages@v4
59 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .aider*
2 |
3 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
4 |
5 | # dependencies
6 | node_modules
7 | /.pnp
8 | .pnp.js
9 |
10 | # testing
11 | /coverage
12 |
13 | # next.js
14 | /.next/
15 | /out/
16 |
17 | # production build output
18 | /build
19 | dist
20 | dist-ssr
21 |
22 | # misc
23 | .DS_Store
24 | *.pem
25 | *.local
26 |
27 | # logs
28 | logs
29 | *.log
30 | npm-debug.log*
31 | yarn-debug.log*
32 | yarn-error.log*
33 | .pnpm-debug.log*
34 | pnpm-debug.log*
35 | lerna-debug.log*
36 |
37 | # local env files
38 | .env*.local
39 |
40 | # vercel
41 | .vercel
42 |
43 | # typescript
44 | *.tsbuildinfo
45 | next-env.d.ts
46 | dev
47 |
48 | # docker-compose env files
49 | .env
50 |
51 | # keys
52 | *.key
53 | *.key.pub
54 |
55 | # service worker generated files
56 | public/sw.js
57 | public/workbox-*.js
58 | public/workbox-*.js.map
59 | public/worker-*.js
60 |
61 | # Editor directories and files
62 | # NOTE: a bare `.vscode` entry was removed here — ignoring the whole
63 | # directory prevents the `!.vscode/extensions.json` re-include below
64 | # from ever matching (git does not descend into ignored directories).
65 | .vscode/*
66 | !.vscode/extensions.json
67 | .idea
68 | *.suo
69 | *.ntvs*
70 | *.njsproj
71 | *.sln
72 | *.sw?
73 |
74 | reference/
75 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2025 Thomas McKanna
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Speech to LaTeX
2 |
3 |
4 |
5 | [](https://opensource.org/licenses/MIT)
6 | [](https://github.com/mlc-ai/web-llm)
7 | [](https://github.com/xenova/whisper-web)
8 | [](https://www.mathjax.org/)
9 |
10 | https://github.com/user-attachments/assets/595ae5fd-17d8-49bc-b811-ad450c06a02a
11 |
12 | ## [🚀 Try it now!](https://thomas-mckanna.github.io/speech-to-latex/)
13 |
14 | ## Overview
15 |
16 | Speech to LaTeX is a powerful web application that converts spoken mathematics into LaTeX expressions, running entirely in your browser. No server required! Simply speak your mathematical expressions, and watch as they're transformed into beautifully formatted LaTeX.
17 |
18 | ## Features
19 |
20 | - 🎤 **Voice to LaTeX** - Dictate mathematical expressions naturally
21 | - 💻 **100% Client-side** - All processing happens in your browser
22 | - 🔒 **Privacy-focused** - No data leaves your device
23 | - 🌐 **Works offline** - Once loaded, no internet connection needed
24 | - ⚡ **Real-time conversion** - See results as you speak
25 | - 🎛️ **Customizable models** - Choose from different Whisper and LLM models
26 |
27 | ## How It Works
28 |
29 | Speech to LaTeX combines three powerful technologies:
30 |
31 | 1. **[Whisper Web](https://github.com/xenova/whisper-web)** - Transcribes your speech to text
32 | 2. **[WebLLM](https://github.com/mlc-ai/web-llm)** - Converts transcribed text to LaTeX expressions
33 | 3. **[MathJax](https://www.mathjax.org/)** - Renders LaTeX expressions beautifully in the browser
34 |
35 | ## Getting Started
36 |
37 | 1. Visit the [Speech to LaTeX app](https://thomas-mckanna.github.io/speech-to-latex/)
38 | 2. Allow microphone access when prompted
39 | 3. Click the microphone button and start speaking your mathematical expression
40 | 4. Watch as your speech is converted to LaTeX in real-time
41 | 5. Copy the generated LaTeX code or view the rendered expression
42 |
43 | ## Example Expressions
44 |
45 | Try saying:
46 | - "The quadratic formula is x equals negative b plus or minus the square root of b squared minus 4ac all over 2a"
47 | - "The integral from 0 to infinity of e to the negative x squared dx equals square root of pi over 2"
48 | - "The sum from n equals 1 to infinity of 1 over n squared equals pi squared over 6"
49 |
50 | ## Local Development
51 |
52 | ```bash
53 | # Clone the repository
54 | git clone https://github.com/Thomas-McKanna/speech-to-latex.git
55 |
56 | # Navigate to the project directory
57 | cd speech-to-latex
58 |
59 | # Install dependencies
60 | npm install
61 |
62 | # Start the development server
63 | npm run dev
64 | ```
65 |
66 | ## Contributing
67 |
68 | Contributions are welcome! Please feel free to submit a Pull Request.
69 |
70 | ## Acknowledgements
71 |
72 | This project wouldn't be possible without these amazing open-source projects:
73 |
74 | - [WebLLM](https://github.com/mlc-ai/web-llm) - For running LLMs directly in the browser
75 | - [Whisper Web](https://github.com/xenova/whisper-web) - For browser-based speech recognition
76 | - [MathJax](https://www.mathjax.org/) - For rendering LaTeX expressions
77 |
78 | ## License
79 |
80 | This project is licensed under the MIT License - see the LICENSE file for details.
81 |
--------------------------------------------------------------------------------
/eslint.config.js:
--------------------------------------------------------------------------------
1 | import js from '@eslint/js'
2 | import globals from 'globals'
3 | import reactHooks from 'eslint-plugin-react-hooks'
4 | import reactRefresh from 'eslint-plugin-react-refresh'
5 | import tseslint from 'typescript-eslint'
6 |
7 | export default tseslint.config(
8 | { ignores: ['dist'] }, // never lint build output
9 | {
10 | extends: [js.configs.recommended, ...tseslint.configs.recommended], // base JS + TS recommended rule sets
11 | files: ['**/*.{ts,tsx}'], // this config block applies to TypeScript sources only
12 | languageOptions: {
13 | ecmaVersion: 2020,
14 | globals: globals.browser, // browser app: window, document, etc. are predefined
15 | },
16 | plugins: {
17 | 'react-hooks': reactHooks,
18 | 'react-refresh': reactRefresh,
19 | },
20 | rules: {
21 | ...reactHooks.configs.recommended.rules, // enforce the Rules of Hooks
22 | 'react-refresh/only-export-components': [
23 | 'warn',
24 | { allowConstantExport: true }, // constant exports don't break Fast Refresh
25 | ],
26 | },
27 | },
28 | )
29 |
29 |
--------------------------------------------------------------------------------
/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 | Speech to LaTeX
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "speech-to-latex",
3 | "private": true,
4 | "version": "0.0.0",
5 | "type": "module",
6 | "scripts": {
7 | "dev": "vite",
8 | "build": "vite build",
9 | "lint": "eslint .",
10 | "preview": "vite preview"
11 | },
12 | "dependencies": {
13 | "@mlc-ai/web-llm": "^0.2.78",
14 | "@tailwindcss/vite": "^4.1.4",
15 | "@xenova/transformers": "^2.17.2",
16 | "better-react-mathjax": "^2.3.0",
17 | "react": "^19.0.0",
18 | "react-dom": "^19.0.0",
19 | "tailwindcss": "^4.1.4"
20 | },
21 | "devDependencies": {
22 | "@eslint/js": "^9.22.0",
23 | "@types/react": "^19.0.10",
24 | "@types/react-dom": "^19.0.4",
25 | "@vitejs/plugin-react": "^4.3.4",
26 | "eslint": "^9.22.0",
27 | "eslint-plugin-react-hooks": "^5.2.0",
28 | "eslint-plugin-react-refresh": "^0.4.19",
29 | "globals": "^16.0.0",
30 | "typescript": "~5.7.2",
31 | "typescript-eslint": "^8.26.1",
32 | "vite": "^6.3.0"
33 | }
34 | }
35 |
--------------------------------------------------------------------------------
/public/vite.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/src/App.tsx:
--------------------------------------------------------------------------------
1 | import { useState, useEffect } from "react";
2 | import { useTranscriber } from "./hooks/useTranscriber";
3 | import { ModelSelectionPanel } from "./components/ModelSelectionPanel";
4 | import { LatexOutput } from "./components/LatexOutput";
5 | import { LoadingIndicator } from "./components/LoadingIndicator";
6 | import { ModelLoader } from "./components/ModelLoader";
7 | import { AudioSection } from "./components/AudioSection";
8 | import { useLLMEngine } from "./hooks/useLLMEngine";
9 | import { useConversation } from "./hooks/useConversation";
10 | import { DarkModeToggle } from "./components/DarkModeToggle";
11 | import { GitHubIcon } from "./assets/icons/GitHubIcon";
12 |
13 | function App() {
14 | const [whisperModel, setWhisperModel] = useState("Xenova/whisper-tiny");
15 | const [isWhisperModelLoading, setIsWhisperModelLoading] = useState(false);
16 | const [llmModel, setLlmModel] = useState("Llama-3.1-8B-Instruct-q4f32_1-MLC");
17 |
18 | const transcriber = useTranscriber();
19 | const { engine, loadingStatus, modelLoaded, isChangingModel } =
20 | useLLMEngine(llmModel);
21 | const {
22 | latexOutput,
23 | isLoading,
24 | hasPreviousExpression,
25 | sendToLLM,
26 | resetConversation,
27 | } = useConversation(engine);
28 |
29 | // Set the whisper model when it changes in the dropdown
30 | useEffect(() => {
31 | const loadModel = async () => {
32 | setIsWhisperModelLoading(true);
33 | await transcriber.setModel(whisperModel);
34 | setIsWhisperModelLoading(false);
35 | };
36 |
37 | loadModel();
38 | }, [whisperModel]);
39 |
40 | return (
41 |
42 |
43 |
44 |
45 | Speech to LaTeX
46 |
47 |
48 |
49 |
50 |
51 |
52 | {modelLoaded && (
53 |
54 | {/* LaTeX Rendered Output */}
55 |
62 |
63 | {/* Audio Recording Section */}
64 |
70 |
71 | {/* Model Selection Panel */}
72 |
73 |
81 |
82 |
83 | )}
84 |
85 |
86 | {/* GitHub Link */}
87 |
98 |
99 | );
100 | }
101 |
102 | export default App;
103 |
--------------------------------------------------------------------------------
/src/assets/icons/CheckmarkIcon.tsx:
--------------------------------------------------------------------------------
1 | interface CheckmarkIconProps {
2 | className?: string;
3 | }
4 |
5 | export function CheckmarkIcon({ className = "" }: CheckmarkIconProps) {
6 | return (
7 |
19 | );
20 | }
21 |
--------------------------------------------------------------------------------
/src/assets/icons/CircleSpinner.tsx:
--------------------------------------------------------------------------------
1 | import React from "react";
2 |
3 | export const CircleSpinner: React.FC> = (props) => (
4 |
22 | );
23 |
--------------------------------------------------------------------------------
/src/assets/icons/CopyButtonIcon.tsx:
--------------------------------------------------------------------------------
1 | interface CopyButtonIconProps {
2 | className?: string;
3 | }
4 |
5 | export function CopyButtonIcon({ className = "" }: CopyButtonIconProps) {
6 | return (
7 |
20 | );
21 | }
22 |
--------------------------------------------------------------------------------
/src/assets/icons/CopyIcon.tsx:
--------------------------------------------------------------------------------
1 | import React from "react";
2 |
3 | interface CopyIconProps {
4 | className?: string;
5 | }
6 |
7 | export const CopyIcon: React.FC = ({ className = "" }) => {
8 | return (
9 |
25 | );
26 | };
27 |
--------------------------------------------------------------------------------
/src/assets/icons/GitHubIcon.tsx:
--------------------------------------------------------------------------------
1 | interface GitHubIconProps {
2 | className?: string;
3 | }
4 |
5 | export function GitHubIcon({ className = "" }: GitHubIconProps) {
6 | return (
7 |
15 | );
16 | }
17 |
--------------------------------------------------------------------------------
/src/assets/icons/LoadingSpinner.tsx:
--------------------------------------------------------------------------------
1 | import React from "react";
2 |
3 | export const LoadingSpinner: React.FC> = (props) => (
4 |
25 | );
26 |
--------------------------------------------------------------------------------
/src/assets/icons/MicrophoneIcon.tsx:
--------------------------------------------------------------------------------
1 | import React from "react";
2 |
3 | export const MicrophoneIcon: React.FC> = (props) => (
4 |
19 | );
20 |
--------------------------------------------------------------------------------
/src/assets/icons/MoonIcon.tsx:
--------------------------------------------------------------------------------
1 | interface MoonIconProps {
2 | className?: string;
3 | }
4 |
5 | export function MoonIcon({ className = "" }: MoonIconProps) {
6 | return (
7 |
15 | );
16 | }
17 |
--------------------------------------------------------------------------------
/src/assets/icons/ResetIcon.tsx:
--------------------------------------------------------------------------------
1 | interface ResetIconProps {
2 | className?: string;
3 | }
4 |
5 | export function ResetIcon({ className = "" }: ResetIconProps) {
6 | return (
7 |
24 | );
25 | }
26 |
--------------------------------------------------------------------------------
/src/assets/icons/SendIcon.tsx:
--------------------------------------------------------------------------------
1 | import React from "react";
2 |
3 | export const SendIcon: React.FC> = (props) => (
4 |
19 | );
20 |
--------------------------------------------------------------------------------
/src/assets/icons/StopIcon.tsx:
--------------------------------------------------------------------------------
1 | import React from "react";
2 |
3 | export const StopIcon: React.FC> = (props) => (
4 |
14 | );
15 |
--------------------------------------------------------------------------------
/src/assets/icons/SunIcon.tsx:
--------------------------------------------------------------------------------
1 | interface SunIconProps {
2 | className?: string;
3 | }
4 |
5 | export function SunIcon({ className = "" }: SunIconProps) {
6 | return (
7 |
19 | );
20 | }
21 |
--------------------------------------------------------------------------------
/src/assets/react.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/src/components/AudioManager.tsx:
--------------------------------------------------------------------------------
1 | import { useState, useEffect } from "react";
2 | import Constants from "../utils/Constants";
3 | import { Transcriber } from "../hooks/useTranscriber";
4 | import Progress from "./Progress";
5 | import AudioRecorder from "./AudioRecorder";
6 |
7 | export function AudioManager({
8 | transcriber,
9 | onTranscriptReady,
10 | isModelLoading,
11 | }: {
12 | transcriber: Transcriber;
13 | onTranscriptReady?: (text: string) => void;
14 | isModelLoading?: boolean;
15 | }) {
16 | const [isRecording, setIsRecording] = useState(false);
17 |
18 | const processAudioRecording = async (data: Blob) => {
19 | if (!data) return;
20 |
21 | const fileReader = new FileReader();
22 |
23 | fileReader.onloadend = async () => {
24 | try {
25 | const audioCTX = new AudioContext({
26 | sampleRate: Constants.SAMPLING_RATE,
27 | });
28 | const arrayBuffer = fileReader.result as ArrayBuffer;
29 | const decoded = await audioCTX.decodeAudioData(arrayBuffer);
30 |
31 | // Start transcription immediately
32 | transcriber.start(decoded);
33 | } catch (error) {
34 | console.error("Error processing audio:", error);
35 | }
36 | };
37 |
38 | fileReader.readAsArrayBuffer(data);
39 | };
40 |
41 | // When transcription is complete, pass the text to parent component
42 | useEffect(() => {
43 | if (onTranscriptReady && transcriber.output && !transcriber.isBusy) {
44 | const text = transcriber.output.chunks
45 | .map((chunk) => chunk.text)
46 | .join(" ")
47 | .trim();
48 |
49 | if (text) {
50 | onTranscriptReady(text);
51 | }
52 | }
53 | }, [transcriber.output, transcriber.isBusy]);
54 |
55 | return (
56 |
57 |
58 |
59 |
60 |
{
63 | setIsRecording(!isRecording);
64 | if (blob) {
65 | transcriber.onInputChange();
66 | processAudioRecording(blob);
67 | }
68 | }}
69 | disabled={isModelLoading || transcriber.isModelLoading}
70 | />
71 |
72 |
73 | {(isModelLoading ||
74 | transcriber.isModelLoading ||
75 | transcriber.isBusy) && (
76 |
77 | {isModelLoading || transcriber.isModelLoading
78 | ? "Loading model..."
79 | : transcriber.isBusy
80 | ? "Transcribing..."
81 | : ""}
82 |
83 | )}
84 |
85 |
86 |
87 | {transcriber.progressItems.length > 0 && (
88 |
89 |
92 | {transcriber.progressItems.map((data) => (
93 |
96 | ))}
97 |
98 | )}
99 |
100 | );
101 | }
102 |
--------------------------------------------------------------------------------
/src/components/AudioRecorder.tsx:
--------------------------------------------------------------------------------
1 | import { useState, useEffect, useRef } from "react";
2 | import { MicrophoneIcon } from "../assets/icons/MicrophoneIcon";
3 | import { StopIcon } from "../assets/icons/StopIcon";
4 | import { formatAudioTimestamp } from "../utils/AudioUtils";
5 | import { webmFixDuration } from "../utils/BlobFix";
6 |
7 | // Return the first audio container format this browser's MediaRecorder
8 | // can actually produce, or undefined when none of the candidates are
9 | // supported (the caller then falls back to the recorder's default type).
10 | function getMimeType() {
11 |   const candidates = [
12 |     "audio/webm",
13 |     "audio/mp4",
14 |     "audio/ogg",
15 |     "audio/wav",
16 |     "audio/aac",
17 |   ];
18 |   return candidates.find((type) => MediaRecorder.isTypeSupported(type));
19 | }
22 |
23 | export default function AudioRecorder(props: {
24 | isRecording: boolean;
25 | onRecordingToggle: (blob?: Blob) => void;
26 | disabled?: boolean;
27 | }) {
28 | const [duration, setDuration] = useState(0);
29 | const { isRecording } = props;
30 |
31 | const streamRef = useRef(null);
32 | const mediaRecorderRef = useRef(null);
33 | const chunksRef = useRef([]);
34 | const startTimeRef = useRef(0);
35 |
36 | // Initialize or stop the media recorder when isRecording changes
37 | useEffect(() => {
38 | const startRecording = async () => {
39 | startTimeRef.current = Date.now();
40 | chunksRef.current = [];
41 |
42 | try {
43 | if (!streamRef.current) {
44 | streamRef.current = await navigator.mediaDevices.getUserMedia({
45 | audio: true,
46 | });
47 | }
48 |
49 | const mimeType = getMimeType();
50 | const mediaRecorder = new MediaRecorder(streamRef.current, {
51 | mimeType,
52 | });
53 |
54 | mediaRecorderRef.current = mediaRecorder;
55 |
56 | mediaRecorder.addEventListener("dataavailable", async (event) => {
57 | console.log(
58 | "MediaRecorder data available:",
59 | event.data.size,
60 | "bytes"
61 | );
62 | if (event.data.size > 0) {
63 | chunksRef.current.push(event.data);
64 | }
65 | });
66 |
67 | mediaRecorder.addEventListener("stop", async () => {
68 | console.log("MediaRecorder stopped");
69 | const recordingDuration = Date.now() - startTimeRef.current;
70 | console.log("Recording duration:", recordingDuration, "ms");
71 |
72 | if (chunksRef.current.length === 0) {
73 | console.error("No audio data recorded");
74 | props.onRecordingToggle();
75 | return;
76 | }
77 |
78 | // Received a stop event
79 | let blob = new Blob(chunksRef.current, { type: mimeType });
80 | console.log("Created blob:", blob.size, "bytes, type:", blob.type);
81 |
82 | if (mimeType === "audio/webm") {
83 | try {
84 | blob = await webmFixDuration(blob, recordingDuration, blob.type);
85 | console.log("Fixed webm duration");
86 | } catch (error) {
87 | console.error("Error fixing webm duration:", error);
88 | }
89 | }
90 |
91 | props.onRecordingToggle(blob);
92 | });
93 |
94 | mediaRecorder.start(1000); // Collect data every second for better reliability
95 | setDuration(0);
96 | } catch (error) {
97 | console.error("Error accessing microphone:", error);
98 | props.onRecordingToggle(); // Toggle back to not recording
99 | }
100 | };
101 |
102 | const stopRecording = () => {
103 | if (
104 | mediaRecorderRef.current &&
105 | mediaRecorderRef.current.state === "recording"
106 | ) {
107 | mediaRecorderRef.current.stop(); // set state to inactive
108 | setDuration(0);
109 | }
110 | };
111 |
112 | if (isRecording) {
113 | startRecording();
114 | } else if (mediaRecorderRef.current) {
115 | stopRecording();
116 | }
117 |
118 | return () => {
119 | if (!isRecording && streamRef.current) {
120 | streamRef.current.getTracks().forEach((track) => track.stop());
121 | streamRef.current = null;
122 | }
123 | };
124 | }, [isRecording]);
125 |
126 | // Timer effect for recording duration
127 | useEffect(() => {
128 | if (isRecording) {
129 | const timer = setInterval(() => {
130 | setDuration((prevDuration) => prevDuration + 1);
131 | }, 1000);
132 |
133 | return () => {
134 | clearInterval(timer);
135 | };
136 | }
137 | }, [isRecording]);
138 |
139 | return (
140 |
154 | );
155 | }
156 |
--------------------------------------------------------------------------------
/src/components/AudioSection.tsx:
--------------------------------------------------------------------------------
1 | import { AudioManager } from "./AudioManager";
2 | import { Transcriber } from "../hooks/useTranscriber";
3 |
4 | interface AudioSectionProps {
5 | transcriber: Transcriber;
6 | onTranscriptReady: (text: string) => void;
7 | isWhisperModelLoading: boolean;
8 | hasPreviousExpression: boolean;
9 | }
10 |
11 | export function AudioSection({
12 | transcriber,
13 | onTranscriptReady,
14 | isWhisperModelLoading,
15 | hasPreviousExpression
16 | }: AudioSectionProps) {
17 | return (
18 |
19 |
20 | {hasPreviousExpression ? "Modify Expression" : "Dictate Math Expression"}
21 |
22 |
27 |
28 | );
29 | }
30 |
--------------------------------------------------------------------------------
/src/components/DarkModeToggle.tsx:
--------------------------------------------------------------------------------
1 | import { useState, useEffect } from "react";
2 | import { SunIcon } from "../assets/icons/SunIcon";
3 | import { MoonIcon } from "../assets/icons/MoonIcon";
4 |
5 | export function DarkModeToggle() {
6 | const [darkMode, setDarkMode] = useState(false);
7 |
8 | // On component mount, check if user has a preference
9 | useEffect(() => {
10 | // Check if user has a preference in localStorage
11 | const savedPreference = localStorage.getItem("darkMode");
12 |
13 | // If they have a preference, use it
14 | if (savedPreference !== null) {
15 | setDarkMode(savedPreference === "true");
16 | }
17 | // Otherwise check system preference
18 | else if (window.matchMedia && window.matchMedia('(prefers-color-scheme: dark)').matches) {
19 | setDarkMode(true);
20 | }
21 | }, []);
22 |
23 | // Update the document when darkMode changes
24 | useEffect(() => {
25 | if (darkMode) {
26 | document.documentElement.classList.add('dark');
27 | document.documentElement.style.setProperty('color-scheme', 'dark');
28 | document.documentElement.style.setProperty('--bg-primary', 'var(--color-dark-bg-primary)');
29 | document.documentElement.style.setProperty('--bg-secondary', 'var(--color-dark-bg-secondary)');
30 | document.documentElement.style.setProperty('--text-primary', 'var(--color-dark-text-primary)');
31 | document.documentElement.style.setProperty('--text-secondary', 'var(--color-dark-text-secondary)');
32 | document.documentElement.style.setProperty('--border-color', 'var(--color-dark-border)');
33 | } else {
34 | document.documentElement.classList.remove('dark');
35 | document.documentElement.style.setProperty('color-scheme', 'light');
36 | document.documentElement.style.setProperty('--bg-primary', 'white');
37 | document.documentElement.style.setProperty('--bg-secondary', 'oklch(0.97 0.01 240)');
38 | document.documentElement.style.setProperty('--text-primary', 'oklch(0.2 0.01 240)');
39 | document.documentElement.style.setProperty('--text-secondary', 'oklch(0.4 0.01 240)');
40 | document.documentElement.style.setProperty('--border-color', 'oklch(0.85 0.01 240)');
41 | }
42 | localStorage.setItem("darkMode", darkMode.toString());
43 | }, [darkMode]);
44 |
45 | return (
46 |
57 | );
58 | }
59 |
--------------------------------------------------------------------------------
/src/components/LatexOutput.tsx:
--------------------------------------------------------------------------------
1 | import { ResetIcon } from "../assets/icons/ResetIcon";
2 | import { CopyButtonIcon } from "../assets/icons/CopyButtonIcon";
3 | import { CheckmarkIcon } from "../assets/icons/CheckmarkIcon";
4 | import { LoadingSpinner } from "../assets/icons/LoadingSpinner";
5 | import LatexRenderer from "./LatexRenderer";
6 | import { useState } from "react";
7 |
8 | interface LatexOutputProps {
9 | latexOutput: string;
10 | hasPreviousExpression: boolean;
11 | onReset: () => void;
12 | isLoading?: boolean;
13 | isChangingModel?: boolean;
14 | }
15 |
16 | export function LatexOutput({
17 | latexOutput,
18 | hasPreviousExpression,
19 | onReset,
20 | isLoading = false,
21 | isChangingModel = false,
22 | }: LatexOutputProps) {
23 | const [copied, setCopied] = useState(false);
24 |
25 | const handleCopy = () => {
26 | navigator.clipboard.writeText(latexOutput);
27 | setCopied(true);
28 |
29 | // Reset after 1.5 seconds
30 | setTimeout(() => {
31 | setCopied(false);
32 | }, 1500);
33 | };
34 |
35 | return (
36 |
37 |
38 |
39 | LaTeX Expression
40 |
41 | {hasPreviousExpression && (
42 |
50 | )}
51 |
52 |
53 | {/* Rendered LaTeX */}
54 |
55 |
56 | {isLoading || isChangingModel ? (
57 |
58 |
59 |
60 |
61 | {isChangingModel
62 | ? "Loading LLM model..."
63 | : "Converting speech to LaTeX..."}
64 |
65 |
66 |
67 | ) : latexOutput ? (
68 |
69 | ) : (
70 |
71 | LaTeX rendering will appear here...
72 |
73 | )}
74 |
75 |
76 |
77 | {/* Raw LaTeX Code */}
78 | {latexOutput && (
79 |
80 |
95 |
96 |
97 | {latexOutput}
98 |
99 |
100 |
101 | )}
102 |
103 | );
104 | }
105 |
--------------------------------------------------------------------------------
/src/components/LatexRenderer.tsx:
--------------------------------------------------------------------------------
1 | import { FC } from "react";
2 | import { MathJax, MathJaxContext } from "better-react-mathjax";
3 |
4 | const config = {
5 | loader: { load: ["[tex]/html"] },
6 | tex: {
7 | packages: { "[+]": ["html"] },
8 | inlineMath: [
9 | ["$", "$"],
10 | ["\\(", "\\)"],
11 | ],
12 | },
13 | chtml: {
14 | scale: 2.0,
15 | },
16 | };
17 |
18 | interface LatexRendererProps {
19 | latex: string;
20 | }
21 |
22 | const LatexRenderer: FC = ({ latex }) => {
23 | if (!latex) return null;
24 |
25 | try {
26 | // Extract the LaTeX content from any delimiters
27 | let cleanLatex = latex;
28 |
29 | // Remove any existing delimiters
30 | cleanLatex = cleanLatex
31 | .replace(/^\\\(|\\\)$/g, "") // Remove inline delimiters \( \)
32 | .replace(/^\\\[|\\\]$/g, "") // Remove display delimiters \[ \]
33 | .replace(/^\$|\$$/g, "") // Remove $ delimiters
34 | .replace(/^\$\$|\$\$$/g, "") // Remove $$ delimiters
35 | .trim();
36 |
37 | // Format with proper delimiters for MathJax - always use inline mode
38 | const formattedLatex = `$${cleanLatex}$`;
39 |
40 | return (
41 |
42 |
43 |
44 | {formattedLatex}
45 |
46 |
47 |
48 | );
49 | } catch (error) {
50 | console.error("Error rendering LaTeX:", error);
51 | return (
52 |
53 | Error rendering LaTeX: {(error as Error).message}
54 |
55 | );
56 | }
57 | };
58 |
59 | export default LatexRenderer;
60 |
--------------------------------------------------------------------------------
/src/components/LoadingIndicator.tsx:
--------------------------------------------------------------------------------
1 | import { LoadingSpinner } from "../assets/icons/LoadingSpinner";
2 |
// Props for LoadingIndicator: two independent busy flags — LLM inference
// in progress, and an LLM model switch in progress.
interface LoadingIndicatorProps {
  isLoading: boolean;
  isChangingModel: boolean;
}

// Spinner + status line shown while either the LLM is generating or a model
// is being (re)loaded. Renders nothing when both flags are false.
// NOTE(review): the surrounding JSX elements appear to have been stripped by
// the dump tool; only the text expression survives.
export function LoadingIndicator({ isLoading, isChangingModel }: LoadingIndicatorProps) {
  if (!isLoading && !isChangingModel) return null;

  return (


      {isChangingModel
        ? "Loading LLM model..."
        : "Converting speech to LaTeX..."}


  );
}
22 |
--------------------------------------------------------------------------------
/src/components/ModelLoader.tsx:
--------------------------------------------------------------------------------
// Props for ModelLoader: the human-readable loading status string and a
// flag saying the model has finished loading (which hides this component).
interface ModelLoaderProps {
  loadingStatus: string;
  modelLoaded: boolean;
}

// Shows the model-loading status until `modelLoaded` flips true, then
// renders nothing.
// NOTE(review): original lines 15-26 (JSX markup) were elided by the dump
// tool — the line numbering jumps from 14 to 27.
export function ModelLoader({ loadingStatus, modelLoaded }: ModelLoaderProps) {
  if (modelLoaded) return null;

  return (


      {loadingStatus}



  );
}
30 |
--------------------------------------------------------------------------------
/src/components/ModelSelectionPanel.tsx:
--------------------------------------------------------------------------------
1 | import { ModelSelector } from "./ModelSelector";
2 | import { DEFAULT_MODELS } from "../utils/Constants";
3 |
// Props for ModelSelectionPanel: the currently selected LLM and Whisper
// model names, their setters, and busy flags used to disable the selectors
// while a model is loading.
interface ModelSelectionPanelProps {
  llmModel: string;
  setLlmModel: (model: string) => void;
  whisperModel: string;
  setWhisperModel: (model: string) => void;
  isChangingLLMModel: boolean;
  isWhisperModelLoading: boolean;
}

// Panel with two dropdown selectors (LLM model grouped by family, and the
// Whisper speech-to-text model).
// NOTE(review): the JSX markup (ModelSelector elements, optgroup/option
// tags) appears stripped by the dump tool — line numbers jump 26→32, 43→54
// and 58→64. The `as Record)` cast below has lost its generic arguments
// (presumably `Record<string, typeof DEFAULT_MODELS>`); confirm against the
// original file.
export function ModelSelectionPanel({
  llmModel,
  setLlmModel,
  whisperModel,
  setWhisperModel,
  isChangingLLMModel,
  isWhisperModelLoading
}: ModelSelectionPanelProps) {
  return (

Model Selection




      {/* Group models by family */}
      {Object.entries(
        DEFAULT_MODELS.reduce((acc, model) => {
          const family = model.family;
          if (!acc[family]) {
            acc[family] = [];
          }
          acc[family].push(model);
          return acc;
        }, {} as Record)
      ).map(([family, models]) => (

      ))}











  );
}
74 |
--------------------------------------------------------------------------------
/src/components/ModelSelector.tsx:
--------------------------------------------------------------------------------
1 | import { ReactNode } from "react";
2 |
// Props for ModelSelector: a labelled dropdown whose option elements are
// supplied by the caller as `children`. `onChange` receives the newly
// selected model name; `disabled` greys the control out while loading.
interface ModelSelectorProps {
  label: string;
  model: string;
  onChange: (model: string) => void;
  disabled?: boolean;
  children: ReactNode;
}

// Generic labelled <select> wrapper shared by the LLM and Whisper pickers.
// NOTE(review): original lines 22-28 (the select element markup) were elided
// by the dump tool — line numbering jumps from 21 to 29.
export function ModelSelector({
  label,
  model,
  onChange,
  disabled = false,
  children,
}: ModelSelectorProps) {
  return (




  );
}
32 |
--------------------------------------------------------------------------------
/src/components/Progress.tsx:
--------------------------------------------------------------------------------
// Progress bar row: a label plus a percentage formatted to two decimals.
// NOTE(review): original lines 12-14 (the bar's JSX, presumably a div whose
// width is driven by `percentage`) were elided by the dump tool — the line
// numbering jumps from 11 to 15.
export default function Progress({
  text,
  percentage,
}: {
  text: string;
  percentage: number;
}) {
  // Defensive default: per the declared type `percentage` is never nullish,
  // so `?? 0` is a no-op at the type level; it only guards untyped callers.
  percentage = percentage ?? 0;
  return (


      {text} ({`${percentage.toFixed(2)}%`})


  );
}
20 |
--------------------------------------------------------------------------------
/src/components/Transcript.tsx:
--------------------------------------------------------------------------------
1 | import { useRef, useEffect } from "react";
2 |
3 | import { TranscriberData } from "../hooks/useTranscriber";
4 | import { formatAudioTimestamp } from "../utils/AudioUtils";
5 |
// Props: the latest transcription result, or undefined before any run.
interface Props {
  transcribedData: TranscriberData | undefined;
}

// Scrollable list of transcribed chunks, each prefixed with its start
// timestamp. Auto-scrolls to the newest chunk on every update.
// NOTE(review): `useRef(null)` has lost its generic argument (presumably
// `useRef<HTMLDivElement>(null)`) — the dump tool stripped angle-bracket
// content, and the JSX markup below is likewise elided (line numbers jump
// 48→52 and 53→57). Confirm against the original file.
export default function Transcript({ transcribedData }: Props) {
  const divRef = useRef(null);

  // Trigger a browser download of `blob` under `filename` via a transient
  // anchor element; the object URL is revoked immediately after the click.
  const saveBlob = (blob: Blob, filename: string) => {
    const url = URL.createObjectURL(blob);
    const link = document.createElement("a");
    link.href = url;
    link.download = filename;
    link.click();
    URL.revokeObjectURL(url);
  };

  // Export the concatenated chunk texts as transcript.txt.
  // NOTE(review): no longer referenced in the visible markup (the export
  // buttons were removed per the comment near the bottom) — candidate for
  // deletion if that removal is permanent.
  const exportTXT = () => {
    const chunks = transcribedData?.chunks ?? [];
    const text = chunks
      .map((chunk) => chunk.text)
      .join("")
      .trim();

    const blob = new Blob([text], { type: "text/plain" });
    saveBlob(blob, "transcript.txt");
  };

  // Scroll to the bottom when the component updates
  useEffect(() => {
    if (divRef.current) {
      divRef.current.scrollTop = divRef.current.scrollHeight;
    }
  });

  // Render nothing until at least one chunk exists.
  if (!transcribedData?.chunks?.length) {
    return null;
  }

  return (

Transcription


      {transcribedData.chunks.map((chunk, i) => (

          {formatAudioTimestamp(chunk.timestamp[0])}


{chunk.text}

      ))}


    {!transcribedData.isBusy &&
      {
        /* Removed buttons as they're no longer needed */
      }}

  );
}
72 |
--------------------------------------------------------------------------------
/src/hooks/useConversation.ts:
--------------------------------------------------------------------------------
1 | import { useState } from "react";
2 | import * as webllm from "@mlc-ai/web-llm";
3 |
// One turn in the chat transcript sent to the LLM.
export interface ChatMessage {
  role: "system" | "user" | "assistant";
  content: string;
}

// Public surface of useConversation: the streamed LaTeX result, busy flag,
// whether an expression already exists (switches the prompt from "convert"
// to "modify"), the full chat history, and the two actions.
// NOTE(review): `Promise` below has lost its type argument (presumably
// `Promise<void>`) — the dump tool stripped angle-bracket content.
export interface ConversationState {
  latexOutput: string;
  isLoading: boolean;
  hasPreviousExpression: boolean;
  conversationHistory: ChatMessage[];
  sendToLLM: (text: string) => Promise;
  resetConversation: () => void;
}
17 |
// Hook that owns the speech→LaTeX conversation with a WebLLM engine:
// maintains the chat history (seeded with a LaTeX-only system prompt),
// streams completions into `latexOutput`, and tracks whether a previous
// expression exists so follow-up utterances are treated as modifications.
// NOTE(review): the useState calls have lost their generic arguments
// (e.g. `useState<ChatMessage[]>`) — stripped by the dump tool; confirm.
export function useConversation(engine: webllm.MLCEngine | null): ConversationState {
  const [latexOutput, setLatexOutput] = useState("");
  const [isLoading, setIsLoading] = useState(false);
  const [hasPreviousExpression, setHasPreviousExpression] = useState(false);
  const [conversationHistory, setConversationHistory] = useState([
    {
      role: "system",
      content: `You are a LaTeX expression generator. Convert the user's spoken math description into a valid LaTeX expression. Return ONLY the LaTeX code without any explanations, markdown formatting, or backticks. Do not include any text before or after the LaTeX expression. Do not include any $ symbols.`,
    },
  ]);

  // Send text to LLM
  // Silently no-ops on blank input or when the engine isn't ready yet.
  // NOTE(review): reads `conversationHistory` and `hasPreviousExpression`
  // from the closure — two overlapping calls would race on stale history;
  // acceptable while the UI serializes requests via `isLoading`.
  const sendToLLM = async (text: string) => {
    if (!text.trim() || !engine) return;

    setIsLoading(true);

    try {
      console.log("Sending to LLM:", text);

      // Create the user message based on whether we're creating a new expression or modifying an existing one
      const userMessage: ChatMessage = {
        role: "user",
        content: hasPreviousExpression
          ? `Modify the previous LaTeX expression based on this instruction: "${text.trim()}"`
          : `Convert this math description to LaTeX: "${text.trim()}"`,
      };

      // Add the user message to conversation history
      const updatedHistory = [...conversationHistory, userMessage];
      setConversationHistory(updatedHistory);

      // Use streaming for better user experience
      const chunks = await engine.chat.completions.create({
        messages: updatedHistory,
        temperature: 0.3,
        stream: true,
      });

      // Accumulate the streamed deltas, updating the UI after every chunk.
      let fullResponse = "";
      for await (const chunk of chunks) {
        const content = chunk.choices[0]?.delta.content || "";
        fullResponse += content;
        setLatexOutput(fullResponse);
      }

      // Add the assistant's response to the conversation history
      setConversationHistory([
        ...updatedHistory,
        { role: "assistant", content: fullResponse },
      ]);

      // Now we have a previous expression
      setHasPreviousExpression(true);

      console.log("LaTeX generation complete");
    } catch (error) {
      // Errors are logged only; the previous latexOutput is left in place.
      console.error("Error generating LaTeX:", error);
    } finally {
      setIsLoading(false);
    }
  };

  // Reset the conversation and latex output
  // Re-seeds the history with the same system prompt used at mount.
  const resetConversation = () => {
    setLatexOutput("");
    setHasPreviousExpression(false);
    setConversationHistory([
      {
        role: "system",
        content: `You are a LaTeX expression generator. Convert the user's spoken math description into a valid LaTeX expression. Return ONLY the LaTeX code without any explanations, markdown formatting, or backticks. Do not include any text before or after the LaTeX expression. Do not include any $ symbols.`,
      },
    ]);
  };

  return {
    latexOutput,
    isLoading,
    hasPreviousExpression,
    conversationHistory,
    sendToLLM,
    resetConversation
  };
}
102 |
--------------------------------------------------------------------------------
/src/hooks/useLLMEngine.ts:
--------------------------------------------------------------------------------
1 | import { useState, useEffect } from "react";
2 | import * as webllm from "@mlc-ai/web-llm";
3 | import { DEFAULT_MODELS } from "../utils/Constants";
4 |
// Public surface of useLLMEngine: the engine instance (null until loaded),
// a human-readable status line, and two flags for the loading lifecycle.
export interface LLMEngineState {
  engine: webllm.MLCEngine | null;
  loadingStatus: string;
  modelLoaded: boolean;
  isChangingModel: boolean;
}

// Hook that (re)creates a WebLLM engine whenever `modelName` changes,
// exposing monotonic progress text while the model downloads/compiles.
// NOTE(review): `useState(null)` has lost its generic argument (presumably
// `useState<webllm.MLCEngine | null>(null)`) — stripped by the dump tool.
// NOTE(review): the effect returns no cleanup and the previous engine is
// not unloaded before a new one is created — verify WebLLM reclaims GPU
// memory, or consider calling the old engine's unload on change.
export function useLLMEngine(modelName: string): LLMEngineState {
  const [engine, setEngine] = useState(null);
  const [loadingStatus, setLoadingStatus] = useState("Loading model...");
  const [modelLoaded, setModelLoaded] = useState(false);
  const [isChangingModel, setIsChangingModel] = useState(false);

  useEffect(() => {
    async function initializeEngine() {
      try {
        setLoadingStatus("Initializing WebLLM engine...");
        setModelLoaded(false);
        setIsChangingModel(true);

        // Find the selected model in DEFAULT_MODELS
        const selectedModel = DEFAULT_MODELS.find(
          (model) => model.name === modelName
        );

        if (!selectedModel) {
          throw new Error(`Model ${modelName} not found in available models`);
        }

        console.log("Loading model:", selectedModel.name);

        // Track the highest progress value we've seen
        let highestProgress = 0;

        // Use the model directly without custom appConfig
        const newEngine = await webllm.CreateMLCEngine(selectedModel.name, {
          initProgressCallback: (progress) => {
            // Only update if the new progress is higher than what we've seen before
            if (progress.progress > highestProgress) {
              highestProgress = progress.progress;
              setLoadingStatus(
                `Loading model (only slow first time): ${Math.round(
                  highestProgress * 100
                )}%`
              );
            } else {
              // If progress seems to go backward, just report the stage without percentage
              setLoadingStatus(`${progress.text || "Processing..."}`);
            }
          },
        });

        setEngine(newEngine);
        setModelLoaded(true);
        setLoadingStatus("Model loaded successfully!");
      } catch (error) {
        // Surface the failure in the status line; engine stays null.
        console.error("Failed to initialize WebLLM engine:", error);
        setLoadingStatus(`Error loading model: ${error}`);
      } finally {
        setIsChangingModel(false);
      }
    }

    initializeEngine();
  }, [modelName]);

  return {
    engine,
    loadingStatus,
    modelLoaded,
    isChangingModel,
  };
}
78 |
--------------------------------------------------------------------------------
/src/hooks/useTranscriber.ts:
--------------------------------------------------------------------------------
1 | import { useCallback, useMemo, useState } from "react";
2 | import { useWorker } from "./useWorker";
3 | import Constants from "../utils/Constants";
4 |
// Per-file download progress reported by the whisper worker while model
// weights are fetched.
interface ProgressItem {
    file: string;
    loaded: number;
    progress: number;
    total: number;
    name: string;
    status: string;
}

// Shape of a partial ("update") message from the worker: data[0] is the
// text so far, data[1].chunks the timestamped segments.
interface TranscriberUpdateData {
    data: [
        string,
        { chunks: { text: string; timestamp: [number, number | null] }[] },
    ];
    text: string;
}

// Shape of the final ("complete") message from the worker.
interface TranscriberCompleteData {
    data: {
        text: string;
        chunks: { text: string; timestamp: [number, number | null] }[];
    };
}

// Transcription result exposed to consumers; timestamp end may be null
// while a chunk is still open.
export interface TranscriberData {
    isBusy: boolean;
    text: string;
    chunks: { text: string; timestamp: [number, number | null] }[];
}

// Full public surface of useTranscriber: lifecycle callbacks, busy flags,
// model-download progress, the start action, the latest output, and the
// whisper configuration knobs with their setters.
export interface Transcriber {
    onInputChange: () => void;
    isBusy: boolean;
    isModelLoading: boolean;
    progressItems: ProgressItem[];
    start: (audioData: AudioBuffer | undefined) => void;
    output?: TranscriberData;
    model: string;
    setModel: (model: string) => void;
    multilingual: boolean;
    setMultilingual: (model: boolean) => void;
    quantized: boolean;
    setQuantized: (model: boolean) => void;
    subtask: string;
    setSubtask: (subtask: string) => void;
    language?: string;
    setLanguage: (language: string) => void;
}
53 |
// Hook that drives the whisper web worker: forwards audio to it, tracks
// model-download progress, and exposes partial/final transcripts.
// NOTE(review): several useState calls have lost their generic arguments
// (e.g. `useState<TranscriberData | undefined>`, `useState<ProgressItem[]>`)
// — stripped by the dump tool; confirm against the original file.
export function useTranscriber(): Transcriber {
    const [transcript, setTranscript] = useState(
        undefined,
    );
    const [isBusy, setIsBusy] = useState(false);
    const [isModelLoading, setIsModelLoading] = useState(false);

    const [progressItems, setProgressItems] = useState([]);

    // Single long-lived worker; this callback is the only message sink.
    const webWorker = useWorker((event) => {
        const message = event.data;
        // Update the state with the result
        switch (message.status) {
            case "progress":
                // Model file progress: update one of the progress items.
                setProgressItems((prev) =>
                    prev.map((item) => {
                        if (item.file === message.file) {
                            return { ...item, progress: message.progress };
                        }
                        return item;
                    }),
                );
                break;
            case "update":
                // Received partial update
                // console.log("update", message);
                // eslint-disable-next-line no-case-declarations
                const updateMessage = message as TranscriberUpdateData;
                setTranscript({
                    isBusy: true,
                    text: updateMessage.data[0],
                    chunks: updateMessage.data[1].chunks,
                });
                break;
            case "complete":
                // Received complete transcript
                // console.log("complete", message);
                // eslint-disable-next-line no-case-declarations
                const completeMessage = message as TranscriberCompleteData;
                setTranscript({
                    isBusy: false,
                    text: completeMessage.data.text,
                    chunks: completeMessage.data.chunks,
                });
                setIsBusy(false);
                break;

            case "initiate":
                // Model file start load: add a new progress item to the list.
                setIsModelLoading(true);
                setProgressItems((prev) => [...prev, message]);
                break;
            case "ready":
                setIsModelLoading(false);
                break;
            case "error":
                setIsBusy(false);
                alert(
                    `${message.data.message} This is most likely because you are using Safari on an M1/M2 Mac. Please try again from Chrome, Firefox, or Edge.\n\nIf this is not the case, please file a bug report.`,
                );
                break;
            case "done":
                // Model file loaded: remove the progress item from the list.
                setProgressItems((prev) =>
                    prev.filter((item) => item.file !== message.file),
                );
                break;

            default:
                // initiate/download/done
                break;
        }
    });

    // Whisper configuration knobs, seeded from the shared Constants module.
    const [model, setModel] = useState(Constants.DEFAULT_MODEL);
    const [subtask, setSubtask] = useState(Constants.DEFAULT_SUBTASK);
    const [quantized, setQuantized] = useState(
        Constants.DEFAULT_QUANTIZED,
    );
    const [multilingual, setMultilingual] = useState(
        Constants.DEFAULT_MULTILINGUAL,
    );
    const [language, setLanguage] = useState(
        Constants.DEFAULT_LANGUAGE,
    );

    // Clear the previous transcript when the audio input changes.
    const onInputChange = useCallback(() => {
        setTranscript(undefined);
    }, []);

    // Downmix the AudioBuffer to mono and post it to the worker together
    // with the current whisper settings. No-op when audioData is undefined.
    const postRequest = useCallback(
        async (audioData: AudioBuffer | undefined) => {
            if (audioData) {
                setTranscript(undefined);
                setIsBusy(true);

                let audio;
                if (audioData.numberOfChannels === 2) {
                    // Average the two channels, scaled by sqrt(2) to
                    // compensate for the power loss of the downmix.
                    const SCALING_FACTOR = Math.sqrt(2);

                    let left = audioData.getChannelData(0);
                    let right = audioData.getChannelData(1);

                    audio = new Float32Array(left.length);
                    for (let i = 0; i < audioData.length; ++i) {
                        audio[i] = SCALING_FACTOR * (left[i] + right[i]) / 2;
                    }
                } else {
                    // If the audio is not stereo, we can just use the first channel:
                    audio = audioData.getChannelData(0);
                }

                // subtask/language only apply to multilingual models;
                // "auto" language is sent as null (worker auto-detects).
                webWorker.postMessage({
                    audio,
                    model,
                    multilingual,
                    quantized,
                    subtask: multilingual ? subtask : null,
                    language:
                        multilingual && language !== "auto" ? language : null,
                });
            }
        },
        [webWorker, model, multilingual, quantized, subtask, language],
    );

    // Memoize the public object so consumers only re-render when one of
    // the listed values actually changes.
    const transcriber = useMemo(() => {
        return {
            onInputChange,
            isBusy,
            isModelLoading,
            progressItems,
            start: postRequest,
            output: transcript,
            model,
            setModel,
            multilingual,
            setMultilingual,
            quantized,
            setQuantized,
            subtask,
            setSubtask,
            language,
            setLanguage,
        };
    }, [
        isBusy,
        isModelLoading,
        progressItems,
        postRequest,
        transcript,
        model,
        multilingual,
        quantized,
        subtask,
        language,
    ]);

    return transcriber;
}
215 |
--------------------------------------------------------------------------------
/src/hooks/useWorker.ts:
--------------------------------------------------------------------------------
1 | import { useState } from "react";
2 |
// Callback signature for messages posted by the whisper worker.
export interface MessageEventHandler {
  (event: MessageEvent): void;
}
6 |
7 | export function useWorker(messageEventHandler: MessageEventHandler): Worker {
8 | // Create new worker once and never again
9 | const [worker] = useState(() => createWorker(messageEventHandler));
10 | return worker;
11 | }
12 |
13 | function createWorker(messageEventHandler: MessageEventHandler): Worker {
14 | const worker = new Worker(new URL("../whisper-worker.js", import.meta.url), {
15 | type: "module",
16 | });
17 | // Listen for messages from the Web Worker
18 | worker.addEventListener("message", messageEventHandler);
19 | return worker;
20 | }
21 |
--------------------------------------------------------------------------------
/src/index.css:
--------------------------------------------------------------------------------
@import "tailwindcss";

/* Design tokens consumed by Tailwind's @theme layer. */
@theme {
  /* Dark mode colors */
  --color-dark-bg-primary: oklch(0.15 0.02 240);
  --color-dark-bg-secondary: oklch(0.2 0.02 240);
  --color-dark-text-primary: oklch(0.95 0.02 240);
  --color-dark-text-secondary: oklch(0.85 0.02 240);
  --color-dark-border: oklch(0.3 0.02 240);

  /* Light mode accent colors that work well in both modes */
  --color-accent-blue: oklch(0.65 0.2 250);
  --color-accent-blue-light: oklch(0.85 0.1 250);
  --color-accent-green: oklch(0.65 0.2 150);
  --color-accent-green-light: oklch(0.85 0.1 150);
  --color-accent-purple: oklch(0.6 0.18 300);
  --color-accent-purple-light: oklch(0.8 0.1 300);
  --color-accent-teal: oklch(0.7 0.15 200);
  --color-accent-teal-light: oklch(0.85 0.08 200);
}

:root {
  font-family: "Inter", system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI",
    Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
  line-height: 1.5;
  font-weight: 400;

  /* Default light mode variables */
  --bg-primary: white;
  --bg-secondary: oklch(0.97 0.01 240);
  --text-primary: oklch(0.2 0.01 240);
  --text-secondary: oklch(0.4 0.01 240);
  --border-color: oklch(0.85 0.01 240);

  font-synthesis: none;
  text-rendering: optimizeLegibility;
  -webkit-font-smoothing: antialiased;
  -moz-osx-font-smoothing: grayscale;
}

a {
  font-weight: 500;
  color: var(--color-accent-blue);
  text-decoration: inherit;
}
a:hover {
  opacity: 0.8;
}

body {
  margin: 0;
  display: flex;
  justify-content: center;
  min-width: 320px;
  min-height: 100vh;
  background-color: var(--bg-primary);
  color: var(--text-primary);
  /* Remove transition for instant mode switching */
  overflow-x: hidden;
}

#root {
  width: 100%;
  display: flex;
  justify-content: center;
}

h1 {
  font-size: 2.5em;
  line-height: 1.1;
}

/* Follow the OS preference when the user hasn't toggled a mode. */
@media (prefers-color-scheme: dark) {
  :root {
    color-scheme: dark;
    --bg-primary: var(--color-dark-bg-primary);
    --bg-secondary: var(--color-dark-bg-secondary);
    --text-primary: var(--color-dark-text-primary);
    --text-secondary: var(--color-dark-text-secondary);
    --border-color: var(--color-dark-border);
  }
}

/* Explicit dark mode set by the in-app toggle (adds .dark to <html>);
   mirrors the media-query block above so both paths stay in sync. */
:root.dark {
  color-scheme: dark;
  --bg-primary: var(--color-dark-bg-primary);
  --bg-secondary: var(--color-dark-bg-secondary);
  --text-primary: var(--color-dark-text-primary);
  --text-secondary: var(--color-dark-text-secondary);
  --border-color: var(--color-dark-border);
}
92 |
--------------------------------------------------------------------------------
/src/main.tsx:
--------------------------------------------------------------------------------
// Application entry point: mounts <App /> under React StrictMode into #root.
// NOTE(review): the JSX passed to render() (StrictMode/App elements) appears
// to have been stripped by the dump tool — only the trailing comma survives.
import { StrictMode } from 'react'
import { createRoot } from 'react-dom/client'
import './index.css'
import App from './App.tsx'

// Non-null assertion is safe: index.html always ships a #root element.
createRoot(document.getElementById('root')!).render(


  ,
)
11 |
--------------------------------------------------------------------------------
/src/utils/AudioUtils.ts:
--------------------------------------------------------------------------------
1 | function padTime(time: number) {
2 | return String(time).padStart(2, "0");
3 | }
4 |
5 | export function formatAudioTimestamp(time: number) {
6 | const hours = (time / (60 * 60)) | 0;
7 | time -= hours * (60 * 60);
8 | const minutes = (time / 60) | 0;
9 | time -= minutes * 60;
10 | const seconds = time | 0;
11 | return `${hours ? padTime(hours) + ":" : ""}${padTime(minutes)}:${padTime(
12 | seconds
13 | )}`;
14 | }
15 |
--------------------------------------------------------------------------------
/src/utils/BlobFix.ts:
--------------------------------------------------------------------------------
1 | /*
2 | * There is a bug where `navigator.mediaDevices.getUserMedia` + `MediaRecorder`
3 | * creates WEBM files without duration metadata. See:
4 | * - https://bugs.chromium.org/p/chromium/issues/detail?id=642012
5 | * - https://stackoverflow.com/a/39971175/13989043
6 | *
7 | * This file contains a function that fixes the duration metadata of a WEBM file.
8 | * - Answer found: https://stackoverflow.com/a/75218309/13989043
9 | * - Code adapted from: https://github.com/mat-sz/webm-fix-duration
10 | * (forked from https://github.com/yusitnikov/fix-webm-duration)
11 | */
12 |
13 | /*
14 | * This is the list of possible WEBM file sections by their IDs.
15 | * Possible types: Container, Binary, Uint, Int, String, Float, Date
16 | */
// One entry in the WEBM section table: the element's name and its value
// encoding (per the comment above: Container, Binary, Uint, Int, String,
// Float, or Date).
interface Section {
  name: string;
  type: string;
}
21 |
22 | const sections: Record = {
23 | 0xa45dfa3: { name: "EBML", type: "Container" },
24 | 0x286: { name: "EBMLVersion", type: "Uint" },
25 | 0x2f7: { name: "EBMLReadVersion", type: "Uint" },
26 | 0x2f2: { name: "EBMLMaxIDLength", type: "Uint" },
27 | 0x2f3: { name: "EBMLMaxSizeLength", type: "Uint" },
28 | 0x282: { name: "DocType", type: "String" },
29 | 0x287: { name: "DocTypeVersion", type: "Uint" },
30 | 0x285: { name: "DocTypeReadVersion", type: "Uint" },
31 | 0x6c: { name: "Void", type: "Binary" },
32 | 0x3f: { name: "CRC-32", type: "Binary" },
33 | 0xb538667: { name: "SignatureSlot", type: "Container" },
34 | 0x3e8a: { name: "SignatureAlgo", type: "Uint" },
35 | 0x3e9a: { name: "SignatureHash", type: "Uint" },
36 | 0x3ea5: { name: "SignaturePublicKey", type: "Binary" },
37 | 0x3eb5: { name: "Signature", type: "Binary" },
38 | 0x3e5b: { name: "SignatureElements", type: "Container" },
39 | 0x3e7b: { name: "SignatureElementList", type: "Container" },
40 | 0x2532: { name: "SignedElement", type: "Binary" },
41 | 0x8538067: { name: "Segment", type: "Container" },
42 | 0x14d9b74: { name: "SeekHead", type: "Container" },
43 | 0xdbb: { name: "Seek", type: "Container" },
44 | 0x13ab: { name: "SeekID", type: "Binary" },
45 | 0x13ac: { name: "SeekPosition", type: "Uint" },
46 | 0x549a966: { name: "Info", type: "Container" },
47 | 0x33a4: { name: "SegmentUID", type: "Binary" },
48 | 0x3384: { name: "SegmentFilename", type: "String" },
49 | 0x1cb923: { name: "PrevUID", type: "Binary" },
50 | 0x1c83ab: { name: "PrevFilename", type: "String" },
51 | 0x1eb923: { name: "NextUID", type: "Binary" },
52 | 0x1e83bb: { name: "NextFilename", type: "String" },
53 | 0x444: { name: "SegmentFamily", type: "Binary" },
54 | 0x2924: { name: "ChapterTranslate", type: "Container" },
55 | 0x29fc: { name: "ChapterTranslateEditionUID", type: "Uint" },
56 | 0x29bf: { name: "ChapterTranslateCodec", type: "Uint" },
57 | 0x29a5: { name: "ChapterTranslateID", type: "Binary" },
58 | 0xad7b1: { name: "TimecodeScale", type: "Uint" },
59 | 0x489: { name: "Duration", type: "Float" },
60 | 0x461: { name: "DateUTC", type: "Date" },
61 | 0x3ba9: { name: "Title", type: "String" },
62 | 0xd80: { name: "MuxingApp", type: "String" },
63 | 0x1741: { name: "WritingApp", type: "String" },
64 | // 0xf43b675: { name: 'Cluster', type: 'Container' },
65 | 0x67: { name: "Timecode", type: "Uint" },
66 | 0x1854: { name: "SilentTracks", type: "Container" },
67 | 0x18d7: { name: "SilentTrackNumber", type: "Uint" },
68 | 0x27: { name: "Position", type: "Uint" },
69 | 0x2b: { name: "PrevSize", type: "Uint" },
70 | 0x23: { name: "SimpleBlock", type: "Binary" },
71 | 0x20: { name: "BlockGroup", type: "Container" },
72 | 0x21: { name: "Block", type: "Binary" },
73 | 0x22: { name: "BlockVirtual", type: "Binary" },
74 | 0x35a1: { name: "BlockAdditions", type: "Container" },
75 | 0x26: { name: "BlockMore", type: "Container" },
76 | 0x6e: { name: "BlockAddID", type: "Uint" },
77 | 0x25: { name: "BlockAdditional", type: "Binary" },
78 | 0x1b: { name: "BlockDuration", type: "Uint" },
79 | 0x7a: { name: "ReferencePriority", type: "Uint" },
80 | 0x7b: { name: "ReferenceBlock", type: "Int" },
81 | 0x7d: { name: "ReferenceVirtual", type: "Int" },
82 | 0x24: { name: "CodecState", type: "Binary" },
83 | 0x35a2: { name: "DiscardPadding", type: "Int" },
84 | 0xe: { name: "Slices", type: "Container" },
85 | 0x68: { name: "TimeSlice", type: "Container" },
86 | 0x4c: { name: "LaceNumber", type: "Uint" },
87 | 0x4d: { name: "FrameNumber", type: "Uint" },
88 | 0x4b: { name: "BlockAdditionID", type: "Uint" },
89 | 0x4e: { name: "Delay", type: "Uint" },
90 | 0x4f: { name: "SliceDuration", type: "Uint" },
91 | 0x48: { name: "ReferenceFrame", type: "Container" },
92 | 0x49: { name: "ReferenceOffset", type: "Uint" },
93 | 0x4a: { name: "ReferenceTimeCode", type: "Uint" },
94 | 0x2f: { name: "EncryptedBlock", type: "Binary" },
95 | 0x654ae6b: { name: "Tracks", type: "Container" },
96 | 0x2e: { name: "TrackEntry", type: "Container" },
97 | 0x57: { name: "TrackNumber", type: "Uint" },
98 | 0x33c5: { name: "TrackUID", type: "Uint" },
99 | 0x3: { name: "TrackType", type: "Uint" },
100 | 0x39: { name: "FlagEnabled", type: "Uint" },
101 | 0x8: { name: "FlagDefault", type: "Uint" },
102 | 0x15aa: { name: "FlagForced", type: "Uint" },
103 | 0x1c: { name: "FlagLacing", type: "Uint" },
104 | 0x2de7: { name: "MinCache", type: "Uint" },
105 | 0x2df8: { name: "MaxCache", type: "Uint" },
106 | 0x3e383: { name: "DefaultDuration", type: "Uint" },
107 | 0x34e7a: { name: "DefaultDecodedFieldDuration", type: "Uint" },
108 | 0x3314f: { name: "TrackTimecodeScale", type: "Float" },
109 | 0x137f: { name: "TrackOffset", type: "Int" },
110 | 0x15ee: { name: "MaxBlockAdditionID", type: "Uint" },
111 | 0x136e: { name: "Name", type: "String" },
112 | 0x2b59c: { name: "Language", type: "String" },
113 | 0x6: { name: "CodecID", type: "String" },
114 | 0x23a2: { name: "CodecPrivate", type: "Binary" },
115 | 0x58688: { name: "CodecName", type: "String" },
116 | 0x3446: { name: "AttachmentLink", type: "Uint" },
117 | 0x1a9697: { name: "CodecSettings", type: "String" },
118 | 0x1b4040: { name: "CodecInfoURL", type: "String" },
119 | 0x6b240: { name: "CodecDownloadURL", type: "String" },
120 | 0x2a: { name: "CodecDecodeAll", type: "Uint" },
121 | 0x2fab: { name: "TrackOverlay", type: "Uint" },
122 | 0x16aa: { name: "CodecDelay", type: "Uint" },
123 | 0x16bb: { name: "SeekPreRoll", type: "Uint" },
124 | 0x2624: { name: "TrackTranslate", type: "Container" },
125 | 0x26fc: { name: "TrackTranslateEditionUID", type: "Uint" },
126 | 0x26bf: { name: "TrackTranslateCodec", type: "Uint" },
127 | 0x26a5: { name: "TrackTranslateTrackID", type: "Binary" },
128 | 0x60: { name: "Video", type: "Container" },
129 | 0x1a: { name: "FlagInterlaced", type: "Uint" },
130 | 0x13b8: { name: "StereoMode", type: "Uint" },
131 | 0x13c0: { name: "AlphaMode", type: "Uint" },
132 | 0x13b9: { name: "OldStereoMode", type: "Uint" },
133 | 0x30: { name: "PixelWidth", type: "Uint" },
134 | 0x3a: { name: "PixelHeight", type: "Uint" },
135 | 0x14aa: { name: "PixelCropBottom", type: "Uint" },
136 | 0x14bb: { name: "PixelCropTop", type: "Uint" },
137 | 0x14cc: { name: "PixelCropLeft", type: "Uint" },
138 | 0x14dd: { name: "PixelCropRight", type: "Uint" },
139 | 0x14b0: { name: "DisplayWidth", type: "Uint" },
140 | 0x14ba: { name: "DisplayHeight", type: "Uint" },
141 | 0x14b2: { name: "DisplayUnit", type: "Uint" },
142 | 0x14b3: { name: "AspectRatioType", type: "Uint" },
143 | 0xeb524: { name: "ColourSpace", type: "Binary" },
144 | 0xfb523: { name: "GammaValue", type: "Float" },
145 | 0x383e3: { name: "FrameRate", type: "Float" },
146 | 0x61: { name: "Audio", type: "Container" },
147 | 0x35: { name: "SamplingFrequency", type: "Float" },
148 | 0x38b5: { name: "OutputSamplingFrequency", type: "Float" },
149 | 0x1f: { name: "Channels", type: "Uint" },
150 | 0x3d7b: { name: "ChannelPositions", type: "Binary" },
151 | 0x2264: { name: "BitDepth", type: "Uint" },
152 | 0x62: { name: "TrackOperation", type: "Container" },
153 | 0x63: { name: "TrackCombinePlanes", type: "Container" },
154 | 0x64: { name: "TrackPlane", type: "Container" },
155 | 0x65: { name: "TrackPlaneUID", type: "Uint" },
156 | 0x66: { name: "TrackPlaneType", type: "Uint" },
157 | 0x69: { name: "TrackJoinBlocks", type: "Container" },
158 | 0x6d: { name: "TrackJoinUID", type: "Uint" },
159 | 0x40: { name: "TrickTrackUID", type: "Uint" },
160 | 0x41: { name: "TrickTrackSegmentUID", type: "Binary" },
161 | 0x46: { name: "TrickTrackFlag", type: "Uint" },
162 | 0x47: { name: "TrickMasterTrackUID", type: "Uint" },
163 | 0x44: { name: "TrickMasterTrackSegmentUID", type: "Binary" },
164 | 0x2d80: { name: "ContentEncodings", type: "Container" },
165 | 0x2240: { name: "ContentEncoding", type: "Container" },
166 | 0x1031: { name: "ContentEncodingOrder", type: "Uint" },
167 | 0x1032: { name: "ContentEncodingScope", type: "Uint" },
168 | 0x1033: { name: "ContentEncodingType", type: "Uint" },
169 | 0x1034: { name: "ContentCompression", type: "Container" },
170 | 0x254: { name: "ContentCompAlgo", type: "Uint" },
171 | 0x255: { name: "ContentCompSettings", type: "Binary" },
172 | 0x1035: { name: "ContentEncryption", type: "Container" },
173 | 0x7e1: { name: "ContentEncAlgo", type: "Uint" },
174 | 0x7e2: { name: "ContentEncKeyID", type: "Binary" },
175 | 0x7e3: { name: "ContentSignature", type: "Binary" },
176 | 0x7e4: { name: "ContentSigKeyID", type: "Binary" },
177 | 0x7e5: { name: "ContentSigAlgo", type: "Uint" },
178 | 0x7e6: { name: "ContentSigHashAlgo", type: "Uint" },
179 | 0xc53bb6b: { name: "Cues", type: "Container" },
180 | 0x3b: { name: "CuePoint", type: "Container" },
181 | 0x33: { name: "CueTime", type: "Uint" },
182 | 0x37: { name: "CueTrackPositions", type: "Container" },
183 | 0x77: { name: "CueTrack", type: "Uint" },
184 | 0x71: { name: "CueClusterPosition", type: "Uint" },
185 | 0x70: { name: "CueRelativePosition", type: "Uint" },
186 | 0x32: { name: "CueDuration", type: "Uint" },
187 | 0x1378: { name: "CueBlockNumber", type: "Uint" },
188 | 0x6a: { name: "CueCodecState", type: "Uint" },
189 | 0x5b: { name: "CueReference", type: "Container" },
190 | 0x16: { name: "CueRefTime", type: "Uint" },
191 | 0x17: { name: "CueRefCluster", type: "Uint" },
192 | 0x135f: { name: "CueRefNumber", type: "Uint" },
193 | 0x6b: { name: "CueRefCodecState", type: "Uint" },
194 | 0x941a469: { name: "Attachments", type: "Container" },
195 | 0x21a7: { name: "AttachedFile", type: "Container" },
196 | 0x67e: { name: "FileDescription", type: "String" },
197 | 0x66e: { name: "FileName", type: "String" },
198 | 0x660: { name: "FileMimeType", type: "String" },
199 | 0x65c: { name: "FileData", type: "Binary" },
200 | 0x6ae: { name: "FileUID", type: "Uint" },
201 | 0x675: { name: "FileReferral", type: "Binary" },
202 | 0x661: { name: "FileUsedStartTime", type: "Uint" },
203 | 0x662: { name: "FileUsedEndTime", type: "Uint" },
204 | 0x43a770: { name: "Chapters", type: "Container" },
205 | 0x5b9: { name: "EditionEntry", type: "Container" },
206 | 0x5bc: { name: "EditionUID", type: "Uint" },
207 | 0x5bd: { name: "EditionFlagHidden", type: "Uint" },
208 | 0x5db: { name: "EditionFlagDefault", type: "Uint" },
209 | 0x5dd: { name: "EditionFlagOrdered", type: "Uint" },
210 | 0x36: { name: "ChapterAtom", type: "Container" },
211 | 0x33c4: { name: "ChapterUID", type: "Uint" },
212 | 0x1654: { name: "ChapterStringUID", type: "String" },
213 | 0x11: { name: "ChapterTimeStart", type: "Uint" },
214 | 0x12: { name: "ChapterTimeEnd", type: "Uint" },
215 | 0x18: { name: "ChapterFlagHidden", type: "Uint" },
216 | 0x598: { name: "ChapterFlagEnabled", type: "Uint" },
217 | 0x2e67: { name: "ChapterSegmentUID", type: "Binary" },
218 | 0x2ebc: { name: "ChapterSegmentEditionUID", type: "Uint" },
219 | 0x23c3: { name: "ChapterPhysicalEquiv", type: "Uint" },
220 | 0xf: { name: "ChapterTrack", type: "Container" },
221 | 0x9: { name: "ChapterTrackNumber", type: "Uint" },
222 | 0x0: { name: "ChapterDisplay", type: "Container" },
223 | 0x5: { name: "ChapString", type: "String" },
224 | 0x37c: { name: "ChapLanguage", type: "String" },
225 | 0x37e: { name: "ChapCountry", type: "String" },
226 | 0x2944: { name: "ChapProcess", type: "Container" },
227 | 0x2955: { name: "ChapProcessCodecID", type: "Uint" },
228 | 0x50d: { name: "ChapProcessPrivate", type: "Binary" },
229 | 0x2911: { name: "ChapProcessCommand", type: "Container" },
230 | 0x2922: { name: "ChapProcessTime", type: "Uint" },
231 | 0x2933: { name: "ChapProcessData", type: "Binary" },
232 | 0x254c367: { name: "Tags", type: "Container" },
233 | 0x3373: { name: "Tag", type: "Container" },
234 | 0x23c0: { name: "Targets", type: "Container" },
235 | 0x28ca: { name: "TargetTypeValue", type: "Uint" },
236 | 0x23ca: { name: "TargetType", type: "String" },
237 | 0x23c5: { name: "TagTrackUID", type: "Uint" },
238 | 0x23c9: { name: "TagEditionUID", type: "Uint" },
239 | 0x23c4: { name: "TagChapterUID", type: "Uint" },
240 | 0x23c6: { name: "TagAttachmentUID", type: "Uint" },
241 | 0x27c8: { name: "SimpleTag", type: "Container" },
242 | 0x5a3: { name: "TagName", type: "String" },
243 | 0x47a: { name: "TagLanguage", type: "String" },
244 | 0x484: { name: "TagDefault", type: "Uint" },
245 | 0x487: { name: "TagString", type: "String" },
246 | 0x485: { name: "TagBinary", type: "Binary" },
247 | };
248 |
249 | class WebmBase {
250 | source?: Uint8Array;
251 | data?: T;
252 |
253 | constructor(private name = "Unknown", private type = "Unknown") {}
254 |
255 | updateBySource() {}
256 |
257 | setSource(source: Uint8Array) {
258 | this.source = source;
259 | this.updateBySource();
260 | }
261 |
262 | updateByData() {}
263 |
264 | setData(data: T) {
265 | this.data = data;
266 | this.updateByData();
267 | }
268 | }
269 |
270 | class WebmUint extends WebmBase {
271 | constructor(name: string, type: string) {
272 | super(name, type || "Uint");
273 | }
274 |
275 | updateBySource() {
276 | // use hex representation of a number instead of number value
277 | this.data = "";
278 | for (let i = 0; i < this.source!.length; i++) {
279 | const hex = this.source![i].toString(16);
280 | this.data += padHex(hex);
281 | }
282 | }
283 |
284 | updateByData() {
285 | const length = this.data!.length / 2;
286 | this.source = new Uint8Array(length);
287 | for (let i = 0; i < length; i++) {
288 | const hex = this.data!.substr(i * 2, 2);
289 | this.source[i] = parseInt(hex, 16);
290 | }
291 | }
292 |
293 | getValue() {
294 | return parseInt(this.data!, 16);
295 | }
296 |
297 | setValue(value: number) {
298 | this.setData(padHex(value.toString(16)));
299 | }
300 | }
301 |
302 | function padHex(hex: string) {
303 | return hex.length % 2 === 1 ? "0" + hex : hex;
304 | }
305 |
306 | class WebmFloat extends WebmBase {
307 | constructor(name: string, type: string) {
308 | super(name, type || "Float");
309 | }
310 |
311 | getFloatArrayType() {
312 | return this.source && this.source.length === 4
313 | ? Float32Array
314 | : Float64Array;
315 | }
316 | updateBySource() {
317 | const byteArray = this.source!.reverse();
318 | const floatArrayType = this.getFloatArrayType();
319 | const floatArray = new floatArrayType(byteArray.buffer);
320 | this.data! = floatArray[0];
321 | }
322 | updateByData() {
323 | const floatArrayType = this.getFloatArrayType();
324 | const floatArray = new floatArrayType([this.data!]);
325 | const byteArray = new Uint8Array(floatArray.buffer);
326 | this.source = byteArray.reverse();
327 | }
328 | getValue() {
329 | return this.data;
330 | }
331 | setValue(value: number) {
332 | this.setData(value);
333 | }
334 | }
335 |
/** One child element parsed out of a container: its EBML id plus parsed section. */
interface ContainerData {
  id: number;
  // Hex form of `id` for inspection; absent on sections appended in code
  // (see WebmFile.fixDuration).
  idHex?: string;
  data: WebmBase;
}
341 |
// A "master" element: a sequence of (id, size, payload) child elements.
// Parsing walks the byte stream with a cursor (`offset`); serialization is
// two-pass (size first, then write) — see updateByData.
class WebmContainer extends WebmBase {
  // Read/write cursor into this.source.
  offset: number = 0;
  data: ContainerData[] = [];

  constructor(name: string, type: string) {
    super(name, type || "Container");
  }

  readByte() {
    return this.source![this.offset++];
  }
  // Reads an EBML variable-length unsigned integer: the count of leading
  // zero bits in the first byte gives the number of extra bytes, and the
  // marker bit is stripped (`firstByte - (1 << (7 - bytes))`).
  readUint() {
    const firstByte = this.readByte();
    const bytes = 8 - firstByte.toString(2).length;
    let value = firstByte - (1 << (7 - bytes));
    for (let i = 0; i < bytes; i++) {
      // don't use bit operators to support x86
      // (JS bitwise ops truncate to 32 bits; */+ keep 53-bit precision)
      value *= 256;
      value += this.readByte();
    }
    return value;
  }
  // Parses every child element of this container into this.data.
  updateBySource() {
    let end: number | undefined = undefined;
    this.data = [];
    for (
      this.offset = 0;
      this.offset < this.source!.length;
      this.offset = end
    ) {
      const id = this.readUint();
      const len = this.readUint();
      // Clamp to the container's end in case of a truncated/oversized length.
      end = Math.min(this.offset + len, this.source!.length);
      const data = this.source!.slice(this.offset, end);

      // Unknown ids still get a section so the bytes round-trip unchanged.
      const info = sections[id] || { name: "Unknown", type: "Unknown" };
      let ctr: any = WebmBase;
      switch (info.type) {
        case "Container":
          ctr = WebmContainer;
          break;
        case "Uint":
          ctr = WebmUint;
          break;
        case "Float":
          ctr = WebmFloat;
          break;
      }
      const section = new ctr(info.name, info.type);
      section.setSource(data);
      this.data.push({
        id: id,
        idHex: id.toString(16),
        data: section,
      });
    }
  }
  // Writes x as an EBML variable-length integer at this.offset.
  // When draft=true, only advances the cursor (used by the sizing pass).
  writeUint(x: number, draft = false) {
    // Find the smallest byte count whose marker bit still fits above x.
    for (
      var bytes = 1, flag = 0x80;
      x >= flag && bytes < 8;
      bytes++, flag *= 0x80
    ) {}

    if (!draft) {
      // `flag + x` sets the length-marker bit; emit big-endian, last byte first.
      let value = flag + x;
      for (let i = bytes - 1; i >= 0; i--) {
        // don't use bit operators to support x86
        const c = value % 256;
        this.source![this.offset! + i] = c;
        value = (value - c) / 256;
      }
    }

    this.offset += bytes;
  }

  // Serializes all children; returns total byte length.
  // draft=true computes the length without touching this.source.
  writeSections(draft = false) {
    this.offset = 0;
    for (let i = 0; i < this.data.length; i++) {
      const section = this.data[i],
        content = section.data.source,
        contentLength = content!.length;
      this.writeUint(section.id, draft);
      this.writeUint(contentLength, draft);
      if (!draft) {
        this.source!.set(content!, this.offset);
      }
      this.offset += contentLength;
    }
    return this.offset;
  }

  updateByData() {
    // run without accessing this.source to determine total length - need to know it to create Uint8Array
    const length = this.writeSections(true);
    this.source = new Uint8Array(length);
    // now really write data
    this.writeSections();
  }

  // Linear search for the first direct child with the given EBML id.
  getSectionById(id: number) {
    for (let i = 0; i < this.data.length; i++) {
      const section = this.data[i];
      if (section.id === id) {
        return section.data;
      }
    }

    return undefined;
  }
}
454 |
455 | class WebmFile extends WebmContainer {
456 | constructor(source: Uint8Array) {
457 | super("File", "File");
458 | this.setSource(source);
459 | }
460 |
461 | fixDuration(duration: number) {
462 | const segmentSection = this.getSectionById(0x8538067) as WebmContainer;
463 | if (!segmentSection) {
464 | return false;
465 | }
466 |
467 | const infoSection = segmentSection.getSectionById(
468 | 0x549a966,
469 | ) as WebmContainer;
470 | if (!infoSection) {
471 | return false;
472 | }
473 |
474 | const timeScaleSection = infoSection.getSectionById(
475 | 0xad7b1,
476 | ) as WebmFloat;
477 | if (!timeScaleSection) {
478 | return false;
479 | }
480 |
481 | let durationSection = infoSection.getSectionById(0x489) as WebmFloat;
482 | if (durationSection) {
483 | if (durationSection.getValue()! <= 0) {
484 | durationSection.setValue(duration);
485 | } else {
486 | return false;
487 | }
488 | } else {
489 | // append Duration section
490 | durationSection = new WebmFloat("Duration", "Float");
491 | durationSection.setValue(duration);
492 | infoSection.data.push({
493 | id: 0x489,
494 | data: durationSection,
495 | });
496 | }
497 |
498 | // set default time scale to 1 millisecond (1000000 nanoseconds)
499 | timeScaleSection.setValue(1000000);
500 | infoSection.updateByData();
501 | segmentSection.updateByData();
502 | this.updateByData();
503 |
504 | return true;
505 | }
506 |
507 | toBlob(type = "video/webm") {
508 | return new Blob([this.source!.buffer], { type });
509 | }
510 | }
511 |
512 | /**
513 | * Fixes duration on MediaRecorder output.
514 | * @param blob Input Blob with incorrect duration.
515 | * @param duration Correct duration (in milliseconds).
516 | * @param type Output blob mimetype (default: video/webm).
 * @returns Promise resolving to a Blob with the corrected duration, or to the original blob when no fix was applied.
518 | */
519 | export const webmFixDuration = (
520 | blob: Blob,
521 | duration: number,
522 | type = "video/webm",
523 | ): Promise => {
524 | return new Promise((resolve, reject) => {
525 | try {
526 | const reader = new FileReader();
527 |
528 | reader.addEventListener("loadend", () => {
529 | try {
530 | const result = reader.result as ArrayBuffer;
531 | const file = new WebmFile(new Uint8Array(result));
532 | if (file.fixDuration(duration)) {
533 | resolve(file.toBlob(type));
534 | } else {
535 | resolve(blob);
536 | }
537 | } catch (ex) {
538 | reject(ex);
539 | }
540 | });
541 |
542 | reader.addEventListener("error", () => reject());
543 |
544 | reader.readAsArrayBuffer(blob);
545 | } catch (ex) {
546 | reject(ex);
547 | }
548 | });
549 | };
550 |
--------------------------------------------------------------------------------
/src/utils/Constants.ts:
--------------------------------------------------------------------------------
// Transcription defaults consumed by the Whisper worker/transcriber hook.
export default {
  // Audio sampling rate (Hz) the Whisper models are fed.
  SAMPLING_RATE: 16000,
  DEFAULT_MODEL: "Xenova/whisper-tiny",
  // "transcribe" (same language) vs "translate" — only used for multilingual models.
  DEFAULT_SUBTASK: "transcribe",
  DEFAULT_LANGUAGE: "english",
  DEFAULT_QUANTIZED: false,
  DEFAULT_MULTILINGUAL: false,
};
9 |
10 | import { prebuiltAppConfig } from "@mlc-ai/web-llm";
11 |
12 | export const OWNER = "mlc-ai";
13 | export const REPO = "web-llm-chat";
14 | export const WEBLLM_HOME_URL = "https://webllm.mlc.ai";
15 | export const REPO_URL = `https://github.com/${OWNER}/${REPO}`;
16 | export const ISSUE_URL = `https://github.com/${OWNER}/${REPO}/issues`;
17 |
/** Metadata for one selectable WebLLM model. */
export interface ModelRecord {
  /** MLC model id (e.g. "Llama-3.2-1B-Instruct-q4f32_1-MLC"). */
  name: string;
  /** Human-readable name shown in the UI. */
  display_name: string;
  provider?: string;
  size?: string;
  quantization?: string;
  family: ModelFamily;
  /** Suggested sampling/config parameters for this model. */
  recommended_config?: {
    temperature?: number;
    context_window_size?: number;
    top_p?: number;
    presence_penalty?: number;
    frequency_penalty?: number;
  };
}
33 |
// Client-side route paths.
export enum Path {
  Home = "/",
  Chat = "/chat",
  Settings = "/settings",
  Templates = "/templates",
}

export enum ApiPath {
  Cors = "",
}

// Element ids — presumably used for imperative DOM lookups; confirm at call sites.
export enum SlotID {
  AppBody = "app-body",
  CustomModel = "custom-model",
}

// File names used for exported/imported JSON data.
export enum FileName {
  Templates = "templates.json",
  Prompts = "prompts.json",
}

// Keys under which app state stores are persisted.
export enum StoreKey {
  Chat = "chat-next-web-store",
  Access = "access-control",
  Config = "app-config",
  Templates = "templates-store",
  Prompt = "prompt-store",
  Update = "chat-update",
  Sync = "sync",
}
64 |
65 | export const DEFAULT_SIDEBAR_WIDTH = 320;
66 | export const MAX_SIDEBAR_WIDTH = 500;
67 | export const MIN_SIDEBAR_WIDTH = 260;
68 | export const NARROW_SIDEBAR_WIDTH = 100;
69 |
70 | export const ACCESS_CODE_PREFIX = "nk-";
71 |
72 | export const LAST_INPUT_KEY = "last-input";
73 | export const UNFINISHED_INPUT = (name: string) => "unfinished-input-" + name;
74 |
75 | export const STORAGE_KEY = "chatgpt-next-web";
76 |
77 | export const REQUEST_TIMEOUT_MS = 60000;
78 |
79 | export const EXPORT_MESSAGE_CLASS_NAME = "export-markdown";
80 |
81 | export const DEFAULT_INPUT_TEMPLATE = `{{input}}`; // input / time / model / lang
82 |
// System-prompt template; {{provider}}, {{model}} and {{time}} are substituted
// at runtime (see DEFAULT_INPUT_TEMPLATE's placeholder list).
export const DEFAULT_SYSTEM_TEMPLATE = `
You are an AI large language model assistant trained by {{provider}}.
You are currently engaging with users on WebLLM Chat, an open-source AI Chatbot UI developed by MLC.ai (Machine Learning Compilation).
Model display_name: {{model}}
The current date and time is {{time}}.
Latex inline format: \\(x^2\\)
Latex block format: $$e=mc^2$$
`;
91 |
// Model architecture families; values tag each ModelRecord.family.
export enum ModelFamily {
  LLAMA = "llama",
  PHI = "phi",
  MISTRAL = "mistral",
  GEMMA = "gemma",
  QWEN = "qwen",
  SMOL_LM = "smollm",
  WIZARD_MATH = "wizardmath",
  STABLE_LM = "stablelm",
  REDPAJAMA = "redpajama",
  // NOTE(review): value casing is inconsistent with the other members
  // ("DeepSeek" vs. lowercase). Confirm nothing persists or compares these
  // strings before normalizing.
  DEEPSEEK = "DeepSeek",
}
104 |
105 | const DEFAULT_MODEL_BASES: ModelRecord[] = [
106 | // Phi-3.5 Vision
107 | {
108 | name: "Phi-3.5-vision-instruct-q4f32_1-MLC",
109 | display_name: "Phi",
110 | provider: "Microsoft",
111 | family: ModelFamily.PHI,
112 | recommended_config: {
113 | temperature: 1,
114 | presence_penalty: 0,
115 | frequency_penalty: 0,
116 | top_p: 1,
117 | },
118 | },
119 | {
120 | name: "Phi-3.5-vision-instruct-q4f16_1-MLC",
121 | display_name: "Phi",
122 | provider: "Microsoft",
123 | family: ModelFamily.PHI,
124 | recommended_config: {
125 | temperature: 1,
126 | presence_penalty: 0,
127 | frequency_penalty: 0,
128 | top_p: 1,
129 | },
130 | },
131 | // Llama-3.2
132 | {
133 | name: "Llama-3.2-1B-Instruct-q4f32_1-MLC",
134 | display_name: "Llama",
135 | provider: "Meta",
136 | family: ModelFamily.LLAMA,
137 | recommended_config: {
138 | temperature: 0.6,
139 | presence_penalty: 0,
140 | frequency_penalty: 0,
141 | top_p: 0.9,
142 | },
143 | },
144 | {
145 | name: "Llama-3.2-1B-Instruct-q4f16_1-MLC",
146 | display_name: "Llama",
147 | provider: "Meta",
148 | family: ModelFamily.LLAMA,
149 | recommended_config: {
150 | temperature: 0.6,
151 | presence_penalty: 0,
152 | frequency_penalty: 0,
153 | top_p: 0.9,
154 | },
155 | },
156 | {
157 | name: "Llama-3.2-1B-Instruct-q0f32-MLC",
158 | display_name: "Llama",
159 | provider: "Meta",
160 | family: ModelFamily.LLAMA,
161 | recommended_config: {
162 | temperature: 0.6,
163 | presence_penalty: 0,
164 | frequency_penalty: 0,
165 | top_p: 0.9,
166 | },
167 | },
168 | {
169 | name: "Llama-3.2-1B-Instruct-q0f16-MLC",
170 | display_name: "Llama",
171 | provider: "Meta",
172 | family: ModelFamily.LLAMA,
173 | recommended_config: {
174 | temperature: 0.6,
175 | presence_penalty: 0,
176 | frequency_penalty: 0,
177 | top_p: 0.9,
178 | },
179 | },
180 | {
181 | name: "Llama-3.2-3B-Instruct-q4f32_1-MLC",
182 | display_name: "Llama",
183 | provider: "Meta",
184 | family: ModelFamily.LLAMA,
185 | recommended_config: {
186 | temperature: 0.6,
187 | presence_penalty: 0,
188 | frequency_penalty: 0,
189 | top_p: 0.9,
190 | },
191 | },
192 | {
193 | name: "Llama-3.2-3B-Instruct-q4f16_1-MLC",
194 | display_name: "Llama",
195 | provider: "Meta",
196 | family: ModelFamily.LLAMA,
197 | recommended_config: {
198 | temperature: 0.6,
199 | presence_penalty: 0,
200 | frequency_penalty: 0,
201 | top_p: 0.9,
202 | },
203 | },
204 | // Llama-3.1 8B
205 | {
206 | name: "Llama-3.1-8B-Instruct-q4f32_1-MLC-1k",
207 | display_name: "Llama",
208 | provider: "Meta",
209 | family: ModelFamily.LLAMA,
210 | recommended_config: {
211 | temperature: 0.6,
212 | presence_penalty: 0,
213 | frequency_penalty: 0,
214 | top_p: 0.9,
215 | },
216 | },
217 | {
218 | name: "Llama-3.1-8B-Instruct-q4f16_1-MLC-1k",
219 | display_name: "Llama",
220 | provider: "Meta",
221 | family: ModelFamily.LLAMA,
222 | recommended_config: {
223 | temperature: 0.6,
224 | presence_penalty: 0,
225 | frequency_penalty: 0,
226 | top_p: 0.9,
227 | },
228 | },
229 | {
230 | name: "Llama-3.1-8B-Instruct-q4f32_1-MLC",
231 | display_name: "Llama",
232 | provider: "Meta",
233 | family: ModelFamily.LLAMA,
234 | recommended_config: {
235 | temperature: 0.6,
236 | presence_penalty: 0,
237 | frequency_penalty: 0,
238 | top_p: 0.9,
239 | },
240 | },
241 | {
242 | name: "Llama-3.1-8B-Instruct-q4f16_1-MLC",
243 | display_name: "Llama",
244 | provider: "Meta",
245 | family: ModelFamily.LLAMA,
246 | recommended_config: {
247 | temperature: 0.6,
248 | presence_penalty: 0,
249 | frequency_penalty: 0,
250 | top_p: 0.9,
251 | },
252 | },
253 | // Deepseek
254 | {
255 | name: "DeepSeek-R1-Distill-Qwen-7B-q4f16_1-MLC",
256 | display_name: "DeepSeek",
257 | provider: "DeepSeek",
258 | family: ModelFamily.DEEPSEEK,
259 | recommended_config: {
260 | temperature: 1,
261 | presence_penalty: 0,
262 | frequency_penalty: 0,
263 | top_p: 1,
264 | },
265 | },
266 | {
267 | name: "DeepSeek-R1-Distill-Qwen-7B-q4f32_1-MLC",
268 | display_name: "DeepSeek",
269 | provider: "DeepSeek",
270 | family: ModelFamily.DEEPSEEK,
271 | recommended_config: {
272 | temperature: 1,
273 | presence_penalty: 0,
274 | frequency_penalty: 0,
275 | top_p: 1,
276 | },
277 | },
278 | {
279 | name: "DeepSeek-R1-Distill-Llama-8B-q4f32_1-MLC",
280 | display_name: "DeepSeek",
281 | provider: "DeepSeek",
282 | family: ModelFamily.DEEPSEEK,
283 | recommended_config: {
284 | temperature: 1,
285 | presence_penalty: 0,
286 | frequency_penalty: 0,
287 | top_p: 1,
288 | },
289 | },
290 | {
291 | name: "DeepSeek-R1-Distill-Llama-8B-q4f16_1-MLC",
292 | display_name: "DeepSeek",
293 | provider: "DeepSeek",
294 | family: ModelFamily.DEEPSEEK,
295 | recommended_config: {
296 | temperature: 1,
297 | presence_penalty: 0,
298 | frequency_penalty: 0,
299 | top_p: 1,
300 | },
301 | },
302 | // Hermes
303 | {
304 | name: "Hermes-3-Llama-3.2-3B-q4f32_1-MLC",
305 | display_name: "Hermes",
306 | provider: "NousResearch",
307 | family: ModelFamily.LLAMA,
308 | recommended_config: {
309 | temperature: 0.6,
310 | presence_penalty: 0,
311 | frequency_penalty: 0,
312 | top_p: 0.9,
313 | },
314 | },
315 | {
316 | name: "Hermes-3-Llama-3.2-3B-q4f16_1-MLC",
317 | display_name: "Hermes",
318 | provider: "NousResearch",
319 | family: ModelFamily.LLAMA,
320 | recommended_config: {
321 | temperature: 0.6,
322 | presence_penalty: 0,
323 | frequency_penalty: 0,
324 | top_p: 0.9,
325 | },
326 | },
327 | {
328 | name: "Hermes-3-Llama-3.1-8B-q4f32_1-MLC",
329 | display_name: "Hermes",
330 | provider: "NousResearch",
331 | family: ModelFamily.LLAMA,
332 | recommended_config: {
333 | temperature: 0.6,
334 | presence_penalty: 0,
335 | frequency_penalty: 0,
336 | top_p: 0.9,
337 | },
338 | },
339 | {
340 | name: "Hermes-3-Llama-3.1-8B-q4f16_1-MLC",
341 | display_name: "Hermes",
342 | provider: "NousResearch",
343 | family: ModelFamily.LLAMA,
344 | recommended_config: {
345 | temperature: 0.6,
346 | presence_penalty: 0,
347 | frequency_penalty: 0,
348 | top_p: 0.9,
349 | },
350 | },
351 | {
352 | name: "Hermes-2-Pro-Mistral-7B-q4f16_1-MLC",
353 | display_name: "Hermes",
354 | provider: "NousResearch",
355 | family: ModelFamily.MISTRAL,
356 | recommended_config: {
357 | temperature: 0.7,
358 | presence_penalty: 0,
359 | frequency_penalty: 0,
360 | top_p: 0.95,
361 | },
362 | },
363 | {
364 | name: "Hermes-2-Pro-Llama-3-8B-q4f16_1-MLC",
365 | display_name: "Hermes",
366 | provider: "NousResearch",
367 | family: ModelFamily.LLAMA,
368 | recommended_config: {
369 | temperature: 1,
370 | presence_penalty: 0,
371 | frequency_penalty: 0,
372 | top_p: 1,
373 | },
374 | },
375 | {
376 | name: "Hermes-2-Pro-Llama-3-8B-q4f32_1-MLC",
377 | display_name: "Hermes",
378 | provider: "NousResearch",
379 | family: ModelFamily.LLAMA,
380 | recommended_config: {
381 | temperature: 1,
382 | presence_penalty: 0,
383 | frequency_penalty: 0,
384 | top_p: 1,
385 | },
386 | },
387 | // Phi
388 | {
389 | name: "Phi-3.5-mini-instruct-q4f16_1-MLC",
390 | display_name: "Phi",
391 | provider: "Microsoft",
392 | family: ModelFamily.PHI,
393 | recommended_config: {
394 | temperature: 1,
395 | presence_penalty: 0,
396 | frequency_penalty: 0,
397 | top_p: 1,
398 | },
399 | },
400 | {
401 | name: "Phi-3.5-mini-instruct-q4f32_1-MLC",
402 | display_name: "Phi",
403 | provider: "Microsoft",
404 | family: ModelFamily.PHI,
405 | recommended_config: {
406 | temperature: 1,
407 | presence_penalty: 0,
408 | frequency_penalty: 0,
409 | top_p: 1,
410 | },
411 | },
412 | {
413 | name: "Phi-3.5-mini-instruct-q4f16_1-MLC-1k",
414 | display_name: "Phi",
415 | provider: "Microsoft",
416 | family: ModelFamily.PHI,
417 | recommended_config: {
418 | temperature: 1,
419 | presence_penalty: 0,
420 | frequency_penalty: 0,
421 | top_p: 1,
422 | },
423 | },
424 | {
425 | name: "Phi-3.5-mini-instruct-q4f32_1-MLC-1k",
426 | display_name: "Phi",
427 | provider: "Microsoft",
428 | family: ModelFamily.PHI,
429 | recommended_config: {
430 | temperature: 1,
431 | presence_penalty: 0,
432 | frequency_penalty: 0,
433 | top_p: 1,
434 | },
435 | },
436 | {
437 | name: "Mistral-7B-Instruct-v0.3-q4f16_1-MLC",
438 | display_name: "Mistral",
439 | provider: "Mistral AI",
440 | family: ModelFamily.MISTRAL,
441 | recommended_config: {
442 | temperature: 1,
443 | presence_penalty: 0,
444 | frequency_penalty: 0,
445 | top_p: 1,
446 | },
447 | },
448 | {
449 | name: "Mistral-7B-Instruct-v0.3-q4f32_1-MLC",
450 | display_name: "Mistral",
451 | provider: "Mistral AI",
452 | family: ModelFamily.MISTRAL,
453 | recommended_config: {
454 | temperature: 1,
455 | presence_penalty: 0,
456 | frequency_penalty: 0,
457 | top_p: 1,
458 | },
459 | },
460 | {
461 | name: "Mistral-7B-Instruct-v0.2-q4f16_1-MLC",
462 | display_name: "Mistral",
463 | provider: "Mistral AI",
464 | family: ModelFamily.MISTRAL,
465 | recommended_config: {
466 | temperature: 0.7,
467 | top_p: 0.95,
468 | },
469 | },
470 | {
471 | name: "OpenHermes-2.5-Mistral-7B-q4f16_1-MLC",
472 | display_name: "OpenHermes",
473 | provider: "NousResearch",
474 | family: ModelFamily.MISTRAL,
475 | recommended_config: {
476 | temperature: 0.7,
477 | top_p: 0.95,
478 | },
479 | },
480 | {
481 | name: "NeuralHermes-2.5-Mistral-7B-q4f16_1-MLC",
482 | display_name: "NeuralHermes",
483 | provider: "Maxime Labonne",
484 | family: ModelFamily.MISTRAL,
485 | recommended_config: {
486 | temperature: 0.7,
487 | top_p: 0.95,
488 | },
489 | },
490 | {
491 | name: "WizardMath-7B-V1.1-q4f16_1-MLC",
492 | display_name: "WizardMath",
493 | provider: "WizardLM",
494 | family: ModelFamily.WIZARD_MATH,
495 | recommended_config: {
496 | temperature: 0.7,
497 | top_p: 0.95,
498 | },
499 | },
500 | // SmolLM2
501 | {
502 | name: "SmolLM2-1.7B-Instruct-q4f16_1-MLC",
503 | display_name: "SmolLM",
504 | provider: "HuggingFaceTB",
505 | family: ModelFamily.SMOL_LM,
506 | recommended_config: {
507 | temperature: 1,
508 | presence_penalty: 0,
509 | frequency_penalty: 0,
510 | top_p: 1,
511 | },
512 | },
513 | {
514 | name: "SmolLM2-1.7B-Instruct-q4f32_1-MLC",
515 | display_name: "SmolLM",
516 | provider: "HuggingFaceTB",
517 | family: ModelFamily.SMOL_LM,
518 | recommended_config: {
519 | temperature: 1,
520 | presence_penalty: 0,
521 | frequency_penalty: 0,
522 | top_p: 1,
523 | },
524 | },
525 | {
526 | name: "SmolLM2-360M-Instruct-q0f16-MLC",
527 | display_name: "SmolLM",
528 | provider: "HuggingFaceTB",
529 | family: ModelFamily.SMOL_LM,
530 | recommended_config: {
531 | temperature: 1,
532 | presence_penalty: 0,
533 | frequency_penalty: 0,
534 | top_p: 1,
535 | },
536 | },
537 | {
538 | name: "SmolLM2-360M-Instruct-q0f32-MLC",
539 | display_name: "SmolLM",
540 | provider: "HuggingFaceTB",
541 | family: ModelFamily.SMOL_LM,
542 | recommended_config: {
543 | temperature: 1,
544 | presence_penalty: 0,
545 | frequency_penalty: 0,
546 | top_p: 1,
547 | },
548 | },
549 | {
550 | name: "SmolLM2-360M-Instruct-q4f16_1-MLC",
551 | display_name: "SmolLM",
552 | provider: "HuggingFaceTB",
553 | family: ModelFamily.SMOL_LM,
554 | recommended_config: {
555 | temperature: 1,
556 | presence_penalty: 0,
557 | frequency_penalty: 0,
558 | top_p: 1,
559 | },
560 | },
561 | {
562 | name: "SmolLM2-360M-Instruct-q4f32_1-MLC",
563 | display_name: "SmolLM",
564 | provider: "HuggingFaceTB",
565 | family: ModelFamily.SMOL_LM,
566 | recommended_config: {
567 | temperature: 1,
568 | presence_penalty: 0,
569 | frequency_penalty: 0,
570 | top_p: 1,
571 | },
572 | },
573 | {
574 | name: "SmolLM2-135M-Instruct-q0f16-MLC",
575 | display_name: "SmolLM",
576 | provider: "HuggingFaceTB",
577 | family: ModelFamily.SMOL_LM,
578 | recommended_config: {
579 | temperature: 1,
580 | presence_penalty: 0,
581 | frequency_penalty: 0,
582 | top_p: 1,
583 | },
584 | },
585 | {
586 | name: "SmolLM2-135M-Instruct-q0f32-MLC",
587 | display_name: "SmolLM",
588 | provider: "HuggingFaceTB",
589 | family: ModelFamily.SMOL_LM,
590 | recommended_config: {
591 | temperature: 1,
592 | presence_penalty: 0,
593 | frequency_penalty: 0,
594 | top_p: 1,
595 | },
596 | },
597 | {
598 | name: "Qwen2.5-0.5B-Instruct-q4f16_1-MLC",
599 | display_name: "Qwen",
600 | provider: "Alibaba",
601 | family: ModelFamily.QWEN,
602 | recommended_config: {
603 | temperature: 0.7,
604 | presence_penalty: 0,
605 | frequency_penalty: 0,
606 | top_p: 0.8,
607 | },
608 | },
609 | {
610 | name: "Qwen2.5-0.5B-Instruct-q4f32_1-MLC",
611 | display_name: "Qwen",
612 | provider: "Alibaba",
613 | family: ModelFamily.QWEN,
614 | recommended_config: {
615 | temperature: 0.7,
616 | presence_penalty: 0,
617 | frequency_penalty: 0,
618 | top_p: 0.8,
619 | },
620 | },
621 | {
622 | name: "Qwen2.5-0.5B-Instruct-q0f16-MLC",
623 | display_name: "Qwen",
624 | provider: "Alibaba",
625 | family: ModelFamily.QWEN,
626 | recommended_config: {
627 | temperature: 0.7,
628 | presence_penalty: 0,
629 | frequency_penalty: 0,
630 | top_p: 0.8,
631 | },
632 | },
633 | {
634 | name: "Qwen2.5-0.5B-Instruct-q0f32-MLC",
635 | display_name: "Qwen",
636 | provider: "Alibaba",
637 | family: ModelFamily.QWEN,
638 | recommended_config: {
639 | temperature: 0.7,
640 | presence_penalty: 0,
641 | frequency_penalty: 0,
642 | top_p: 0.8,
643 | },
644 | },
645 | {
646 | name: "Qwen2.5-1.5B-Instruct-q4f16_1-MLC",
647 | display_name: "Qwen",
648 | provider: "Alibaba",
649 | family: ModelFamily.QWEN,
650 | recommended_config: {
651 | temperature: 0.7,
652 | presence_penalty: 0,
653 | frequency_penalty: 0,
654 | top_p: 0.8,
655 | },
656 | },
657 | {
658 | name: "Qwen2.5-1.5B-Instruct-q4f32_1-MLC",
659 | display_name: "Qwen",
660 | provider: "Alibaba",
661 | family: ModelFamily.QWEN,
662 | recommended_config: {
663 | temperature: 0.7,
664 | presence_penalty: 0,
665 | frequency_penalty: 0,
666 | top_p: 0.8,
667 | },
668 | },
669 | {
670 | name: "Qwen2.5-3B-Instruct-q4f16_1-MLC",
671 | display_name: "Qwen",
672 | provider: "Alibaba",
673 | family: ModelFamily.QWEN,
674 | recommended_config: {
675 | temperature: 0.7,
676 | presence_penalty: 0,
677 | frequency_penalty: 0,
678 | top_p: 0.8,
679 | },
680 | },
681 | {
682 | name: "Qwen2.5-3B-Instruct-q4f32_1-MLC",
683 | display_name: "Qwen",
684 | provider: "Alibaba",
685 | family: ModelFamily.QWEN,
686 | recommended_config: {
687 | temperature: 0.7,
688 | presence_penalty: 0,
689 | frequency_penalty: 0,
690 | top_p: 0.8,
691 | },
692 | },
693 | {
694 | name: "Qwen2.5-7B-Instruct-q4f16_1-MLC",
695 | display_name: "Qwen",
696 | provider: "Alibaba",
697 | family: ModelFamily.QWEN,
698 | recommended_config: {
699 | temperature: 0.7,
700 | presence_penalty: 0,
701 | frequency_penalty: 0,
702 | top_p: 0.8,
703 | },
704 | },
705 | {
706 | name: "Qwen2.5-7B-Instruct-q4f32_1-MLC",
707 | display_name: "Qwen",
708 | provider: "Alibaba",
709 | family: ModelFamily.QWEN,
710 | recommended_config: {
711 | temperature: 0.7,
712 | presence_penalty: 0,
713 | frequency_penalty: 0,
714 | top_p: 0.8,
715 | },
716 | },
717 | // Qwen2.5-Coder
718 | {
719 | name: "Qwen2.5-Coder-0.5B-Instruct-q4f16_1-MLC",
720 | display_name: "Qwen",
721 | provider: "Alibaba",
722 | family: ModelFamily.QWEN,
723 | recommended_config: {
724 | temperature: 0.7,
725 | presence_penalty: 0,
726 | frequency_penalty: 0,
727 | top_p: 0.8,
728 | },
729 | },
730 | {
731 | name: "Qwen2.5-Coder-0.5B-Instruct-q4f32_1-MLC",
732 | display_name: "Qwen",
733 | provider: "Alibaba",
734 | family: ModelFamily.QWEN,
735 | recommended_config: {
736 | temperature: 0.7,
737 | presence_penalty: 0,
738 | frequency_penalty: 0,
739 | top_p: 0.8,
740 | },
741 | },
742 | {
743 | name: "Qwen2.5-Coder-0.5B-Instruct-q0f16-MLC",
744 | display_name: "Qwen",
745 | provider: "Alibaba",
746 | family: ModelFamily.QWEN,
747 | recommended_config: {
748 | temperature: 0.7,
749 | presence_penalty: 0,
750 | frequency_penalty: 0,
751 | top_p: 0.8,
752 | },
753 | },
754 | {
755 | name: "Qwen2.5-Coder-0.5B-Instruct-q0f32-MLC",
756 | display_name: "Qwen",
757 | provider: "Alibaba",
758 | family: ModelFamily.QWEN,
759 | recommended_config: {
760 | temperature: 0.7,
761 | presence_penalty: 0,
762 | frequency_penalty: 0,
763 | top_p: 0.8,
764 | },
765 | },
766 | {
767 | name: "Qwen2.5-Coder-1.5B-Instruct-q4f16_1-MLC",
768 | display_name: "Qwen",
769 | provider: "Alibaba",
770 | family: ModelFamily.QWEN,
771 | recommended_config: {
772 | temperature: 1.0,
773 | presence_penalty: 0,
774 | frequency_penalty: 0,
775 | top_p: 1.0,
776 | },
777 | },
778 | {
779 | name: "Qwen2.5-Coder-1.5B-Instruct-q4f32_1-MLC",
780 | display_name: "Qwen",
781 | provider: "Alibaba",
782 | family: ModelFamily.QWEN,
783 | recommended_config: {
784 | temperature: 1.0,
785 | presence_penalty: 0,
786 | frequency_penalty: 0,
787 | top_p: 1.0,
788 | },
789 | },
790 | {
791 | name: "Qwen2.5-Coder-3B-Instruct-q4f16_1-MLC",
792 | display_name: "Qwen",
793 | provider: "Alibaba",
794 | family: ModelFamily.QWEN,
795 | recommended_config: {
796 | temperature: 0.7,
797 | presence_penalty: 0,
798 | frequency_penalty: 0,
799 | top_p: 0.8,
800 | },
801 | },
802 | {
803 | name: "Qwen2.5-Coder-3B-Instruct-q4f32_1-MLC",
804 | display_name: "Qwen",
805 | provider: "Alibaba",
806 | family: ModelFamily.QWEN,
807 | recommended_config: {
808 | temperature: 0.7,
809 | presence_penalty: 0,
810 | frequency_penalty: 0,
811 | top_p: 0.8,
812 | },
813 | },
814 | {
815 | name: "Qwen2.5-Coder-7B-Instruct-q4f16_1-MLC",
816 | display_name: "Qwen",
817 | provider: "Alibaba",
818 | family: ModelFamily.QWEN,
819 | recommended_config: {
820 | temperature: 1.0,
821 | presence_penalty: 0,
822 | frequency_penalty: 0,
823 | top_p: 1.0,
824 | },
825 | },
826 | {
827 | name: "Qwen2.5-Coder-7B-Instruct-q4f32_1-MLC",
828 | display_name: "Qwen",
829 | provider: "Alibaba",
830 | family: ModelFamily.QWEN,
831 | recommended_config: {
832 | temperature: 1.0,
833 | presence_penalty: 0,
834 | frequency_penalty: 0,
835 | top_p: 1.0,
836 | },
837 | },
838 | // Qwen2-Math
839 | {
840 | name: "Qwen2-Math-1.5B-Instruct-q4f16_1-MLC",
841 | display_name: "Qwen",
842 | provider: "Alibaba",
843 | family: ModelFamily.QWEN,
844 | recommended_config: {
845 | temperature: 1.0,
846 | presence_penalty: 0,
847 | frequency_penalty: 0,
848 | top_p: 0.8,
849 | },
850 | },
851 | {
852 | name: "Qwen2-Math-1.5B-Instruct-q4f32_1-MLC",
853 | display_name: "Qwen",
854 | provider: "Alibaba",
855 | family: ModelFamily.QWEN,
856 | recommended_config: {
857 | temperature: 1.0,
858 | presence_penalty: 0,
859 | frequency_penalty: 0,
860 | top_p: 0.8,
861 | },
862 | },
863 | {
864 | name: "Qwen2-Math-7B-Instruct-q4f16_1-MLC",
865 | display_name: "Qwen",
866 | provider: "Alibaba",
867 | family: ModelFamily.QWEN,
868 | recommended_config: {
869 | temperature: 0.7,
870 | presence_penalty: 0,
871 | frequency_penalty: 0,
872 | top_p: 0.8,
873 | },
874 | },
875 | {
876 | name: "Qwen2-Math-7B-Instruct-q4f32_1-MLC",
877 | display_name: "Qwen",
878 | provider: "Alibaba",
879 | family: ModelFamily.QWEN,
880 | recommended_config: {
881 | temperature: 0.7,
882 | presence_penalty: 0,
883 | frequency_penalty: 0,
884 | top_p: 0.8,
885 | },
886 | },
887 | {
888 | name: "gemma-2-2b-it-q4f16_1-MLC",
889 | display_name: "Gemma",
890 | provider: "Google",
891 | family: ModelFamily.GEMMA,
892 | recommended_config: {
893 | temperature: 0.7,
894 | presence_penalty: 0,
895 | frequency_penalty: 1,
896 | top_p: 0.95,
897 | },
898 | },
899 | {
900 | name: "gemma-2-2b-it-q4f32_1-MLC",
901 | display_name: "Gemma",
902 | provider: "Google",
903 | family: ModelFamily.GEMMA,
904 | recommended_config: {
905 | temperature: 0.7,
906 | presence_penalty: 0,
907 | frequency_penalty: 1,
908 | top_p: 0.95,
909 | },
910 | },
911 | {
912 | name: "gemma-2-2b-it-q4f16_1-MLC-1k",
913 | display_name: "Gemma",
914 | provider: "Google",
915 | family: ModelFamily.GEMMA,
916 | recommended_config: {
917 | temperature: 0.7,
918 | presence_penalty: 0,
919 | frequency_penalty: 1,
920 | top_p: 0.95,
921 | },
922 | },
923 | {
924 | name: "gemma-2-2b-it-q4f32_1-MLC-1k",
925 | display_name: "Gemma",
926 | provider: "Google",
927 | family: ModelFamily.GEMMA,
928 | recommended_config: {
929 | temperature: 0.7,
930 | presence_penalty: 0,
931 | frequency_penalty: 1,
932 | top_p: 0.95,
933 | },
934 | },
935 | {
936 | name: "gemma-2-9b-it-q4f16_1-MLC",
937 | display_name: "Gemma",
938 | provider: "Google",
939 | family: ModelFamily.GEMMA,
940 | recommended_config: {
941 | temperature: 0.7,
942 | presence_penalty: 0,
943 | frequency_penalty: 1,
944 | top_p: 0.95,
945 | },
946 | },
947 | {
948 | name: "gemma-2-9b-it-q4f32_1-MLC",
949 | display_name: "Gemma",
950 | provider: "Google",
951 | family: ModelFamily.GEMMA,
952 | recommended_config: {
953 | temperature: 0.7,
954 | presence_penalty: 0,
955 | frequency_penalty: 1,
956 | top_p: 0.95,
957 | },
958 | },
959 | {
960 | name: "gemma-2-2b-jpn-it-q4f16_1-MLC",
961 | display_name: "Gemma",
962 | provider: "Google",
963 | family: ModelFamily.GEMMA,
964 | recommended_config: {
965 | temperature: 0.7,
966 | presence_penalty: 0,
967 | frequency_penalty: 1,
968 | top_p: 0.9,
969 | },
970 | },
971 | {
972 | name: "gemma-2-2b-jpn-it-q4f32_1-MLC",
973 | display_name: "Gemma",
974 | provider: "Google",
975 | family: ModelFamily.GEMMA,
976 | recommended_config: {
977 | temperature: 0.7,
978 | presence_penalty: 0,
979 | frequency_penalty: 1,
980 | top_p: 0.9,
981 | },
982 | },
983 | {
984 | name: "stablelm-2-zephyr-1_6b-q4f16_1-MLC",
985 | display_name: "StableLM",
986 | provider: "Hugging Face",
987 | family: ModelFamily.STABLE_LM,
988 | recommended_config: {
989 | temperature: 0.7,
990 | presence_penalty: 0,
991 | frequency_penalty: 0,
992 | top_p: 0.95,
993 | },
994 | },
995 | {
996 | name: "stablelm-2-zephyr-1_6b-q4f32_1-MLC",
997 | display_name: "StableLM",
998 | provider: "Hugging Face",
999 | family: ModelFamily.STABLE_LM,
1000 | recommended_config: {
1001 | temperature: 0.7,
1002 | presence_penalty: 0,
1003 | frequency_penalty: 0,
1004 | top_p: 0.95,
1005 | },
1006 | },
1007 | {
1008 | name: "stablelm-2-zephyr-1_6b-q4f16_1-MLC-1k",
1009 | display_name: "StableLM",
1010 | provider: "Hugging Face",
1011 | family: ModelFamily.STABLE_LM,
1012 | recommended_config: {
1013 | temperature: 0.7,
1014 | presence_penalty: 0,
1015 | frequency_penalty: 0,
1016 | top_p: 0.95,
1017 | },
1018 | },
1019 | {
1020 | name: "stablelm-2-zephyr-1_6b-q4f32_1-MLC-1k",
1021 | display_name: "StableLM",
1022 | provider: "Hugging Face",
1023 | family: ModelFamily.STABLE_LM,
1024 | recommended_config: {
1025 | temperature: 0.7,
1026 | presence_penalty: 0,
1027 | frequency_penalty: 0,
1028 | top_p: 0.95,
1029 | },
1030 | },
1031 | {
1032 | name: "RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC",
1033 | display_name: "RedPajama",
1034 | provider: "Together",
1035 | family: ModelFamily.REDPAJAMA,
1036 | recommended_config: {
1037 | temperature: 0.7,
1038 | top_p: 0.95,
1039 | },
1040 | },
1041 | {
1042 | name: "RedPajama-INCITE-Chat-3B-v1-q4f32_1-MLC",
1043 | display_name: "RedPajama",
1044 | provider: "Together",
1045 | family: ModelFamily.REDPAJAMA,
1046 | recommended_config: {
1047 | temperature: 0.7,
1048 | top_p: 0.95,
1049 | },
1050 | },
1051 | {
1052 | name: "RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC-1k",
1053 | display_name: "RedPajama",
1054 | provider: "Together",
1055 | family: ModelFamily.REDPAJAMA,
1056 | recommended_config: {
1057 | temperature: 0.7,
1058 | top_p: 0.95,
1059 | },
1060 | },
1061 | {
1062 | name: "RedPajama-INCITE-Chat-3B-v1-q4f32_1-MLC-1k",
1063 | display_name: "RedPajama",
1064 | provider: "Together",
1065 | family: ModelFamily.REDPAJAMA,
1066 | recommended_config: {
1067 | temperature: 0.7,
1068 | top_p: 0.95,
1069 | },
1070 | },
1071 | {
1072 | name: "TinyLlama-1.1B-Chat-v1.0-q4f16_1-MLC",
1073 | display_name: "TinyLlama",
1074 | provider: "Zhang Peiyuan",
1075 | family: ModelFamily.LLAMA,
1076 | recommended_config: {
1077 | temperature: 1,
1078 | presence_penalty: 0,
1079 | frequency_penalty: 0,
1080 | top_p: 1,
1081 | },
1082 | },
1083 | {
1084 | name: "TinyLlama-1.1B-Chat-v1.0-q4f32_1-MLC",
1085 | display_name: "TinyLlama",
1086 | provider: "Zhang Peiyuan",
1087 | family: ModelFamily.LLAMA,
1088 | recommended_config: {
1089 | temperature: 1,
1090 | presence_penalty: 0,
1091 | frequency_penalty: 0,
1092 | top_p: 1,
1093 | },
1094 | },
1095 | {
1096 | name: "TinyLlama-1.1B-Chat-v1.0-q4f16_1-MLC-1k",
1097 | display_name: "TinyLlama",
1098 | provider: "Zhang Peiyuan",
1099 | family: ModelFamily.LLAMA,
1100 | recommended_config: {
1101 | temperature: 1,
1102 | presence_penalty: 0,
1103 | frequency_penalty: 0,
1104 | top_p: 1,
1105 | },
1106 | },
1107 | {
1108 | name: "TinyLlama-1.1B-Chat-v1.0-q4f32_1-MLC-1k",
1109 | display_name: "TinyLlama",
1110 | provider: "Zhang Peiyuan",
1111 | family: ModelFamily.LLAMA,
1112 | recommended_config: {
1113 | temperature: 1,
1114 | presence_penalty: 0,
1115 | frequency_penalty: 0,
1116 | top_p: 1,
1117 | },
1118 | },
1119 | {
1120 | name: "Llama-3.1-70B-Instruct-q3f16_1-MLC",
1121 | display_name: "Llama",
1122 | provider: "Meta",
1123 | family: ModelFamily.LLAMA,
1124 | recommended_config: {
1125 | temperature: 0.6,
1126 | presence_penalty: 0,
1127 | frequency_penalty: 0,
1128 | top_p: 0.9,
1129 | },
1130 | },
1131 | {
1132 | name: "Qwen2-0.5B-Instruct-q4f16_1-MLC",
1133 | display_name: "Qwen",
1134 | provider: "Alibaba",
1135 | family: ModelFamily.QWEN,
1136 | recommended_config: {
1137 | temperature: 0.7,
1138 | presence_penalty: 0,
1139 | frequency_penalty: 0,
1140 | top_p: 0.8,
1141 | },
1142 | },
1143 | {
1144 | name: "Qwen2-0.5B-Instruct-q0f16-MLC",
1145 | display_name: "Qwen",
1146 | provider: "Alibaba",
1147 | family: ModelFamily.QWEN,
1148 | recommended_config: {
1149 | temperature: 0.7,
1150 | presence_penalty: 0,
1151 | frequency_penalty: 0,
1152 | top_p: 0.8,
1153 | },
1154 | },
1155 | {
1156 | name: "Qwen2-0.5B-Instruct-q0f32-MLC",
1157 | display_name: "Qwen",
1158 | provider: "Alibaba",
1159 | family: ModelFamily.QWEN,
1160 | recommended_config: {
1161 | temperature: 0.7,
1162 | presence_penalty: 0,
1163 | frequency_penalty: 0,
1164 | top_p: 0.8,
1165 | },
1166 | },
1167 | {
1168 | name: "Qwen2-1.5B-Instruct-q4f16_1-MLC",
1169 | display_name: "Qwen",
1170 | provider: "Alibaba",
1171 | family: ModelFamily.QWEN,
1172 | recommended_config: {
1173 | temperature: 0.7,
1174 | presence_penalty: 0,
1175 | frequency_penalty: 0,
1176 | top_p: 0.8,
1177 | },
1178 | },
1179 | {
1180 | name: "Qwen2-1.5B-Instruct-q4f32_1-MLC",
1181 | display_name: "Qwen",
1182 | provider: "Alibaba",
1183 | family: ModelFamily.QWEN,
1184 | recommended_config: {
1185 | temperature: 0.7,
1186 | presence_penalty: 0,
1187 | frequency_penalty: 0,
1188 | top_p: 0.8,
1189 | },
1190 | },
1191 | {
1192 | name: "Qwen2-7B-Instruct-q4f16_1-MLC",
1193 | display_name: "Qwen",
1194 | provider: "Alibaba",
1195 | family: ModelFamily.QWEN,
1196 | recommended_config: {
1197 | temperature: 0.7,
1198 | presence_penalty: 0,
1199 | frequency_penalty: 0,
1200 | top_p: 0.8,
1201 | },
1202 | },
1203 | {
1204 | name: "Qwen2-7B-Instruct-q4f32_1-MLC",
1205 | display_name: "Qwen",
1206 | provider: "Alibaba",
1207 | family: ModelFamily.QWEN,
1208 | recommended_config: {
1209 | temperature: 0.7,
1210 | presence_penalty: 0,
1211 | frequency_penalty: 0,
1212 | top_p: 0.8,
1213 | },
1214 | },
1215 | {
1216 | name: "Llama-3-8B-Instruct-q4f32_1-MLC-1k",
1217 | display_name: "Llama",
1218 | provider: "Meta",
1219 | family: ModelFamily.LLAMA,
1220 | recommended_config: {
1221 | temperature: 0.6,
1222 | presence_penalty: 0,
1223 | frequency_penalty: 0,
1224 | top_p: 0.9,
1225 | },
1226 | },
1227 | {
1228 | name: "Llama-3-8B-Instruct-q4f16_1-MLC-1k",
1229 | display_name: "Llama",
1230 | provider: "Meta",
1231 | family: ModelFamily.LLAMA,
1232 | recommended_config: {
1233 | temperature: 0.6,
1234 | presence_penalty: 0,
1235 | frequency_penalty: 0,
1236 | top_p: 0.9,
1237 | },
1238 | },
1239 | {
1240 | name: "Llama-3-8B-Instruct-q4f32_1-MLC",
1241 | display_name: "Llama",
1242 | provider: "Meta",
1243 | family: ModelFamily.LLAMA,
1244 | recommended_config: {
1245 | temperature: 0.6,
1246 | presence_penalty: 0,
1247 | frequency_penalty: 0,
1248 | top_p: 0.9,
1249 | },
1250 | },
1251 | {
1252 | name: "Llama-3-8B-Instruct-q4f16_1-MLC",
1253 | display_name: "Llama",
1254 | provider: "Meta",
1255 | family: ModelFamily.LLAMA,
1256 | recommended_config: {
1257 | temperature: 0.6,
1258 | presence_penalty: 0,
1259 | frequency_penalty: 0,
1260 | top_p: 0.9,
1261 | },
1262 | },
1263 | {
1264 | name: "Llama-3-70B-Instruct-q3f16_1-MLC",
1265 | display_name: "Llama",
1266 | provider: "Meta",
1267 | family: ModelFamily.LLAMA,
1268 | recommended_config: {
1269 | temperature: 0.7,
1270 | presence_penalty: 0,
1271 | frequency_penalty: 0,
1272 | top_p: 0.95,
1273 | },
1274 | },
1275 | // Phi3-mini-instruct
1276 | {
1277 | name: "Phi-3-mini-4k-instruct-q4f16_1-MLC",
1278 | display_name: "Phi 3",
1279 | provider: "Microsoft",
1280 | family: ModelFamily.PHI,
1281 | recommended_config: {
1282 | temperature: 0.7,
1283 | presence_penalty: 0,
1284 | frequency_penalty: 0,
1285 | top_p: 1,
1286 | },
1287 | },
1288 | {
1289 | name: "Phi-3-mini-4k-instruct-q4f32_1-MLC",
1290 | display_name: "Phi 3",
1291 | provider: "Microsoft",
1292 | family: ModelFamily.PHI,
1293 | recommended_config: {
1294 | temperature: 0.7,
1295 | presence_penalty: 0,
1296 | frequency_penalty: 0,
1297 | top_p: 1,
1298 | },
1299 | },
1300 | {
1301 | name: "Phi-3-mini-4k-instruct-q4f16_1-MLC-1k",
1302 | display_name: "Phi 3",
1303 | provider: "Microsoft",
1304 | family: ModelFamily.PHI,
1305 | recommended_config: {
1306 | temperature: 0.7,
1307 | presence_penalty: 0,
1308 | frequency_penalty: 0,
1309 | top_p: 1,
1310 | },
1311 | },
1312 | {
1313 | name: "Phi-3-mini-4k-instruct-q4f32_1-MLC-1k",
1314 | display_name: "Phi 3",
1315 | provider: "Microsoft",
1316 | family: ModelFamily.PHI,
1317 | recommended_config: {
1318 | temperature: 0.7,
1319 | presence_penalty: 0,
1320 | frequency_penalty: 0,
1321 | top_p: 1,
1322 | },
1323 | },
1324 | {
1325 | name: "Llama-2-7b-chat-hf-q4f32_1-MLC-1k",
1326 | display_name: "Llama",
1327 | provider: "Meta",
1328 | family: ModelFamily.LLAMA,
1329 | recommended_config: {
1330 | temperature: 0.6,
1331 | top_p: 0.9,
1332 | },
1333 | },
1334 | {
1335 | name: "Llama-2-7b-chat-hf-q4f16_1-MLC-1k",
1336 | display_name: "Llama",
1337 | provider: "Meta",
1338 | family: ModelFamily.LLAMA,
1339 | recommended_config: {
1340 | temperature: 0.6,
1341 | top_p: 0.9,
1342 | },
1343 | },
1344 | {
1345 | name: "Llama-2-7b-chat-hf-q4f32_1-MLC",
1346 | display_name: "Llama",
1347 | provider: "Meta",
1348 | family: ModelFamily.LLAMA,
1349 | recommended_config: {
1350 | temperature: 0.6,
1351 | top_p: 0.9,
1352 | },
1353 | },
1354 | {
1355 | name: "Llama-2-7b-chat-hf-q4f16_1-MLC",
1356 | display_name: "Llama",
1357 | provider: "Meta",
1358 | family: ModelFamily.LLAMA,
1359 | recommended_config: {
1360 | temperature: 0.6,
1361 | top_p: 0.9,
1362 | },
1363 | },
1364 | {
1365 | name: "Llama-2-13b-chat-hf-q4f16_1-MLC",
1366 | display_name: "Llama",
1367 | provider: "Meta",
1368 | family: ModelFamily.LLAMA,
1369 | recommended_config: {
1370 | temperature: 0.6,
1371 | top_p: 0.9,
1372 | },
1373 | },
1374 | {
1375 | name: "phi-2-q4f16_1-MLC",
1376 | display_name: "Phi",
1377 | provider: "Microsoft",
1378 | family: ModelFamily.PHI,
1379 | recommended_config: {
1380 | temperature: 0.7,
1381 | top_p: 0.95,
1382 | },
1383 | },
1384 | {
1385 | name: "phi-2-q4f32_1-MLC",
1386 | display_name: "Phi",
1387 | provider: "Microsoft",
1388 | family: ModelFamily.PHI,
1389 | recommended_config: {
1390 | temperature: 0.7,
1391 | top_p: 0.95,
1392 | },
1393 | },
1394 | {
1395 | name: "phi-2-q4f16_1-MLC-1k",
1396 | display_name: "Phi",
1397 | provider: "Microsoft",
1398 | family: ModelFamily.PHI,
1399 | recommended_config: {
1400 | temperature: 0.7,
1401 | top_p: 0.95,
1402 | },
1403 | },
1404 | {
1405 | name: "phi-2-q4f32_1-MLC-1k",
1406 | display_name: "Phi",
1407 | provider: "Microsoft",
1408 | family: ModelFamily.PHI,
1409 | recommended_config: {
1410 | temperature: 0.7,
1411 | top_p: 0.95,
1412 | },
1413 | },
1414 | {
1415 | name: "phi-1_5-q4f16_1-MLC",
1416 | display_name: "Phi",
1417 | provider: "Microsoft",
1418 | family: ModelFamily.PHI,
1419 | recommended_config: {
1420 | temperature: 0.7,
1421 | top_p: 0.95,
1422 | },
1423 | },
1424 | {
1425 | name: "phi-1_5-q4f32_1-MLC",
1426 | display_name: "Phi",
1427 | provider: "Microsoft",
1428 | family: ModelFamily.PHI,
1429 | recommended_config: {
1430 | temperature: 0.7,
1431 | top_p: 0.95,
1432 | },
1433 | },
1434 | {
1435 | name: "phi-1_5-q4f16_1-MLC-1k",
1436 | display_name: "Phi",
1437 | provider: "Microsoft",
1438 | family: ModelFamily.PHI,
1439 | recommended_config: {
1440 | temperature: 0.7,
1441 | top_p: 0.95,
1442 | },
1443 | },
1444 | {
1445 | name: "phi-1_5-q4f32_1-MLC-1k",
1446 | display_name: "Phi",
1447 | provider: "Microsoft",
1448 | family: ModelFamily.PHI,
1449 | recommended_config: {
1450 | temperature: 0.7,
1451 | top_p: 0.95,
1452 | },
1453 | },
1454 | {
1455 | name: "TinyLlama-1.1B-Chat-v0.4-q4f16_1-MLC",
1456 | display_name: "TinyLlama",
1457 | provider: "Zhang Peiyuan",
1458 | family: ModelFamily.LLAMA,
1459 | recommended_config: {
1460 | temperature: 0.7,
1461 | top_p: 0.95,
1462 | },
1463 | },
1464 | {
1465 | name: "TinyLlama-1.1B-Chat-v0.4-q4f32_1-MLC",
1466 | display_name: "TinyLlama",
1467 | provider: "Zhang Peiyuan",
1468 | family: ModelFamily.LLAMA,
1469 | recommended_config: {
1470 | temperature: 0.7,
1471 | top_p: 0.95,
1472 | },
1473 | },
1474 | {
1475 | name: "TinyLlama-1.1B-Chat-v0.4-q4f16_1-MLC-1k",
1476 | display_name: "TinyLlama",
1477 | provider: "Zhang Peiyuan",
1478 | family: ModelFamily.LLAMA,
1479 | recommended_config: {
1480 | temperature: 0.7,
1481 | top_p: 0.95,
1482 | },
1483 | },
1484 | {
1485 | name: "TinyLlama-1.1B-Chat-v0.4-q4f32_1-MLC-1k",
1486 | display_name: "TinyLlama",
1487 | provider: "Zhang Peiyuan",
1488 | family: ModelFamily.LLAMA,
1489 | recommended_config: {
1490 | temperature: 0.7,
1491 | top_p: 0.95,
1492 | },
1493 | },
1494 | ];
1495 |
1496 | // Get model size from model id
1497 | export function getSize(model_id: string): string | undefined {
1498 | const sizeRegex = /-(\d+(\.\d+)?[BK])-?/;
1499 | const match = model_id.match(sizeRegex);
1500 | if (match) {
1501 | return match[1];
1502 | }
1503 | return undefined;
1504 | }
1505 |
1506 | // Get quantization method from model id
1507 | export function getQuantization(model_id: string): string | undefined {
1508 | const quantizationRegex = /-(q[0-9]f[0-9]+(?:_[0-9])?)-/;
1509 | const match = model_id.match(quantizationRegex);
1510 | if (match) {
1511 | return match[1];
1512 | }
1513 | return undefined;
1514 | }
1515 |
1516 | export const DEFAULT_MODELS: ModelRecord[] = DEFAULT_MODEL_BASES.filter(
1517 | (model) => {
1518 | if (
1519 | !prebuiltAppConfig.model_list.map((m) => m.model_id).includes(model.name)
1520 | ) {
1521 | console.warn(
1522 | `Model ${model.name} not supported by current WebLLM version.`
1523 | );
1524 | return false;
1525 | }
1526 | return true;
1527 | }
1528 | ).map((model) => ({
1529 | ...model,
1530 | size: getSize(model.name),
1531 | quantization: getQuantization(model.name),
1532 | }));
1533 |
--------------------------------------------------------------------------------
/src/vite-env.d.ts:
--------------------------------------------------------------------------------
1 | /// <reference types="vite/client" />
2 |
--------------------------------------------------------------------------------
/src/whisper-worker.js:
--------------------------------------------------------------------------------
1 | /* eslint-disable camelcase */
2 | import { pipeline, env } from "@xenova/transformers";
3 |
4 | // Disable local models
5 | env.allowLocalModels = false;
6 |
7 | // Define model factories
8 | // Ensures only one model is created of each type
class PipelineFactory {
  // Pipeline descriptor — subclasses override these static fields.
  static task = null;
  static model = null;
  static quantized = null;
  // Cached result of pipeline() (a Promise); shared so each model is only
  // instantiated once. transcribe() resets this to null to force a reload
  // when the requested model/quantization changes.
  static instance = null;

  // NOTE(review): this constructor is never invoked in this file — the
  // class is only used statically. Presumably kept for symmetry; confirm
  // before removing.
  constructor(tokenizer, model, quantized) {
    this.tokenizer = tokenizer;
    this.model = model;
    this.quantized = quantized;
  }

  // Lazily create (and cache) the pipeline. The optional progress_callback
  // receives download/initialisation progress events from the library.
  static async getInstance(progress_callback = null) {
    if (this.instance === null) {
      this.instance = pipeline(this.task, this.model, {
        quantized: this.quantized,
        progress_callback,

        // For medium models, we need to load the `no_attentions` revision to avoid running out of memory
        revision: this.model.includes("/whisper-medium") ? "no_attentions" : "main"
      });
    }

    return this.instance;
  }
}
35 |
// Worker entry point: receive one transcription request from the main
// thread, run it, and post the finished transcript back.
self.addEventListener("message", async (event) => {
  const { audio, model, multilingual, quantized, subtask, language } =
    event.data;

  const transcript = await transcribe(
    audio,
    model,
    multilingual,
    quantized,
    subtask,
    language,
  );

  // null means transcribe() already posted an "error" message; nothing
  // more to send.
  if (transcript === null) return;

  // Send the result back to the main thread
  self.postMessage({
    status: "complete",
    task: "automatic-speech-recognition",
    data: transcript,
  });
});
58 |
// Concrete factory for the ASR pipeline. `model` and `quantized` are
// filled in by transcribe() below before the instance is first created.
class AutomaticSpeechRecognitionPipelineFactory extends PipelineFactory {
  static task = "automatic-speech-recognition";
  static model = null;
  static quantized = null;
}
64 |
// Run Whisper transcription on `audio`, streaming partial results to the
// main thread via postMessage ("update" messages) and returning the final
// pipeline output — or null if the pipeline threw (the error is posted
// as an "error" message instead).
const transcribe = async (
  audio,
  model,
  multilingual,
  quantized,
  subtask,
  language,
) => {

  const isDistilWhisper = model.startsWith("distil-whisper/");

  // Non-distil, non-multilingual checkpoints use the ".en" variants.
  let modelName = model;
  if (!isDistilWhisper && !multilingual) {
    modelName += ".en"
  }

  const p = AutomaticSpeechRecognitionPipelineFactory;
  if (p.model !== modelName || p.quantized !== quantized) {
    // Invalidate model if different
    p.model = modelName;
    p.quantized = quantized;

    if (p.instance !== null) {
      // Dispose the previously-cached pipeline before loading the new one.
      (await p.getInstance()).dispose();
      p.instance = null;
    }
  }

  // Load transcriber model
  // Download/initialisation progress events are forwarded verbatim to
  // the main thread.
  let transcriber = await p.getInstance((data) => {
    self.postMessage(data);
  });

  // Ratio used by the tokenizer when decoding timestamps below —
  // presumably the time granularity (seconds) of one timestamp token;
  // TODO confirm against the transformers.js docs.
  const time_precision =
    transcriber.processor.feature_extractor.config.chunk_length /
    transcriber.model.config.max_source_positions;

  // Storage for chunks to be processed. Initialise with an empty chunk.
  let chunks_to_process = [
    {
      tokens: [],
      finalised: false,
    },
  ];

  // TODO: Storage for fully-processed and merged chunks
  // let decoded_chunks = [];

  // Called after each audio window is fully processed: finalise the
  // current chunk and open a fresh empty one unless this was the last.
  function chunk_callback(chunk) {
    let last = chunks_to_process[chunks_to_process.length - 1];

    // Overwrite last chunk with new info
    Object.assign(last, chunk);
    last.finalised = true;

    // Create an empty chunk after, if it not the last chunk
    if (!chunk.is_last) {
      chunks_to_process.push({
        tokens: [],
        finalised: false,
      });
    }
  }

  // Inject custom callback function to handle merging of chunks
  // Runs after every generation step so the UI can render live partials.
  function callback_function(item) {
    let last = chunks_to_process[chunks_to_process.length - 1];

    // Update tokens of last chunk
    last.tokens = [...item[0].output_token_ids];

    // Merge text chunks
    // TODO optimise so we don't have to decode all chunks every time
    // NOTE(review): _decode_asr is a private transformers.js API and may
    // change between versions; verify against the pinned dependency.
    let data = transcriber.tokenizer._decode_asr(chunks_to_process, {
      time_precision: time_precision,
      return_timestamps: true,
      force_full_sequences: false,
    });

    self.postMessage({
      status: "update",
      task: "automatic-speech-recognition",
      data: data,
    });
  }

  // Actually run transcription
  let output = await transcriber(audio, {
    // Greedy
    top_k: 0,
    do_sample: false,

    // Sliding window
    chunk_length_s: isDistilWhisper ? 20 : 30,
    stride_length_s: isDistilWhisper ? 3 : 5,

    // Language and task
    language: language,
    task: subtask,

    // Return timestamps
    return_timestamps: true,
    force_full_sequences: false,

    // Callback functions
    callback_function: callback_function, // after each generation step
    chunk_callback: chunk_callback, // after each chunk is processed
  }).catch((error) => {
    // Report the failure; the message listener treats null as "handled".
    self.postMessage({
      status: "error",
      task: "automatic-speech-recognition",
      data: error,
    });
    return null;
  });

  return output;
};
183 |
--------------------------------------------------------------------------------
/tsconfig.app.json:
--------------------------------------------------------------------------------
1 | {
2 | "compilerOptions": {
3 | "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.app.tsbuildinfo",
4 | "target": "ES2020",
5 | "useDefineForClassFields": true,
6 | "lib": ["ES2020", "DOM", "DOM.Iterable"],
7 | "module": "ESNext",
8 | "skipLibCheck": true,
9 |
10 | /* Bundler mode */
11 | "moduleResolution": "bundler",
12 | "allowImportingTsExtensions": true,
13 | "isolatedModules": true,
14 | "moduleDetection": "force",
15 | "noEmit": true,
16 | "jsx": "react-jsx",
17 |
18 | /* Linting */
19 | "strict": true,
20 | "noUnusedLocals": true,
21 | "noUnusedParameters": true,
22 | "noFallthroughCasesInSwitch": true,
23 | "noUncheckedSideEffectImports": true
24 | },
25 | "include": ["src"]
26 | }
27 |
--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "files": [],
3 | "references": [
4 | { "path": "./tsconfig.app.json" },
5 | { "path": "./tsconfig.node.json" }
6 | ]
7 | }
8 |
--------------------------------------------------------------------------------
/tsconfig.node.json:
--------------------------------------------------------------------------------
1 | {
2 | "compilerOptions": {
3 | "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.node.tsbuildinfo",
4 | "target": "ES2022",
5 | "lib": ["ES2023"],
6 | "module": "ESNext",
7 | "skipLibCheck": true,
8 |
9 | /* Bundler mode */
10 | "moduleResolution": "bundler",
11 | "allowImportingTsExtensions": true,
12 | "isolatedModules": true,
13 | "moduleDetection": "force",
14 | "noEmit": true,
15 |
16 | /* Linting */
17 | "strict": true,
18 | "noUnusedLocals": true,
19 | "noUnusedParameters": true,
20 | "noFallthroughCasesInSwitch": true,
21 | "noUncheckedSideEffectImports": true
22 | },
23 | "include": ["vite.config.ts"]
24 | }
25 |
--------------------------------------------------------------------------------
/vite.config.ts:
--------------------------------------------------------------------------------
1 | import { defineConfig } from "vite";
2 | import react from "@vitejs/plugin-react";
3 | import tailwindcss from "@tailwindcss/vite";
4 |
// https://vite.dev/config/
// Vite build configuration: React plugin (JSX transform / fast refresh)
// plus the Tailwind CSS Vite plugin.
export default defineConfig({
  plugins: [react(), tailwindcss()],
});
9 |
--------------------------------------------------------------------------------