├── .eslintignore ├── .eslintrc ├── .gitignore ├── .prettierrc ├── LICENSE ├── README.md ├── index.html ├── package-lock.json ├── package.json ├── postcss.config.cjs ├── public └── vite.svg ├── src ├── App.tsx ├── assets │ └── react.svg ├── components │ ├── AudioManager.tsx │ ├── AudioPlayer.tsx │ ├── AudioRecorder.tsx │ ├── Progress.tsx │ ├── TranscribeButton.tsx │ ├── Transcript.tsx │ └── modal │ │ ├── Modal.tsx │ │ └── UrlInput.tsx ├── css │ └── index.css ├── hooks │ ├── useTranscriber.ts │ └── useWorker.ts ├── index.tsx ├── utils │ ├── AudioUtils.ts │ ├── BlobFix.ts │ └── Constants.ts ├── vite-env.d.ts └── worker.js ├── tailwind.config.cjs ├── tsconfig.json ├── tsconfig.node.json └── vite.config.ts /.eslintignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | dist -------------------------------------------------------------------------------- /.eslintrc: -------------------------------------------------------------------------------- 1 | { 2 | "root": true, 3 | "env": { 4 | "browser": true, 5 | "es2021": true 6 | }, 7 | "parser": "@typescript-eslint/parser", 8 | "extends": [ 9 | "eslint:recommended", 10 | "plugin:react/recommended", 11 | "plugin:@typescript-eslint/recommended", 12 | "plugin:@typescript-eslint/eslint-recommended", 13 | "prettier" 14 | ], 15 | "overrides": [], 16 | "parserOptions": { 17 | "ecmaFeatures": { 18 | "jsx": true 19 | }, 20 | "ecmaVersion": "latest", 21 | "sourceType": "module" 22 | }, 23 | "plugins": [ 24 | "react", 25 | "react-hooks", 26 | "@typescript-eslint", 27 | "prettier" 28 | ], 29 | "rules": { 30 | "react/react-in-jsx-scope": "off", 31 | "camelcase": "error", 32 | "spaced-comment": "error", 33 | "no-duplicate-imports": "error", 34 | "prettier/prettier": "error" 35 | }, 36 | "settings": { 37 | "react": { 38 | "version": "detect" 39 | } 40 | }, 41 | "prettier/prettier": [ 42 | "error", 43 | { 44 | "endOfLine": "auto" 45 | } 46 | ] 47 | } 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | pnpm-debug.log* 8 | lerna-debug.log* 9 | 10 | node_modules 11 | dist 12 | dist-ssr 13 | *.local 14 | 15 | # Editor directories and files 16 | .vscode/* 17 | !.vscode/extensions.json 18 | .idea 19 | .DS_Store 20 | *.suo 21 | *.ntvs* 22 | *.njsproj 23 | *.sln 24 | *.sw? 25 | -------------------------------------------------------------------------------- /.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "semi": true, 3 | "tabWidth": 4, 4 | "printWidth": 80, 5 | "singleQuote": false, 6 | "trailingComma": "all", 7 | "jsxSingleQuote": true, 8 | "bracketSpacing": true, 9 | "endOfLine":"auto" 10 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Xenova 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Whisper Web 2 | 3 | ML-powered speech recognition directly in your browser! Built with [🤗 Transformers.js](https://github.com/xenova/transformers.js). 4 | 5 | Check out the demo site [here](https://huggingface.co/spaces/Xenova/whisper-web). 6 | 7 | > [!IMPORTANT] 8 | > Experimental WebGPU support has been added to [this branch](https://github.com/xenova/whisper-web/tree/experimental-webgpu) ([demo](https://huggingface.co/spaces/Xenova/whisper-webgpu)), if you'd like to run with GPU acceleration! 9 | 10 | https://github.com/xenova/whisper-web/assets/26504141/fb170d84-9678-41b5-9248-a112ecc74c27 11 | 12 | ## Running locally 13 | 14 | 1. Clone the repo and install dependencies: 15 | 16 | ```bash 17 | git clone https://github.com/xenova/whisper-web.git 18 | cd whisper-web 19 | npm install 20 | ``` 21 | 22 | 2. Run the development server: 23 | 24 | ```bash 25 | npm run dev 26 | ``` 27 | > Firefox users need to change the `dom.workers.modules.enabled` setting in `about:config` to `true` to enable Web Workers. 28 | > Check out [this issue](https://github.com/xenova/whisper-web/issues/8) for more details. 29 | 30 | 3. Open the link (e.g., [http://localhost:5173/](http://localhost:5173/)) in your browser. 31 | -------------------------------------------------------------------------------- /index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Whisper Web 8 | 9 | 10 |
11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "whisper-web", 3 | "private": true, 4 | "version": "0.0.0", 5 | "type": "module", 6 | "scripts": { 7 | "dev": "vite", 8 | "clean": "rm -rf node_modules/ dist/", 9 | "build": "tsc && vite build", 10 | "preview": "vite preview", 11 | "lint": "eslint src/**/*.{js,jsx,ts,tsx,json}", 12 | "lint:fix": "eslint --fix src/**/*.{js,jsx,ts,tsx,json}", 13 | "format": "prettier --write src/**/*.{js,jsx,ts,tsx,css,md,json} --config ./.prettierrc", 14 | "tsc": "tsc" 15 | }, 16 | "dependencies": { 17 | "@headlessui/react": "^1.7.13", 18 | "@xenova/transformers": "^2.7.0", 19 | "axios": "^1.3.4", 20 | "react": "^18.2.0", 21 | "react-dom": "^18.2.0" 22 | }, 23 | "devDependencies": { 24 | "@types/react": "^18.0.28", 25 | "@types/react-dom": "^18.0.11", 26 | "@typescript-eslint/eslint-plugin": "^5.57.0", 27 | "@typescript-eslint/parser": "^5.57.0", 28 | "@vitejs/plugin-react": "^3.1.0", 29 | "autoprefixer": "^10.4.14", 30 | "eslint": "^8.37.0", 31 | "eslint-config-prettier": "^8.8.0", 32 | "eslint-config-standard-with-typescript": "^34.0.1", 33 | "eslint-plugin-import": "^2.27.5", 34 | "eslint-plugin-n": "^15.7.0", 35 | "eslint-plugin-prettier": "^4.2.1", 36 | "eslint-plugin-promise": "^6.1.1", 37 | "eslint-plugin-react": "^7.32.2", 38 | "eslint-plugin-react-hooks": "^4.6.0", 39 | "postcss": "^8.4.21", 40 | "prettier": "^2.8.7", 41 | "tailwindcss": "^3.2.7", 42 | "typescript": "^4.9.5", 43 | "vite": "^4.2.0" 44 | }, 45 | "overrides": { 46 | "semver": "^7.5.3", 47 | "protobufjs": "^7.2.4" 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /postcss.config.cjs: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | plugins: { 3 | tailwindcss: {}, 4 | autoprefixer: {}, 5 | }, 6 
| } 7 | -------------------------------------------------------------------------------- /public/vite.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/App.tsx: -------------------------------------------------------------------------------- 1 | import { AudioManager } from "./components/AudioManager"; 2 | import Transcript from "./components/Transcript"; 3 | import { useTranscriber } from "./hooks/useTranscriber"; 4 | 5 | function App() { 6 | const transcriber = useTranscriber(); 7 | 8 | return ( 9 |
10 |
11 |

12 | Whisper Web 13 |

14 |

15 | ML-powered speech recognition directly in your browser 16 |

17 | 18 | 19 |
20 | 21 |
22 | Made with{" "} 23 | 27 | 🤗 Transformers.js 28 | 29 |
30 |
31 | ); 32 | } 33 | 34 | export default App; 35 | -------------------------------------------------------------------------------- /src/assets/react.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/components/AudioManager.tsx: -------------------------------------------------------------------------------- 1 | import React, { useCallback, useEffect, useState } from "react"; 2 | import axios from "axios"; 3 | import Modal from "./modal/Modal"; 4 | import { UrlInput } from "./modal/UrlInput"; 5 | import AudioPlayer from "./AudioPlayer"; 6 | import { TranscribeButton } from "./TranscribeButton"; 7 | import Constants from "../utils/Constants"; 8 | import { Transcriber } from "../hooks/useTranscriber"; 9 | import Progress from "./Progress"; 10 | import AudioRecorder from "./AudioRecorder"; 11 | 12 | function titleCase(str: string) { 13 | str = str.toLowerCase(); 14 | return (str.match(/\w+.?/g) || []) 15 | .map((word) => { 16 | return word.charAt(0).toUpperCase() + word.slice(1); 17 | }) 18 | .join(""); 19 | } 20 | 21 | // List of supported languages: 22 | // https://help.openai.com/en/articles/7031512-whisper-api-faq 23 | // https://github.com/openai/whisper/blob/248b6cb124225dd263bb9bd32d060b6517e067f8/whisper/tokenizer.py#L79 24 | const LANGUAGES = { 25 | en: "english", 26 | zh: "chinese", 27 | de: "german", 28 | es: "spanish/castilian", 29 | ru: "russian", 30 | ko: "korean", 31 | fr: "french", 32 | ja: "japanese", 33 | pt: "portuguese", 34 | tr: "turkish", 35 | pl: "polish", 36 | ca: "catalan/valencian", 37 | nl: "dutch/flemish", 38 | ar: "arabic", 39 | sv: "swedish", 40 | it: "italian", 41 | id: "indonesian", 42 | hi: "hindi", 43 | fi: "finnish", 44 | vi: "vietnamese", 45 | he: "hebrew", 46 | uk: "ukrainian", 47 | el: "greek", 48 | ms: "malay", 49 | cs: "czech", 50 | ro: "romanian/moldavian/moldovan", 51 | da: "danish", 52 | hu: 
"hungarian", 53 | ta: "tamil", 54 | no: "norwegian", 55 | th: "thai", 56 | ur: "urdu", 57 | hr: "croatian", 58 | bg: "bulgarian", 59 | lt: "lithuanian", 60 | la: "latin", 61 | mi: "maori", 62 | ml: "malayalam", 63 | cy: "welsh", 64 | sk: "slovak", 65 | te: "telugu", 66 | fa: "persian", 67 | lv: "latvian", 68 | bn: "bengali", 69 | sr: "serbian", 70 | az: "azerbaijani", 71 | sl: "slovenian", 72 | kn: "kannada", 73 | et: "estonian", 74 | mk: "macedonian", 75 | br: "breton", 76 | eu: "basque", 77 | is: "icelandic", 78 | hy: "armenian", 79 | ne: "nepali", 80 | mn: "mongolian", 81 | bs: "bosnian", 82 | kk: "kazakh", 83 | sq: "albanian", 84 | sw: "swahili", 85 | gl: "galician", 86 | mr: "marathi", 87 | pa: "punjabi/panjabi", 88 | si: "sinhala/sinhalese", 89 | km: "khmer", 90 | sn: "shona", 91 | yo: "yoruba", 92 | so: "somali", 93 | af: "afrikaans", 94 | oc: "occitan", 95 | ka: "georgian", 96 | be: "belarusian", 97 | tg: "tajik", 98 | sd: "sindhi", 99 | gu: "gujarati", 100 | am: "amharic", 101 | yi: "yiddish", 102 | lo: "lao", 103 | uz: "uzbek", 104 | fo: "faroese", 105 | ht: "haitian creole/haitian", 106 | ps: "pashto/pushto", 107 | tk: "turkmen", 108 | nn: "nynorsk", 109 | mt: "maltese", 110 | sa: "sanskrit", 111 | lb: "luxembourgish/letzeburgesch", 112 | my: "myanmar/burmese", 113 | bo: "tibetan", 114 | tl: "tagalog", 115 | mg: "malagasy", 116 | as: "assamese", 117 | tt: "tatar", 118 | haw: "hawaiian", 119 | ln: "lingala", 120 | ha: "hausa", 121 | ba: "bashkir", 122 | jw: "javanese", 123 | su: "sundanese", 124 | }; 125 | 126 | export enum AudioSource { 127 | URL = "URL", 128 | FILE = "FILE", 129 | RECORDING = "RECORDING", 130 | } 131 | 132 | export function AudioManager(props: { transcriber: Transcriber }) { 133 | const [progress, setProgress] = useState(undefined); 134 | const [audioData, setAudioData] = useState< 135 | | { 136 | buffer: AudioBuffer; 137 | url: string; 138 | source: AudioSource; 139 | mimeType: string; 140 | } 141 | | undefined 142 | >(undefined); 143 
| const [audioDownloadUrl, setAudioDownloadUrl] = useState< 144 | string | undefined 145 | >(undefined); 146 | 147 | const isAudioLoading = progress !== undefined; 148 | 149 | const resetAudio = () => { 150 | setAudioData(undefined); 151 | setAudioDownloadUrl(undefined); 152 | }; 153 | 154 | const setAudioFromDownload = async ( 155 | data: ArrayBuffer, 156 | mimeType: string, 157 | ) => { 158 | const audioCTX = new AudioContext({ 159 | sampleRate: Constants.SAMPLING_RATE, 160 | }); 161 | const blobUrl = URL.createObjectURL( 162 | new Blob([data], { type: "audio/*" }), 163 | ); 164 | const decoded = await audioCTX.decodeAudioData(data); 165 | setAudioData({ 166 | buffer: decoded, 167 | url: blobUrl, 168 | source: AudioSource.URL, 169 | mimeType: mimeType, 170 | }); 171 | }; 172 | 173 | const setAudioFromRecording = async (data: Blob) => { 174 | resetAudio(); 175 | setProgress(0); 176 | const blobUrl = URL.createObjectURL(data); 177 | const fileReader = new FileReader(); 178 | fileReader.onprogress = (event) => { 179 | setProgress(event.loaded / event.total || 0); 180 | }; 181 | fileReader.onloadend = async () => { 182 | const audioCTX = new AudioContext({ 183 | sampleRate: Constants.SAMPLING_RATE, 184 | }); 185 | const arrayBuffer = fileReader.result as ArrayBuffer; 186 | const decoded = await audioCTX.decodeAudioData(arrayBuffer); 187 | setProgress(undefined); 188 | setAudioData({ 189 | buffer: decoded, 190 | url: blobUrl, 191 | source: AudioSource.RECORDING, 192 | mimeType: data.type, 193 | }); 194 | }; 195 | fileReader.readAsArrayBuffer(data); 196 | }; 197 | 198 | const downloadAudioFromUrl = async ( 199 | requestAbortController: AbortController, 200 | ) => { 201 | if (audioDownloadUrl) { 202 | try { 203 | setAudioData(undefined); 204 | setProgress(0); 205 | const { data, headers } = (await axios.get(audioDownloadUrl, { 206 | signal: requestAbortController.signal, 207 | responseType: "arraybuffer", 208 | onDownloadProgress(progressEvent) { 209 | 
setProgress(progressEvent.progress || 0); 210 | }, 211 | })) as { 212 | data: ArrayBuffer; 213 | headers: { "content-type": string }; 214 | }; 215 | 216 | let mimeType = headers["content-type"]; 217 | if (!mimeType || mimeType === "audio/wave") { 218 | mimeType = "audio/wav"; 219 | } 220 | setAudioFromDownload(data, mimeType); 221 | } catch (error) { 222 | console.log("Request failed or aborted", error); 223 | } finally { 224 | setProgress(undefined); 225 | } 226 | } 227 | }; 228 | 229 | // When URL changes, download audio 230 | useEffect(() => { 231 | if (audioDownloadUrl) { 232 | const requestAbortController = new AbortController(); 233 | downloadAudioFromUrl(requestAbortController); 234 | return () => { 235 | requestAbortController.abort(); 236 | }; 237 | } 238 | }, [audioDownloadUrl]); 239 | 240 | return ( 241 | <> 242 |
243 |
244 | } 246 | text={"From URL"} 247 | onUrlUpdate={(e) => { 248 | props.transcriber.onInputChange(); 249 | setAudioDownloadUrl(e); 250 | }} 251 | /> 252 | 253 | } 255 | text={"From file"} 256 | onFileUpdate={(decoded, blobUrl, mimeType) => { 257 | props.transcriber.onInputChange(); 258 | setAudioData({ 259 | buffer: decoded, 260 | url: blobUrl, 261 | source: AudioSource.FILE, 262 | mimeType: mimeType, 263 | }); 264 | }} 265 | /> 266 | {navigator.mediaDevices && ( 267 | <> 268 | 269 | } 271 | text={"Record"} 272 | setAudioData={(e) => { 273 | props.transcriber.onInputChange(); 274 | setAudioFromRecording(e); 275 | }} 276 | /> 277 | 278 | )} 279 |
280 | { 281 | 284 | } 285 |
286 | {audioData && ( 287 | <> 288 | 292 | 293 |
294 | { 296 | props.transcriber.start(audioData.buffer); 297 | }} 298 | isModelLoading={props.transcriber.isModelLoading} 299 | // isAudioLoading || 300 | isTranscribing={props.transcriber.isBusy} 301 | /> 302 | 303 | } 307 | /> 308 |
309 | {props.transcriber.progressItems.length > 0 && ( 310 |
311 | 314 | {props.transcriber.progressItems.map((data) => ( 315 |
316 | 320 |
321 | ))} 322 |
323 | )} 324 | 325 | )} 326 | 327 | ); 328 | } 329 | 330 | function SettingsTile(props: { 331 | icon: JSX.Element; 332 | className?: string; 333 | transcriber: Transcriber; 334 | }) { 335 | const [showModal, setShowModal] = useState(false); 336 | 337 | const onClick = () => { 338 | setShowModal(true); 339 | }; 340 | 341 | const onClose = () => { 342 | setShowModal(false); 343 | }; 344 | 345 | const onSubmit = (url: string) => { 346 | onClose(); 347 | }; 348 | 349 | return ( 350 |
351 | 352 | 358 |
359 | ); 360 | } 361 | 362 | function SettingsModal(props: { 363 | show: boolean; 364 | onSubmit: (url: string) => void; 365 | onClose: () => void; 366 | transcriber: Transcriber; 367 | }) { 368 | const names = Object.values(LANGUAGES).map(titleCase); 369 | 370 | const models = { 371 | // Original checkpoints 372 | 'Xenova/whisper-tiny': [41, 152], 373 | 'Xenova/whisper-base': [77, 291], 374 | 'Xenova/whisper-small': [249], 375 | 'Xenova/whisper-medium': [776], 376 | 377 | // Distil Whisper (English-only) 378 | 'distil-whisper/distil-medium.en': [402], 379 | 'distil-whisper/distil-large-v2': [767], 380 | }; 381 | return ( 382 | 387 | 388 | 418 |
419 |
420 | { 425 | props.transcriber.setMultilingual( 426 | e.target.checked, 427 | ); 428 | }} 429 | > 430 | 433 |
434 |
435 | { 440 | props.transcriber.setQuantized( 441 | e.target.checked, 442 | ); 443 | }} 444 | > 445 | 448 |
449 |
450 | {props.transcriber.multilingual && ( 451 | <> 452 | 453 | 468 | 469 | 483 | 484 | )} 485 | 486 | } 487 | onClose={props.onClose} 488 | onSubmit={() => {}} 489 | /> 490 | ); 491 | } 492 | 493 | function VerticalBar() { 494 | return
; 495 | } 496 | 497 | function AudioDataBar(props: { progress: number }) { 498 | return ; 499 | } 500 | 501 | function ProgressBar(props: { progress: string }) { 502 | return ( 503 |
504 |
508 |
509 | ); 510 | } 511 | 512 | function UrlTile(props: { 513 | icon: JSX.Element; 514 | text: string; 515 | onUrlUpdate: (url: string) => void; 516 | }) { 517 | const [showModal, setShowModal] = useState(false); 518 | 519 | const onClick = () => { 520 | setShowModal(true); 521 | }; 522 | 523 | const onClose = () => { 524 | setShowModal(false); 525 | }; 526 | 527 | const onSubmit = (url: string) => { 528 | props.onUrlUpdate(url); 529 | onClose(); 530 | }; 531 | 532 | return ( 533 | <> 534 | 535 | 536 | 537 | ); 538 | } 539 | 540 | function UrlModal(props: { 541 | show: boolean; 542 | onSubmit: (url: string) => void; 543 | onClose: () => void; 544 | }) { 545 | const [url, setUrl] = useState(Constants.DEFAULT_AUDIO_URL); 546 | 547 | const onChange = (event: React.ChangeEvent) => { 548 | setUrl(event.target.value); 549 | }; 550 | 551 | const onSubmit = () => { 552 | props.onSubmit(url); 553 | }; 554 | 555 | return ( 556 | 561 | {"Enter the URL of the audio file you want to load."} 562 | 563 | 564 | } 565 | onClose={props.onClose} 566 | submitText={"Load"} 567 | onSubmit={onSubmit} 568 | /> 569 | ); 570 | } 571 | 572 | function FileTile(props: { 573 | icon: JSX.Element; 574 | text: string; 575 | onFileUpdate: ( 576 | decoded: AudioBuffer, 577 | blobUrl: string, 578 | mimeType: string, 579 | ) => void; 580 | }) { 581 | // const audioPlayer = useRef(null); 582 | 583 | // Create hidden input element 584 | let elem = document.createElement("input"); 585 | elem.type = "file"; 586 | elem.oninput = (event) => { 587 | // Make sure we have files to use 588 | let files = (event.target as HTMLInputElement).files; 589 | if (!files) return; 590 | 591 | // Create a blob that we can use as an src for our audio element 592 | const urlObj = URL.createObjectURL(files[0]); 593 | const mimeType = files[0].type; 594 | 595 | const reader = new FileReader(); 596 | reader.addEventListener("load", async (e) => { 597 | const arrayBuffer = e.target?.result as ArrayBuffer; // Get the ArrayBuffer 598 
| if (!arrayBuffer) return; 599 | 600 | const audioCTX = new AudioContext({ 601 | sampleRate: Constants.SAMPLING_RATE, 602 | }); 603 | 604 | const decoded = await audioCTX.decodeAudioData(arrayBuffer); 605 | 606 | props.onFileUpdate(decoded, urlObj, mimeType); 607 | }); 608 | reader.readAsArrayBuffer(files[0]); 609 | 610 | // Reset files 611 | elem.value = ""; 612 | }; 613 | 614 | return ( 615 | <> 616 | elem.click()} 620 | /> 621 | 622 | ); 623 | } 624 | 625 | function RecordTile(props: { 626 | icon: JSX.Element; 627 | text: string; 628 | setAudioData: (data: Blob) => void; 629 | }) { 630 | const [showModal, setShowModal] = useState(false); 631 | 632 | const onClick = () => { 633 | setShowModal(true); 634 | }; 635 | 636 | const onClose = () => { 637 | setShowModal(false); 638 | }; 639 | 640 | const onSubmit = (data: Blob | undefined) => { 641 | if (data) { 642 | props.setAudioData(data); 643 | onClose(); 644 | } 645 | }; 646 | 647 | return ( 648 | <> 649 | 650 | 655 | 656 | ); 657 | } 658 | 659 | function RecordModal(props: { 660 | show: boolean; 661 | onSubmit: (data: Blob | undefined) => void; 662 | onClose: () => void; 663 | }) { 664 | const [audioBlob, setAudioBlob] = useState(); 665 | 666 | const onRecordingComplete = (blob: Blob) => { 667 | setAudioBlob(blob); 668 | }; 669 | 670 | const onSubmit = () => { 671 | props.onSubmit(audioBlob); 672 | setAudioBlob(undefined); 673 | }; 674 | 675 | const onClose = () => { 676 | props.onClose(); 677 | setAudioBlob(undefined); 678 | }; 679 | 680 | return ( 681 | 686 | {"Record audio using your microphone"} 687 | 688 | 689 | } 690 | onClose={onClose} 691 | submitText={"Load"} 692 | submitEnabled={audioBlob !== undefined} 693 | onSubmit={onSubmit} 694 | /> 695 | ); 696 | } 697 | 698 | function Tile(props: { 699 | icon: JSX.Element; 700 | text?: string; 701 | onClick?: () => void; 702 | }) { 703 | return ( 704 | 715 | ); 716 | } 717 | 718 | function AnchorIcon() { 719 | return ( 720 | 727 | 732 | 733 | ); 734 | } 735 | 736 
| function FolderIcon() { 737 | return ( 738 | 745 | 750 | 751 | ); 752 | } 753 | 754 | function SettingsIcon() { 755 | return ( 756 | 763 | 768 | 773 | 774 | ); 775 | } 776 | 777 | function MicrophoneIcon() { 778 | return ( 779 | 786 | 791 | 792 | ); 793 | } 794 | -------------------------------------------------------------------------------- /src/components/AudioPlayer.tsx: -------------------------------------------------------------------------------- 1 | import { useEffect, useRef } from "react"; 2 | 3 | export default function AudioPlayer(props: { 4 | audioUrl: string; 5 | mimeType: string; 6 | }) { 7 | const audioPlayer = useRef(null); 8 | const audioSource = useRef(null); 9 | 10 | // Updates src when url changes 11 | useEffect(() => { 12 | if (audioPlayer.current && audioSource.current) { 13 | audioSource.current.src = props.audioUrl; 14 | audioPlayer.current.load(); 15 | } 16 | }, [props.audioUrl]); 17 | 18 | return ( 19 |
20 | 27 |
28 | ); 29 | } 30 | -------------------------------------------------------------------------------- /src/components/AudioRecorder.tsx: -------------------------------------------------------------------------------- 1 | import { useState, useEffect, useRef } from "react"; 2 | 3 | import { formatAudioTimestamp } from "../utils/AudioUtils"; 4 | import { webmFixDuration } from "../utils/BlobFix"; 5 | 6 | function getMimeType() { 7 | const types = [ 8 | "audio/webm", 9 | "audio/mp4", 10 | "audio/ogg", 11 | "audio/wav", 12 | "audio/aac", 13 | ]; 14 | for (let i = 0; i < types.length; i++) { 15 | if (MediaRecorder.isTypeSupported(types[i])) { 16 | return types[i]; 17 | } 18 | } 19 | return undefined; 20 | } 21 | 22 | export default function AudioRecorder(props: { 23 | onRecordingComplete: (blob: Blob) => void; 24 | }) { 25 | const [recording, setRecording] = useState(false); 26 | const [duration, setDuration] = useState(0); 27 | const [recordedBlob, setRecordedBlob] = useState(null); 28 | 29 | const streamRef = useRef(null); 30 | const mediaRecorderRef = useRef(null); 31 | const chunksRef = useRef([]); 32 | 33 | const audioRef = useRef(null); 34 | 35 | const startRecording = async () => { 36 | // Reset recording (if any) 37 | setRecordedBlob(null); 38 | 39 | let startTime = Date.now(); 40 | 41 | try { 42 | if (!streamRef.current) { 43 | streamRef.current = await navigator.mediaDevices.getUserMedia({ 44 | audio: true, 45 | }); 46 | } 47 | 48 | const mimeType = getMimeType(); 49 | const mediaRecorder = new MediaRecorder(streamRef.current, { 50 | mimeType, 51 | }); 52 | 53 | mediaRecorderRef.current = mediaRecorder; 54 | 55 | mediaRecorder.addEventListener("dataavailable", async (event) => { 56 | if (event.data.size > 0) { 57 | chunksRef.current.push(event.data); 58 | } 59 | if (mediaRecorder.state === "inactive") { 60 | const duration = Date.now() - startTime; 61 | 62 | // Received a stop event 63 | let blob = new Blob(chunksRef.current, { type: mimeType }); 64 | 65 | if 
(mimeType === "audio/webm") { 66 | blob = await webmFixDuration(blob, duration, blob.type); 67 | } 68 | 69 | setRecordedBlob(blob); 70 | props.onRecordingComplete(blob); 71 | 72 | chunksRef.current = []; 73 | } 74 | }); 75 | mediaRecorder.start(); 76 | setRecording(true); 77 | } catch (error) { 78 | console.error("Error accessing microphone:", error); 79 | } 80 | }; 81 | 82 | const stopRecording = () => { 83 | if ( 84 | mediaRecorderRef.current && 85 | mediaRecorderRef.current.state === "recording" 86 | ) { 87 | mediaRecorderRef.current.stop(); // set state to inactive 88 | setDuration(0); 89 | setRecording(false); 90 | } 91 | }; 92 | 93 | useEffect(() => { 94 | let stream: MediaStream | null = null; 95 | 96 | if (recording) { 97 | const timer = setInterval(() => { 98 | setDuration((prevDuration) => prevDuration + 1); 99 | }, 1000); 100 | 101 | return () => { 102 | clearInterval(timer); 103 | }; 104 | } 105 | 106 | return () => { 107 | if (stream) { 108 | stream.getTracks().forEach((track) => track.stop()); 109 | } 110 | }; 111 | }, [recording]); 112 | 113 | const handleToggleRecording = () => { 114 | if (recording) { 115 | stopRecording(); 116 | } else { 117 | startRecording(); 118 | } 119 | }; 120 | 121 | return ( 122 |
123 | 136 | 137 | {recordedBlob && ( 138 | 144 | )} 145 |
146 | ); 147 | } 148 | -------------------------------------------------------------------------------- /src/components/Progress.tsx: -------------------------------------------------------------------------------- 1 | export default function Progress({ 2 | text, 3 | percentage, 4 | }: { 5 | text: string; 6 | percentage: number; 7 | }) { 8 | percentage = percentage ?? 0; 9 | return ( 10 |
11 |
15 | {text} ({`${percentage.toFixed(2)}%`}) 16 |
17 |
18 | ); 19 | } 20 | -------------------------------------------------------------------------------- /src/components/TranscribeButton.tsx: -------------------------------------------------------------------------------- 1 | interface Props extends React.ButtonHTMLAttributes { 2 | isModelLoading: boolean; 3 | isTranscribing: boolean; 4 | } 5 | 6 | export function TranscribeButton(props: Props): JSX.Element { 7 | const { isModelLoading, isTranscribing, onClick, ...buttonProps } = props; 8 | return ( 9 | 27 | ); 28 | } 29 | 30 | export function Spinner(props: { text: string }): JSX.Element { 31 | return ( 32 |
33 | 50 | {props.text} 51 |
52 | ); 53 | } 54 | -------------------------------------------------------------------------------- /src/components/Transcript.tsx: -------------------------------------------------------------------------------- 1 | import { useRef, useEffect } from "react"; 2 | 3 | import { TranscriberData } from "../hooks/useTranscriber"; 4 | import { formatAudioTimestamp } from "../utils/AudioUtils"; 5 | 6 | interface Props { 7 | transcribedData: TranscriberData | undefined; 8 | } 9 | 10 | export default function Transcript({ transcribedData }: Props) { 11 | const divRef = useRef(null); 12 | 13 | const saveBlob = (blob: Blob, filename: string) => { 14 | const url = URL.createObjectURL(blob); 15 | const link = document.createElement("a"); 16 | link.href = url; 17 | link.download = filename; 18 | link.click(); 19 | URL.revokeObjectURL(url); 20 | }; 21 | const exportTXT = () => { 22 | let chunks = transcribedData?.chunks ?? []; 23 | let text = chunks 24 | .map((chunk) => chunk.text) 25 | .join("") 26 | .trim(); 27 | 28 | const blob = new Blob([text], { type: "text/plain" }); 29 | saveBlob(blob, "transcript.txt"); 30 | }; 31 | const exportJSON = () => { 32 | let jsonData = JSON.stringify(transcribedData?.chunks ?? [], null, 2); 33 | 34 | // post-process the JSON to make it more readable 35 | const regex = /( "timestamp": )\[\s+(\S+)\s+(\S+)\s+\]/gm; 36 | jsonData = jsonData.replace(regex, "$1[$2 $3]"); 37 | 38 | const blob = new Blob([jsonData], { type: "application/json" }); 39 | saveBlob(blob, "transcript.json"); 40 | }; 41 | 42 | // Scroll to the bottom when the component updates 43 | useEffect(() => { 44 | if (divRef.current) { 45 | const diff = Math.abs( 46 | divRef.current.offsetHeight + 47 | divRef.current.scrollTop - 48 | divRef.current.scrollHeight, 49 | ); 50 | 51 | if (diff <= 64) { 52 | // We're close enough to the bottom, so scroll to the bottom 53 | divRef.current.scrollTop = divRef.current.scrollHeight; 54 | } 55 | } 56 | }); 57 | 58 | return ( 59 |
63 | {transcribedData?.chunks && 64 | transcribedData.chunks.map((chunk, i) => ( 65 |
69 |
70 | {formatAudioTimestamp(chunk.timestamp[0])} 71 |
72 | {chunk.text} 73 |
74 | ))} 75 | {transcribedData && !transcribedData.isBusy && ( 76 |
77 | 83 | 89 |
90 | )} 91 |
92 | ); 93 | } 94 | -------------------------------------------------------------------------------- /src/components/modal/Modal.tsx: -------------------------------------------------------------------------------- 1 | import { Dialog, Transition } from "@headlessui/react"; 2 | import { Fragment } from "react"; 3 | 4 | export interface Props { 5 | show: boolean; 6 | onClose: () => void; 7 | onSubmit: () => void; 8 | submitText?: string; 9 | submitEnabled?: boolean; 10 | title: string | JSX.Element; 11 | content: string | JSX.Element; 12 | } 13 | 14 | export default function Modal({ 15 | show, 16 | onClose, 17 | onSubmit, 18 | title, 19 | content, 20 | submitText, 21 | submitEnabled = true, 22 | }: Props) { 23 | return ( 24 | 25 | 26 | 35 |
36 | 37 | 38 |
39 |
40 | 49 | 50 | 54 | {title} 55 | 56 |
57 | {content} 58 |
59 | 60 |
61 | {submitText && ( 62 | 78 | )} 79 | 86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 | ); 94 | } 95 | -------------------------------------------------------------------------------- /src/components/modal/UrlInput.tsx: -------------------------------------------------------------------------------- 1 | import { DetailedHTMLProps, InputHTMLAttributes } from "react"; 2 | 3 | export function UrlInput( 4 | props: DetailedHTMLProps< 5 | InputHTMLAttributes, 6 | HTMLInputElement 7 | >, 8 | ) { 9 | return ( 10 |
11 | 18 |
19 | ); 20 | } 21 | -------------------------------------------------------------------------------- /src/css/index.css: -------------------------------------------------------------------------------- 1 | @tailwind base; 2 | @tailwind components; 3 | @tailwind utilities; 4 | 5 | html, 6 | body, 7 | #root { 8 | height: 100%; 9 | } 10 | 11 | audio::-webkit-media-controls-panel { 12 | background-color: white; 13 | } 14 | 15 | .container { 16 | width: 41rem /* 656px */; 17 | max-width: 95vw; 18 | } 19 | -------------------------------------------------------------------------------- /src/hooks/useTranscriber.ts: -------------------------------------------------------------------------------- 1 | import { useCallback, useMemo, useState } from "react"; 2 | import { useWorker } from "./useWorker"; 3 | import Constants from "../utils/Constants"; 4 | 5 | interface ProgressItem { 6 | file: string; 7 | loaded: number; 8 | progress: number; 9 | total: number; 10 | name: string; 11 | status: string; 12 | } 13 | 14 | interface TranscriberUpdateData { 15 | data: [ 16 | string, 17 | { chunks: { text: string; timestamp: [number, number | null] }[] }, 18 | ]; 19 | text: string; 20 | } 21 | 22 | interface TranscriberCompleteData { 23 | data: { 24 | text: string; 25 | chunks: { text: string; timestamp: [number, number | null] }[]; 26 | }; 27 | } 28 | 29 | export interface TranscriberData { 30 | isBusy: boolean; 31 | text: string; 32 | chunks: { text: string; timestamp: [number, number | null] }[]; 33 | } 34 | 35 | export interface Transcriber { 36 | onInputChange: () => void; 37 | isBusy: boolean; 38 | isModelLoading: boolean; 39 | progressItems: ProgressItem[]; 40 | start: (audioData: AudioBuffer | undefined) => void; 41 | output?: TranscriberData; 42 | model: string; 43 | setModel: (model: string) => void; 44 | multilingual: boolean; 45 | setMultilingual: (model: boolean) => void; 46 | quantized: boolean; 47 | setQuantized: (model: boolean) => void; 48 | subtask: string; 49 | 
setSubtask: (subtask: string) => void; 50 | language?: string; 51 | setLanguage: (language: string) => void; 52 | } 53 | 54 | export function useTranscriber(): Transcriber { 55 | const [transcript, setTranscript] = useState( 56 | undefined, 57 | ); 58 | const [isBusy, setIsBusy] = useState(false); 59 | const [isModelLoading, setIsModelLoading] = useState(false); 60 | 61 | const [progressItems, setProgressItems] = useState([]); 62 | 63 | const webWorker = useWorker((event) => { 64 | const message = event.data; 65 | // Update the state with the result 66 | switch (message.status) { 67 | case "progress": 68 | // Model file progress: update one of the progress items. 69 | setProgressItems((prev) => 70 | prev.map((item) => { 71 | if (item.file === message.file) { 72 | return { ...item, progress: message.progress }; 73 | } 74 | return item; 75 | }), 76 | ); 77 | break; 78 | case "update": 79 | // Received partial update 80 | // console.log("update", message); 81 | // eslint-disable-next-line no-case-declarations 82 | const updateMessage = message as TranscriberUpdateData; 83 | setTranscript({ 84 | isBusy: true, 85 | text: updateMessage.data[0], 86 | chunks: updateMessage.data[1].chunks, 87 | }); 88 | break; 89 | case "complete": 90 | // Received complete transcript 91 | // console.log("complete", message); 92 | // eslint-disable-next-line no-case-declarations 93 | const completeMessage = message as TranscriberCompleteData; 94 | setTranscript({ 95 | isBusy: false, 96 | text: completeMessage.data.text, 97 | chunks: completeMessage.data.chunks, 98 | }); 99 | setIsBusy(false); 100 | break; 101 | 102 | case "initiate": 103 | // Model file start load: add a new progress item to the list. 
104 | setIsModelLoading(true); 105 | setProgressItems((prev) => [...prev, message]); 106 | break; 107 | case "ready": 108 | setIsModelLoading(false); 109 | break; 110 | case "error": 111 | setIsBusy(false); 112 | alert( 113 | `${message.data.message} This is most likely because you are using Safari on an M1/M2 Mac. Please try again from Chrome, Firefox, or Edge.\n\nIf this is not the case, please file a bug report.`, 114 | ); 115 | break; 116 | case "done": 117 | // Model file loaded: remove the progress item from the list. 118 | setProgressItems((prev) => 119 | prev.filter((item) => item.file !== message.file), 120 | ); 121 | break; 122 | 123 | default: 124 | // initiate/download/done 125 | break; 126 | } 127 | }); 128 | 129 | const [model, setModel] = useState(Constants.DEFAULT_MODEL); 130 | const [subtask, setSubtask] = useState(Constants.DEFAULT_SUBTASK); 131 | const [quantized, setQuantized] = useState( 132 | Constants.DEFAULT_QUANTIZED, 133 | ); 134 | const [multilingual, setMultilingual] = useState( 135 | Constants.DEFAULT_MULTILINGUAL, 136 | ); 137 | const [language, setLanguage] = useState( 138 | Constants.DEFAULT_LANGUAGE, 139 | ); 140 | 141 | const onInputChange = useCallback(() => { 142 | setTranscript(undefined); 143 | }, []); 144 | 145 | const postRequest = useCallback( 146 | async (audioData: AudioBuffer | undefined) => { 147 | if (audioData) { 148 | setTranscript(undefined); 149 | setIsBusy(true); 150 | 151 | let audio; 152 | if (audioData.numberOfChannels === 2) { 153 | const SCALING_FACTOR = Math.sqrt(2); 154 | 155 | let left = audioData.getChannelData(0); 156 | let right = audioData.getChannelData(1); 157 | 158 | audio = new Float32Array(left.length); 159 | for (let i = 0; i < audioData.length; ++i) { 160 | audio[i] = SCALING_FACTOR * (left[i] + right[i]) / 2; 161 | } 162 | } else { 163 | // If the audio is not stereo, we can just use the first channel: 164 | audio = audioData.getChannelData(0); 165 | } 166 | 167 | webWorker.postMessage({ 168 | 
audio, 169 | model, 170 | multilingual, 171 | quantized, 172 | subtask: multilingual ? subtask : null, 173 | language: 174 | multilingual && language !== "auto" ? language : null, 175 | }); 176 | } 177 | }, 178 | [webWorker, model, multilingual, quantized, subtask, language], 179 | ); 180 | 181 | const transcriber = useMemo(() => { 182 | return { 183 | onInputChange, 184 | isBusy, 185 | isModelLoading, 186 | progressItems, 187 | start: postRequest, 188 | output: transcript, 189 | model, 190 | setModel, 191 | multilingual, 192 | setMultilingual, 193 | quantized, 194 | setQuantized, 195 | subtask, 196 | setSubtask, 197 | language, 198 | setLanguage, 199 | }; 200 | }, [ 201 | isBusy, 202 | isModelLoading, 203 | progressItems, 204 | postRequest, 205 | transcript, 206 | model, 207 | multilingual, 208 | quantized, 209 | subtask, 210 | language, 211 | ]); 212 | 213 | return transcriber; 214 | } 215 | -------------------------------------------------------------------------------- /src/hooks/useWorker.ts: -------------------------------------------------------------------------------- 1 | import { useState } from "react"; 2 | 3 | export interface MessageEventHandler { 4 | (event: MessageEvent): void; 5 | } 6 | 7 | export function useWorker(messageEventHandler: MessageEventHandler): Worker { 8 | // Create new worker once and never again 9 | const [worker] = useState(() => createWorker(messageEventHandler)); 10 | return worker; 11 | } 12 | 13 | function createWorker(messageEventHandler: MessageEventHandler): Worker { 14 | const worker = new Worker(new URL("../worker.js", import.meta.url), { 15 | type: "module", 16 | }); 17 | // Listen for messages from the Web Worker 18 | worker.addEventListener("message", messageEventHandler); 19 | return worker; 20 | } 21 | -------------------------------------------------------------------------------- /src/index.tsx: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import 
ReactDOM from "react-dom/client"; 3 | import App from "./App"; 4 | import "./css/index.css"; 5 | 6 | /* Entry point: mount the React tree into the #root element declared in index.html. */ ReactDOM.createRoot(document.getElementById("root") as HTMLElement).render( 7 | /* NOTE(review): the JSX children appear stripped by this dump — presumably <React.StrictMode><App /></React.StrictMode>, given the App import above; confirm against the repository. */ 8 | 9 | , 10 | ); 11 | -------------------------------------------------------------------------------- /src/utils/AudioUtils.ts: -------------------------------------------------------------------------------- 1 | /* Left-pads a number to two digits, e.g. 7 -> "07". */ function padTime(time: number) { 2 | return String(time).padStart(2, "0"); 3 | } 4 | 5 | /* Formats a duration in seconds as "MM:SS", or "HH:MM:SS" when the hours part is nonzero; the "| 0" bitwise-OR truncates fractional values toward zero. */ export function formatAudioTimestamp(time: number) { 6 | const hours = (time / (60 * 60)) | 0; 7 | time -= hours * (60 * 60); 8 | const minutes = (time / 60) | 0; 9 | time -= minutes * 60; 10 | const seconds = time | 0; 11 | return `${hours ? padTime(hours) + ":" : ""}${padTime(minutes)}:${padTime( 12 | seconds, 13 | )}`; 14 | } 15 | -------------------------------------------------------------------------------- /src/utils/BlobFix.ts: -------------------------------------------------------------------------------- 1 | /* 2 | * There is a bug where `navigator.mediaDevices.getUserMedia` + `MediaRecorder` 3 | * creates WEBM files without duration metadata. See: 4 | * - https://bugs.chromium.org/p/chromium/issues/detail?id=642012 5 | * - https://stackoverflow.com/a/39971175/13989043 6 | * 7 | * This file contains a function that fixes the duration metadata of a WEBM file. 8 | * - Answer found: https://stackoverflow.com/a/75218309/13989043 9 | * - Code adapted from: https://github.com/mat-sz/webm-fix-duration 10 | * (forked from https://github.com/yusitnikov/fix-webm-duration) 11 | */ 12 | 13 | /* 14 | * This is the list of possible WEBM file sections by their IDs.
15 | * Possible types: Container, Binary, Uint, Int, String, Float, Date 16 | */ 17 | interface Section { 18 | name: string; 19 | type: string; 20 | } 21 | 22 | const sections: Record = { 23 | 0xa45dfa3: { name: "EBML", type: "Container" }, 24 | 0x286: { name: "EBMLVersion", type: "Uint" }, 25 | 0x2f7: { name: "EBMLReadVersion", type: "Uint" }, 26 | 0x2f2: { name: "EBMLMaxIDLength", type: "Uint" }, 27 | 0x2f3: { name: "EBMLMaxSizeLength", type: "Uint" }, 28 | 0x282: { name: "DocType", type: "String" }, 29 | 0x287: { name: "DocTypeVersion", type: "Uint" }, 30 | 0x285: { name: "DocTypeReadVersion", type: "Uint" }, 31 | 0x6c: { name: "Void", type: "Binary" }, 32 | 0x3f: { name: "CRC-32", type: "Binary" }, 33 | 0xb538667: { name: "SignatureSlot", type: "Container" }, 34 | 0x3e8a: { name: "SignatureAlgo", type: "Uint" }, 35 | 0x3e9a: { name: "SignatureHash", type: "Uint" }, 36 | 0x3ea5: { name: "SignaturePublicKey", type: "Binary" }, 37 | 0x3eb5: { name: "Signature", type: "Binary" }, 38 | 0x3e5b: { name: "SignatureElements", type: "Container" }, 39 | 0x3e7b: { name: "SignatureElementList", type: "Container" }, 40 | 0x2532: { name: "SignedElement", type: "Binary" }, 41 | 0x8538067: { name: "Segment", type: "Container" }, 42 | 0x14d9b74: { name: "SeekHead", type: "Container" }, 43 | 0xdbb: { name: "Seek", type: "Container" }, 44 | 0x13ab: { name: "SeekID", type: "Binary" }, 45 | 0x13ac: { name: "SeekPosition", type: "Uint" }, 46 | 0x549a966: { name: "Info", type: "Container" }, 47 | 0x33a4: { name: "SegmentUID", type: "Binary" }, 48 | 0x3384: { name: "SegmentFilename", type: "String" }, 49 | 0x1cb923: { name: "PrevUID", type: "Binary" }, 50 | 0x1c83ab: { name: "PrevFilename", type: "String" }, 51 | 0x1eb923: { name: "NextUID", type: "Binary" }, 52 | 0x1e83bb: { name: "NextFilename", type: "String" }, 53 | 0x444: { name: "SegmentFamily", type: "Binary" }, 54 | 0x2924: { name: "ChapterTranslate", type: "Container" }, 55 | 0x29fc: { name: "ChapterTranslateEditionUID", type: 
"Uint" }, 56 | 0x29bf: { name: "ChapterTranslateCodec", type: "Uint" }, 57 | 0x29a5: { name: "ChapterTranslateID", type: "Binary" }, 58 | 0xad7b1: { name: "TimecodeScale", type: "Uint" }, 59 | 0x489: { name: "Duration", type: "Float" }, 60 | 0x461: { name: "DateUTC", type: "Date" }, 61 | 0x3ba9: { name: "Title", type: "String" }, 62 | 0xd80: { name: "MuxingApp", type: "String" }, 63 | 0x1741: { name: "WritingApp", type: "String" }, 64 | // 0xf43b675: { name: 'Cluster', type: 'Container' }, 65 | 0x67: { name: "Timecode", type: "Uint" }, 66 | 0x1854: { name: "SilentTracks", type: "Container" }, 67 | 0x18d7: { name: "SilentTrackNumber", type: "Uint" }, 68 | 0x27: { name: "Position", type: "Uint" }, 69 | 0x2b: { name: "PrevSize", type: "Uint" }, 70 | 0x23: { name: "SimpleBlock", type: "Binary" }, 71 | 0x20: { name: "BlockGroup", type: "Container" }, 72 | 0x21: { name: "Block", type: "Binary" }, 73 | 0x22: { name: "BlockVirtual", type: "Binary" }, 74 | 0x35a1: { name: "BlockAdditions", type: "Container" }, 75 | 0x26: { name: "BlockMore", type: "Container" }, 76 | 0x6e: { name: "BlockAddID", type: "Uint" }, 77 | 0x25: { name: "BlockAdditional", type: "Binary" }, 78 | 0x1b: { name: "BlockDuration", type: "Uint" }, 79 | 0x7a: { name: "ReferencePriority", type: "Uint" }, 80 | 0x7b: { name: "ReferenceBlock", type: "Int" }, 81 | 0x7d: { name: "ReferenceVirtual", type: "Int" }, 82 | 0x24: { name: "CodecState", type: "Binary" }, 83 | 0x35a2: { name: "DiscardPadding", type: "Int" }, 84 | 0xe: { name: "Slices", type: "Container" }, 85 | 0x68: { name: "TimeSlice", type: "Container" }, 86 | 0x4c: { name: "LaceNumber", type: "Uint" }, 87 | 0x4d: { name: "FrameNumber", type: "Uint" }, 88 | 0x4b: { name: "BlockAdditionID", type: "Uint" }, 89 | 0x4e: { name: "Delay", type: "Uint" }, 90 | 0x4f: { name: "SliceDuration", type: "Uint" }, 91 | 0x48: { name: "ReferenceFrame", type: "Container" }, 92 | 0x49: { name: "ReferenceOffset", type: "Uint" }, 93 | 0x4a: { name: "ReferenceTimeCode", 
type: "Uint" }, 94 | 0x2f: { name: "EncryptedBlock", type: "Binary" }, 95 | 0x654ae6b: { name: "Tracks", type: "Container" }, 96 | 0x2e: { name: "TrackEntry", type: "Container" }, 97 | 0x57: { name: "TrackNumber", type: "Uint" }, 98 | 0x33c5: { name: "TrackUID", type: "Uint" }, 99 | 0x3: { name: "TrackType", type: "Uint" }, 100 | 0x39: { name: "FlagEnabled", type: "Uint" }, 101 | 0x8: { name: "FlagDefault", type: "Uint" }, 102 | 0x15aa: { name: "FlagForced", type: "Uint" }, 103 | 0x1c: { name: "FlagLacing", type: "Uint" }, 104 | 0x2de7: { name: "MinCache", type: "Uint" }, 105 | 0x2df8: { name: "MaxCache", type: "Uint" }, 106 | 0x3e383: { name: "DefaultDuration", type: "Uint" }, 107 | 0x34e7a: { name: "DefaultDecodedFieldDuration", type: "Uint" }, 108 | 0x3314f: { name: "TrackTimecodeScale", type: "Float" }, 109 | 0x137f: { name: "TrackOffset", type: "Int" }, 110 | 0x15ee: { name: "MaxBlockAdditionID", type: "Uint" }, 111 | 0x136e: { name: "Name", type: "String" }, 112 | 0x2b59c: { name: "Language", type: "String" }, 113 | 0x6: { name: "CodecID", type: "String" }, 114 | 0x23a2: { name: "CodecPrivate", type: "Binary" }, 115 | 0x58688: { name: "CodecName", type: "String" }, 116 | 0x3446: { name: "AttachmentLink", type: "Uint" }, 117 | 0x1a9697: { name: "CodecSettings", type: "String" }, 118 | 0x1b4040: { name: "CodecInfoURL", type: "String" }, 119 | 0x6b240: { name: "CodecDownloadURL", type: "String" }, 120 | 0x2a: { name: "CodecDecodeAll", type: "Uint" }, 121 | 0x2fab: { name: "TrackOverlay", type: "Uint" }, 122 | 0x16aa: { name: "CodecDelay", type: "Uint" }, 123 | 0x16bb: { name: "SeekPreRoll", type: "Uint" }, 124 | 0x2624: { name: "TrackTranslate", type: "Container" }, 125 | 0x26fc: { name: "TrackTranslateEditionUID", type: "Uint" }, 126 | 0x26bf: { name: "TrackTranslateCodec", type: "Uint" }, 127 | 0x26a5: { name: "TrackTranslateTrackID", type: "Binary" }, 128 | 0x60: { name: "Video", type: "Container" }, 129 | 0x1a: { name: "FlagInterlaced", type: "Uint" }, 130 | 
0x13b8: { name: "StereoMode", type: "Uint" }, 131 | 0x13c0: { name: "AlphaMode", type: "Uint" }, 132 | 0x13b9: { name: "OldStereoMode", type: "Uint" }, 133 | 0x30: { name: "PixelWidth", type: "Uint" }, 134 | 0x3a: { name: "PixelHeight", type: "Uint" }, 135 | 0x14aa: { name: "PixelCropBottom", type: "Uint" }, 136 | 0x14bb: { name: "PixelCropTop", type: "Uint" }, 137 | 0x14cc: { name: "PixelCropLeft", type: "Uint" }, 138 | 0x14dd: { name: "PixelCropRight", type: "Uint" }, 139 | 0x14b0: { name: "DisplayWidth", type: "Uint" }, 140 | 0x14ba: { name: "DisplayHeight", type: "Uint" }, 141 | 0x14b2: { name: "DisplayUnit", type: "Uint" }, 142 | 0x14b3: { name: "AspectRatioType", type: "Uint" }, 143 | 0xeb524: { name: "ColourSpace", type: "Binary" }, 144 | 0xfb523: { name: "GammaValue", type: "Float" }, 145 | 0x383e3: { name: "FrameRate", type: "Float" }, 146 | 0x61: { name: "Audio", type: "Container" }, 147 | 0x35: { name: "SamplingFrequency", type: "Float" }, 148 | 0x38b5: { name: "OutputSamplingFrequency", type: "Float" }, 149 | 0x1f: { name: "Channels", type: "Uint" }, 150 | 0x3d7b: { name: "ChannelPositions", type: "Binary" }, 151 | 0x2264: { name: "BitDepth", type: "Uint" }, 152 | 0x62: { name: "TrackOperation", type: "Container" }, 153 | 0x63: { name: "TrackCombinePlanes", type: "Container" }, 154 | 0x64: { name: "TrackPlane", type: "Container" }, 155 | 0x65: { name: "TrackPlaneUID", type: "Uint" }, 156 | 0x66: { name: "TrackPlaneType", type: "Uint" }, 157 | 0x69: { name: "TrackJoinBlocks", type: "Container" }, 158 | 0x6d: { name: "TrackJoinUID", type: "Uint" }, 159 | 0x40: { name: "TrickTrackUID", type: "Uint" }, 160 | 0x41: { name: "TrickTrackSegmentUID", type: "Binary" }, 161 | 0x46: { name: "TrickTrackFlag", type: "Uint" }, 162 | 0x47: { name: "TrickMasterTrackUID", type: "Uint" }, 163 | 0x44: { name: "TrickMasterTrackSegmentUID", type: "Binary" }, 164 | 0x2d80: { name: "ContentEncodings", type: "Container" }, 165 | 0x2240: { name: "ContentEncoding", type: 
"Container" }, 166 | 0x1031: { name: "ContentEncodingOrder", type: "Uint" }, 167 | 0x1032: { name: "ContentEncodingScope", type: "Uint" }, 168 | 0x1033: { name: "ContentEncodingType", type: "Uint" }, 169 | 0x1034: { name: "ContentCompression", type: "Container" }, 170 | 0x254: { name: "ContentCompAlgo", type: "Uint" }, 171 | 0x255: { name: "ContentCompSettings", type: "Binary" }, 172 | 0x1035: { name: "ContentEncryption", type: "Container" }, 173 | 0x7e1: { name: "ContentEncAlgo", type: "Uint" }, 174 | 0x7e2: { name: "ContentEncKeyID", type: "Binary" }, 175 | 0x7e3: { name: "ContentSignature", type: "Binary" }, 176 | 0x7e4: { name: "ContentSigKeyID", type: "Binary" }, 177 | 0x7e5: { name: "ContentSigAlgo", type: "Uint" }, 178 | 0x7e6: { name: "ContentSigHashAlgo", type: "Uint" }, 179 | 0xc53bb6b: { name: "Cues", type: "Container" }, 180 | 0x3b: { name: "CuePoint", type: "Container" }, 181 | 0x33: { name: "CueTime", type: "Uint" }, 182 | 0x37: { name: "CueTrackPositions", type: "Container" }, 183 | 0x77: { name: "CueTrack", type: "Uint" }, 184 | 0x71: { name: "CueClusterPosition", type: "Uint" }, 185 | 0x70: { name: "CueRelativePosition", type: "Uint" }, 186 | 0x32: { name: "CueDuration", type: "Uint" }, 187 | 0x1378: { name: "CueBlockNumber", type: "Uint" }, 188 | 0x6a: { name: "CueCodecState", type: "Uint" }, 189 | 0x5b: { name: "CueReference", type: "Container" }, 190 | 0x16: { name: "CueRefTime", type: "Uint" }, 191 | 0x17: { name: "CueRefCluster", type: "Uint" }, 192 | 0x135f: { name: "CueRefNumber", type: "Uint" }, 193 | 0x6b: { name: "CueRefCodecState", type: "Uint" }, 194 | 0x941a469: { name: "Attachments", type: "Container" }, 195 | 0x21a7: { name: "AttachedFile", type: "Container" }, 196 | 0x67e: { name: "FileDescription", type: "String" }, 197 | 0x66e: { name: "FileName", type: "String" }, 198 | 0x660: { name: "FileMimeType", type: "String" }, 199 | 0x65c: { name: "FileData", type: "Binary" }, 200 | 0x6ae: { name: "FileUID", type: "Uint" }, 201 | 0x675: { 
name: "FileReferral", type: "Binary" }, 202 | 0x661: { name: "FileUsedStartTime", type: "Uint" }, 203 | 0x662: { name: "FileUsedEndTime", type: "Uint" }, 204 | 0x43a770: { name: "Chapters", type: "Container" }, 205 | 0x5b9: { name: "EditionEntry", type: "Container" }, 206 | 0x5bc: { name: "EditionUID", type: "Uint" }, 207 | 0x5bd: { name: "EditionFlagHidden", type: "Uint" }, 208 | 0x5db: { name: "EditionFlagDefault", type: "Uint" }, 209 | 0x5dd: { name: "EditionFlagOrdered", type: "Uint" }, 210 | 0x36: { name: "ChapterAtom", type: "Container" }, 211 | 0x33c4: { name: "ChapterUID", type: "Uint" }, 212 | 0x1654: { name: "ChapterStringUID", type: "String" }, 213 | 0x11: { name: "ChapterTimeStart", type: "Uint" }, 214 | 0x12: { name: "ChapterTimeEnd", type: "Uint" }, 215 | 0x18: { name: "ChapterFlagHidden", type: "Uint" }, 216 | 0x598: { name: "ChapterFlagEnabled", type: "Uint" }, 217 | 0x2e67: { name: "ChapterSegmentUID", type: "Binary" }, 218 | 0x2ebc: { name: "ChapterSegmentEditionUID", type: "Uint" }, 219 | 0x23c3: { name: "ChapterPhysicalEquiv", type: "Uint" }, 220 | 0xf: { name: "ChapterTrack", type: "Container" }, 221 | 0x9: { name: "ChapterTrackNumber", type: "Uint" }, 222 | 0x0: { name: "ChapterDisplay", type: "Container" }, 223 | 0x5: { name: "ChapString", type: "String" }, 224 | 0x37c: { name: "ChapLanguage", type: "String" }, 225 | 0x37e: { name: "ChapCountry", type: "String" }, 226 | 0x2944: { name: "ChapProcess", type: "Container" }, 227 | 0x2955: { name: "ChapProcessCodecID", type: "Uint" }, 228 | 0x50d: { name: "ChapProcessPrivate", type: "Binary" }, 229 | 0x2911: { name: "ChapProcessCommand", type: "Container" }, 230 | 0x2922: { name: "ChapProcessTime", type: "Uint" }, 231 | 0x2933: { name: "ChapProcessData", type: "Binary" }, 232 | 0x254c367: { name: "Tags", type: "Container" }, 233 | 0x3373: { name: "Tag", type: "Container" }, 234 | 0x23c0: { name: "Targets", type: "Container" }, 235 | 0x28ca: { name: "TargetTypeValue", type: "Uint" }, 236 | 0x23ca: 
{ name: "TargetType", type: "String" }, 237 | 0x23c5: { name: "TagTrackUID", type: "Uint" }, 238 | 0x23c9: { name: "TagEditionUID", type: "Uint" }, 239 | 0x23c4: { name: "TagChapterUID", type: "Uint" }, 240 | 0x23c6: { name: "TagAttachmentUID", type: "Uint" }, 241 | 0x27c8: { name: "SimpleTag", type: "Container" }, 242 | 0x5a3: { name: "TagName", type: "String" }, 243 | 0x47a: { name: "TagLanguage", type: "String" }, 244 | 0x484: { name: "TagDefault", type: "Uint" }, 245 | 0x487: { name: "TagString", type: "String" }, 246 | 0x485: { name: "TagBinary", type: "Binary" }, 247 | }; 248 | 249 | /* Base EBML element: keeps the raw bytes ("source") and a decoded value ("data") in sync; setSource/setData store one representation and call the update hook to derive the other. Subclasses override updateBySource/updateByData. NOTE(review): a generic parameter (WebmBase<T> — see the "data?: T" field) appears stripped by this dump; confirm against the repository. */ class WebmBase { 250 | source?: Uint8Array; 251 | data?: T; 252 | 253 | constructor(private name = "Unknown", private type = "Unknown") {} 254 | 255 | /* No-op hooks; subclasses decode source -> data and encode data -> source. */ updateBySource() {} 256 | 257 | setSource(source: Uint8Array) { 258 | this.source = source; 259 | this.updateBySource(); 260 | } 261 | 262 | updateByData() {} 263 | 264 | setData(data: T) { 265 | this.data = data; 266 | this.updateByData(); 267 | } 268 | } 269 | 270 | /* Unsigned-integer element; the decoded value is kept as a hex string (see updateBySource), with getValue/setValue converting to and from number. */ class WebmUint extends WebmBase { 271 | constructor(name: string, type: string) { 272 | super(name, type || "Uint"); 273 | } 274 | 275 | updateBySource() { 276 | // use hex representation of a number instead of number value 277 | this.data = ""; 278 | for (let i = 0; i < this.source!.length; i++) { 279 | const hex = this.source![i].toString(16); 280 | this.data += padHex(hex); 281 | } 282 | } 283 | 284 | /* Re-encode the hex string into bytes, two hex digits per byte. */ updateByData() { 285 | const length = this.data!.length / 2; 286 | this.source = new Uint8Array(length); 287 | for (let i = 0; i < length; i++) { 288 | const hex = this.data!.substr(i * 2, 2); 289 | this.source[i] = parseInt(hex, 16); 290 | } 291 | } 292 | 293 | getValue() { 294 | return parseInt(this.data!, 16); 295 | } 296 | 297 | setValue(value: number) { 298 | this.setData(padHex(value.toString(16))); 299 | } 300 | } 301 | 302 | /* Ensures a hex string has an even number of digits by left-padding with "0". */ function padHex(hex: string) { 303 | return hex.length % 2 === 1 ? 
"0" + hex : hex; 304 | } 305 | 306 | class WebmFloat extends WebmBase { 307 | constructor(name: string, type: string) { 308 | super(name, type || "Float"); 309 | } 310 | 311 | getFloatArrayType() { 312 | return this.source && this.source.length === 4 313 | ? Float32Array 314 | : Float64Array; 315 | } 316 | updateBySource() { 317 | const byteArray = this.source!.reverse(); 318 | const floatArrayType = this.getFloatArrayType(); 319 | const floatArray = new floatArrayType(byteArray.buffer); 320 | this.data! = floatArray[0]; 321 | } 322 | updateByData() { 323 | const floatArrayType = this.getFloatArrayType(); 324 | const floatArray = new floatArrayType([this.data!]); 325 | const byteArray = new Uint8Array(floatArray.buffer); 326 | this.source = byteArray.reverse(); 327 | } 328 | getValue() { 329 | return this.data; 330 | } 331 | setValue(value: number) { 332 | this.setData(value); 333 | } 334 | } 335 | 336 | interface ContainerData { 337 | id: number; 338 | idHex?: string; 339 | data: WebmBase; 340 | } 341 | 342 | class WebmContainer extends WebmBase { 343 | offset: number = 0; 344 | data: ContainerData[] = []; 345 | 346 | constructor(name: string, type: string) { 347 | super(name, type || "Container"); 348 | } 349 | 350 | readByte() { 351 | return this.source![this.offset++]; 352 | } 353 | readUint() { 354 | const firstByte = this.readByte(); 355 | const bytes = 8 - firstByte.toString(2).length; 356 | let value = firstByte - (1 << (7 - bytes)); 357 | for (let i = 0; i < bytes; i++) { 358 | // don't use bit operators to support x86 359 | value *= 256; 360 | value += this.readByte(); 361 | } 362 | return value; 363 | } 364 | updateBySource() { 365 | let end: number | undefined = undefined; 366 | this.data = []; 367 | for ( 368 | this.offset = 0; 369 | this.offset < this.source!.length; 370 | this.offset = end 371 | ) { 372 | const id = this.readUint(); 373 | const len = this.readUint(); 374 | end = Math.min(this.offset + len, this.source!.length); 375 | const data = 
this.source!.slice(this.offset, end); 376 | 377 | const info = sections[id] || { name: "Unknown", type: "Unknown" }; 378 | let ctr: any = WebmBase; 379 | switch (info.type) { 380 | case "Container": 381 | ctr = WebmContainer; 382 | break; 383 | case "Uint": 384 | ctr = WebmUint; 385 | break; 386 | case "Float": 387 | ctr = WebmFloat; 388 | break; 389 | } 390 | const section = new ctr(info.name, info.type); 391 | section.setSource(data); 392 | this.data.push({ 393 | id: id, 394 | idHex: id.toString(16), 395 | data: section, 396 | }); 397 | } 398 | } 399 | writeUint(x: number, draft = false) { 400 | for ( 401 | var bytes = 1, flag = 0x80; 402 | x >= flag && bytes < 8; 403 | bytes++, flag *= 0x80 404 | ) {} 405 | 406 | if (!draft) { 407 | let value = flag + x; 408 | for (let i = bytes - 1; i >= 0; i--) { 409 | // don't use bit operators to support x86 410 | const c = value % 256; 411 | this.source![this.offset! + i] = c; 412 | value = (value - c) / 256; 413 | } 414 | } 415 | 416 | this.offset += bytes; 417 | } 418 | 419 | writeSections(draft = false) { 420 | this.offset = 0; 421 | for (let i = 0; i < this.data.length; i++) { 422 | const section = this.data[i], 423 | content = section.data.source, 424 | contentLength = content!.length; 425 | this.writeUint(section.id, draft); 426 | this.writeUint(contentLength, draft); 427 | if (!draft) { 428 | this.source!.set(content!, this.offset); 429 | } 430 | this.offset += contentLength; 431 | } 432 | return this.offset; 433 | } 434 | 435 | updateByData() { 436 | // run without accessing this.source to determine total length - need to know it to create Uint8Array 437 | const length = this.writeSections(true); 438 | this.source = new Uint8Array(length); 439 | // now really write data 440 | this.writeSections(); 441 | } 442 | 443 | getSectionById(id: number) { 444 | for (let i = 0; i < this.data.length; i++) { 445 | const section = this.data[i]; 446 | if (section.id === id) { 447 | return section.data; 448 | } 449 | } 450 | 451 | 
return undefined; 452 | } 453 | } 454 | 455 | class WebmFile extends WebmContainer { 456 | constructor(source: Uint8Array) { 457 | super("File", "File"); 458 | this.setSource(source); 459 | } 460 | 461 | fixDuration(duration: number) { 462 | const segmentSection = this.getSectionById(0x8538067) as WebmContainer; 463 | if (!segmentSection) { 464 | return false; 465 | } 466 | 467 | const infoSection = segmentSection.getSectionById( 468 | 0x549a966, 469 | ) as WebmContainer; 470 | if (!infoSection) { 471 | return false; 472 | } 473 | 474 | const timeScaleSection = infoSection.getSectionById( 475 | 0xad7b1, 476 | ) as WebmFloat; 477 | if (!timeScaleSection) { 478 | return false; 479 | } 480 | 481 | let durationSection = infoSection.getSectionById(0x489) as WebmFloat; 482 | if (durationSection) { 483 | if (durationSection.getValue()! <= 0) { 484 | durationSection.setValue(duration); 485 | } else { 486 | return false; 487 | } 488 | } else { 489 | // append Duration section 490 | durationSection = new WebmFloat("Duration", "Float"); 491 | durationSection.setValue(duration); 492 | infoSection.data.push({ 493 | id: 0x489, 494 | data: durationSection, 495 | }); 496 | } 497 | 498 | // set default time scale to 1 millisecond (1000000 nanoseconds) 499 | timeScaleSection.setValue(1000000); 500 | infoSection.updateByData(); 501 | segmentSection.updateByData(); 502 | this.updateByData(); 503 | 504 | return true; 505 | } 506 | 507 | toBlob(type = "video/webm") { 508 | return new Blob([this.source!.buffer], { type }); 509 | } 510 | } 511 | 512 | /** 513 | * Fixes duration on MediaRecorder output. 514 | * @param blob Input Blob with incorrect duration. 515 | * @param duration Correct duration (in milliseconds). 516 | * @param type Output blob mimetype (default: video/webm). 
517 | * @returns 518 | */ 519 | /* Resolves with a Blob: the duration-fixed file, or the original input blob when fixing is not possible. NOTE(review): the return type's generic (Promise<Blob>) appears stripped by this dump; both resolve paths produce a Blob. */ export const webmFixDuration = ( 520 | blob: Blob, 521 | duration: number, 522 | type = "video/webm", 523 | ): Promise => { 524 | /* Adapts FileReader's callback API to a Promise (a legitimate use of the Promise constructor). */ return new Promise((resolve, reject) => { 525 | try { 526 | const reader = new FileReader(); 527 | 528 | reader.addEventListener("loadend", () => { 529 | try { 530 | const result = reader.result as ArrayBuffer; 531 | const file = new WebmFile(new Uint8Array(result)); 532 | if (file.fixDuration(duration)) { 533 | resolve(file.toBlob(type)); 534 | } else { 535 | /* fixDuration returned false (missing Segment/Info/TimecodeScale section, or a positive duration already present) — fall back to the unmodified input. */ resolve(blob); 536 | } 537 | } catch (ex) { 538 | reject(ex); 539 | } 540 | }); 541 | 542 | /* NOTE(review): rejects with undefined on read errors; reject(reader.error) would preserve the cause — confirm callers before changing. */ reader.addEventListener("error", () => reject()); 543 | 544 | reader.readAsArrayBuffer(blob); 545 | } catch (ex) { 546 | reject(ex); 547 | } 548 | }); 549 | }; 550 | -------------------------------------------------------------------------------- /src/utils/Constants.ts: -------------------------------------------------------------------------------- 1 | /* Heuristic user-agent sniff for mobile/tablet devices (see the Stack Overflow link below); result is computed once at module load. */ function mobileTabletCheck() { 2 | // https://stackoverflow.com/questions/11381673/detecting-a-mobile-browser 3 | let check = false; 4 | (function (a: string) { 5 | if ( 6 | /(android|bb\d+|meego).+mobile|avantgo|bada\/|blackberry|blazer|compal|elaine|fennec|hiptop|iemobile|ip(hone|od)|iris|kindle|lge |maemo|midp|mmp|mobile.+firefox|netfront|opera m(ob|in)i|palm( os)?|phone|p(ixi|re)\/|plucker|pocket|psp|series(4|6)0|symbian|treo|up\.(browser|link)|vodafone|wap|windows ce|xda|xiino|android|ipad|playbook|silk/i.test( 7 | a, 8 | ) || 9 | /1207|6310|6590|3gso|4thp|50[1-6]i|770s|802s|a wa|abac|ac(er|oo|s\-)|ai(ko|rn)|al(av|ca|co)|amoi|an(ex|ny|yw)|aptu|ar(ch|go)|as(te|us)|attw|au(di|\-m|r |s )|avan|be(ck|ll|nq)|bi(lb|rd)|bl(ac|az)|br(e|v)w|bumb|bw\-(n|u)|c55\/|capi|ccwa|cdm\-|cell|chtm|cldc|cmd\-|co(mp|nd)|craw|da(it|ll|ng)|dbte|dc\-s|devi|dica|dmob|do(c|p)o|ds(12|\-d)|el(49|ai)|em(l2|ul)|er(ic|k0)|esl8|ez([4-7]0|os|wa|ze)|fetc|fly(\-|_)|g1 
u|g560|gene|gf\-5|g\-mo|go(\.w|od)|gr(ad|un)|haie|hcit|hd\-(m|p|t)|hei\-|hi(pt|ta)|hp( i|ip)|hs\-c|ht(c(\-| |_|a|g|p|s|t)|tp)|hu(aw|tc)|i\-(20|go|ma)|i230|iac( |\-|\/)|ibro|idea|ig01|ikom|im1k|inno|ipaq|iris|ja(t|v)a|jbro|jemu|jigs|kddi|keji|kgt( |\/)|klon|kpt |kwc\-|kyo(c|k)|le(no|xi)|lg( g|\/(k|l|u)|50|54|\-[a-w])|libw|lynx|m1\-w|m3ga|m50\/|ma(te|ui|xo)|mc(01|21|ca)|m\-cr|me(rc|ri)|mi(o8|oa|ts)|mmef|mo(01|02|bi|de|do|t(\-| |o|v)|zz)|mt(50|p1|v )|mwbp|mywa|n10[0-2]|n20[2-3]|n30(0|2)|n50(0|2|5)|n7(0(0|1)|10)|ne((c|m)\-|on|tf|wf|wg|wt)|nok(6|i)|nzph|o2im|op(ti|wv)|oran|owg1|p800|pan(a|d|t)|pdxg|pg(13|\-([1-8]|c))|phil|pire|pl(ay|uc)|pn\-2|po(ck|rt|se)|prox|psio|pt\-g|qa\-a|qc(07|12|21|32|60|\-[2-7]|i\-)|qtek|r380|r600|raks|rim9|ro(ve|zo)|s55\/|sa(ge|ma|mm|ms|ny|va)|sc(01|h\-|oo|p\-)|sdk\/|se(c(\-|0|1)|47|mc|nd|ri)|sgh\-|shar|sie(\-|m)|sk\-0|sl(45|id)|sm(al|ar|b3|it|t5)|so(ft|ny)|sp(01|h\-|v\-|v )|sy(01|mb)|t2(18|50)|t6(00|10|18)|ta(gt|lk)|tcl\-|tdg\-|tel(i|m)|tim\-|t\-mo|to(pl|sh)|ts(70|m\-|m3|m5)|tx\-9|up(\.b|g1|si)|utst|v400|v750|veri|vi(rg|te)|vk(40|5[0-3]|\-v)|vm40|voda|vulc|vx(52|53|60|61|70|80|81|83|85|98)|w3c(\-| )|webc|whit|wi(g |nc|nw)|wmlb|wonu|x700|yas\-|your|zeto|zte\-/i.test( 10 | a.substr(0, 4), 11 | ) 12 | ) 13 | check = true; 14 | })( 15 | navigator.userAgent || 16 | navigator.vendor || 17 | ("opera" in window && typeof window.opera === "string" 18 | ? window.opera 19 | : ""), 20 | ); 21 | return check; 22 | } 23 | const isMobileOrTablet = mobileTabletCheck(); 24 | export default { 25 | SAMPLING_RATE: 16000, 26 | DEFAULT_AUDIO_URL: `https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/${ 27 | isMobileOrTablet ? 
"jfk" : "ted_60_16k" 28 | }.wav`, 29 | DEFAULT_MODEL: "Xenova/whisper-tiny", 30 | DEFAULT_SUBTASK: "transcribe", 31 | DEFAULT_LANGUAGE: "english", 32 | DEFAULT_QUANTIZED: isMobileOrTablet, 33 | DEFAULT_MULTILINGUAL: false, 34 | }; 35 | -------------------------------------------------------------------------------- /src/vite-env.d.ts: -------------------------------------------------------------------------------- 1 | // eslint-disable-next-line spaced-comment 2 | /// 3 | -------------------------------------------------------------------------------- /src/worker.js: -------------------------------------------------------------------------------- 1 | /* eslint-disable camelcase */ 2 | import { pipeline, env } from "@xenova/transformers"; 3 | 4 | // Disable local models 5 | env.allowLocalModels = false; 6 | 7 | // Define model factories 8 | // Ensures only one model is created of each type 9 | class PipelineFactory { 10 | static task = null; 11 | static model = null; 12 | static quantized = null; 13 | static instance = null; 14 | 15 | constructor(tokenizer, model, quantized) { 16 | this.tokenizer = tokenizer; 17 | this.model = model; 18 | this.quantized = quantized; 19 | } 20 | 21 | static async getInstance(progress_callback = null) { 22 | if (this.instance === null) { 23 | this.instance = pipeline(this.task, this.model, { 24 | quantized: this.quantized, 25 | progress_callback, 26 | 27 | // For medium models, we need to load the `no_attentions` revision to avoid running out of memory 28 | revision: this.model.includes("/whisper-medium") ? "no_attentions" : "main" 29 | }); 30 | } 31 | 32 | return this.instance; 33 | } 34 | } 35 | 36 | self.addEventListener("message", async (event) => { 37 | const message = event.data; 38 | 39 | // Do some work... 
/**
 * Transcribe an audio buffer with a Whisper (or Distil-Whisper) model.
 *
 * Streams partial results back to the main thread via `self.postMessage`
 * ({status: "update"}) while generation is running; the caller posts the
 * final {status: "complete"} message itself.
 *
 * @param {Float32Array} audio - mono audio samples to transcribe.
 * @param {string} model - model id, e.g. "Xenova/whisper-tiny".
 * @param {boolean} multilingual - if false (and not Distil-Whisper), the
 *     English-only ".en" checkpoint variant is loaded instead.
 * @param {boolean} quantized - whether to load quantized weights.
 * @param {string} subtask - "transcribe" or "translate".
 * @param {string} language - source language hint for multilingual models.
 * @returns {Promise<object|null>} the pipeline output with timestamps, or
 *     null if transcription failed (an "error" message is posted instead).
 */
const transcribe = async (
    audio,
    model,
    multilingual,
    quantized,
    subtask,
    language,
) => {
    const isDistilWhisper = model.startsWith("distil-whisper/");

    // English-only checkpoints are published under a ".en" suffix.
    let modelName = model;
    if (!isDistilWhisper && !multilingual) {
        modelName += ".en";
    }

    const p = AutomaticSpeechRecognitionPipelineFactory;
    if (p.model !== modelName || p.quantized !== quantized) {
        // Requested model/quantization differs: invalidate the cached
        // pipeline so the next getInstance() builds the right one.
        p.model = modelName;
        p.quantized = quantized;

        if (p.instance !== null) {
            (await p.getInstance()).dispose();
            p.instance = null;
        }
    }

    // Load transcriber model, forwarding download/progress events to the UI.
    const transcriber = await p.getInstance((data) => {
        self.postMessage(data);
    });

    // Seconds represented by one timestamp token position.
    const time_precision =
        transcriber.processor.feature_extractor.config.chunk_length /
        transcriber.model.config.max_source_positions;

    // Storage for chunks to be processed. Initialise with an empty chunk.
    const chunks_to_process = [
        {
            tokens: [],
            finalised: false,
        },
    ];

    // TODO: Storage for fully-processed and merged chunks
    // let decoded_chunks = [];

    // Called after each audio window has been fully generated: seal the
    // current chunk and open a fresh one unless this was the final window.
    function chunk_callback(chunk) {
        const last = chunks_to_process[chunks_to_process.length - 1];

        // Overwrite last chunk with new info
        Object.assign(last, chunk);
        last.finalised = true;

        // Create an empty chunk after, if it not the last chunk
        if (!chunk.is_last) {
            chunks_to_process.push({
                tokens: [],
                finalised: false,
            });
        }
    }

    // Called after every generation step: re-decode everything generated so
    // far and push a partial transcript to the main thread.
    function callback_function(item) {
        const last = chunks_to_process[chunks_to_process.length - 1];

        // Update tokens of last chunk
        last.tokens = [...item[0].output_token_ids];

        // Merge text chunks
        // TODO optimise so we don't have to decode all chunks every time
        const data = transcriber.tokenizer._decode_asr(chunks_to_process, {
            time_precision: time_precision,
            return_timestamps: true,
            force_full_sequences: false,
        });

        self.postMessage({
            status: "update",
            task: "automatic-speech-recognition",
            data: data,
        });
    }

    // Actually run transcription
    const output = await transcriber(audio, {
        // Greedy
        top_k: 0,
        do_sample: false,

        // Sliding window (Distil-Whisper was trained on shorter windows)
        chunk_length_s: isDistilWhisper ? 20 : 30,
        stride_length_s: isDistilWhisper ? 3 : 5,

        // Language and task
        language: language,
        task: subtask,

        // Return timestamps
        return_timestamps: true,
        force_full_sequences: false,

        // Callback functions
        callback_function: callback_function, // after each generation step
        chunk_callback: chunk_callback, // after each chunk is processed
    }).catch((error) => {
        // Report the failure to the main thread and signal it via null.
        self.postMessage({
            status: "error",
            task: "automatic-speech-recognition",
            data: error,
        });
        return null;
    });

    return output;
};
"module": "ESNext", 5 | "moduleResolution": "Node", 6 | "allowSyntheticDefaultImports": true 7 | }, 8 | "include": ["vite.config.ts"] 9 | } 10 | -------------------------------------------------------------------------------- /vite.config.ts: -------------------------------------------------------------------------------- 1 | import { defineConfig } from 'vite' 2 | import react from '@vitejs/plugin-react' 3 | 4 | 5 | // https://vitejs.dev/config/ 6 | export default defineConfig({ 7 | plugins: [ 8 | react() 9 | ], 10 | }) 11 | --------------------------------------------------------------------------------