├── .eslintignore
├── .eslintrc
├── .gitignore
├── .prettierrc
├── LICENSE
├── README.md
├── index.html
├── package-lock.json
├── package.json
├── postcss.config.cjs
├── public
└── vite.svg
├── src
├── App.tsx
├── assets
│ └── react.svg
├── components
│ ├── AudioManager.tsx
│ ├── AudioPlayer.tsx
│ ├── AudioRecorder.tsx
│ ├── Progress.tsx
│ ├── TranscribeButton.tsx
│ ├── Transcript.tsx
│ └── modal
│ │ ├── Modal.tsx
│ │ └── UrlInput.tsx
├── css
│ └── index.css
├── hooks
│ ├── useTranscriber.ts
│ └── useWorker.ts
├── index.tsx
├── utils
│ ├── AudioUtils.ts
│ ├── BlobFix.ts
│ └── Constants.ts
├── vite-env.d.ts
└── worker.js
├── tailwind.config.cjs
├── tsconfig.json
├── tsconfig.node.json
└── vite.config.ts
/.eslintignore:
--------------------------------------------------------------------------------
1 | node_modules
2 | dist
--------------------------------------------------------------------------------
/.eslintrc:
--------------------------------------------------------------------------------
1 | {
2 | "root": true,
3 | "env": {
4 | "browser": true,
5 | "es2021": true
6 | },
7 | "parser": "@typescript-eslint/parser",
8 | "extends": [
9 | "eslint:recommended",
10 | "plugin:react/recommended",
11 | "plugin:@typescript-eslint/recommended",
12 | "plugin:@typescript-eslint/eslint-recommended",
13 | "prettier"
14 | ],
15 | "overrides": [],
16 | "parserOptions": {
17 | "ecmaFeatures": {
18 | "jsx": true
19 | },
20 | "ecmaVersion": "latest",
21 | "sourceType": "module"
22 | },
23 | "plugins": [
24 | "react",
25 | "react-hooks",
26 | "@typescript-eslint",
27 | "prettier"
28 | ],
29 | "rules": {
30 | "react/react-in-jsx-scope": "off",
31 | "camelcase": "error",
32 | "spaced-comment": "error",
33 | "no-duplicate-imports": "error",
34 |         "prettier/prettier": [
35 |             "error",
36 |             {
37 |                 "endOfLine": "auto"
38 |             }
39 |         ]
40 |     },
41 |     "settings": {
42 |         "react": {
43 |             "version": "detect"
44 |         }
45 |     }
47 | }
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Logs
2 | logs
3 | *.log
4 | npm-debug.log*
5 | yarn-debug.log*
6 | yarn-error.log*
7 | pnpm-debug.log*
8 | lerna-debug.log*
9 |
10 | node_modules
11 | dist
12 | dist-ssr
13 | *.local
14 |
15 | # Editor directories and files
16 | .vscode/*
17 | !.vscode/extensions.json
18 | .idea
19 | .DS_Store
20 | *.suo
21 | *.ntvs*
22 | *.njsproj
23 | *.sln
24 | *.sw?
25 |
--------------------------------------------------------------------------------
/.prettierrc:
--------------------------------------------------------------------------------
1 | {
2 | "semi": true,
3 | "tabWidth": 4,
4 | "printWidth": 80,
5 | "singleQuote": false,
6 | "trailingComma": "all",
7 | "jsxSingleQuote": true,
8 | "bracketSpacing": true,
9 |     "endOfLine": "auto"
10 | }
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 Xenova
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Whisper Web
2 |
3 | ML-powered speech recognition directly in your browser! Built with [🤗 Transformers.js](https://github.com/xenova/transformers.js).
4 |
5 | Check out the demo site [here](https://huggingface.co/spaces/Xenova/whisper-web).
6 |
7 | > [!IMPORTANT]
8 | > Experimental WebGPU support has been added to [this branch](https://github.com/xenova/whisper-web/tree/experimental-webgpu) ([demo](https://huggingface.co/spaces/Xenova/whisper-webgpu)), if you'd like to run with GPU acceleration!
9 |
10 | https://github.com/xenova/whisper-web/assets/26504141/fb170d84-9678-41b5-9248-a112ecc74c27
11 |
12 | ## Running locally
13 |
14 | 1. Clone the repo and install dependencies:
15 |
16 | ```bash
17 | git clone https://github.com/xenova/whisper-web.git
18 | cd whisper-web
19 | npm install
20 | ```
21 |
22 | 2. Run the development server:
23 |
24 | ```bash
25 | npm run dev
26 | ```
27 | > Firefox users need to change the `dom.workers.modules.enabled` setting in `about:config` to `true` to enable Web Workers.
28 | > Check out [this issue](https://github.com/xenova/whisper-web/issues/8) for more details.
29 |
30 | 3. Open the link (e.g., [http://localhost:5173/](http://localhost:5173/)) in your browser.
31 |
--------------------------------------------------------------------------------
/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 | Whisper Web
8 |
9 |
10 |
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "whisper-web",
3 | "private": true,
4 | "version": "0.0.0",
5 | "type": "module",
6 | "scripts": {
7 | "dev": "vite",
8 | "clean": "rm -rf node_modules/ dist/",
9 | "build": "tsc && vite build",
10 | "preview": "vite preview",
11 |         "lint": "eslint \"src/**/*.{js,jsx,ts,tsx,json}\"",
12 |         "lint:fix": "eslint --fix \"src/**/*.{js,jsx,ts,tsx,json}\"",
13 |         "format": "prettier --write \"src/**/*.{js,jsx,ts,tsx,css,md,json}\" --config ./.prettierrc",
14 | "tsc": "tsc"
15 | },
16 | "dependencies": {
17 | "@headlessui/react": "^1.7.13",
18 | "@xenova/transformers": "^2.7.0",
19 | "axios": "^1.3.4",
20 | "react": "^18.2.0",
21 | "react-dom": "^18.2.0"
22 | },
23 | "devDependencies": {
24 | "@types/react": "^18.0.28",
25 | "@types/react-dom": "^18.0.11",
26 | "@typescript-eslint/eslint-plugin": "^5.57.0",
27 | "@typescript-eslint/parser": "^5.57.0",
28 | "@vitejs/plugin-react": "^3.1.0",
29 | "autoprefixer": "^10.4.14",
30 | "eslint": "^8.37.0",
31 | "eslint-config-prettier": "^8.8.0",
32 | "eslint-config-standard-with-typescript": "^34.0.1",
33 | "eslint-plugin-import": "^2.27.5",
34 | "eslint-plugin-n": "^15.7.0",
35 | "eslint-plugin-prettier": "^4.2.1",
36 | "eslint-plugin-promise": "^6.1.1",
37 | "eslint-plugin-react": "^7.32.2",
38 | "eslint-plugin-react-hooks": "^4.6.0",
39 | "postcss": "^8.4.21",
40 | "prettier": "^2.8.7",
41 | "tailwindcss": "^3.2.7",
42 | "typescript": "^4.9.5",
43 | "vite": "^4.2.0"
44 | },
45 | "overrides": {
46 | "semver": "^7.5.3",
47 | "protobufjs": "^7.2.4"
48 | }
49 | }
50 |
--------------------------------------------------------------------------------
/postcss.config.cjs:
--------------------------------------------------------------------------------
1 | module.exports = {
2 | plugins: {
3 | tailwindcss: {},
4 | autoprefixer: {},
5 | },
6 | }
7 |
--------------------------------------------------------------------------------
/public/vite.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/src/App.tsx:
--------------------------------------------------------------------------------
1 | import { AudioManager } from "./components/AudioManager";
2 | import Transcript from "./components/Transcript";
3 | import { useTranscriber } from "./hooks/useTranscriber";
4 |
5 | function App() {
6 | const transcriber = useTranscriber();
7 |
8 | return (
9 |
10 |
11 |
12 | Whisper Web
13 |
14 |
15 | ML-powered speech recognition directly in your browser
16 |
17 |
18 |
19 |
20 |
21 |
30 |
31 | );
32 | }
33 |
34 | export default App;
35 |
--------------------------------------------------------------------------------
/src/assets/react.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/src/components/AudioManager.tsx:
--------------------------------------------------------------------------------
1 | import React, { useCallback, useEffect, useState } from "react";
2 | import axios from "axios";
3 | import Modal from "./modal/Modal";
4 | import { UrlInput } from "./modal/UrlInput";
5 | import AudioPlayer from "./AudioPlayer";
6 | import { TranscribeButton } from "./TranscribeButton";
7 | import Constants from "../utils/Constants";
8 | import { Transcriber } from "../hooks/useTranscriber";
9 | import Progress from "./Progress";
10 | import AudioRecorder from "./AudioRecorder";
11 |
12 | function titleCase(str: string) {
13 | str = str.toLowerCase();
14 | return (str.match(/\w+.?/g) || [])
15 | .map((word) => {
16 | return word.charAt(0).toUpperCase() + word.slice(1);
17 | })
18 | .join("");
19 | }
20 |
21 | // List of supported languages:
22 | // https://help.openai.com/en/articles/7031512-whisper-api-faq
23 | // https://github.com/openai/whisper/blob/248b6cb124225dd263bb9bd32d060b6517e067f8/whisper/tokenizer.py#L79
24 | const LANGUAGES = {
25 | en: "english",
26 | zh: "chinese",
27 | de: "german",
28 | es: "spanish/castilian",
29 | ru: "russian",
30 | ko: "korean",
31 | fr: "french",
32 | ja: "japanese",
33 | pt: "portuguese",
34 | tr: "turkish",
35 | pl: "polish",
36 | ca: "catalan/valencian",
37 | nl: "dutch/flemish",
38 | ar: "arabic",
39 | sv: "swedish",
40 | it: "italian",
41 | id: "indonesian",
42 | hi: "hindi",
43 | fi: "finnish",
44 | vi: "vietnamese",
45 | he: "hebrew",
46 | uk: "ukrainian",
47 | el: "greek",
48 | ms: "malay",
49 | cs: "czech",
50 | ro: "romanian/moldavian/moldovan",
51 | da: "danish",
52 | hu: "hungarian",
53 | ta: "tamil",
54 | no: "norwegian",
55 | th: "thai",
56 | ur: "urdu",
57 | hr: "croatian",
58 | bg: "bulgarian",
59 | lt: "lithuanian",
60 | la: "latin",
61 | mi: "maori",
62 | ml: "malayalam",
63 | cy: "welsh",
64 | sk: "slovak",
65 | te: "telugu",
66 | fa: "persian",
67 | lv: "latvian",
68 | bn: "bengali",
69 | sr: "serbian",
70 | az: "azerbaijani",
71 | sl: "slovenian",
72 | kn: "kannada",
73 | et: "estonian",
74 | mk: "macedonian",
75 | br: "breton",
76 | eu: "basque",
77 | is: "icelandic",
78 | hy: "armenian",
79 | ne: "nepali",
80 | mn: "mongolian",
81 | bs: "bosnian",
82 | kk: "kazakh",
83 | sq: "albanian",
84 | sw: "swahili",
85 | gl: "galician",
86 | mr: "marathi",
87 | pa: "punjabi/panjabi",
88 | si: "sinhala/sinhalese",
89 | km: "khmer",
90 | sn: "shona",
91 | yo: "yoruba",
92 | so: "somali",
93 | af: "afrikaans",
94 | oc: "occitan",
95 | ka: "georgian",
96 | be: "belarusian",
97 | tg: "tajik",
98 | sd: "sindhi",
99 | gu: "gujarati",
100 | am: "amharic",
101 | yi: "yiddish",
102 | lo: "lao",
103 | uz: "uzbek",
104 | fo: "faroese",
105 | ht: "haitian creole/haitian",
106 | ps: "pashto/pushto",
107 | tk: "turkmen",
108 | nn: "nynorsk",
109 | mt: "maltese",
110 | sa: "sanskrit",
111 | lb: "luxembourgish/letzeburgesch",
112 | my: "myanmar/burmese",
113 | bo: "tibetan",
114 | tl: "tagalog",
115 | mg: "malagasy",
116 | as: "assamese",
117 | tt: "tatar",
118 | haw: "hawaiian",
119 | ln: "lingala",
120 | ha: "hausa",
121 | ba: "bashkir",
122 | jw: "javanese",
123 | su: "sundanese",
124 | };
125 |
126 | export enum AudioSource {
127 | URL = "URL",
128 | FILE = "FILE",
129 | RECORDING = "RECORDING",
130 | }
131 |
132 | export function AudioManager(props: { transcriber: Transcriber }) {
133 | const [progress, setProgress] = useState(undefined);
134 | const [audioData, setAudioData] = useState<
135 | | {
136 | buffer: AudioBuffer;
137 | url: string;
138 | source: AudioSource;
139 | mimeType: string;
140 | }
141 | | undefined
142 | >(undefined);
143 | const [audioDownloadUrl, setAudioDownloadUrl] = useState<
144 | string | undefined
145 | >(undefined);
146 |
147 | const isAudioLoading = progress !== undefined;
148 |
149 | const resetAudio = () => {
150 | setAudioData(undefined);
151 | setAudioDownloadUrl(undefined);
152 | };
153 |
154 | const setAudioFromDownload = async (
155 | data: ArrayBuffer,
156 | mimeType: string,
157 | ) => {
158 | const audioCTX = new AudioContext({
159 | sampleRate: Constants.SAMPLING_RATE,
160 | });
161 | const blobUrl = URL.createObjectURL(
162 | new Blob([data], { type: "audio/*" }),
163 | );
164 | const decoded = await audioCTX.decodeAudioData(data);
165 | setAudioData({
166 | buffer: decoded,
167 | url: blobUrl,
168 | source: AudioSource.URL,
169 | mimeType: mimeType,
170 | });
171 | };
172 |
173 | const setAudioFromRecording = async (data: Blob) => {
174 | resetAudio();
175 | setProgress(0);
176 | const blobUrl = URL.createObjectURL(data);
177 | const fileReader = new FileReader();
178 | fileReader.onprogress = (event) => {
179 | setProgress(event.loaded / event.total || 0);
180 | };
181 | fileReader.onloadend = async () => {
182 | const audioCTX = new AudioContext({
183 | sampleRate: Constants.SAMPLING_RATE,
184 | });
185 | const arrayBuffer = fileReader.result as ArrayBuffer;
186 | const decoded = await audioCTX.decodeAudioData(arrayBuffer);
187 | setProgress(undefined);
188 | setAudioData({
189 | buffer: decoded,
190 | url: blobUrl,
191 | source: AudioSource.RECORDING,
192 | mimeType: data.type,
193 | });
194 | };
195 | fileReader.readAsArrayBuffer(data);
196 | };
197 |
198 | const downloadAudioFromUrl = async (
199 | requestAbortController: AbortController,
200 | ) => {
201 | if (audioDownloadUrl) {
202 | try {
203 | setAudioData(undefined);
204 | setProgress(0);
205 | const { data, headers } = (await axios.get(audioDownloadUrl, {
206 | signal: requestAbortController.signal,
207 | responseType: "arraybuffer",
208 | onDownloadProgress(progressEvent) {
209 | setProgress(progressEvent.progress || 0);
210 | },
211 | })) as {
212 | data: ArrayBuffer;
213 | headers: { "content-type": string };
214 | };
215 |
216 | let mimeType = headers["content-type"];
217 | if (!mimeType || mimeType === "audio/wave") {
218 | mimeType = "audio/wav";
219 | }
220 | setAudioFromDownload(data, mimeType);
221 | } catch (error) {
222 | console.log("Request failed or aborted", error);
223 | } finally {
224 | setProgress(undefined);
225 | }
226 | }
227 | };
228 |
229 | // When URL changes, download audio
230 | useEffect(() => {
231 | if (audioDownloadUrl) {
232 | const requestAbortController = new AbortController();
233 | downloadAudioFromUrl(requestAbortController);
234 | return () => {
235 | requestAbortController.abort();
236 | };
237 | }
238 | }, [audioDownloadUrl]);
239 |
240 | return (
241 | <>
242 |
243 |
244 | }
246 | text={"From URL"}
247 | onUrlUpdate={(e) => {
248 | props.transcriber.onInputChange();
249 | setAudioDownloadUrl(e);
250 | }}
251 | />
252 |
253 | }
255 | text={"From file"}
256 | onFileUpdate={(decoded, blobUrl, mimeType) => {
257 | props.transcriber.onInputChange();
258 | setAudioData({
259 | buffer: decoded,
260 | url: blobUrl,
261 | source: AudioSource.FILE,
262 | mimeType: mimeType,
263 | });
264 | }}
265 | />
266 | {navigator.mediaDevices && (
267 | <>
268 |
269 | }
271 | text={"Record"}
272 | setAudioData={(e) => {
273 | props.transcriber.onInputChange();
274 | setAudioFromRecording(e);
275 | }}
276 | />
277 | >
278 | )}
279 |
280 | {
281 |
284 | }
285 |
286 | {audioData && (
287 | <>
288 |
292 |
293 |
294 | {
296 | props.transcriber.start(audioData.buffer);
297 | }}
298 | isModelLoading={props.transcriber.isModelLoading}
299 | // isAudioLoading ||
300 | isTranscribing={props.transcriber.isBusy}
301 | />
302 |
303 | }
307 | />
308 |
309 | {props.transcriber.progressItems.length > 0 && (
310 |
311 |
314 | {props.transcriber.progressItems.map((data) => (
315 |
321 | ))}
322 |
323 | )}
324 | >
325 | )}
326 | >
327 | );
328 | }
329 |
330 | function SettingsTile(props: {
331 | icon: JSX.Element;
332 | className?: string;
333 | transcriber: Transcriber;
334 | }) {
335 | const [showModal, setShowModal] = useState(false);
336 |
337 | const onClick = () => {
338 | setShowModal(true);
339 | };
340 |
341 | const onClose = () => {
342 | setShowModal(false);
343 | };
344 |
345 | const onSubmit = (url: string) => {
346 | onClose();
347 | };
348 |
349 | return (
350 |
351 |
352 |
358 |
359 | );
360 | }
361 |
362 | function SettingsModal(props: {
363 | show: boolean;
364 | onSubmit: (url: string) => void;
365 | onClose: () => void;
366 | transcriber: Transcriber;
367 | }) {
368 | const names = Object.values(LANGUAGES).map(titleCase);
369 |
370 | const models = {
371 | // Original checkpoints
372 | 'Xenova/whisper-tiny': [41, 152],
373 | 'Xenova/whisper-base': [77, 291],
374 | 'Xenova/whisper-small': [249],
375 | 'Xenova/whisper-medium': [776],
376 |
377 | // Distil Whisper (English-only)
378 | 'distil-whisper/distil-medium.en': [402],
379 | 'distil-whisper/distil-large-v2': [767],
380 | };
381 | return (
382 |
387 |
388 |
418 |
450 | {props.transcriber.multilingual && (
451 | <>
452 |
453 |
468 |
469 |
483 | >
484 | )}
485 | >
486 | }
487 | onClose={props.onClose}
488 | onSubmit={() => {}}
489 | />
490 | );
491 | }
492 |
493 | function VerticalBar() {
494 | return ;
495 | }
496 |
497 | function AudioDataBar(props: { progress: number }) {
498 | return ;
499 | }
500 |
501 | function ProgressBar(props: { progress: string }) {
502 | return (
503 |
509 | );
510 | }
511 |
512 | function UrlTile(props: {
513 | icon: JSX.Element;
514 | text: string;
515 | onUrlUpdate: (url: string) => void;
516 | }) {
517 | const [showModal, setShowModal] = useState(false);
518 |
519 | const onClick = () => {
520 | setShowModal(true);
521 | };
522 |
523 | const onClose = () => {
524 | setShowModal(false);
525 | };
526 |
527 | const onSubmit = (url: string) => {
528 | props.onUrlUpdate(url);
529 | onClose();
530 | };
531 |
532 | return (
533 | <>
534 |
535 |
536 | >
537 | );
538 | }
539 |
540 | function UrlModal(props: {
541 | show: boolean;
542 | onSubmit: (url: string) => void;
543 | onClose: () => void;
544 | }) {
545 | const [url, setUrl] = useState(Constants.DEFAULT_AUDIO_URL);
546 |
547 | const onChange = (event: React.ChangeEvent) => {
548 | setUrl(event.target.value);
549 | };
550 |
551 | const onSubmit = () => {
552 | props.onSubmit(url);
553 | };
554 |
555 | return (
556 |
561 | {"Enter the URL of the audio file you want to load."}
562 |
563 | >
564 | }
565 | onClose={props.onClose}
566 | submitText={"Load"}
567 | onSubmit={onSubmit}
568 | />
569 | );
570 | }
571 |
572 | function FileTile(props: {
573 | icon: JSX.Element;
574 | text: string;
575 | onFileUpdate: (
576 | decoded: AudioBuffer,
577 | blobUrl: string,
578 | mimeType: string,
579 | ) => void;
580 | }) {
581 | // const audioPlayer = useRef(null);
582 |
583 | // Create hidden input element
584 | let elem = document.createElement("input");
585 | elem.type = "file";
586 | elem.oninput = (event) => {
587 | // Make sure we have files to use
588 | let files = (event.target as HTMLInputElement).files;
589 | if (!files) return;
590 |
591 | // Create a blob that we can use as an src for our audio element
592 | const urlObj = URL.createObjectURL(files[0]);
593 | const mimeType = files[0].type;
594 |
595 | const reader = new FileReader();
596 | reader.addEventListener("load", async (e) => {
597 | const arrayBuffer = e.target?.result as ArrayBuffer; // Get the ArrayBuffer
598 | if (!arrayBuffer) return;
599 |
600 | const audioCTX = new AudioContext({
601 | sampleRate: Constants.SAMPLING_RATE,
602 | });
603 |
604 | const decoded = await audioCTX.decodeAudioData(arrayBuffer);
605 |
606 | props.onFileUpdate(decoded, urlObj, mimeType);
607 | });
608 | reader.readAsArrayBuffer(files[0]);
609 |
610 | // Reset files
611 | elem.value = "";
612 | };
613 |
614 | return (
615 | <>
616 | elem.click()}
620 | />
621 | >
622 | );
623 | }
624 |
625 | function RecordTile(props: {
626 | icon: JSX.Element;
627 | text: string;
628 | setAudioData: (data: Blob) => void;
629 | }) {
630 | const [showModal, setShowModal] = useState(false);
631 |
632 | const onClick = () => {
633 | setShowModal(true);
634 | };
635 |
636 | const onClose = () => {
637 | setShowModal(false);
638 | };
639 |
640 | const onSubmit = (data: Blob | undefined) => {
641 | if (data) {
642 | props.setAudioData(data);
643 | onClose();
644 | }
645 | };
646 |
647 | return (
648 | <>
649 |
650 |
655 | >
656 | );
657 | }
658 |
659 | function RecordModal(props: {
660 | show: boolean;
661 | onSubmit: (data: Blob | undefined) => void;
662 | onClose: () => void;
663 | }) {
664 | const [audioBlob, setAudioBlob] = useState();
665 |
666 | const onRecordingComplete = (blob: Blob) => {
667 | setAudioBlob(blob);
668 | };
669 |
670 | const onSubmit = () => {
671 | props.onSubmit(audioBlob);
672 | setAudioBlob(undefined);
673 | };
674 |
675 | const onClose = () => {
676 | props.onClose();
677 | setAudioBlob(undefined);
678 | };
679 |
680 | return (
681 |
686 | {"Record audio using your microphone"}
687 |
688 | >
689 | }
690 | onClose={onClose}
691 | submitText={"Load"}
692 | submitEnabled={audioBlob !== undefined}
693 | onSubmit={onSubmit}
694 | />
695 | );
696 | }
697 |
698 | function Tile(props: {
699 | icon: JSX.Element;
700 | text?: string;
701 | onClick?: () => void;
702 | }) {
703 | return (
704 |
715 | );
716 | }
717 |
718 | function AnchorIcon() {
719 | return (
720 |
733 | );
734 | }
735 |
736 | function FolderIcon() {
737 | return (
738 |
751 | );
752 | }
753 |
754 | function SettingsIcon() {
755 | return (
756 |
774 | );
775 | }
776 |
777 | function MicrophoneIcon() {
778 | return (
779 |
792 | );
793 | }
794 |
--------------------------------------------------------------------------------
/src/components/AudioPlayer.tsx:
--------------------------------------------------------------------------------
1 | import { useEffect, useRef } from "react";
2 |
3 | export default function AudioPlayer(props: {
4 | audioUrl: string;
5 | mimeType: string;
6 | }) {
7 | const audioPlayer = useRef(null);
8 | const audioSource = useRef(null);
9 |
10 | // Updates src when url changes
11 | useEffect(() => {
12 | if (audioPlayer.current && audioSource.current) {
13 | audioSource.current.src = props.audioUrl;
14 | audioPlayer.current.load();
15 | }
16 | }, [props.audioUrl]);
17 |
18 | return (
19 |
28 | );
29 | }
30 |
--------------------------------------------------------------------------------
/src/components/AudioRecorder.tsx:
--------------------------------------------------------------------------------
1 | import { useState, useEffect, useRef } from "react";
2 |
3 | import { formatAudioTimestamp } from "../utils/AudioUtils";
4 | import { webmFixDuration } from "../utils/BlobFix";
5 |
6 | function getMimeType() {
7 | const types = [
8 | "audio/webm",
9 | "audio/mp4",
10 | "audio/ogg",
11 | "audio/wav",
12 | "audio/aac",
13 | ];
14 | for (let i = 0; i < types.length; i++) {
15 | if (MediaRecorder.isTypeSupported(types[i])) {
16 | return types[i];
17 | }
18 | }
19 | return undefined;
20 | }
21 |
22 | export default function AudioRecorder(props: {
23 | onRecordingComplete: (blob: Blob) => void;
24 | }) {
25 | const [recording, setRecording] = useState(false);
26 | const [duration, setDuration] = useState(0);
27 | const [recordedBlob, setRecordedBlob] = useState(null);
28 |
29 | const streamRef = useRef(null);
30 | const mediaRecorderRef = useRef(null);
31 | const chunksRef = useRef([]);
32 |
33 | const audioRef = useRef(null);
34 |
35 | const startRecording = async () => {
36 | // Reset recording (if any)
37 | setRecordedBlob(null);
38 |
39 | let startTime = Date.now();
40 |
41 | try {
42 | if (!streamRef.current) {
43 | streamRef.current = await navigator.mediaDevices.getUserMedia({
44 | audio: true,
45 | });
46 | }
47 |
48 | const mimeType = getMimeType();
49 | const mediaRecorder = new MediaRecorder(streamRef.current, {
50 | mimeType,
51 | });
52 |
53 | mediaRecorderRef.current = mediaRecorder;
54 |
55 | mediaRecorder.addEventListener("dataavailable", async (event) => {
56 | if (event.data.size > 0) {
57 | chunksRef.current.push(event.data);
58 | }
59 | if (mediaRecorder.state === "inactive") {
60 | const duration = Date.now() - startTime;
61 |
62 | // Received a stop event
63 | let blob = new Blob(chunksRef.current, { type: mimeType });
64 |
65 | if (mimeType === "audio/webm") {
66 | blob = await webmFixDuration(blob, duration, blob.type);
67 | }
68 |
69 | setRecordedBlob(blob);
70 | props.onRecordingComplete(blob);
71 |
72 | chunksRef.current = [];
73 | }
74 | });
75 | mediaRecorder.start();
76 | setRecording(true);
77 | } catch (error) {
78 | console.error("Error accessing microphone:", error);
79 | }
80 | };
81 |
82 | const stopRecording = () => {
83 | if (
84 | mediaRecorderRef.current &&
85 | mediaRecorderRef.current.state === "recording"
86 | ) {
87 | mediaRecorderRef.current.stop(); // set state to inactive
88 | setDuration(0);
89 | setRecording(false);
90 | }
91 | };
92 |
93 | useEffect(() => {
94 | let stream: MediaStream | null = null;
95 |
96 | if (recording) {
97 | const timer = setInterval(() => {
98 | setDuration((prevDuration) => prevDuration + 1);
99 | }, 1000);
100 |
101 | return () => {
102 | clearInterval(timer);
103 | };
104 | }
105 |
106 | return () => {
107 | if (stream) {
108 | stream.getTracks().forEach((track) => track.stop());
109 | }
110 | };
111 | }, [recording]);
112 |
113 | const handleToggleRecording = () => {
114 | if (recording) {
115 | stopRecording();
116 | } else {
117 | startRecording();
118 | }
119 | };
120 |
121 | return (
122 |
123 |
136 |
137 | {recordedBlob && (
138 |
144 | )}
145 |
146 | );
147 | }
148 |
--------------------------------------------------------------------------------
/src/components/Progress.tsx:
--------------------------------------------------------------------------------
1 | export default function Progress({
2 | text,
3 | percentage,
4 | }: {
5 | text: string;
6 | percentage: number;
7 | }) {
8 | percentage = percentage ?? 0;
9 | return (
10 |
11 |
15 | {text} ({`${percentage.toFixed(2)}%`})
16 |
17 |
18 | );
19 | }
20 |
--------------------------------------------------------------------------------
/src/components/TranscribeButton.tsx:
--------------------------------------------------------------------------------
1 | interface Props extends React.ButtonHTMLAttributes {
2 | isModelLoading: boolean;
3 | isTranscribing: boolean;
4 | }
5 |
6 | export function TranscribeButton(props: Props): JSX.Element {
7 | const { isModelLoading, isTranscribing, onClick, ...buttonProps } = props;
8 | return (
9 |
27 | );
28 | }
29 |
30 | export function Spinner(props: { text: string }): JSX.Element {
31 | return (
32 |
33 |
50 | {props.text}
51 |
52 | );
53 | }
54 |
--------------------------------------------------------------------------------
/src/components/Transcript.tsx:
--------------------------------------------------------------------------------
1 | import { useRef, useEffect } from "react";
2 |
3 | import { TranscriberData } from "../hooks/useTranscriber";
4 | import { formatAudioTimestamp } from "../utils/AudioUtils";
5 |
6 | interface Props {
7 | transcribedData: TranscriberData | undefined;
8 | }
9 |
10 | export default function Transcript({ transcribedData }: Props) {
11 | const divRef = useRef(null);
12 |
13 | const saveBlob = (blob: Blob, filename: string) => {
14 | const url = URL.createObjectURL(blob);
15 | const link = document.createElement("a");
16 | link.href = url;
17 | link.download = filename;
18 | link.click();
19 | URL.revokeObjectURL(url);
20 | };
21 | const exportTXT = () => {
22 | let chunks = transcribedData?.chunks ?? [];
23 | let text = chunks
24 | .map((chunk) => chunk.text)
25 | .join("")
26 | .trim();
27 |
28 | const blob = new Blob([text], { type: "text/plain" });
29 | saveBlob(blob, "transcript.txt");
30 | };
31 | const exportJSON = () => {
32 | let jsonData = JSON.stringify(transcribedData?.chunks ?? [], null, 2);
33 |
34 | // post-process the JSON to make it more readable
35 | const regex = /( "timestamp": )\[\s+(\S+)\s+(\S+)\s+\]/gm;
36 | jsonData = jsonData.replace(regex, "$1[$2 $3]");
37 |
38 | const blob = new Blob([jsonData], { type: "application/json" });
39 | saveBlob(blob, "transcript.json");
40 | };
41 |
42 | // Scroll to the bottom when the component updates
43 | useEffect(() => {
44 | if (divRef.current) {
45 | const diff = Math.abs(
46 | divRef.current.offsetHeight +
47 | divRef.current.scrollTop -
48 | divRef.current.scrollHeight,
49 | );
50 |
51 | if (diff <= 64) {
52 | // We're close enough to the bottom, so scroll to the bottom
53 | divRef.current.scrollTop = divRef.current.scrollHeight;
54 | }
55 | }
56 | });
57 |
58 | return (
59 |
63 | {transcribedData?.chunks &&
64 | transcribedData.chunks.map((chunk, i) => (
65 |
69 |
70 | {formatAudioTimestamp(chunk.timestamp[0])}
71 |
72 | {chunk.text}
73 |
74 | ))}
75 | {transcribedData && !transcribedData.isBusy && (
76 |
77 |
83 |
89 |
90 | )}
91 |
92 | );
93 | }
94 |
--------------------------------------------------------------------------------
/src/components/modal/Modal.tsx:
--------------------------------------------------------------------------------
1 | import { Dialog, Transition } from "@headlessui/react";
2 | import { Fragment } from "react";
3 |
4 | export interface Props {
5 | show: boolean;
6 | onClose: () => void;
7 | onSubmit: () => void;
8 | submitText?: string;
9 | submitEnabled?: boolean;
10 | title: string | JSX.Element;
11 | content: string | JSX.Element;
12 | }
13 |
14 | export default function Modal({
15 | show,
16 | onClose,
17 | onSubmit,
18 | title,
19 | content,
20 | submitText,
21 | submitEnabled = true,
22 | }: Props) {
23 | return (
24 |
25 |
92 |
93 | );
94 | }
95 |
--------------------------------------------------------------------------------
/src/components/modal/UrlInput.tsx:
--------------------------------------------------------------------------------
1 | import { DetailedHTMLProps, InputHTMLAttributes } from "react";
2 |
3 | export function UrlInput(
4 | props: DetailedHTMLProps<
5 | InputHTMLAttributes,
6 | HTMLInputElement
7 | >,
8 | ) {
9 | return (
10 |
11 |
18 |
19 | );
20 | }
21 |
--------------------------------------------------------------------------------
/src/css/index.css:
--------------------------------------------------------------------------------
1 | @tailwind base;
2 | @tailwind components;
3 | @tailwind utilities;
4 |
5 | html,
6 | body,
7 | #root {
8 | height: 100%;
9 | }
10 |
11 | audio::-webkit-media-controls-panel {
12 | background-color: white;
13 | }
14 |
15 | .container {
16 | width: 41rem /* 656px */;
17 | max-width: 95vw;
18 | }
19 |
--------------------------------------------------------------------------------
/src/hooks/useTranscriber.ts:
--------------------------------------------------------------------------------
1 | import { useCallback, useMemo, useState } from "react";
2 | import { useWorker } from "./useWorker";
3 | import Constants from "../utils/Constants";
4 |
5 | interface ProgressItem {
6 | file: string;
7 | loaded: number;
8 | progress: number;
9 | total: number;
10 | name: string;
11 | status: string;
12 | }
13 |
// Shape of a partial ("update") message posted by the worker while decoding.
interface TranscriberUpdateData {
    data: [
        string, // Transcribed text so far.
        // Timestamped chunks; a null end timestamp marks a still-open chunk.
        { chunks: { text: string; timestamp: [number, number | null] }[] },
    ];
    text: string;
}
21 |
// Shape of the final ("complete") message posted by the worker.
interface TranscriberCompleteData {
    data: {
        text: string; // Full transcript text.
        chunks: { text: string; timestamp: [number, number | null] }[]; // Per-chunk text with [start, end] timestamps.
    };
}
28 |
// Transcription result exposed to consumers of useTranscriber.
export interface TranscriberData {
    isBusy: boolean; // True while partial results are still streaming in.
    text: string; // Transcript text (partial or final).
    chunks: { text: string; timestamp: [number, number | null] }[]; // Per-chunk text; end timestamp may be null for an open chunk.
}
34 |
// Public interface of the useTranscriber hook.
export interface Transcriber {
    onInputChange: () => void; // Clears any previous transcript when a new input is chosen.
    isBusy: boolean; // True while a transcription run is in progress.
    isModelLoading: boolean; // True while model files are being downloaded.
    progressItems: ProgressItem[]; // Per-file download progress entries.
    start: (audioData: AudioBuffer | undefined) => void; // Kicks off transcription of decoded audio.
    output?: TranscriberData; // Latest (partial or final) transcript, if any.
    model: string; // Selected model id.
    setModel: (model: string) => void;
    multilingual: boolean; // Whether a multilingual model variant is selected.
    setMultilingual: (model: boolean) => void;
    quantized: boolean; // Whether the quantized model variant is selected.
    setQuantized: (model: boolean) => void;
    subtask: string; // "transcribe" or "translate" (only used when multilingual).
    setSubtask: (subtask: string) => void;
    language?: string; // Source language (only used when multilingual).
    setLanguage: (language: string) => void;
}
53 |
// React hook that owns the transcription web worker and exposes its state
// and settings to the UI.
// NOTE(review): useState generic parameters (e.g.
// useState<TranscriberData | undefined>) appear to have been stripped from
// this dump — confirm against the original file.
export function useTranscriber(): Transcriber {
    const [transcript, setTranscript] = useState(
        undefined,
    );
    const [isBusy, setIsBusy] = useState(false);
    const [isModelLoading, setIsModelLoading] = useState(false);

    // One entry per model file currently downloading.
    const [progressItems, setProgressItems] = useState([]);

    // Single web worker running the pipeline; this callback routes its
    // status messages into React state.
    const webWorker = useWorker((event) => {
        const message = event.data;
        // Update the state with the result
        switch (message.status) {
            case "progress":
                // Model file progress: update one of the progress items.
                setProgressItems((prev) =>
                    prev.map((item) => {
                        if (item.file === message.file) {
                            return { ...item, progress: message.progress };
                        }
                        return item;
                    }),
                );
                break;
            case "update":
                // Received partial update
                // console.log("update", message);
                // eslint-disable-next-line no-case-declarations
                const updateMessage = message as TranscriberUpdateData;
                setTranscript({
                    isBusy: true,
                    text: updateMessage.data[0],
                    chunks: updateMessage.data[1].chunks,
                });
                break;
            case "complete":
                // Received complete transcript
                // console.log("complete", message);
                // eslint-disable-next-line no-case-declarations
                const completeMessage = message as TranscriberCompleteData;
                setTranscript({
                    isBusy: false,
                    text: completeMessage.data.text,
                    chunks: completeMessage.data.chunks,
                });
                setIsBusy(false);
                break;

            case "initiate":
                // Model file start load: add a new progress item to the list.
                setIsModelLoading(true);
                setProgressItems((prev) => [...prev, message]);
                break;
            case "ready":
                setIsModelLoading(false);
                break;
            case "error":
                setIsBusy(false);
                alert(
                    `${message.data.message} This is most likely because you are using Safari on an M1/M2 Mac. Please try again from Chrome, Firefox, or Edge.\n\nIf this is not the case, please file a bug report.`,
                );
                break;
            case "done":
                // Model file loaded: remove the progress item from the list.
                setProgressItems((prev) =>
                    prev.filter((item) => item.file !== message.file),
                );
                break;

            default:
                // initiate/download/done
                break;
        }
    });

    // Transcription settings; defaults come from Constants.
    const [model, setModel] = useState(Constants.DEFAULT_MODEL);
    const [subtask, setSubtask] = useState(Constants.DEFAULT_SUBTASK);
    const [quantized, setQuantized] = useState(
        Constants.DEFAULT_QUANTIZED,
    );
    const [multilingual, setMultilingual] = useState(
        Constants.DEFAULT_MULTILINGUAL,
    );
    const [language, setLanguage] = useState(
        Constants.DEFAULT_LANGUAGE,
    );

    // Reset any previous transcript when a new audio source is chosen.
    const onInputChange = useCallback(() => {
        setTranscript(undefined);
    }, []);

    // Kicks off a transcription run by posting the audio to the worker.
    const postRequest = useCallback(
        async (audioData: AudioBuffer | undefined) => {
            if (audioData) {
                setTranscript(undefined);
                setIsBusy(true);

                let audio;
                if (audioData.numberOfChannels === 2) {
                    // Downmix stereo to mono; sqrt(2) compensates the level
                    // drop from averaging the two channels.
                    const SCALING_FACTOR = Math.sqrt(2);

                    let left = audioData.getChannelData(0);
                    let right = audioData.getChannelData(1);

                    audio = new Float32Array(left.length);
                    for (let i = 0; i < audioData.length; ++i) {
                        audio[i] = SCALING_FACTOR * (left[i] + right[i]) / 2;
                    }
                } else {
                    // If the audio is not stereo, we can just use the first channel:
                    audio = audioData.getChannelData(0);
                }

                webWorker.postMessage({
                    audio,
                    model,
                    multilingual,
                    quantized,
                    // Subtask/language only apply to multilingual models.
                    subtask: multilingual ? subtask : null,
                    language:
                        multilingual && language !== "auto" ? language : null,
                });
            }
        },
        [webWorker, model, multilingual, quantized, subtask, language],
    );

    // Stable object handed to consumers; memoized on all exposed values.
    const transcriber = useMemo(() => {
        return {
            onInputChange,
            isBusy,
            isModelLoading,
            progressItems,
            start: postRequest,
            output: transcript,
            model,
            setModel,
            multilingual,
            setMultilingual,
            quantized,
            setQuantized,
            subtask,
            setSubtask,
            language,
            setLanguage,
        };
    }, [
        isBusy,
        isModelLoading,
        progressItems,
        postRequest,
        transcript,
        model,
        multilingual,
        quantized,
        subtask,
        language,
    ]);

    return transcriber;
}
215 |
--------------------------------------------------------------------------------
/src/hooks/useWorker.ts:
--------------------------------------------------------------------------------
1 | import { useState } from "react";
2 |
// Callback signature for handling messages posted by the web worker.
export interface MessageEventHandler {
    (event: MessageEvent): void;
}
6 |
7 | export function useWorker(messageEventHandler: MessageEventHandler): Worker {
8 | // Create new worker once and never again
9 | const [worker] = useState(() => createWorker(messageEventHandler));
10 | return worker;
11 | }
12 |
13 | function createWorker(messageEventHandler: MessageEventHandler): Worker {
14 | const worker = new Worker(new URL("../worker.js", import.meta.url), {
15 | type: "module",
16 | });
17 | // Listen for messages from the Web Worker
18 | worker.addEventListener("message", messageEventHandler);
19 | return worker;
20 | }
21 |
--------------------------------------------------------------------------------
/src/index.tsx:
--------------------------------------------------------------------------------
1 | import React from "react";
2 | import ReactDOM from "react-dom/client";
3 | import App from "./App";
4 | import "./css/index.css";
5 |
6 | ReactDOM.createRoot(document.getElementById("root") as HTMLElement).render(
7 |
8 |
9 | ,
10 | );
11 |
--------------------------------------------------------------------------------
/src/utils/AudioUtils.ts:
--------------------------------------------------------------------------------
1 | function padTime(time: number) {
2 | return String(time).padStart(2, "0");
3 | }
4 |
5 | export function formatAudioTimestamp(time: number) {
6 | const hours = (time / (60 * 60)) | 0;
7 | time -= hours * (60 * 60);
8 | const minutes = (time / 60) | 0;
9 | time -= minutes * 60;
10 | const seconds = time | 0;
11 | return `${hours ? padTime(hours) + ":" : ""}${padTime(minutes)}:${padTime(
12 | seconds,
13 | )}`;
14 | }
15 |
--------------------------------------------------------------------------------
/src/utils/BlobFix.ts:
--------------------------------------------------------------------------------
1 | /*
2 | * There is a bug where `navigator.mediaDevices.getUserMedia` + `MediaRecorder`
3 | * creates WEBM files without duration metadata. See:
4 | * - https://bugs.chromium.org/p/chromium/issues/detail?id=642012
5 | * - https://stackoverflow.com/a/39971175/13989043
6 | *
7 | * This file contains a function that fixes the duration metadata of a WEBM file.
8 | * - Answer found: https://stackoverflow.com/a/75218309/13989043
9 | * - Code adapted from: https://github.com/mat-sz/webm-fix-duration
10 | * (forked from https://github.com/yusitnikov/fix-webm-duration)
11 | */
12 |
13 | /*
14 | * This is the list of possible WEBM file sections by their IDs.
15 | * Possible types: Container, Binary, Uint, Int, String, Float, Date
16 | */
// Metadata describing one EBML element kind.
interface Section {
    name: string; // Human-readable EBML element name.
    type: string; // One of: Container, Binary, Uint, Int, String, Float, Date.
}
21 |
// Map from EBML element ID to its name and payload type.
// NOTE(review): the Record type arguments (likely Record<number, Section>)
// appear to have been stripped from this dump — confirm against the
// original file.
const sections: Record = {
    0xa45dfa3: { name: "EBML", type: "Container" },
    0x286: { name: "EBMLVersion", type: "Uint" },
    0x2f7: { name: "EBMLReadVersion", type: "Uint" },
    0x2f2: { name: "EBMLMaxIDLength", type: "Uint" },
    0x2f3: { name: "EBMLMaxSizeLength", type: "Uint" },
    0x282: { name: "DocType", type: "String" },
    0x287: { name: "DocTypeVersion", type: "Uint" },
    0x285: { name: "DocTypeReadVersion", type: "Uint" },
    0x6c: { name: "Void", type: "Binary" },
    0x3f: { name: "CRC-32", type: "Binary" },
    0xb538667: { name: "SignatureSlot", type: "Container" },
    0x3e8a: { name: "SignatureAlgo", type: "Uint" },
    0x3e9a: { name: "SignatureHash", type: "Uint" },
    0x3ea5: { name: "SignaturePublicKey", type: "Binary" },
    0x3eb5: { name: "Signature", type: "Binary" },
    0x3e5b: { name: "SignatureElements", type: "Container" },
    0x3e7b: { name: "SignatureElementList", type: "Container" },
    0x2532: { name: "SignedElement", type: "Binary" },
    0x8538067: { name: "Segment", type: "Container" },
    0x14d9b74: { name: "SeekHead", type: "Container" },
    0xdbb: { name: "Seek", type: "Container" },
    0x13ab: { name: "SeekID", type: "Binary" },
    0x13ac: { name: "SeekPosition", type: "Uint" },
    0x549a966: { name: "Info", type: "Container" },
    0x33a4: { name: "SegmentUID", type: "Binary" },
    0x3384: { name: "SegmentFilename", type: "String" },
    0x1cb923: { name: "PrevUID", type: "Binary" },
    0x1c83ab: { name: "PrevFilename", type: "String" },
    0x1eb923: { name: "NextUID", type: "Binary" },
    0x1e83bb: { name: "NextFilename", type: "String" },
    0x444: { name: "SegmentFamily", type: "Binary" },
    0x2924: { name: "ChapterTranslate", type: "Container" },
    0x29fc: { name: "ChapterTranslateEditionUID", type: "Uint" },
    0x29bf: { name: "ChapterTranslateCodec", type: "Uint" },
    0x29a5: { name: "ChapterTranslateID", type: "Binary" },
    0xad7b1: { name: "TimecodeScale", type: "Uint" },
    0x489: { name: "Duration", type: "Float" },
    0x461: { name: "DateUTC", type: "Date" },
    0x3ba9: { name: "Title", type: "String" },
    0xd80: { name: "MuxingApp", type: "String" },
    0x1741: { name: "WritingApp", type: "String" },
    // 0xf43b675: { name: 'Cluster', type: 'Container' },
    0x67: { name: "Timecode", type: "Uint" },
    0x1854: { name: "SilentTracks", type: "Container" },
    0x18d7: { name: "SilentTrackNumber", type: "Uint" },
    0x27: { name: "Position", type: "Uint" },
    0x2b: { name: "PrevSize", type: "Uint" },
    0x23: { name: "SimpleBlock", type: "Binary" },
    0x20: { name: "BlockGroup", type: "Container" },
    0x21: { name: "Block", type: "Binary" },
    0x22: { name: "BlockVirtual", type: "Binary" },
    0x35a1: { name: "BlockAdditions", type: "Container" },
    0x26: { name: "BlockMore", type: "Container" },
    0x6e: { name: "BlockAddID", type: "Uint" },
    0x25: { name: "BlockAdditional", type: "Binary" },
    0x1b: { name: "BlockDuration", type: "Uint" },
    0x7a: { name: "ReferencePriority", type: "Uint" },
    0x7b: { name: "ReferenceBlock", type: "Int" },
    0x7d: { name: "ReferenceVirtual", type: "Int" },
    0x24: { name: "CodecState", type: "Binary" },
    0x35a2: { name: "DiscardPadding", type: "Int" },
    0xe: { name: "Slices", type: "Container" },
    0x68: { name: "TimeSlice", type: "Container" },
    0x4c: { name: "LaceNumber", type: "Uint" },
    0x4d: { name: "FrameNumber", type: "Uint" },
    0x4b: { name: "BlockAdditionID", type: "Uint" },
    0x4e: { name: "Delay", type: "Uint" },
    0x4f: { name: "SliceDuration", type: "Uint" },
    0x48: { name: "ReferenceFrame", type: "Container" },
    0x49: { name: "ReferenceOffset", type: "Uint" },
    0x4a: { name: "ReferenceTimeCode", type: "Uint" },
    0x2f: { name: "EncryptedBlock", type: "Binary" },
    0x654ae6b: { name: "Tracks", type: "Container" },
    0x2e: { name: "TrackEntry", type: "Container" },
    0x57: { name: "TrackNumber", type: "Uint" },
    0x33c5: { name: "TrackUID", type: "Uint" },
    0x3: { name: "TrackType", type: "Uint" },
    0x39: { name: "FlagEnabled", type: "Uint" },
    0x8: { name: "FlagDefault", type: "Uint" },
    0x15aa: { name: "FlagForced", type: "Uint" },
    0x1c: { name: "FlagLacing", type: "Uint" },
    0x2de7: { name: "MinCache", type: "Uint" },
    0x2df8: { name: "MaxCache", type: "Uint" },
    0x3e383: { name: "DefaultDuration", type: "Uint" },
    0x34e7a: { name: "DefaultDecodedFieldDuration", type: "Uint" },
    0x3314f: { name: "TrackTimecodeScale", type: "Float" },
    0x137f: { name: "TrackOffset", type: "Int" },
    0x15ee: { name: "MaxBlockAdditionID", type: "Uint" },
    0x136e: { name: "Name", type: "String" },
    0x2b59c: { name: "Language", type: "String" },
    0x6: { name: "CodecID", type: "String" },
    0x23a2: { name: "CodecPrivate", type: "Binary" },
    0x58688: { name: "CodecName", type: "String" },
    0x3446: { name: "AttachmentLink", type: "Uint" },
    0x1a9697: { name: "CodecSettings", type: "String" },
    0x1b4040: { name: "CodecInfoURL", type: "String" },
    0x6b240: { name: "CodecDownloadURL", type: "String" },
    0x2a: { name: "CodecDecodeAll", type: "Uint" },
    0x2fab: { name: "TrackOverlay", type: "Uint" },
    0x16aa: { name: "CodecDelay", type: "Uint" },
    0x16bb: { name: "SeekPreRoll", type: "Uint" },
    0x2624: { name: "TrackTranslate", type: "Container" },
    0x26fc: { name: "TrackTranslateEditionUID", type: "Uint" },
    0x26bf: { name: "TrackTranslateCodec", type: "Uint" },
    0x26a5: { name: "TrackTranslateTrackID", type: "Binary" },
    0x60: { name: "Video", type: "Container" },
    0x1a: { name: "FlagInterlaced", type: "Uint" },
    0x13b8: { name: "StereoMode", type: "Uint" },
    0x13c0: { name: "AlphaMode", type: "Uint" },
    0x13b9: { name: "OldStereoMode", type: "Uint" },
    0x30: { name: "PixelWidth", type: "Uint" },
    0x3a: { name: "PixelHeight", type: "Uint" },
    0x14aa: { name: "PixelCropBottom", type: "Uint" },
    0x14bb: { name: "PixelCropTop", type: "Uint" },
    0x14cc: { name: "PixelCropLeft", type: "Uint" },
    0x14dd: { name: "PixelCropRight", type: "Uint" },
    0x14b0: { name: "DisplayWidth", type: "Uint" },
    0x14ba: { name: "DisplayHeight", type: "Uint" },
    0x14b2: { name: "DisplayUnit", type: "Uint" },
    0x14b3: { name: "AspectRatioType", type: "Uint" },
    0xeb524: { name: "ColourSpace", type: "Binary" },
    0xfb523: { name: "GammaValue", type: "Float" },
    0x383e3: { name: "FrameRate", type: "Float" },
    0x61: { name: "Audio", type: "Container" },
    0x35: { name: "SamplingFrequency", type: "Float" },
    0x38b5: { name: "OutputSamplingFrequency", type: "Float" },
    0x1f: { name: "Channels", type: "Uint" },
    0x3d7b: { name: "ChannelPositions", type: "Binary" },
    0x2264: { name: "BitDepth", type: "Uint" },
    0x62: { name: "TrackOperation", type: "Container" },
    0x63: { name: "TrackCombinePlanes", type: "Container" },
    0x64: { name: "TrackPlane", type: "Container" },
    0x65: { name: "TrackPlaneUID", type: "Uint" },
    0x66: { name: "TrackPlaneType", type: "Uint" },
    0x69: { name: "TrackJoinBlocks", type: "Container" },
    0x6d: { name: "TrackJoinUID", type: "Uint" },
    0x40: { name: "TrickTrackUID", type: "Uint" },
    0x41: { name: "TrickTrackSegmentUID", type: "Binary" },
    0x46: { name: "TrickTrackFlag", type: "Uint" },
    0x47: { name: "TrickMasterTrackUID", type: "Uint" },
    0x44: { name: "TrickMasterTrackSegmentUID", type: "Binary" },
    0x2d80: { name: "ContentEncodings", type: "Container" },
    0x2240: { name: "ContentEncoding", type: "Container" },
    0x1031: { name: "ContentEncodingOrder", type: "Uint" },
    0x1032: { name: "ContentEncodingScope", type: "Uint" },
    0x1033: { name: "ContentEncodingType", type: "Uint" },
    0x1034: { name: "ContentCompression", type: "Container" },
    0x254: { name: "ContentCompAlgo", type: "Uint" },
    0x255: { name: "ContentCompSettings", type: "Binary" },
    0x1035: { name: "ContentEncryption", type: "Container" },
    0x7e1: { name: "ContentEncAlgo", type: "Uint" },
    0x7e2: { name: "ContentEncKeyID", type: "Binary" },
    0x7e3: { name: "ContentSignature", type: "Binary" },
    0x7e4: { name: "ContentSigKeyID", type: "Binary" },
    0x7e5: { name: "ContentSigAlgo", type: "Uint" },
    0x7e6: { name: "ContentSigHashAlgo", type: "Uint" },
    0xc53bb6b: { name: "Cues", type: "Container" },
    0x3b: { name: "CuePoint", type: "Container" },
    0x33: { name: "CueTime", type: "Uint" },
    0x37: { name: "CueTrackPositions", type: "Container" },
    0x77: { name: "CueTrack", type: "Uint" },
    0x71: { name: "CueClusterPosition", type: "Uint" },
    0x70: { name: "CueRelativePosition", type: "Uint" },
    0x32: { name: "CueDuration", type: "Uint" },
    0x1378: { name: "CueBlockNumber", type: "Uint" },
    0x6a: { name: "CueCodecState", type: "Uint" },
    0x5b: { name: "CueReference", type: "Container" },
    0x16: { name: "CueRefTime", type: "Uint" },
    0x17: { name: "CueRefCluster", type: "Uint" },
    0x135f: { name: "CueRefNumber", type: "Uint" },
    0x6b: { name: "CueRefCodecState", type: "Uint" },
    0x941a469: { name: "Attachments", type: "Container" },
    0x21a7: { name: "AttachedFile", type: "Container" },
    0x67e: { name: "FileDescription", type: "String" },
    0x66e: { name: "FileName", type: "String" },
    0x660: { name: "FileMimeType", type: "String" },
    0x65c: { name: "FileData", type: "Binary" },
    0x6ae: { name: "FileUID", type: "Uint" },
    0x675: { name: "FileReferral", type: "Binary" },
    0x661: { name: "FileUsedStartTime", type: "Uint" },
    0x662: { name: "FileUsedEndTime", type: "Uint" },
    0x43a770: { name: "Chapters", type: "Container" },
    0x5b9: { name: "EditionEntry", type: "Container" },
    0x5bc: { name: "EditionUID", type: "Uint" },
    0x5bd: { name: "EditionFlagHidden", type: "Uint" },
    0x5db: { name: "EditionFlagDefault", type: "Uint" },
    0x5dd: { name: "EditionFlagOrdered", type: "Uint" },
    0x36: { name: "ChapterAtom", type: "Container" },
    0x33c4: { name: "ChapterUID", type: "Uint" },
    0x1654: { name: "ChapterStringUID", type: "String" },
    0x11: { name: "ChapterTimeStart", type: "Uint" },
    0x12: { name: "ChapterTimeEnd", type: "Uint" },
    0x18: { name: "ChapterFlagHidden", type: "Uint" },
    0x598: { name: "ChapterFlagEnabled", type: "Uint" },
    0x2e67: { name: "ChapterSegmentUID", type: "Binary" },
    0x2ebc: { name: "ChapterSegmentEditionUID", type: "Uint" },
    0x23c3: { name: "ChapterPhysicalEquiv", type: "Uint" },
    0xf: { name: "ChapterTrack", type: "Container" },
    0x9: { name: "ChapterTrackNumber", type: "Uint" },
    0x0: { name: "ChapterDisplay", type: "Container" },
    0x5: { name: "ChapString", type: "String" },
    0x37c: { name: "ChapLanguage", type: "String" },
    0x37e: { name: "ChapCountry", type: "String" },
    0x2944: { name: "ChapProcess", type: "Container" },
    0x2955: { name: "ChapProcessCodecID", type: "Uint" },
    0x50d: { name: "ChapProcessPrivate", type: "Binary" },
    0x2911: { name: "ChapProcessCommand", type: "Container" },
    0x2922: { name: "ChapProcessTime", type: "Uint" },
    0x2933: { name: "ChapProcessData", type: "Binary" },
    0x254c367: { name: "Tags", type: "Container" },
    0x3373: { name: "Tag", type: "Container" },
    0x23c0: { name: "Targets", type: "Container" },
    0x28ca: { name: "TargetTypeValue", type: "Uint" },
    0x23ca: { name: "TargetType", type: "String" },
    0x23c5: { name: "TagTrackUID", type: "Uint" },
    0x23c9: { name: "TagEditionUID", type: "Uint" },
    0x23c4: { name: "TagChapterUID", type: "Uint" },
    0x23c6: { name: "TagAttachmentUID", type: "Uint" },
    0x27c8: { name: "SimpleTag", type: "Container" },
    0x5a3: { name: "TagName", type: "String" },
    0x47a: { name: "TagLanguage", type: "String" },
    0x484: { name: "TagDefault", type: "Uint" },
    0x487: { name: "TagString", type: "String" },
    0x485: { name: "TagBinary", type: "Binary" },
};
248 |
// Base class for all EBML/WEBM elements. Holds both the raw byte payload
// (`source`) and a decoded value (`data`); subclasses keep the two views in
// sync by overriding updateBySource/updateByData.
// NOTE(review): a generic parameter (e.g. `WebmBase<T>`) appears to have
// been stripped from this dump — `data?: T` references T. Confirm against
// the original file.
class WebmBase {
    source?: Uint8Array; // Raw bytes of this element's payload.
    data?: T; // Decoded representation; concrete type depends on the subclass.

    constructor(private name = "Unknown", private type = "Unknown") {}

    // Subclasses decode `source` into `data` here; the base is a no-op.
    updateBySource() {}

    setSource(source: Uint8Array) {
        this.source = source;
        this.updateBySource();
    }

    // Subclasses encode `data` back into `source` here; the base is a no-op.
    updateByData() {}

    setData(data: T) {
        this.data = data;
        this.updateByData();
    }
}
269 |
270 | class WebmUint extends WebmBase {
271 | constructor(name: string, type: string) {
272 | super(name, type || "Uint");
273 | }
274 |
275 | updateBySource() {
276 | // use hex representation of a number instead of number value
277 | this.data = "";
278 | for (let i = 0; i < this.source!.length; i++) {
279 | const hex = this.source![i].toString(16);
280 | this.data += padHex(hex);
281 | }
282 | }
283 |
284 | updateByData() {
285 | const length = this.data!.length / 2;
286 | this.source = new Uint8Array(length);
287 | for (let i = 0; i < length; i++) {
288 | const hex = this.data!.substr(i * 2, 2);
289 | this.source[i] = parseInt(hex, 16);
290 | }
291 | }
292 |
293 | getValue() {
294 | return parseInt(this.data!, 16);
295 | }
296 |
297 | setValue(value: number) {
298 | this.setData(padHex(value.toString(16)));
299 | }
300 | }
301 |
302 | function padHex(hex: string) {
303 | return hex.length % 2 === 1 ? "0" + hex : hex;
304 | }
305 |
306 | class WebmFloat extends WebmBase {
307 | constructor(name: string, type: string) {
308 | super(name, type || "Float");
309 | }
310 |
311 | getFloatArrayType() {
312 | return this.source && this.source.length === 4
313 | ? Float32Array
314 | : Float64Array;
315 | }
316 | updateBySource() {
317 | const byteArray = this.source!.reverse();
318 | const floatArrayType = this.getFloatArrayType();
319 | const floatArray = new floatArrayType(byteArray.buffer);
320 | this.data! = floatArray[0];
321 | }
322 | updateByData() {
323 | const floatArrayType = this.getFloatArrayType();
324 | const floatArray = new floatArrayType([this.data!]);
325 | const byteArray = new Uint8Array(floatArray.buffer);
326 | this.source = byteArray.reverse();
327 | }
328 | getValue() {
329 | return this.data;
330 | }
331 | setValue(value: number) {
332 | this.setData(value);
333 | }
334 | }
335 |
// One parsed child element inside a WebmContainer.
interface ContainerData {
    id: number; // EBML element ID (length-marker bit removed).
    idHex?: string; // Hex form of `id`, kept for debugging convenience.
    data: WebmBase; // The parsed child section.
}
341 |
// Container EBML element: an ordered sequence of child elements, each
// encoded as (vint id, vint length, payload). Parses children out of
// `source` and can re-serialize them back.
class WebmContainer extends WebmBase {
    offset: number = 0; // Byte cursor used while reading/writing `source`.
    data: ContainerData[] = []; // Parsed child sections, in file order.

    constructor(name: string, type: string) {
        super(name, type || "Container");
    }

    readByte() {
        return this.source![this.offset++];
    }
    // Reads an EBML variable-length unsigned integer ("vint"): the bit
    // position of the first set bit in the leading byte tells how many
    // additional bytes follow.
    readUint() {
        const firstByte = this.readByte();
        const bytes = 8 - firstByte.toString(2).length;
        // Strip the length-marker bit, keeping only the value bits.
        let value = firstByte - (1 << (7 - bytes));
        for (let i = 0; i < bytes; i++) {
            // don't use bit operators to support x86
            value *= 256;
            value += this.readByte();
        }
        return value;
    }
    // Parses `source` into child sections, instantiating the subclass that
    // matches each element's declared type in `sections`.
    updateBySource() {
        let end: number | undefined = undefined;
        this.data = [];
        for (
            this.offset = 0;
            this.offset < this.source!.length;
            this.offset = end
        ) {
            const id = this.readUint();
            const len = this.readUint();
            // Clamp to the buffer in case of a truncated/corrupt length.
            end = Math.min(this.offset + len, this.source!.length);
            const data = this.source!.slice(this.offset, end);

            const info = sections[id] || { name: "Unknown", type: "Unknown" };
            let ctr: any = WebmBase;
            switch (info.type) {
                case "Container":
                    ctr = WebmContainer;
                    break;
                case "Uint":
                    ctr = WebmUint;
                    break;
                case "Float":
                    ctr = WebmFloat;
                    break;
            }
            const section = new ctr(info.name, info.type);
            section.setSource(data);
            this.data.push({
                id: id,
                idHex: id.toString(16),
                data: section,
            });
        }
    }
    // Writes `x` as an EBML vint at the current offset. With `draft` set,
    // only the offset is advanced (used first to measure the total size).
    writeUint(x: number, draft = false) {
        for (
            var bytes = 1, flag = 0x80;
            x >= flag && bytes < 8;
            bytes++, flag *= 0x80
        ) {}

        if (!draft) {
            // `flag + x` sets the length-marker bit above the value bits.
            let value = flag + x;
            for (let i = bytes - 1; i >= 0; i--) {
                // don't use bit operators to support x86
                const c = value % 256;
                this.source![this.offset! + i] = c;
                value = (value - c) / 256;
            }
        }

        this.offset += bytes;
    }

    // Serializes all children into `source`; returns the total byte length.
    writeSections(draft = false) {
        this.offset = 0;
        for (let i = 0; i < this.data.length; i++) {
            const section = this.data[i],
                content = section.data.source,
                contentLength = content!.length;
            this.writeUint(section.id, draft);
            this.writeUint(contentLength, draft);
            if (!draft) {
                this.source!.set(content!, this.offset);
            }
            this.offset += contentLength;
        }
        return this.offset;
    }

    updateByData() {
        // run without accessing this.source to determine total length - need to know it to create Uint8Array
        const length = this.writeSections(true);
        this.source = new Uint8Array(length);
        // now really write data
        this.writeSections();
    }

    // Linear scan for the first direct child with the given EBML ID.
    getSectionById(id: number) {
        for (let i = 0; i < this.data.length; i++) {
            const section = this.data[i];
            if (section.id === id) {
                return section.data;
            }
        }

        return undefined;
    }
}
454 |
// Top-level WEBM file wrapper that carries the duration-patching logic.
class WebmFile extends WebmContainer {
    constructor(source: Uint8Array) {
        super("File", "File");
        this.setSource(source);
    }

    // Writes `duration` into Segment > Info > Duration. Returns false when
    // the file layout is unexpected or a positive duration already exists.
    fixDuration(duration: number) {
        // Locate the Segment container (EBML ID 0x8538067).
        const segmentSection = this.getSectionById(0x8538067) as WebmContainer;
        if (!segmentSection) {
            return false;
        }

        // Locate Segment > Info (EBML ID 0x549a966).
        const infoSection = segmentSection.getSectionById(
            0x549a966,
        ) as WebmContainer;
        if (!infoSection) {
            return false;
        }

        // Locate Info > TimecodeScale (EBML ID 0xad7b1).
        // NOTE(review): the cast says WebmFloat, but the `sections` table
        // maps 0xad7b1 to Uint, so at runtime this is a WebmUint. Both
        // classes implement setValue, so this works — confirm intent.
        const timeScaleSection = infoSection.getSectionById(
            0xad7b1,
        ) as WebmFloat;
        if (!timeScaleSection) {
            return false;
        }

        let durationSection = infoSection.getSectionById(0x489) as WebmFloat;
        if (durationSection) {
            if (durationSection.getValue()! <= 0) {
                durationSection.setValue(duration);
            } else {
                // A valid duration is already present; nothing to fix.
                return false;
            }
        } else {
            // append Duration section
            durationSection = new WebmFloat("Duration", "Float");
            durationSection.setValue(duration);
            infoSection.data.push({
                id: 0x489,
                data: durationSection,
            });
        }

        // set default time scale to 1 millisecond (1000000 nanoseconds)
        timeScaleSection.setValue(1000000);
        // Re-serialize from the innermost modified container outward.
        infoSection.updateByData();
        segmentSection.updateByData();
        this.updateByData();

        return true;
    }

    // Wraps the (possibly patched) bytes in a Blob of the given MIME type.
    toBlob(type = "video/webm") {
        return new Blob([this.source!.buffer], { type });
    }
}
511 |
512 | /**
513 | * Fixes duration on MediaRecorder output.
514 | * @param blob Input Blob with incorrect duration.
515 | * @param duration Correct duration (in milliseconds).
516 | * @param type Output blob mimetype (default: video/webm).
517 | * @returns
518 | */
519 | export const webmFixDuration = (
520 | blob: Blob,
521 | duration: number,
522 | type = "video/webm",
523 | ): Promise => {
524 | return new Promise((resolve, reject) => {
525 | try {
526 | const reader = new FileReader();
527 |
528 | reader.addEventListener("loadend", () => {
529 | try {
530 | const result = reader.result as ArrayBuffer;
531 | const file = new WebmFile(new Uint8Array(result));
532 | if (file.fixDuration(duration)) {
533 | resolve(file.toBlob(type));
534 | } else {
535 | resolve(blob);
536 | }
537 | } catch (ex) {
538 | reject(ex);
539 | }
540 | });
541 |
542 | reader.addEventListener("error", () => reject());
543 |
544 | reader.readAsArrayBuffer(blob);
545 | } catch (ex) {
546 | reject(ex);
547 | }
548 | });
549 | };
550 |
--------------------------------------------------------------------------------
/src/utils/Constants.ts:
--------------------------------------------------------------------------------
// Heuristic user-agent sniff: returns true on mobile/tablet browsers.
// The two regexes are the canonical detectmobilebrowsers.com patterns;
// do not hand-edit them.
function mobileTabletCheck() {
    // https://stackoverflow.com/questions/11381673/detecting-a-mobile-browser
    let check = false;
    (function (a: string) {
        if (
            /(android|bb\d+|meego).+mobile|avantgo|bada\/|blackberry|blazer|compal|elaine|fennec|hiptop|iemobile|ip(hone|od)|iris|kindle|lge |maemo|midp|mmp|mobile.+firefox|netfront|opera m(ob|in)i|palm( os)?|phone|p(ixi|re)\/|plucker|pocket|psp|series(4|6)0|symbian|treo|up\.(browser|link)|vodafone|wap|windows ce|xda|xiino|android|ipad|playbook|silk/i.test(
                a,
            ) ||
            // The second pattern only matches against the first four chars.
            /1207|6310|6590|3gso|4thp|50[1-6]i|770s|802s|a wa|abac|ac(er|oo|s\-)|ai(ko|rn)|al(av|ca|co)|amoi|an(ex|ny|yw)|aptu|ar(ch|go)|as(te|us)|attw|au(di|\-m|r |s )|avan|be(ck|ll|nq)|bi(lb|rd)|bl(ac|az)|br(e|v)w|bumb|bw\-(n|u)|c55\/|capi|ccwa|cdm\-|cell|chtm|cldc|cmd\-|co(mp|nd)|craw|da(it|ll|ng)|dbte|dc\-s|devi|dica|dmob|do(c|p)o|ds(12|\-d)|el(49|ai)|em(l2|ul)|er(ic|k0)|esl8|ez([4-7]0|os|wa|ze)|fetc|fly(\-|_)|g1 u|g560|gene|gf\-5|g\-mo|go(\.w|od)|gr(ad|un)|haie|hcit|hd\-(m|p|t)|hei\-|hi(pt|ta)|hp( i|ip)|hs\-c|ht(c(\-| |_|a|g|p|s|t)|tp)|hu(aw|tc)|i\-(20|go|ma)|i230|iac( |\-|\/)|ibro|idea|ig01|ikom|im1k|inno|ipaq|iris|ja(t|v)a|jbro|jemu|jigs|kddi|keji|kgt( |\/)|klon|kpt |kwc\-|kyo(c|k)|le(no|xi)|lg( g|\/(k|l|u)|50|54|\-[a-w])|libw|lynx|m1\-w|m3ga|m50\/|ma(te|ui|xo)|mc(01|21|ca)|m\-cr|me(rc|ri)|mi(o8|oa|ts)|mmef|mo(01|02|bi|de|do|t(\-| |o|v)|zz)|mt(50|p1|v )|mwbp|mywa|n10[0-2]|n20[2-3]|n30(0|2)|n50(0|2|5)|n7(0(0|1)|10)|ne((c|m)\-|on|tf|wf|wg|wt)|nok(6|i)|nzph|o2im|op(ti|wv)|oran|owg1|p800|pan(a|d|t)|pdxg|pg(13|\-([1-8]|c))|phil|pire|pl(ay|uc)|pn\-2|po(ck|rt|se)|prox|psio|pt\-g|qa\-a|qc(07|12|21|32|60|\-[2-7]|i\-)|qtek|r380|r600|raks|rim9|ro(ve|zo)|s55\/|sa(ge|ma|mm|ms|ny|va)|sc(01|h\-|oo|p\-)|sdk\/|se(c(\-|0|1)|47|mc|nd|ri)|sgh\-|shar|sie(\-|m)|sk\-0|sl(45|id)|sm(al|ar|b3|it|t5)|so(ft|ny)|sp(01|h\-|v\-|v )|sy(01|mb)|t2(18|50)|t6(00|10|18)|ta(gt|lk)|tcl\-|tdg\-|tel(i|m)|tim\-|t\-mo|to(pl|sh)|ts(70|m\-|m3|m5)|tx\-9|up(\.b|g1|si)|utst|v400|v750|veri|vi(rg|te)|vk(40|5[0-3]|\-v)|vm40|voda|vulc|vx(52|53|60|61|70|80|81|83|85|98)|w3c(\-| )|webc|whit|wi(g |nc|nw)|wmlb|wonu|x700|yas\-|your|zeto|zte\-/i.test(
                a.substr(0, 4),
            )
        )
            check = true;
    })(
        // Fall back through vendor strings; Opera exposes window.opera.
        navigator.userAgent ||
            navigator.vendor ||
            ("opera" in window && typeof window.opera === "string"
                ? window.opera
                : ""),
    );
    return check;
}
// Evaluated once at module load; device class drives the defaults below.
const isMobileOrTablet = mobileTabletCheck();
// App-wide defaults. Mobile/tablet devices get a shorter demo clip ("jfk")
// and the quantized model to reduce download size and memory use.
export default {
    SAMPLING_RATE: 16000, // Audio sampling rate in Hz.
    DEFAULT_AUDIO_URL: `https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/${
        isMobileOrTablet ? "jfk" : "ted_60_16k"
    }.wav`,
    DEFAULT_MODEL: "Xenova/whisper-tiny",
    DEFAULT_SUBTASK: "transcribe",
    DEFAULT_LANGUAGE: "english",
    DEFAULT_QUANTIZED: isMobileOrTablet,
    DEFAULT_MULTILINGUAL: false,
};
35 |
--------------------------------------------------------------------------------
/src/vite-env.d.ts:
--------------------------------------------------------------------------------
1 | // eslint-disable-next-line spaced-comment
2 | ///
3 |
--------------------------------------------------------------------------------
/src/worker.js:
--------------------------------------------------------------------------------
/* eslint-disable camelcase */
import { pipeline, env } from "@xenova/transformers";

// Disable local models: don't probe this app's own server for model files,
// so weights are always fetched remotely.
env.allowLocalModels = false;
6 |
7 | // Define model factories
8 | // Ensures only one model is created of each type
// Lazily builds and caches a single transformers.js pipeline per factory.
// Subclasses configure the static `task`/`model`/`quantized` fields; the
// cached pipeline promise lives in the static `instance` slot.
class PipelineFactory {
    static task = null;
    static model = null;
    static quantized = null;
    static instance = null;

    constructor(tokenizer, model, quantized) {
        this.tokenizer = tokenizer;
        this.model = model;
        this.quantized = quantized;
    }

    static async getInstance(progress_callback = null) {
        // Build the pipeline on first use; every later call reuses the
        // cached promise.
        this.instance ??= pipeline(this.task, this.model, {
            quantized: this.quantized,
            progress_callback,

            // For medium models, we need to load the `no_attentions` revision
            // to avoid running out of memory.
            revision: this.model.includes("/whisper-medium")
                ? "no_attentions"
                : "main",
        });

        return this.instance;
    }
}
35 |
// Worker entry point: receive a transcription request from the main thread,
// run it, and post the final result back.
self.addEventListener("message", async (event) => {
    const { audio, model, multilingual, quantized, subtask, language } =
        event.data;

    const transcript = await transcribe(
        audio,
        model,
        multilingual,
        quantized,
        subtask,
        language,
    );

    // On error, `transcribe` has already posted an error message and
    // returned null — nothing more to send.
    if (transcript === null) return;

    self.postMessage({
        status: "complete",
        task: "automatic-speech-recognition",
        data: transcript,
    });
});
58 |
// Concrete factory for the ASR pipeline. `model` and `quantized` are filled
// in by `transcribe()` before the first `getInstance()` call.
class AutomaticSpeechRecognitionPipelineFactory extends PipelineFactory {
    static task = "automatic-speech-recognition";
    static model = null;
    static quantized = null;
}
64 |
/**
 * Transcribe an audio buffer with a Whisper model.
 *
 * Progress events (model download, partial transcripts) are posted to the
 * main thread as a side effect via `self.postMessage`; the final pipeline
 * output is also returned.
 *
 * @param audio - decoded audio samples (presumably mono at Constants'
 *   SAMPLING_RATE of 16 kHz — TODO confirm against AudioManager)
 * @param model - base model id, e.g. "Xenova/whisper-tiny"
 * @param multilingual - if false (and not a distil-whisper model), the
 *   English-only ".en" checkpoint variant is loaded instead
 * @param quantized - whether to load quantized weights
 * @param subtask - pipeline task option (e.g. "transcribe")
 * @param language - source-language option passed to the pipeline
 * @returns the pipeline output, or null if transcription failed (the error
 *   has already been posted to the main thread as a "error" message)
 */
const transcribe = async (
    audio,
    model,
    multilingual,
    quantized,
    subtask,
    language,
) => {

    const isDistilWhisper = model.startsWith("distil-whisper/");

    // distil-whisper checkpoints never take the ".en" suffix; other models
    // switch to their English-only variant when not multilingual.
    let modelName = model;
    if (!isDistilWhisper && !multilingual) {
        modelName += ".en"
    }

    const p = AutomaticSpeechRecognitionPipelineFactory;
    if (p.model !== modelName || p.quantized !== quantized) {
        // Invalidate model if different: dispose the cached pipeline so
        // getInstance() below rebuilds it with the new settings.
        p.model = modelName;
        p.quantized = quantized;

        if (p.instance !== null) {
            (await p.getInstance()).dispose();
            p.instance = null;
        }
    }

    // Load transcriber model, forwarding download/progress events to the UI.
    let transcriber = await p.getInstance((data) => {
        self.postMessage(data);
    });

    // Seconds of audio per model output position — used to convert token
    // timestamps back to seconds when decoding.
    const time_precision =
        transcriber.processor.feature_extractor.config.chunk_length /
        transcriber.model.config.max_source_positions;

    // Storage for chunks to be processed. Initialise with an empty chunk.
    let chunks_to_process = [
        {
            tokens: [],
            finalised: false,
        },
    ];

    // TODO: Storage for fully-processed and merged chunks
    // let decoded_chunks = [];

    // Called once per processed audio window: freeze the current chunk and
    // open a fresh one unless this was the final window.
    function chunk_callback(chunk) {
        let last = chunks_to_process[chunks_to_process.length - 1];

        // Overwrite last chunk with new info
        Object.assign(last, chunk);
        last.finalised = true;

        // Create an empty chunk after, if it is not the last chunk
        if (!chunk.is_last) {
            chunks_to_process.push({
                tokens: [],
                finalised: false,
            });
        }
    }

    // Inject custom callback function to handle merging of chunks. Runs after
    // each generation step so the UI can show a live partial transcript.
    function callback_function(item) {
        let last = chunks_to_process[chunks_to_process.length - 1];

        // Update tokens of last chunk
        last.tokens = [...item[0].output_token_ids];

        // Merge text chunks
        // TODO optimise so we don't have to decode all chunks every time
        let data = transcriber.tokenizer._decode_asr(chunks_to_process, {
            time_precision: time_precision,
            return_timestamps: true,
            force_full_sequences: false,
        });

        self.postMessage({
            status: "update",
            task: "automatic-speech-recognition",
            data: data,
        });
    }

    // Actually run transcription
    let output = await transcriber(audio, {
        // Greedy decoding (no sampling)
        top_k: 0,
        do_sample: false,

        // Sliding window over the audio; distil-whisper uses shorter windows
        chunk_length_s: isDistilWhisper ? 20 : 30,
        stride_length_s: isDistilWhisper ? 3 : 5,

        // Language and task
        language: language,
        task: subtask,

        // Return timestamps
        return_timestamps: true,
        force_full_sequences: false,

        // Callback functions
        callback_function: callback_function, // after each generation step
        chunk_callback: chunk_callback, // after each chunk is processed

        // Errors are reported to the main thread rather than thrown, and the
        // function resolves to null so the caller can bail out.
    }).catch((error) => {
        self.postMessage({
            status: "error",
            task: "automatic-speech-recognition",
            data: error,
        });
        return null;
    });

    return output;
};
183 |
--------------------------------------------------------------------------------
/tailwind.config.cjs:
--------------------------------------------------------------------------------
1 | /** @type {import('tailwindcss').Config} */
2 | module.exports = {
3 | content: [
4 | "./index.html",
5 | "./src/**/*.{js,ts,jsx,tsx}",
6 | ],
7 | theme: {
8 | extend: {},
9 | },
10 | plugins: [],
11 | }
12 |
--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "compilerOptions": {
3 | "target": "ESNext",
4 | "useDefineForClassFields": true,
5 | "lib": ["DOM", "DOM.Iterable", "ESNext"],
6 | "allowJs": false,
7 | "skipLibCheck": true,
8 | "esModuleInterop": false,
9 | "allowSyntheticDefaultImports": true,
10 | "strict": true,
11 | "forceConsistentCasingInFileNames": true,
12 | "module": "ESNext",
13 | "moduleResolution": "Node",
14 | "resolveJsonModule": true,
15 | "isolatedModules": true,
16 | "noEmit": true,
17 | "jsx": "react-jsx"
18 | },
19 | "include": ["src"],
20 | "references": [{ "path": "./tsconfig.node.json" }]
21 | }
22 |
--------------------------------------------------------------------------------
/tsconfig.node.json:
--------------------------------------------------------------------------------
1 | {
2 | "compilerOptions": {
3 | "composite": true,
4 | "module": "ESNext",
5 | "moduleResolution": "Node",
6 | "allowSyntheticDefaultImports": true
7 | },
8 | "include": ["vite.config.ts"]
9 | }
10 |
--------------------------------------------------------------------------------
/vite.config.ts:
--------------------------------------------------------------------------------
1 | import { defineConfig } from 'vite'
2 | import react from '@vitejs/plugin-react'
3 |
4 |
5 | // https://vitejs.dev/config/
6 | export default defineConfig({
7 | plugins: [
8 | react()
9 | ],
10 | })
11 |
--------------------------------------------------------------------------------