├── .eslintignore ├── .eslintrc ├── .gitignore ├── .prettierrc ├── LICENSE ├── README.md ├── index.html ├── package-lock.json ├── package.json ├── postcss.config.cjs ├── public └── vite.svg ├── src ├── App.tsx ├── assets │ └── react.svg ├── components │ ├── AudioManager.tsx │ ├── AudioPlayer.tsx │ ├── AudioRecorder.tsx │ ├── Progress.tsx │ ├── TranscribeButton.tsx │ ├── Transcript.tsx │ └── modal │ │ ├── Modal.tsx │ │ └── UrlInput.tsx ├── css │ └── index.css ├── hooks │ ├── useTranscriber.ts │ └── useWorker.ts ├── index.tsx ├── utils │ ├── AudioUtils.ts │ ├── BlobFix.ts │ └── Constants.ts ├── vite-env.d.ts └── worker.js ├── tailwind.config.cjs ├── tsconfig.json ├── tsconfig.node.json └── vite.config.ts /.eslintignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | dist -------------------------------------------------------------------------------- /.eslintrc: -------------------------------------------------------------------------------- 1 | { 2 | "root": true, 3 | "env": { 4 | "browser": true, 5 | "es2021": true 6 | }, 7 | "parser": "@typescript-eslint/parser", 8 | "extends": [ 9 | "eslint:recommended", 10 | "plugin:react/recommended", 11 | "plugin:@typescript-eslint/recommended", 12 | "plugin:@typescript-eslint/eslint-recommended", 13 | "prettier" 14 | ], 15 | "overrides": [], 16 | "parserOptions": { 17 | "ecmaFeatures": { 18 | "jsx": true 19 | }, 20 | "ecmaVersion": "latest", 21 | "sourceType": "module" 22 | }, 23 | "plugins": [ 24 | "react", 25 | "react-hooks", 26 | "@typescript-eslint", 27 | "prettier" 28 | ], 29 | "rules": { 30 | "react/react-in-jsx-scope": "off", 31 | "camelcase": "error", 32 | "spaced-comment": "error", 33 | "no-duplicate-imports": "error", 34 | "prettier/prettier": "error" 35 | }, 36 | "settings": { 37 | "react": { 38 | "version": "detect" 39 | } 40 | }, 41 | "prettier/prettier": [ 42 | "error", 43 | { 44 | "endOfLine": "auto" 45 | } 46 | ] 47 | } 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | pnpm-debug.log* 8 | lerna-debug.log* 9 | 10 | node_modules 11 | dist 12 | dist-ssr 13 | *.local 14 | 15 | # Editor directories and files 16 | .vscode/* 17 | !.vscode/extensions.json 18 | .idea 19 | .DS_Store 20 | *.suo 21 | *.ntvs* 22 | *.njsproj 23 | *.sln 24 | *.sw? 25 | -------------------------------------------------------------------------------- /.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "semi": true, 3 | "tabWidth": 4, 4 | "printWidth": 80, 5 | "singleQuote": false, 6 | "trailingComma": "all", 7 | "jsxSingleQuote": true, 8 | "bracketSpacing": true, 9 | "endOfLine":"auto" 10 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Xenova 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Whisper Web 2 | 3 | ML-powered speech recognition directly in your browser! Built with [🤗 Transformers.js](https://github.com/xenova/transformers.js). 4 | 5 | Check out the demo site [here](https://huggingface.co/spaces/Xenova/whisper-web). 6 | 7 | > [!IMPORTANT] 8 | > Experimental WebGPU support has been added to [this branch](https://github.com/xenova/whisper-web/tree/experimental-webgpu) ([demo](https://huggingface.co/spaces/Xenova/whisper-webgpu)), if you'd like to run with GPU acceleration! 9 | 10 | https://github.com/xenova/whisper-web/assets/26504141/fb170d84-9678-41b5-9248-a112ecc74c27 11 | 12 | ## Running locally 13 | 14 | 1. Clone the repo and install dependencies: 15 | 16 | ```bash 17 | git clone https://github.com/xenova/whisper-web.git 18 | cd whisper-web 19 | npm install 20 | ``` 21 | 22 | 2. Run the development server: 23 | 24 | ```bash 25 | npm run dev 26 | ``` 27 | > Firefox users need to change the `dom.workers.modules.enabled` setting in `about:config` to `true` to enable Web Workers. 28 | > Check out [this issue](https://github.com/xenova/whisper-web/issues/8) for more details. 29 | 30 | 3. Open the link (e.g., [http://localhost:5173/](http://localhost:5173/)) in your browser. 31 | -------------------------------------------------------------------------------- /index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Whisper Web 8 | 9 | 10 |
11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "whisper-web", 3 | "private": true, 4 | "version": "0.0.0", 5 | "type": "module", 6 | "scripts": { 7 | "dev": "vite", 8 | "clean": "rm -rf node_modules/ dist/", 9 | "build": "tsc && vite build", 10 | "preview": "vite preview", 11 | "lint": "eslint src/**/*.{js,jsx,ts,tsx,json}", 12 | "lint:fix": "eslint --fix src/**/*.{js,jsx,ts,tsx,json}", 13 | "format": "prettier --write src/**/*.{js,jsx,ts,tsx,css,md,json} --config ./.prettierrc", 14 | "tsc": "tsc" 15 | }, 16 | "dependencies": { 17 | "@headlessui/react": "^1.7.13", 18 | "@xenova/transformers": "^2.7.0", 19 | "axios": "^1.3.4", 20 | "react": "^18.2.0", 21 | "react-dom": "^18.2.0" 22 | }, 23 | "devDependencies": { 24 | "@types/react": "^18.0.28", 25 | "@types/react-dom": "^18.0.11", 26 | "@typescript-eslint/eslint-plugin": "^5.57.0", 27 | "@typescript-eslint/parser": "^5.57.0", 28 | "@vitejs/plugin-react": "^3.1.0", 29 | "autoprefixer": "^10.4.14", 30 | "eslint": "^8.37.0", 31 | "eslint-config-prettier": "^8.8.0", 32 | "eslint-config-standard-with-typescript": "^34.0.1", 33 | "eslint-plugin-import": "^2.27.5", 34 | "eslint-plugin-n": "^15.7.0", 35 | "eslint-plugin-prettier": "^4.2.1", 36 | "eslint-plugin-promise": "^6.1.1", 37 | "eslint-plugin-react": "^7.32.2", 38 | "eslint-plugin-react-hooks": "^4.6.0", 39 | "postcss": "^8.4.21", 40 | "prettier": "^2.8.7", 41 | "tailwindcss": "^3.2.7", 42 | "typescript": "^4.9.5", 43 | "vite": "^4.2.0" 44 | }, 45 | "overrides": { 46 | "semver": "^7.5.3", 47 | "protobufjs": "^7.2.4" 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /postcss.config.cjs: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | plugins: { 3 | tailwindcss: {}, 4 | autoprefixer: {}, 5 | }, 6 
| } 7 | -------------------------------------------------------------------------------- /public/vite.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/App.tsx: -------------------------------------------------------------------------------- 1 | import { AudioManager } from "./components/AudioManager"; 2 | import Transcript from "./components/Transcript"; 3 | import { useTranscriber } from "./hooks/useTranscriber"; 4 | 5 | function App() { 6 | const transcriber = useTranscriber(); 7 | 8 | return ( 9 |
10 |
11 |

12 | Whisper Web 13 |

14 |

15 | ML-powered speech recognition directly in your browser 16 |

17 | 18 | 19 |
20 | 21 |
22 | Made with{" "} 23 | 27 | 🤗 Transformers.js 28 | 29 |
30 |
31 | ); 32 | } 33 | 34 | export default App; 35 | -------------------------------------------------------------------------------- /src/assets/react.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/components/AudioManager.tsx: -------------------------------------------------------------------------------- 1 | import React, { useCallback, useEffect, useState } from "react"; 2 | import axios from "axios"; 3 | import Modal from "./modal/Modal"; 4 | import { UrlInput } from "./modal/UrlInput"; 5 | import AudioPlayer from "./AudioPlayer"; 6 | import { TranscribeButton } from "./TranscribeButton"; 7 | import Constants from "../utils/Constants"; 8 | import { Transcriber } from "../hooks/useTranscriber"; 9 | import Progress from "./Progress"; 10 | import AudioRecorder from "./AudioRecorder"; 11 | 12 | function titleCase(str: string) { 13 | str = str.toLowerCase(); 14 | return (str.match(/\w+.?/g) || []) 15 | .map((word) => { 16 | return word.charAt(0).toUpperCase() + word.slice(1); 17 | }) 18 | .join(""); 19 | } 20 | 21 | // List of supported languages: 22 | // https://help.openai.com/en/articles/7031512-whisper-api-faq 23 | // https://github.com/openai/whisper/blob/248b6cb124225dd263bb9bd32d060b6517e067f8/whisper/tokenizer.py#L79 24 | const LANGUAGES = { 25 | en: "english", 26 | zh: "chinese", 27 | de: "german", 28 | es: "spanish/castilian", 29 | ru: "russian", 30 | ko: "korean", 31 | fr: "french", 32 | ja: "japanese", 33 | pt: "portuguese", 34 | tr: "turkish", 35 | pl: "polish", 36 | ca: "catalan/valencian", 37 | nl: "dutch/flemish", 38 | ar: "arabic", 39 | sv: "swedish", 40 | it: "italian", 41 | id: "indonesian", 42 | hi: "hindi", 43 | fi: "finnish", 44 | vi: "vietnamese", 45 | he: "hebrew", 46 | uk: "ukrainian", 47 | el: "greek", 48 | ms: "malay", 49 | cs: "czech", 50 | ro: "romanian/moldavian/moldovan", 51 | da: "danish", 52 | hu: 
"hungarian", 53 | ta: "tamil", 54 | no: "norwegian", 55 | th: "thai", 56 | ur: "urdu", 57 | hr: "croatian", 58 | bg: "bulgarian", 59 | lt: "lithuanian", 60 | la: "latin", 61 | mi: "maori", 62 | ml: "malayalam", 63 | cy: "welsh", 64 | sk: "slovak", 65 | te: "telugu", 66 | fa: "persian", 67 | lv: "latvian", 68 | bn: "bengali", 69 | sr: "serbian", 70 | az: "azerbaijani", 71 | sl: "slovenian", 72 | kn: "kannada", 73 | et: "estonian", 74 | mk: "macedonian", 75 | br: "breton", 76 | eu: "basque", 77 | is: "icelandic", 78 | hy: "armenian", 79 | ne: "nepali", 80 | mn: "mongolian", 81 | bs: "bosnian", 82 | kk: "kazakh", 83 | sq: "albanian", 84 | sw: "swahili", 85 | gl: "galician", 86 | mr: "marathi", 87 | pa: "punjabi/panjabi", 88 | si: "sinhala/sinhalese", 89 | km: "khmer", 90 | sn: "shona", 91 | yo: "yoruba", 92 | so: "somali", 93 | af: "afrikaans", 94 | oc: "occitan", 95 | ka: "georgian", 96 | be: "belarusian", 97 | tg: "tajik", 98 | sd: "sindhi", 99 | gu: "gujarati", 100 | am: "amharic", 101 | yi: "yiddish", 102 | lo: "lao", 103 | uz: "uzbek", 104 | fo: "faroese", 105 | ht: "haitian creole/haitian", 106 | ps: "pashto/pushto", 107 | tk: "turkmen", 108 | nn: "nynorsk", 109 | mt: "maltese", 110 | sa: "sanskrit", 111 | lb: "luxembourgish/letzeburgesch", 112 | my: "myanmar/burmese", 113 | bo: "tibetan", 114 | tl: "tagalog", 115 | mg: "malagasy", 116 | as: "assamese", 117 | tt: "tatar", 118 | haw: "hawaiian", 119 | ln: "lingala", 120 | ha: "hausa", 121 | ba: "bashkir", 122 | jw: "javanese", 123 | su: "sundanese", 124 | }; 125 | 126 | export enum AudioSource { 127 | URL = "URL", 128 | FILE = "FILE", 129 | RECORDING = "RECORDING", 130 | } 131 | 132 | export function AudioManager(props: { transcriber: Transcriber }) { 133 | const [progress, setProgress] = useState(undefined); 134 | const [audioData, setAudioData] = useState< 135 | | { 136 | buffer: AudioBuffer; 137 | url: string; 138 | source: AudioSource; 139 | mimeType: string; 140 | } 141 | | undefined 142 | >(undefined); 143 
| const [audioDownloadUrl, setAudioDownloadUrl] = useState< 144 | string | undefined 145 | >(undefined); 146 | 147 | const isAudioLoading = progress !== undefined; 148 | 149 | const resetAudio = () => { 150 | setAudioData(undefined); 151 | setAudioDownloadUrl(undefined); 152 | }; 153 | 154 | const setAudioFromDownload = async ( 155 | data: ArrayBuffer, 156 | mimeType: string, 157 | ) => { 158 | const audioCTX = new AudioContext({ 159 | sampleRate: Constants.SAMPLING_RATE, 160 | }); 161 | const blobUrl = URL.createObjectURL( 162 | new Blob([data], { type: "audio/*" }), 163 | ); 164 | const decoded = await audioCTX.decodeAudioData(data); 165 | setAudioData({ 166 | buffer: decoded, 167 | url: blobUrl, 168 | source: AudioSource.URL, 169 | mimeType: mimeType, 170 | }); 171 | }; 172 | 173 | const setAudioFromRecording = async (data: Blob) => { 174 | resetAudio(); 175 | setProgress(0); 176 | const blobUrl = URL.createObjectURL(data); 177 | const fileReader = new FileReader(); 178 | fileReader.onprogress = (event) => { 179 | setProgress(event.loaded / event.total || 0); 180 | }; 181 | fileReader.onloadend = async () => { 182 | const audioCTX = new AudioContext({ 183 | sampleRate: Constants.SAMPLING_RATE, 184 | }); 185 | const arrayBuffer = fileReader.result as ArrayBuffer; 186 | const decoded = await audioCTX.decodeAudioData(arrayBuffer); 187 | setProgress(undefined); 188 | setAudioData({ 189 | buffer: decoded, 190 | url: blobUrl, 191 | source: AudioSource.RECORDING, 192 | mimeType: data.type, 193 | }); 194 | }; 195 | fileReader.readAsArrayBuffer(data); 196 | }; 197 | 198 | const downloadAudioFromUrl = async ( 199 | requestAbortController: AbortController, 200 | ) => { 201 | if (audioDownloadUrl) { 202 | try { 203 | setAudioData(undefined); 204 | setProgress(0); 205 | const { data, headers } = (await axios.get(audioDownloadUrl, { 206 | signal: requestAbortController.signal, 207 | responseType: "arraybuffer", 208 | onDownloadProgress(progressEvent) { 209 | 
setProgress(progressEvent.progress || 0); 210 | }, 211 | })) as { 212 | data: ArrayBuffer; 213 | headers: { "content-type": string }; 214 | }; 215 | 216 | let mimeType = headers["content-type"]; 217 | if (!mimeType || mimeType === "audio/wave") { 218 | mimeType = "audio/wav"; 219 | } 220 | setAudioFromDownload(data, mimeType); 221 | } catch (error) { 222 | console.log("Request failed or aborted", error); 223 | } finally { 224 | setProgress(undefined); 225 | } 226 | } 227 | }; 228 | 229 | // When URL changes, download audio 230 | useEffect(() => { 231 | if (audioDownloadUrl) { 232 | const requestAbortController = new AbortController(); 233 | downloadAudioFromUrl(requestAbortController); 234 | return () => { 235 | requestAbortController.abort(); 236 | }; 237 | } 238 | }, [audioDownloadUrl]); 239 | 240 | return ( 241 | <> 242 |
243 |
244 | } 246 | text={"From URL"} 247 | onUrlUpdate={(e) => { 248 | props.transcriber.onInputChange(); 249 | setAudioDownloadUrl(e); 250 | }} 251 | /> 252 | 253 | } 255 | text={"From file"} 256 | onFileUpdate={(decoded, blobUrl, mimeType) => { 257 | props.transcriber.onInputChange(); 258 | setAudioData({ 259 | buffer: decoded, 260 | url: blobUrl, 261 | source: AudioSource.FILE, 262 | mimeType: mimeType, 263 | }); 264 | }} 265 | /> 266 | {navigator.mediaDevices && ( 267 | <> 268 | 269 | } 271 | text={"Record"} 272 | setAudioData={(e) => { 273 | props.transcriber.onInputChange(); 274 | setAudioFromRecording(e); 275 | }} 276 | /> 277 | 278 | )} 279 |
280 | { 281 | 284 | } 285 |
286 | {audioData && ( 287 | <> 288 | 292 | 293 |
294 | { 296 | props.transcriber.start(audioData.buffer); 297 | }} 298 | isModelLoading={props.transcriber.isModelLoading} 299 | // isAudioLoading || 300 | isTranscribing={props.transcriber.isBusy} 301 | /> 302 | 303 | } 307 | /> 308 |
309 | {props.transcriber.progressItems.length > 0 && ( 310 |
311 | 314 | {props.transcriber.progressItems.map((data) => ( 315 |
316 | 320 |
321 | ))} 322 |
323 | )} 324 | 325 | )} 326 | 327 | ); 328 | } 329 | 330 | function SettingsTile(props: { 331 | icon: JSX.Element; 332 | className?: string; 333 | transcriber: Transcriber; 334 | }) { 335 | const [showModal, setShowModal] = useState(false); 336 | 337 | const onClick = () => { 338 | setShowModal(true); 339 | }; 340 | 341 | const onClose = () => { 342 | setShowModal(false); 343 | }; 344 | 345 | const onSubmit = (url: string) => { 346 | onClose(); 347 | }; 348 | 349 | return ( 350 |
351 | 352 | 358 |
359 | ); 360 | } 361 | 362 | function SettingsModal(props: { 363 | show: boolean; 364 | onSubmit: (url: string) => void; 365 | onClose: () => void; 366 | transcriber: Transcriber; 367 | }) { 368 | const names = Object.values(LANGUAGES).map(titleCase); 369 | 370 | const models = { 371 | // Original checkpoints 372 | 'Xenova/whisper-tiny': [41, 152], 373 | 'Xenova/whisper-base': [77, 291], 374 | 'Xenova/whisper-small': [249], 375 | 'Xenova/whisper-medium': [776], 376 | 377 | // Distil Whisper (English-only) 378 | 'distil-whisper/distil-medium.en': [402], 379 | 'distil-whisper/distil-large-v2': [767], 380 | }; 381 | return ( 382 | 387 | 388 | 418 |
419 |
420 | { 425 | props.transcriber.setMultilingual( 426 | e.target.checked, 427 | ); 428 | }} 429 | > 430 | 433 |
434 |
435 | { 440 | props.transcriber.setQuantized( 441 | e.target.checked, 442 | ); 443 | }} 444 | > 445 | 448 |
449 |
450 | {props.transcriber.multilingual && ( 451 | <> 452 | 453 | 468 | 469 | 483 | 484 | )} 485 | 486 | } 487 | onClose={props.onClose} 488 | onSubmit={() => {}} 489 | /> 490 | ); 491 | } 492 | 493 | function VerticalBar() { 494 | return
; 495 | } 496 | 497 | function AudioDataBar(props: { progress: number }) { 498 | return ; 499 | } 500 | 501 | function ProgressBar(props: { progress: string }) { 502 | return ( 503 |
504 |
508 |
509 | ); 510 | } 511 | 512 | function UrlTile(props: { 513 | icon: JSX.Element; 514 | text: string; 515 | onUrlUpdate: (url: string) => void; 516 | }) { 517 | const [showModal, setShowModal] = useState(false); 518 | 519 | const onClick = () => { 520 | setShowModal(true); 521 | }; 522 | 523 | const onClose = () => { 524 | setShowModal(false); 525 | }; 526 | 527 | const onSubmit = (url: string) => { 528 | props.onUrlUpdate(url); 529 | onClose(); 530 | }; 531 | 532 | return ( 533 | <> 534 | 535 | 536 | 537 | ); 538 | } 539 | 540 | function UrlModal(props: { 541 | show: boolean; 542 | onSubmit: (url: string) => void; 543 | onClose: () => void; 544 | }) { 545 | const [url, setUrl] = useState(Constants.DEFAULT_AUDIO_URL); 546 | 547 | const onChange = (event: React.ChangeEvent) => { 548 | setUrl(event.target.value); 549 | }; 550 | 551 | const onSubmit = () => { 552 | props.onSubmit(url); 553 | }; 554 | 555 | return ( 556 | 561 | {"Enter the URL of the audio file you want to load."} 562 | 563 | 564 | } 565 | onClose={props.onClose} 566 | submitText={"Load"} 567 | onSubmit={onSubmit} 568 | /> 569 | ); 570 | } 571 | 572 | function FileTile(props: { 573 | icon: JSX.Element; 574 | text: string; 575 | onFileUpdate: ( 576 | decoded: AudioBuffer, 577 | blobUrl: string, 578 | mimeType: string, 579 | ) => void; 580 | }) { 581 | // const audioPlayer = useRef(null); 582 | 583 | // Create hidden input element 584 | let elem = document.createElement("input"); 585 | elem.type = "file"; 586 | elem.oninput = (event) => { 587 | // Make sure we have files to use 588 | let files = (event.target as HTMLInputElement).files; 589 | if (!files) return; 590 | 591 | // Create a blob that we can use as an src for our audio element 592 | const urlObj = URL.createObjectURL(files[0]); 593 | const mimeType = files[0].type; 594 | 595 | const reader = new FileReader(); 596 | reader.addEventListener("load", async (e) => { 597 | const arrayBuffer = e.target?.result as ArrayBuffer; // Get the ArrayBuffer 598 
| if (!arrayBuffer) return; 599 | 600 | const audioCTX = new AudioContext({ 601 | sampleRate: Constants.SAMPLING_RATE, 602 | }); 603 | 604 | const decoded = await audioCTX.decodeAudioData(arrayBuffer); 605 | 606 | props.onFileUpdate(decoded, urlObj, mimeType); 607 | }); 608 | reader.readAsArrayBuffer(files[0]); 609 | 610 | // Reset files 611 | elem.value = ""; 612 | }; 613 | 614 | return ( 615 | <> 616 | elem.click()} 620 | /> 621 | 622 | ); 623 | } 624 | 625 | function RecordTile(props: { 626 | icon: JSX.Element; 627 | text: string; 628 | setAudioData: (data: Blob) => void; 629 | }) { 630 | const [showModal, setShowModal] = useState(false); 631 | 632 | const onClick = () => { 633 | setShowModal(true); 634 | }; 635 | 636 | const onClose = () => { 637 | setShowModal(false); 638 | }; 639 | 640 | const onSubmit = (data: Blob | undefined) => { 641 | if (data) { 642 | props.setAudioData(data); 643 | onClose(); 644 | } 645 | }; 646 | 647 | return ( 648 | <> 649 | 650 | 655 | 656 | ); 657 | } 658 | 659 | function RecordModal(props: { 660 | show: boolean; 661 | onSubmit: (data: Blob | undefined) => void; 662 | onClose: () => void; 663 | }) { 664 | const [audioBlob, setAudioBlob] = useState(); 665 | 666 | const onRecordingComplete = (blob: Blob) => { 667 | setAudioBlob(blob); 668 | }; 669 | 670 | const onSubmit = () => { 671 | props.onSubmit(audioBlob); 672 | setAudioBlob(undefined); 673 | }; 674 | 675 | const onClose = () => { 676 | props.onClose(); 677 | setAudioBlob(undefined); 678 | }; 679 | 680 | return ( 681 | 686 | {"Record audio using your microphone"} 687 | 688 | 689 | } 690 | onClose={onClose} 691 | submitText={"Load"} 692 | submitEnabled={audioBlob !== undefined} 693 | onSubmit={onSubmit} 694 | /> 695 | ); 696 | } 697 | 698 | function Tile(props: { 699 | icon: JSX.Element; 700 | text?: string; 701 | onClick?: () => void; 702 | }) { 703 | return ( 704 | 715 | ); 716 | } 717 | 718 | function AnchorIcon() { 719 | return ( 720 | 727 | 732 | 733 | ); 734 | } 735 | 736 
| function FolderIcon() { 737 | return ( 738 | 745 | 750 | 751 | ); 752 | } 753 | 754 | function SettingsIcon() { 755 | return ( 756 | 763 | 768 | 773 | 774 | ); 775 | } 776 | 777 | function MicrophoneIcon() { 778 | return ( 779 | 786 | 791 | 792 | ); 793 | } 794 | -------------------------------------------------------------------------------- /src/components/AudioPlayer.tsx: -------------------------------------------------------------------------------- 1 | import { useEffect, useRef } from "react"; 2 | 3 | export default function AudioPlayer(props: { 4 | audioUrl: string; 5 | mimeType: string; 6 | }) { 7 | const audioPlayer = useRef(null); 8 | const audioSource = useRef(null); 9 | 10 | // Updates src when url changes 11 | useEffect(() => { 12 | if (audioPlayer.current && audioSource.current) { 13 | audioSource.current.src = props.audioUrl; 14 | audioPlayer.current.load(); 15 | } 16 | }, [props.audioUrl]); 17 | 18 | return ( 19 |
20 | 27 |
28 | ); 29 | } 30 | -------------------------------------------------------------------------------- /src/components/AudioRecorder.tsx: -------------------------------------------------------------------------------- 1 | import { useState, useEffect, useRef } from "react"; 2 | 3 | import { formatAudioTimestamp } from "../utils/AudioUtils"; 4 | import { webmFixDuration } from "../utils/BlobFix"; 5 | 6 | function getMimeType() { 7 | const types = [ 8 | "audio/webm", 9 | "audio/mp4", 10 | "audio/ogg", 11 | "audio/wav", 12 | "audio/aac", 13 | ]; 14 | for (let i = 0; i < types.length; i++) { 15 | if (MediaRecorder.isTypeSupported(types[i])) { 16 | return types[i]; 17 | } 18 | } 19 | return undefined; 20 | } 21 | 22 | export default function AudioRecorder(props: { 23 | onRecordingComplete: (blob: Blob) => void; 24 | }) { 25 | const [recording, setRecording] = useState(false); 26 | const [duration, setDuration] = useState(0); 27 | const [recordedBlob, setRecordedBlob] = useState(null); 28 | 29 | const streamRef = useRef(null); 30 | const mediaRecorderRef = useRef(null); 31 | const chunksRef = useRef([]); 32 | 33 | const audioRef = useRef(null); 34 | 35 | const startRecording = async () => { 36 | // Reset recording (if any) 37 | setRecordedBlob(null); 38 | 39 | let startTime = Date.now(); 40 | 41 | try { 42 | if (!streamRef.current) { 43 | streamRef.current = await navigator.mediaDevices.getUserMedia({ 44 | audio: true, 45 | }); 46 | } 47 | 48 | const mimeType = getMimeType(); 49 | const mediaRecorder = new MediaRecorder(streamRef.current, { 50 | mimeType, 51 | }); 52 | 53 | mediaRecorderRef.current = mediaRecorder; 54 | 55 | mediaRecorder.addEventListener("dataavailable", async (event) => { 56 | if (event.data.size > 0) { 57 | chunksRef.current.push(event.data); 58 | } 59 | if (mediaRecorder.state === "inactive") { 60 | const duration = Date.now() - startTime; 61 | 62 | // Received a stop event 63 | let blob = new Blob(chunksRef.current, { type: mimeType }); 64 | 65 | if 
(mimeType === "audio/webm") { 66 | blob = await webmFixDuration(blob, duration, blob.type); 67 | } 68 | 69 | setRecordedBlob(blob); 70 | props.onRecordingComplete(blob); 71 | 72 | chunksRef.current = []; 73 | } 74 | }); 75 | mediaRecorder.start(); 76 | setRecording(true); 77 | } catch (error) { 78 | console.error("Error accessing microphone:", error); 79 | } 80 | }; 81 | 82 | const stopRecording = () => { 83 | if ( 84 | mediaRecorderRef.current && 85 | mediaRecorderRef.current.state === "recording" 86 | ) { 87 | mediaRecorderRef.current.stop(); // set state to inactive 88 | setDuration(0); 89 | setRecording(false); 90 | } 91 | }; 92 | 93 | useEffect(() => { 94 | let stream: MediaStream | null = null; 95 | 96 | if (recording) { 97 | const timer = setInterval(() => { 98 | setDuration((prevDuration) => prevDuration + 1); 99 | }, 1000); 100 | 101 | return () => { 102 | clearInterval(timer); 103 | }; 104 | } 105 | 106 | return () => { 107 | if (stream) { 108 | stream.getTracks().forEach((track) => track.stop()); 109 | } 110 | }; 111 | }, [recording]); 112 | 113 | const handleToggleRecording = () => { 114 | if (recording) { 115 | stopRecording(); 116 | } else { 117 | startRecording(); 118 | } 119 | }; 120 | 121 | return ( 122 |
123 | 136 | 137 | {recordedBlob && ( 138 | 144 | )} 145 |
146 | ); 147 | } 148 | -------------------------------------------------------------------------------- /src/components/Progress.tsx: -------------------------------------------------------------------------------- 1 | export default function Progress({ 2 | text, 3 | percentage, 4 | }: { 5 | text: string; 6 | percentage: number; 7 | }) { 8 | percentage = percentage ?? 0; 9 | return ( 10 |
11 |
15 | {text} ({`${percentage.toFixed(2)}%`}) 16 |
17 |
18 | ); 19 | } 20 | -------------------------------------------------------------------------------- /src/components/TranscribeButton.tsx: -------------------------------------------------------------------------------- 1 | interface Props extends React.ButtonHTMLAttributes { 2 | isModelLoading: boolean; 3 | isTranscribing: boolean; 4 | } 5 | 6 | export function TranscribeButton(props: Props): JSX.Element { 7 | const { isModelLoading, isTranscribing, onClick, ...buttonProps } = props; 8 | return ( 9 | 27 | ); 28 | } 29 | 30 | export function Spinner(props: { text: string }): JSX.Element { 31 | return ( 32 |
33 | 50 | {props.text} 51 |
52 | ); 53 | } 54 | -------------------------------------------------------------------------------- /src/components/Transcript.tsx: -------------------------------------------------------------------------------- 1 | import { useRef, useEffect } from "react"; 2 | 3 | import { TranscriberData } from "../hooks/useTranscriber"; 4 | import { formatAudioTimestamp } from "../utils/AudioUtils"; 5 | 6 | interface Props { 7 | transcribedData: TranscriberData | undefined; 8 | } 9 | 10 | export default function Transcript({ transcribedData }: Props) { 11 | const divRef = useRef(null); 12 | 13 | const saveBlob = (blob: Blob, filename: string) => { 14 | const url = URL.createObjectURL(blob); 15 | const link = document.createElement("a"); 16 | link.href = url; 17 | link.download = filename; 18 | link.click(); 19 | URL.revokeObjectURL(url); 20 | }; 21 | const exportTXT = () => { 22 | let chunks = transcribedData?.chunks ?? []; 23 | let text = chunks 24 | .map((chunk) => chunk.text) 25 | .join("") 26 | .trim(); 27 | 28 | const blob = new Blob([text], { type: "text/plain" }); 29 | saveBlob(blob, "transcript.txt"); 30 | }; 31 | const exportJSON = () => { 32 | let jsonData = JSON.stringify(transcribedData?.chunks ?? [], null, 2); 33 | 34 | // post-process the JSON to make it more readable 35 | const regex = /( "timestamp": )\[\s+(\S+)\s+(\S+)\s+\]/gm; 36 | jsonData = jsonData.replace(regex, "$1[$2 $3]"); 37 | 38 | const blob = new Blob([jsonData], { type: "application/json" }); 39 | saveBlob(blob, "transcript.json"); 40 | }; 41 | 42 | // Scroll to the bottom when the component updates 43 | useEffect(() => { 44 | if (divRef.current) { 45 | const diff = Math.abs( 46 | divRef.current.offsetHeight + 47 | divRef.current.scrollTop - 48 | divRef.current.scrollHeight, 49 | ); 50 | 51 | if (diff <= 64) { 52 | // We're close enough to the bottom, so scroll to the bottom 53 | divRef.current.scrollTop = divRef.current.scrollHeight; 54 | } 55 | } 56 | }); 57 | 58 | return ( 59 |
63 | {transcribedData?.chunks && 64 | transcribedData.chunks.map((chunk, i) => ( 65 |
69 |
70 | {formatAudioTimestamp(chunk.timestamp[0])} 71 |
72 | {chunk.text} 73 |
74 | ))} 75 | {transcribedData && !transcribedData.isBusy && ( 76 |
77 | 83 | 89 |
90 | )} 91 |
92 | ); 93 | } 94 | -------------------------------------------------------------------------------- /src/components/modal/Modal.tsx: -------------------------------------------------------------------------------- 1 | import { Dialog, Transition } from "@headlessui/react"; 2 | import { Fragment } from "react"; 3 | 4 | export interface Props { 5 | show: boolean; 6 | onClose: () => void; 7 | onSubmit: () => void; 8 | submitText?: string; 9 | submitEnabled?: boolean; 10 | title: string | JSX.Element; 11 | content: string | JSX.Element; 12 | } 13 | 14 | export default function Modal({ 15 | show, 16 | onClose, 17 | onSubmit, 18 | title, 19 | content, 20 | submitText, 21 | submitEnabled = true, 22 | }: Props) { 23 | return ( 24 | 25 | 26 | 35 |
36 | 37 | 38 |
39 |
40 | 49 | 50 | 54 | {title} 55 | 56 |
57 | {content} 58 |
59 | 60 |
61 | {submitText && ( 62 | 78 | )} 79 | 86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 | ); 94 | } 95 | -------------------------------------------------------------------------------- /src/components/modal/UrlInput.tsx: -------------------------------------------------------------------------------- 1 | import { DetailedHTMLProps, InputHTMLAttributes } from "react"; 2 | 3 | export function UrlInput( 4 | props: DetailedHTMLProps< 5 | InputHTMLAttributes, 6 | HTMLInputElement 7 | >, 8 | ) { 9 | return ( 10 |
11 | 18 |
19 | ); 20 | } 21 | -------------------------------------------------------------------------------- /src/css/index.css: -------------------------------------------------------------------------------- 1 | @tailwind base; 2 | @tailwind components; 3 | @tailwind utilities; 4 | 5 | html, 6 | body, 7 | #root { 8 | height: 100%; 9 | } 10 | 11 | audio::-webkit-media-controls-panel { 12 | background-color: white; 13 | } 14 | 15 | .container { 16 | width: 41rem /* 656px */; 17 | max-width: 95vw; 18 | } 19 | -------------------------------------------------------------------------------- /src/hooks/useTranscriber.ts: -------------------------------------------------------------------------------- 1 | import { useCallback, useMemo, useState } from "react"; 2 | import { useWorker } from "./useWorker"; 3 | import Constants from "../utils/Constants"; 4 | 5 | interface ProgressItem { 6 | file: string; 7 | loaded: number; 8 | progress: number; 9 | total: number; 10 | name: string; 11 | status: string; 12 | } 13 | 14 | interface TranscriberUpdateData { 15 | data: [ 16 | string, 17 | { chunks: { text: string; timestamp: [number, number | null] }[] }, 18 | ]; 19 | text: string; 20 | } 21 | 22 | interface TranscriberCompleteData { 23 | data: { 24 | text: string; 25 | chunks: { text: string; timestamp: [number, number | null] }[]; 26 | }; 27 | } 28 | 29 | export interface TranscriberData { 30 | isBusy: boolean; 31 | text: string; 32 | chunks: { text: string; timestamp: [number, number | null] }[]; 33 | } 34 | 35 | export interface Transcriber { 36 | onInputChange: () => void; 37 | isBusy: boolean; 38 | isModelLoading: boolean; 39 | progressItems: ProgressItem[]; 40 | start: (audioData: AudioBuffer | undefined) => void; 41 | output?: TranscriberData; 42 | model: string; 43 | setModel: (model: string) => void; 44 | multilingual: boolean; 45 | setMultilingual: (model: boolean) => void; 46 | quantized: boolean; 47 | setQuantized: (model: boolean) => void; 48 | subtask: string; 49 | 
setSubtask: (subtask: string) => void; 50 | language?: string; 51 | setLanguage: (language: string) => void; 52 | } 53 | 54 | export function useTranscriber(): Transcriber { 55 | const [transcript, setTranscript] = useState( 56 | undefined, 57 | ); 58 | const [isBusy, setIsBusy] = useState(false); 59 | const [isModelLoading, setIsModelLoading] = useState(false); 60 | 61 | const [progressItems, setProgressItems] = useState([]); 62 | 63 | const webWorker = useWorker((event) => { 64 | const message = event.data; 65 | // Update the state with the result 66 | switch (message.status) { 67 | case "progress": 68 | // Model file progress: update one of the progress items. 69 | setProgressItems((prev) => 70 | prev.map((item) => { 71 | if (item.file === message.file) { 72 | return { ...item, progress: message.progress }; 73 | } 74 | return item; 75 | }), 76 | ); 77 | break; 78 | case "update": 79 | // Received partial update 80 | // console.log("update", message); 81 | // eslint-disable-next-line no-case-declarations 82 | const updateMessage = message as TranscriberUpdateData; 83 | setTranscript({ 84 | isBusy: true, 85 | text: updateMessage.data[0], 86 | chunks: updateMessage.data[1].chunks, 87 | }); 88 | break; 89 | case "complete": 90 | // Received complete transcript 91 | // console.log("complete", message); 92 | // eslint-disable-next-line no-case-declarations 93 | const completeMessage = message as TranscriberCompleteData; 94 | setTranscript({ 95 | isBusy: false, 96 | text: completeMessage.data.text, 97 | chunks: completeMessage.data.chunks, 98 | }); 99 | setIsBusy(false); 100 | break; 101 | 102 | case "initiate": 103 | // Model file start load: add a new progress item to the list. 
104 | setIsModelLoading(true); 105 | setProgressItems((prev) => [...prev, message]); 106 | break; 107 | case "ready": 108 | setIsModelLoading(false); 109 | break; 110 | case "error": 111 | setIsBusy(false); 112 | alert( 113 | `${message.data.message} This is most likely because you are using Safari on an M1/M2 Mac. Please try again from Chrome, Firefox, or Edge.\n\nIf this is not the case, please file a bug report.`, 114 | ); 115 | break; 116 | case "done": 117 | // Model file loaded: remove the progress item from the list. 118 | setProgressItems((prev) => 119 | prev.filter((item) => item.file !== message.file), 120 | ); 121 | break; 122 | 123 | default: 124 | // initiate/download/done 125 | break; 126 | } 127 | }); 128 | 129 | const [model, setModel] = useState(Constants.DEFAULT_MODEL); 130 | const [subtask, setSubtask] = useState(Constants.DEFAULT_SUBTASK); 131 | const [quantized, setQuantized] = useState( 132 | Constants.DEFAULT_QUANTIZED, 133 | ); 134 | const [multilingual, setMultilingual] = useState( 135 | Constants.DEFAULT_MULTILINGUAL, 136 | ); 137 | const [language, setLanguage] = useState( 138 | Constants.DEFAULT_LANGUAGE, 139 | ); 140 | 141 | const onInputChange = useCallback(() => { 142 | setTranscript(undefined); 143 | }, []); 144 | 145 | const postRequest = useCallback( 146 | async (audioData: AudioBuffer | undefined) => { 147 | if (audioData) { 148 | setTranscript(undefined); 149 | setIsBusy(true); 150 | 151 | let audio; 152 | if (audioData.numberOfChannels === 2) { 153 | const SCALING_FACTOR = Math.sqrt(2); 154 | 155 | let left = audioData.getChannelData(0); 156 | let right = audioData.getChannelData(1); 157 | 158 | audio = new Float32Array(left.length); 159 | for (let i = 0; i < audioData.length; ++i) { 160 | audio[i] = SCALING_FACTOR * (left[i] + right[i]) / 2; 161 | } 162 | } else { 163 | // If the audio is not stereo, we can just use the first channel: 164 | audio = audioData.getChannelData(0); 165 | } 166 | 167 | webWorker.postMessage({ 168 | 
audio, 169 | model, 170 | multilingual, 171 | quantized, 172 | subtask: multilingual ? subtask : null, 173 | language: 174 | multilingual && language !== "auto" ? language : null, 175 | }); 176 | } 177 | }, 178 | [webWorker, model, multilingual, quantized, subtask, language], 179 | ); 180 | 181 | const transcriber = useMemo(() => { 182 | return { 183 | onInputChange, 184 | isBusy, 185 | isModelLoading, 186 | progressItems, 187 | start: postRequest, 188 | output: transcript, 189 | model, 190 | setModel, 191 | multilingual, 192 | setMultilingual, 193 | quantized, 194 | setQuantized, 195 | subtask, 196 | setSubtask, 197 | language, 198 | setLanguage, 199 | }; 200 | }, [ 201 | isBusy, 202 | isModelLoading, 203 | progressItems, 204 | postRequest, 205 | transcript, 206 | model, 207 | multilingual, 208 | quantized, 209 | subtask, 210 | language, 211 | ]); 212 | 213 | return transcriber; 214 | } 215 | -------------------------------------------------------------------------------- /src/hooks/useWorker.ts: -------------------------------------------------------------------------------- 1 | import { useState } from "react"; 2 | 3 | export interface MessageEventHandler { 4 | (event: MessageEvent): void; 5 | } 6 | 7 | export function useWorker(messageEventHandler: MessageEventHandler): Worker { 8 | // Create new worker once and never again 9 | const [worker] = useState(() => createWorker(messageEventHandler)); 10 | return worker; 11 | } 12 | 13 | function createWorker(messageEventHandler: MessageEventHandler): Worker { 14 | const worker = new Worker(new URL("../worker.js", import.meta.url), { 15 | type: "module", 16 | }); 17 | // Listen for messages from the Web Worker 18 | worker.addEventListener("message", messageEventHandler); 19 | return worker; 20 | } 21 | -------------------------------------------------------------------------------- /src/index.tsx: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import 
ReactDOM from "react-dom/client"; 3 | import App from "./App"; 4 | import "./css/index.css"; 5 | 6 | /* Entry point: mount the React tree into the #root element declared in index.html. */ ReactDOM.createRoot(document.getElementById("root") as HTMLElement).render( 7 | /* NOTE(review): the JSX children appear stripped by this dump — presumably <React.StrictMode><App /></React.StrictMode>, given the App import above; confirm against the repository. */ 8 | 9 | , 10 | ); 11 | -------------------------------------------------------------------------------- /src/utils/AudioUtils.ts: -------------------------------------------------------------------------------- 1 | /* Left-pads a number to two digits, e.g. 7 -> "07". */ function padTime(time: number) { 2 | return String(time).padStart(2, "0"); 3 | } 4 | 5 | /* Formats a duration in seconds as "MM:SS", or "HH:MM:SS" when the hours part is nonzero; the "| 0" bitwise-OR truncates fractional values toward zero. */ export function formatAudioTimestamp(time: number) { 6 | const hours = (time / (60 * 60)) | 0; 7 | time -= hours * (60 * 60); 8 | const minutes = (time / 60) | 0; 9 | time -= minutes * 60; 10 | const seconds = time | 0; 11 | return `${hours ? padTime(hours) + ":" : ""}${padTime(minutes)}:${padTime( 12 | seconds, 13 | )}`; 14 | } 15 | -------------------------------------------------------------------------------- /src/utils/BlobFix.ts: -------------------------------------------------------------------------------- 1 | /* 2 | * There is a bug where `navigator.mediaDevices.getUserMedia` + `MediaRecorder` 3 | * creates WEBM files without duration metadata. See: 4 | * - https://bugs.chromium.org/p/chromium/issues/detail?id=642012 5 | * - https://stackoverflow.com/a/39971175/13989043 6 | * 7 | * This file contains a function that fixes the duration metadata of a WEBM file. 8 | * - Answer found: https://stackoverflow.com/a/75218309/13989043 9 | * - Code adapted from: https://github.com/mat-sz/webm-fix-duration 10 | * (forked from https://github.com/yusitnikov/fix-webm-duration) 11 | */ 12 | 13 | /* 14 | * This is the list of possible WEBM file sections by their IDs.
15 | * Possible types: Container, Binary, Uint, Int, String, Float, Date 16 | */ 17 | interface Section { 18 | name: string; 19 | type: string; 20 | } 21 | 22 | const sections: Record = { 23 | 0xa45dfa3: { name: "EBML", type: "Container" }, 24 | 0x286: { name: "EBMLVersion", type: "Uint" }, 25 | 0x2f7: { name: "EBMLReadVersion", type: "Uint" }, 26 | 0x2f2: { name: "EBMLMaxIDLength", type: "Uint" }, 27 | 0x2f3: { name: "EBMLMaxSizeLength", type: "Uint" }, 28 | 0x282: { name: "DocType", type: "String" }, 29 | 0x287: { name: "DocTypeVersion", type: "Uint" }, 30 | 0x285: { name: "DocTypeReadVersion", type: "Uint" }, 31 | 0x6c: { name: "Void", type: "Binary" }, 32 | 0x3f: { name: "CRC-32", type: "Binary" }, 33 | 0xb538667: { name: "SignatureSlot", type: "Container" }, 34 | 0x3e8a: { name: "SignatureAlgo", type: "Uint" }, 35 | 0x3e9a: { name: "SignatureHash", type: "Uint" }, 36 | 0x3ea5: { name: "SignaturePublicKey", type: "Binary" }, 37 | 0x3eb5: { name: "Signature", type: "Binary" }, 38 | 0x3e5b: { name: "SignatureElements", type: "Container" }, 39 | 0x3e7b: { name: "SignatureElementList", type: "Container" }, 40 | 0x2532: { name: "SignedElement", type: "Binary" }, 41 | 0x8538067: { name: "Segment", type: "Container" }, 42 | 0x14d9b74: { name: "SeekHead", type: "Container" }, 43 | 0xdbb: { name: "Seek", type: "Container" }, 44 | 0x13ab: { name: "SeekID", type: "Binary" }, 45 | 0x13ac: { name: "SeekPosition", type: "Uint" }, 46 | 0x549a966: { name: "Info", type: "Container" }, 47 | 0x33a4: { name: "SegmentUID", type: "Binary" }, 48 | 0x3384: { name: "SegmentFilename", type: "String" }, 49 | 0x1cb923: { name: "PrevUID", type: "Binary" }, 50 | 0x1c83ab: { name: "PrevFilename", type: "String" }, 51 | 0x1eb923: { name: "NextUID", type: "Binary" }, 52 | 0x1e83bb: { name: "NextFilename", type: "String" }, 53 | 0x444: { name: "SegmentFamily", type: "Binary" }, 54 | 0x2924: { name: "ChapterTranslate", type: "Container" }, 55 | 0x29fc: { name: "ChapterTranslateEditionUID", type: 
"Uint" }, 56 | 0x29bf: { name: "ChapterTranslateCodec", type: "Uint" }, 57 | 0x29a5: { name: "ChapterTranslateID", type: "Binary" }, 58 | 0xad7b1: { name: "TimecodeScale", type: "Uint" }, 59 | 0x489: { name: "Duration", type: "Float" }, 60 | 0x461: { name: "DateUTC", type: "Date" }, 61 | 0x3ba9: { name: "Title", type: "String" }, 62 | 0xd80: { name: "MuxingApp", type: "String" }, 63 | 0x1741: { name: "WritingApp", type: "String" }, 64 | // 0xf43b675: { name: 'Cluster', type: 'Container' }, 65 | 0x67: { name: "Timecode", type: "Uint" }, 66 | 0x1854: { name: "SilentTracks", type: "Container" }, 67 | 0x18d7: { name: "SilentTrackNumber", type: "Uint" }, 68 | 0x27: { name: "Position", type: "Uint" }, 69 | 0x2b: { name: "PrevSize", type: "Uint" }, 70 | 0x23: { name: "SimpleBlock", type: "Binary" }, 71 | 0x20: { name: "BlockGroup", type: "Container" }, 72 | 0x21: { name: "Block", type: "Binary" }, 73 | 0x22: { name: "BlockVirtual", type: "Binary" }, 74 | 0x35a1: { name: "BlockAdditions", type: "Container" }, 75 | 0x26: { name: "BlockMore", type: "Container" }, 76 | 0x6e: { name: "BlockAddID", type: "Uint" }, 77 | 0x25: { name: "BlockAdditional", type: "Binary" }, 78 | 0x1b: { name: "BlockDuration", type: "Uint" }, 79 | 0x7a: { name: "ReferencePriority", type: "Uint" }, 80 | 0x7b: { name: "ReferenceBlock", type: "Int" }, 81 | 0x7d: { name: "ReferenceVirtual", type: "Int" }, 82 | 0x24: { name: "CodecState", type: "Binary" }, 83 | 0x35a2: { name: "DiscardPadding", type: "Int" }, 84 | 0xe: { name: "Slices", type: "Container" }, 85 | 0x68: { name: "TimeSlice", type: "Container" }, 86 | 0x4c: { name: "LaceNumber", type: "Uint" }, 87 | 0x4d: { name: "FrameNumber", type: "Uint" }, 88 | 0x4b: { name: "BlockAdditionID", type: "Uint" }, 89 | 0x4e: { name: "Delay", type: "Uint" }, 90 | 0x4f: { name: "SliceDuration", type: "Uint" }, 91 | 0x48: { name: "ReferenceFrame", type: "Container" }, 92 | 0x49: { name: "ReferenceOffset", type: "Uint" }, 93 | 0x4a: { name: "ReferenceTimeCode", 
type: "Uint" }, 94 | 0x2f: { name: "EncryptedBlock", type: "Binary" }, 95 | 0x654ae6b: { name: "Tracks", type: "Container" }, 96 | 0x2e: { name: "TrackEntry", type: "Container" }, 97 | 0x57: { name: "TrackNumber", type: "Uint" }, 98 | 0x33c5: { name: "TrackUID", type: "Uint" }, 99 | 0x3: { name: "TrackType", type: "Uint" }, 100 | 0x39: { name: "FlagEnabled", type: "Uint" }, 101 | 0x8: { name: "FlagDefault", type: "Uint" }, 102 | 0x15aa: { name: "FlagForced", type: "Uint" }, 103 | 0x1c: { name: "FlagLacing", type: "Uint" }, 104 | 0x2de7: { name: "MinCache", type: "Uint" }, 105 | 0x2df8: { name: "MaxCache", type: "Uint" }, 106 | 0x3e383: { name: "DefaultDuration", type: "Uint" }, 107 | 0x34e7a: { name: "DefaultDecodedFieldDuration", type: "Uint" }, 108 | 0x3314f: { name: "TrackTimecodeScale", type: "Float" }, 109 | 0x137f: { name: "TrackOffset", type: "Int" }, 110 | 0x15ee: { name: "MaxBlockAdditionID", type: "Uint" }, 111 | 0x136e: { name: "Name", type: "String" }, 112 | 0x2b59c: { name: "Language", type: "String" }, 113 | 0x6: { name: "CodecID", type: "String" }, 114 | 0x23a2: { name: "CodecPrivate", type: "Binary" }, 115 | 0x58688: { name: "CodecName", type: "String" }, 116 | 0x3446: { name: "AttachmentLink", type: "Uint" }, 117 | 0x1a9697: { name: "CodecSettings", type: "String" }, 118 | 0x1b4040: { name: "CodecInfoURL", type: "String" }, 119 | 0x6b240: { name: "CodecDownloadURL", type: "String" }, 120 | 0x2a: { name: "CodecDecodeAll", type: "Uint" }, 121 | 0x2fab: { name: "TrackOverlay", type: "Uint" }, 122 | 0x16aa: { name: "CodecDelay", type: "Uint" }, 123 | 0x16bb: { name: "SeekPreRoll", type: "Uint" }, 124 | 0x2624: { name: "TrackTranslate", type: "Container" }, 125 | 0x26fc: { name: "TrackTranslateEditionUID", type: "Uint" }, 126 | 0x26bf: { name: "TrackTranslateCodec", type: "Uint" }, 127 | 0x26a5: { name: "TrackTranslateTrackID", type: "Binary" }, 128 | 0x60: { name: "Video", type: "Container" }, 129 | 0x1a: { name: "FlagInterlaced", type: "Uint" }, 130 | 
0x13b8: { name: "StereoMode", type: "Uint" }, 131 | 0x13c0: { name: "AlphaMode", type: "Uint" }, 132 | 0x13b9: { name: "OldStereoMode", type: "Uint" }, 133 | 0x30: { name: "PixelWidth", type: "Uint" }, 134 | 0x3a: { name: "PixelHeight", type: "Uint" }, 135 | 0x14aa: { name: "PixelCropBottom", type: "Uint" }, 136 | 0x14bb: { name: "PixelCropTop", type: "Uint" }, 137 | 0x14cc: { name: "PixelCropLeft", type: "Uint" }, 138 | 0x14dd: { name: "PixelCropRight", type: "Uint" }, 139 | 0x14b0: { name: "DisplayWidth", type: "Uint" }, 140 | 0x14ba: { name: "DisplayHeight", type: "Uint" }, 141 | 0x14b2: { name: "DisplayUnit", type: "Uint" }, 142 | 0x14b3: { name: "AspectRatioType", type: "Uint" }, 143 | 0xeb524: { name: "ColourSpace", type: "Binary" }, 144 | 0xfb523: { name: "GammaValue", type: "Float" }, 145 | 0x383e3: { name: "FrameRate", type: "Float" }, 146 | 0x61: { name: "Audio", type: "Container" }, 147 | 0x35: { name: "SamplingFrequency", type: "Float" }, 148 | 0x38b5: { name: "OutputSamplingFrequency", type: "Float" }, 149 | 0x1f: { name: "Channels", type: "Uint" }, 150 | 0x3d7b: { name: "ChannelPositions", type: "Binary" }, 151 | 0x2264: { name: "BitDepth", type: "Uint" }, 152 | 0x62: { name: "TrackOperation", type: "Container" }, 153 | 0x63: { name: "TrackCombinePlanes", type: "Container" }, 154 | 0x64: { name: "TrackPlane", type: "Container" }, 155 | 0x65: { name: "TrackPlaneUID", type: "Uint" }, 156 | 0x66: { name: "TrackPlaneType", type: "Uint" }, 157 | 0x69: { name: "TrackJoinBlocks", type: "Container" }, 158 | 0x6d: { name: "TrackJoinUID", type: "Uint" }, 159 | 0x40: { name: "TrickTrackUID", type: "Uint" }, 160 | 0x41: { name: "TrickTrackSegmentUID", type: "Binary" }, 161 | 0x46: { name: "TrickTrackFlag", type: "Uint" }, 162 | 0x47: { name: "TrickMasterTrackUID", type: "Uint" }, 163 | 0x44: { name: "TrickMasterTrackSegmentUID", type: "Binary" }, 164 | 0x2d80: { name: "ContentEncodings", type: "Container" }, 165 | 0x2240: { name: "ContentEncoding", type: 
"Container" }, 166 | 0x1031: { name: "ContentEncodingOrder", type: "Uint" }, 167 | 0x1032: { name: "ContentEncodingScope", type: "Uint" }, 168 | 0x1033: { name: "ContentEncodingType", type: "Uint" }, 169 | 0x1034: { name: "ContentCompression", type: "Container" }, 170 | 0x254: { name: "ContentCompAlgo", type: "Uint" }, 171 | 0x255: { name: "ContentCompSettings", type: "Binary" }, 172 | 0x1035: { name: "ContentEncryption", type: "Container" }, 173 | 0x7e1: { name: "ContentEncAlgo", type: "Uint" }, 174 | 0x7e2: { name: "ContentEncKeyID", type: "Binary" }, 175 | 0x7e3: { name: "ContentSignature", type: "Binary" }, 176 | 0x7e4: { name: "ContentSigKeyID", type: "Binary" }, 177 | 0x7e5: { name: "ContentSigAlgo", type: "Uint" }, 178 | 0x7e6: { name: "ContentSigHashAlgo", type: "Uint" }, 179 | 0xc53bb6b: { name: "Cues", type: "Container" }, 180 | 0x3b: { name: "CuePoint", type: "Container" }, 181 | 0x33: { name: "CueTime", type: "Uint" }, 182 | 0x37: { name: "CueTrackPositions", type: "Container" }, 183 | 0x77: { name: "CueTrack", type: "Uint" }, 184 | 0x71: { name: "CueClusterPosition", type: "Uint" }, 185 | 0x70: { name: "CueRelativePosition", type: "Uint" }, 186 | 0x32: { name: "CueDuration", type: "Uint" }, 187 | 0x1378: { name: "CueBlockNumber", type: "Uint" }, 188 | 0x6a: { name: "CueCodecState", type: "Uint" }, 189 | 0x5b: { name: "CueReference", type: "Container" }, 190 | 0x16: { name: "CueRefTime", type: "Uint" }, 191 | 0x17: { name: "CueRefCluster", type: "Uint" }, 192 | 0x135f: { name: "CueRefNumber", type: "Uint" }, 193 | 0x6b: { name: "CueRefCodecState", type: "Uint" }, 194 | 0x941a469: { name: "Attachments", type: "Container" }, 195 | 0x21a7: { name: "AttachedFile", type: "Container" }, 196 | 0x67e: { name: "FileDescription", type: "String" }, 197 | 0x66e: { name: "FileName", type: "String" }, 198 | 0x660: { name: "FileMimeType", type: "String" }, 199 | 0x65c: { name: "FileData", type: "Binary" }, 200 | 0x6ae: { name: "FileUID", type: "Uint" }, 201 | 0x675: { 
name: "FileReferral", type: "Binary" }, 202 | 0x661: { name: "FileUsedStartTime", type: "Uint" }, 203 | 0x662: { name: "FileUsedEndTime", type: "Uint" }, 204 | 0x43a770: { name: "Chapters", type: "Container" }, 205 | 0x5b9: { name: "EditionEntry", type: "Container" }, 206 | 0x5bc: { name: "EditionUID", type: "Uint" }, 207 | 0x5bd: { name: "EditionFlagHidden", type: "Uint" }, 208 | 0x5db: { name: "EditionFlagDefault", type: "Uint" }, 209 | 0x5dd: { name: "EditionFlagOrdered", type: "Uint" }, 210 | 0x36: { name: "ChapterAtom", type: "Container" }, 211 | 0x33c4: { name: "ChapterUID", type: "Uint" }, 212 | 0x1654: { name: "ChapterStringUID", type: "String" }, 213 | 0x11: { name: "ChapterTimeStart", type: "Uint" }, 214 | 0x12: { name: "ChapterTimeEnd", type: "Uint" }, 215 | 0x18: { name: "ChapterFlagHidden", type: "Uint" }, 216 | 0x598: { name: "ChapterFlagEnabled", type: "Uint" }, 217 | 0x2e67: { name: "ChapterSegmentUID", type: "Binary" }, 218 | 0x2ebc: { name: "ChapterSegmentEditionUID", type: "Uint" }, 219 | 0x23c3: { name: "ChapterPhysicalEquiv", type: "Uint" }, 220 | 0xf: { name: "ChapterTrack", type: "Container" }, 221 | 0x9: { name: "ChapterTrackNumber", type: "Uint" }, 222 | 0x0: { name: "ChapterDisplay", type: "Container" }, 223 | 0x5: { name: "ChapString", type: "String" }, 224 | 0x37c: { name: "ChapLanguage", type: "String" }, 225 | 0x37e: { name: "ChapCountry", type: "String" }, 226 | 0x2944: { name: "ChapProcess", type: "Container" }, 227 | 0x2955: { name: "ChapProcessCodecID", type: "Uint" }, 228 | 0x50d: { name: "ChapProcessPrivate", type: "Binary" }, 229 | 0x2911: { name: "ChapProcessCommand", type: "Container" }, 230 | 0x2922: { name: "ChapProcessTime", type: "Uint" }, 231 | 0x2933: { name: "ChapProcessData", type: "Binary" }, 232 | 0x254c367: { name: "Tags", type: "Container" }, 233 | 0x3373: { name: "Tag", type: "Container" }, 234 | 0x23c0: { name: "Targets", type: "Container" }, 235 | 0x28ca: { name: "TargetTypeValue", type: "Uint" }, 236 | 0x23ca: 
{ name: "TargetType", type: "String" }, 237 | 0x23c5: { name: "TagTrackUID", type: "Uint" }, 238 | 0x23c9: { name: "TagEditionUID", type: "Uint" }, 239 | 0x23c4: { name: "TagChapterUID", type: "Uint" }, 240 | 0x23c6: { name: "TagAttachmentUID", type: "Uint" }, 241 | 0x27c8: { name: "SimpleTag", type: "Container" }, 242 | 0x5a3: { name: "TagName", type: "String" }, 243 | 0x47a: { name: "TagLanguage", type: "String" }, 244 | 0x484: { name: "TagDefault", type: "Uint" }, 245 | 0x487: { name: "TagString", type: "String" }, 246 | 0x485: { name: "TagBinary", type: "Binary" }, 247 | }; 248 | 249 | /* Base EBML element: keeps the raw bytes ("source") and a decoded value ("data") in sync; setSource/setData store one representation and call the update hook to derive the other. Subclasses override updateBySource/updateByData. NOTE(review): a generic parameter (WebmBase<T> — see the "data?: T" field) appears stripped by this dump; confirm against the repository. */ class WebmBase { 250 | source?: Uint8Array; 251 | data?: T; 252 | 253 | constructor(private name = "Unknown", private type = "Unknown") {} 254 | 255 | /* No-op hooks; subclasses decode source -> data and encode data -> source. */ updateBySource() {} 256 | 257 | setSource(source: Uint8Array) { 258 | this.source = source; 259 | this.updateBySource(); 260 | } 261 | 262 | updateByData() {} 263 | 264 | setData(data: T) { 265 | this.data = data; 266 | this.updateByData(); 267 | } 268 | } 269 | 270 | /* Unsigned-integer element; the decoded value is kept as a hex string (see updateBySource), with getValue/setValue converting to and from number. */ class WebmUint extends WebmBase { 271 | constructor(name: string, type: string) { 272 | super(name, type || "Uint"); 273 | } 274 | 275 | updateBySource() { 276 | // use hex representation of a number instead of number value 277 | this.data = ""; 278 | for (let i = 0; i < this.source!.length; i++) { 279 | const hex = this.source![i].toString(16); 280 | this.data += padHex(hex); 281 | } 282 | } 283 | 284 | /* Re-encode the hex string into bytes, two hex digits per byte. */ updateByData() { 285 | const length = this.data!.length / 2; 286 | this.source = new Uint8Array(length); 287 | for (let i = 0; i < length; i++) { 288 | const hex = this.data!.substr(i * 2, 2); 289 | this.source[i] = parseInt(hex, 16); 290 | } 291 | } 292 | 293 | getValue() { 294 | return parseInt(this.data!, 16); 295 | } 296 | 297 | setValue(value: number) { 298 | this.setData(padHex(value.toString(16))); 299 | } 300 | } 301 | 302 | /* Ensures a hex string has an even number of digits by left-padding with "0". */ function padHex(hex: string) { 303 | return hex.length % 2 === 1 ? 
"0" + hex : hex; 304 | } 305 | 306 | class WebmFloat extends WebmBase { 307 | constructor(name: string, type: string) { 308 | super(name, type || "Float"); 309 | } 310 | 311 | getFloatArrayType() { 312 | return this.source && this.source.length === 4 313 | ? Float32Array 314 | : Float64Array; 315 | } 316 | updateBySource() { 317 | const byteArray = this.source!.reverse(); 318 | const floatArrayType = this.getFloatArrayType(); 319 | const floatArray = new floatArrayType(byteArray.buffer); 320 | this.data! = floatArray[0]; 321 | } 322 | updateByData() { 323 | const floatArrayType = this.getFloatArrayType(); 324 | const floatArray = new floatArrayType([this.data!]); 325 | const byteArray = new Uint8Array(floatArray.buffer); 326 | this.source = byteArray.reverse(); 327 | } 328 | getValue() { 329 | return this.data; 330 | } 331 | setValue(value: number) { 332 | this.setData(value); 333 | } 334 | } 335 | 336 | interface ContainerData { 337 | id: number; 338 | idHex?: string; 339 | data: WebmBase; 340 | } 341 | 342 | class WebmContainer extends WebmBase { 343 | offset: number = 0; 344 | data: ContainerData[] = []; 345 | 346 | constructor(name: string, type: string) { 347 | super(name, type || "Container"); 348 | } 349 | 350 | readByte() { 351 | return this.source![this.offset++]; 352 | } 353 | readUint() { 354 | const firstByte = this.readByte(); 355 | const bytes = 8 - firstByte.toString(2).length; 356 | let value = firstByte - (1 << (7 - bytes)); 357 | for (let i = 0; i < bytes; i++) { 358 | // don't use bit operators to support x86 359 | value *= 256; 360 | value += this.readByte(); 361 | } 362 | return value; 363 | } 364 | updateBySource() { 365 | let end: number | undefined = undefined; 366 | this.data = []; 367 | for ( 368 | this.offset = 0; 369 | this.offset < this.source!.length; 370 | this.offset = end 371 | ) { 372 | const id = this.readUint(); 373 | const len = this.readUint(); 374 | end = Math.min(this.offset + len, this.source!.length); 375 | const data = 
this.source!.slice(this.offset, end); 376 | 377 | const info = sections[id] || { name: "Unknown", type: "Unknown" }; 378 | let ctr: any = WebmBase; 379 | switch (info.type) { 380 | case "Container": 381 | ctr = WebmContainer; 382 | break; 383 | case "Uint": 384 | ctr = WebmUint; 385 | break; 386 | case "Float": 387 | ctr = WebmFloat; 388 | break; 389 | } 390 | const section = new ctr(info.name, info.type); 391 | section.setSource(data); 392 | this.data.push({ 393 | id: id, 394 | idHex: id.toString(16), 395 | data: section, 396 | }); 397 | } 398 | } 399 | writeUint(x: number, draft = false) { 400 | for ( 401 | var bytes = 1, flag = 0x80; 402 | x >= flag && bytes < 8; 403 | bytes++, flag *= 0x80 404 | ) {} 405 | 406 | if (!draft) { 407 | let value = flag + x; 408 | for (let i = bytes - 1; i >= 0; i--) { 409 | // don't use bit operators to support x86 410 | const c = value % 256; 411 | this.source![this.offset! + i] = c; 412 | value = (value - c) / 256; 413 | } 414 | } 415 | 416 | this.offset += bytes; 417 | } 418 | 419 | writeSections(draft = false) { 420 | this.offset = 0; 421 | for (let i = 0; i < this.data.length; i++) { 422 | const section = this.data[i], 423 | content = section.data.source, 424 | contentLength = content!.length; 425 | this.writeUint(section.id, draft); 426 | this.writeUint(contentLength, draft); 427 | if (!draft) { 428 | this.source!.set(content!, this.offset); 429 | } 430 | this.offset += contentLength; 431 | } 432 | return this.offset; 433 | } 434 | 435 | updateByData() { 436 | // run without accessing this.source to determine total length - need to know it to create Uint8Array 437 | const length = this.writeSections(true); 438 | this.source = new Uint8Array(length); 439 | // now really write data 440 | this.writeSections(); 441 | } 442 | 443 | getSectionById(id: number) { 444 | for (let i = 0; i < this.data.length; i++) { 445 | const section = this.data[i]; 446 | if (section.id === id) { 447 | return section.data; 448 | } 449 | } 450 | 451 | 
return undefined; 452 | } 453 | } 454 | 455 | class WebmFile extends WebmContainer { 456 | constructor(source: Uint8Array) { 457 | super("File", "File"); 458 | this.setSource(source); 459 | } 460 | 461 | fixDuration(duration: number) { 462 | const segmentSection = this.getSectionById(0x8538067) as WebmContainer; 463 | if (!segmentSection) { 464 | return false; 465 | } 466 | 467 | const infoSection = segmentSection.getSectionById( 468 | 0x549a966, 469 | ) as WebmContainer; 470 | if (!infoSection) { 471 | return false; 472 | } 473 | 474 | const timeScaleSection = infoSection.getSectionById( 475 | 0xad7b1, 476 | ) as WebmFloat; 477 | if (!timeScaleSection) { 478 | return false; 479 | } 480 | 481 | let durationSection = infoSection.getSectionById(0x489) as WebmFloat; 482 | if (durationSection) { 483 | if (durationSection.getValue()! <= 0) { 484 | durationSection.setValue(duration); 485 | } else { 486 | return false; 487 | } 488 | } else { 489 | // append Duration section 490 | durationSection = new WebmFloat("Duration", "Float"); 491 | durationSection.setValue(duration); 492 | infoSection.data.push({ 493 | id: 0x489, 494 | data: durationSection, 495 | }); 496 | } 497 | 498 | // set default time scale to 1 millisecond (1000000 nanoseconds) 499 | timeScaleSection.setValue(1000000); 500 | infoSection.updateByData(); 501 | segmentSection.updateByData(); 502 | this.updateByData(); 503 | 504 | return true; 505 | } 506 | 507 | toBlob(type = "video/webm") { 508 | return new Blob([this.source!.buffer], { type }); 509 | } 510 | } 511 | 512 | /** 513 | * Fixes duration on MediaRecorder output. 514 | * @param blob Input Blob with incorrect duration. 515 | * @param duration Correct duration (in milliseconds). 516 | * @param type Output blob mimetype (default: video/webm). 
517 | * @returns 518 | */ 519 | /* Resolves with a Blob: the duration-fixed file, or the original input blob when fixing is not possible. NOTE(review): the return type's generic (Promise<Blob>) appears stripped by this dump; both resolve paths produce a Blob. */ export const webmFixDuration = ( 520 | blob: Blob, 521 | duration: number, 522 | type = "video/webm", 523 | ): Promise => { 524 | /* Adapts FileReader's callback API to a Promise (a legitimate use of the Promise constructor). */ return new Promise((resolve, reject) => { 525 | try { 526 | const reader = new FileReader(); 527 | 528 | reader.addEventListener("loadend", () => { 529 | try { 530 | const result = reader.result as ArrayBuffer; 531 | const file = new WebmFile(new Uint8Array(result)); 532 | if (file.fixDuration(duration)) { 533 | resolve(file.toBlob(type)); 534 | } else { 535 | /* fixDuration returned false (missing Segment/Info/TimecodeScale section, or a positive duration already present) — fall back to the unmodified input. */ resolve(blob); 536 | } 537 | } catch (ex) { 538 | reject(ex); 539 | } 540 | }); 541 | 542 | /* NOTE(review): rejects with undefined on read errors; reject(reader.error) would preserve the cause — confirm callers before changing. */ reader.addEventListener("error", () => reject()); 543 | 544 | reader.readAsArrayBuffer(blob); 545 | } catch (ex) { 546 | reject(ex); 547 | } 548 | }); 549 | }; 550 | -------------------------------------------------------------------------------- /src/utils/Constants.ts: -------------------------------------------------------------------------------- 1 | /* Heuristic user-agent sniff for mobile/tablet devices (see the Stack Overflow link below); result is computed once at module load. */ function mobileTabletCheck() { 2 | // https://stackoverflow.com/questions/11381673/detecting-a-mobile-browser 3 | let check = false; 4 | (function (a: string) { 5 | if ( 6 | /(android|bb\d+|meego).+mobile|avantgo|bada\/|blackberry|blazer|compal|elaine|fennec|hiptop|iemobile|ip(hone|od)|iris|kindle|lge |maemo|midp|mmp|mobile.+firefox|netfront|opera m(ob|in)i|palm( os)?|phone|p(ixi|re)\/|plucker|pocket|psp|series(4|6)0|symbian|treo|up\.(browser|link)|vodafone|wap|windows ce|xda|xiino|android|ipad|playbook|silk/i.test( 7 | a, 8 | ) || 9 | /1207|6310|6590|3gso|4thp|50[1-6]i|770s|802s|a wa|abac|ac(er|oo|s\-)|ai(ko|rn)|al(av|ca|co)|amoi|an(ex|ny|yw)|aptu|ar(ch|go)|as(te|us)|attw|au(di|\-m|r |s )|avan|be(ck|ll|nq)|bi(lb|rd)|bl(ac|az)|br(e|v)w|bumb|bw\-(n|u)|c55\/|capi|ccwa|cdm\-|cell|chtm|cldc|cmd\-|co(mp|nd)|craw|da(it|ll|ng)|dbte|dc\-s|devi|dica|dmob|do(c|p)o|ds(12|\-d)|el(49|ai)|em(l2|ul)|er(ic|k0)|esl8|ez([4-7]0|os|wa|ze)|fetc|fly(\-|_)|g1 
u|g560|gene|gf\-5|g\-mo|go(\.w|od)|gr(ad|un)|haie|hcit|hd\-(m|p|t)|hei\-|hi(pt|ta)|hp( i|ip)|hs\-c|ht(c(\-| |_|a|g|p|s|t)|tp)|hu(aw|tc)|i\-(20|go|ma)|i230|iac( |\-|\/)|ibro|idea|ig01|ikom|im1k|inno|ipaq|iris|ja(t|v)a|jbro|jemu|jigs|kddi|keji|kgt( |\/)|klon|kpt |kwc\-|kyo(c|k)|le(no|xi)|lg( g|\/(k|l|u)|50|54|\-[a-w])|libw|lynx|m1\-w|m3ga|m50\/|ma(te|ui|xo)|mc(01|21|ca)|m\-cr|me(rc|ri)|mi(o8|oa|ts)|mmef|mo(01|02|bi|de|do|t(\-| |o|v)|zz)|mt(50|p1|v )|mwbp|mywa|n10[0-2]|n20[2-3]|n30(0|2)|n50(0|2|5)|n7(0(0|1)|10)|ne((c|m)\-|on|tf|wf|wg|wt)|nok(6|i)|nzph|o2im|op(ti|wv)|oran|owg1|p800|pan(a|d|t)|pdxg|pg(13|\-([1-8]|c))|phil|pire|pl(ay|uc)|pn\-2|po(ck|rt|se)|prox|psio|pt\-g|qa\-a|qc(07|12|21|32|60|\-[2-7]|i\-)|qtek|r380|r600|raks|rim9|ro(ve|zo)|s55\/|sa(ge|ma|mm|ms|ny|va)|sc(01|h\-|oo|p\-)|sdk\/|se(c(\-|0|1)|47|mc|nd|ri)|sgh\-|shar|sie(\-|m)|sk\-0|sl(45|id)|sm(al|ar|b3|it|t5)|so(ft|ny)|sp(01|h\-|v\-|v )|sy(01|mb)|t2(18|50)|t6(00|10|18)|ta(gt|lk)|tcl\-|tdg\-|tel(i|m)|tim\-|t\-mo|to(pl|sh)|ts(70|m\-|m3|m5)|tx\-9|up(\.b|g1|si)|utst|v400|v750|veri|vi(rg|te)|vk(40|5[0-3]|\-v)|vm40|voda|vulc|vx(52|53|60|61|70|80|81|83|85|98)|w3c(\-| )|webc|whit|wi(g |nc|nw)|wmlb|wonu|x700|yas\-|your|zeto|zte\-/i.test( 10 | a.substr(0, 4), 11 | ) 12 | ) 13 | check = true; 14 | })( 15 | navigator.userAgent || 16 | navigator.vendor || 17 | ("opera" in window && typeof window.opera === "string" 18 | ? window.opera 19 | : ""), 20 | ); 21 | return check; 22 | } 23 | const isMobileOrTablet = mobileTabletCheck(); 24 | export default { 25 | SAMPLING_RATE: 16000, 26 | DEFAULT_AUDIO_URL: `https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/${ 27 | isMobileOrTablet ? 
"jfk" : "ted_60_16k" 28 | }.wav`, 29 | DEFAULT_MODEL: "Xenova/whisper-tiny", 30 | DEFAULT_SUBTASK: "transcribe", 31 | DEFAULT_LANGUAGE: "english", 32 | DEFAULT_QUANTIZED: isMobileOrTablet, 33 | DEFAULT_MULTILINGUAL: false, 34 | }; 35 | -------------------------------------------------------------------------------- /src/vite-env.d.ts: -------------------------------------------------------------------------------- 1 | // eslint-disable-next-line spaced-comment 2 | /// 3 | -------------------------------------------------------------------------------- /src/worker.js: -------------------------------------------------------------------------------- 1 | /* eslint-disable camelcase */ 2 | import { pipeline, env } from "@xenova/transformers"; 3 | 4 | // Disable local models 5 | env.allowLocalModels = false; 6 | 7 | // Define model factories 8 | // Ensures only one model is created of each type 9 | class PipelineFactory { 10 | static task = null; 11 | static model = null; 12 | static quantized = null; 13 | static instance = null; 14 | 15 | constructor(tokenizer, model, quantized) { 16 | this.tokenizer = tokenizer; 17 | this.model = model; 18 | this.quantized = quantized; 19 | } 20 | 21 | static async getInstance(progress_callback = null) { 22 | if (this.instance === null) { 23 | this.instance = pipeline(this.task, this.model, { 24 | quantized: this.quantized, 25 | progress_callback, 26 | 27 | // For medium models, we need to load the `no_attentions` revision to avoid running out of memory 28 | revision: this.model.includes("/whisper-medium") ? "no_attentions" : "main" 29 | }); 30 | } 31 | 32 | return this.instance; 33 | } 34 | } 35 | 36 | self.addEventListener("message", async (event) => { 37 | const message = event.data; 38 | 39 | // Do some work... 
/**
 * Transcribe an audio buffer with a Whisper (or Distil-Whisper) model.
 *
 * Streams partial results back to the main thread via `self.postMessage`
 * ({status: "update"}) while generation is running; the caller posts the
 * final {status: "complete"} message itself.
 *
 * @param {Float32Array} audio - mono audio samples to transcribe.
 * @param {string} model - model id, e.g. "Xenova/whisper-tiny".
 * @param {boolean} multilingual - if false (and not Distil-Whisper), the
 *     English-only ".en" checkpoint variant is loaded instead.
 * @param {boolean} quantized - whether to load quantized weights.
 * @param {string} subtask - "transcribe" or "translate".
 * @param {string} language - source language hint for multilingual models.
 * @returns {Promise<object|null>} the pipeline output with timestamps, or
 *     null if transcription failed (an "error" message is posted instead).
 */
const transcribe = async (
    audio,
    model,
    multilingual,
    quantized,
    subtask,
    language,
) => {
    const isDistilWhisper = model.startsWith("distil-whisper/");

    // English-only checkpoints are published under a ".en" suffix.
    let modelName = model;
    if (!isDistilWhisper && !multilingual) {
        modelName += ".en";
    }

    const p = AutomaticSpeechRecognitionPipelineFactory;
    if (p.model !== modelName || p.quantized !== quantized) {
        // Requested model/quantization differs: invalidate the cached
        // pipeline so the next getInstance() builds the right one.
        p.model = modelName;
        p.quantized = quantized;

        if (p.instance !== null) {
            (await p.getInstance()).dispose();
            p.instance = null;
        }
    }

    // Load transcriber model, forwarding download/progress events to the UI.
    const transcriber = await p.getInstance((data) => {
        self.postMessage(data);
    });

    // Seconds represented by one timestamp token position.
    const time_precision =
        transcriber.processor.feature_extractor.config.chunk_length /
        transcriber.model.config.max_source_positions;

    // Storage for chunks to be processed. Initialise with an empty chunk.
    const chunks_to_process = [
        {
            tokens: [],
            finalised: false,
        },
    ];

    // TODO: Storage for fully-processed and merged chunks
    // let decoded_chunks = [];

    // Called after each audio window has been fully generated: seal the
    // current chunk and open a fresh one unless this was the final window.
    function chunk_callback(chunk) {
        const last = chunks_to_process[chunks_to_process.length - 1];

        // Overwrite last chunk with new info
        Object.assign(last, chunk);
        last.finalised = true;

        // Create an empty chunk after, if it not the last chunk
        if (!chunk.is_last) {
            chunks_to_process.push({
                tokens: [],
                finalised: false,
            });
        }
    }

    // Called after every generation step: re-decode everything generated so
    // far and push a partial transcript to the main thread.
    function callback_function(item) {
        const last = chunks_to_process[chunks_to_process.length - 1];

        // Update tokens of last chunk
        last.tokens = [...item[0].output_token_ids];

        // Merge text chunks
        // TODO optimise so we don't have to decode all chunks every time
        const data = transcriber.tokenizer._decode_asr(chunks_to_process, {
            time_precision: time_precision,
            return_timestamps: true,
            force_full_sequences: false,
        });

        self.postMessage({
            status: "update",
            task: "automatic-speech-recognition",
            data: data,
        });
    }

    // Actually run transcription
    const output = await transcriber(audio, {
        // Greedy
        top_k: 0,
        do_sample: false,

        // Sliding window (Distil-Whisper was trained on shorter windows)
        chunk_length_s: isDistilWhisper ? 20 : 30,
        stride_length_s: isDistilWhisper ? 3 : 5,

        // Language and task
        language: language,
        task: subtask,

        // Return timestamps
        return_timestamps: true,
        force_full_sequences: false,

        // Callback functions
        callback_function: callback_function, // after each generation step
        chunk_callback: chunk_callback, // after each chunk is processed
    }).catch((error) => {
        // Report the failure to the main thread and signal it via null.
        self.postMessage({
            status: "error",
            task: "automatic-speech-recognition",
            data: error,
        });
        return null;
    });

    return output;
};
"module": "ESNext", 5 | "moduleResolution": "Node", 6 | "allowSyntheticDefaultImports": true 7 | }, 8 | "include": ["vite.config.ts"] 9 | } 10 | -------------------------------------------------------------------------------- /vite.config.ts: -------------------------------------------------------------------------------- 1 | import { defineConfig } from 'vite' 2 | import react from '@vitejs/plugin-react' 3 | 4 | 5 | // https://vitejs.dev/config/ 6 | export default defineConfig({ 7 | plugins: [ 8 | react() 9 | ], 10 | }) 11 | --------------------------------------------------------------------------------