├── .editorconfig ├── .github ├── FUNDING.yml └── workflows │ ├── main.yml │ └── publish.yml ├── .gitignore ├── .nvmrc ├── .yarnrc.yml ├── LICENSE ├── README.md ├── biome.json ├── docs ├── API.md ├── POLYFILLS.md ├── V3-MIGRATION.md └── logos │ ├── microsoft.png │ └── speechly.png ├── package.json ├── src ├── NativeSpeechRecognition.js ├── RecognitionManager.js ├── SpeechRecognition.js ├── SpeechRecognition.test.js ├── actions.js ├── android.test.js ├── constants.js ├── index.js ├── isAndroid.js ├── isAndroid.test.js ├── reducers.js └── utils.js ├── tests └── vendor │ └── corti.js └── yarn.lock /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | indent_style = space 5 | indent_size = 2 6 | end_of_line = lf 7 | charset = utf-8 8 | trim_trailing_whitespace = true 9 | insert_final_newline = true 10 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | ko_fi: jamesbrill 2 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: Builds, tests & co 2 | 3 | on: 4 | - push 5 | - pull_request 6 | 7 | jobs: 8 | test: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - name: Checkout tree 12 | uses: actions/checkout@v4 13 | 14 | - name: Set-up Node.js 15 | uses: actions/setup-node@v4 16 | with: 17 | check-latest: true 18 | node-version-file: .nvmrc 19 | 20 | - run: corepack enable 21 | 22 | - run: yarn install --immutable 23 | 24 | - run: yarn lint 25 | 26 | - run: yarn build 27 | 28 | - run: yarn test --coverage 29 | 30 | - name: Generate code coverage report 31 | uses: coverallsapp/github-action@v2 32 | with: 33 | github-token: ${{ github.token }} 34 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish 2 | 3 | on: 4 | release: 5 | types: 6 | - published 7 | 8 | permissions: read-all 9 | 10 | jobs: 11 | publish: 12 | runs-on: ubuntu-latest 13 | 14 | permissions: 15 | contents: read 16 | id-token: write 17 | 18 | steps: 19 | - name: Checkout tree 20 | uses: actions/checkout@v4 21 | 22 | - name: Set-up Node.js 23 | uses: actions/setup-node@v4 24 | with: 25 | check-latest: true 26 | node-version-file: .nvmrc 27 | 28 | - run: corepack enable 29 | 30 | - run: yarn install --immutable 31 | 32 | - run: yarn npm publish --tolerate-republish 33 | env: 34 | YARN_NPM_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} 35 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /.yarn/ 2 | /coverage/ 3 | /dist/ 4 | /node_modules/ 5 | -------------------------------------------------------------------------------- /.nvmrc: -------------------------------------------------------------------------------- 1 | 23 2 | -------------------------------------------------------------------------------- /.yarnrc.yml: -------------------------------------------------------------------------------- 1 | defaultSemverRangePrefix: "" 2 | 3 | nodeLinker: node-modules 4 | 5 | preferInteractive: true 6 | -------------------------------------------------------------------------------- /LICENSE: 
-------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 James Brill 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # react-speech-recognition 2 | A React hook that converts speech from the microphone to text and makes it available to your React components. 3 | 4 | [![npm version](https://img.shields.io/npm/v/react-speech-recognition.svg)](https://www.npmjs.com/package/react-speech-recognition) 5 | [![npm downloads](https://img.shields.io/npm/dm/react-speech-recognition.svg)](https://www.npmjs.com/package/react-speech-recognition) 6 | [![license](https://img.shields.io/github/license/JamesBrill/react-speech-recognition.svg)](https://opensource.org/licenses/MIT) 7 | [![Coverage Status](https://coveralls.io/repos/github/JamesBrill/react-speech-recognition/badge.svg?branch=master)](https://coveralls.io/github/JamesBrill/react-speech-recognition?branch=master) 8 | 9 | ## How it works 10 | `useSpeechRecognition` is a React hook that gives a component access to a transcript of speech picked up from the user's microphone. 11 | 12 | `SpeechRecognition` manages the global state of the Web Speech API, exposing functions to turn the microphone on and off. 13 | 14 | Under the hood, 15 | it uses [Web Speech API](https://developer.mozilla.org/en-US/docs/Web/API/SpeechRecognition). Note that browser support for this API is currently limited, with Chrome having the best experience - see [supported browsers](#supported-browsers) for more information. 16 | 17 | This version requires React 16.8 so that React hooks can be used. If you're used to version 2.x of `react-speech-recognition` or want to use an older version of React, you can see the old README [here](https://github.com/JamesBrill/react-speech-recognition/tree/v2.1.4). If you want to migrate to version 3.x, see the migration guide [here](docs/V3-MIGRATION.md). 
18 | 19 | ## Useful links 20 | 21 | * [Basic example](#basic-example) 22 | * [Why you should use a polyfill with this library](#why-you-should-use-a-polyfill-with-this-library) 23 | * [Cross-browser example](#cross-browser-example) 24 | * [Supported browsers](#supported-browsers) 25 | * [Polyfills](docs/POLYFILLS.md) 26 | * [API docs](docs/API.md) 27 | * [Troubleshooting](#troubleshooting) 28 | * [Version 3 migration guide](docs/V3-MIGRATION.md) 29 | * [TypeScript declaration file in DefinitelyTyped](https://github.com/OleksandrYehorov/DefinitelyTyped/blob/master/types/react-speech-recognition/index.d.ts) 30 | 31 | ## Installation 32 | 33 | To install: 34 | 35 | ```shell 36 | npm install --save react-speech-recognition 37 | ``` 38 | 39 | To import in your React code: 40 | 41 | ```js 42 | import SpeechRecognition, { useSpeechRecognition } from 'react-speech-recognition' 43 | ``` 44 | 45 | ## Basic example 46 | 47 | The most basic example of a component using this hook would be: 48 | 49 | ```jsx 50 | import React from 'react'; 51 | import SpeechRecognition, { useSpeechRecognition } from 'react-speech-recognition'; 52 | 53 | const Dictaphone = () => { 54 | const { 55 | transcript, 56 | listening, 57 | resetTranscript, 58 | browserSupportsSpeechRecognition 59 | } = useSpeechRecognition(); 60 | 61 | if (!browserSupportsSpeechRecognition) { 62 | return Browser doesn't support speech recognition.; 63 | } 64 | 65 | return ( 66 |
    <div>
67 |       <p>Microphone: {listening ? 'on' : 'off'}</p>
68 |       <button onClick={SpeechRecognition.startListening}>Start</button>
69 |       <button onClick={SpeechRecognition.stopListening}>Stop</button>
70 |       <button onClick={resetTranscript}>Reset</button>
71 |       <p>{transcript}</p>
72 |     </div>
73 | ); 74 | }; 75 | export default Dictaphone; 76 | ``` 77 | 78 | You can see more examples in the example React app attached to this repo. See [Developing](#developing). 79 | 80 | ## Why you should use a polyfill with this library 81 | 82 | By default, speech recognition is not supported in all browsers, with the best native experience being available on desktop Chrome. To avoid the limitations of native browser speech recognition, it's recommended that you combine `react-speech-recognition` with a [speech recognition polyfill](docs/POLYFILLS.md). Why? Here's a comparison with and without polyfills: 83 | * ✅ With a polyfill, your web app will be voice-enabled on all modern browsers (except Internet Explorer) 84 | * ❌ Without a polyfill, your web app will only be voice-enabled on the browsers listed [here](#supported-browsers) 85 | * ✅ With a polyfill, your web app will have a consistent voice experience across browsers 86 | * ❌ Without a polyfill, different native implementations will produce different transcriptions, have different levels of accuracy, and have different formatting styles 87 | * ✅ With a polyfill, you control who is processing your users' voice data 88 | * ❌ Without a polyfill, your users' voice data will be sent to big tech companies like Google or Apple to be transcribed 89 | * ✅ With a polyfill, `react-speech-recognition` will be suitable for use in commercial applications 90 | * ❌ Without a polyfill, `react-speech-recognition` will still be fine for personal projects or use cases where cross-browser support is not needed 91 | 92 | `react-speech-recognition` currently supports polyfills for the following cloud providers: 93 | 94 |
95 |
96 | ![Microsoft Azure Cognitive Services](docs/logos/microsoft.png)
97 |
98 |
99 | 100 | ## Cross-browser example 101 | 102 | You can find the full guide for setting up a polyfill [here](docs/POLYFILLS.md). Alternatively, here is a quick example using Azure: 103 | * Install `web-speech-cognitive-services` and `microsoft-cognitiveservices-speech-sdk` in your web app. 104 | * You will need two things to configure this polyfill: the name of the Azure region your Speech Service is deployed in, plus a subscription key (or better still, an authorization token). [This doc](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/overview#find-keys-and-region) explains how to find those 105 | * Here's a component for a push-to-talk button. The basic example above would also work fine. 106 | ```jsx 107 | import React from 'react'; 108 | import createSpeechServicesPonyfill from 'web-speech-cognitive-services'; 109 | import SpeechRecognition, { useSpeechRecognition } from 'react-speech-recognition'; 110 | 111 | const SUBSCRIPTION_KEY = ''; 112 | const REGION = ''; 113 | 114 | const { SpeechRecognition: AzureSpeechRecognition } = createSpeechServicesPonyfill({ 115 | credentials: { 116 | region: REGION, 117 | subscriptionKey: SUBSCRIPTION_KEY, 118 | } 119 | }); 120 | SpeechRecognition.applyPolyfill(AzureSpeechRecognition); 121 | 122 | const Dictaphone = () => { 123 | const { 124 | transcript, 125 | resetTranscript, 126 | browserSupportsSpeechRecognition 127 | } = useSpeechRecognition(); 128 | 129 | const startListening = () => SpeechRecognition.startListening({ 130 | continuous: true, 131 | language: 'en-US' 132 | }); 133 | 134 | if (!browserSupportsSpeechRecognition) { 135 | return null; 136 | } 137 | 138 | return ( 139 |
    <div>
140 |       <button
141 |         onTouchStart={startListening} onMouseDown={startListening}
142 |         onTouchEnd={SpeechRecognition.stopListening} onMouseUp={SpeechRecognition.stopListening}>Hold to talk</button>
143 |       <p>{transcript}</p>
144 |     </div>
145 | ); 146 | }; 147 | export default Dictaphone; 148 | ``` 149 | 150 | ## Detecting browser support for Web Speech API 151 | 152 | If you choose not to use a polyfill, this library still fails gracefully on browsers that don't support speech recognition. It is recommended that you render some fallback content if it is not supported by the user's browser: 153 | 154 | ```js 155 | if (!browserSupportsSpeechRecognition) { 156 | // Render some fallback content 157 | } 158 | ``` 159 | 160 | ### Supported browsers 161 | 162 | Without a polyfill, the Web Speech API is largely only supported by Google browsers. As of May 2021, the following browsers support the Web Speech API: 163 | 164 | * Chrome (desktop): this is by far the smoothest experience 165 | * Safari 14.1 166 | * Microsoft Edge 167 | * Chrome (Android): a word of warning about this platform, which is that there can be an annoying beeping sound when turning the microphone on. This is part of the Android OS and cannot be controlled from the browser 168 | * Android webview 169 | * Samsung Internet 170 | 171 | For all other browsers, you can render fallback content using the `SpeechRecognition.browserSupportsSpeechRecognition` function described above. Alternatively, as mentioned before, you can integrate a [polyfill](docs/POLYFILLS.md). 172 | 173 | ## Detecting when the user denies access to the microphone 174 | 175 | Even if the browser supports the Web Speech API, the user still has to give permission for their microphone to be used before transcription can begin. They are asked for permission when `react-speech-recognition` first tries to start listening. At this point, you can detect when the user denies access via the `isMicrophoneAvailable` state. When this becomes `false`, it's advised that you disable voice-driven features and indicate that microphone access is needed for them to work. 176 | 177 | ```js 178 | if (!isMicrophoneAvailable) { 179 | // Render some fallback content 180 | } 181 | ``` 182 | 183 | ## Controlling the microphone 184 | 185 | Before consuming the transcript, you should be familiar with `SpeechRecognition`, which gives you control over the microphone. The state of the microphone is global, so any functions you call on this object will affect _all_ components using `useSpeechRecognition`. 186 | 187 | ### Turning the microphone on 188 | 189 | To start listening to speech, call the `startListening` function. 190 | 191 | ```js 192 | SpeechRecognition.startListening() 193 | ``` 194 | 195 | This is an asynchronous function, so it will need to be awaited if you want to do something after the microphone has been turned on. 196 | 197 | ### Turning the microphone off 198 | 199 | To turn the microphone off, but still finish processing any speech in progress, call `stopListening`. 200 | 201 | ```js 202 | SpeechRecognition.stopListening() 203 | ``` 204 | 205 | To turn the microphone off, and cancel the processing of any speech in progress, call `abortListening`. 206 | 207 | ```js 208 | SpeechRecognition.abortListening() 209 | ``` 210 | 211 | ## Consuming the microphone transcript 212 | 213 | To make the microphone transcript available in your component, simply add: 214 | 215 | ```js 216 | const { transcript } = useSpeechRecognition() 217 | ``` 218 | 219 | ## Resetting the microphone transcript 220 | 221 | To set the transcript to an empty string, you can call the `resetTranscript` function provided by `useSpeechRecognition`. 
Note that this is local to your component and does not affect any other components using Speech Recognition. 222 | 223 | ```js 224 | const { resetTranscript } = useSpeechRecognition() 225 | ``` 226 | 227 | ## Commands 228 | 229 | To respond when the user says a particular phrase, you can pass in a list of commands to the `useSpeechRecognition` hook. Each command is an object with the following properties: 230 | - `command`: This is a string or `RegExp` representing the phrase you want to listen for. If you want to use the same callback for multiple commands, you can also pass in an array here, with each value being a string or `RegExp` 231 | - `callback`: The function that is executed when the command is spoken. The last argument that this function receives will always be an object containing the following properties: 232 | - `command`: The command phrase that was matched. This can be useful when you provide an array of command phrases for the same callback and need to know which one triggered it 233 | - `resetTranscript`: A function that sets the transcript to an empty string 234 | - `matchInterim`: Boolean that determines whether "interim" results should be matched against the command. This will make your component respond faster to commands, but also makes false positives more likely - i.e. the command may be detected when it is not spoken. This is `false` by default and should only be set for simple commands. 235 | - `isFuzzyMatch`: Boolean that determines whether the comparison between speech and `command` is based on similarity rather than an exact match. Fuzzy matching is useful for commands that are easy to mispronounce or be misinterpreted by the Speech Recognition engine (e.g. names of places, sports teams, restaurant menu items). It is intended for commands that are string literals without special characters. If `command` is a string with special characters or a `RegExp`, it will be converted to a string without special characters when fuzzy matching. The similarity that is needed to match the command can be configured with `fuzzyMatchingThreshold`. `isFuzzyMatch` is `false` by default. When it is set to `true`, it will pass four arguments to `callback`: 236 | - The value of `command` (with any special characters removed) 237 | - The speech that matched `command` 238 | - The similarity between `command` and the speech 239 | - The object mentioned in the `callback` description above 240 | - `fuzzyMatchingThreshold`: If the similarity of speech to `command` is higher than this value when `isFuzzyMatch` is turned on, the `callback` will be invoked. You should set this only if `isFuzzyMatch` is `true`. It takes values between `0` (will match anything) and `1` (needs an exact match). The default value is `0.8`. 241 | - `bestMatchOnly`: Boolean that, when `isFuzzyMatch` is `true`, determines whether the callback should only be triggered by the command phrase that _best_ matches the speech, rather than being triggered by all matching fuzzy command phrases. This is useful for fuzzy commands with multiple command phrases assigned to the same callback function - you may only want the callback to be triggered once for each spoken command. You should set this only if `isFuzzyMatch` is `true`. The default value is `false`. 
242 | 243 | ### Command symbols 244 | 245 | To make commands easier to write, the following symbols are supported: 246 | - Splats: this is just a `*` and will match multi-word text: 247 | - Example: `'I would like to order *'` 248 | - The words that match the splat will be passed into the callback, one argument per splat 249 | - Named variables: this is written `:` and will match a single word: 250 | - Example: `'I am :height metres tall'` 251 | - The one word that matches the named variable will be passed into the callback 252 | - Optional words: this is a phrase wrapped in parentheses `(` and `)`, and is not required to match the command: 253 | - Example: `'Pass the salt (please)'` 254 | - The above example would match both `'Pass the salt'` and `'Pass the salt please'` 255 | 256 | ### Example with commands 257 | 258 | ```jsx 259 | import React, { useState } from 'react' 260 | import SpeechRecognition, { useSpeechRecognition } from 'react-speech-recognition' 261 | 262 | const Dictaphone = () => { 263 | const [message, setMessage] = useState('') 264 | const commands = [ 265 | { 266 | command: 'I would like to order *', 267 | callback: (food) => setMessage(`Your order is for: ${food}`) 268 | }, 269 | { 270 | command: 'The weather is :condition today', 271 | callback: (condition) => setMessage(`Today, the weather is ${condition}`) 272 | }, 273 | { 274 | command: 'My top sports are * and *', 275 | callback: (sport1, sport2) => setMessage(`#1: ${sport1}, #2: ${sport2}`) 276 | }, 277 | { 278 | command: 'Pass the salt (please)', 279 | callback: () => setMessage('My pleasure') 280 | }, 281 | { 282 | command: ['Hello', 'Hi'], 283 | callback: ({ command }) => setMessage(`Hi there! You said: "${command}"`), 284 | matchInterim: true 285 | }, 286 | { 287 | command: 'Beijing', 288 | callback: (command, spokenPhrase, similarityRatio) => setMessage(`${command} and ${spokenPhrase} are ${similarityRatio * 100}% similar`), 289 | // If the spokenPhrase is "Benji", the message would be "Beijing and Benji are 40% similar" 290 | isFuzzyMatch: true, 291 | fuzzyMatchingThreshold: 0.2 292 | }, 293 | { 294 | command: ['eat', 'sleep', 'leave'], 295 | callback: (command) => setMessage(`Best matching command: ${command}`), 296 | isFuzzyMatch: true, 297 | fuzzyMatchingThreshold: 0.2, 298 | bestMatchOnly: true 299 | }, 300 | { 301 | command: 'clear', 302 | callback: ({ resetTranscript }) => resetTranscript() 303 | } 304 | ] 305 | 306 | const { transcript, browserSupportsSpeechRecognition } = useSpeechRecognition({ commands }) 307 | 308 | if (!browserSupportsSpeechRecognition) { 309 | return null 310 | } 311 | 312 | return ( 313 |
    <div>
314 |       <p>{message}</p>
315 |       <p>{transcript}</p>
316 |     </div>
317 | ) 318 | } 319 | export default Dictaphone 320 | ``` 321 | 322 | ## Continuous listening 323 | 324 | By default, the microphone will stop listening when the user stops speaking. This reflects the approach taken by "press to talk" buttons on modern devices. 325 | 326 | If you want to listen continuously, set the `continuous` property to `true` when calling `startListening`. The microphone will continue to listen, even after the user has stopped speaking. 327 | 328 | ```js 329 | SpeechRecognition.startListening({ continuous: true }) 330 | ``` 331 | 332 | Be warned that not all browsers have good support for continuous listening. Chrome on Android in particular constantly restarts the microphone, leading to a frustrating and noisy (from the beeping) experience. To avoid enabling continuous listening on these browsers, you can make use of the `browserSupportsContinuousListening` state from `useSpeechRecognition` to detect support for this feature. 333 | 334 | ```js 335 | if (browserSupportsContinuousListening) { 336 | SpeechRecognition.startListening({ continuous: true }) 337 | } else { 338 | // Fallback behaviour 339 | } 340 | ``` 341 | 342 | Alternatively, you can try one of the [polyfills](docs/POLYFILLS.md) to enable continuous listening on these browsers. 343 | 344 | ## Changing language 345 | 346 | To listen for a specific language, you can pass a language tag (e.g. `'zh-CN'` for Chinese) when calling `startListening`. See [here](docs/API.md#language-string) for a list of supported languages. 347 | 348 | ```js 349 | SpeechRecognition.startListening({ language: 'zh-CN' }) 350 | ``` 351 | 352 | ## Troubleshooting 353 | 354 | ### `regeneratorRuntime is not defined` 355 | 356 | If you see the error `regeneratorRuntime is not defined` when using this library, you will need to ensure your web app installs `regenerator-runtime`: 357 | * `npm i --save regenerator-runtime` 358 | * If you are using NextJS, put this at the top of your `_app.js` file: `import 'regenerator-runtime/runtime'`. For any other framework, put it at the top of your `index.js` file 359 | 360 | ### How to use `react-speech-recognition` offline? 361 | 362 | Unfortunately, speech recognition will not function in Chrome when offline. According to the [Web Speech API docs](https://developer.mozilla.org/en-US/docs/Web/API/Web_Speech_API/Using_the_Web_Speech_API): `On Chrome, using Speech Recognition on a web page involves a server-based recognition engine. Your audio is sent to a web service for recognition processing, so it won't work offline.` 363 | 364 | If you are building an offline web app, you can detect when the browser is offline by inspecting the value of `navigator.onLine`. If it is `true`, you can render the transcript generated by React Speech Recognition. If it is `false`, it's advisable to render offline fallback content that signifies that speech recognition is disabled. The online/offline API is simple to use - you can read how to use it [here](https://developer.mozilla.org/en-US/docs/Web/API/NavigatorOnLine/Online_and_offline_events). 365 | 366 | ## Developing 367 | 368 | You can run an example React app that uses `react-speech-recognition` with: 369 | ```shell 370 | npm i 371 | npm run dev 372 | ``` 373 | 374 | On `http://localhost:3000`, you'll be able to speak into the microphone and see your speech as text on the web page. There are also controls for turning speech recognition on and off. You can make changes to the web app itself in the `example` directory. 
Any changes you make to the web app or `react-speech-recognition` itself will be live reloaded in the browser. 375 | 376 | ## API docs 377 | 378 | View the API docs [here](docs/API.md) or follow the guide above to learn how to use `react-speech-recognition`. 379 | 380 | ## License 381 | 382 | MIT 383 | -------------------------------------------------------------------------------- /biome.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "node_modules/@biomejs/biome/configuration_schema.json", 3 | "files": { 4 | "ignore": ["package.json"] 5 | }, 6 | "formatter": { 7 | "enabled": true, 8 | "useEditorconfig": true 9 | }, 10 | "linter": { 11 | "enabled": false, 12 | "rules": { 13 | "recommended": true 14 | } 15 | }, 16 | "organizeImports": { 17 | "enabled": true 18 | }, 19 | "vcs": { 20 | "clientKind": "git", 21 | "enabled": true, 22 | "useIgnoreFile": true 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /docs/API.md: -------------------------------------------------------------------------------- 1 | # API docs 2 | 3 | ## Interface 4 | 5 | * [useSpeechRecognition](#useSpeechRecognition) 6 | * [SpeechRecognition](#SpeechRecognition) 7 | 8 | ## useSpeechRecognition 9 | 10 | React hook for consuming speech recorded by the microphone. Import with: 11 | 12 | ```js 13 | import { useSpeechRecognition } from 'react-speech-recognition' 14 | ``` 15 | 16 | ### Input props 17 | 18 | These are passed as an object argument to `useSpeechRecognition`: 19 | 20 | ```js 21 | useSpeechRecognition({ transcribing, clearTranscriptOnListen, commands }) 22 | ``` 23 | 24 | #### transcribing [bool] 25 | 26 | Is this component collecting a transcript or not? This is independent of the global `listening` state of the microphone. `true` by default. 27 | 28 | #### clearTranscriptOnListen [bool] 29 | 30 | Does this component clear its transcript when the microphone is turned on? Has no effect when continuous listening is enabled. `true` by default. 31 | 32 | #### commands [list] 33 | 34 | See [Commands](../README.md#Commands). 35 | 36 | ### Output state 37 | 38 | These are returned from `useSpeechRecognition`: 39 | 40 | ```js 41 | const { 42 | transcript, 43 | interimTranscript, 44 | finalTranscript, 45 | resetTranscript, 46 | listening, 47 | browserSupportsSpeechRecognition, 48 | isMicrophoneAvailable, 49 | } = useSpeechRecognition() 50 | ``` 51 | 52 | #### transcript [string] 53 | 54 | Transcription of all speech that has been spoken into the microphone. Is equivalent to the final transcript followed by the interim transcript, separated by a space. 55 | 56 | #### resetTranscript [function] 57 | 58 | Sets `transcript` to an empty string. 59 | 60 | #### listening [bool] 61 | 62 | If true, the Web Speech API is listening to speech from the microphone. 63 | 64 | #### interimTranscript [string] 65 | 66 | Transcription of speech that the Web Speech API is still processing (i.e. it's still deciding what has just been spoken). 67 | 68 | For the current words being spoken, the interim transcript reflects each successive guess made by the transcription algorithm. When the browser’s confidence in its guess is maximized, it is added to the final transcript. 
69 | 70 | The difference between interim and final transcripts can be illustrated by an example over four iterations of the transcription algorithm: 71 | 72 | | Final transcript | Interim transcript | 73 | |-------------------|--------------------| 74 | | 'Hello, I am' | 'jam' | 75 | | 'Hello, I am' | 'jams' | 76 | | 'Hello, I am' | 'James' | 77 | | 'Hello, I am James' | '' | 78 | 79 | #### finalTranscript [string] 80 | 81 | Transcription of speech that the Web Speech API has finished processing. 82 | 83 | #### browserSupportsSpeechRecognition [bool] 84 | 85 | The Web Speech API is not supported on all browsers, so it is recommended that you render some fallback content if it is not supported by the user's browser: 86 | 87 | ```js 88 | if (!browserSupportsSpeechRecognition) { 89 | // Render some fallback content 90 | } 91 | ``` 92 | 93 | #### browserSupportsContinuousListening [bool] 94 | 95 | Continuous listening is not supported on all browsers, so it is recommended that you apply some fallback behaviour if your web app uses continuous listening and is running on a browser that doesn't support it: 96 | 97 | ```js 98 | if (browserSupportsContinuousListening) { 99 | SpeechRecognition.startListening({ continuous: true }) 100 | } else { 101 | // Fallback behaviour 102 | } 103 | ``` 104 | 105 | #### isMicrophoneAvailable [bool] 106 | 107 | The user has to give permission for their microphone to be used before transcription can begin. They are asked for permission when `react-speech-recognition` first tries to start listening. This state will become `false` if they deny access. In this case, it's advised that you disable voice-driven features and indicate that microphone access is needed for them to work. 108 | 109 | ```js 110 | if (!isMicrophoneAvailable) { 111 | // Render some fallback content 112 | } 113 | ``` 114 | 115 | ## SpeechRecognition 116 | 117 | Object providing functions to manage the global state of the microphone. Import with: 118 | 119 | ```js 120 | import SpeechRecognition from 'react-speech-recognition' 121 | ``` 122 | 123 | ### Functions 124 | 125 | #### startListening (async) 126 | 127 | Start listening to speech. 128 | 129 | ```js 130 | SpeechRecognition.startListening() 131 | ``` 132 | 133 | This is an asynchronous function, so it will need to be awaited if you want to do something after the microphone has been turned on. 134 | 135 | It can be called with an options argument. For example: 136 | 137 | ```js 138 | SpeechRecognition.startListening({ 139 | continuous: true, 140 | language: 'zh-CN' 141 | }) 142 | ``` 143 | 144 | The following options are available: 145 | 146 | ##### continuous [bool] 147 | 148 | By default, the microphone will stop listening when the user stops speaking (`continuous: false`). This reflects the approach taken by "press to talk" buttons on modern devices. 149 | 150 | If you want to listen continuously, set the `continuous` property to `true` when calling `startListening`. The microphone will continue to listen, even after the user has stopped speaking. 151 | 152 | ```js 153 | SpeechRecognition.startListening({ continuous: true }) 154 | ``` 155 | 156 | ##### language [string] 157 | 158 | To listen for a specific language, you can pass a language tag (e.g. `'zh-CN'` for Chinese) when calling `startListening`. 
159 | 160 | ```js 161 | SpeechRecognition.startListening({ language: 'zh-CN' }) 162 | ``` 163 | 164 | Some known supported languages (based on [this Stack Overflow post](http://stackoverflow.com/a/14302134/338039)): 165 | 166 | * Afrikaans `af` 167 | * Basque `eu` 168 | * Bulgarian `bg` 169 | * Catalan `ca` 170 | * Arabic (Egypt) `ar-EG` 171 | * Arabic (Jordan) `ar-JO` 172 | * Arabic (Kuwait) `ar-KW` 173 | * Arabic (Lebanon) `ar-LB` 174 | * Arabic (Qatar) `ar-QA` 175 | * Arabic (UAE) `ar-AE` 176 | * Arabic (Morocco) `ar-MA` 177 | * Arabic (Iraq) `ar-IQ` 178 | * Arabic (Algeria) `ar-DZ` 179 | * Arabic (Bahrain) `ar-BH` 180 | * Arabic (Lybia) `ar-LY` 181 | * Arabic (Oman) `ar-OM` 182 | * Arabic (Saudi Arabia) `ar-SA` 183 | * Arabic (Tunisia) `ar-TN` 184 | * Arabic (Yemen) `ar-YE` 185 | * Czech `cs` 186 | * Dutch `nl-NL` 187 | * English (Australia) `en-AU` 188 | * English (Canada) `en-CA` 189 | * English (India) `en-IN` 190 | * English (New Zealand) `en-NZ` 191 | * English (South Africa) `en-ZA` 192 | * English(UK) `en-GB` 193 | * English(US) `en-US` 194 | * Finnish `fi` 195 | * French `fr-FR` 196 | * Galician `gl` 197 | * German `de-DE` 198 | * Greek `el-GR` 199 | * Hebrew `he` 200 | * Hungarian `hu` 201 | * Icelandic `is` 202 | * Italian `it-IT` 203 | * Indonesian `id` 204 | * Japanese `ja` 205 | * Korean `ko` 206 | * Latin `la` 207 | * Mandarin Chinese `zh-CN` 208 | * Taiwanese `zh-TW` 209 | * Cantonese `zh-HK` 210 | * Malaysian `ms-MY` 211 | * Norwegian `no-NO` 212 | * Polish `pl` 213 | * Pig Latin `xx-piglatin` 214 | * Portuguese `pt-PT` 215 | * Portuguese (Brasil) `pt-br` 216 | * Romanian `ro-RO` 217 | * Russian `ru` 218 | * Serbian `sr-SP` 219 | * Slovak `sk` 220 | * Spanish (Argentina) `es-AR` 221 | * Spanish (Bolivia) `es-BO` 222 | * Spanish (Chile) `es-CL` 223 | * Spanish (Colombia) `es-CO` 224 | * Spanish (Costa Rica) `es-CR` 225 | * Spanish (Dominican Republic) `es-DO` 226 | * Spanish (Ecuador) `es-EC` 227 | * Spanish (El Salvador) `es-SV` 228 | * Spanish (Guatemala) `es-GT` 229 | * Spanish (Honduras) `es-HN` 230 | * Spanish (Mexico) `es-MX` 231 | * Spanish (Nicaragua) `es-NI` 232 | * Spanish (Panama) `es-PA` 233 | * Spanish (Paraguay) `es-PY` 234 | * Spanish (Peru) `es-PE` 235 | * Spanish (Puerto Rico) `es-PR` 236 | * Spanish (Spain) `es-ES` 237 | * Spanish (US) `es-US` 238 | * Spanish (Uruguay) `es-UY` 239 | * Spanish (Venezuela) `es-VE` 240 | * Swedish `sv-SE` 241 | * Turkish `tr` 242 | * Zulu `zu` 243 | 244 | #### stopListening (async) 245 | 246 | Turn the microphone off, but still finish processing any speech in progress. 247 | 248 | ```js 249 | SpeechRecognition.stopListening() 250 | ``` 251 | 252 | This is an asynchronous function, so it will need to be awaited if you want to do something after the microphone has been turned off. 253 | 254 | #### abortListening (async) 255 | 256 | Turn the microphone off, and cancel the processing of any speech in progress. 257 | 258 | ```js 259 | SpeechRecognition.abortListening() 260 | ``` 261 | 262 | This is an asynchronous function, so it will need to be awaited if you want to do something after the microphone has been turned off. 263 | 264 | #### getRecognition 265 | 266 | This returns the underlying [object](https://developer.mozilla.org/en-US/docs/Web/API/SpeechRecognition) used by Web Speech API. 267 | 268 | #### applyPolyfill 269 | 270 | Replace the native Speech Recognition engine (if there is one) with a custom implementation of the [W3C SpeechRecognition specification](https://wicg.github.io/speech-api/#speechreco-section). 
If there is a Speech Recognition implementation already listening to the microphone, this will be turned off. See [Polyfills](./POLYFILLS.md) for more information on how to use this. 271 | 272 | ```js 273 | SpeechRecognition.applyPolyfill(SpeechRecognitionPolyfill) 274 | ``` 275 | 276 | #### removePolyfill 277 | 278 | If a polyfill was applied using `applyPolyfill`, reset the Speech Recognition engine to the native implementation. This can be useful when the user switches to a language that is supported by the native engine but not the polyfill engine. 279 | 280 | ```js 281 | SpeechRecognition.removePolyfill() 282 | ``` -------------------------------------------------------------------------------- /docs/POLYFILLS.md: -------------------------------------------------------------------------------- 1 | # Polyfills 2 | 3 | If you want `react-speech-recognition` to work on more browsers than just Chrome, you can integrate a polyfill. This is a piece of code that fills in some missing feature in browsers that don't support it. 4 | 5 | Under the hood, Web Speech API in Chrome uses Google's speech recognition servers. To replicate this functionality elsewhere, you will need to host your own speech recognition service and implement the Web Speech API using that service. That implementation, which is essentially a polyfill, can then be plugged into `react-speech-recognition`. You can write that polyfill yourself, but it's recommended you use one someone else has already made. 6 | 7 | # Basic usage 8 | 9 | The `SpeechRecognition` class exported by `react-speech-recognition` has the method `applyPolyfill`. This can take an implementation of the [W3C SpeechRecognition specification](https://wicg.github.io/speech-api/#speechreco-section). From then on, that implementation will used by `react-speech-recognition` to transcribe speech picked up by the microphone. 10 | 11 | ```js 12 | SpeechRecognition.applyPolyfill(SpeechRecognitionPolyfill) 13 | ``` 14 | 15 | Note that this type of polyfill that does not pollute the global scope is known as a "ponyfill" - the distinction is explained [here](https://ponyfoo.com/articles/polyfills-or-ponyfills). `react-speech-recognition` will also pick up traditional polyfills - just make sure you import them before `react-speech-recognition`. 16 | 17 | Polyfills can be removed using `removePolyfill`. This can be useful when the user switches to a language that is supported by the native Speech Recognition engine but not the polyfill engine. 18 | 19 | ```js 20 | SpeechRecognition.removePolyfill() 21 | ``` 22 | 23 | ## Usage recommendations 24 | * Call this as early as possible to minimise periods where fallback content, which you should render while the polyfill is loading, is rendered. Also note that if there is a Speech Recognition implementation already listening to the microphone, this will be turned off when the polyfill is applied, so make sure the polyfill is applied before rendering any buttons to start listening 25 | * After `applyPolyfill` has been called, `browserSupportsSpeechRecognition` will be `true` on _most_ browsers, but there are still exceptions. 
Browsers like Internet Explorer do not support the APIs needed for polyfills - in these cases where `browserSupportsSpeechRecognition` is `false`, you should still have some suitable fallback content 26 | * Do not rely on polyfills being perfect implementations of the Speech Recognition specification - make sure you have tested them in different browsers and are aware of their individual limitations 27 | 28 | # Polyfill libraries 29 | 30 | Rather than roll your own, you should use a ready-made polyfill for a cloud provider's speech recognition service. `react-speech-recognition` currently supports polyfills for the following cloud providers: 31 | 32 | ## Microsoft Azure Cognitive Services 33 | 34 | 35 | Microsoft Azure Cognitive Services 36 | 37 | 38 | This is Microsoft's offering for speech recognition (among many other features). The free trial gives you $200 of credit to get started. It's pretty easy to set up - see the [documentation](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/). 39 | 40 | * Polyfill repo: [web-speech-cognitive-services](https://github.com/compulim/web-speech-cognitive-services) 41 | * Polyfill author: [compulim](https://github.com/compulim) 42 | * Requirements: 43 | * Install `web-speech-cognitive-services` and `microsoft-cognitiveservices-speech-sdk` in your web app for this polyfill to function 44 | * You will need two things to configure this polyfill: the name of the Azure region your Speech Service is deployed in, plus a subscription key (or better still, an authorization token). [This doc](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/overview#find-keys-and-region) explains how to find those 45 | 46 | Here is a basic example combining `web-speech-cognitive-services` and `react-speech-recognition` to get you started (do not use this in production; for a production-friendly version, read on below). This code worked with version 7.1.0 of the polyfill in February 2021 - if it has become outdated due to changes in the polyfill or in Azure Cognitive Services, please raise a GitHub issue or PR to get this updated. 47 | 48 | ```jsx 49 | import React from 'react'; 50 | import createSpeechServicesPonyfill from 'web-speech-cognitive-services'; 51 | import SpeechRecognition, { useSpeechRecognition } from 'react-speech-recognition'; 52 | 53 | const SUBSCRIPTION_KEY = ''; 54 | const REGION = ''; 55 | 56 | const { SpeechRecognition: AzureSpeechRecognition } = createSpeechServicesPonyfill({ 57 | credentials: { 58 | region: REGION, 59 | subscriptionKey: SUBSCRIPTION_KEY, 60 | } 61 | }); 62 | SpeechRecognition.applyPolyfill(AzureSpeechRecognition); 63 | 64 | const Dictaphone = () => { 65 | const { 66 | transcript, 67 | resetTranscript, 68 | browserSupportsSpeechRecognition 69 | } = useSpeechRecognition(); 70 | 71 | const startListening = () => SpeechRecognition.startListening({ 72 | continuous: true, 73 | language: 'en-US' 74 | }); 75 | 76 | if (!browserSupportsSpeechRecognition) { 77 | return null; 78 | } 79 | 80 | return ( 81 |
    <div>
82 |       <button
83 |         onTouchStart={startListening} onMouseDown={startListening}
84 |         onTouchEnd={SpeechRecognition.stopListening} onMouseUp={SpeechRecognition.stopListening}>Hold to talk</button>
85 |       <p>{transcript}</p>
86 |     </div>
87 | ); 88 | }; 89 | export default Dictaphone; 90 | ``` 91 | 92 | ### Usage in production 93 | 94 | Your subscription key is a secret that you should not be leaking to your users in production. In other words, it should never be downloaded to your users' browsers. A more secure approach that's recommended by Microsoft is to exchange your subscription key for an authorization token, which has a limited lifetime. You should get this token on your backend and pass this to your frontend React app. Microsoft give guidance on how to do this [here](https://docs.microsoft.com/en-us/azure/cognitive-services/authentication?tabs=powershell). 95 | 96 | Once your React app has the authorization token, it should be passed into the polyfill creator instead of the subscription key like this: 97 | ```js 98 | const { SpeechRecognition: AzureSpeechRecognition } = createSpeechServicesPonyfill({ 99 | credentials: { 100 | region: REGION, 101 | authorizationToken: AUTHORIZATION_TOKEN, 102 | } 103 | }); 104 | ``` 105 | 106 | ### Limitations 107 | * There is currently a [bug](https://github.com/compulim/web-speech-cognitive-services/issues/166) in this polyfill's `stop` method when using continuous listening. If you are using `continuous: true`, use `abortListening` to stop the transcription. Otherwise, you can use `stopListening`. 108 | * On Safari and Firefox, an error will be thrown if calling `startListening` to switch to a different language without first calling `stopListening`. It's recommended that you stick to one language and, if you do need to change languages, call `stopListening` first 109 | * If you don't specify a language, Azure will return a 400 response. When calling `startListening`, you will need to explicitly provide one of the language codes defined [here](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support). For English, use `en-GB` or `en-US` 110 | * Currently untested on iOS (let me know if it works!) 111 | 112 |
113 |
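
Putting the production pieces together, here is a minimal sketch of how the token-based setup could be wired into a component. It assumes a hypothetical backend route (`/azure-speech-token` here) that exchanges your subscription key for a short-lived authorization token, and it uses `abortListening` to stop, in line with the continuous-listening limitation noted above. Treat it as a starting point rather than a drop-in implementation.

```jsx
import React, { useEffect, useState } from 'react';
import createSpeechServicesPonyfill from 'web-speech-cognitive-services';
import SpeechRecognition, { useSpeechRecognition } from 'react-speech-recognition';

const REGION = '';

const Dictaphone = () => {
  const [polyfillReady, setPolyfillReady] = useState(false);
  const { transcript, browserSupportsSpeechRecognition } = useSpeechRecognition();

  useEffect(() => {
    // Hypothetical endpoint on your own backend that returns a short-lived
    // authorization token - the subscription key itself never reaches the browser
    fetch('/azure-speech-token')
      .then((res) => res.text())
      .then((authorizationToken) => {
        const { SpeechRecognition: AzureSpeechRecognition } = createSpeechServicesPonyfill({
          credentials: {
            region: REGION,
            authorizationToken,
          }
        });
        SpeechRecognition.applyPolyfill(AzureSpeechRecognition);
        setPolyfillReady(true);
      });
  }, []);

  if (!browserSupportsSpeechRecognition) {
    return null;
  }

  return (
    <div>
      <button
        disabled={!polyfillReady}
        onClick={() => SpeechRecognition.startListening({ continuous: true, language: 'en-US' })}
      >
        Start
      </button>
      <button onClick={SpeechRecognition.abortListening}>Stop</button>
      <p>{transcript}</p>
    </div>
  );
};
export default Dictaphone;
```

In a real app, you would typically fetch the token and apply the polyfill closer to the root of your component tree, before any listening buttons are rendered, as recommended in the usage notes above.
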
114 | 115 | ## AWS Transcribe 116 | 117 | There is no polyfill for [AWS Transcribe](https://aws.amazon.com/transcribe/) in the ecosystem yet, though a promising project can be found [here](https://github.com/ceuk/speech-recognition-aws-polyfill). 118 | 119 | # Providing your own polyfill 120 | 121 | If you want to roll your own implementation of the Speech Recognition API, follow the [W3C SpeechRecognition specification](https://wicg.github.io/speech-api/#speechreco-section). You should implement at least the following for `react-speech-recognition` to work: 122 | * `continuous` (property) 123 | * `lang` (property) 124 | * `interimResults` (property) 125 | * `onresult` (property). On the events received, the following properties are used: 126 | * `event.resultIndex` 127 | * `event.results[i].isFinal` 128 | * `event.results[i][0].transcript` 129 | * `event.results[i][0].confidence` 130 | * `onend` (property) 131 | * `onerror` (property) 132 | * `start` (method) 133 | * `stop` (method) 134 | * `abort` (method) 135 | 136 | -------------------------------------------------------------------------------- /docs/V3-MIGRATION.md: -------------------------------------------------------------------------------- 1 | # Migrating from v2 to v3 2 | 3 | v3 makes use of React hooks to simplify the consumption of `react-speech-recognition`: 4 | * Replacing the higher order component with a React hook 5 | * Introducing commands, functions that get executed when the user says a particular phrase 6 | * A clear separation between all parts of `react-speech-recognition` that are global (e.g. whether the microphone is listening or not) and local (e.g. transcripts). This makes it possible to have multiple components consuming the global microphone input while maintaining their own transcripts and commands 7 | * Some default prop values have changed so check those out below 8 | 9 | ## The original Dictaphone example 10 | 11 | ### In v2 12 | 13 | ```jsx 14 | import React, { Component } from "react"; 15 | import PropTypes from "prop-types"; 16 | import SpeechRecognition from "react-speech-recognition"; 17 | 18 | const propTypes = { 19 | // Props injected by SpeechRecognition 20 | transcript: PropTypes.string, 21 | resetTranscript: PropTypes.func, 22 | browserSupportsSpeechRecognition: PropTypes.bool 23 | }; 24 | 25 | const Dictaphone = ({ 26 | transcript, 27 | resetTranscript, 28 | browserSupportsSpeechRecognition 29 | }) => { 30 | if (!browserSupportsSpeechRecognition) { 31 | return null; 32 | } 33 | 34 | return ( 35 |
    <div>
36 |       <button onClick={resetTranscript}>Reset</button>
37 |       <span>{transcript}</span>
38 |     </div>
39 | ); 40 | }; 41 | 42 | Dictaphone.propTypes = propTypes; 43 | 44 | export default SpeechRecognition(Dictaphone); 45 | ``` 46 | 47 | ### In v3 48 | 49 | Automatically enabling the microphone without any user input is no longer encouraged as most browsers now prevent this. This is due to concerns about privacy - users don't necessarily want their browser listening to them without being asked. The "auto-start" has been replaced with a button to trigger the microphone being turned on. 50 | 51 | ```jsx 52 | import React, { useEffect } from 'react' 53 | import SpeechRecognition, { useSpeechRecognition } from 'react-speech-recognition' 54 | 55 | const Dictaphone = () => { 56 | const { transcript, resetTranscript, browserSupportsSpeechRecognition } = useSpeechRecognition() 57 | const startListening = () => SpeechRecognition.startListening({ continuous: true }) 58 | 59 | if (!browserSupportsSpeechRecognition) { 60 | return null 61 | } 62 | 63 | return ( 64 |
    <div>
65 |       <button onClick={startListening}>Start</button>
66 |       <button onClick={resetTranscript}>Reset</button>
67 |       <p>{transcript}</p>
68 |     </div>
69 | ) 70 | } 71 | export default Dictaphone 72 | ``` 73 | 74 | ## autoStart 75 | 76 | This was a global option in v2 that would cause the microphone to start listening from the beginning by default. In v3, the microphone is initially turned off by default. 77 | 78 | Automatically enabling the microphone without any user input is no longer encouraged as most browsers now prevent this. This is due to concerns about privacy - users don't necessarily want their browser listening to them without being asked. The preferred approach is to have a button that starts the microphone when clicked. 79 | 80 | However, if you still want an auto-start feature for the purposes of testing in Chrome, which still allows it, you can do the following: the microphone can be turned on when your component first renders by either `useEffect` if you're using hooks or `componentDidMount` if you're still using class components. It is recommended that you do this close to the root of your application as this affects global state. 81 | 82 | ```js 83 | useEffect(() => { 84 | SpeechRecognition.startListening({ continuous: true }) 85 | }, []); 86 | ``` 87 | 88 | ## continuous 89 | 90 | This was another global option in v2 that would by default have the microphone permanently listen to the user, even when they finished speaking. This default behaviour did not match the most common usage pattern, which is to use `react-speech-recognition` for "press to talk" buttons that stop listening once a command has been spoken. 91 | 92 | `continuous` is now an option that can be passed to `SpeechRecognition.startListening`. It is `false` by default, but can be overridden like so: 93 | 94 | ```js 95 | SpeechRecognition.startListening({ continuous: true }) 96 | ``` 97 | 98 | ## clearTranscriptOnListen 99 | 100 | This is a new prop in v3 that is passed into `useSpeechRecognition` from the consumer. Its default value makes a subtle change to the previous behaviour. When `continuous` was set to `false` in v2, the transcript would not be reset when the microphone started listening again. `clearTranscriptOnListen` changes that, clearing the component's transcript at the beginning of every new discontinuous speech. To replicate the old behaviour, this can be turned off when passing props into `useSpeechRecognition`: 101 | 102 | ```js 103 | const { transcript } = useSpeechRecognition({ clearTranscriptOnListen: false }) 104 | ``` 105 | 106 | ## Injected props 107 | 108 | `SpeechRecognition` used to inject props into components in v2. These props are still available, but in different forms. 109 | 110 | ### transcript 111 | 112 | This is now state returned by `useSpeechRecognition`. This transcript is local to the component using the hook. 113 | 114 | ### resetTranscript 115 | 116 | This is now state returned by `useSpeechRecognition`. This only resets the component's transcript, not any global state. 117 | 118 | ### startListening 119 | 120 | This is now available as `SpeechRecognition.startListening`, an asynchronous function documented [here](API.md#startListening-async). 121 | 122 | ### stopListening 123 | 124 | This is now available as `SpeechRecognition.stopListening`, documented [here](API.md#stopListening). 125 | 126 | ### abortListening 127 | 128 | This is now available as `SpeechRecognition.abortListening`, documented [here](API.md#abortListening). 
129 | 130 | ### browserSupportsSpeechRecognition 131 | 132 | This is now available as the function `SpeechRecognition.browserSupportsSpeechRecognition`, documented [here](API.md#browserSupportsSpeechRecognition). 133 | 134 | ### listening 135 | 136 | This is now state returned by `useSpeechRecognition`. This is the global listening state. 137 | 138 | ### interimTranscript 139 | 140 | This is now state returned by `useSpeechRecognition`. This transcript is local to the component using the hook. 141 | 142 | ### finalTranscript 143 | 144 | This is now state returned by `useSpeechRecognition`. This transcript is local to the component using the hook. 145 | 146 | ### recognition 147 | 148 | This is now returned by the function `SpeechRecognition.getRecognition`, documented [here](API.md#getRecognition). 149 | -------------------------------------------------------------------------------- /docs/logos/microsoft.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JamesBrill/react-speech-recognition/849a908a8317e7e56739d937faf51be347c631b0/docs/logos/microsoft.png -------------------------------------------------------------------------------- /docs/logos/speechly.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JamesBrill/react-speech-recognition/849a908a8317e7e56739d937faf51be347c631b0/docs/logos/speechly.png -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "react-speech-recognition", 3 | "version": "4.0.1", 4 | "description": "Speech recognition for your React app", 5 | "keywords": [ 6 | "react", 7 | "reactjs", 8 | "speech", 9 | "recognition" 10 | ], 11 | "homepage": "https://webspeechrecognition.com", 12 | "repository": "github:JamesBrill/react-speech-recognition", 13 | "license": "MIT", 14 | "author": "James Brill ", 15 | "type": "module", 16 | "exports": { 17 | ".": { 18 | "import": { 19 | "default": "./dist/index.js" 20 | }, 21 | "require": { 22 | "default": "./dist/index.cjs" 23 | } 24 | } 25 | }, 26 | "main": "./dist/index.cjs", 27 | "module": "./dist/index.js", 28 | "files": [ 29 | "dist" 30 | ], 31 | "scripts": { 32 | "build": "bunchee", 33 | "fix": "biome check --fix", 34 | "lint": "biome check", 35 | "prepack": "bunchee", 36 | "test": "vitest" 37 | }, 38 | "dependencies": { 39 | "lodash.debounce": "^4.0.8" 40 | }, 41 | "devDependencies": { 42 | "@biomejs/biome": "1.9.4", 43 | "@testing-library/react-hooks": "3.7.0", 44 | "@vitest/coverage-v8": "3.1.2", 45 | "bunchee": "6.5.1", 46 | "jsdom": "26.1.0", 47 | "react": "16.14.0", 48 | "react-dom": "16.14.0", 49 | "react-test-renderer": "16.14.0", 50 | "typescript": "5.8.3", 51 | "vitest": "3.1.2" 52 | }, 53 | "peerDependencies": { 54 | "react": ">=16.8.0" 55 | }, 56 | "packageManager": "yarn@4.9.1", 57 | "publishConfig": { 58 | "access": "public", 59 | "provenance": true, 60 | "registry": "https://registry.npmjs.org" 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /src/NativeSpeechRecognition.js: -------------------------------------------------------------------------------- 1 | const NativeSpeechRecognition = 2 | typeof window !== "undefined" && 3 | (window.SpeechRecognition || 4 | window.webkitSpeechRecognition || 5 | window.mozSpeechRecognition || 6 | window.msSpeechRecognition || 7 | 
window.oSpeechRecognition); 8 | 9 | export const isNative = (SpeechRecognition) => 10 | SpeechRecognition === NativeSpeechRecognition; 11 | 12 | export default NativeSpeechRecognition; 13 | -------------------------------------------------------------------------------- /src/RecognitionManager.js: -------------------------------------------------------------------------------- 1 | import debounce from "lodash.debounce"; 2 | import { isNative } from "./NativeSpeechRecognition"; 3 | import isAndroid from "./isAndroid"; 4 | import { browserSupportsPolyfills, concatTranscripts } from "./utils"; 5 | 6 | export default class RecognitionManager { 7 | constructor(SpeechRecognition) { 8 | this.recognition = null; 9 | this.pauseAfterDisconnect = false; 10 | this.interimTranscript = ""; 11 | this.finalTranscript = ""; 12 | this.listening = false; 13 | this.isMicrophoneAvailable = true; 14 | this.subscribers = {}; 15 | this.onStopListening = () => {}; 16 | this.previousResultWasFinalOnly = false; 17 | 18 | this.resetTranscript = this.resetTranscript.bind(this); 19 | this.startListening = this.startListening.bind(this); 20 | this.stopListening = this.stopListening.bind(this); 21 | this.abortListening = this.abortListening.bind(this); 22 | this.setSpeechRecognition = this.setSpeechRecognition.bind(this); 23 | this.disableRecognition = this.disableRecognition.bind(this); 24 | 25 | this.setSpeechRecognition(SpeechRecognition); 26 | 27 | if (isAndroid()) { 28 | this.updateFinalTranscript = debounce(this.updateFinalTranscript, 250, { 29 | leading: true, 30 | }); 31 | } 32 | } 33 | 34 | setSpeechRecognition(SpeechRecognition) { 35 | const browserSupportsRecogniser = 36 | !!SpeechRecognition && 37 | (isNative(SpeechRecognition) || browserSupportsPolyfills()); 38 | if (browserSupportsRecogniser) { 39 | this.disableRecognition(); 40 | this.recognition = new SpeechRecognition(); 41 | this.recognition.continuous = false; 42 | this.recognition.interimResults = true; 43 | this.recognition.onresult = this.updateTranscript.bind(this); 44 | this.recognition.onend = this.onRecognitionDisconnect.bind(this); 45 | this.recognition.onerror = this.onError.bind(this); 46 | } 47 | this.emitBrowserSupportsSpeechRecognitionChange(browserSupportsRecogniser); 48 | } 49 | 50 | subscribe(id, callbacks) { 51 | this.subscribers[id] = callbacks; 52 | } 53 | 54 | unsubscribe(id) { 55 | delete this.subscribers[id]; 56 | } 57 | 58 | emitListeningChange(listening) { 59 | this.listening = listening; 60 | Object.keys(this.subscribers).forEach((id) => { 61 | const { onListeningChange } = this.subscribers[id]; 62 | onListeningChange(listening); 63 | }); 64 | } 65 | 66 | emitMicrophoneAvailabilityChange(isMicrophoneAvailable) { 67 | this.isMicrophoneAvailable = isMicrophoneAvailable; 68 | Object.keys(this.subscribers).forEach((id) => { 69 | const { onMicrophoneAvailabilityChange } = this.subscribers[id]; 70 | onMicrophoneAvailabilityChange(isMicrophoneAvailable); 71 | }); 72 | } 73 | 74 | emitTranscriptChange(interimTranscript, finalTranscript) { 75 | Object.keys(this.subscribers).forEach((id) => { 76 | const { onTranscriptChange } = this.subscribers[id]; 77 | onTranscriptChange(interimTranscript, finalTranscript); 78 | }); 79 | } 80 | 81 | emitClearTranscript() { 82 | Object.keys(this.subscribers).forEach((id) => { 83 | const { onClearTranscript } = this.subscribers[id]; 84 | onClearTranscript(); 85 | }); 86 | } 87 | 88 | emitBrowserSupportsSpeechRecognitionChange( 89 | browserSupportsSpeechRecognitionChange, 90 | ) { 91 | 
Object.keys(this.subscribers).forEach((id) => { 92 | const { 93 | onBrowserSupportsSpeechRecognitionChange, 94 | onBrowserSupportsContinuousListeningChange, 95 | } = this.subscribers[id]; 96 | onBrowserSupportsSpeechRecognitionChange( 97 | browserSupportsSpeechRecognitionChange, 98 | ); 99 | onBrowserSupportsContinuousListeningChange( 100 | browserSupportsSpeechRecognitionChange, 101 | ); 102 | }); 103 | } 104 | 105 | disconnect(disconnectType) { 106 | if (this.recognition && this.listening) { 107 | switch (disconnectType) { 108 | case "ABORT": 109 | this.pauseAfterDisconnect = true; 110 | this.abort(); 111 | break; 112 | case "RESET": 113 | this.pauseAfterDisconnect = false; 114 | this.abort(); 115 | break; 116 | case "STOP": 117 | default: 118 | this.pauseAfterDisconnect = true; 119 | this.stop(); 120 | } 121 | } 122 | } 123 | 124 | disableRecognition() { 125 | if (this.recognition) { 126 | this.recognition.onresult = () => {}; 127 | this.recognition.onend = () => {}; 128 | this.recognition.onerror = () => {}; 129 | if (this.listening) { 130 | this.stopListening(); 131 | } 132 | } 133 | } 134 | 135 | onError(event) { 136 | if (event && event.error && event.error === "not-allowed") { 137 | this.emitMicrophoneAvailabilityChange(false); 138 | this.disableRecognition(); 139 | } 140 | } 141 | 142 | onRecognitionDisconnect() { 143 | this.onStopListening(); 144 | this.listening = false; 145 | if (this.pauseAfterDisconnect) { 146 | this.emitListeningChange(false); 147 | } else if (this.recognition) { 148 | if (this.recognition.continuous) { 149 | this.startListening({ continuous: this.recognition.continuous }); 150 | } else { 151 | this.emitListeningChange(false); 152 | } 153 | } 154 | this.pauseAfterDisconnect = false; 155 | } 156 | 157 | updateTranscript({ results, resultIndex }) { 158 | const currentIndex = 159 | resultIndex === undefined ? results.length - 1 : resultIndex; 160 | this.interimTranscript = ""; 161 | this.finalTranscript = ""; 162 | for (let i = currentIndex; i < results.length; ++i) { 163 | if ( 164 | results[i].isFinal && 165 | (!isAndroid() || results[i][0].confidence > 0) 166 | ) { 167 | this.updateFinalTranscript(results[i][0].transcript); 168 | } else { 169 | this.interimTranscript = concatTranscripts( 170 | this.interimTranscript, 171 | results[i][0].transcript, 172 | ); 173 | } 174 | } 175 | let isDuplicateResult = false; 176 | if (this.interimTranscript === "" && this.finalTranscript !== "") { 177 | if (this.previousResultWasFinalOnly) { 178 | isDuplicateResult = true; 179 | } 180 | this.previousResultWasFinalOnly = true; 181 | } else { 182 | this.previousResultWasFinalOnly = false; 183 | } 184 | if (!isDuplicateResult) { 185 | this.emitTranscriptChange(this.interimTranscript, this.finalTranscript); 186 | } 187 | } 188 | 189 | updateFinalTranscript(newFinalTranscript) { 190 | this.finalTranscript = concatTranscripts( 191 | this.finalTranscript, 192 | newFinalTranscript, 193 | ); 194 | } 195 | 196 | resetTranscript() { 197 | this.disconnect("RESET"); 198 | } 199 | 200 | async startListening({ continuous = false, language } = {}) { 201 | if (!this.recognition) { 202 | return; 203 | } 204 | 205 | const isContinuousChanged = continuous !== this.recognition.continuous; 206 | const isLanguageChanged = language && language !== this.recognition.lang; 207 | if (isContinuousChanged || isLanguageChanged) { 208 | if (this.listening) { 209 | await this.stopListening(); 210 | } 211 | this.recognition.continuous = isContinuousChanged 212 | ? 
continuous 213 | : this.recognition.continuous; 214 | this.recognition.lang = isLanguageChanged 215 | ? language 216 | : this.recognition.lang; 217 | } 218 | if (!this.listening) { 219 | if (!this.recognition.continuous) { 220 | this.resetTranscript(); 221 | this.emitClearTranscript(); 222 | } 223 | try { 224 | await this.start(); 225 | this.emitListeningChange(true); 226 | } catch (e) { 227 | // DOMExceptions indicate a redundant microphone start - safe to swallow 228 | if (!(e instanceof DOMException)) { 229 | this.emitMicrophoneAvailabilityChange(false); 230 | } 231 | } 232 | } 233 | } 234 | 235 | async abortListening() { 236 | this.disconnect("ABORT"); 237 | this.emitListeningChange(false); 238 | await new Promise((resolve) => { 239 | this.onStopListening = resolve; 240 | }); 241 | } 242 | 243 | async stopListening() { 244 | this.disconnect("STOP"); 245 | this.emitListeningChange(false); 246 | await new Promise((resolve) => { 247 | this.onStopListening = resolve; 248 | }); 249 | } 250 | 251 | getRecognition() { 252 | return this.recognition; 253 | } 254 | 255 | async start() { 256 | if (this.recognition && !this.listening) { 257 | await this.recognition.start(); 258 | this.listening = true; 259 | } 260 | } 261 | 262 | stop() { 263 | if (this.recognition && this.listening) { 264 | this.recognition.stop(); 265 | this.listening = false; 266 | } 267 | } 268 | 269 | abort() { 270 | if (this.recognition && this.listening) { 271 | this.recognition.abort(); 272 | this.listening = false; 273 | } 274 | } 275 | } 276 | -------------------------------------------------------------------------------- /src/SpeechRecognition.js: -------------------------------------------------------------------------------- 1 | import { useCallback, useEffect, useReducer, useRef, useState } from "react"; 2 | import NativeSpeechRecognition from "./NativeSpeechRecognition"; 3 | import RecognitionManager from "./RecognitionManager"; 4 | import { appendTranscript, clearTranscript } from "./actions"; 5 | import isAndroid from "./isAndroid"; 6 | import { transcriptReducer } from "./reducers"; 7 | import { 8 | browserSupportsPolyfills, 9 | commandToRegExp, 10 | compareTwoStringsUsingDiceCoefficient, 11 | concatTranscripts, 12 | } from "./utils"; 13 | 14 | let _browserSupportsSpeechRecognition = !!NativeSpeechRecognition; 15 | let _browserSupportsContinuousListening = 16 | _browserSupportsSpeechRecognition && !isAndroid(); 17 | let recognitionManager; 18 | 19 | const useSpeechRecognition = ({ 20 | transcribing = true, 21 | clearTranscriptOnListen = true, 22 | commands = [], 23 | } = {}) => { 24 | const [recognitionManager] = useState( 25 | SpeechRecognition.getRecognitionManager(), 26 | ); 27 | const [ 28 | browserSupportsSpeechRecognition, 29 | setBrowserSupportsSpeechRecognition, 30 | ] = useState(_browserSupportsSpeechRecognition); 31 | const [ 32 | browserSupportsContinuousListening, 33 | setBrowserSupportsContinuousListening, 34 | ] = useState(_browserSupportsContinuousListening); 35 | const [{ interimTranscript, finalTranscript }, dispatch] = useReducer( 36 | transcriptReducer, 37 | { 38 | interimTranscript: recognitionManager.interimTranscript, 39 | finalTranscript: "", 40 | }, 41 | ); 42 | const [listening, setListening] = useState(recognitionManager.listening); 43 | const [isMicrophoneAvailable, setMicrophoneAvailable] = useState( 44 | recognitionManager.isMicrophoneAvailable, 45 | ); 46 | const commandsRef = useRef(commands); 47 | commandsRef.current = commands; 48 | 49 | const dispatchClearTranscript = () => { 50 
| dispatch(clearTranscript()); 51 | }; 52 | 53 | const resetTranscript = useCallback(() => { 54 | recognitionManager.resetTranscript(); 55 | dispatchClearTranscript(); 56 | }, [recognitionManager]); 57 | 58 | const testFuzzyMatch = (command, input, fuzzyMatchingThreshold) => { 59 | const commandToString = 60 | typeof command === "object" ? command.toString() : command; 61 | const commandWithoutSpecials = commandToString 62 | .replace(/[&/\\#,+()!$~%.'":*?<>{}]/g, "") 63 | .replace(/ +/g, " ") 64 | .trim(); 65 | const howSimilar = compareTwoStringsUsingDiceCoefficient( 66 | commandWithoutSpecials, 67 | input, 68 | ); 69 | if (howSimilar >= fuzzyMatchingThreshold) { 70 | return { 71 | command, 72 | commandWithoutSpecials, 73 | howSimilar, 74 | isFuzzyMatch: true, 75 | }; 76 | } 77 | return null; 78 | }; 79 | 80 | const testMatch = (command, input) => { 81 | const pattern = commandToRegExp(command); 82 | const result = pattern.exec(input); 83 | if (result) { 84 | return { 85 | command, 86 | parameters: result.slice(1), 87 | }; 88 | } 89 | return null; 90 | }; 91 | 92 | const matchCommands = useCallback( 93 | (newInterimTranscript, newFinalTranscript) => { 94 | commandsRef.current.forEach( 95 | ({ 96 | command, 97 | callback, 98 | matchInterim = false, 99 | isFuzzyMatch = false, 100 | fuzzyMatchingThreshold = 0.8, 101 | bestMatchOnly = false, 102 | }) => { 103 | const input = 104 | !newFinalTranscript && matchInterim 105 | ? newInterimTranscript.trim() 106 | : newFinalTranscript.trim(); 107 | const subcommands = Array.isArray(command) ? command : [command]; 108 | const results = subcommands 109 | .map((subcommand) => { 110 | if (isFuzzyMatch) { 111 | return testFuzzyMatch( 112 | subcommand, 113 | input, 114 | fuzzyMatchingThreshold, 115 | ); 116 | } 117 | return testMatch(subcommand, input); 118 | }) 119 | .filter((x) => x); 120 | if (isFuzzyMatch && bestMatchOnly && results.length >= 2) { 121 | results.sort((a, b) => b.howSimilar - a.howSimilar); 122 | const { command, commandWithoutSpecials, howSimilar } = results[0]; 123 | callback(commandWithoutSpecials, input, howSimilar, { 124 | command, 125 | resetTranscript, 126 | }); 127 | } else { 128 | results.forEach((result) => { 129 | if (result.isFuzzyMatch) { 130 | const { command, commandWithoutSpecials, howSimilar } = result; 131 | callback(commandWithoutSpecials, input, howSimilar, { 132 | command, 133 | resetTranscript, 134 | }); 135 | } else { 136 | const { command, parameters } = result; 137 | callback(...parameters, { command, resetTranscript }); 138 | } 139 | }); 140 | } 141 | }, 142 | ); 143 | }, 144 | [resetTranscript], 145 | ); 146 | 147 | const handleTranscriptChange = useCallback( 148 | (newInterimTranscript, newFinalTranscript) => { 149 | if (transcribing) { 150 | dispatch(appendTranscript(newInterimTranscript, newFinalTranscript)); 151 | } 152 | matchCommands(newInterimTranscript, newFinalTranscript); 153 | }, 154 | [matchCommands, transcribing], 155 | ); 156 | 157 | const handleClearTranscript = useCallback(() => { 158 | if (clearTranscriptOnListen) { 159 | dispatchClearTranscript(); 160 | } 161 | }, [clearTranscriptOnListen]); 162 | 163 | useEffect(() => { 164 | const id = SpeechRecognition.counter; 165 | SpeechRecognition.counter += 1; 166 | const callbacks = { 167 | onListeningChange: setListening, 168 | onMicrophoneAvailabilityChange: setMicrophoneAvailable, 169 | onTranscriptChange: handleTranscriptChange, 170 | onClearTranscript: handleClearTranscript, 171 | onBrowserSupportsSpeechRecognitionChange: 172 | 
setBrowserSupportsSpeechRecognition, 173 | onBrowserSupportsContinuousListeningChange: 174 | setBrowserSupportsContinuousListening, 175 | }; 176 | recognitionManager.subscribe(id, callbacks); 177 | 178 | return () => { 179 | recognitionManager.unsubscribe(id); 180 | }; 181 | }, [ 182 | transcribing, 183 | clearTranscriptOnListen, 184 | recognitionManager, 185 | handleTranscriptChange, 186 | handleClearTranscript, 187 | ]); 188 | 189 | const transcript = concatTranscripts(finalTranscript, interimTranscript); 190 | return { 191 | transcript, 192 | interimTranscript, 193 | finalTranscript, 194 | listening, 195 | isMicrophoneAvailable, 196 | resetTranscript, 197 | browserSupportsSpeechRecognition, 198 | browserSupportsContinuousListening, 199 | }; 200 | }; 201 | const SpeechRecognition = { 202 | counter: 0, 203 | applyPolyfill: (PolyfillSpeechRecognition) => { 204 | if (recognitionManager) { 205 | recognitionManager.setSpeechRecognition(PolyfillSpeechRecognition); 206 | } else { 207 | recognitionManager = new RecognitionManager(PolyfillSpeechRecognition); 208 | } 209 | const browserSupportsPolyfill = 210 | !!PolyfillSpeechRecognition && browserSupportsPolyfills(); 211 | _browserSupportsSpeechRecognition = browserSupportsPolyfill; 212 | _browserSupportsContinuousListening = browserSupportsPolyfill; 213 | }, 214 | removePolyfill: () => { 215 | if (recognitionManager) { 216 | recognitionManager.setSpeechRecognition(NativeSpeechRecognition); 217 | } else { 218 | recognitionManager = new RecognitionManager(NativeSpeechRecognition); 219 | } 220 | _browserSupportsSpeechRecognition = !!NativeSpeechRecognition; 221 | _browserSupportsContinuousListening = 222 | _browserSupportsSpeechRecognition && !isAndroid(); 223 | }, 224 | getRecognitionManager: () => { 225 | if (!recognitionManager) { 226 | recognitionManager = new RecognitionManager(NativeSpeechRecognition); 227 | } 228 | return recognitionManager; 229 | }, 230 | getRecognition: () => { 231 | const recognitionManager = SpeechRecognition.getRecognitionManager(); 232 | return recognitionManager.getRecognition(); 233 | }, 234 | startListening: async ({ continuous, language } = {}) => { 235 | const recognitionManager = SpeechRecognition.getRecognitionManager(); 236 | await recognitionManager.startListening({ continuous, language }); 237 | }, 238 | stopListening: async () => { 239 | const recognitionManager = SpeechRecognition.getRecognitionManager(); 240 | await recognitionManager.stopListening(); 241 | }, 242 | abortListening: async () => { 243 | const recognitionManager = SpeechRecognition.getRecognitionManager(); 244 | await recognitionManager.abortListening(); 245 | }, 246 | browserSupportsSpeechRecognition: () => _browserSupportsSpeechRecognition, 247 | browserSupportsContinuousListening: () => _browserSupportsContinuousListening, 248 | }; 249 | 250 | export { useSpeechRecognition }; 251 | export default SpeechRecognition; 252 | -------------------------------------------------------------------------------- /src/SpeechRecognition.test.js: -------------------------------------------------------------------------------- 1 | // @vitest-environment jsdom 2 | import { renderHook } from "@testing-library/react-hooks"; 3 | import { beforeEach, describe, expect, test, vi } from "vitest"; 4 | import { CortiSpeechRecognition } from "../tests/vendor/corti.js"; 5 | import RecognitionManager from "./RecognitionManager.js"; 6 | import SpeechRecognition, { 7 | useSpeechRecognition, 8 | } from "./SpeechRecognition.js"; 9 | import isAndroid from "./isAndroid.js"; 
10 | import { browserSupportsPolyfills } from "./utils.js"; 11 | 12 | vi.mock("./isAndroid"); 13 | vi.mock("./utils", async () => { 14 | return { 15 | ...(await vi.importActual("./utils")), 16 | browserSupportsPolyfills: vi.fn(), 17 | }; 18 | }); 19 | 20 | const mockRecognitionManager = () => { 21 | const recognitionManager = new RecognitionManager(window.SpeechRecognition); 22 | SpeechRecognition.getRecognitionManager = () => recognitionManager; 23 | return recognitionManager; 24 | }; 25 | 26 | const mockMicrophoneUnavailable = () => { 27 | const mockSpeechRecognition = vi.fn().mockImplementation(() => ({ 28 | start: async () => Promise.reject(new Error()), 29 | })); 30 | SpeechRecognition.applyPolyfill(mockSpeechRecognition); 31 | const recognitionManager = new RecognitionManager(mockSpeechRecognition); 32 | SpeechRecognition.getRecognitionManager = () => recognitionManager; 33 | }; 34 | 35 | describe("SpeechRecognition", () => { 36 | beforeEach(() => { 37 | isAndroid.mockClear(); 38 | browserSupportsPolyfills.mockImplementation(() => true); 39 | SpeechRecognition.applyPolyfill(CortiSpeechRecognition); 40 | }); 41 | 42 | test("sets applyPolyfill correctly", () => { 43 | const MockSpeechRecognition = class {}; 44 | 45 | expect( 46 | SpeechRecognition.getRecognition() instanceof CortiSpeechRecognition, 47 | ).toBe(true); 48 | 49 | SpeechRecognition.applyPolyfill(MockSpeechRecognition); 50 | 51 | expect(SpeechRecognition.browserSupportsSpeechRecognition()).toBe(true); 52 | expect( 53 | SpeechRecognition.getRecognition() instanceof MockSpeechRecognition, 54 | ).toBe(true); 55 | }); 56 | 57 | test("does not collect transcripts from previous speech recognition after polyfill applied", async () => { 58 | const cortiSpeechRecognition = SpeechRecognition.getRecognition(); 59 | 60 | const { result } = renderHook(() => useSpeechRecognition()); 61 | const speech = "This is a test"; 62 | await SpeechRecognition.startListening(); 63 | SpeechRecognition.applyPolyfill(class {}); 64 | cortiSpeechRecognition.say(speech); 65 | 66 | const { transcript, interimTranscript, finalTranscript } = result.current; 67 | expect(transcript).toBe(""); 68 | expect(interimTranscript).toBe(""); 69 | expect(finalTranscript).toBe(""); 70 | }); 71 | 72 | test("stops listening after polyfill applied", async () => { 73 | const { result } = renderHook(() => useSpeechRecognition()); 74 | await SpeechRecognition.startListening(); 75 | SpeechRecognition.applyPolyfill(class {}); 76 | 77 | const { listening } = result.current; 78 | expect(listening).toBe(false); 79 | }); 80 | 81 | test("sets browserSupportsContinuousListening to false when using polyfill on unsupported browser", () => { 82 | browserSupportsPolyfills.mockImplementation(() => false); 83 | const MockSpeechRecognition = class {}; 84 | SpeechRecognition.applyPolyfill(MockSpeechRecognition); 85 | 86 | const { result } = renderHook(() => useSpeechRecognition()); 87 | const { browserSupportsContinuousListening } = result.current; 88 | 89 | expect(browserSupportsContinuousListening).toBe(false); 90 | expect(SpeechRecognition.browserSupportsContinuousListening()).toBe(false); 91 | }); 92 | 93 | test("sets browserSupportsSpeechRecognition to false when using polyfill on unsupported browser", () => { 94 | browserSupportsPolyfills.mockImplementation(() => false); 95 | const MockSpeechRecognition = class {}; 96 | SpeechRecognition.applyPolyfill(MockSpeechRecognition); 97 | 98 | const { result } = renderHook(() => useSpeechRecognition()); 99 | const { 
browserSupportsSpeechRecognition } = result.current; 100 | 101 | expect(browserSupportsSpeechRecognition).toBe(false); 102 | expect(SpeechRecognition.browserSupportsSpeechRecognition()).toBe(false); 103 | }); 104 | 105 | test("reverts to native recognition when removePolyfill called", () => { 106 | const MockSpeechRecognition = class {}; 107 | SpeechRecognition.applyPolyfill(MockSpeechRecognition); 108 | 109 | expect( 110 | SpeechRecognition.getRecognition() instanceof MockSpeechRecognition, 111 | ).toBe(true); 112 | 113 | browserSupportsPolyfills.mockImplementation(() => false); 114 | SpeechRecognition.applyPolyfill(); 115 | 116 | expect(SpeechRecognition.browserSupportsSpeechRecognition()).toBe(false); 117 | expect(SpeechRecognition.browserSupportsContinuousListening()).toBe(false); 118 | 119 | SpeechRecognition.removePolyfill(); 120 | 121 | expect(SpeechRecognition.browserSupportsSpeechRecognition()).toBe(true); 122 | expect(SpeechRecognition.browserSupportsContinuousListening()).toBe(true); 123 | expect( 124 | SpeechRecognition.getRecognition() instanceof CortiSpeechRecognition, 125 | ).toBe(true); 126 | }); 127 | 128 | test("sets browserSupportsContinuousListening to false when given falsey SpeechRecognition", () => { 129 | SpeechRecognition.applyPolyfill(); 130 | 131 | const { result } = renderHook(() => useSpeechRecognition()); 132 | const { browserSupportsContinuousListening } = result.current; 133 | 134 | expect(browserSupportsContinuousListening).toBe(false); 135 | expect(SpeechRecognition.browserSupportsContinuousListening()).toBe(false); 136 | }); 137 | 138 | test("sets browserSupportsSpeechRecognition to false when given falsey SpeechRecognition", () => { 139 | SpeechRecognition.applyPolyfill(); 140 | 141 | const { result } = renderHook(() => useSpeechRecognition()); 142 | const { browserSupportsSpeechRecognition } = result.current; 143 | 144 | expect(browserSupportsSpeechRecognition).toBe(false); 145 | expect(SpeechRecognition.browserSupportsSpeechRecognition()).toBe(false); 146 | }); 147 | 148 | test("sets default transcripts correctly", () => { 149 | const { result } = renderHook(() => useSpeechRecognition()); 150 | 151 | const { transcript, interimTranscript, finalTranscript } = result.current; 152 | expect(transcript).toBe(""); 153 | expect(interimTranscript).toBe(""); 154 | expect(finalTranscript).toBe(""); 155 | }); 156 | 157 | test("updates transcripts correctly", async () => { 158 | mockRecognitionManager(); 159 | const { result } = renderHook(() => useSpeechRecognition()); 160 | const speech = "This is a test"; 161 | 162 | await SpeechRecognition.startListening(); 163 | SpeechRecognition.getRecognition().say(speech); 164 | 165 | const { transcript, interimTranscript, finalTranscript } = result.current; 166 | expect(transcript).toBe(speech); 167 | expect(interimTranscript).toBe(""); 168 | expect(finalTranscript).toBe(speech); 169 | }); 170 | 171 | test("resets transcripts correctly", async () => { 172 | mockRecognitionManager(); 173 | const { result } = renderHook(() => useSpeechRecognition()); 174 | const speech = "This is a test"; 175 | 176 | await SpeechRecognition.startListening(); 177 | SpeechRecognition.getRecognition().say(speech); 178 | result.current.resetTranscript(); 179 | 180 | const { transcript, interimTranscript, finalTranscript } = result.current; 181 | expect(transcript).toBe(""); 182 | expect(interimTranscript).toBe(""); 183 | expect(finalTranscript).toBe(""); 184 | }); 185 | 186 | test("is listening when Speech Recognition is listening", async () => 
{ 187 | mockRecognitionManager(); 188 | const { result } = renderHook(() => useSpeechRecognition()); 189 | await SpeechRecognition.startListening(); 190 | 191 | expect(result.current.listening).toBe(true); 192 | }); 193 | 194 | test("is not listening when Speech Recognition is not listening", () => { 195 | mockRecognitionManager(); 196 | const { result } = renderHook(() => useSpeechRecognition()); 197 | 198 | expect(result.current.listening).toBe(false); 199 | }); 200 | 201 | test("exposes Speech Recognition object", () => { 202 | const recognitionManager = mockRecognitionManager(); 203 | 204 | expect(SpeechRecognition.getRecognition()).toBe( 205 | recognitionManager.recognition, 206 | ); 207 | }); 208 | 209 | test("ignores speech when listening is stopped", () => { 210 | mockRecognitionManager(); 211 | const { result } = renderHook(() => useSpeechRecognition()); 212 | const speech = "This is a test"; 213 | 214 | SpeechRecognition.getRecognition().say(speech); 215 | 216 | const { transcript, interimTranscript, finalTranscript } = result.current; 217 | expect(transcript).toBe(""); 218 | expect(interimTranscript).toBe(""); 219 | expect(finalTranscript).toBe(""); 220 | }); 221 | 222 | test("ignores speech when listening is aborted", async () => { 223 | mockRecognitionManager(); 224 | const { result } = renderHook(() => useSpeechRecognition()); 225 | const speech = "This is a test"; 226 | 227 | await SpeechRecognition.startListening(); 228 | SpeechRecognition.abortListening(); 229 | SpeechRecognition.getRecognition().say(speech); 230 | 231 | const { transcript, interimTranscript, finalTranscript } = result.current; 232 | expect(transcript).toBe(""); 233 | expect(interimTranscript).toBe(""); 234 | expect(finalTranscript).toBe(""); 235 | }); 236 | 237 | test("transcribes when listening is started", async () => { 238 | mockRecognitionManager(); 239 | const { result } = renderHook(() => useSpeechRecognition()); 240 | const speech = "This is a test"; 241 | 242 | await SpeechRecognition.startListening(); 243 | SpeechRecognition.getRecognition().say(speech); 244 | 245 | const { transcript, interimTranscript, finalTranscript } = result.current; 246 | expect(transcript).toBe(speech); 247 | expect(interimTranscript).toBe(""); 248 | expect(finalTranscript).toBe(speech); 249 | }); 250 | 251 | test("does not transcribe when listening is started but not transcribing", async () => { 252 | mockRecognitionManager(); 253 | const { result } = renderHook(() => 254 | useSpeechRecognition({ transcribing: false }), 255 | ); 256 | const speech = "This is a test"; 257 | 258 | await SpeechRecognition.startListening(); 259 | SpeechRecognition.getRecognition().say(speech); 260 | 261 | const { transcript, interimTranscript, finalTranscript } = result.current; 262 | expect(transcript).toBe(""); 263 | expect(interimTranscript).toBe(""); 264 | expect(finalTranscript).toBe(""); 265 | }); 266 | 267 | test("listens discontinuously by default", async () => { 268 | mockRecognitionManager(); 269 | renderHook(() => useSpeechRecognition()); 270 | const speech = "This is a test"; 271 | 272 | await SpeechRecognition.startListening(); 273 | SpeechRecognition.getRecognition().say(speech); 274 | SpeechRecognition.getRecognition().say(speech); 275 | }); 276 | 277 | test("can turn continuous listening on", async () => { 278 | mockRecognitionManager(); 279 | const { result } = renderHook(() => useSpeechRecognition()); 280 | const speech = "This is a test"; 281 | const expectedTranscript = [speech, speech].join(" "); 282 | 283 | await
SpeechRecognition.startListening({ continuous: true }); 284 | SpeechRecognition.getRecognition().say(speech); 285 | SpeechRecognition.getRecognition().say(speech); 286 | 287 | const { transcript, interimTranscript, finalTranscript } = result.current; 288 | expect(transcript).toBe(expectedTranscript); 289 | expect(interimTranscript).toBe(""); 290 | expect(finalTranscript).toBe(expectedTranscript); 291 | }); 292 | 293 | test("can reset transcript from command callback", async () => { 294 | mockRecognitionManager(); 295 | const commands = [ 296 | { 297 | command: "clear", 298 | callback: ({ resetTranscript }) => resetTranscript(), 299 | }, 300 | ]; 301 | const { result } = renderHook(() => useSpeechRecognition({ commands })); 302 | 303 | await SpeechRecognition.startListening({ continuous: true }); 304 | SpeechRecognition.getRecognition().say("test"); 305 | 306 | expect(result.current.transcript).toBe("test"); 307 | 308 | SpeechRecognition.getRecognition().say("clear"); 309 | 310 | const { transcript, interimTranscript, finalTranscript } = result.current; 311 | expect(transcript).toBe(""); 312 | expect(interimTranscript).toBe(""); 313 | expect(finalTranscript).toBe(""); 314 | }); 315 | 316 | test("can set language", async () => { 317 | mockRecognitionManager(); 318 | renderHook(() => useSpeechRecognition()); 319 | 320 | await SpeechRecognition.startListening({ language: "zh-CN" }); 321 | 322 | expect(SpeechRecognition.getRecognition().lang).toBe("zh-CN"); 323 | }); 324 | 325 | test("does not collect transcript after listening is stopped", async () => { 326 | mockRecognitionManager(); 327 | const { result } = renderHook(() => useSpeechRecognition()); 328 | const speech = "This is a test"; 329 | 330 | await SpeechRecognition.startListening(); 331 | SpeechRecognition.stopListening(); 332 | SpeechRecognition.getRecognition().say(speech); 333 | 334 | const { transcript, interimTranscript, finalTranscript } = result.current; 335 | expect(transcript).toBe(""); 336 | expect(interimTranscript).toBe(""); 337 | expect(finalTranscript).toBe(""); 338 | }); 339 | 340 | test("sets interim transcript correctly", async () => { 341 | mockRecognitionManager(); 342 | const { result } = renderHook(() => useSpeechRecognition()); 343 | const speech = "This is a test"; 344 | 345 | await SpeechRecognition.startListening(); 346 | SpeechRecognition.getRecognition().say(speech, { onlyFirstResult: true }); 347 | 348 | const { transcript, interimTranscript, finalTranscript } = result.current; 349 | expect(transcript).toBe("This"); 350 | expect(interimTranscript).toBe("This"); 351 | expect(finalTranscript).toBe(""); 352 | }); 353 | 354 | test("appends interim transcript correctly", async () => { 355 | mockRecognitionManager(); 356 | const { result } = renderHook(() => useSpeechRecognition()); 357 | const speech = "This is a test"; 358 | 359 | await SpeechRecognition.startListening({ continuous: true }); 360 | SpeechRecognition.getRecognition().say(speech); 361 | SpeechRecognition.getRecognition().say(speech, { onlyFirstResult: true }); 362 | 363 | const { transcript, interimTranscript, finalTranscript } = result.current; 364 | expect(transcript).toBe("This is a test This"); 365 | expect(interimTranscript).toBe("This"); 366 | expect(finalTranscript).toBe(speech); 367 | }); 368 | 369 | test("appends interim transcript correctly on Android", async () => { 370 | isAndroid.mockReturnValue(true); 371 | mockRecognitionManager(); 372 | const { result } = renderHook(() => useSpeechRecognition()); 373 | const speech = "This is a 
test"; 374 | 375 | await SpeechRecognition.startListening({ continuous: true }); 376 | SpeechRecognition.getRecognition().say(speech, { isAndroid: true }); 377 | SpeechRecognition.getRecognition().say(speech, { 378 | onlyFirstResult: true, 379 | isAndroid: true, 380 | }); 381 | 382 | const { transcript, interimTranscript, finalTranscript } = result.current; 383 | expect(transcript).toBe("This is a test This"); 384 | expect(interimTranscript).toBe("This"); 385 | expect(finalTranscript).toBe(speech); 386 | }); 387 | 388 | test("resets transcript on subsequent discontinuous speech when clearTranscriptOnListen set", async () => { 389 | mockRecognitionManager(); 390 | const { result } = renderHook(() => useSpeechRecognition()); 391 | const speech = "This is a test"; 392 | 393 | await SpeechRecognition.startListening(); 394 | SpeechRecognition.getRecognition().say(speech); 395 | 396 | expect(result.current.transcript).toBe(speech); 397 | expect(result.current.interimTranscript).toBe(""); 398 | expect(result.current.finalTranscript).toBe(speech); 399 | 400 | SpeechRecognition.stopListening(); 401 | 402 | expect(result.current.transcript).toBe(speech); 403 | expect(result.current.interimTranscript).toBe(""); 404 | expect(result.current.finalTranscript).toBe(speech); 405 | 406 | await SpeechRecognition.startListening(); 407 | 408 | expect(result.current.transcript).toBe(""); 409 | expect(result.current.interimTranscript).toBe(""); 410 | expect(result.current.finalTranscript).toBe(""); 411 | }); 412 | 413 | test("does not reset transcript on subsequent discontinuous speech when clearTranscriptOnListen not set", async () => { 414 | mockRecognitionManager(); 415 | const { result } = renderHook(() => 416 | useSpeechRecognition({ clearTranscriptOnListen: false }), 417 | ); 418 | const speech = "This is a test"; 419 | 420 | await SpeechRecognition.startListening(); 421 | SpeechRecognition.getRecognition().say(speech); 422 | expect(result.current.transcript).toBe(speech); 423 | expect(result.current.interimTranscript).toBe(""); 424 | expect(result.current.finalTranscript).toBe(speech); 425 | 426 | SpeechRecognition.stopListening(); 427 | 428 | expect(result.current.transcript).toBe(speech); 429 | expect(result.current.interimTranscript).toBe(""); 430 | expect(result.current.finalTranscript).toBe(speech); 431 | 432 | await SpeechRecognition.startListening(); 433 | 434 | expect(result.current.transcript).toBe(speech); 435 | expect(result.current.interimTranscript).toBe(""); 436 | expect(result.current.finalTranscript).toBe(speech); 437 | }); 438 | 439 | test("does not call command callback when no command matched", async () => { 440 | mockRecognitionManager(); 441 | const mockCommandCallback = vi.fn(); 442 | const commands = [ 443 | { 444 | command: "hello world", 445 | callback: mockCommandCallback, 446 | matchInterim: false, 447 | }, 448 | ]; 449 | renderHook(() => useSpeechRecognition({ commands })); 450 | const speech = "This is a test"; 451 | 452 | await SpeechRecognition.startListening(); 453 | SpeechRecognition.getRecognition().say(speech); 454 | 455 | expect(mockCommandCallback).not.toHaveBeenCalled(); 456 | }); 457 | 458 | test("matches simple command", async () => { 459 | mockRecognitionManager(); 460 | const mockCommandCallback = vi.fn(); 461 | const commands = [ 462 | { 463 | command: "hello world", 464 | callback: mockCommandCallback, 465 | }, 466 | ]; 467 | renderHook(() => useSpeechRecognition({ commands })); 468 | const speech = "hello world"; 469 | 470 | await 
SpeechRecognition.startListening(); 471 | SpeechRecognition.getRecognition().say(speech); 472 | 473 | expect(mockCommandCallback).toHaveBeenCalledTimes(1); 474 | }); 475 | 476 | test("matches one splat", async () => { 477 | mockRecognitionManager(); 478 | const mockCommandCallback = vi.fn(); 479 | const command = "I want to eat * and fries"; 480 | const commands = [ 481 | { 482 | command, 483 | callback: mockCommandCallback, 484 | }, 485 | ]; 486 | const { result } = renderHook(() => useSpeechRecognition({ commands })); 487 | const { resetTranscript } = result.current; 488 | const speech = "I want to eat pizza and fries"; 489 | 490 | await SpeechRecognition.startListening(); 491 | SpeechRecognition.getRecognition().say(speech); 492 | 493 | expect(mockCommandCallback).toHaveBeenCalledTimes(1); 494 | expect(mockCommandCallback).toHaveBeenCalledWith("pizza", { 495 | command, 496 | resetTranscript, 497 | }); 498 | }); 499 | 500 | test("matches one splat at the end of the sentence", async () => { 501 | mockRecognitionManager(); 502 | const mockCommandCallback = vi.fn(); 503 | const command = "I want to eat *"; 504 | const commands = [ 505 | { 506 | command, 507 | callback: mockCommandCallback, 508 | }, 509 | ]; 510 | const { result } = renderHook(() => useSpeechRecognition({ commands })); 511 | const { resetTranscript } = result.current; 512 | const speech = "I want to eat pizza and fries"; 513 | 514 | await SpeechRecognition.startListening(); 515 | SpeechRecognition.getRecognition().say(speech); 516 | 517 | expect(mockCommandCallback).toHaveBeenCalledTimes(1); 518 | expect(mockCommandCallback).toHaveBeenCalledWith("pizza and fries", { 519 | command, 520 | resetTranscript, 521 | }); 522 | }); 523 | 524 | test("matches two splats", async () => { 525 | mockRecognitionManager(); 526 | const mockCommandCallback = vi.fn(); 527 | const command = "I want to eat * and *"; 528 | const commands = [ 529 | { 530 | command, 531 | callback: mockCommandCallback, 532 | }, 533 | ]; 534 | const { result } = renderHook(() => useSpeechRecognition({ commands })); 535 | const { resetTranscript } = result.current; 536 | const speech = "I want to eat pizza and fries"; 537 | 538 | await SpeechRecognition.startListening(); 539 | SpeechRecognition.getRecognition().say(speech); 540 | 541 | expect(mockCommandCallback).toHaveBeenCalledTimes(1); 542 | expect(mockCommandCallback).toHaveBeenCalledWith("pizza", "fries", { 543 | command, 544 | resetTranscript, 545 | }); 546 | }); 547 | 548 | test("matches optional words when optional word spoken", async () => { 549 | mockRecognitionManager(); 550 | const mockCommandCallback = vi.fn(); 551 | const commands = [ 552 | { 553 | command: "Hello (to) you", 554 | callback: mockCommandCallback, 555 | }, 556 | ]; 557 | renderHook(() => useSpeechRecognition({ commands })); 558 | const speech = "Hello to you"; 559 | 560 | await SpeechRecognition.startListening(); 561 | SpeechRecognition.getRecognition().say(speech); 562 | 563 | expect(mockCommandCallback).toHaveBeenCalledTimes(1); 564 | }); 565 | 566 | test("matches optional words when optional word not spoken", async () => { 567 | mockRecognitionManager(); 568 | const mockCommandCallback = vi.fn(); 569 | const commands = [ 570 | { 571 | command: "Hello (to) you", 572 | callback: mockCommandCallback, 573 | }, 574 | ]; 575 | renderHook(() => useSpeechRecognition({ commands })); 576 | const speech = "Hello you"; 577 | 578 | await SpeechRecognition.startListening(); 579 | SpeechRecognition.getRecognition().say(speech); 580 | 581 | 
expect(mockCommandCallback).toHaveBeenCalledTimes(1); 582 | }); 583 | 584 | test("matches named variable", async () => { 585 | mockRecognitionManager(); 586 | const mockCommandCallback = vi.fn(); 587 | const command = "I :action with my little eye"; 588 | const commands = [ 589 | { 590 | command, 591 | callback: mockCommandCallback, 592 | }, 593 | ]; 594 | const { result } = renderHook(() => useSpeechRecognition({ commands })); 595 | const { resetTranscript } = result.current; 596 | const speech = "I spy with my little eye"; 597 | 598 | await SpeechRecognition.startListening(); 599 | SpeechRecognition.getRecognition().say(speech); 600 | 601 | expect(mockCommandCallback).toHaveBeenCalledTimes(1); 602 | expect(mockCommandCallback).toHaveBeenCalledWith("spy", { 603 | command, 604 | resetTranscript, 605 | }); 606 | }); 607 | 608 | test("matches regex", async () => { 609 | mockRecognitionManager(); 610 | const mockCommandCallback = vi.fn(); 611 | const commands = [ 612 | { 613 | command: /This is a \s+ test\.+/, 614 | callback: mockCommandCallback, 615 | }, 616 | ]; 617 | renderHook(() => useSpeechRecognition({ commands })); 618 | const speech = "This is a test......."; 619 | 620 | await SpeechRecognition.startListening(); 621 | SpeechRecognition.getRecognition().say(speech); 622 | 623 | expect(mockCommandCallback).toHaveBeenCalledTimes(1); 624 | }); 625 | 626 | test("matches regex case-insensitively", async () => { 627 | mockRecognitionManager(); 628 | const mockCommandCallback = vi.fn(); 629 | const commands = [ 630 | { 631 | command: /This is a \s+ test\.+/, 632 | callback: mockCommandCallback, 633 | }, 634 | ]; 635 | renderHook(() => useSpeechRecognition({ commands })); 636 | const speech = "this is a TEST......."; 637 | 638 | await SpeechRecognition.startListening(); 639 | SpeechRecognition.getRecognition().say(speech); 640 | 641 | expect(mockCommandCallback).toHaveBeenCalledTimes(1); 642 | }); 643 | 644 | test("matches multiple commands", async () => { 645 | mockRecognitionManager(); 646 | const mockCommandCallback1 = vi.fn(); 647 | const mockCommandCallback2 = vi.fn(); 648 | const mockCommandCallback3 = vi.fn(); 649 | const command1 = "I want to eat * and *"; 650 | const command2 = "* and fries are great"; 651 | const commands = [ 652 | { 653 | command: command1, 654 | callback: mockCommandCallback1, 655 | }, 656 | { 657 | command: command2, 658 | callback: mockCommandCallback2, 659 | }, 660 | { 661 | command: "flibble", 662 | callback: mockCommandCallback3, 663 | }, 664 | ]; 665 | const { result } = renderHook(() => useSpeechRecognition({ commands })); 666 | const { resetTranscript } = result.current; 667 | const speech = "I want to eat pizza and fries are great"; 668 | 669 | await SpeechRecognition.startListening(); 670 | SpeechRecognition.getRecognition().say(speech); 671 | 672 | expect(mockCommandCallback1).toHaveBeenCalledTimes(1); 673 | expect(mockCommandCallback1).toHaveBeenCalledWith( 674 | "pizza", 675 | "fries are great", 676 | { command: command1, resetTranscript }, 677 | ); 678 | expect(mockCommandCallback2).toHaveBeenCalledTimes(1); 679 | expect(mockCommandCallback2).toHaveBeenCalledWith("I want to eat pizza", { 680 | command: command2, 681 | resetTranscript, 682 | }); 683 | expect(mockCommandCallback3).not.toHaveBeenCalled(); 684 | }); 685 | 686 | test("matches arrays of commands", async () => { 687 | mockRecognitionManager(); 688 | const mockCommandCallback1 = vi.fn(); 689 | const mockCommandCallback2 = vi.fn(); 690 | const command1 = "I want to eat * and *"; 691 | const 
command2 = "* and fries are great"; 692 | const command3 = "* and * are great"; 693 | const commands = [ 694 | { 695 | command: [command1, command2], 696 | callback: mockCommandCallback1, 697 | }, 698 | { 699 | command: command3, 700 | callback: mockCommandCallback2, 701 | }, 702 | ]; 703 | const { result } = renderHook(() => useSpeechRecognition({ commands })); 704 | const { resetTranscript } = result.current; 705 | const speech = "I want to eat pizza and fries are great"; 706 | 707 | await SpeechRecognition.startListening(); 708 | SpeechRecognition.getRecognition().say(speech); 709 | 710 | expect(mockCommandCallback1).toHaveBeenCalledTimes(2); 711 | expect(mockCommandCallback1).toHaveBeenNthCalledWith( 712 | 1, 713 | "pizza", 714 | "fries are great", 715 | { command: command1, resetTranscript }, 716 | ); 717 | expect(mockCommandCallback1).toHaveBeenNthCalledWith( 718 | 2, 719 | "I want to eat pizza", 720 | { command: command2, resetTranscript }, 721 | ); 722 | expect(mockCommandCallback2).toHaveBeenCalledTimes(1); 723 | expect(mockCommandCallback2).toHaveBeenCalledWith( 724 | "I want to eat pizza", 725 | "fries", 726 | { command: command3, resetTranscript }, 727 | ); 728 | }); 729 | 730 | test("does not match interim results by default", async () => { 731 | mockRecognitionManager(); 732 | const mockCommandCallback = vi.fn(); 733 | const commands = [ 734 | { 735 | command: "This is", 736 | callback: mockCommandCallback, 737 | }, 738 | ]; 739 | renderHook(() => useSpeechRecognition({ commands })); 740 | const speech = "This is a test"; 741 | 742 | await SpeechRecognition.startListening(); 743 | SpeechRecognition.getRecognition().say(speech); 744 | 745 | expect(mockCommandCallback).not.toHaveBeenCalled(); 746 | }); 747 | 748 | test("matches interim results when configured", async () => { 749 | mockRecognitionManager(); 750 | const mockCommandCallback = vi.fn(); 751 | const commands = [ 752 | { 753 | command: "This is", 754 | callback: mockCommandCallback, 755 | matchInterim: true, 756 | }, 757 | ]; 758 | renderHook(() => useSpeechRecognition({ commands })); 759 | const speech = "This is a test"; 760 | 761 | await SpeechRecognition.startListening(); 762 | SpeechRecognition.getRecognition().say(speech); 763 | 764 | expect(mockCommandCallback).toHaveBeenCalledTimes(1); 765 | }); 766 | 767 | test("transcript resets should be per instance, not global", async () => { 768 | mockRecognitionManager(); 769 | const hook1 = renderHook(() => useSpeechRecognition()); 770 | const hook2 = renderHook(() => useSpeechRecognition()); 771 | const speech = "This is a test"; 772 | 773 | await SpeechRecognition.startListening({ continuous: true }); 774 | SpeechRecognition.getRecognition().say(speech); 775 | hook2.result.current.resetTranscript(); 776 | 777 | expect(hook2.result.current.transcript).toBe(""); 778 | expect(hook2.result.current.interimTranscript).toBe(""); 779 | expect(hook2.result.current.finalTranscript).toBe(""); 780 | expect(hook1.result.current.transcript).toBe(speech); 781 | expect(hook1.result.current.interimTranscript).toBe(""); 782 | expect(hook1.result.current.finalTranscript).toBe(speech); 783 | }); 784 | 785 | test("does not call command callback when isFuzzyMatch is not true", async () => { 786 | mockRecognitionManager(); 787 | const mockCommandCallback = vi.fn(); 788 | const commands = [ 789 | { 790 | command: "hello world", 791 | callback: mockCommandCallback, 792 | }, 793 | ]; 794 | renderHook(() => useSpeechRecognition({ commands })); 795 | const speech = "This is a test"; 796 | 797 | 
await SpeechRecognition.startListening(); 798 | SpeechRecognition.getRecognition().say(speech); 799 | 800 | expect(mockCommandCallback).not.toHaveBeenCalled(); 801 | }); 802 | 803 | test("does not call command callback when isFuzzyMatch is true and similarity is less than fuzzyMatchingThreshold", async () => { 804 | mockRecognitionManager(); 805 | const mockCommandCallback = vi.fn(); 806 | const commands = [ 807 | { 808 | command: "hello world", 809 | callback: mockCommandCallback, 810 | isFuzzyMatch: true, 811 | fuzzyMatchingThreshold: 0.7, 812 | }, 813 | ]; 814 | renderHook(() => useSpeechRecognition({ commands })); 815 | const speech = "Hello"; 816 | 817 | await SpeechRecognition.startListening(); 818 | SpeechRecognition.getRecognition().say(speech); 819 | 820 | expect(mockCommandCallback).not.toHaveBeenCalled(); 821 | }); 822 | 823 | test("does call command callback when isFuzzyMatch is true and similarity is equal or greater than fuzzyMatchingThreshold", async () => { 824 | mockRecognitionManager(); 825 | const mockCommandCallback = vi.fn(); 826 | const commands = [ 827 | { 828 | command: "hello world", 829 | callback: mockCommandCallback, 830 | isFuzzyMatch: true, 831 | fuzzyMatchingThreshold: 0.5, 832 | }, 833 | ]; 834 | renderHook(() => useSpeechRecognition({ commands })); 835 | const speech = "Hello"; 836 | 837 | await SpeechRecognition.startListening(); 838 | SpeechRecognition.getRecognition().say(speech); 839 | 840 | expect(mockCommandCallback).toHaveBeenCalledTimes(1); 841 | }); 842 | 843 | test("callback is called with command, transcript and similarity ratio between those", async () => { 844 | mockRecognitionManager(); 845 | const mockCommandCallback = vi.fn(); 846 | const command = "I want to eat"; 847 | const commands = [ 848 | { 849 | command, 850 | callback: mockCommandCallback, 851 | isFuzzyMatch: true, 852 | fuzzyMatchingThreshold: 0.5, 853 | }, 854 | ]; 855 | const { result } = renderHook(() => useSpeechRecognition({ commands })); 856 | const { resetTranscript } = result.current; 857 | const speech = "I want to drink"; 858 | 859 | await SpeechRecognition.startListening(); 860 | SpeechRecognition.getRecognition().say(speech); 861 | 862 | expect(mockCommandCallback).toHaveBeenCalledTimes(1); 863 | expect(mockCommandCallback).toHaveBeenCalledWith( 864 | "I want to eat", 865 | "I want to drink", 866 | 0.6, 867 | { command, resetTranscript }, 868 | ); 869 | }); 870 | 871 | test("different callbacks can be called for the same speech and with fuzzyMatchingThreshold", async () => { 872 | mockRecognitionManager(); 873 | const mockCommandCallback1 = vi.fn(); 874 | const mockCommandCallback2 = vi.fn(); 875 | const commands = [ 876 | { 877 | command: "I want to eat", 878 | callback: mockCommandCallback1, 879 | isFuzzyMatch: true, 880 | fuzzyMatchingThreshold: 1, 881 | }, 882 | { 883 | command: "I want to sleep", 884 | callback: mockCommandCallback2, 885 | isFuzzyMatch: true, 886 | fuzzyMatchingThreshold: 0.2, 887 | }, 888 | ]; 889 | renderHook(() => useSpeechRecognition({ commands })); 890 | const speech = "I want to eat"; 891 | 892 | await SpeechRecognition.startListening(); 893 | SpeechRecognition.getRecognition().say(speech); 894 | 895 | expect(mockCommandCallback1).toHaveBeenCalledTimes(1); 896 | expect(mockCommandCallback2).toHaveBeenCalledTimes(1); 897 | }); 898 | 899 | test("fuzzy callback called for each matching command in array by default", async () => { 900 | mockRecognitionManager(); 901 | const mockCommandCallback = vi.fn(); 902 | const command1 = "I want to eat"; 903 
| const command2 = "I want to sleep"; 904 | const commands = [ 905 | { 906 | command: [command1, command2], 907 | callback: mockCommandCallback, 908 | isFuzzyMatch: true, 909 | fuzzyMatchingThreshold: 0.2, 910 | }, 911 | ]; 912 | const { result } = renderHook(() => useSpeechRecognition({ commands })); 913 | const { resetTranscript } = result.current; 914 | const speech = "I want to leap"; 915 | 916 | await SpeechRecognition.startListening(); 917 | SpeechRecognition.getRecognition().say(speech); 918 | 919 | expect(mockCommandCallback).toHaveBeenCalledTimes(2); 920 | expect(mockCommandCallback).toHaveBeenNthCalledWith( 921 | 1, 922 | command1, 923 | "I want to leap", 924 | 0.7368421052631579, 925 | { command: command1, resetTranscript }, 926 | ); 927 | expect(mockCommandCallback).toHaveBeenNthCalledWith( 928 | 2, 929 | command2, 930 | "I want to leap", 931 | 0.6666666666666666, 932 | { command: command2, resetTranscript }, 933 | ); 934 | }); 935 | 936 | test("fuzzy callback called only for best matching command in array when bestMatchOnly is true", async () => { 937 | mockRecognitionManager(); 938 | const mockCommandCallback = vi.fn(); 939 | const command1 = "I want to eat"; 940 | const command2 = "I want to sleep"; 941 | const commands = [ 942 | { 943 | command: [command1, command2], 944 | callback: mockCommandCallback, 945 | isFuzzyMatch: true, 946 | fuzzyMatchingThreshold: 0.2, 947 | bestMatchOnly: true, 948 | }, 949 | ]; 950 | const { result } = renderHook(() => useSpeechRecognition({ commands })); 951 | const { resetTranscript } = result.current; 952 | const speech = "I want to leap"; 953 | 954 | await SpeechRecognition.startListening(); 955 | SpeechRecognition.getRecognition().say(speech); 956 | 957 | expect(mockCommandCallback).toHaveBeenCalledTimes(1); 958 | expect(mockCommandCallback).toHaveBeenNthCalledWith( 959 | 1, 960 | command1, 961 | "I want to leap", 962 | 0.7368421052631579, 963 | { command: command1, resetTranscript }, 964 | ); 965 | }); 966 | 967 | test("when command is regex with fuzzy match true runs similarity check with regex converted to string", async () => { 968 | mockRecognitionManager(); 969 | const mockCommandCallback = vi.fn(); 970 | const command = /This is a \s+ test\.+/; 971 | const commands = [ 972 | { 973 | command, 974 | callback: mockCommandCallback, 975 | isFuzzyMatch: true, 976 | }, 977 | ]; 978 | const { result } = renderHook(() => useSpeechRecognition({ commands })); 979 | const { resetTranscript } = result.current; 980 | const speech = "This is a test"; 981 | 982 | await SpeechRecognition.startListening(); 983 | SpeechRecognition.getRecognition().say(speech); 984 | 985 | expect(mockCommandCallback).toHaveBeenCalledTimes(1); 986 | expect(mockCommandCallback).toHaveBeenCalledWith( 987 | "This is a s test", 988 | "This is a test", 989 | 0.8571428571428571, 990 | { command, resetTranscript }, 991 | ); 992 | }); 993 | 994 | test("when command is string special characters with fuzzy match true, special characters are removed from string and then we test similarity", async () => { 995 | mockRecognitionManager(); 996 | const mockCommandCallback = vi.fn(); 997 | const command = "! 
(I would :like) : * a :pizza "; 998 | const commands = [ 999 | { 1000 | command, 1001 | callback: mockCommandCallback, 1002 | isFuzzyMatch: true, 1003 | }, 1004 | ]; 1005 | const { result } = renderHook(() => useSpeechRecognition({ commands })); 1006 | const { resetTranscript } = result.current; 1007 | const speech = "I would like a pizza"; 1008 | 1009 | await SpeechRecognition.startListening(); 1010 | SpeechRecognition.getRecognition().say(speech); 1011 | 1012 | expect(mockCommandCallback).toHaveBeenCalledTimes(1); 1013 | expect(mockCommandCallback).toHaveBeenCalledWith( 1014 | "I would like a pizza", 1015 | "I would like a pizza", 1016 | 1, 1017 | { command, resetTranscript }, 1018 | ); 1019 | }); 1020 | 1021 | test("sets isMicrophoneAvailable to false when recognition.start() throws", async () => { 1022 | mockMicrophoneUnavailable(); 1023 | const { result } = renderHook(() => useSpeechRecognition()); 1024 | 1025 | expect(result.current.isMicrophoneAvailable).toBe(true); 1026 | 1027 | await SpeechRecognition.startListening(); 1028 | 1029 | expect(result.current.isMicrophoneAvailable).toBe(false); 1030 | }); 1031 | 1032 | test("sets isMicrophoneAvailable to false when not-allowed error emitted", async () => { 1033 | mockRecognitionManager(); 1034 | const { result } = renderHook(() => useSpeechRecognition()); 1035 | 1036 | expect(result.current.isMicrophoneAvailable).toBe(true); 1037 | 1038 | await SpeechRecognition.getRecognitionManager().recognition.onerror({ 1039 | error: "not-allowed", 1040 | }); 1041 | 1042 | expect(result.current.isMicrophoneAvailable).toBe(false); 1043 | }); 1044 | }); 1045 | -------------------------------------------------------------------------------- /src/actions.js: -------------------------------------------------------------------------------- 1 | import { APPEND_TRANSCRIPT, CLEAR_TRANSCRIPT } from "./constants"; 2 | 3 | export const clearTranscript = () => { 4 | return { type: CLEAR_TRANSCRIPT }; 5 | }; 6 | 7 | export const appendTranscript = (interimTranscript, finalTranscript) => { 8 | return { 9 | type: APPEND_TRANSCRIPT, 10 | payload: { 11 | interimTranscript, 12 | finalTranscript, 13 | }, 14 | }; 15 | }; 16 | -------------------------------------------------------------------------------- /src/android.test.js: -------------------------------------------------------------------------------- 1 | // @vitest-environment jsdom 2 | import { renderHook } from "@testing-library/react-hooks"; 3 | import { beforeEach, describe, expect, test, vi } from "vitest"; 4 | import "../tests/vendor/corti.js"; 5 | import RecognitionManager from "./RecognitionManager.js"; 6 | import SpeechRecognition, { 7 | useSpeechRecognition, 8 | } from "./SpeechRecognition.js"; 9 | import { browserSupportsPolyfills } from "./utils.js"; 10 | 11 | vi.mock("./isAndroid", () => ({ 12 | default: () => true, 13 | })); 14 | 15 | vi.mock("./utils", async () => { 16 | return { 17 | ...(await vi.importActual("./utils")), 18 | browserSupportsPolyfills: vi.fn(), 19 | }; 20 | }); 21 | 22 | const mockRecognitionManager = () => { 23 | const recognitionManager = new RecognitionManager(window.SpeechRecognition); 24 | SpeechRecognition.getRecognitionManager = () => recognitionManager; 25 | return recognitionManager; 26 | }; 27 | 28 | describe("SpeechRecognition (Android)", () => { 29 | beforeEach(() => { 30 | browserSupportsPolyfills.mockImplementation(() => true); 31 | }); 32 | 33 | test("sets browserSupportsContinuousListening to false on Android", async () => { 34 | mockRecognitionManager(); 35 | 36 | 
const { result } = renderHook(() => useSpeechRecognition()); 37 | const { browserSupportsContinuousListening } = result.current; 38 | 39 | expect(browserSupportsContinuousListening).toEqual(false); 40 | expect(SpeechRecognition.browserSupportsContinuousListening()).toEqual( 41 | false, 42 | ); 43 | }); 44 | 45 | test("sets browserSupportsContinuousListening to true when using polyfill", () => { 46 | const MockSpeechRecognition = class {}; 47 | SpeechRecognition.applyPolyfill(MockSpeechRecognition); 48 | 49 | const { result } = renderHook(() => useSpeechRecognition()); 50 | const { browserSupportsContinuousListening } = result.current; 51 | 52 | expect(browserSupportsContinuousListening).toEqual(true); 53 | expect(SpeechRecognition.browserSupportsContinuousListening()).toEqual( 54 | true, 55 | ); 56 | }); 57 | 58 | test("sets browserSupportsContinuousListening to false when using polyfill on unsupported browser", () => { 59 | browserSupportsPolyfills.mockImplementation(() => false); 60 | const MockSpeechRecognition = class {}; 61 | SpeechRecognition.applyPolyfill(MockSpeechRecognition); 62 | 63 | const { result } = renderHook(() => useSpeechRecognition()); 64 | const { browserSupportsContinuousListening } = result.current; 65 | 66 | expect(browserSupportsContinuousListening).toEqual(false); 67 | expect(SpeechRecognition.browserSupportsContinuousListening()).toEqual( 68 | false, 69 | ); 70 | }); 71 | }); 72 | -------------------------------------------------------------------------------- /src/constants.js: -------------------------------------------------------------------------------- 1 | export const CLEAR_TRANSCRIPT = "CLEAR_TRANSCRIPT"; 2 | export const APPEND_TRANSCRIPT = "APPEND_TRANSCRIPT"; 3 | -------------------------------------------------------------------------------- /src/index.js: -------------------------------------------------------------------------------- 1 | import SpeechRecognition, { useSpeechRecognition } from "./SpeechRecognition"; 2 | 3 | export { useSpeechRecognition }; 4 | export default SpeechRecognition; 5 | -------------------------------------------------------------------------------- /src/isAndroid.js: -------------------------------------------------------------------------------- 1 | export default () => 2 | /(android)/i.test( 3 | typeof navigator !== "undefined" ? 
navigator.userAgent : "", 4 | ); 5 | -------------------------------------------------------------------------------- /src/isAndroid.test.js: -------------------------------------------------------------------------------- 1 | import { afterEach, beforeEach, describe, expect, test, vi } from "vitest"; 2 | import isAndroid from "./isAndroid.js"; 3 | 4 | describe("isAndroid", () => { 5 | beforeEach(() => { 6 | vi.stubGlobal("navigator", { userAgent: "" }); 7 | }); 8 | 9 | afterEach(() => { 10 | vi.unstubAllGlobals(); 11 | }); 12 | 13 | test("returns false when navigator.userAgent does not contain android string", () => { 14 | vi.stubGlobal("navigator", { userAgent: "safari browser" }); 15 | const result = isAndroid(); 16 | 17 | expect(result).toBe(false); 18 | }); 19 | 20 | test("returns true when navigator.userAgent contains android string", () => { 21 | vi.stubGlobal("navigator", { userAgent: "android browser" }); 22 | const result = isAndroid(); 23 | 24 | expect(result).toBe(true); 25 | }); 26 | 27 | test("returns false when navigator is undefined", () => { 28 | // Mock navigator as undefined 29 | vi.stubGlobal("navigator", undefined); 30 | const result = isAndroid(); 31 | 32 | expect(result).toBe(false); 33 | }); 34 | }); 35 | -------------------------------------------------------------------------------- /src/reducers.js: -------------------------------------------------------------------------------- 1 | import { APPEND_TRANSCRIPT, CLEAR_TRANSCRIPT } from "./constants"; 2 | import { concatTranscripts } from "./utils"; 3 | 4 | const transcriptReducer = (state, action) => { 5 | switch (action.type) { 6 | case CLEAR_TRANSCRIPT: 7 | return { 8 | interimTranscript: "", 9 | finalTranscript: "", 10 | }; 11 | case APPEND_TRANSCRIPT: 12 | return { 13 | interimTranscript: action.payload.interimTranscript, 14 | finalTranscript: concatTranscripts( 15 | state.finalTranscript, 16 | action.payload.finalTranscript, 17 | ), 18 | }; 19 | default: 20 | throw new Error(); 21 | } 22 | }; 23 | 24 | export { transcriptReducer }; 25 | -------------------------------------------------------------------------------- /src/utils.js: -------------------------------------------------------------------------------- 1 | const concatTranscripts = (...transcriptParts) => { 2 | return transcriptParts 3 | .map((t) => t.trim()) 4 | .join(" ") 5 | .trim(); 6 | }; 7 | 8 | // The command matching code is a modified version of Backbone.Router by Jeremy Ashkenas, under the MIT license. 9 | const optionalParam = /\s*\((.*?)\)\s*/g; 10 | const optionalRegex = /(\(\?:[^)]+\))\?/g; 11 | const namedParam = /(\(\?)?:\w+/g; 12 | const splatParam = /\*/g; 13 | const escapeRegExp = /[-{}[\]+?.,\\^$|#]/g; 14 | const commandToRegExp = (command) => { 15 | if (command instanceof RegExp) { 16 | return new RegExp(command.source, "i"); 17 | } 18 | command = command 19 | .replace(escapeRegExp, "\\$&") 20 | .replace(optionalParam, "(?:$1)?") 21 | .replace(namedParam, (match, optional) => { 22 | return optional ?
match : "([^\\s]+)"; 23 | }) 24 | .replace(splatParam, "(.*?)") 25 | .replace(optionalRegex, "\\s*$1?\\s*"); 26 | return new RegExp("^" + command + "$", "i"); 27 | }; 28 | 29 | // this is from https://github.com/aceakash/string-similarity 30 | const compareTwoStringsUsingDiceCoefficient = (first, second) => { 31 | first = first.replace(/\s+/g, "").toLowerCase(); 32 | second = second.replace(/\s+/g, "").toLowerCase(); 33 | 34 | if (!first.length && !second.length) return 1; // if both are empty strings 35 | if (!first.length || !second.length) return 0; // if only one is empty string 36 | if (first === second) return 1; // identical 37 | if (first.length === 1 && second.length === 1) return 0; // both are 1-letter strings 38 | if (first.length < 2 || second.length < 2) return 0; // if either is a 1-letter string 39 | 40 | const firstBigrams = new Map(); 41 | for (let i = 0; i < first.length - 1; i++) { 42 | const bigram = first.substring(i, i + 2); 43 | const count = firstBigrams.has(bigram) ? firstBigrams.get(bigram) + 1 : 1; 44 | 45 | firstBigrams.set(bigram, count); 46 | } 47 | 48 | let intersectionSize = 0; 49 | for (let i = 0; i < second.length - 1; i++) { 50 | const bigram = second.substring(i, i + 2); 51 | const count = firstBigrams.has(bigram) ? firstBigrams.get(bigram) : 0; 52 | 53 | if (count > 0) { 54 | firstBigrams.set(bigram, count - 1); 55 | intersectionSize++; 56 | } 57 | } 58 | 59 | return (2.0 * intersectionSize) / (first.length + second.length - 2); 60 | }; 61 | 62 | const browserSupportsPolyfills = () => { 63 | return ( 64 | typeof window !== "undefined" && 65 | window.navigator !== undefined && 66 | window.navigator.mediaDevices !== undefined && 67 | window.navigator.mediaDevices.getUserMedia !== undefined && 68 | (window.AudioContext !== undefined || 69 | window.webkitAudioContext !== undefined) 70 | ); 71 | }; 72 | 73 | export { 74 | concatTranscripts, 75 | commandToRegExp, 76 | compareTwoStringsUsingDiceCoefficient, 77 | browserSupportsPolyfills, 78 | }; 79 | -------------------------------------------------------------------------------- /tests/vendor/corti.js: -------------------------------------------------------------------------------- 1 | //! Corti - Replaces the browser's SpeechRecognition with a fake object. 2 | //! version : 0.4.0 3 | //! author : Tal Ater @TalAter 4 | //! license : MIT 5 | //! https://github.com/TalAter/Corti 6 | 7 | const Corti = (_root) => { 8 | // Holds the browser's implementation 9 | var _productionVersion = false; 10 | 11 | // Patch DOMException 12 | // eslint-disable-next-line no-use-before-define 13 | var DOMException = DOMException || TypeError; 14 | 15 | // Speech Recognition attributes 16 | var _maxAlternatives = 1; 17 | var _lang = ""; 18 | var _continuous = false; 19 | var _interimResults = false; 20 | 21 | var newSpeechRecognition = function () { 22 | var _self = this; 23 | var _listeners = document.createElement("div"); 24 | _self._started = false; 25 | _self._soundStarted = false; 26 | _self.eventListenerTypes = ["start", "soundstart", "end", "result"]; 27 | _self.maxAlternatives = 1; 28 | 29 | // Add listeners for events registered through attributes (e.g. 
recognition.onend = function) and not as proper listeners 30 | _self.eventListenerTypes.forEach(function (eventName) { 31 | _listeners.addEventListener( 32 | eventName, 33 | function () { 34 | if (typeof _self["on" + eventName] === "function") { 35 | _self["on" + eventName].apply(_listeners, arguments); 36 | } 37 | }, 38 | false, 39 | ); 40 | }); 41 | 42 | Object.defineProperty(this, "maxAlternatives", { 43 | get: function () { 44 | return _maxAlternatives; 45 | }, 46 | set: function (val) { 47 | if (typeof val === "number") { 48 | _maxAlternatives = Math.floor(val); 49 | } else { 50 | _maxAlternatives = 0; 51 | } 52 | }, 53 | }); 54 | 55 | Object.defineProperty(this, "lang", { 56 | get: function () { 57 | return _lang; 58 | }, 59 | set: function (val) { 60 | if (val === undefined) { 61 | val = "undefined"; 62 | } 63 | _lang = val.toString(); 64 | }, 65 | }); 66 | 67 | Object.defineProperty(this, "continuous", { 68 | get: function () { 69 | return _continuous; 70 | }, 71 | set: function (val) { 72 | _continuous = Boolean(val); 73 | }, 74 | }); 75 | 76 | Object.defineProperty(this, "interimResults", { 77 | get: function () { 78 | return _interimResults; 79 | }, 80 | set: function (val) { 81 | _interimResults = Boolean(val); 82 | }, 83 | }); 84 | 85 | this.start = function () { 86 | if (_self._started) { 87 | throw new DOMException( 88 | "Failed to execute 'start' on 'SpeechRecognition': recognition has already started.", 89 | ); 90 | } 91 | _self._started = true; 92 | // Create and dispatch an event 93 | var event = document.createEvent("CustomEvent"); 94 | event.initCustomEvent("start", false, false, null); 95 | _listeners.dispatchEvent(event); 96 | }; 97 | 98 | this.abort = function () { 99 | if (!_self._started) { 100 | return; 101 | } 102 | _self._started = false; 103 | _self._soundStarted = false; 104 | // Create and dispatch an event 105 | var event = document.createEvent("CustomEvent"); 106 | event.initCustomEvent("end", false, false, null); 107 | _listeners.dispatchEvent(event); 108 | }; 109 | 110 | this.stop = function () { 111 | return _self.abort(); 112 | }; 113 | 114 | this.isStarted = function () { 115 | return _self._started; 116 | }; 117 | 118 | this.emitStartEvent = function (text, isFinal, itemFunction, isAndroid) { 119 | var startEvent = document.createEvent("CustomEvent"); 120 | startEvent.initCustomEvent("result", false, false, { sentence: text }); 121 | startEvent.resultIndex = 0; 122 | startEvent.results = { 123 | item: itemFunction, 124 | 0: { 125 | item: itemFunction, 126 | isFinal: isFinal || isAndroid, 127 | }, 128 | }; 129 | startEvent.results[0][0] = { 130 | transcript: text, 131 | confidence: isAndroid && !isFinal ? 
0 : 1, 132 | }; 133 | Object.defineProperty(startEvent.results, "length", { 134 | get: function () { 135 | return 1; 136 | }, 137 | }); 138 | Object.defineProperty(startEvent.results[0], "length", { 139 | get: function () { 140 | return _maxAlternatives; 141 | }, 142 | }); 143 | startEvent.interpretation = null; 144 | startEvent.emma = null; 145 | _listeners.dispatchEvent(startEvent); 146 | 147 | // Create soundstart event 148 | if (!_self._soundStarted) { 149 | _self._soundStarted = true; 150 | var soundStartEvent = document.createEvent("CustomEvent"); 151 | soundStartEvent.initCustomEvent("soundstart", false, false, null); 152 | _listeners.dispatchEvent(soundStartEvent); 153 | } 154 | }; 155 | 156 | this.say = function ( 157 | sentence, 158 | { onlyFirstResult = false, isAndroid = false } = {}, 159 | ) { 160 | if (!_self._started) { 161 | return; 162 | } 163 | 164 | var itemFunction = function (index) { 165 | if (undefined === index) { 166 | throw new DOMException( 167 | "Failed to execute 'item' on 'SpeechRecognitionResult': 1 argument required, but only 0 present.", 168 | ); 169 | } 170 | index = Number(index); 171 | if (isNaN(index)) { 172 | index = 0; 173 | } 174 | if (index >= this.length) { 175 | return null; 176 | } else { 177 | return this[index]; 178 | } 179 | }; 180 | 181 | const words = sentence.split(" "); 182 | if (onlyFirstResult) { 183 | this.emitStartEvent(words[0], false, itemFunction); 184 | } else { 185 | let text = ""; 186 | words.forEach((word) => { 187 | text = [text, word].join(" "); 188 | this.emitStartEvent(text, false, itemFunction, isAndroid); 189 | }); 190 | this.emitStartEvent(sentence, true, itemFunction); 191 | if (isAndroid) { 192 | this.emitStartEvent(sentence, true, itemFunction); 193 | } 194 | } 195 | 196 | // stop if not set to continuous mode 197 | if (!_self.continuous) { 198 | _self.abort(); 199 | } 200 | }; 201 | 202 | this.addEventListener = function (event, callback) { 203 | _listeners.addEventListener(event, callback, false); 204 | }; 205 | }; 206 | 207 | // Expose functionality 208 | return { 209 | patch: function () { 210 | if (_productionVersion === false) { 211 | _productionVersion = 212 | _root.SpeechRecognition || 213 | _root.webkitSpeechRecognition || 214 | _root.mozSpeechRecognition || 215 | _root.msSpeechRecognition || 216 | _root.oSpeechRecognition; 217 | } 218 | _root.SpeechRecognition = newSpeechRecognition; 219 | }, 220 | unpatch: function () { 221 | _root.SpeechRecognition = _productionVersion; 222 | }, 223 | newSpeechRecognition, 224 | }; 225 | }; 226 | 227 | const mockSpeechRecognition = Corti(global); 228 | mockSpeechRecognition.patch(); 229 | export const CortiSpeechRecognition = 230 | mockSpeechRecognition.newSpeechRecognition; 231 | export default mockSpeechRecognition; 232 | --------------------------------------------------------------------------------
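A minimal usage sketch (not part of the repository) of how the Corti fake above can drive a test without a real microphone. It assumes a DOM-like test environment (for example jsdom, as used by the Vitest suite), and the import path and variable names are illustrative only.

// Hypothetical test sketch using the fake SpeechRecognition exported above.
import { CortiSpeechRecognition } from "../tests/vendor/corti";

const recognition = new CortiSpeechRecognition();
const transcripts = [];

// Collect every interim and final transcript the fake emits
recognition.addEventListener("result", (event) => {
  transcripts.push(event.results[0][0].transcript);
});

recognition.continuous = true;
recognition.start();
recognition.say("hello world"); // emits word-by-word interim results, then the final sentence

console.log(transcripts[transcripts.length - 1]); // "hello world"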
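A similar hypothetical sketch for the command-matching helpers in src/utils.js earlier in this section; the relative import path is assumed, and the expected values follow from the regex substitutions and the Dice coefficient formula in that file.

// Hypothetical usage of the helpers exported from src/utils.js.
import {
  commandToRegExp,
  compareTwoStringsUsingDiceCoefficient,
} from "./utils";

// ":color" is a named parameter and matches exactly one word
const named = commandToRegExp("reset :color background");
console.log(named.exec("reset blue background")[1]); // "blue"

// "*" is a splat and captures the remainder of the phrase
const splat = commandToRegExp("clear *");
console.log(splat.exec("clear the whole transcript")[1]); // "the whole transcript"

// Dice coefficient: 2 * shared bigrams / total bigrams, ignoring case and whitespace
console.log(compareTwoStringsUsingDiceCoefficient("night", "nacht")); // 0.25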