├── .editorconfig
├── .github
│   ├── FUNDING.yml
│   └── workflows
│       ├── main.yml
│       └── publish.yml
├── .gitignore
├── .nvmrc
├── .yarnrc.yml
├── LICENSE
├── README.md
├── biome.json
├── docs
│   ├── API.md
│   ├── POLYFILLS.md
│   ├── V3-MIGRATION.md
│   └── logos
│       ├── microsoft.png
│       └── speechly.png
├── package.json
├── src
│   ├── NativeSpeechRecognition.js
│   ├── RecognitionManager.js
│   ├── SpeechRecognition.js
│   ├── SpeechRecognition.test.js
│   ├── actions.js
│   ├── android.test.js
│   ├── constants.js
│   ├── index.js
│   ├── isAndroid.js
│   ├── isAndroid.test.js
│   ├── reducers.js
│   └── utils.js
├── tests
│   └── vendor
│       └── corti.js
└── yarn.lock
/.editorconfig:
--------------------------------------------------------------------------------
1 | root = true
2 |
3 | [*]
4 | indent_style = space
5 | indent_size = 2
6 | end_of_line = lf
7 | charset = utf-8
8 | trim_trailing_whitespace = true
9 | insert_final_newline = true
10 |
--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | ko_fi: jamesbrill
2 |
--------------------------------------------------------------------------------
/.github/workflows/main.yml:
--------------------------------------------------------------------------------
1 | name: Builds, tests & co
2 |
3 | on:
4 | - push
5 | - pull_request
6 |
7 | jobs:
8 | test:
9 | runs-on: ubuntu-latest
10 | steps:
11 | - name: Checkout tree
12 | uses: actions/checkout@v4
13 |
14 | - name: Set-up Node.js
15 | uses: actions/setup-node@v4
16 | with:
17 | check-latest: true
18 | node-version-file: .nvmrc
19 |
20 | - run: corepack enable
21 |
22 | - run: yarn install --immutable
23 |
24 | - run: yarn lint
25 |
26 | - run: yarn build
27 |
28 | - run: yarn test --coverage
29 |
30 | - name: Generate code coverage report
31 | uses: coverallsapp/github-action@v2
32 | with:
33 | github-token: ${{ github.token }}
34 |
--------------------------------------------------------------------------------
/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
1 | name: Publish
2 |
3 | on:
4 | release:
5 | types:
6 | - published
7 |
8 | permissions: read-all
9 |
10 | jobs:
11 | publish:
12 | runs-on: ubuntu-latest
13 |
14 | permissions:
15 | contents: read
16 | id-token: write
17 |
18 | steps:
19 | - name: Checkout tree
20 | uses: actions/checkout@v4
21 |
22 | - name: Set-up Node.js
23 | uses: actions/setup-node@v4
24 | with:
25 | check-latest: true
26 | node-version-file: .nvmrc
27 |
28 | - run: corepack enable
29 |
30 | - run: yarn install --immutable
31 |
32 | - run: yarn npm publish --tolerate-republish
33 | env:
34 | YARN_NPM_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
35 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /.yarn/
2 | /coverage/
3 | /dist/
4 | /node_modules/
5 |
--------------------------------------------------------------------------------
/.nvmrc:
--------------------------------------------------------------------------------
1 | 23
2 |
--------------------------------------------------------------------------------
/.yarnrc.yml:
--------------------------------------------------------------------------------
1 | defaultSemverRangePrefix: ""
2 |
3 | nodeLinker: node-modules
4 |
5 | preferInteractive: true
6 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 James Brill
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # react-speech-recognition
2 | A React hook that converts speech from the microphone to text and makes it available to your React components.
3 |
4 | [npm version](https://www.npmjs.com/package/react-speech-recognition)
5 | [npm downloads](https://www.npmjs.com/package/react-speech-recognition)
6 | [MIT license](https://opensource.org/licenses/MIT)
7 | [Test coverage](https://coveralls.io/github/JamesBrill/react-speech-recognition?branch=master)
8 |
9 | ## How it works
10 | `useSpeechRecognition` is a React hook that gives a component access to a transcript of speech picked up from the user's microphone.
11 |
12 | `SpeechRecognition` manages the global state of the Web Speech API, exposing functions to turn the microphone on and off.
13 |
14 | Under the hood,
15 | it uses the [Web Speech API](https://developer.mozilla.org/en-US/docs/Web/API/SpeechRecognition). Note that browser support for this API is currently limited, with Chrome having the best experience - see [supported browsers](#supported-browsers) for more information.
16 |
17 | This version requires React 16.8 so that React hooks can be used. If you're used to version 2.x of `react-speech-recognition` or want to use an older version of React, you can see the old README [here](https://github.com/JamesBrill/react-speech-recognition/tree/v2.1.4). If you want to migrate to version 3.x, see the migration guide [here](docs/V3-MIGRATION.md).
18 |
19 | ## Useful links
20 |
21 | * [Basic example](#basic-example)
22 | * [Why you should use a polyfill with this library](#why-you-should-use-a-polyfill-with-this-library)
23 | * [Cross-browser example](#cross-browser-example)
24 | * [Supported browsers](#supported-browsers)
25 | * [Polyfills](docs/POLYFILLS.md)
26 | * [API docs](docs/API.md)
27 | * [Troubleshooting](#troubleshooting)
28 | * [Version 3 migration guide](docs/V3-MIGRATION.md)
29 | * [TypeScript declaration file in DefinitelyTyped](https://github.com/OleksandrYehorov/DefinitelyTyped/blob/master/types/react-speech-recognition/index.d.ts)
30 |
31 | ## Installation
32 |
33 | To install:
34 |
35 | ```shell
36 | npm install --save react-speech-recognition
37 | ```
38 |
39 | To import in your React code:
40 |
41 | ```js
42 | import SpeechRecognition, { useSpeechRecognition } from 'react-speech-recognition'
43 | ```
44 |
45 | ## Basic example
46 |
47 | The most basic example of a component using this hook would be:
48 |
49 | ```jsx
50 | import React from 'react';
51 | import SpeechRecognition, { useSpeechRecognition } from 'react-speech-recognition';
52 |
53 | const Dictaphone = () => {
54 | const {
55 | transcript,
56 | listening,
57 | resetTranscript,
58 | browserSupportsSpeechRecognition
59 | } = useSpeechRecognition();
60 |
61 | if (!browserSupportsSpeechRecognition) {
62 |     return <span>Browser doesn't support speech recognition.</span>;
63 | }
64 |
65 | return (
66 |     <div>
67 |       <p>Microphone: {listening ? 'on' : 'off'}</p>
68 |       <button onClick={SpeechRecognition.startListening}>Start</button>
69 |       <button onClick={SpeechRecognition.stopListening}>Stop</button>
70 |       <button onClick={resetTranscript}>Reset</button>
71 |       <p>{transcript}</p>
72 |     </div>
73 | );
74 | };
75 | export default Dictaphone;
76 | ```
77 |
78 | You can see more examples in the example React app attached to this repo. See [Developing](#developing).
79 |
80 | ## Why you should use a polyfill with this library
81 |
82 | By default, speech recognition is not supported in all browsers, with the best native experience being available on desktop Chrome. To avoid the limitations of native browser speech recognition, it's recommended that you combine `react-speech-recognition` with a [speech recognition polyfill](docs/POLYFILLS.md). Why? Here's a comparison with and without polyfills:
83 | * ✅ With a polyfill, your web app will be voice-enabled on all modern browsers (except Internet Explorer)
84 | * ❌ Without a polyfill, your web app will only be voice-enabled on the browsers listed [here](#supported-browsers)
85 | * ✅ With a polyfill, your web app will have a consistent voice experience across browsers
86 | * ❌ Without a polyfill, different native implementations will produce different transcriptions, have different levels of accuracy, and have different formatting styles
87 | * ✅ With a polyfill, you control who is processing your users' voice data
88 | * ❌ Without a polyfill, your users' voice data will be sent to big tech companies like Google or Apple to be transcribed
89 | * ✅ With a polyfill, `react-speech-recognition` will be suitable for use in commercial applications
90 | * ❌ Without a polyfill, `react-speech-recognition` will still be fine for personal projects or use cases where cross-browser support is not needed
91 |
92 | `react-speech-recognition` currently supports polyfills for the following cloud providers:
93 |
94 | * [Microsoft Azure Cognitive Services](docs/POLYFILLS.md#microsoft-azure-cognitive-services)
99 |
100 | ## Cross-browser example
101 |
102 | You can find the full guide for setting up a polyfill [here](docs/POLYFILLS.md). Alternatively, here is a quick example using Azure:
103 | * Install `web-speech-cognitive-services` and `microsoft-cognitiveservices-speech-sdk` in your web app.
104 | * You will need two things to configure this polyfill: the name of the Azure region your Speech Service is deployed in, plus a subscription key (or better still, an authorization token). [This doc](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/overview#find-keys-and-region) explains how to find those
105 | * Here's a component for a push-to-talk button. The basic example above would also work fine.
106 | ```jsx
107 | import React from 'react';
108 | import createSpeechServicesPonyfill from 'web-speech-cognitive-services';
109 | import SpeechRecognition, { useSpeechRecognition } from 'react-speech-recognition';
110 |
111 | const SUBSCRIPTION_KEY = '';
112 | const REGION = '';
113 |
114 | const { SpeechRecognition: AzureSpeechRecognition } = createSpeechServicesPonyfill({
115 | credentials: {
116 | region: REGION,
117 | subscriptionKey: SUBSCRIPTION_KEY,
118 | }
119 | });
120 | SpeechRecognition.applyPolyfill(AzureSpeechRecognition);
121 |
122 | const Dictaphone = () => {
123 | const {
124 | transcript,
125 | resetTranscript,
126 | browserSupportsSpeechRecognition
127 | } = useSpeechRecognition();
128 |
129 | const startListening = () => SpeechRecognition.startListening({
130 | continuous: true,
131 | language: 'en-US'
132 | });
133 |
134 | if (!browserSupportsSpeechRecognition) {
135 | return null;
136 | }
137 |
138 | return (
139 |     <div>
140 |       <button onClick={startListening}>Start</button>
141 |       <button onClick={SpeechRecognition.stopListening}>Stop</button>
142 |       <button onClick={resetTranscript}>Reset</button>
143 |       <p>{transcript}</p>
144 |     </div>
145 | );
146 | };
147 | export default Dictaphone;
148 | ```
149 |
150 | ## Detecting browser support for Web Speech API
151 |
152 | If you choose not to use a polyfill, this library still fails gracefully on browsers that don't support speech recognition. It is recommended that you render some fallback content if it is not supported by the user's browser:
153 |
154 | ```js
155 | if (!browserSupportsSpeechRecognition) {
156 | // Render some fallback content
157 | }
158 | ```
159 |
160 | ### Supported browsers
161 |
162 | Without a polyfill, the Web Speech API is largely only supported by Google browsers. As of May 2021, the following browsers support the Web Speech API:
163 |
164 | * Chrome (desktop): this is by far the smoothest experience
165 | * Safari 14.1
166 | * Microsoft Edge
167 | * Chrome (Android): a word of warning about this platform - there can be an annoying beeping sound when the microphone is turned on. This is part of the Android OS and cannot be controlled from the browser
168 | * Android webview
169 | * Samsung Internet
170 |
171 | For all other browsers, you can render fallback content using the `browserSupportsSpeechRecognition` state described above. Alternatively, as mentioned before, you can integrate a [polyfill](docs/POLYFILLS.md).
172 |
173 | ## Detecting when the user denies access to the microphone
174 |
175 | Even if the browser supports the Web Speech API, the user still has to give permission for their microphone to be used before transcription can begin. They are asked for permission when `react-speech-recognition` first tries to start listening. At this point, you can detect when the user denies access via the `isMicrophoneAvailable` state. When this becomes `false`, it's advised that you disable voice-driven features and indicate that microphone access is needed for them to work.
176 |
177 | ```js
178 | if (!isMicrophoneAvailable) {
179 | // Render some fallback content
180 | }
181 | ```
182 |
183 | ## Controlling the microphone
184 |
185 | Before consuming the transcript, you should be familiar with `SpeechRecognition`, which gives you control over the microphone. The state of the microphone is global, so any functions you call on this object will affect _all_ components using `useSpeechRecognition`.
186 |
187 | ### Turning the microphone on
188 |
189 | To start listening to speech, call the `startListening` function.
190 |
191 | ```js
192 | SpeechRecognition.startListening()
193 | ```
194 |
195 | This is an asynchronous function, so it will need to be awaited if you want to do something after the microphone has been turned on.
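For example, a minimal sketch of awaiting it before doing something else (the handler name here is illustrative, not part of the library):

```js
const handleStart = async () => {
  // Resolves once the microphone has been turned on
  await SpeechRecognition.startListening();
  console.log('Microphone is now on');
};
```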
196 |
197 | ### Turning the microphone off
198 |
199 | To turn the microphone off, but still finish processing any speech in progress, call `stopListening`.
200 |
201 | ```js
202 | SpeechRecognition.stopListening()
203 | ```
204 |
205 | To turn the microphone off, and cancel the processing of any speech in progress, call `abortListening`.
206 |
207 | ```js
208 | SpeechRecognition.abortListening()
209 | ```
210 |
211 | ## Consuming the microphone transcript
212 |
213 | To make the microphone transcript available in your component, simply add:
214 |
215 | ```js
216 | const { transcript } = useSpeechRecognition()
217 | ```
218 |
219 | ## Resetting the microphone transcript
220 |
221 | To set the transcript to an empty string, you can call the `resetTranscript` function provided by `useSpeechRecognition`. Note that this is local to your component and does not affect any other components using Speech Recognition.
222 |
223 | ```js
224 | const { resetTranscript } = useSpeechRecognition()
225 | ```
226 |
227 | ## Commands
228 |
229 | To respond when the user says a particular phrase, you can pass in a list of commands to the `useSpeechRecognition` hook. Each command is an object with the following properties:
230 | - `command`: This is a string or `RegExp` representing the phrase you want to listen for. If you want to use the same callback for multiple commands, you can also pass in an array here, with each value being a string or `RegExp`
231 | - `callback`: The function that is executed when the command is spoken. The last argument that this function receives will always be an object containing the following properties:
232 | - `command`: The command phrase that was matched. This can be useful when you provide an array of command phrases for the same callback and need to know which one triggered it
233 | - `resetTranscript`: A function that sets the transcript to an empty string
234 | - `matchInterim`: Boolean that determines whether "interim" results should be matched against the command. This will make your component respond faster to commands, but also makes false positives more likely - i.e. the command may be detected when it is not spoken. This is `false` by default and should only be set for simple commands.
235 | - `isFuzzyMatch`: Boolean that determines whether the comparison between speech and `command` is based on similarity rather than an exact match. Fuzzy matching is useful for commands that are easy to mispronounce or be misinterpreted by the Speech Recognition engine (e.g. names of places, sports teams, restaurant menu items). It is intended for commands that are string literals without special characters. If `command` is a string with special characters or a `RegExp`, it will be converted to a string without special characters when fuzzy matching. The similarity that is needed to match the command can be configured with `fuzzyMatchingThreshold`. `isFuzzyMatch` is `false` by default. When it is set to `true`, it will pass four arguments to `callback`:
236 | - The value of `command` (with any special characters removed)
237 | - The speech that matched `command`
238 | - The similarity between `command` and the speech
239 | - The object mentioned in the `callback` description above
240 | - `fuzzyMatchingThreshold`: If the similarity of speech to `command` is higher than this value when `isFuzzyMatch` is turned on, the `callback` will be invoked. You should set this only if `isFuzzyMatch` is `true`. It takes values between `0` (will match anything) and `1` (needs an exact match). The default value is `0.8`.
241 | - `bestMatchOnly`: Boolean that, when `isFuzzyMatch` is `true`, determines whether the callback should only be triggered by the command phrase that _best_ matches the speech, rather than being triggered by all matching fuzzy command phrases. This is useful for fuzzy commands with multiple command phrases assigned to the same callback function - you may only want the callback to be triggered once for each spoken command. You should set this only if `isFuzzyMatch` is `true`. The default value is `false`.
242 |
243 | ### Command symbols
244 |
245 | To make commands easier to write, the following symbols are supported:
246 | - Splats: this is just a `*` and will match multi-word text:
247 | - Example: `'I would like to order *'`
248 | - The words that match the splat will be passed into the callback, one argument per splat
249 | - Named variables: this is written `:<name>` and will match a single word:
250 | - Example: `'I am :height metres tall'`
251 | - The one word that matches the named variable will be passed into the callback
252 | - Optional words: this is a phrase wrapped in parentheses `(` and `)`, and is not required to match the command:
253 | - Example: `'Pass the salt (please)'`
254 | - The above example would match both `'Pass the salt'` and `'Pass the salt please'`
255 |
256 | ### Example with commands
257 |
258 | ```jsx
259 | import React, { useState } from 'react'
260 | import SpeechRecognition, { useSpeechRecognition } from 'react-speech-recognition'
261 |
262 | const Dictaphone = () => {
263 | const [message, setMessage] = useState('')
264 | const commands = [
265 | {
266 | command: 'I would like to order *',
267 | callback: (food) => setMessage(`Your order is for: ${food}`)
268 | },
269 | {
270 | command: 'The weather is :condition today',
271 | callback: (condition) => setMessage(`Today, the weather is ${condition}`)
272 | },
273 | {
274 | command: 'My top sports are * and *',
275 | callback: (sport1, sport2) => setMessage(`#1: ${sport1}, #2: ${sport2}`)
276 | },
277 | {
278 | command: 'Pass the salt (please)',
279 | callback: () => setMessage('My pleasure')
280 | },
281 | {
282 | command: ['Hello', 'Hi'],
283 | callback: ({ command }) => setMessage(`Hi there! You said: "${command}"`),
284 | matchInterim: true
285 | },
286 | {
287 | command: 'Beijing',
288 | callback: (command, spokenPhrase, similarityRatio) => setMessage(`${command} and ${spokenPhrase} are ${similarityRatio * 100}% similar`),
289 | // If the spokenPhrase is "Benji", the message would be "Beijing and Benji are 40% similar"
290 | isFuzzyMatch: true,
291 | fuzzyMatchingThreshold: 0.2
292 | },
293 | {
294 | command: ['eat', 'sleep', 'leave'],
295 | callback: (command) => setMessage(`Best matching command: ${command}`),
296 | isFuzzyMatch: true,
297 | fuzzyMatchingThreshold: 0.2,
298 | bestMatchOnly: true
299 | },
300 | {
301 | command: 'clear',
302 | callback: ({ resetTranscript }) => resetTranscript()
303 | }
304 | ]
305 |
306 | const { transcript, browserSupportsSpeechRecognition } = useSpeechRecognition({ commands })
307 |
308 | if (!browserSupportsSpeechRecognition) {
309 | return null
310 | }
311 |
312 | return (
313 |     <div>
314 |       <p>{message}</p>
315 |       <p>{transcript}</p>
316 |     </div>
317 | )
318 | }
319 | export default Dictaphone
320 | ```
321 |
322 | ## Continuous listening
323 |
324 | By default, the microphone will stop listening when the user stops speaking. This reflects the approach taken by "press to talk" buttons on modern devices.
325 |
326 | If you want to listen continuously, set the `continuous` property to `true` when calling `startListening`. The microphone will continue to listen, even after the user has stopped speaking.
327 |
328 | ```js
329 | SpeechRecognition.startListening({ continuous: true })
330 | ```
331 |
332 | Be warned that not all browsers have good support for continuous listening. Chrome on Android in particular constantly restarts the microphone, leading to a frustrating and noisy (from the beeping) experience. To avoid enabling continuous listening on these browsers, you can make use of the `browserSupportsContinuousListening` state from `useSpeechRecognition` to detect support for this feature.
333 |
334 | ```js
335 | if (browserSupportsContinuousListening) {
336 | SpeechRecognition.startListening({ continuous: true })
337 | } else {
338 | // Fallback behaviour
339 | }
340 | ```
341 |
342 | Alternatively, you can try one of the [polyfills](docs/POLYFILLS.md) to enable continuous listening on these browsers.
343 |
344 | ## Changing language
345 |
346 | To listen for a specific language, you can pass a language tag (e.g. `'zh-CN'` for Chinese) when calling `startListening`. See [here](docs/API.md#language-string) for a list of supported languages.
347 |
348 | ```js
349 | SpeechRecognition.startListening({ language: 'zh-CN' })
350 | ```
351 |
352 | ## Troubleshooting
353 |
354 | ### `regeneratorRuntime is not defined`
355 |
356 | If you see the error `regeneratorRuntime is not defined` when using this library, you will need to ensure your web app installs `regenerator-runtime`:
357 | * `npm i --save regenerator-runtime`
358 | * If you are using Next.js, put this at the top of your `_app.js` file: `import 'regenerator-runtime/runtime'`. For any other framework, put it at the top of your `index.js` file, as shown below
359 |
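A minimal sketch of what the top of such an entry file might look like - the key point is that the runtime is imported before `react-speech-recognition`:

```js
// Import the runtime first so that regeneratorRuntime is defined
// before react-speech-recognition (or anything else that needs it) loads
import 'regenerator-runtime/runtime';
import SpeechRecognition, { useSpeechRecognition } from 'react-speech-recognition';
```
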
360 | ### How to use `react-speech-recognition` offline?
361 |
362 | Unfortunately, speech recognition will not function in Chrome when offline. According to the [Web Speech API docs](https://developer.mozilla.org/en-US/docs/Web/API/Web_Speech_API/Using_the_Web_Speech_API): `On Chrome, using Speech Recognition on a web page involves a server-based recognition engine. Your audio is sent to a web service for recognition processing, so it won't work offline.`
363 |
364 | If you are building an offline web app, you can detect when the browser is offline by inspecting the value of `navigator.onLine`. If it is `true`, you can render the transcript generated by React Speech Recognition. If it is `false`, it's advisable to render offline fallback content that signifies that speech recognition is disabled. The online/offline API is simple to use - you can read how to use it [here](https://developer.mozilla.org/en-US/docs/Web/API/NavigatorOnLine/Online_and_offline_events).
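
As a rough sketch (the component name is illustrative, and a real app would typically also listen for the browser's `online`/`offline` events so it re-renders when connectivity changes):

```jsx
import React from 'react';

const TranscriptView = ({ transcript }) => {
  // navigator.onLine is false when the browser has no network connection
  if (!navigator.onLine) {
    return <p>Speech recognition is unavailable while offline.</p>;
  }
  return <p>{transcript}</p>;
};
```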
365 |
366 | ## Developing
367 |
368 | You can run an example React app that uses `react-speech-recognition` with:
369 | ```shell
370 | npm i
371 | npm run dev
372 | ```
373 |
374 | On `http://localhost:3000`, you'll be able to speak into the microphone and see your speech as text on the web page. There are also controls for turning speech recognition on and off. You can make changes to the web app itself in the `example` directory. Any changes you make to the web app or `react-speech-recognition` itself will be live reloaded in the browser.
375 |
376 | ## API docs
377 |
378 | View the API docs [here](docs/API.md) or follow the guide above to learn how to use `react-speech-recognition`.
379 |
380 | ## License
381 |
382 | MIT
383 |
--------------------------------------------------------------------------------
/biome.json:
--------------------------------------------------------------------------------
1 | {
2 | "$schema": "node_modules/@biomejs/biome/configuration_schema.json",
3 | "files": {
4 | "ignore": ["package.json"]
5 | },
6 | "formatter": {
7 | "enabled": true,
8 | "useEditorconfig": true
9 | },
10 | "linter": {
11 | "enabled": false,
12 | "rules": {
13 | "recommended": true
14 | }
15 | },
16 | "organizeImports": {
17 | "enabled": true
18 | },
19 | "vcs": {
20 | "clientKind": "git",
21 | "enabled": true,
22 | "useIgnoreFile": true
23 | }
24 | }
25 |
--------------------------------------------------------------------------------
/docs/API.md:
--------------------------------------------------------------------------------
1 | # API docs
2 |
3 | ## Interface
4 |
5 | * [useSpeechRecognition](#useSpeechRecognition)
6 | * [SpeechRecognition](#SpeechRecognition)
7 |
8 | ## useSpeechRecognition
9 |
10 | React hook for consuming speech recorded by the microphone. Import with:
11 |
12 | ```js
13 | import { useSpeechRecognition } from 'react-speech-recognition'
14 | ```
15 |
16 | ### Input props
17 |
18 | These are passed as an object argument to `useSpeechRecognition`:
19 |
20 | ```js
21 | useSpeechRecognition({ transcribing, clearTranscriptOnListen, commands })
22 | ```
23 |
24 | #### transcribing [bool]
25 |
26 | Is this component collecting a transcript or not? This is independent of the global `listening` state of the microphone. `true` by default.
27 |
28 | #### clearTranscriptOnListen [bool]
29 |
30 | Does this component clear its transcript when the microphone is turned on? Has no effect when continuous listening is enabled. `true` by default.
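
For example, a sketch of passing explicit values (the `isTranscribing` flag is illustrative local state, not part of the library):

```js
const { transcript } = useSpeechRecognition({
  transcribing: isTranscribing, // pause or resume transcript collection for this component
  clearTranscriptOnListen: false // keep the existing transcript when the microphone is turned on again
})
```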
31 |
32 | #### commands [list]
33 |
34 | See [Commands](../README.md#Commands).
35 |
36 | ### Output state
37 |
38 | These are returned from `useSpeechRecognition`:
39 |
40 | ```js
41 | const {
42 | transcript,
43 | interimTranscript,
44 | finalTranscript,
45 | resetTranscript,
46 | listening,
47 | browserSupportsSpeechRecognition,
48 | isMicrophoneAvailable,
49 | } = useSpeechRecognition()
50 | ```
51 |
52 | #### transcript [string]
53 |
54 | Transcription of all speech that has been spoken into the microphone. Is equivalent to the final transcript followed by the interim transcript, separated by a space.
55 |
56 | #### resetTranscript [function]
57 |
58 | Sets `transcript` to an empty string.
59 |
60 | #### listening [bool]
61 |
62 | If true, the Web Speech API is listening to speech from the microphone.
63 |
64 | #### interimTranscript [string]
65 |
66 | Transcription of speech that the Web Speech API is still processing (i.e. it's still deciding what has just been spoken).
67 |
68 | For the current words being spoken, the interim transcript reflects each successive guess made by the transcription algorithm. When the browser’s confidence in its guess is maximized, it is added to the final transcript.
69 |
70 | The difference between interim and final transcripts can be illustrated by an example over four iterations of the transcription algorithm:
71 |
72 | | Final transcript | Interim transcript |
73 | |-------------------|--------------------|
74 | | 'Hello, I am' | 'jam' |
75 | | 'Hello, I am' | 'jams' |
76 | | 'Hello, I am' | 'James' |
77 | | 'Hello, I am James' | '' |
78 |
79 | #### finalTranscript [string]
80 |
81 | Transcription of speech that the Web Speech API has finished processing.
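
One way to use the distinction is to render the two parts differently - a sketch (the component name and styling are illustrative):

```jsx
import React from 'react'
import { useSpeechRecognition } from 'react-speech-recognition'

const TranscriptDisplay = () => {
  const { finalTranscript, interimTranscript } = useSpeechRecognition()

  return (
    <p>
      {finalTranscript}{' '}
      {/* Interim results are still being revised, so render them de-emphasised */}
      <em>{interimTranscript}</em>
    </p>
  )
}
```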
82 |
83 | #### browserSupportsSpeechRecognition [bool]
84 |
85 | The Web Speech API is not supported on all browsers, so it is recommended that you render some fallback content if it is not supported by the user's browser:
86 |
87 | ```js
88 | if (!browserSupportsSpeechRecognition) {
89 | // Render some fallback content
90 | }
91 | ```
92 |
93 | #### browserSupportsContinuousListening [bool]
94 |
95 | Continuous listening is not supported on all browsers, so it is recommended that you apply some fallback behaviour if your web app uses continuous listening and is running on a browser that doesn't support it:
96 |
97 | ```js
98 | if (browserSupportsContinuousListening) {
99 | SpeechRecognition.startListening({ continuous: true })
100 | } else {
101 | // Fallback behaviour
102 | }
103 | ```
104 |
105 | #### isMicrophoneAvailable [bool]
106 |
107 | The user has to give permission for their microphone to be used before transcription can begin. They are asked for permission when `react-speech-recognition` first tries to start listening. This state will become `false` if they deny access. In this case, it's advised that you disable voice-driven features and indicate that microphone access is needed for them to work.
108 |
109 | ```js
110 | if (!isMicrophoneAvailable) {
111 | // Render some fallback content
112 | }
113 | ```
114 |
115 | ## SpeechRecognition
116 |
117 | Object providing functions to manage the global state of the microphone. Import with:
118 |
119 | ```js
120 | import SpeechRecognition from 'react-speech-recognition'
121 | ```
122 |
123 | ### Functions
124 |
125 | #### startListening (async)
126 |
127 | Start listening to speech.
128 |
129 | ```js
130 | SpeechRecognition.startListening()
131 | ```
132 |
133 | This is an asynchronous function, so it will need to be awaited if you want to do something after the microphone has been turned on.
134 |
135 | It can be called with an options argument. For example:
136 |
137 | ```js
138 | SpeechRecognition.startListening({
139 | continuous: true,
140 | language: 'zh-CN'
141 | })
142 | ```
143 |
144 | The following options are available:
145 |
146 | ##### continuous [bool]
147 |
148 | By default, the microphone will stop listening when the user stops speaking (`continuous: false`). This reflects the approach taken by "press to talk" buttons on modern devices.
149 |
150 | If you want to listen continuously, set the `continuous` property to `true` when calling `startListening`. The microphone will continue to listen, even after the user has stopped speaking.
151 |
152 | ```js
153 | SpeechRecognition.startListening({ continuous: true })
154 | ```
155 |
156 | ##### language [string]
157 |
158 | To listen for a specific language, you can pass a language tag (e.g. `'zh-CN'` for Chinese) when calling `startListening`.
159 |
160 | ```js
161 | SpeechRecognition.startListening({ language: 'zh-CN' })
162 | ```
163 |
164 | Some known supported languages (based on [this Stack Overflow post](http://stackoverflow.com/a/14302134/338039)):
165 |
166 | * Afrikaans `af`
167 | * Basque `eu`
168 | * Bulgarian `bg`
169 | * Catalan `ca`
170 | * Arabic (Egypt) `ar-EG`
171 | * Arabic (Jordan) `ar-JO`
172 | * Arabic (Kuwait) `ar-KW`
173 | * Arabic (Lebanon) `ar-LB`
174 | * Arabic (Qatar) `ar-QA`
175 | * Arabic (UAE) `ar-AE`
176 | * Arabic (Morocco) `ar-MA`
177 | * Arabic (Iraq) `ar-IQ`
178 | * Arabic (Algeria) `ar-DZ`
179 | * Arabic (Bahrain) `ar-BH`
180 | * Arabic (Libya) `ar-LY`
181 | * Arabic (Oman) `ar-OM`
182 | * Arabic (Saudi Arabia) `ar-SA`
183 | * Arabic (Tunisia) `ar-TN`
184 | * Arabic (Yemen) `ar-YE`
185 | * Czech `cs`
186 | * Dutch `nl-NL`
187 | * English (Australia) `en-AU`
188 | * English (Canada) `en-CA`
189 | * English (India) `en-IN`
190 | * English (New Zealand) `en-NZ`
191 | * English (South Africa) `en-ZA`
192 | * English (UK) `en-GB`
193 | * English (US) `en-US`
194 | * Finnish `fi`
195 | * French `fr-FR`
196 | * Galician `gl`
197 | * German `de-DE`
198 | * Greek `el-GR`
199 | * Hebrew `he`
200 | * Hungarian `hu`
201 | * Icelandic `is`
202 | * Italian `it-IT`
203 | * Indonesian `id`
204 | * Japanese `ja`
205 | * Korean `ko`
206 | * Latin `la`
207 | * Mandarin Chinese `zh-CN`
208 | * Taiwanese `zh-TW`
209 | * Cantonese `zh-HK`
210 | * Malaysian `ms-MY`
211 | * Norwegian `no-NO`
212 | * Polish `pl`
213 | * Pig Latin `xx-piglatin`
214 | * Portuguese `pt-PT`
215 | * Portuguese (Brazil) `pt-br`
216 | * Romanian `ro-RO`
217 | * Russian `ru`
218 | * Serbian `sr-SP`
219 | * Slovak `sk`
220 | * Spanish (Argentina) `es-AR`
221 | * Spanish (Bolivia) `es-BO`
222 | * Spanish (Chile) `es-CL`
223 | * Spanish (Colombia) `es-CO`
224 | * Spanish (Costa Rica) `es-CR`
225 | * Spanish (Dominican Republic) `es-DO`
226 | * Spanish (Ecuador) `es-EC`
227 | * Spanish (El Salvador) `es-SV`
228 | * Spanish (Guatemala) `es-GT`
229 | * Spanish (Honduras) `es-HN`
230 | * Spanish (Mexico) `es-MX`
231 | * Spanish (Nicaragua) `es-NI`
232 | * Spanish (Panama) `es-PA`
233 | * Spanish (Paraguay) `es-PY`
234 | * Spanish (Peru) `es-PE`
235 | * Spanish (Puerto Rico) `es-PR`
236 | * Spanish (Spain) `es-ES`
237 | * Spanish (US) `es-US`
238 | * Spanish (Uruguay) `es-UY`
239 | * Spanish (Venezuela) `es-VE`
240 | * Swedish `sv-SE`
241 | * Turkish `tr`
242 | * Zulu `zu`
243 |
244 | #### stopListening (async)
245 |
246 | Turn the microphone off, but still finish processing any speech in progress.
247 |
248 | ```js
249 | SpeechRecognition.stopListening()
250 | ```
251 |
252 | This is an asynchronous function, so it will need to be awaited if you want to do something after the microphone has been turned off.
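
For example (the handler name is illustrative):

```js
const handleStop = async () => {
  await SpeechRecognition.stopListening()
  // The microphone is off at this point; any speech in progress will still be processed
  console.log('Stopped listening')
}
```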
253 |
254 | #### abortListening (async)
255 |
256 | Turn the microphone off, and cancel the processing of any speech in progress.
257 |
258 | ```js
259 | SpeechRecognition.abortListening()
260 | ```
261 |
262 | This is an asynchronous function, so it will need to be awaited if you want to do something after the microphone has been turned off.
263 |
264 | #### getRecognition
265 |
266 | This returns the underlying [object](https://developer.mozilla.org/en-US/docs/Web/API/SpeechRecognition) used by Web Speech API.
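
For example, a sketch of inspecting the engine's current settings (assuming a native engine or an applied polyfill is present):

```js
const recognition = SpeechRecognition.getRecognition()
if (recognition) {
  // Properties defined by the SpeechRecognition specification
  console.log(recognition.lang, recognition.continuous, recognition.interimResults)
}
```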
267 |
268 | #### applyPolyfill
269 |
270 | Replace the native Speech Recognition engine (if there is one) with a custom implementation of the [W3C SpeechRecognition specification](https://wicg.github.io/speech-api/#speechreco-section). If there is a Speech Recognition implementation already listening to the microphone, this will be turned off. See [Polyfills](./POLYFILLS.md) for more information on how to use this.
271 |
272 | ```js
273 | SpeechRecognition.applyPolyfill(SpeechRecognitionPolyfill)
274 | ```
275 |
276 | #### removePolyfill
277 |
278 | If a polyfill was applied using `applyPolyfill`, reset the Speech Recognition engine to the native implementation. This can be useful when the user switches to a language that is supported by the native engine but not the polyfill engine.
279 |
280 | ```js
281 | SpeechRecognition.removePolyfill()
282 | ```
--------------------------------------------------------------------------------
/docs/POLYFILLS.md:
--------------------------------------------------------------------------------
1 | # Polyfills
2 |
3 | If you want `react-speech-recognition` to work on more browsers than just Chrome, you can integrate a polyfill. This is a piece of code that fills in some missing feature in browsers that don't support it.
4 |
5 | Under the hood, Web Speech API in Chrome uses Google's speech recognition servers. To replicate this functionality elsewhere, you will need to host your own speech recognition service and implement the Web Speech API using that service. That implementation, which is essentially a polyfill, can then be plugged into `react-speech-recognition`. You can write that polyfill yourself, but it's recommended you use one someone else has already made.
6 |
7 | # Basic usage
8 |
9 | The `SpeechRecognition` class exported by `react-speech-recognition` has the method `applyPolyfill`. This can take an implementation of the [W3C SpeechRecognition specification](https://wicg.github.io/speech-api/#speechreco-section). From then on, that implementation will be used by `react-speech-recognition` to transcribe speech picked up by the microphone.
10 |
11 | ```js
12 | SpeechRecognition.applyPolyfill(SpeechRecognitionPolyfill)
13 | ```
14 |
15 | Note that this type of polyfill, which does not pollute the global scope, is known as a "ponyfill" - the distinction is explained [here](https://ponyfoo.com/articles/polyfills-or-ponyfills). `react-speech-recognition` will also pick up traditional polyfills - just make sure you import them before `react-speech-recognition`.
16 |
17 | Polyfills can be removed using `removePolyfill`. This can be useful when the user switches to a language that is supported by the native Speech Recognition engine but not the polyfill engine.
18 |
19 | ```js
20 | SpeechRecognition.removePolyfill()
21 | ```
22 |
23 | ## Usage recommendations
24 | * Call `applyPolyfill` as early as possible to minimise the time during which fallback content (which you should render while the polyfill is loading) is displayed. Also note that if a Speech Recognition implementation is already listening to the microphone, it will be turned off when the polyfill is applied, so make sure the polyfill is applied before rendering any buttons that start listening
25 | * After `applyPolyfill` has been called, `browserSupportsSpeechRecognition` will be `true` on _most_ browsers, but there are still exceptions. Browsers like Internet Explorer do not support the APIs needed for polyfills - in these cases where `browserSupportsSpeechRecognition` is `false`, you should still have some suitable fallback content
26 | * Do not rely on polyfills being perfect implementations of the Speech Recognition specification - make sure you have tested them in different browsers and are aware of their individual limitations
27 |
28 | # Polyfill libraries
29 |
30 | Rather than roll your own, you should use a ready-made polyfill for a cloud provider's speech recognition service. `react-speech-recognition` currently supports polyfills for the following cloud providers:
31 |
32 | ## Microsoft Azure Cognitive Services
33 |
34 |
35 |
36 |
37 |
38 | This is Microsoft's offering for speech recognition (among many other features). The free trial gives you $200 of credit to get started. It's pretty easy to set up - see the [documentation](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/).
39 |
40 | * Polyfill repo: [web-speech-cognitive-services](https://github.com/compulim/web-speech-cognitive-services)
41 | * Polyfill author: [compulim](https://github.com/compulim)
42 | * Requirements:
43 | * Install `web-speech-cognitive-services` and `microsoft-cognitiveservices-speech-sdk` in your web app for this polyfill to function
44 | * You will need two things to configure this polyfill: the name of the Azure region your Speech Service is deployed in, plus a subscription key (or better still, an authorization token). [This doc](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/overview#find-keys-and-region) explains how to find those
45 |
46 | Here is a basic example combining `web-speech-cognitive-services` and `react-speech-recognition` to get you started (do not use this in production; for a production-friendly version, read on below). This code worked with version 7.1.0 of the polyfill in February 2021 - if it has become outdated due to changes in the polyfill or in Azure Cognitive Services, please raise a GitHub issue or PR to get this updated.
47 |
48 | ```jsx
49 | import React from 'react';
50 | import createSpeechServicesPonyfill from 'web-speech-cognitive-services';
51 | import SpeechRecognition, { useSpeechRecognition } from 'react-speech-recognition';
52 |
53 | const SUBSCRIPTION_KEY = '';
54 | const REGION = '';
55 |
56 | const { SpeechRecognition: AzureSpeechRecognition } = createSpeechServicesPonyfill({
57 | credentials: {
58 | region: REGION,
59 | subscriptionKey: SUBSCRIPTION_KEY,
60 | }
61 | });
62 | SpeechRecognition.applyPolyfill(AzureSpeechRecognition);
63 |
64 | const Dictaphone = () => {
65 | const {
66 | transcript,
67 | resetTranscript,
68 | browserSupportsSpeechRecognition
69 | } = useSpeechRecognition();
70 |
71 | const startListening = () => SpeechRecognition.startListening({
72 | continuous: true,
73 | language: 'en-US'
74 | });
75 |
76 | if (!browserSupportsSpeechRecognition) {
77 | return null;
78 | }
79 |
80 | return (
81 |     <div>
82 |       <button onClick={startListening}>Start</button>
83 |       <button onClick={SpeechRecognition.stopListening}>Stop</button>
84 |       <button onClick={resetTranscript}>Reset</button>
85 |       <p>{transcript}</p>
86 |     </div>
87 | );
88 | };
89 | export default Dictaphone;
90 | ```
91 |
92 | ### Usage in production
93 |
94 | Your subscription key is a secret that you should not be leaking to your users in production. In other words, it should never be downloaded to your users' browsers. A more secure approach that's recommended by Microsoft is to exchange your subscription key for an authorization token, which has a limited lifetime. You should get this token on your backend and pass this to your frontend React app. Microsoft give guidance on how to do this [here](https://docs.microsoft.com/en-us/azure/cognitive-services/authentication?tabs=powershell).
95 |
96 | Once your React app has the authorization token, it should be passed into the polyfill creator instead of the subscription key like this:
97 | ```js
98 | const { SpeechRecognition: AzureSpeechRecognition } = createSpeechServicesPonyfill({
99 | credentials: {
100 | region: REGION,
101 | authorizationToken: AUTHORIZATION_TOKEN,
102 | }
103 | });
104 | ```
105 |
106 | ### Limitations
107 | * There is currently a [bug](https://github.com/compulim/web-speech-cognitive-services/issues/166) in this polyfill's `stop` method when using continuous listening. If you are using `continuous: true`, use `abortListening` to stop the transcription. Otherwise, you can use `stopListening`.
108 | * On Safari and Firefox, an error will be thrown if calling `startListening` to switch to a different language without first calling `stopListening`. It's recommended that you stick to one language and, if you do need to change languages, call `stopListening` first
109 | * If you don't specify a language, Azure will return a 400 response. When calling `startListening`, you will need to explicitly provide one of the language codes defined [here](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support). For English, use `en-GB` or `en-US`
110 | * Currently untested on iOS (let me know if it works!)
111 |
112 |
113 |
114 |
115 | ## AWS Transcribe
116 |
117 | There is no polyfill for [AWS Transcribe](https://aws.amazon.com/transcribe/) in the ecosystem yet, though a promising project can be found [here](https://github.com/ceuk/speech-recognition-aws-polyfill).
118 |
119 | # Providing your own polyfill
120 |
121 | If you want to roll your own implementation of the Speech Recognition API, follow the [W3C SpeechRecognition specification](https://wicg.github.io/speech-api/#speechreco-section). You should implement at least the following for `react-speech-recognition` to work (a skeleton sketch follows the list):
122 | * `continuous` (property)
123 | * `lang` (property)
124 | * `interimResults` (property)
125 | * `onresult` (property). On the events received, the following properties are used:
126 | * `event.resultIndex`
127 | * `event.results[i].isFinal`
128 | * `event.results[i][0].transcript`
129 | * `event.results[i][0].confidence`
130 | * `onend` (property)
131 | * `onerror` (property)
132 | * `start` (method)
133 | * `stop` (method)
134 | * `abort` (method)
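
The skeleton below sketches the shape such an implementation might take. The class name is illustrative and the method bodies are deliberately left as comments - they would need to be wired up to your own recognition service:

```js
class MySpeechRecognition {
  constructor() {
    this.continuous = false
    this.lang = 'en-US'
    this.interimResults = false
    this.onresult = () => {}
    this.onend = () => {}
    this.onerror = () => {}
  }

  start() {
    // Begin streaming microphone audio to your recognition service, then call
    // this.onresult with events shaped like SpeechRecognitionEvent (resultIndex,
    // results[i].isFinal, results[i][0].transcript, results[i][0].confidence)
  }

  stop() {
    // Stop capturing audio, deliver any in-flight results, then call this.onend
  }

  abort() {
    // Stop capturing audio, discard any in-flight results, then call this.onend
  }
}

SpeechRecognition.applyPolyfill(MySpeechRecognition)
```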
135 |
136 |
--------------------------------------------------------------------------------
/docs/V3-MIGRATION.md:
--------------------------------------------------------------------------------
1 | # Migrating from v2 to v3
2 |
3 | v3 makes use of React hooks to simplify the consumption of `react-speech-recognition`:
4 | * Replacing the higher order component with a React hook
5 | * Introducing commands, functions that get executed when the user says a particular phrase
6 | * A clear separation between all parts of `react-speech-recognition` that are global (e.g. whether the microphone is listening or not) and local (e.g. transcripts). This makes it possible to have multiple components consuming the global microphone input while maintaining their own transcripts and commands
7 | * Some default prop values have changed so check those out below
8 |
9 | ## The original Dictaphone example
10 |
11 | ### In v2
12 |
13 | ```jsx
14 | import React, { Component } from "react";
15 | import PropTypes from "prop-types";
16 | import SpeechRecognition from "react-speech-recognition";
17 |
18 | const propTypes = {
19 | // Props injected by SpeechRecognition
20 | transcript: PropTypes.string,
21 | resetTranscript: PropTypes.func,
22 | browserSupportsSpeechRecognition: PropTypes.bool
23 | };
24 |
25 | const Dictaphone = ({
26 | transcript,
27 | resetTranscript,
28 | browserSupportsSpeechRecognition
29 | }) => {
30 | if (!browserSupportsSpeechRecognition) {
31 | return null;
32 | }
33 |
34 | return (
35 |     <div>
36 |       <button onClick={resetTranscript}>Reset</button>
37 |       <span>{transcript}</span>
38 |     </div>
39 | );
40 | };
41 |
42 | Dictaphone.propTypes = propTypes;
43 |
44 | export default SpeechRecognition(Dictaphone);
45 | ```
46 |
47 | ### In v3
48 |
49 | Automatically enabling the microphone without any user input is no longer encouraged as most browsers now prevent this. This is due to concerns about privacy - users don't necessarily want their browser listening to them without being asked. The "auto-start" has been replaced with a button to trigger the microphone being turned on.
50 |
51 | ```jsx
52 | import React, { useEffect } from 'react'
53 | import SpeechRecognition, { useSpeechRecognition } from 'react-speech-recognition'
54 |
55 | const Dictaphone = () => {
56 | const { transcript, resetTranscript, browserSupportsSpeechRecognition } = useSpeechRecognition()
57 | const startListening = () => SpeechRecognition.startListening({ continuous: true })
58 |
59 | if (!browserSupportsSpeechRecognition) {
60 | return null
61 | }
62 |
63 | return (
64 |     <div>