├── .env
├── .github
│   ├── CODE_OF_CONDUCT.md
│   ├── ISSUE_TEMPLATE.md
│   └── PULL_REQUEST_TEMPLATE.md
├── .gitignore
├── LICENSE.md
├── README.md
├── package-lock.json
├── package.json
├── public
│   ├── favicon.ico
│   ├── index.html
│   ├── logo192.png
│   ├── logo512.png
│   ├── manifest.json
│   └── robots.txt
├── server
│   └── index.js
└── src
    ├── App.js
    ├── custom.css
    ├── index.js
    └── token_util.js

/.env:
--------------------------------------------------------------------------------
SPEECH_KEY=paste-your-speech-key-here
SPEECH_REGION=paste-your-speech-region-here
--------------------------------------------------------------------------------
/.github/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
# Microsoft Open Source Code of Conduct

This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).

Resources:

- [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/)
- [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/)
- Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE.md:
--------------------------------------------------------------------------------
> Please provide us with the following information:
> ---------------------------------------------------------------

### This issue is for a: (mark with an `x`)
```
- [ ] bug report -> please search issues before submitting
- [ ] feature request
- [ ] documentation issue or request
- [ ] regression (a behavior that used to work and stopped in a new release)
```

### Minimal steps to reproduce
>

### Any log messages given by the failure
>

### Expected/desired behavior
>

### OS and Version?
> Windows 7, 8 or 10. Linux (which distribution). macOS (Yosemite? El Capitan? Sierra?)

### Versions
>

### Mention any other details that might be useful

> ---------------------------------------------------------------
> Thanks! We'll be in touch soon.
--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
## Purpose

* ...

## Does this introduce a breaking change?

```
[ ] Yes
[ ] No
```

## Pull Request Type
What kind of change does this Pull Request introduce?

```
[ ] Bugfix
[ ] Feature
[ ] Code style update (formatting, local variables)
[ ] Refactoring (no functional changes, no api changes)
[ ] Documentation content changes
[ ] Other... Please describe:
```

## How to Test
* Get the code

```
git clone [repo-address]
cd [repo-name]
git checkout [branch-name]
npm install
```

* Test the code

```
```

## What to Check
Verify that the following are valid
* ...
## Other Information
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.

# dependencies
/node_modules
/.pnp
.pnp.js

# testing
/coverage

# production
/build

# misc
.DS_Store
.env.local
.env.development.local
.env.test.local
.env.production.local

npm-debug.log*
yarn-debug.log*
yarn-error.log*
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
MIT License

Copyright (c) Microsoft Corporation.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# React Speech service sample app

This sample shows how to integrate the Azure Speech service into a React application. It demonstrates design patterns for authentication token exchange and management, as well as for capturing audio from a microphone or file for speech-to-text conversion.

## Prerequisites

1. This article assumes that you have an Azure account and a Speech service subscription. If you don't have an account and subscription, [try the Speech service for free](https://docs.microsoft.com/azure/cognitive-services/speech-service/overview#try-the-speech-service-for-free).
1. Ensure you have [Node.js](https://nodejs.org/en/download/) installed.

## How to run the app

1. Clone this repo, then change directory to the project root and run `npm install` to install dependencies.
1. Add your Azure Speech key and region to the `.env` file, replacing the placeholder text.
1. To run the Express server and React app together, run `npm run dev`.

## Change recognition language

To change the source recognition language, change the locale strings in `App.js` lines **32** and **66**, which set the recognition language property on the `SpeechConfig` object.
```javascript
speechConfig.speechRecognitionLanguage = 'en-US'
```

For a full list of supported locales, see the [language support article](https://docs.microsoft.com/azure/cognitive-services/speech-service/language-support#speech-to-text).

## Speech-to-text from microphone

To convert speech to text using a microphone, run the app and then click **Convert speech to text from your mic.** This will prompt you for access to your microphone, and then listen for you to speak. The following function `sttFromMic` in `App.js` contains the implementation.

```javascript
async sttFromMic() {
    const tokenObj = await getTokenOrRefresh();
    const speechConfig = speechsdk.SpeechConfig.fromAuthorizationToken(tokenObj.authToken, tokenObj.region);
    speechConfig.speechRecognitionLanguage = 'en-US';

    const audioConfig = speechsdk.AudioConfig.fromDefaultMicrophoneInput();
    const recognizer = new speechsdk.SpeechRecognizer(speechConfig, audioConfig);

    this.setState({
        displayText: 'speak into your microphone...'
    });

    recognizer.recognizeOnceAsync(result => {
        let displayText;
        if (result.reason === ResultReason.RecognizedSpeech) {
            displayText = `RECOGNIZED: Text=${result.text}`
        } else {
            displayText = 'ERROR: Speech was cancelled or could not be recognized. Ensure your microphone is working properly.';
        }

        this.setState({
            displayText: displayText
        });
    });
}
```

Running speech-to-text from a microphone is done by creating an `AudioConfig` object and using it with the recognizer.

```javascript
const audioConfig = speechsdk.AudioConfig.fromDefaultMicrophoneInput();
const recognizer = new speechsdk.SpeechRecognizer(speechConfig, audioConfig);
```

## Speech-to-text from file

To convert speech to text from an audio file, run the app and then click **Convert speech to text from an audio file.** This will open a file browser and allow you to select an audio file. The following function `fileChange` is bound to an event handler that detects the file change.

```javascript
async fileChange(event) {
    const audioFile = event.target.files[0];
    console.log(audioFile);
    const fileInfo = audioFile.name + ` size=${audioFile.size} bytes `;

    this.setState({
        displayText: fileInfo
    });

    const tokenObj = await getTokenOrRefresh();
    const speechConfig = speechsdk.SpeechConfig.fromAuthorizationToken(tokenObj.authToken, tokenObj.region);
    speechConfig.speechRecognitionLanguage = 'en-US';

    const audioConfig = speechsdk.AudioConfig.fromWavFileInput(audioFile);
    const recognizer = new speechsdk.SpeechRecognizer(speechConfig, audioConfig);

    recognizer.recognizeOnceAsync(result => {
        let displayText;
        if (result.reason === ResultReason.RecognizedSpeech) {
            displayText = `RECOGNIZED: Text=${result.text}`
        } else {
            displayText = 'ERROR: Speech was cancelled or could not be recognized. Ensure your microphone is working properly.';
        }

        this.setState({
            displayText: fileInfo + displayText
        });
    });
}
```

You need the audio file as a JavaScript [`File`](https://developer.mozilla.org/en-US/docs/Web/API/File) object, so you can grab it directly off the event target using `const audioFile = event.target.files[0];`.
Next, you use the file to create the `AudioConfig` and then pass it to the recognizer.

```javascript
const audioConfig = speechsdk.AudioConfig.fromWavFileInput(audioFile);
const recognizer = new speechsdk.SpeechRecognizer(speechConfig, audioConfig);
```

## Token exchange process

This sample application shows an example design pattern for retrieving and managing tokens, a common task when using the Speech JavaScript SDK in a browser environment. A simple Express back-end is implemented in the same project under `server/index.js`, which abstracts the token retrieval process.

The reason for this design is to prevent your speech key from being exposed on the front-end, since it can be used to make calls directly to your subscription. By using an ephemeral token, you are able to protect your speech key from being used directly. To get a token, you use the Speech REST API and make a call using your speech key and region. In the Express part of the app, this is implemented in `index.js` behind the endpoint `/api/get-speech-token`, which the front-end uses to get tokens.

```javascript
app.get('/api/get-speech-token', async (req, res, next) => {
    res.setHeader('Content-Type', 'application/json');
    const speechKey = process.env.SPEECH_KEY;
    const speechRegion = process.env.SPEECH_REGION;

    if (speechKey === 'paste-your-speech-key-here' || speechRegion === 'paste-your-speech-region-here') {
        res.status(400).send('You forgot to add your speech key or region to the .env file.');
    } else {
        const headers = {
            headers: {
                'Ocp-Apim-Subscription-Key': speechKey,
                'Content-Type': 'application/x-www-form-urlencoded'
            }
        };

        try {
            const tokenResponse = await axios.post(`https://${speechRegion}.api.cognitive.microsoft.com/sts/v1.0/issueToken`, null, headers);
            res.send({ token: tokenResponse.data, region: speechRegion });
        } catch (err) {
            res.status(401).send('There was an error authorizing your speech key.');
        }
    }
});
```

In the request, you create an `Ocp-Apim-Subscription-Key` header and pass your speech key as its value. Then you make a request to the **issueToken** endpoint for your region, and an authorization token is returned. In a production application, this token-issuing endpoint should be *restricted by additional user authentication* whenever possible.

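
As a rough sketch of that recommendation (it is not part of this sample), the token route could be gated behind whatever user authentication your app already has. The snippet below assumes the same Express `app` as above; `requireUser` and the `x-user-session` header check are placeholders for your real authentication logic.

```javascript
// Hypothetical sketch only -- this sample does not ship any user authentication.
// `requireUser` stands in for whatever auth mechanism your app already uses
// (sessions, JWTs, an identity provider, etc.).
function requireUser(req, res, next) {
    // Placeholder check: expect some proof of an authenticated user on the request.
    if (!req.headers['x-user-session']) {
        return res.status(401).send('Not signed in.');
    }
    next();
}

// Mount the middleware in front of the existing token route.
app.get('/api/get-speech-token', requireUser, async (req, res, next) => {
    // ...same token exchange logic as shown above...
});
```
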
On the front-end, `token_util.js` contains the helper function `getTokenOrRefresh`, which manages the token retrieval and refresh process.

```javascript
export async function getTokenOrRefresh() {
    const cookie = new Cookie();
    const speechToken = cookie.get('speech-token');

    if (speechToken === undefined) {
        try {
            const res = await axios.get('/api/get-speech-token');
            const token = res.data.token;
            const region = res.data.region;
            cookie.set('speech-token', region + ':' + token, {maxAge: 540, path: '/'});

            console.log('Token fetched from back-end: ' + token);
            return { authToken: token, region: region };
        } catch (err) {
            console.log(err.response.data);
            return { authToken: null, error: err.response.data };
        }
    } else {
        console.log('Token fetched from cookie: ' + speechToken);
        const idx = speechToken.indexOf(':');
        return { authToken: speechToken.slice(idx + 1), region: speechToken.slice(0, idx) };
    }
}
```

This function uses the `universal-cookie` library to store and retrieve the token from a browser cookie. It first checks whether a token cookie already exists; if so, it returns the token without hitting the Express back-end. If there is no existing cookie for a token, it calls `/api/get-speech-token` to fetch a new one. Since both the token and its corresponding region are needed later, the cookie is stored in the format `region:token` and is split back into the two values upon retrieval.

Tokens for the service expire after 10 minutes, so the sample uses the `maxAge` property of the cookie as the trigger for when a new token needs to be generated. It is recommended to use 9 minutes as the expiry time to leave a buffer, so `maxAge` is set to **540 seconds**.

In `App.js`, you use `getTokenOrRefresh` in the functions for speech-to-text from a microphone and from a file. Finally, use the `SpeechConfig.fromAuthorizationToken` function to create an auth context using the token.

```javascript
const tokenObj = await getTokenOrRefresh();
const speechConfig = speechsdk.SpeechConfig.fromAuthorizationToken(tokenObj.authToken, tokenObj.region);
```

In many other Speech service samples, you will see the function `SpeechConfig.fromSubscription` used instead of `SpeechConfig.fromAuthorizationToken`. By **avoiding the usage** of `fromSubscription` on the front-end, you prevent your speech subscription key from being exposed and rely on the token authentication process instead. `fromSubscription` is safe to use in a Node.js environment, or in other Speech SDK programming languages when the call is made from a back-end, but it is best to avoid using it in browser-based JavaScript.

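
To make the contrast concrete, here is a minimal sketch of the two factory calls side by side. It is not code from this sample; the import paths and the `process.env` variable names are assumptions based on how this project is laid out, and the point is only where each call is safe to make.

```javascript
import * as speechsdk from 'microsoft-cognitiveservices-speech-sdk';
import { getTokenOrRefresh } from './token_util';

// Back-end (Node.js) only: build a config straight from the subscription key.
// Safe on a server, because the key never leaves your environment.
function configFromKeyOnServer() {
    return speechsdk.SpeechConfig.fromSubscription(process.env.SPEECH_KEY, process.env.SPEECH_REGION);
}

// Browser front-end (the pattern this sample uses): only a short-lived token
// fetched from the Express back-end, plus the region, ever reaches the client.
async function configFromTokenInBrowser() {
    const tokenObj = await getTokenOrRefresh();
    return speechsdk.SpeechConfig.fromAuthorizationToken(tokenObj.authToken, tokenObj.region);
}
```
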
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
{
  "name": "speech-sample",
  "version": "0.1.0",
  "private": true,
  "dependencies": {
    "@testing-library/jest-dom": "^5.16.5",
    "@testing-library/react": "^14.0.0",
    "@testing-library/user-event": "^14.4.3",
    "axios": "^1.4.0",
    "bootstrap": "^5.3.0",
    "dotenv": "^16.3.1",
    "microsoft-cognitiveservices-speech-sdk": "^1.32.0",
    "react": "^18.2.0",
    "react-dom": "^18.0.1",
    "react-scripts": "5.0.1",
    "reactstrap": "^9.2.0",
    "universal-cookie": "^4.0.4",
    "web-vitals": "^3.3.2"
  },
  "scripts": {
    "start": "react-scripts start",
    "build": "react-scripts build",
    "test": "react-scripts test",
    "eject": "react-scripts eject",
    "server": "node-env-run server --exec nodemon | pino-colada",
    "dev": "run-p server start"
  },
  "eslintConfig": {
    "extends": [
      "react-app",
      "react-app/jest"
    ]
  },
  "browserslist": {
    "production": [
      ">0.2%",
      "not dead",
      "not op_mini all"
    ],
    "development": [
      "last 1 chrome version",
      "last 1 firefox version",
      "last 1 safari version"
    ]
  },
  "devDependencies": {
    "body-parser": "^1.20.2",
    "express": "^4.18.2",
    "express-pino-logger": "^7.0.0",
    "node-env-run": "^4.0.2",
    "nodemon": "^3.0.1",
    "npm-run-all": "^4.1.5",
    "pino-colada": "^2.2.2",
    "@babel/plugin-proposal-private-property-in-object": "^7.21.11"
  },
  "overrides": {
    "css-select": "^5.1.0"
  },
  "proxy": "http://localhost:3001"
}
--------------------------------------------------------------------------------
/public/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/AzureSpeechReactSample/e30c6a281fade97af4e6d0e2bb7b037f38419ea1/public/favicon.ico
--------------------------------------------------------------------------------
/public/index.html:
--------------------------------------------------------------------------------