├── .husky
│   ├── .gitignore
│   └── pre-commit
├── .eslintignore
├── .gitignore
├── test-harness
│   ├── .env
│   ├── public
│   │   ├── favicon.ico
│   │   └── index.html
│   ├── src
│   │   ├── Dictaphone.module.css
│   │   ├── index.js
│   │   └── Dictaphone.js
│   ├── .gitignore
│   ├── package.json
│   └── LICENSE
├── src
│   ├── declarations.d.ts
│   ├── index.ts
│   ├── testUtils.ts
│   ├── createSpeechRecognition.ts
│   ├── types.ts
│   ├── testData.ts
│   └── createSpeechRecognition.test.ts
├── .prettierrc
├── docs
│   ├── .nojekyll
│   ├── interfaces
│   │   ├── speechrecognitionerrorevent.md
│   │   ├── speechrecognitionalternative.md
│   │   ├── speechrecognitionclass.md
│   │   ├── speechrecognitionevent.md
│   │   ├── speechrecognitionresult.md
│   │   └── speechrecognition.md
│   └── README.md
├── .github
│   ├── pull_request_template.md
│   ├── workflows
│   │   ├── publish.yml
│   │   └── test.yml
│   └── ISSUE_TEMPLATE
│       ├── enhancement---feature-request.md
│       └── bug_report.md
├── tsconfig.json
├── jest.config.js
├── .eslintrc
├── api-extractor.json
├── LICENSE
├── etc
│   └── speech-recognition-polyfill.api.md
├── CONTRIBUTING.md
├── package.json
└── README.md

--------------------------------------------------------------------------------
/.husky/.gitignore:
--------------------------------------------------------------------------------
1 | _
2 | 
--------------------------------------------------------------------------------
/.eslintignore:
--------------------------------------------------------------------------------
1 | node_modules
2 | dist
3 | src/**/*test*
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /node_modules
2 | /dist
3 | /.vscode
4 | /temp
--------------------------------------------------------------------------------
/test-harness/.env:
--------------------------------------------------------------------------------
1 | SKIP_PREFLIGHT_CHECK=true
2 | REACT_APP_APP_ID=${APP_ID}
--------------------------------------------------------------------------------
/src/declarations.d.ts:
--------------------------------------------------------------------------------
1 | interface Window {
2 |   webkitAudioContext: typeof AudioContext
3 | }
4 | 
--------------------------------------------------------------------------------
/.husky/pre-commit:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | . "$(dirname "$0")/_/husky.sh"
3 | 
4 | npm run api-extractor
5 | npm run docs
6 | 
--------------------------------------------------------------------------------
/test-harness/public/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/speechly/speech-recognition-polyfill/HEAD/test-harness/public/favicon.ico
--------------------------------------------------------------------------------
/.prettierrc:
--------------------------------------------------------------------------------
1 | {
2 |   "printWidth": 120,
3 |   "singleQuote": true,
4 |   "semi": false,
5 |   "trailingComma": "all",
6 |   "arrowParens": "avoid"
7 | }
--------------------------------------------------------------------------------
/docs/.nojekyll:
--------------------------------------------------------------------------------
1 | TypeDoc added this file to prevent GitHub Pages from using Jekyll. You can turn off this behavior by setting the `githubPages` option to false.
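A note on the `${APP_ID}` placeholder in `test-harness/.env` above: create-react-app expands `.env` variables from the shell environment (via dotenv-expand), and the root `package.json` also defines a `set-app-id` script that writes a `test-harness/.env.local` override. A minimal sketch of filling it in, with a placeholder app ID:

```sh
# Writes REACT_APP_APP_ID=my-speechly-app-id to test-harness/.env.local,
# which create-react-app loads in preference to test-harness/.env
APP_ID=my-speechly-app-id npm run set-app-id
```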
-------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | ### What 2 | 3 | Describe the scope of changes in this pull request. 4 | 5 | ### Why 6 | 7 | Describe the reasoning behind this pull request. 8 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "outDir": "dist/", 4 | "target": "es6", 5 | "module": "commonjs", 6 | "moduleResolution": "node", 7 | "declaration": true, 8 | "sourceMap": true, 9 | "strict": true, 10 | "noImplicitAny": true, 11 | "esModuleInterop": true 12 | }, 13 | "include": ["src/**/*"], 14 | "exclude": ["src/**/*test*"] 15 | } -------------------------------------------------------------------------------- /jest.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | rootDir: '.', 3 | transform: { 4 | '.(ts|tsx)': 'ts-jest', 5 | }, 6 | globals: { 7 | 'ts-jest': { 8 | diagnostics: false, 9 | }, 10 | }, 11 | moduleFileExtensions: ['ts', 'js'], 12 | testPathIgnorePatterns: [ 13 | '/node_modules/', 14 | '/dist/', 15 | '/test-harness/' 16 | ], 17 | modulePathIgnorePatterns: ['/dist/'], 18 | }; -------------------------------------------------------------------------------- /test-harness/src/Dictaphone.module.css: -------------------------------------------------------------------------------- 1 | .Dictaphone { 2 | display: flex; 3 | flex-direction: column; 4 | } 5 | 6 | .Dictaphone__holdToTalk { 7 | display: flex; 8 | justify-content: center; 9 | align-items: center; 10 | width: 6rem; 11 | height: 6rem; 12 | border-radius: 50%; 13 | background-color: grey; 14 | } 15 | 16 | .Dictaphone__holdToTalk:active { 17 | background-color: green; 18 | } -------------------------------------------------------------------------------- /test-harness/.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 
2 | 3 | # dependencies 4 | /node_modules 5 | /.pnp 6 | .pnp.js 7 | 8 | # testing 9 | /coverage 10 | 11 | # production 12 | /build 13 | 14 | # misc 15 | .DS_Store 16 | .env.local 17 | .env.development.local 18 | .env.test.local 19 | .env.production.local 20 | 21 | npm-debug.log* 22 | yarn-debug.log* 23 | yarn-error.log* 24 | 25 | /src/SpeechlySpeechRecognition 26 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish 2 | 3 | on: 4 | release: 5 | types: [created] 6 | 7 | jobs: 8 | publish-npm: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v2 12 | - uses: actions/setup-node@v1 13 | with: 14 | node-version: 12 15 | registry-url: https://registry.npmjs.org/ 16 | - run: npm ci 17 | - run: npm publish 18 | env: 19 | NODE_AUTH_TOKEN: ${{secrets.NPM_TOKEN}} -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: [push] 4 | 5 | jobs: 6 | test: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - name: Check out Git repository 10 | uses: actions/checkout@v2 11 | - name: Set up Node.js 12 | uses: actions/setup-node@v1 13 | with: 14 | node-version: 16 15 | - name: Install Node.js dependencies 16 | run: npm ci 17 | - name: Run JS linter 18 | run: npm run lint 19 | - name: Run unit tests 20 | run: npm test 21 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/enhancement---feature-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Enhancement / Feature request 3 | about: Suggest an enhancement or request a feature from this project 4 | title: '' 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | ### What 11 | 12 | Describe the feature or enhancement that you are proposing. 13 | 14 | ### Why 15 | 16 | Describe a clear and concise reasoning behind your request. Feel free to provide example use-cases. 17 | 18 | ### How 19 | 20 | Describe your preferred solution and / or how you would implement it. 
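The `test.yml` workflow above can be reproduced locally before pushing; its three `run` steps map directly onto scripts defined in the root `package.json`:

```sh
npm ci        # install exact locked dependencies, as the CI job does
npm run lint  # the same ESLint step as the workflow
npm test      # the same one-off Jest run as the workflow
```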
--------------------------------------------------------------------------------
/test-harness/src/index.js:
--------------------------------------------------------------------------------
1 | import React from 'react';
2 | import ReactDOM from 'react-dom';
3 | import SpeechRecognition from 'react-speech-recognition'
4 | import Dictaphone from './Dictaphone';
5 | import { createSpeechlySpeechRecognition } from './SpeechlySpeechRecognition'
6 | 
7 | const appId = process.env.REACT_APP_APP_ID
8 | const SpeechlySpeechRecognition = createSpeechlySpeechRecognition(appId)
9 | SpeechRecognition.applyPolyfill(SpeechlySpeechRecognition);
10 | 
11 | ReactDOM.render(
12 |   <React.StrictMode>
13 |     <Dictaphone />
14 |   </React.StrictMode>,
15 |   document.getElementById('root')
16 | );
17 | 
--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------
1 | /**
2 |  * A polyfill (a "{@link https://ponyfoo.com/articles/polyfills-or-ponyfills | ponyfill}" to be more precise) for the
3 |  * {@link https://developer.mozilla.org/en-US/docs/Web/API/SpeechRecognition | SpeechRecognition API} that uses
4 |  * {@link https://www.speechly.com/ | Speechly} to implement the transcription functionality
5 |  *
6 |  * @remarks
7 |  * The implementation of the {@link https://developer.mozilla.org/en-US/docs/Web/API/SpeechRecognition | SpeechRecognition spec}
8 |  * is incomplete, but should enable the majority of use cases
9 |  *
10 |  * @packageDocumentation
11 |  */
12 | 
13 | export * from './createSpeechRecognition'
14 | export * from './types'
15 | 
--------------------------------------------------------------------------------
/.eslintrc:
--------------------------------------------------------------------------------
1 | {
2 |   "root": true,
3 |   "parser": "@typescript-eslint/parser",
4 |   "plugins": ["@typescript-eslint", "eslint-plugin-tsdoc", "jest"],
5 |   "extends": [
6 |     "standard-with-typescript",
7 |     "plugin:prettier/recommended",
8 |     "plugin:jest/recommended"
9 |   ],
10 |   "rules": {
11 |     "tsdoc/syntax": "error",
12 |     "comma-dangle": ["error", "always-multiline"],
13 |     "no-case-declarations": "off",
14 |     "@typescript-eslint/no-empty-function": "off",
15 |     "@typescript-eslint/space-before-function-paren": "off",
16 |     "@typescript-eslint/return-await": ["error", "in-try-catch"]
17 |   },
18 |   "parserOptions": {
19 |     "project": "./tsconfig.json"
20 |   }
21 | }
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: File a bug report
4 | title: ''
5 | labels: bug
6 | assignees: ''
7 | 
8 | ---
9 | 
10 | ### Describe the bug
11 | A clear and concise description of what the bug is.
12 | 
13 | ### To Reproduce
14 | Steps to reproduce the behavior:
15 | 1. Go to '...'
16 | 2. Click on '....'
17 | 3. Scroll down to '....'
18 | 4. See error
19 | 
20 | ### Expected behaviour
21 | A clear and concise description of what you expected to happen.
22 | 
23 | ### Screenshots
24 | If applicable, add screenshots to help explain your problem.
25 | 
26 | ### Environment
27 | 
28 | - Platform: [e.g. Mobile, Desktop, React Native]
29 | - OS: [e.g. iOS]
30 | - Browser [e.g. chrome, safari]
31 | - Version [e.g. 22]
32 | - Package version [e.g. 1.2.3]
33 | 
34 | ### Additional context
35 | Add any other context about the problem here.
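`test-harness/src/index.js` above consumes the polyfill through `react-speech-recognition`'s `applyPolyfill`, but the class returned by `createSpeechlySpeechRecognition` can also be driven directly through the `SpeechRecognition` interface declared in `src/types.ts`. A minimal sketch, assuming a placeholder app ID:

```ts
import { createSpeechlySpeechRecognition } from '@speechly/speech-recognition-polyfill'

const SpeechlySpeechRecognition = createSpeechlySpeechRecognition('<your-app-id>') // placeholder

const transcribeOnce = async (): Promise<void> => {
  const recognition = new SpeechlySpeechRecognition()
  recognition.continuous = false    // stop listening after the first utterance
  recognition.interimResults = true // also emit partial transcripts
  recognition.onresult = ({ results, resultIndex }) => {
    console.log(results[resultIndex][0].transcript)
  }
  recognition.onend = () => {
    console.log('Transcription ended')
  }
  await recognition.start()
}
```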
-------------------------------------------------------------------------------- /api-extractor.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://developer.microsoft.com/json-schemas/api-extractor/v7/api-extractor.schema.json", 3 | "mainEntryPointFilePath": "dist/index.d.ts", 4 | "bundledPackages": [], 5 | "compiler": {}, 6 | "apiReport": { 7 | "enabled": true 8 | }, 9 | "docModel": { 10 | "enabled": false 11 | }, 12 | "dtsRollup": { 13 | "enabled": true, 14 | "untrimmedFilePath": "dist/index.d.ts" 15 | }, 16 | "tsdocMetadata": { 17 | "enabled": true 18 | }, 19 | "messages": { 20 | "compilerMessageReporting": { 21 | "default": { 22 | "logLevel": "error" 23 | } 24 | }, 25 | "extractorMessageReporting": { 26 | "default": { 27 | "logLevel": "error" 28 | } 29 | }, 30 | "tsdocMessageReporting": { 31 | "default": { 32 | "logLevel": "error" 33 | } 34 | } 35 | } 36 | } -------------------------------------------------------------------------------- /docs/interfaces/speechrecognitionerrorevent.md: -------------------------------------------------------------------------------- 1 | [@speechly/speech-recognition-polyfill](../README.md) / SpeechRecognitionErrorEvent 2 | 3 | # Interface: SpeechRecognitionErrorEvent 4 | 5 | Data associated with an error emitted from the recognition service 6 | 7 | ## Table of contents 8 | 9 | ### Properties 10 | 11 | - [error](SpeechRecognitionErrorEvent.md#error) 12 | - [message](SpeechRecognitionErrorEvent.md#message) 13 | 14 | ## Properties 15 | 16 | ### error 17 | 18 | • **error**: ``"not-allowed"`` \| ``"audio-capture"`` 19 | 20 | Type of error raised 21 | 22 | #### Defined in 23 | 24 | [types.ts:61](https://github.com/speechly/speech-recognition-polyfill/blob/HEAD/src/types.ts#L61) 25 | 26 | ___ 27 | 28 | ### message 29 | 30 | • **message**: `string` 31 | 32 | Message describing the error in more detail 33 | 34 | #### Defined in 35 | 36 | [types.ts:65](https://github.com/speechly/speech-recognition-polyfill/blob/HEAD/src/types.ts#L65) 37 | -------------------------------------------------------------------------------- /docs/interfaces/speechrecognitionalternative.md: -------------------------------------------------------------------------------- 1 | [@speechly/speech-recognition-polyfill](../README.md) / SpeechRecognitionAlternative 2 | 3 | # Interface: SpeechRecognitionAlternative 4 | 5 | Transcript for the ongoing utterance, including the level of confidence in that transcript 6 | 7 | ## Table of contents 8 | 9 | ### Properties 10 | 11 | - [confidence](SpeechRecognitionAlternative.md#confidence) 12 | - [transcript](SpeechRecognitionAlternative.md#transcript) 13 | 14 | ## Properties 15 | 16 | ### confidence 17 | 18 | • **confidence**: `number` 19 | 20 | Level of confidence in the correctness of the transcript (from 0 to 1) 21 | 22 | #### Defined in 23 | 24 | [types.ts:13](https://github.com/speechly/speech-recognition-polyfill/blob/HEAD/src/types.ts#L13) 25 | 26 | ___ 27 | 28 | ### transcript 29 | 30 | • **transcript**: `string` 31 | 32 | Current transcript of the ongoing utterance (the words spoken by the user) 33 | 34 | #### Defined in 35 | 36 | [types.ts:9](https://github.com/speechly/speech-recognition-polyfill/blob/HEAD/src/types.ts#L9) 37 | -------------------------------------------------------------------------------- /docs/interfaces/speechrecognitionclass.md: -------------------------------------------------------------------------------- 1 | 
[@speechly/speech-recognition-polyfill](../README.md) / SpeechRecognitionClass 2 | 3 | # Interface: SpeechRecognitionClass 4 | 5 | Class that implements the SpeechRecognition interface 6 | 7 | ## Table of contents 8 | 9 | ### Constructors 10 | 11 | - [constructor](SpeechRecognitionClass.md#constructor) 12 | 13 | ### Properties 14 | 15 | - [hasBrowserSupport](SpeechRecognitionClass.md#hasbrowsersupport) 16 | 17 | ## Constructors 18 | 19 | ### constructor 20 | 21 | • **new SpeechRecognitionClass**() 22 | 23 | Constructor for a SpeechRecognition implementation 24 | 25 | #### Defined in 26 | 27 | [types.ts:160](https://github.com/speechly/speech-recognition-polyfill/blob/HEAD/src/types.ts#L160) 28 | 29 | ## Properties 30 | 31 | ### hasBrowserSupport 32 | 33 | • `Readonly` **hasBrowserSupport**: `boolean` 34 | 35 | Does the browser support the APIs needed for this polyfill? 36 | 37 | #### Defined in 38 | 39 | [types.ts:156](https://github.com/speechly/speech-recognition-polyfill/blob/HEAD/src/types.ts#L156) 40 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Speechly 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
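Because `hasBrowserSupport` is declared on `SpeechRecognitionClass` itself (not on instances), callers can gate construction on it before calling `new`. A hypothetical wiring — not part of this package — that prefers a native implementation and falls back to the polyfill might look like:

```ts
import { createSpeechlySpeechRecognition } from '@speechly/speech-recognition-polyfill'

const SpeechlySpeechRecognition = createSpeechlySpeechRecognition('<your-app-id>') // placeholder

// Native SpeechRecognition is absent from many TypeScript DOM typings, hence the casts
const NativeSpeechRecognition =
  (window as any).SpeechRecognition ?? (window as any).webkitSpeechRecognition
const SpeechRecognitionImpl =
  NativeSpeechRecognition ??
  (SpeechlySpeechRecognition.hasBrowserSupport ? SpeechlySpeechRecognition : undefined)
```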
-------------------------------------------------------------------------------- /test-harness/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@speechly/speech-recognition-polyfill-example", 3 | "version": "0.1.0", 4 | "private": true, 5 | "description": "Example React app using the Speechly speech recognition polyfill", 6 | "license": "MIT", 7 | "dependencies": { 8 | "@testing-library/jest-dom": "^4.2.4", 9 | "@testing-library/react": "^9.5.0", 10 | "@testing-library/user-event": "^7.2.1", 11 | "prop-types": "^15.7.2", 12 | "react": "^16.13.1", 13 | "react-dom": "^16.13.1", 14 | "react-scripts": "5.0.1", 15 | "react-speech-recognition": "^3.9.0" 16 | }, 17 | "scripts": { 18 | "start": "react-scripts start", 19 | "build": "react-scripts build", 20 | "test": "react-scripts test", 21 | "eject": "react-scripts eject" 22 | }, 23 | "eslintConfig": { 24 | "extends": "react-app" 25 | }, 26 | "browserslist": { 27 | "production": [ 28 | ">0.2%", 29 | "not dead", 30 | "not op_mini all" 31 | ], 32 | "development": [ 33 | "last 1 chrome version", 34 | "last 1 firefox version", 35 | "last 1 safari version" 36 | ] 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /test-harness/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Speechly 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | 
--------------------------------------------------------------------------------
/docs/interfaces/speechrecognitionevent.md:
--------------------------------------------------------------------------------
1 | [@speechly/speech-recognition-polyfill](../README.md) / SpeechRecognitionEvent
2 | 
3 | # Interface: SpeechRecognitionEvent
4 | 
5 | Data associated with an update to the transcript for the ongoing utterance
6 | 
7 | ## Table of contents
8 | 
9 | ### Properties
10 | 
11 | - [resultIndex](SpeechRecognitionEvent.md#resultindex)
12 | - [results](SpeechRecognitionEvent.md#results)
13 | 
14 | ## Properties
15 | 
16 | ### resultIndex
17 | 
18 | • **resultIndex**: `number`
19 | 
20 | Index of the earliest speech recognition result that has changed
21 | 
22 | #### Defined in
23 | 
24 | [types.ts:50](https://github.com/speechly/speech-recognition-polyfill/blob/HEAD/src/types.ts#L50)
25 | 
26 | ___
27 | 
28 | ### results
29 | 
30 | • **results**: [`SpeechRecognitionResult`](SpeechRecognitionResult.md)[]
31 | 
32 | List of speech recognition results, containing all transcripts collected in the current session. This represents the
33 | native [SpeechRecognitionResultList](https://developer.mozilla.org/en-US/docs/Web/API/SpeechRecognitionResultList).
34 | Note that the Speechly implementation currently does not maintain a history of results, only returning the single
35 | result for the ongoing utterance
36 | 
37 | #### Defined in
38 | 
39 | [types.ts:46](https://github.com/speechly/speech-recognition-polyfill/blob/HEAD/src/types.ts#L46)
40 | 
--------------------------------------------------------------------------------
/docs/interfaces/speechrecognitionresult.md:
--------------------------------------------------------------------------------
1 | [@speechly/speech-recognition-polyfill](../README.md) / SpeechRecognitionResult
2 | 
3 | # Interface: SpeechRecognitionResult
4 | 
5 | Object containing a transcript for the ongoing utterance and an indicator of whether that transcript is final or not
6 | 
7 | ## Table of contents
8 | 
9 | ### Properties
10 | 
11 | - [0](SpeechRecognitionResult.md#0)
12 | - [isFinal](SpeechRecognitionResult.md#isfinal)
13 | 
14 | ## Properties
15 | 
16 | ### 0
17 | 
18 | • **0**: [`SpeechRecognitionAlternative`](SpeechRecognitionAlternative.md)
19 | 
20 | Object containing a transcript for the ongoing utterance (the use of an integer index key is to mimic the
21 | structure used in the native [SpeechRecognitionResult spec](https://developer.mozilla.org/en-US/docs/Web/API/SpeechRecognitionResult)),
22 | which contains an "array" of alternative transcripts. In the Speechly implementation, there is never more than one
23 | alternative, so only the first index is specified in the interface
24 | 
25 | #### Defined in
26 | 
27 | [types.ts:27](https://github.com/speechly/speech-recognition-polyfill/blob/HEAD/src/types.ts#L27)
28 | 
29 | ___
30 | 
31 | ### isFinal
32 | 
33 | • **isFinal**: `boolean`
34 | 
35 | Is this transcript "final"? That is, has the transcription algorithm concluded that the utterance has finished and
36 | that the transcript will have no further updates?
37 | 
38 | #### Defined in
39 | 
40 | [types.ts:32](https://github.com/speechly/speech-recognition-polyfill/blob/HEAD/src/types.ts#L32)
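Putting the two interfaces above together: an `onresult` handler receives a `SpeechRecognitionEvent` whose `results` entries each hold a single alternative under index `0`. A sketch of a handler that distinguishes interim from final transcripts (the instance passed in is assumed to come from this polyfill):

```ts
import type { SpeechRecognition } from '@speechly/speech-recognition-polyfill'

const attachLogging = (recognition: SpeechRecognition): void => {
  recognition.onresult = event => {
    const result = event.results[event.resultIndex] // a SpeechRecognitionResult
    const { transcript, confidence } = result[0]    // its single SpeechRecognitionAlternative
    if (result.isFinal) {
      console.log(`Final transcript: ${transcript} (confidence ${confidence})`)
    } else {
      console.log(`Interim transcript: ${transcript}`)
    }
  }
}
```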
--------------------------------------------------------------------------------
/test-harness/public/index.html:
--------------------------------------------------------------------------------
1 | <!DOCTYPE html>
2 | <html lang="en">
3 |   <head>
4 |     <meta charset="utf-8" />
5 |     <link rel="icon" href="%PUBLIC_URL%/favicon.ico" />
6 |     <meta name="viewport" content="width=device-width, initial-scale=1" />
7 |     <meta name="theme-color" content="#000000" />
8 |     <meta
9 |       name="description"
10 |       content="Web site created using create-react-app"
11 |     />
12 |     <link rel="apple-touch-icon" href="%PUBLIC_URL%/logo192.png" />
13 |     <!--
14 |       manifest.json provides metadata used when your web app is installed on a
15 |       user's mobile device or desktop. See https://developers.google.com/web/fundamentals/web-app-manifest/
16 |     -->
17 |     <link rel="manifest" href="%PUBLIC_URL%/manifest.json" />
18 |     <!--
19 |       Notice the use of %PUBLIC_URL% in the tags above.
20 |       It will be replaced with the URL of the `public` folder during the build.
21 |     -->
22 |     <title>React App</title>
23 |   </head>
24 |   <body>
25 |     <noscript>You need to enable JavaScript to run this app.</noscript>
26 |     <div id="root"></div>
27 |     <!--
28 |       This HTML file is a template.
29 |       If you open it directly in the browser, you will see an empty page.
30 |       You can add webfonts, meta tags, or analytics to this file.
31 |       The build step will place the bundled scripts into the <body> tag.
32 |     -->
33 |   </body>
34 | </html>
35 | 
--------------------------------------------------------------------------------
/test-harness/src/Dictaphone.js:
--------------------------------------------------------------------------------
1 | import React, { useEffect } from 'react'
2 | import SpeechRecognition, { useSpeechRecognition } from 'react-speech-recognition'
3 | 
4 | import STYLES from './Dictaphone.module.css'
5 | 
6 | export default () => {
7 |   const {
8 |     transcript,
9 |     interimTranscript,
10 |     finalTranscript,
11 |     resetTranscript,
12 |     listening,
13 |     browserSupportsSpeechRecognition,
14 |     isMicrophoneAvailable,
15 |   } = useSpeechRecognition()
16 |   useEffect(() => {
17 |     if (interimTranscript !== '') {
18 |       console.log('Got interim result:', interimTranscript)
19 |     }
20 |     if (finalTranscript !== '') {
21 |       console.log('Got final result:', finalTranscript)
22 |     }
23 |   }, [interimTranscript, finalTranscript]);
24 |   const listenContinuously = () => SpeechRecognition.startListening({
25 |     continuous: true,
26 |     language: 'en-GB'
27 |   })
28 |   const listenOnce = () => SpeechRecognition.startListening({ continuous: false })
29 | 
30 |   if (!browserSupportsSpeechRecognition) {
31 |     return <span>No browser support</span>
32 |   }
33 | 
34 |   if (!isMicrophoneAvailable) {
35 |     return <span>Please allow access to the microphone</span>
36 |   }
37 | 
38 |   return (
39 |     <div className={STYLES.Dictaphone}>
40 |       <div>
41 |         <span>Listening: {listening ? 'on' : 'off'}</span>
42 |         <button onClick={resetTranscript}>Reset</button>
43 |         <span>{transcript}</span>
44 |       </div>
45 |       <div
46 |         className={STYLES.Dictaphone__holdToTalk}
47 |         onTouchStart={listenContinuously}
48 |         onMouseDown={listenContinuously}
49 |         onTouchEnd={SpeechRecognition.stopListening}
50 |         onMouseUp={SpeechRecognition.stopListening}
51 |       >Hold to talk</div>
52 |       <button onClick={listenContinuously}>Listen continuously</button>
53 |       <button onClick={listenOnce}>Listen once</button>
54 |       <button onClick={SpeechRecognition.stopListening}>Stop</button>
55 |     </div>
56 |   )
57 | }
58 | 
--------------------------------------------------------------------------------
/etc/speech-recognition-polyfill.api.md:
--------------------------------------------------------------------------------
1 | ## API Report File for "@speechly/speech-recognition-polyfill"
2 | 
3 | > Do not edit this file. It is a report generated by [API Extractor](https://api-extractor.com/).
4 | 
5 | ```ts
6 | 
7 | // @public
8 | export const createSpeechlySpeechRecognition: (appId: string) => SpeechRecognitionClass;
9 | 
10 | // @public
11 | export const MicrophoneNotAllowedError: SpeechRecognitionErrorEvent;
12 | 
13 | // @public
14 | export type SpeechEndCallback = () => void;
15 | 
16 | // @public
17 | export type SpeechErrorCallback = (speechRecognitionErrorEvent: SpeechRecognitionErrorEvent) => void;
18 | 
19 | // @public
20 | export interface SpeechRecognition {
21 |     abort: () => Promise<void>;
22 |     continuous: boolean;
23 |     interimResults: boolean;
24 |     onend: SpeechEndCallback;
25 |     onerror: SpeechErrorCallback;
26 |     onresult: SpeechRecognitionEventCallback;
27 |     start: () => Promise<void>;
28 |     stop: () => Promise<void>;
29 | }
30 | 
31 | // @public
32 | interface SpeechRecognitionAlternative_2 {
33 |     confidence: number;
34 |     transcript: string;
35 | }
36 | export { SpeechRecognitionAlternative_2 as SpeechRecognitionAlternative }
37 | 
38 | // @public
39 | export interface SpeechRecognitionClass {
40 |     new (): SpeechRecognition;
41 |     readonly hasBrowserSupport: boolean;
42 | }
43 | 
44 | // @public
45 | export interface SpeechRecognitionErrorEvent {
46 |     error: 'not-allowed' | 'audio-capture';
47 |     message: string;
48 | }
49 | 
50 | // @public
51 | export interface SpeechRecognitionEvent {
52 |     resultIndex: number;
53 |     results: SpeechRecognitionResult_2[];
54 | }
55 | 
56 | // @public
57 | export type SpeechRecognitionEventCallback = (speechRecognitionEvent: SpeechRecognitionEvent) => void;
58 | 
59 | // @public
60 | export const SpeechRecognitionFailedError: SpeechRecognitionErrorEvent;
61 | 
62 | // @public
63 | interface SpeechRecognitionResult_2 {
64 |     0: SpeechRecognitionAlternative_2;
65 |     isFinal: boolean;
66 | }
67 | export { SpeechRecognitionResult_2 as SpeechRecognitionResult }
68 | 
69 | ```
70 | 
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contribution guide
2 | 
3 | ## Bugs and improvements
4 | 
5 | Please submit an issue if you have found a bug or would like to propose an improvement. When submitting a bug, please provide the environment and steps to reproduce it. If you are proposing an improvement, please provide some reasoning behind it, ideally with a few use-cases.
6 | 
7 | Please do make use of the issue templates. If you think that a template is lacking for your case, feel free to suggest a new one.
8 | 
9 | ## Pull requests
10 | 
11 | We are happy to accept your PRs! When submitting, however, please make sure that you do the following:
12 | 
13 | - Ensure that your code is properly linted and tested. Don't forget to add tests and update existing ones, as necessary.
14 | - Make sure to update the API report and documentation. These will be generated automatically via a pre-commit hook.
15 | 
16 | ## Initial setup
17 | 
18 | Installing dependencies and setting up the pre-commit hook:
19 | 
20 | ```
21 | npm i
22 | ```
23 | 
24 | Setting your Speechly app ID for use in the test harness:
25 | 
26 | ```
27 | APP_ID=<your-app-id> npm run set-app-id
28 | ```
29 | 
30 | ## Development
31 | 
32 | We recommend you use the test app in `test-harness` to develop `speech-recognition-polyfill`. This is a simple React app with a hold-to-talk button that uses `react-speech-recognition` to display the transcript. This will hot reload whenever it or the contents of `src` change.
33 | 
34 | After the initial setup above, this can be run with:
35 | 
36 | ```
37 | npm run dev
38 | ```
39 | 
40 | And then opened at `http://localhost:3000/`.
41 | 
42 | ## Linting
43 | 
44 | The linter can be run with:
45 | 
46 | ```
47 | npm run lint
48 | ```
49 | 
50 | ## Testing
51 | 
52 | For a one-off test run:
53 | 
54 | ```
55 | npm test
56 | ```
57 | 
58 | To run the tests with watch, run:
59 | 
60 | ```
61 | npm run test:watch
62 | ```
63 | 
64 | ## Generating API report and documentation
65 | 
66 | This is done automatically before you make a commit - please include these changes in your commits to keep them up-to-date. You can update these manually with:
67 | 
68 | ```
69 | npm run api-extractor
70 | npm run docs
71 | ```
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 |   "name": "@speechly/speech-recognition-polyfill",
3 |   "version": "1.3.0",
4 |   "description": "Polyfill for the Speech Recognition API using Speechly",
5 |   "main": "./dist/index.js",
6 |   "types": "./dist/index.d.ts",
7 |   "keywords": [
8 |     "client",
9 |     "voice",
10 |     "speech",
11 |     "slu",
12 |     "spoken language understanding",
13 |     "speechly",
14 |     "asr",
15 |     "nlp",
16 |     "natural language processing",
17 |     "nlu",
18 |     "natural language understanding",
19 |     "natural language",
20 |     "vui",
21 |     "voice ui",
22 |     "voice user interface",
23 |     "multimodal",
24 |     "speech recognition"
25 |   ],
26 |   "scripts": {
27 |     "lint": "eslint 'src/**/*.{ts,tsx}'",
28 |     "set-app-id": "echo \"REACT_APP_APP_ID=${APP_ID}\" > test-harness/.env.local",
29 |     "update-test-harness": "tsc && cp -R dist/ test-harness/src/SpeechlySpeechRecognition",
30 |     "run-test-harness": "cd test-harness && npm i && npm start",
31 |     "dev": "npm-watch update-test-harness & npm run update-test-harness && npm run run-test-harness",
32 |     "test": "jest --config ./jest.config.js",
33 |     "test:watch": "jest --watch --config ./jest.config.js",
34 |     "docs": "typedoc --readme none --gitRevision HEAD src/index.ts",
35 |     "api-extractor": "tsc && api-extractor run --local --verbose",
36 |     "prepublishOnly": "tsc && api-extractor run --verbose",
37 |     "prepare": "husky install"
38 |   },
39 |   "watch": {
40 |     "update-test-harness": {
41 |       "patterns": ["src"],
42 |       "extensions": "ts"
43 |     }
44 |   },
45 |   "files": ["dist"],
46 |   "repository": {
47 |     "type": "git",
48 |     "url": "git+https://github.com/speechly/speech-recognition-polyfill.git"
49 |   },
50 |   "bugs": {
51 |     "url": "https://github.com/speechly/speech-recognition-polyfill/issues"
52 |   },
53 |   "author": "Speechly",
54 |   "license": "MIT",
55 |   "devDependencies": {
56 |     "@microsoft/api-extractor": "^7.15.0",
57 |     "@types/jest": "^26.0.23",
58 |     "@typescript-eslint/eslint-plugin": "^5.56.0",
59 |     "@typescript-eslint/parser": "^5.56.0",
60 |     "eslint": "^8.36.0",
61 |     "eslint-config-prettier": "^8.8.0",
62 |     "eslint-config-standard-with-typescript":
"^34.0.1", 63 | "eslint-plugin-import": "^2.27.5", 64 | "eslint-plugin-jest": "^27.2.1", 65 | "eslint-plugin-node": "^11.1.0", 66 | "eslint-plugin-prettier": "^4.2.1", 67 | "eslint-plugin-promise": "^6.1.1", 68 | "eslint-plugin-tsdoc": "^0.2.17", 69 | "husky": "^8.0.3", 70 | "jest": "^26.6.3", 71 | "npm-watch": "^0.11.0", 72 | "prettier": "^2.8.6", 73 | "ts-jest": "^26.5.5", 74 | "typedoc": "^0.23.28", 75 | "typedoc-plugin-markdown": "^3.14.0", 76 | "typescript": "^4.9.4" 77 | }, 78 | "dependencies": { 79 | "@speechly/browser-client": "^2.6.5" 80 | }, 81 | "publishConfig": { 82 | "access": "public" 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /src/testUtils.ts: -------------------------------------------------------------------------------- 1 | // @ts-nocheck 2 | import TEST_DATA from './testData'; 3 | 4 | const { SENTENCE_ONE } = TEST_DATA; 5 | 6 | export const mockUndefinedWindow = () => { 7 | delete global.window; 8 | } 9 | 10 | export const mockUndefinedNavigator = () => { 11 | global.navigator = undefined; 12 | } 13 | 14 | export const mockMediaDevices = () => { 15 | global.navigator.mediaDevices = jest.fn(); 16 | } 17 | 18 | export const mockUndefinedMediaDevices = () => { 19 | global.navigator.mediaDevices = undefined; 20 | } 21 | 22 | export const mockAudioContext = () => { 23 | global.AudioContext = jest.fn(); 24 | } 25 | 26 | export const mockWebkitAudioContext = () => { 27 | global.webkitAudioContext = jest.fn(); 28 | } 29 | 30 | export const mockUndefinedAudioContext = () => { 31 | global.AudioContext = undefined; 32 | } 33 | 34 | export const mockUndefinedWebkitAudioContext = () => { 35 | global.webkitAudioContext = undefined; 36 | } 37 | 38 | export const expectSentenceToBeTranscribedWithFirstInitialResult = (sentence: any, mockOnResult: any) => { 39 | expect(mockOnResult).toHaveBeenNthCalledWith(1, { results: [ 40 | { 41 | 0: { 42 | transcript: 'SENT', 43 | confidence: 1, 44 | }, 45 | isFinal: false, 46 | }, 47 | ], resultIndex: 0}) 48 | } 49 | 50 | export const expectSentenceToBeTranscribedWithFinalResult = (sentence: any, mockOnResult: any, startIndex = 1) => { 51 | const secondWord = sentence === SENTENCE_ONE ? 'ONE': 'TWO'; 52 | expect(mockOnResult).toHaveBeenNthCalledWith(startIndex, { results: [ 53 | { 54 | 0: { 55 | transcript: `SENTENCE ${secondWord}`, 56 | confidence: 1, 57 | }, 58 | isFinal: true, 59 | }, 60 | ], resultIndex: 0}) 61 | } 62 | 63 | export const expectSentenceToBeTranscribedWithInterimAndFinalResults = (sentence: any, mockOnResult: any, startIndex = 1) => { 64 | const secondWord = sentence === SENTENCE_ONE ? 
'ONE': 'TWO'; 65 | expect(mockOnResult).toHaveBeenNthCalledWith(startIndex, { results: [ 66 | { 67 | 0: { 68 | transcript: 'SENT', 69 | confidence: 1, 70 | }, 71 | isFinal: false, 72 | }, 73 | ], resultIndex: 0}) 74 | expect(mockOnResult).toHaveBeenNthCalledWith(startIndex + 1, { results: [ 75 | { 76 | 0: { 77 | transcript: 'SENTENCE', 78 | confidence: 1, 79 | }, 80 | isFinal: false, 81 | }, 82 | ], resultIndex: 0}) 83 | for (let i = startIndex + 2; i < startIndex + sentence.length - 1; i += 1) { 84 | expect(mockOnResult).toHaveBeenNthCalledWith(i, { results: [ 85 | { 86 | 0: { 87 | transcript: `SENTENCE ${secondWord}`, 88 | confidence: 1, 89 | }, 90 | isFinal: false, 91 | }, 92 | ], resultIndex: 0}) 93 | } 94 | expect(mockOnResult).toHaveBeenNthCalledWith(startIndex + sentence.length - 1, { results: [ 95 | { 96 | 0: { 97 | transcript: `SENTENCE ${secondWord}`, 98 | confidence: 1, 99 | }, 100 | isFinal: true, 101 | }, 102 | ], resultIndex: 0}) 103 | } -------------------------------------------------------------------------------- /docs/interfaces/speechrecognition.md: -------------------------------------------------------------------------------- 1 | [@speechly/speech-recognition-polyfill](../README.md) / SpeechRecognition 2 | 3 | # Interface: SpeechRecognition 4 | 5 | Subset of the [W3C SpeechRecognition spec](https://developer.mozilla.org/en-US/docs/Web/API/SpeechRecognition) that 6 | can be used for basic transcription 7 | 8 | ## Table of contents 9 | 10 | ### Properties 11 | 12 | - [abort](SpeechRecognition.md#abort) 13 | - [continuous](SpeechRecognition.md#continuous) 14 | - [interimResults](SpeechRecognition.md#interimresults) 15 | - [onend](SpeechRecognition.md#onend) 16 | - [onerror](SpeechRecognition.md#onerror) 17 | - [onresult](SpeechRecognition.md#onresult) 18 | - [start](SpeechRecognition.md#start) 19 | - [stop](SpeechRecognition.md#stop) 20 | 21 | ## Properties 22 | 23 | ### abort 24 | 25 | • **abort**: () => `Promise`<`void`\> 26 | 27 | #### Type declaration 28 | 29 | ▸ (): `Promise`<`void`\> 30 | 31 | Stop transcribing utterances received from the microphone, and cut off the current utterance 32 | 33 | ##### Returns 34 | 35 | `Promise`<`void`\> 36 | 37 | #### Defined in 38 | 39 | [types.ts:145](https://github.com/speechly/speech-recognition-polyfill/blob/HEAD/src/types.ts#L145) 40 | 41 | ___ 42 | 43 | ### continuous 44 | 45 | • **continuous**: `boolean` 46 | 47 | Should the microphone listen continuously (true) or should it stop after the first utterance (false)? 48 | 49 | #### Defined in 50 | 51 | [types.ts:114](https://github.com/speechly/speech-recognition-polyfill/blob/HEAD/src/types.ts#L114) 52 | 53 | ___ 54 | 55 | ### interimResults 56 | 57 | • **interimResults**: `boolean` 58 | 59 | Should interim results be emitted? 
These are parts of an ongoing utterance for which transcription hasn't 60 | completed yet 61 | 62 | #### Defined in 63 | 64 | [types.ts:119](https://github.com/speechly/speech-recognition-polyfill/blob/HEAD/src/types.ts#L119) 65 | 66 | ___ 67 | 68 | ### onend 69 | 70 | • **onend**: [`SpeechEndCallback`](../README.md#speechendcallback) 71 | 72 | Callback that is invoked when transcription ends 73 | 74 | **`Param`** 75 | 76 | Event containing updates to the transcript 77 | 78 | #### Defined in 79 | 80 | [types.ts:128](https://github.com/speechly/speech-recognition-polyfill/blob/HEAD/src/types.ts#L128) 81 | 82 | ___ 83 | 84 | ### onerror 85 | 86 | • **onerror**: [`SpeechErrorCallback`](../README.md#speecherrorcallback) 87 | 88 | Callback that is invoked when an error occurs 89 | 90 | **`Param`** 91 | 92 | Event containing details of the error 93 | 94 | #### Defined in 95 | 96 | [types.ts:133](https://github.com/speechly/speech-recognition-polyfill/blob/HEAD/src/types.ts#L133) 97 | 98 | ___ 99 | 100 | ### onresult 101 | 102 | • **onresult**: [`SpeechRecognitionEventCallback`](../README.md#speechrecognitioneventcallback) 103 | 104 | Callback that is invoked whenever the transcript updates 105 | 106 | #### Defined in 107 | 108 | [types.ts:123](https://github.com/speechly/speech-recognition-polyfill/blob/HEAD/src/types.ts#L123) 109 | 110 | ___ 111 | 112 | ### start 113 | 114 | • **start**: () => `Promise`<`void`\> 115 | 116 | #### Type declaration 117 | 118 | ▸ (): `Promise`<`void`\> 119 | 120 | Start transcribing utterances received from the microphone 121 | 122 | ##### Returns 123 | 124 | `Promise`<`void`\> 125 | 126 | #### Defined in 127 | 128 | [types.ts:137](https://github.com/speechly/speech-recognition-polyfill/blob/HEAD/src/types.ts#L137) 129 | 130 | ___ 131 | 132 | ### stop 133 | 134 | • **stop**: () => `Promise`<`void`\> 135 | 136 | #### Type declaration 137 | 138 | ▸ (): `Promise`<`void`\> 139 | 140 | Stop transcribing utterances received from the microphone, but finish processing the current utterance 141 | 142 | ##### Returns 143 | 144 | `Promise`<`void`\> 145 | 146 | #### Defined in 147 | 148 | [types.ts:141](https://github.com/speechly/speech-recognition-polyfill/blob/HEAD/src/types.ts#L141) 149 | -------------------------------------------------------------------------------- /src/createSpeechRecognition.ts: -------------------------------------------------------------------------------- 1 | import { 2 | BrowserClient, 3 | BrowserMicrophone, 4 | ErrDeviceNotSupported, 5 | ErrNoAudioConsent, 6 | type Segment, 7 | } from '@speechly/browser-client' 8 | import { 9 | type SpeechRecognitionEventCallback, 10 | type SpeechEndCallback, 11 | type SpeechErrorCallback, 12 | type SpeechRecognitionResult, 13 | type SpeechRecognitionClass, 14 | type SpeechRecognition, 15 | MicrophoneNotAllowedError, 16 | SpeechRecognitionFailedError, 17 | } from './types' 18 | 19 | /** 20 | * Returns a SpeechRecognition implementation that uses a given Speechly app ID 21 | * to generate transcriptions using the Speechly API 22 | * 23 | * @param appId - Speechly app ID 24 | * @returns Class that implements the SpeechRecognition interface 25 | * @public 26 | */ 27 | export const createSpeechlySpeechRecognition = (appId: string): SpeechRecognitionClass => { 28 | const browserSupportsAudioApis: boolean = 29 | typeof window !== 'undefined' && 30 | window.navigator?.mediaDevices !== undefined && 31 | (window.AudioContext !== undefined || window.webkitAudioContext !== undefined) 32 | 33 | return class 
SpeechlySpeechRecognition implements SpeechRecognition {
34 |     static readonly hasBrowserSupport: boolean = browserSupportsAudioApis
35 | 
36 |     private readonly client: BrowserClient
37 |     private readonly microphone: BrowserMicrophone
38 |     private aborted = false
39 |     private transcribing = false
40 |     private taskQueue: Promise<void> | null = null
41 | 
42 |     continuous = false
43 |     interimResults = false
44 |     onresult: SpeechRecognitionEventCallback = () => {}
45 |     onend: SpeechEndCallback = () => {}
46 |     onerror: SpeechErrorCallback = () => {}
47 | 
48 |     constructor() {
49 |       this.client = new BrowserClient({ appId })
50 |       this.microphone = new BrowserMicrophone()
51 |       this.client.onSegmentChange(this.handleResult)
52 |     }
53 | 
54 |     public start = async (): Promise<void> => {
55 |       try {
56 |         this.aborted = false
57 |         await this._start()
58 |       } catch (e) {
59 |         if (e === ErrNoAudioConsent) {
60 |           this.onerror(MicrophoneNotAllowedError)
61 |         } else {
62 |           this.onerror(SpeechRecognitionFailedError)
63 |         }
64 |       }
65 |     }
66 | 
67 |     public stop = async (): Promise<void> => {
68 |       await this._stop()
69 |     }
70 | 
71 |     public abort = async (): Promise<void> => {
72 |       this.aborted = true
73 |       await this._stop()
74 |     }
75 | 
76 |     private readonly _start = async (): Promise<void> => {
77 |       if (this.transcribing) {
78 |         return
79 |       }
80 | 
81 |       this.transcribing = true
82 | 
83 |       const startTask = async (): Promise<void> => {
84 |         await this.microphone.initialize()
85 |         const { mediaStream } = this.microphone
86 |         if (mediaStream === null || mediaStream === undefined) {
87 |           throw ErrDeviceNotSupported
88 |         }
89 |         await this.client.attach(mediaStream)
90 |         await this.client.start()
91 |       }
92 |       await this.enqueueTask(startTask)
93 |     }
94 | 
95 |     private readonly _stop = async (): Promise<void> => {
96 |       if (!this.transcribing) {
97 |         return
98 |       }
99 | 
100 |       this.transcribing = false
101 | 
102 |       const stopTask = async (): Promise<void> => {
103 |         try {
104 |           await this.client.stop()
105 |           await this.client.detach()
106 |           await this.microphone.close()
107 |           this.onend()
108 |         } catch (e) {
109 |           // swallow errors
110 |         }
111 |       }
112 |       await this.enqueueTask(stopTask)
113 |     }
114 | 
115 |     private readonly enqueueTask = async (task: () => Promise<void>): Promise<void> => {
116 |       const queuedTask = async (): Promise<void> => {
117 |         // Wait for earlier task(s) to complete, effectively adding to a task queue
118 |         await this.taskQueue
119 |         await task()
120 |       }
121 |       this.taskQueue = queuedTask()
122 | 
123 |       await this.taskQueue
124 |     }
125 | 
126 |     private readonly handleResult = (segment: Segment): void => {
127 |       if (this.aborted) {
128 |         return
129 |       }
130 |       if (!this.interimResults && !segment.isFinal) {
131 |         return
132 |       }
133 |       const transcript = segment.words
134 |         .map(x => x.value)
135 |         .filter(x => x)
136 |         .join(' ')
137 |       const results: SpeechRecognitionResult[] = [
138 |         {
139 |           0: {
140 |             transcript,
141 |             confidence: 1,
142 |           },
143 |           isFinal: segment.isFinal,
144 |         },
145 |       ]
146 |       this.onresult({ results, resultIndex: 0 })
147 |       if (!this.continuous && segment.isFinal) {
148 |         this.abort().catch(() => {}) // swallow errors
149 |       }
150 |     }
151 |   }
152 | }
153 | 
154 | export default createSpeechlySpeechRecognition
155 | 
--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
1 | @speechly/speech-recognition-polyfill
2 | 
3 | # @speechly/speech-recognition-polyfill
4 | 
5 | A polyfill (a
"[ponyfill](https://ponyfoo.com/articles/polyfills-or-ponyfills)" to be more precise) for the 6 | [SpeechRecognition API](https://developer.mozilla.org/en-US/docs/Web/API/SpeechRecognition) that uses 7 | [Speechly](https://www.speechly.com/) to implement the transcription functionality 8 | 9 | **`Remarks`** 10 | 11 | The implementation of the [SpeechRecognition spec](https://developer.mozilla.org/en-US/docs/Web/API/SpeechRecognition) 12 | is incomplete, but should enable the majority of use cases 13 | 14 | ## Table of contents 15 | 16 | ### Interfaces 17 | 18 | - [SpeechRecognition](interfaces/SpeechRecognition.md) 19 | - [SpeechRecognitionAlternative](interfaces/SpeechRecognitionAlternative.md) 20 | - [SpeechRecognitionClass](interfaces/SpeechRecognitionClass.md) 21 | - [SpeechRecognitionErrorEvent](interfaces/SpeechRecognitionErrorEvent.md) 22 | - [SpeechRecognitionEvent](interfaces/SpeechRecognitionEvent.md) 23 | - [SpeechRecognitionResult](interfaces/SpeechRecognitionResult.md) 24 | 25 | ### Type Aliases 26 | 27 | - [SpeechEndCallback](README.md#speechendcallback) 28 | - [SpeechErrorCallback](README.md#speecherrorcallback) 29 | - [SpeechRecognitionEventCallback](README.md#speechrecognitioneventcallback) 30 | 31 | ### Variables 32 | 33 | - [MicrophoneNotAllowedError](README.md#microphonenotallowederror) 34 | - [SpeechRecognitionFailedError](README.md#speechrecognitionfailederror) 35 | 36 | ### Functions 37 | 38 | - [createSpeechlySpeechRecognition](README.md#createspeechlyspeechrecognition) 39 | 40 | ## Type Aliases 41 | 42 | ### SpeechEndCallback 43 | 44 | Ƭ **SpeechEndCallback**: () => `void` 45 | 46 | #### Type declaration 47 | 48 | ▸ (): `void` 49 | 50 | Callback that is invoked when transcription ends 51 | 52 | ##### Returns 53 | 54 | `void` 55 | 56 | #### Defined in 57 | 58 | [types.ts:97](https://github.com/speechly/speech-recognition-polyfill/blob/HEAD/src/types.ts#L97) 59 | 60 | ___ 61 | 62 | ### SpeechErrorCallback 63 | 64 | Ƭ **SpeechErrorCallback**: (`speechRecognitionErrorEvent`: [`SpeechRecognitionErrorEvent`](interfaces/SpeechRecognitionErrorEvent.md)) => `void` 65 | 66 | #### Type declaration 67 | 68 | ▸ (`speechRecognitionErrorEvent`): `void` 69 | 70 | Callback that is invoked when an error occurs 71 | 72 | ##### Parameters 73 | 74 | | Name | Type | 75 | | :------ | :------ | 76 | | `speechRecognitionErrorEvent` | [`SpeechRecognitionErrorEvent`](interfaces/SpeechRecognitionErrorEvent.md) | 77 | 78 | ##### Returns 79 | 80 | `void` 81 | 82 | #### Defined in 83 | 84 | [types.ts:103](https://github.com/speechly/speech-recognition-polyfill/blob/HEAD/src/types.ts#L103) 85 | 86 | ___ 87 | 88 | ### SpeechRecognitionEventCallback 89 | 90 | Ƭ **SpeechRecognitionEventCallback**: (`speechRecognitionEvent`: [`SpeechRecognitionEvent`](interfaces/SpeechRecognitionEvent.md)) => `void` 91 | 92 | #### Type declaration 93 | 94 | ▸ (`speechRecognitionEvent`): `void` 95 | 96 | Callback that is invoked whenever the transcript gets updated 97 | 98 | ##### Parameters 99 | 100 | | Name | Type | Description | 101 | | :------ | :------ | :------ | 102 | | `speechRecognitionEvent` | [`SpeechRecognitionEvent`](interfaces/SpeechRecognitionEvent.md) | Event containing updates to the transcript | 103 | 104 | ##### Returns 105 | 106 | `void` 107 | 108 | #### Defined in 109 | 110 | [types.ts:91](https://github.com/speechly/speech-recognition-polyfill/blob/HEAD/src/types.ts#L91) 111 | 112 | ## Variables 113 | 114 | ### MicrophoneNotAllowedError 115 | 116 | • `Const` **MicrophoneNotAllowedError**: 
[`SpeechRecognitionErrorEvent`](interfaces/SpeechRecognitionErrorEvent.md)
117 | 
118 | Error emitted when the user does not give permission to use the microphone
119 | 
120 | #### Defined in
121 | 
122 | [types.ts:72](https://github.com/speechly/speech-recognition-polyfill/blob/HEAD/src/types.ts#L72)
123 | 
124 | ___
125 | 
126 | ### SpeechRecognitionFailedError
127 | 
128 | • `Const` **SpeechRecognitionFailedError**: [`SpeechRecognitionErrorEvent`](interfaces/SpeechRecognitionErrorEvent.md)
129 | 
130 | Generic error when speech recognition fails due to an unknown cause
131 | 
132 | #### Defined in
133 | 
134 | [types.ts:81](https://github.com/speechly/speech-recognition-polyfill/blob/HEAD/src/types.ts#L81)
135 | 
136 | ## Functions
137 | 
138 | ### createSpeechlySpeechRecognition
139 | 
140 | ▸ **createSpeechlySpeechRecognition**(`appId`): [`SpeechRecognitionClass`](interfaces/SpeechRecognitionClass.md)
141 | 
142 | Returns a SpeechRecognition implementation that uses a given Speechly app ID
143 | to generate transcriptions using the Speechly API
144 | 
145 | #### Parameters
146 | 
147 | | Name | Type | Description |
148 | | :------ | :------ | :------ |
149 | | `appId` | `string` | Speechly app ID |
150 | 
151 | #### Returns
152 | 
153 | [`SpeechRecognitionClass`](interfaces/SpeechRecognitionClass.md)
154 | 
155 | Class that implements the SpeechRecognition interface
156 | 
157 | #### Defined in
158 | 
159 | [createSpeechRecognition.ts:27](https://github.com/speechly/speech-recognition-polyfill/blob/HEAD/src/createSpeechRecognition.ts#L27)
160 | 
--------------------------------------------------------------------------------
/src/types.ts:
--------------------------------------------------------------------------------
1 | /**
2 |  * Transcript for the ongoing utterance, including the level of confidence in that transcript
3 |  * @public
4 |  */
5 | export interface SpeechRecognitionAlternative {
6 |   /**
7 |    * Current transcript of the ongoing utterance (the words spoken by the user)
8 |    */
9 |   transcript: string
10 |   /**
11 |    * Level of confidence in the correctness of the transcript (from 0 to 1)
12 |    */
13 |   confidence: number
14 | }
15 | 
16 | /**
17 |  * Object containing a transcript for the ongoing utterance and an indicator of whether that transcript is final or not
18 |  * @public
19 |  */
20 | export interface SpeechRecognitionResult {
21 |   /**
22 |    * Object containing a transcript for the ongoing utterance (the use of an integer index key is to mimic the
23 |    * structure used in the native {@link https://developer.mozilla.org/en-US/docs/Web/API/SpeechRecognitionResult | SpeechRecognitionResult spec}),
24 |    * which contains an "array" of alternative transcripts. In the Speechly implementation, there is never more than one
25 |    * alternative, so only the first index is specified in the interface
26 |    */
27 |   0: SpeechRecognitionAlternative
28 |   /**
29 |    * Is this transcript "final"? That is, has the transcription algorithm concluded that the utterance has finished and
30 |    * that the transcript will have no further updates?
31 |    */
32 |   isFinal: boolean
33 | }
34 | 
35 | /**
36 |  * Data associated with an update to the transcript for the ongoing utterance
37 |  * @public
38 |  */
39 | export interface SpeechRecognitionEvent {
40 |   /**
41 |    * List of speech recognition results, containing all transcripts collected in the current session. This represents the
42 |    * native {@link https://developer.mozilla.org/en-US/docs/Web/API/SpeechRecognitionResultList | SpeechRecognitionResultList}.
43 | * Note that the Speechly implementation currently does not maintain a history of results, only returning the single 44 | * result for the ongoing utterance 45 | */ 46 | results: SpeechRecognitionResult[] 47 | /** 48 | * Index of the earliest speech recognition result that has changed 49 | */ 50 | resultIndex: number 51 | } 52 | 53 | /** 54 | * Data associated with an error emitted from the recognition service 55 | * @public 56 | */ 57 | export interface SpeechRecognitionErrorEvent { 58 | /** 59 | * Type of error raised 60 | */ 61 | error: 'not-allowed' | 'audio-capture' 62 | /** 63 | * Message describing the error in more detail 64 | */ 65 | message: string 66 | } 67 | 68 | /** 69 | * Error emitted when the user does not give permission to use the microphone 70 | * @public 71 | */ 72 | export const MicrophoneNotAllowedError: SpeechRecognitionErrorEvent = { 73 | error: 'not-allowed', 74 | message: 'User did not give permission to use the microphone', 75 | } 76 | 77 | /** 78 | * Generic error when speech recognition fails due to an unknown cause 79 | * @public 80 | */ 81 | export const SpeechRecognitionFailedError: SpeechRecognitionErrorEvent = { 82 | error: 'audio-capture', 83 | message: 'Speech recognition failed', 84 | } 85 | 86 | /** 87 | * Callback that is invoked whenever the transcript gets updated 88 | * @param speechRecognitionEvent - Event containing updates to the transcript 89 | * @public 90 | */ 91 | export type SpeechRecognitionEventCallback = (speechRecognitionEvent: SpeechRecognitionEvent) => void 92 | 93 | /** 94 | * Callback that is invoked when transcription ends 95 | * @public 96 | */ 97 | export type SpeechEndCallback = () => void 98 | 99 | /** 100 | * Callback that is invoked when an error occurs 101 | * @public 102 | */ 103 | export type SpeechErrorCallback = (speechRecognitionErrorEvent: SpeechRecognitionErrorEvent) => void 104 | 105 | /** 106 | * Subset of the {@link https://developer.mozilla.org/en-US/docs/Web/API/SpeechRecognition | W3C SpeechRecognition spec} that 107 | * can be used for basic transcription 108 | * @public 109 | */ 110 | export interface SpeechRecognition { 111 | /** 112 | * Should the microphone listen continuously (true) or should it stop after the first utterance (false)? 113 | */ 114 | continuous: boolean 115 | /** 116 | * Should interim results be emitted? 
These are parts of an ongoing utterance for which transcription hasn't
117 |    * completed yet
118 |    */
119 |   interimResults: boolean
120 |   /**
121 |    * Callback that is invoked whenever the transcript updates
122 |    */
123 |   onresult: SpeechRecognitionEventCallback
124 |   /**
125 |    * Callback that is invoked when transcription ends
126 |    * @param speechRecognitionEvent - Event containing updates to the transcript
127 |    */
128 |   onend: SpeechEndCallback
129 |   /**
130 |    * Callback that is invoked when an error occurs
131 |    * @param speechRecognitionErrorEvent - Event containing details of the error
132 |    */
133 |   onerror: SpeechErrorCallback
134 |   /**
135 |    * Start transcribing utterances received from the microphone
136 |    */
137 |   start: () => Promise<void>
138 |   /**
139 |    * Stop transcribing utterances received from the microphone, but finish processing the current utterance
140 |    */
141 |   stop: () => Promise<void>
142 |   /**
143 |    * Stop transcribing utterances received from the microphone, and cut off the current utterance
144 |    */
145 |   abort: () => Promise<void>
146 | }
147 | 
148 | /**
149 |  * Class that implements the SpeechRecognition interface
150 |  * @public
151 |  */
152 | export interface SpeechRecognitionClass {
153 |   /**
154 |    * Does the browser support the APIs needed for this polyfill?
155 |    */
156 |   readonly hasBrowserSupport: boolean
157 |   /**
158 |    * Constructor for a SpeechRecognition implementation
159 |    */
160 |   new (): SpeechRecognition
161 | }
162 | 
--------------------------------------------------------------------------------
/src/testData.ts:
--------------------------------------------------------------------------------
1 | const SENTENCE_ONE = [
2 |   {
3 |     id: 0,
4 |     contextId: '6b742e79-bd6d-4f28-a4d9-aeae71c54728',
5 |     isFinal: false,
6 |     words: [
7 |       {
8 |         value: 'SENT',
9 |         index: 2,
10 |         startTimestamp: 540,
11 |         endTimestamp: 840,
12 |         isFinal: false,
13 |       },
14 |     ],
15 |     entities: [],
16 |     intent: {
17 |       intent: '',
18 |       isFinal: false,
19 |     },
20 |   },
21 |   {
22 |     id: 0,
23 |     contextId: '6b742e79-bd6d-4f28-a4d9-aeae71c54728',
24 |     isFinal: false,
25 |     words: [
26 |       {
27 |         value: 'SENTENCE',
28 |         index: 2,
29 |         startTimestamp: 540,
30 |         endTimestamp: 900,
31 |         isFinal: false,
32 |       },
33 |     ],
34 |     entities: [],
35 |     intent: {
36 |       intent: '',
37 |       isFinal: false,
38 |     },
39 |   },
40 |   {
41 |     id: 0,
42 |     contextId: '6b742e79-bd6d-4f28-a4d9-aeae71c54728',
43 |     isFinal: false,
44 |     words: [
45 |       {
46 |         value: 'SENTENCE',
47 |         index: 2,
48 |         startTimestamp: 540,
49 |         endTimestamp: 1080,
50 |         isFinal: false,
51 |       },
52 |       {
53 |         value: 'ONE',
54 |         index: 3,
55 |         startTimestamp: 1080,
56 |         endTimestamp: 1200,
57 |         isFinal: false,
58 |       },
59 |     ],
60 |     entities: [],
61 |     intent: {
62 |       intent: '',
63 |       isFinal: false,
64 |     },
65 |   },
66 |   {
67 |     id: 0,
68 |     contextId: '6b742e79-bd6d-4f28-a4d9-aeae71c54728',
69 |     isFinal: false,
70 |     words: [
71 |       {
72 |         value: 'SENTENCE',
73 |         index: 2,
74 |         startTimestamp: 540,
75 |         endTimestamp: 1080,
76 |         isFinal: false,
77 |       },
78 |       {
79 |         value: 'ONE',
80 |         index: 3,
81 |         startTimestamp: 1080,
82 |         endTimestamp: 1320,
83 |         isFinal: false,
84 |       },
85 |     ],
86 |     entities: [],
87 |     intent: {
88 |       intent: '',
89 |       isFinal: false,
90 |     },
91 |   },
92 |   {
93 |     id: 0,
94 |     contextId: '6b742e79-bd6d-4f28-a4d9-aeae71c54728',
95 |     isFinal: false,
96 |     words: [
97 |       {
98 |         value: 'SENTENCE',
99 |         index: 2,
100 |         startTimestamp: 540,
101 |         endTimestamp: 1080,
102 |         isFinal: false,
103 |       },
104 |       {
105 |         value: 'ONE',
106 |         index: 3,
107 |         startTimestamp: 1080,
108 |
endTimestamp: 1320, 109 | isFinal: false, 110 | }, 111 | ], 112 | entities: [], 113 | intent: { 114 | intent: '', 115 | isFinal: false, 116 | }, 117 | }, 118 | { 119 | id: 0, 120 | contextId: '6b742e79-bd6d-4f28-a4d9-aeae71c54728', 121 | isFinal: false, 122 | words: [ 123 | { 124 | value: 'SENTENCE', 125 | index: 2, 126 | startTimestamp: 540, 127 | endTimestamp: 1080, 128 | isFinal: false, 129 | }, 130 | { 131 | value: 'ONE', 132 | index: 3, 133 | startTimestamp: 1080, 134 | endTimestamp: 1320, 135 | isFinal: false, 136 | }, 137 | ], 138 | entities: [], 139 | intent: { 140 | intent: '', 141 | isFinal: false, 142 | }, 143 | }, 144 | { 145 | id: 0, 146 | contextId: '6b742e79-bd6d-4f28-a4d9-aeae71c54728', 147 | isFinal: false, 148 | words: [ 149 | { 150 | value: 'SENTENCE', 151 | index: 2, 152 | startTimestamp: 540, 153 | endTimestamp: 1080, 154 | isFinal: false, 155 | }, 156 | { 157 | value: 'ONE', 158 | index: 3, 159 | startTimestamp: 1080, 160 | endTimestamp: 1320, 161 | isFinal: false, 162 | }, 163 | ], 164 | entities: [], 165 | intent: { 166 | intent: '', 167 | isFinal: false, 168 | }, 169 | }, 170 | { 171 | id: 0, 172 | contextId: '6b742e79-bd6d-4f28-a4d9-aeae71c54728', 173 | isFinal: false, 174 | words: [ 175 | { 176 | value: 'SENTENCE', 177 | index: 2, 178 | startTimestamp: 540, 179 | endTimestamp: 1080, 180 | isFinal: false, 181 | }, 182 | { 183 | value: 'ONE', 184 | index: 3, 185 | startTimestamp: 1080, 186 | endTimestamp: 1320, 187 | isFinal: false, 188 | }, 189 | ], 190 | entities: [], 191 | intent: { 192 | intent: '', 193 | isFinal: false, 194 | }, 195 | }, 196 | { 197 | id: 0, 198 | contextId: '6b742e79-bd6d-4f28-a4d9-aeae71c54728', 199 | isFinal: false, 200 | words: [ 201 | { 202 | value: 'SENTENCE', 203 | index: 2, 204 | startTimestamp: 540, 205 | endTimestamp: 1080, 206 | isFinal: true, 207 | }, 208 | { 209 | value: 'ONE', 210 | index: 3, 211 | startTimestamp: 1080, 212 | endTimestamp: 1320, 213 | isFinal: false, 214 | }, 215 | ], 216 | entities: [], 217 | intent: { 218 | intent: '', 219 | isFinal: false, 220 | }, 221 | }, 222 | { 223 | id: 0, 224 | contextId: '6b742e79-bd6d-4f28-a4d9-aeae71c54728', 225 | isFinal: false, 226 | words: [ 227 | { 228 | value: 'SENTENCE', 229 | index: 2, 230 | startTimestamp: 540, 231 | endTimestamp: 1080, 232 | isFinal: true, 233 | }, 234 | { 235 | value: 'ONE', 236 | index: 3, 237 | startTimestamp: 1080, 238 | endTimestamp: 1320, 239 | isFinal: false, 240 | }, 241 | ], 242 | entities: [], 243 | intent: { 244 | intent: '', 245 | isFinal: false, 246 | }, 247 | }, 248 | { 249 | id: 0, 250 | contextId: '6b742e79-bd6d-4f28-a4d9-aeae71c54728', 251 | isFinal: false, 252 | words: [ 253 | { 254 | value: 'SENTENCE', 255 | index: 2, 256 | startTimestamp: 540, 257 | endTimestamp: 1080, 258 | isFinal: true, 259 | }, 260 | { 261 | value: 'ONE', 262 | index: 3, 263 | startTimestamp: 1080, 264 | endTimestamp: 1320, 265 | isFinal: true, 266 | }, 267 | ], 268 | entities: [], 269 | intent: { 270 | intent: '', 271 | isFinal: false, 272 | }, 273 | }, 274 | { 275 | id: 0, 276 | contextId: '6b742e79-bd6d-4f28-a4d9-aeae71c54728', 277 | isFinal: true, 278 | words: [ 279 | { 280 | value: 'SENTENCE', 281 | index: 2, 282 | startTimestamp: 540, 283 | endTimestamp: 1080, 284 | isFinal: true, 285 | }, 286 | { 287 | value: 'ONE', 288 | index: 3, 289 | startTimestamp: 1080, 290 | endTimestamp: 1320, 291 | isFinal: true, 292 | }, 293 | ], 294 | entities: [], 295 | intent: { 296 | intent: '', 297 | isFinal: true, 298 | }, 299 | }, 300 | ]; 301 | 302 | const SENTENCE_TWO = [ 303 
| { 304 | id: 0, 305 | contextId: '6b742e79-bd6d-4f28-a4d9-aeae71c54728', 306 | isFinal: false, 307 | words: [ 308 | { 309 | value: 'SENT', 310 | index: 2, 311 | startTimestamp: 540, 312 | endTimestamp: 840, 313 | isFinal: false, 314 | }, 315 | ], 316 | entities: [], 317 | intent: { 318 | intent: '', 319 | isFinal: false, 320 | }, 321 | }, 322 | { 323 | id: 0, 324 | contextId: '6b742e79-bd6d-4f28-a4d9-aeae71c54728', 325 | isFinal: false, 326 | words: [ 327 | { 328 | value: 'SENTENCE', 329 | index: 2, 330 | startTimestamp: 540, 331 | endTimestamp: 900, 332 | isFinal: false, 333 | }, 334 | ], 335 | entities: [], 336 | intent: { 337 | intent: '', 338 | isFinal: false, 339 | }, 340 | }, 341 | { 342 | id: 0, 343 | contextId: '6b742e79-bd6d-4f28-a4d9-aeae71c54728', 344 | isFinal: false, 345 | words: [ 346 | { 347 | value: 'SENTENCE', 348 | index: 2, 349 | startTimestamp: 540, 350 | endTimestamp: 1080, 351 | isFinal: false, 352 | }, 353 | { 354 | value: 'TWO', 355 | index: 3, 356 | startTimestamp: 1080, 357 | endTimestamp: 1200, 358 | isFinal: false, 359 | }, 360 | ], 361 | entities: [], 362 | intent: { 363 | intent: '', 364 | isFinal: false, 365 | }, 366 | }, 367 | { 368 | id: 0, 369 | contextId: '6b742e79-bd6d-4f28-a4d9-aeae71c54728', 370 | isFinal: false, 371 | words: [ 372 | { 373 | value: 'SENTENCE', 374 | index: 2, 375 | startTimestamp: 540, 376 | endTimestamp: 1080, 377 | isFinal: false, 378 | }, 379 | { 380 | value: 'TWO', 381 | index: 3, 382 | startTimestamp: 1080, 383 | endTimestamp: 1320, 384 | isFinal: false, 385 | }, 386 | ], 387 | entities: [], 388 | intent: { 389 | intent: '', 390 | isFinal: false, 391 | }, 392 | }, 393 | { 394 | id: 0, 395 | contextId: '6b742e79-bd6d-4f28-a4d9-aeae71c54728', 396 | isFinal: false, 397 | words: [ 398 | { 399 | value: 'SENTENCE', 400 | index: 2, 401 | startTimestamp: 540, 402 | endTimestamp: 1080, 403 | isFinal: false, 404 | }, 405 | { 406 | value: 'TWO', 407 | index: 3, 408 | startTimestamp: 1080, 409 | endTimestamp: 1320, 410 | isFinal: false, 411 | }, 412 | ], 413 | entities: [], 414 | intent: { 415 | intent: '', 416 | isFinal: false, 417 | }, 418 | }, 419 | { 420 | id: 0, 421 | contextId: '6b742e79-bd6d-4f28-a4d9-aeae71c54728', 422 | isFinal: false, 423 | words: [ 424 | { 425 | value: 'SENTENCE', 426 | index: 2, 427 | startTimestamp: 540, 428 | endTimestamp: 1080, 429 | isFinal: false, 430 | }, 431 | { 432 | value: 'TWO', 433 | index: 3, 434 | startTimestamp: 1080, 435 | endTimestamp: 1320, 436 | isFinal: false, 437 | }, 438 | ], 439 | entities: [], 440 | intent: { 441 | intent: '', 442 | isFinal: false, 443 | }, 444 | }, 445 | { 446 | id: 0, 447 | contextId: '6b742e79-bd6d-4f28-a4d9-aeae71c54728', 448 | isFinal: false, 449 | words: [ 450 | { 451 | value: 'SENTENCE', 452 | index: 2, 453 | startTimestamp: 540, 454 | endTimestamp: 1080, 455 | isFinal: false, 456 | }, 457 | { 458 | value: 'TWO', 459 | index: 3, 460 | startTimestamp: 1080, 461 | endTimestamp: 1320, 462 | isFinal: false, 463 | }, 464 | ], 465 | entities: [], 466 | intent: { 467 | intent: '', 468 | isFinal: false, 469 | }, 470 | }, 471 | { 472 | id: 0, 473 | contextId: '6b742e79-bd6d-4f28-a4d9-aeae71c54728', 474 | isFinal: false, 475 | words: [ 476 | { 477 | value: 'SENTENCE', 478 | index: 2, 479 | startTimestamp: 540, 480 | endTimestamp: 1080, 481 | isFinal: false, 482 | }, 483 | { 484 | value: 'TWO', 485 | index: 3, 486 | startTimestamp: 1080, 487 | endTimestamp: 1320, 488 | isFinal: false, 489 | }, 490 | ], 491 | entities: [], 492 | intent: { 493 | intent: '', 494 | isFinal: false, 
495 | }, 496 | }, 497 | { 498 | id: 0, 499 | contextId: '6b742e79-bd6d-4f28-a4d9-aeae71c54728', 500 | isFinal: false, 501 | words: [ 502 | { 503 | value: 'SENTENCE', 504 | index: 2, 505 | startTimestamp: 540, 506 | endTimestamp: 1080, 507 | isFinal: true, 508 | }, 509 | { 510 | value: 'TWO', 511 | index: 3, 512 | startTimestamp: 1080, 513 | endTimestamp: 1320, 514 | isFinal: false, 515 | }, 516 | ], 517 | entities: [], 518 | intent: { 519 | intent: '', 520 | isFinal: false, 521 | }, 522 | }, 523 | { 524 | id: 0, 525 | contextId: '6b742e79-bd6d-4f28-a4d9-aeae71c54728', 526 | isFinal: false, 527 | words: [ 528 | { 529 | value: 'SENTENCE', 530 | index: 2, 531 | startTimestamp: 540, 532 | endTimestamp: 1080, 533 | isFinal: true, 534 | }, 535 | { 536 | value: 'TWO', 537 | index: 3, 538 | startTimestamp: 1080, 539 | endTimestamp: 1320, 540 | isFinal: false, 541 | }, 542 | ], 543 | entities: [], 544 | intent: { 545 | intent: '', 546 | isFinal: false, 547 | }, 548 | }, 549 | { 550 | id: 0, 551 | contextId: '6b742e79-bd6d-4f28-a4d9-aeae71c54728', 552 | isFinal: false, 553 | words: [ 554 | { 555 | value: 'SENTENCE', 556 | index: 2, 557 | startTimestamp: 540, 558 | endTimestamp: 1080, 559 | isFinal: true, 560 | }, 561 | { 562 | value: 'TWO', 563 | index: 3, 564 | startTimestamp: 1080, 565 | endTimestamp: 1320, 566 | isFinal: true, 567 | }, 568 | ], 569 | entities: [], 570 | intent: { 571 | intent: '', 572 | isFinal: false, 573 | }, 574 | }, 575 | { 576 | id: 0, 577 | contextId: '6b742e79-bd6d-4f28-a4d9-aeae71c54728', 578 | isFinal: true, 579 | words: [ 580 | { 581 | value: 'SENTENCE', 582 | index: 2, 583 | startTimestamp: 540, 584 | endTimestamp: 1080, 585 | isFinal: true, 586 | }, 587 | { 588 | value: 'TWO', 589 | index: 3, 590 | startTimestamp: 1080, 591 | endTimestamp: 1320, 592 | isFinal: true, 593 | }, 594 | ], 595 | entities: [], 596 | intent: { 597 | intent: '', 598 | isFinal: true, 599 | }, 600 | }, 601 | ]; 602 | 603 | const TEST_DATA = { 604 | SENTENCE_ONE, 605 | SENTENCE_TWO 606 | }; 607 | 608 | export default TEST_DATA; 609 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 |
3 | 4 | ![speechly-logo-duo-black](https://user-images.githubusercontent.com/2579244/193574443-130d16d6-76f1-4401-90f2-0ed753b39bc0.svg) 5 | 6 | [Website](https://www.speechly.com/) 7 |  ·  8 | [Docs](https://docs.speechly.com/) 9 |  ·  10 | [Support](https://github.com/speechly/speechly/discussions) 11 |  ·  12 | [Blog](https://www.speechly.com/blog/) 13 |  ·  14 | [Login](https://api.speechly.com/dashboard/) 15 | 16 |
17 |
18 | 19 | # Speech recognition polyfill 20 | 21 | Polyfill for the [SpeechRecognition](https://wicg.github.io/speech-api/#speechreco-section) standard on the web, using [Speechly](https://www.speechly.com/) as the underlying API. The primary use of this library is to enable speech recognition on browsers that would not normally support it natively. 22 | 23 | Speechly offers a free tier for its speech recognition API with a generous usage limit. 24 | 25 | ## Useful links 26 | 27 | * [Quickstart](#quickstart) 28 | * [Browser support](#browser-support) 29 | * [Handling errors](#handling-errors) 30 | * [Examples](#examples) 31 | * [Integrating with react-speech-recognition](#integrating-with-react-speech-recognition) 32 | * [Limitations](#limitations) 33 | * [Type docs](docs/README.md) 34 | * [Contributing](#contributing) 35 | * [About Speechly](#about-speechly) 36 | 37 | ## Quickstart 38 | 39 | ### Installation 40 | 41 | ``` 42 | npm install --save @speechly/speech-recognition-polyfill 43 | ``` 44 | 45 | ### Basic usage 46 | 47 | First, you need a Speechly Application ID: 48 | 49 | 1. Log in to [Speechly Dashboard](https://api.speechly.com/dashboard/) 50 | 2. Open [Create a new application](https://api.speechly.com/dashboard/#/app/new) 51 | 3. Give your application a name and press **Create application** 52 | 4. **Deploy** the application 53 | 5. Copy the **App ID**, you'll need it in the next step. 54 | 55 | Once you have your App ID, you can use it to create a recognition object that can start transcribing anything the user speaks into the microphone: 56 | 57 | ``` 58 | import { createSpeechlySpeechRecognition } from '@speechly/speech-recognition-polyfill'; 59 | 60 | const appId = '<your_app_id>'; 61 | const SpeechlySpeechRecognition = createSpeechlySpeechRecognition(appId); 62 | const speechRecognition = new SpeechlySpeechRecognition(); 63 | ``` 64 | 65 | Before you use `speechRecognition` to start transcribing, you should provide a callback to process any transcripts that get generated. A common use case is to match the transcript against a list of commands and perform an action when you detect a match. Alternatively, you may want to display the transcript in the UI. Here's how to set the callback: 66 | 67 | ``` 68 | speechRecognition.onresult = ({ results }) => { 69 | const transcript = results[0][0].transcript; 70 | // Process the transcript 71 | }; 72 | ``` 73 | 74 | You may also want to configure the recognition object: 75 | 76 | ``` 77 | // Keep transcribing, even if the user stops speaking 78 | speechRecognition.continuous = true; 79 | 80 | // Get transcripts while the user is speaking, not just when they've finished 81 | speechRecognition.interimResults = true; 82 | ``` 83 | 84 | With your recognition object configured, you're ready to start transcribing by using the `start()` method. To comply with rules set by browsers, this _must_ be triggered by a user action such as a button click. For example, in a React component this could look like: 85 | 86 | ``` 87 | const startTranscribing = () => { 88 | speechRecognition.start(); 89 | }; 90 | 91 | // When rendering component 92 | <button onTouchStart={startTranscribing} onMouseDown={startTranscribing}>Push to talk</button> 93 | ``` 94 | 95 | After calling `start()`, the microphone will be turned on and the recognition object will start passing transcripts to the callback you assigned to `onresult`.
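The recognition object also exposes an `onend` callback, which is invoked when transcription ends (as described in [Limitations](#limitations), it fires once `stop()` or `abort()` has been called). A minimal sketch, useful for things like clearing a "listening" indicator in your UI:

```
speechRecognition.onend = () => {
  // Transcription has ended - e.g. turn off a "listening" indicator in the UI
};
```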
If you want to stop transcribing, you can call the following: 96 | 97 | ``` 98 | speechRecognition.stop(); 99 | ``` 100 | 101 | ## Browser support 102 | 103 | This polyfill will work on browsers that support the [MediaDevices](https://developer.mozilla.org/en-US/docs/Web/API/MediaDevices) and [AudioContext](https://developer.mozilla.org/en-US/docs/Web/API/AudioContext) APIs, which covers roughly 95% of web users in 2022. The exceptions are Internet Explorer and most browsers from before 2016. On these browsers, an error will be thrown when creating a `SpeechlySpeechRecognition` object. 104 | 105 | The `SpeechlySpeechRecognition` class offers the `hasBrowserSupport` flag to check whether the browser supports the required APIs. We recommend you do the following when creating your speech recognition object: 106 | ``` 107 | if (SpeechlySpeechRecognition.hasBrowserSupport) { 108 | const speechRecognition = new SpeechlySpeechRecognition(); 109 | // Use speech recognition 110 | } else { 111 | // Show some fallback UI 112 | } 113 | ``` 114 | 115 | ## Handling errors 116 | 117 | A common error case is when the user chooses not to give permission for the web app to access the microphone. This, and any other error emitted by this polyfill, can be handled via the `onerror` callback. In such cases, it's advised that you render some fallback UI as these errors will usually mean that voice-driven features will not work and should be disabled: 118 | 119 | ``` 120 | import { MicrophoneNotAllowedError } from '@speechly/speech-recognition-polyfill'; 121 | 122 | ... 123 | 124 | speechRecognition.onerror = (event) => { 125 | if (event === MicrophoneNotAllowedError) { 126 | // Microphone permission denied - show some fallback UI 127 | } else { 128 | // Unable to start transcribing - show some fallback UI 129 | } 130 | }; 131 | ``` 132 | 133 | ## Examples 134 | 135 | The following examples use React to demonstrate how this polyfill can be used in real web components. 136 | 137 | ### Matching commands 138 | 139 | A common use case is to enable the user to control a web app using their voice. The following example has a "hold to talk" button that enables transcription while held down. It provides a list of commands that, when matched by anything the user says, will be displayed. In practice, these matched commands could be used to perform actions. 140 | 141 | ``` 142 | import React, { useState, useEffect } from 'react'; 143 | import { createSpeechlySpeechRecognition } from '@speechly/speech-recognition-polyfill'; 144 | 145 | const appId = ''; 146 | const SpeechlySpeechRecognition = createSpeechlySpeechRecognition(appId); 147 | const speechRecognition = new SpeechlySpeechRecognition(); 148 | speechRecognition.continuous = true; 149 | speechRecognition.interimResults = true; 150 | 151 | const COMMANDS = ['PLAY', 'PAUSE', 'REWIND']; 152 | 153 | export default () => { 154 | const [matchedCommand, setMatchedCommand] = useState(''); 155 | 156 | const handleResult = ({ results }) => { 157 | const { transcript } = results[0][0]; 158 | COMMANDS.forEach(command => { 159 | if (transcript.indexOf(command) !== -1) { 160 | setMatchedCommand(command); 161 | } 162 | }); 163 | }; 164 | 165 | useEffect(() => { 166 | speechRecognition.onresult = handleResult; 167 | }); 168 | 169 | return ( 170 |
<div> 171 | <button 172 | onTouchStart={speechRecognition.start} 173 | onMouseDown={speechRecognition.start} 174 | onTouchEnd={speechRecognition.stop} 175 | onMouseUp={speechRecognition.stop} 176 | >Hold to talk</button> 177 | {matchedCommand} 178 | </div>
179 | ); 180 | }; 181 | ``` 182 | 183 | ### Displaying a transcript 184 | 185 | You may simply want to display everything the user says as text, for example when composing a message. This example uses the same button as before. The transcripts are combined and collected in a local state, which is displayed as one piece of text. 186 | 187 | ``` 188 | import React, { useState, useEffect, useCallback } from 'react'; 189 | import { createSpeechlySpeechRecognition } from '@speechly/speech-recognition-polyfill'; 190 | 191 | const appId = '<your_app_id>'; 192 | const SpeechlySpeechRecognition = createSpeechlySpeechRecognition(appId); 193 | const speechRecognition = new SpeechlySpeechRecognition(); 194 | speechRecognition.continuous = true; 195 | 196 | export default () => { 197 | const [transcript, setTranscript] = useState(''); 198 | 199 | const handleResult = useCallback(({ results }) => { 200 | const newTranscript = [transcript, results[0][0].transcript].join(' '); 201 | setTranscript(newTranscript); 202 | }, [transcript]); 203 | 204 | useEffect(() => { 205 | speechRecognition.onresult = handleResult; 206 | }); 207 | 208 | return ( 209 |
<div> 210 | <button 211 | onTouchStart={speechRecognition.start} 212 | onMouseDown={speechRecognition.start} 213 | onTouchEnd={speechRecognition.stop} 214 | onMouseUp={speechRecognition.stop} 215 | >Hold to talk</button> 216 | {transcript} 217 | </div>
218 | ); 219 | }; 220 | ``` 221 | 222 | ## Integrating with react-speech-recognition 223 | 224 | This polyfill is compatible with `react-speech-recognition`, a React hook that manages the transcript for you and allows you to provide more powerful commands. For React web apps, we recommend you combine these libraries. See its [README](https://github.com/JamesBrill/react-speech-recognition) for full guidance on how to use `react-speech-recognition`. It can be installed with: 225 | 226 | ``` 227 | npm install --save react-speech-recognition 228 | ``` 229 | 230 | Below is an example with more complex commands that print a message in response to each matched command. For example, saying "Bob is my name" will result in the message "Hi Bob!". 231 | 232 | ``` 233 | import React, { useState } from 'react'; 234 | import SpeechRecognition, { useSpeechRecognition } from 'react-speech-recognition'; 235 | import { createSpeechlySpeechRecognition } from '@speechly/speech-recognition-polyfill'; 236 | 237 | const appId = '<your_app_id>'; 238 | const SpeechlySpeechRecognition = createSpeechlySpeechRecognition(appId); 239 | SpeechRecognition.applyPolyfill(SpeechlySpeechRecognition); 240 | 241 | export default () => { 242 | const [message, setMessage] = useState(''); 243 | const commands = [ 244 | { 245 | command: '* is my name', 246 | callback: (name) => setMessage(`Hi ${name}!`), 247 | matchInterim: true 248 | }, 249 | { 250 | command: 'My top sports are * and *', 251 | callback: (sport1, sport2) => setMessage(`#1: ${sport1}, #2: ${sport2}`) 252 | }, 253 | { 254 | command: 'Goodbye', 255 | callback: () => setMessage('So long!'), 256 | matchInterim: true 257 | }, 258 | { 259 | command: 'Pass the salt (please)', 260 | callback: () => setMessage('My pleasure') 261 | } 262 | ]; 263 | const { 264 | transcript, 265 | listening, 266 | browserSupportsSpeechRecognition, 267 | isMicrophoneAvailable 268 | } = useSpeechRecognition({ commands }); 269 | const listenContinuously = () => SpeechRecognition.startListening({ continuous: true }); 270 | 271 | if (!browserSupportsSpeechRecognition) { 272 | return <span>No browser support</span>; 273 | } 274 | 275 | if (!isMicrophoneAvailable) { 276 | return <span>Please allow access to the microphone</span>; 277 | } 278 | 279 | return ( 280 |
<div> 281 | <p>Microphone: {listening ? 'on' : 'off'}</p> 282 | <button 283 | onTouchStart={listenContinuously} 284 | onMouseDown={listenContinuously} 285 | onTouchEnd={SpeechRecognition.stopListening} 286 | onMouseUp={SpeechRecognition.stopListening} 287 | >Hold to talk</button> 288 | <p>{transcript}</p> 289 | <p>{message}</p> 290 | </div>
291 | ); 292 | }; 293 | ``` 294 | 295 | ## Limitations 296 | 297 | While this polyfill is intended to enable most use cases for voice-driven web apps, it does not implement the full [W3C specification](https://wicg.github.io/speech-api/#speechreco-section) for `SpeechRecognition`, only a subset: 298 | * `start()` method 299 | * `stop()` method 300 | * `abort()` method 301 | * `continuous` property 302 | * `interimResults` property 303 | * `onresult` property 304 | * `onend` property - a callback that is fired when `stop()` or `abort()` is called 305 | * `onerror` property - a callback that is fired when an error occurs while attempting to start speech recognition 306 | 307 | Some notable limitations: 308 | * The `lang` property is currently unsupported, defaulting to English transcription 309 | * `onresult` will only receive the most recent speech recognition result (the utterance that the user is in the process of saying or has just finished saying) and does not store a history of all transcripts. This can easily be resolved by either managing your own transcript state (see the [Displaying a transcript](#displaying-a-transcript) example above) or using `react-speech-recognition` to do that for you 310 | * Transcripts are generated in uppercase letters without punctuation. If needed, you can transform them using [toLowerCase()](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/toLowerCase) 311 | * `onerror` currently only supports the `not-allowed` (user denied permission to use the microphone) error and the `audio-capture` error, which is emitted for any other case where speech recognition fails. The full list in the spec can be found [here](https://developer.mozilla.org/en-US/docs/Web/API/SpeechRecognitionErrorEvent/error) 312 | 313 | ## Contributing 314 | 315 | For a guide on how to develop `speech-recognition-polyfill` and contribute changes, see [CONTRIBUTING.md](CONTRIBUTING.md). 316 | 317 | ## About Speechly 318 | 319 | Speechly is a developer tool for building real-time multimodal voice user interfaces. It enables developers and designers to enhance their current touch user interface with voice functionality for a better user experience. 320 | 321 | ### Speechly key features 322 | 323 | - Fully streaming API 324 | - Multimodal from the ground up 325 | - Easy to configure for any use case 326 | - Fast to integrate into any touch-screen application 327 | - Supports natural corrections such as "Show me red – I mean blue t-shirts" 328 | - Real-time visual feedback encourages users to go on with their voice 329 | 330 | | Example application | Description | 331 | | :---------------------------------------------------: | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | 332 | | | Instead of using buttons, input fields and dropdowns, Speechly enables users to interact with the application by using voice.
Users get real-time visual feedback on the form as they speak and are encouraged to go on. If there's an error, the user can correct it either through the traditional touch user interface or by voice. | 333 | -------------------------------------------------------------------------------- /src/createSpeechRecognition.test.ts: -------------------------------------------------------------------------------- 1 | import { mocked } from 'ts-jest/utils'; 2 | import { BrowserMicrophone, ErrNoAudioConsent } from '@speechly/browser-client'; 3 | import createSpeechlySpeechRecognition from './createSpeechRecognition'; 4 | import { MicrophoneNotAllowedError, SpeechRecognitionFailedError } from './types'; 5 | import { 6 | mockUndefinedWindow, 7 | mockUndefinedNavigator, 8 | mockMediaDevices, 9 | mockUndefinedMediaDevices, 10 | mockAudioContext, 11 | mockWebkitAudioContext, 12 | mockUndefinedAudioContext, 13 | mockUndefinedWebkitAudioContext, 14 | expectSentenceToBeTranscribedWithFinalResult, 15 | expectSentenceToBeTranscribedWithInterimAndFinalResults, 16 | expectSentenceToBeTranscribedWithFirstInitialResult, 17 | } from './testUtils'; 18 | import TEST_DATA from './testData'; 19 | 20 | const { SENTENCE_ONE, SENTENCE_TWO } = TEST_DATA; 21 | 22 | let _callback: any; 23 | const mockOnSegmentChange = jest.fn((callback) => { 24 | _callback = callback; 25 | }); 26 | const mockMicrophoneInitialize = jest.fn(() => Promise.resolve()); 27 | const mockMicrophoneClose = jest.fn(() => Promise.resolve()); 28 | const mockStart = jest.fn(() => Promise.resolve()); 29 | const mockStop = jest.fn(() => Promise.resolve()); 30 | const mockAttach = jest.fn(() => Promise.resolve()); 31 | const mockDetach = jest.fn(() => Promise.resolve()); 32 | const mockMediaStream = { data: 'mockData' }; 33 | const MockBrowserMicrophone = mocked(BrowserMicrophone, true); 34 | 35 | const mockBrowserMicrophone = ({ mediaStream }: { mediaStream: typeof mockMediaStream | null }) => { 36 | MockBrowserMicrophone.mockImplementation(function () { 37 | return { 38 | initialize: mockMicrophoneInitialize, 39 | close: mockMicrophoneClose, 40 | mediaStream, 41 | } as any; 42 | }); 43 | }; 44 | 45 | jest.mock('@speechly/browser-client', () => ({ 46 | BrowserClient: function () { 47 | return { 48 | onSegmentChange: mockOnSegmentChange, 49 | start: mockStart, 50 | stop: mockStop, 51 | attach: mockAttach, 52 | detach: mockDetach, 53 | }; 54 | }, 55 | BrowserMicrophone: jest.fn(), 56 | ErrNoAudioConsent: jest.fn(), 57 | })); 58 | // Simulates speaking an utterance by feeding each successive segment update to the registered segment-change callback 59 | const speak = (sentence: any) => { 60 | sentence.forEach(_callback); 61 | } 62 | // Simulates an utterance that gets interrupted (e.g. by stop or abort) after its first segment update 63 | const speakAndInterrupt = (sentence: any, interrupt: any) => { 64 | _callback(sentence[0]); 65 | interrupt(); 66 | sentence.slice(1).forEach(_callback); 67 | } 68 | 69 | describe('createSpeechlySpeechRecognition', () => { 70 | beforeEach(() => { 71 | MockBrowserMicrophone.mockClear(); 72 | mockBrowserMicrophone({ mediaStream: mockMediaStream }); 73 | mockMicrophoneInitialize.mockClear(); 74 | mockMicrophoneClose.mockClear(); 75 | mockStart.mockClear(); 76 | mockStop.mockClear(); 77 | mockOnSegmentChange.mockClear(); 78 | mockAttach.mockClear(); 79 | mockDetach.mockClear(); 80 | }); 81 | 82 | it('calls initialize on browser microphone when starting transcription', async () => { 83 | const SpeechRecognition = createSpeechlySpeechRecognition('app id'); 84 | const speechRecognition = new SpeechRecognition(); 85 | 86 | await speechRecognition.start(); 87 | 88 | expect(mockMicrophoneInitialize).toHaveBeenCalledTimes(1); 89 | }) 90 | 91 | it('calls attach
on Speechly client with browser microphone media stream when starting transcription', async () => { 92 | const SpeechRecognition = createSpeechlySpeechRecognition('app id'); 93 | const speechRecognition = new SpeechRecognition(); 94 | 95 | await speechRecognition.start(); 96 | 97 | expect(mockAttach).toHaveBeenCalledTimes(1); 98 | expect(mockAttach).toHaveBeenCalledWith(mockMediaStream); 99 | }) 100 | 101 | it('calls start on Speechly client when starting transcription', async () => { 102 | const SpeechRecognition = createSpeechlySpeechRecognition('app id'); 103 | const speechRecognition = new SpeechRecognition(); 104 | 105 | await speechRecognition.start(); 106 | 107 | expect(mockStart).toHaveBeenCalledTimes(1); 108 | }) 109 | 110 | it('calls given onresult for only the final result (interimResults: false)', async () => { 111 | const SpeechRecognition = createSpeechlySpeechRecognition('app id'); 112 | const speechRecognition = new SpeechRecognition(); 113 | const mockOnResult = jest.fn(); 114 | speechRecognition.onresult = mockOnResult; 115 | 116 | await speechRecognition.start(); 117 | speak(SENTENCE_ONE); 118 | 119 | expect(mockOnResult).toHaveBeenCalledTimes(1); 120 | expectSentenceToBeTranscribedWithFinalResult(SENTENCE_ONE, mockOnResult); 121 | }) 122 | 123 | it('calls given onresult for each interim or final result (interimResults: true)', async () => { 124 | const SpeechRecognition = createSpeechlySpeechRecognition('app id'); 125 | const speechRecognition = new SpeechRecognition(); 126 | const mockOnResult = jest.fn(); 127 | speechRecognition.onresult = mockOnResult; 128 | speechRecognition.interimResults = true; 129 | 130 | await speechRecognition.start(); 131 | speak(SENTENCE_ONE); 132 | 133 | expect(mockOnResult).toHaveBeenCalledTimes(SENTENCE_ONE.length); 134 | expectSentenceToBeTranscribedWithInterimAndFinalResults(SENTENCE_ONE, mockOnResult); 135 | }) 136 | 137 | it('transcribes two utterances when continuous is turned on (interimResults: false)', async () => { 138 | const SpeechRecognition = createSpeechlySpeechRecognition('app id'); 139 | const speechRecognition = new SpeechRecognition(); 140 | const mockOnResult = jest.fn(); 141 | speechRecognition.onresult = mockOnResult; 142 | speechRecognition.continuous = true; 143 | 144 | await speechRecognition.start(); 145 | speak(SENTENCE_ONE); 146 | speak(SENTENCE_TWO); 147 | 148 | expect(mockOnResult).toHaveBeenCalledTimes(2); 149 | expectSentenceToBeTranscribedWithFinalResult(SENTENCE_ONE, mockOnResult); 150 | expectSentenceToBeTranscribedWithFinalResult(SENTENCE_TWO, mockOnResult, 2); 151 | }) 152 | 153 | it('transcribes only one of two utterances when continuous is turned off (interimResults: false)', async () => { 154 | const SpeechRecognition = createSpeechlySpeechRecognition('app id'); 155 | const speechRecognition = new SpeechRecognition(); 156 | const mockOnResult = jest.fn(); 157 | speechRecognition.onresult = mockOnResult; 158 | 159 | await speechRecognition.start(); 160 | speak(SENTENCE_ONE); 161 | speak(SENTENCE_TWO); 162 | 163 | expect(mockOnResult).toHaveBeenCalledTimes(1); 164 | expectSentenceToBeTranscribedWithFinalResult(SENTENCE_ONE, mockOnResult); 165 | }) 166 | 167 | it('transcribes two utterances when continuous is turned on (interimResults: true)', async () => { 168 | const SpeechRecognition = createSpeechlySpeechRecognition('app id'); 169 | const speechRecognition = new SpeechRecognition(); 170 | const mockOnResult = jest.fn(); 171 | speechRecognition.onresult = mockOnResult; 172 | 
speechRecognition.interimResults = true; 173 | speechRecognition.continuous = true; 174 | 175 | await speechRecognition.start(); 176 | speak(SENTENCE_ONE); 177 | speak(SENTENCE_TWO); 178 | 179 | expect(mockOnResult).toHaveBeenCalledTimes(SENTENCE_ONE.length + SENTENCE_TWO.length); 180 | expectSentenceToBeTranscribedWithInterimAndFinalResults(SENTENCE_ONE, mockOnResult); 181 | expectSentenceToBeTranscribedWithInterimAndFinalResults(SENTENCE_TWO, mockOnResult, SENTENCE_ONE.length + 1); 182 | }) 183 | 184 | it('transcribes only one of two utterances when continuous is turned off (interimResults: true)', async () => { 185 | const SpeechRecognition = createSpeechlySpeechRecognition('app id'); 186 | const speechRecognition = new SpeechRecognition(); 187 | const mockOnResult = jest.fn(); 188 | speechRecognition.onresult = mockOnResult; 189 | speechRecognition.interimResults = true; 190 | 191 | await speechRecognition.start(); 192 | speak(SENTENCE_ONE); 193 | speak(SENTENCE_TWO); 194 | 195 | expect(mockOnResult).toHaveBeenCalledTimes(SENTENCE_ONE.length); 196 | expectSentenceToBeTranscribedWithInterimAndFinalResults(SENTENCE_ONE, mockOnResult); 197 | }) 198 | 199 | it('does not call initialize, stop or onend when stopping a transcription that was never started', async () => { 200 | const SpeechRecognition = createSpeechlySpeechRecognition('app id'); 201 | const speechRecognition = new SpeechRecognition(); 202 | const mockOnEnd = jest.fn(); 203 | speechRecognition.onend = mockOnEnd; 204 | 205 | await speechRecognition.stop(); 206 | 207 | expect(mockMicrophoneInitialize).toHaveBeenCalledTimes(0); 208 | expect(mockStop).toHaveBeenCalledTimes(0); 209 | expect(mockDetach).toHaveBeenCalledTimes(0); 210 | expect(mockMicrophoneClose).toHaveBeenCalledTimes(0); 211 | expect(mockOnEnd).toHaveBeenCalledTimes(0); 212 | }) 213 | 214 | it('calls initialize, stop and onend when stopping a transcription that has been started', async () => { 215 | const SpeechRecognition = createSpeechlySpeechRecognition('app id'); 216 | const speechRecognition = new SpeechRecognition(); 217 | const mockOnEnd = jest.fn(); 218 | speechRecognition.onend = mockOnEnd; 219 | 220 | await speechRecognition.start(); 221 | await speechRecognition.stop(); 222 | 223 | expect(mockMicrophoneInitialize).toHaveBeenCalledTimes(1); 224 | expect(mockStop).toHaveBeenCalledTimes(1); 225 | expect(mockDetach).toHaveBeenCalledTimes(1); 226 | expect(mockMicrophoneClose).toHaveBeenCalledTimes(1); 227 | expect(mockOnEnd).toHaveBeenCalledTimes(1); 228 | }) 229 | 230 | it('does not call initialize, stop or onend a second time when stopping a transcription that was already stopped', async () => { 231 | const SpeechRecognition = createSpeechlySpeechRecognition('app id'); 232 | const speechRecognition = new SpeechRecognition(); 233 | const mockOnEnd = jest.fn(); 234 | speechRecognition.onend = mockOnEnd; 235 | 236 | await speechRecognition.start(); 237 | await speechRecognition.stop(); 238 | await speechRecognition.stop(); 239 | 240 | expect(mockMicrophoneInitialize).toHaveBeenCalledTimes(1); 241 | expect(mockStop).toHaveBeenCalledTimes(1); 242 | expect(mockDetach).toHaveBeenCalledTimes(1); 243 | expect(mockMicrophoneClose).toHaveBeenCalledTimes(1); 244 | expect(mockOnEnd).toHaveBeenCalledTimes(1); 245 | }) 246 | 247 | it('does not call initialize, stop or onend when aborting a transcription that was never started', async () => { 248 | const SpeechRecognition = createSpeechlySpeechRecognition('app id'); 249 | const speechRecognition = new SpeechRecognition(); 250
| const mockOnEnd = jest.fn(); 251 | speechRecognition.onend = mockOnEnd; 252 | 253 | await speechRecognition.abort(); 254 | 255 | expect(mockMicrophoneInitialize).toHaveBeenCalledTimes(0); 256 | expect(mockStop).toHaveBeenCalledTimes(0); 257 | expect(mockDetach).toHaveBeenCalledTimes(0); 258 | expect(mockMicrophoneClose).toHaveBeenCalledTimes(0); 259 | expect(mockOnEnd).toHaveBeenCalledTimes(0); 260 | }) 261 | 262 | it('calls initialize, stop and onend when aborting a transcription that has been started', async () => { 263 | const SpeechRecognition = createSpeechlySpeechRecognition('app id'); 264 | const speechRecognition = new SpeechRecognition(); 265 | const mockOnEnd = jest.fn(); 266 | speechRecognition.onend = mockOnEnd; 267 | 268 | await speechRecognition.start(); 269 | await speechRecognition.abort(); 270 | 271 | expect(mockMicrophoneInitialize).toHaveBeenCalledTimes(1); 272 | expect(mockStop).toHaveBeenCalledTimes(1); 273 | expect(mockDetach).toHaveBeenCalledTimes(1); 274 | expect(mockMicrophoneClose).toHaveBeenCalledTimes(1); 275 | expect(mockOnEnd).toHaveBeenCalledTimes(1); 276 | }) 277 | 278 | it('does not call initialize, stop or onend a second time when aborting a transcription that was already aborted', async () => { 279 | const SpeechRecognition = createSpeechlySpeechRecognition('app id'); 280 | const speechRecognition = new SpeechRecognition(); 281 | const mockOnEnd = jest.fn(); 282 | speechRecognition.onend = mockOnEnd; 283 | 284 | await speechRecognition.start(); 285 | await speechRecognition.abort(); 286 | await speechRecognition.abort(); 287 | 288 | expect(mockMicrophoneInitialize).toHaveBeenCalledTimes(1); 289 | expect(mockStop).toHaveBeenCalledTimes(1); 290 | expect(mockDetach).toHaveBeenCalledTimes(1); 291 | expect(mockMicrophoneClose).toHaveBeenCalledTimes(1); 292 | expect(mockOnEnd).toHaveBeenCalledTimes(1); 293 | }) 294 | 295 | it('calling stop does not prevent ongoing utterance from being transcribed', async () => { 296 | const SpeechRecognition = createSpeechlySpeechRecognition('app id'); 297 | const speechRecognition = new SpeechRecognition(); 298 | const mockOnResult = jest.fn(); 299 | speechRecognition.onresult = mockOnResult; 300 | const mockOnEnd = jest.fn(); 301 | speechRecognition.onend = mockOnEnd; 302 | speechRecognition.interimResults = true; 303 | 304 | await speechRecognition.start(); 305 | speakAndInterrupt(SENTENCE_ONE, speechRecognition.stop); 306 | 307 | expect(mockOnResult).toHaveBeenCalledTimes(SENTENCE_ONE.length); 308 | expectSentenceToBeTranscribedWithInterimAndFinalResults(SENTENCE_ONE, mockOnResult); 309 | }) 310 | 311 | it('calling abort prevents ongoing utterance from being transcribed', async () => { 312 | const SpeechRecognition = createSpeechlySpeechRecognition('app id'); 313 | const speechRecognition = new SpeechRecognition(); 314 | const mockOnResult = jest.fn(); 315 | speechRecognition.onresult = mockOnResult; 316 | const mockOnEnd = jest.fn(); 317 | speechRecognition.onend = mockOnEnd; 318 | speechRecognition.interimResults = true; 319 | 320 | await speechRecognition.start(); 321 | speakAndInterrupt(SENTENCE_ONE, speechRecognition.abort); 322 | 323 | expect(mockOnResult).toHaveBeenCalledTimes(1); 324 | expectSentenceToBeTranscribedWithFirstInitialResult(SENTENCE_ONE, mockOnResult); 325 | }) 326 | 327 | it('calling stop prevents subsequent utterances from being transcribed', async () => { 328 | const SpeechRecognition = createSpeechlySpeechRecognition('app id'); 329 | const speechRecognition = new SpeechRecognition(); 330 | const
mockOnResult = jest.fn(); 331 | speechRecognition.onresult = mockOnResult; 332 | const mockOnEnd = jest.fn(); 333 | speechRecognition.onend = mockOnEnd; 334 | speechRecognition.interimResults = true; 335 | 336 | await speechRecognition.start(); 337 | speakAndInterrupt(SENTENCE_ONE, speechRecognition.stop); 338 | speak(SENTENCE_TWO); 339 | 340 | expect(mockOnResult).toHaveBeenCalledTimes(SENTENCE_ONE.length); 341 | expectSentenceToBeTranscribedWithInterimAndFinalResults(SENTENCE_ONE, mockOnResult); 342 | }) 343 | 344 | it('calling abort prevents subsequent utterances from being transcribed', async () => { 345 | const SpeechRecognition = createSpeechlySpeechRecognition('app id'); 346 | const speechRecognition = new SpeechRecognition(); 347 | const mockOnResult = jest.fn(); 348 | speechRecognition.onresult = mockOnResult; 349 | const mockOnEnd = jest.fn(); 350 | speechRecognition.onend = mockOnEnd; 351 | speechRecognition.interimResults = true; 352 | 353 | await speechRecognition.start(); 354 | speakAndInterrupt(SENTENCE_ONE, speechRecognition.abort); 355 | speak(SENTENCE_TWO); 356 | 357 | expect(mockOnResult).toHaveBeenCalledTimes(1); 358 | expectSentenceToBeTranscribedWithFirstInitialResult(SENTENCE_ONE, mockOnResult); 359 | }) 360 | 361 | it('sets hasBrowserSupport to true when required APIs are defined (non-WebKit)', async () => { 362 | mockAudioContext(); 363 | mockUndefinedWebkitAudioContext(); 364 | mockMediaDevices(); 365 | 366 | const SpeechRecognition = createSpeechlySpeechRecognition('app id'); 367 | 368 | expect(SpeechRecognition.hasBrowserSupport).toEqual(true); 369 | }) 370 | 371 | it('sets hasBrowserSupport to true when required APIs are defined (WebKit)', async () => { 372 | mockUndefinedAudioContext(); 373 | mockWebkitAudioContext(); 374 | mockMediaDevices(); 375 | 376 | const SpeechRecognition = createSpeechlySpeechRecognition('app id'); 377 | 378 | expect(SpeechRecognition.hasBrowserSupport).toEqual(true); 379 | }) 380 | 381 | it('sets hasBrowserSupport to false when all AudioContext APIs are undefined', async () => { 382 | mockUndefinedAudioContext(); 383 | mockUndefinedWebkitAudioContext(); 384 | mockMediaDevices(); 385 | 386 | const SpeechRecognition = createSpeechlySpeechRecognition('app id'); 387 | 388 | expect(SpeechRecognition.hasBrowserSupport).toEqual(false); 389 | }) 390 | 391 | it('sets hasBrowserSupport to false when MediaDevices API is undefined', async () => { 392 | mockAudioContext(); 393 | mockUndefinedMediaDevices(); 394 | 395 | const SpeechRecognition = createSpeechlySpeechRecognition('app id'); 396 | 397 | expect(SpeechRecognition.hasBrowserSupport).toEqual(false); 398 | }) 399 | 400 | it('sets hasBrowserSupport to false when Navigator API is undefined', async () => { 401 | mockAudioContext(); 402 | mockUndefinedNavigator(); 403 | 404 | const SpeechRecognition = createSpeechlySpeechRecognition('app id'); 405 | 406 | expect(SpeechRecognition.hasBrowserSupport).toEqual(false); 407 | }) 408 | 409 | it('sets hasBrowserSupport to false when window is undefined', async () => { 410 | mockUndefinedWindow(); 411 | 412 | const SpeechRecognition = createSpeechlySpeechRecognition('app id'); 413 | 414 | expect(SpeechRecognition.hasBrowserSupport).toEqual(false); 415 | }) 416 | 417 | it('calls onerror with MicrophoneNotAllowedError when no microphone permission is given on start', async () => { 418 | const SpeechRecognition = createSpeechlySpeechRecognition('app id'); 419 | const speechRecognition = new SpeechRecognition(); 420 | const mockOnError = jest.fn(); 421 |
speechRecognition.onerror = mockOnError; 422 | mockMicrophoneInitialize.mockImplementationOnce(() => Promise.reject(ErrNoAudioConsent)) 423 | 424 | await speechRecognition.start(); 425 | 426 | expect(mockOnError).toHaveBeenCalledTimes(1); 427 | expect(mockOnError).toHaveBeenCalledWith(MicrophoneNotAllowedError); 428 | }) 429 | 430 | it('calls onerror with SpeechRecognitionFailedError when speech recognition fails on start', async () => { 431 | const SpeechRecognition = createSpeechlySpeechRecognition('app id'); 432 | const speechRecognition = new SpeechRecognition(); 433 | const mockOnError = jest.fn(); 434 | speechRecognition.onerror = mockOnError; 435 | mockMicrophoneInitialize.mockImplementationOnce(() => Promise.reject(new Error('generic failure'))) 436 | 437 | await speechRecognition.start(); 438 | 439 | expect(mockOnError).toHaveBeenCalledTimes(1); 440 | expect(mockOnError).toHaveBeenCalledWith(SpeechRecognitionFailedError); 441 | }) 442 | 443 | it('calls onerror with SpeechRecognitionFailedError when speech recognition fails on attach', async () => { 444 | const SpeechRecognition = createSpeechlySpeechRecognition('app id'); 445 | const speechRecognition = new SpeechRecognition(); 446 | const mockOnError = jest.fn(); 447 | speechRecognition.onerror = mockOnError; 448 | mockAttach.mockImplementationOnce(() => Promise.reject(new Error('generic failure'))) 449 | 450 | await speechRecognition.start(); 451 | 452 | expect(mockOnError).toHaveBeenCalledTimes(1); 453 | expect(mockOnError).toHaveBeenCalledWith(SpeechRecognitionFailedError); 454 | }) 455 | 456 | it('calls onerror with SpeechRecognitionFailedError when browser microphone media stream is falsy', async () => { 457 | mockBrowserMicrophone({ mediaStream: null }); 458 | const SpeechRecognition = createSpeechlySpeechRecognition('app id'); 459 | const speechRecognition = new SpeechRecognition(); 460 | const mockOnError = jest.fn(); 461 | speechRecognition.onerror = mockOnError; 462 | 463 | await speechRecognition.start(); 464 | 465 | expect(mockOnError).toHaveBeenCalledTimes(1); 466 | expect(mockOnError).toHaveBeenCalledWith(SpeechRecognitionFailedError); 467 | }) 468 | }) --------------------------------------------------------------------------------