├── .gitignore
├── ui-example.png
├── w3c.json
├── .pr-preview.json
├── .github
    ├── PULL_REQUEST_TEMPLATE.md
    └── workflows
    │   └── auto-publish.yml
├── .travis.yml
├── package.json
├── README.md
├── Makefile
├── explainers
    ├── contextual-biasing.md
    └── on-device-speech-recognition.md
└── index.bs


/.gitignore:
--------------------------------------------------------------------------------
1 | index.html
2 | node_modules/
3 | .DS_Store
4 | .idea/
5 | 
6 | 


--------------------------------------------------------------------------------
/ui-example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WebAudio/web-speech-api/HEAD/ui-example.png


--------------------------------------------------------------------------------
/w3c.json:
--------------------------------------------------------------------------------
1 | {
2 |     "group": "cg/audio-comgp",
3 |     "contacts": ["svgeesus"],
4 |     "repo-type": "cg-report"
5 | }
6 | 


--------------------------------------------------------------------------------
/.pr-preview.json:
--------------------------------------------------------------------------------
1 | {
2 |     "src_file": "index.bs",
3 |     "type": "bikeshed",
4 |     "params": {
5 |         "force": 1
6 |     }
7 | }
8 | 


--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
 1 | Closes #???
 2 | 
 3 | The following tasks have been completed:
 4 | 
 5 |  * [ ] Updated web-platform-tests: (link to pull request)
 6 | 
 7 | Implementation commitment:
 8 | 
 9 |  * [ ] Blink: (link to issue)
10 |  * [ ] Gecko: (link to issue)
11 |  * [ ] WebKit: (link to issue)
12 |  
13 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | branches:
 2 |   only:
 3 |     - master
 4 | language: python
 5 | python:
 6 |   - "3.8"
 7 | install:
 8 |   - pip install bikeshed
 9 |   - bikeshed update
10 | script:
11 |   - bikeshed spec
12 | before_deploy:
13 |   - mkdir out
14 |   - mv *.html *.png out/
15 | deploy:
16 |   local-dir: out
17 |   provider: pages
18 |   skip-cleanup: true
19 |   github-token: $GITHUB_TOKEN
20 |   keep-history: true
21 |   on:
22 |     branch: master
23 | 


--------------------------------------------------------------------------------
/.github/workflows/auto-publish.yml:
--------------------------------------------------------------------------------
 1 | name: Auto-publish
 2 | on:
 3 |   pull_request: {}
 4 |   push:
 5 |     paths:
 6 |       - index.bs
 7 |     branches: [main]
 8 | 
 9 | jobs:
10 |   main:
11 |     name: Build, Validate and Deploy
12 |     runs-on: ubuntu-latest
13 |     steps:
14 |       - uses: actions/checkout@v4
15 |       - uses: w3c/spec-prod@v2
16 |         with:
17 |           GH_PAGES_BRANCH: gh-pages
18 |           BUILD_FAIL_ON: link-error
19 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "web-speech-api",
 3 |   "version": "1.0.0",
 4 |   "description": "This is the source for the [Web Speech API](https://webaudio.github.io/web-speech-api/) spec.",
 5 |   "main": "index.js",
 6 |   "scripts": {
 7 |     "test": "echo \"Error: no test specified\" && exit 1"
 8 |   },
 9 |   "repository": {
10 |     "type": "git",
11 |     "url": "git+https://github.com/WebAudio/web-speech-api.git"
12 |   },
13 |   "bugs": {
14 |     "url": "https://github.com/WebAudio/web-speech-api/issues"
15 |   },
16 |   "homepage": "https://github.com/WebAudio/web-speech-api#readme",
17 |   "devDependencies": {
18 |     "vnu-jar": "^24.10.17"
19 |   }
20 | }
21 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Web Speech API
 2 | 
 3 | This is the source for the [Web Speech API](https://webaudio.github.io/web-speech-api/) spec.
 4 | 
 5 | ## Tests
 6 | 
 7 | For normative changes, a corresponding
 8 | [web-platform-tests](https://github.com/w3c/web-platform-tests) PR is highly appreciated. Typically,
 9 | both PRs will be merged at the same time. Note that a test change that contradicts the spec should
10 | not be merged before the corresponding spec change. If testing is not practical, please explain why
11 | and if appropriate [file a web-platform-tests issue](https://github.com/w3c/web-platform-tests/issues/new)
12 | to follow up later. Add the `type:untestable` or `type:missing-coverage` label as appropriate.
13 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL := /bin/bash
 2 | 
 3 | DST := $(patsubst %.bs,%.html,$(wildcard *.bs))
 4 | REMOTE := $(filter remote,$(MAKECMDGOALS))
 5 | 
 6 | all: $(DST)
 7 | 	@ echo "All done"
 8 | 
 9 | %.html : %.bs node_modules/vnu-jar/build/dist/vnu.jar
10 | ifndef REMOTE
11 | 	@ echo "Building $@"
12 | 	bikeshed --die-on=warning spec $< $@
13 | 	java -jar node_modules/vnu-jar/build/dist/vnu.jar --also-check-css $@
14 | else
15 | 	@ echo "Building $@ remotely"
16 | 	@ (HTTP_STATUS=$$(curl https://api.csswg.org/bikeshed/ \
17 | 	                       --output $@ \
18 | 	                       --write-out "%{http_code}" \
19 | 	                       --header "Accept: text/plain, text/html" \
20 | 	                       -F die-on=warning \
21 | 	                       -F file=@$<) && \
22 | 	[[ "$$HTTP_STATUS" -eq "200" ]]) || ( \
23 | 		echo ""; cat $@; echo ""; \
24 | 		rm -f index.html; \
25 | 		exit 22 \
26 | 	);
27 | endif
28 | 
29 | node_modules/vnu-jar/build/dist/vnu.jar:
30 | 	npm install vnu-jar
31 | 
32 | remote: all
33 | 
34 | 


--------------------------------------------------------------------------------
/explainers/contextual-biasing.md:
--------------------------------------------------------------------------------
 1 | # Explainer: Contextual Biasing for the Web Speech API
 2 | 
 3 | ## Introduction
 4 | 
 5 | The Web Speech API provides powerful speech recognition capabilities to web applications. However, general-purpose speech recognition models can sometimes struggle with domain-specific terminology, proper nouns, or other words that are unlikely to appear in general conversation. This can lead to a frustrating user experience where the user's intent is frequently misrecognized.
 6 | 
 7 | To address this, we introduce **contextual biasing** to the Web Speech API. This feature allows developers to provide "hints" to the speech recognition engine in the form of a list of phrases and boost values. By biasing the recognizer towards these phrases, applications can significantly improve the accuracy for vocabulary that is important in their specific context.
 8 | 
 9 | ## Why Use Contextual Biasing?
10 | 
11 | ### 1. **Improved Accuracy**
12 | By providing a list of likely phrases, developers can dramatically increase the probability of those phrases being recognized correctly. This is especially useful for words that are acoustically similar to more common words.
13 | 
14 | ### 2. **Enhanced User Experience**
15 | When speech recognition "just works" for the user's context, it leads to a smoother, faster, and less frustrating interaction. Users don't have to repeat themselves or manually correct transcription errors.
16 | 
17 | ### 3. **Enabling Specialized Applications**
18 | Contextual biasing makes the Web Speech API a more viable option for specialized applications in fields like medicine, law, science, or gaming, where precise and often uncommon terminology is essential.
19 | 
20 | ## Example Use Cases
21 | 
22 | ### 1. Voice-controlled Video Game
23 | A video game might have characters with unique names like "Zoltan," "Xylia," or "Grog." Without contextual biasing, a command like "Attack Zoltan" might be misheard as "Attack Sultan." By providing a list of character and location names, the game can ensure commands are understood reliably.
24 | 
25 | ### 2. E-commerce Product Search
26 | An online store can bias the speech recognizer towards its product catalog. When a user says "Show me Fujifilm cameras," the recognizer is more likely to correctly identify "Fujifilm" instead of a more common but similar-sounding word.
27 | 
28 | ### 3. Medical Dictation
29 | A web-based application for doctors could be biased towards recognizing complex medical terms, drug names, and procedures. This allows for accurate and efficient voice-based note-taking.
30 | 
31 | ## New API Components
32 | 
33 | Contextual biasing is implemented through a new `phrases` attribute on the `SpeechRecognition` interface, which uses the new `SpeechRecognitionPhrase` interface.
34 | 
35 | ### 1. `SpeechRecognition.phrases` attribute
36 | This attribute is an `ObservableArray<SpeechRecognitionPhrase>` that allows developers to provide contextual hints for the recognition session. It can be modified like a JavaScript `Array`.
37 | 
38 | ### 2. `SpeechRecognitionPhrase` interface
39 | Represents a single phrase and its associated boost value.
40 | 
41 | - `constructor(DOMString phrase, optional float boost = 1.0)`: Creates a new phrase object.
42 | - `phrase`: The text string to be boosted.
43 | - `boost`: A float between 0.0 and 10.0. Higher values make the phrase more likely to be recognized.
44 | 
45 | 
46 | ### Example Usage
47 | 
48 | ```javascript
49 | // A list of phrases relevant to our application's context.
50 | const phraseData = [
51 |   { phrase: 'Zoltan', boost: 3.0 },
52 |   { phrase: 'Grog', boost: 2.0 },
53 | ];
54 | 
55 | // Create SpeechRecognitionPhrase objects.
56 | const phraseObjects = phraseData.map(p => new SpeechRecognitionPhrase(p.phrase, p.boost));
57 | 
58 | const recognition = new SpeechRecognition();
59 | 
60 | // Assign the phrase objects to the recognition instance.
61 | // The attribute is an ObservableArray, so we can assign an array to it.
62 | recognition.phrases = phraseObjects;
63 | 
64 | // We can also dynamically add/remove phrases.
65 | recognition.phrases.push(new SpeechRecognitionPhrase('Xylia', 2.5));
66 | 
67 | // Some user agents (e.g. Chrome) might only support on-device contextual biasing.
68 | recognition.processLocally = true;
69 | 
70 | recognition.onresult = (event) => {
71 |   const transcript = event.results[0][0].transcript;
72 |   console.log(`Result: ${transcript}`);
73 | };
74 | 
75 | recognition.onerror = (event) => {
76 |   if (event.error === 'phrases-not-supported') {
77 |     console.warn('Contextual biasing is not supported by this browser/service.');
78 |   }
79 | };
80 | 
81 | // Start recognition when the user clicks a button.
82 | document.getElementById('speak-button').onclick = () => {
83 |   recognition.start();
84 | };
85 | ```
86 | 
87 | ## Conclusion
88 | 
89 | Contextual biasing is a powerful enhancement to the Web Speech API that gives developers finer control over the speech recognition process. By allowing applications to provide context-specific hints, this feature improves accuracy, creates a better user experience, and makes voice-enabled web applications more practical for a wider range of specialized use cases.


--------------------------------------------------------------------------------
/explainers/on-device-speech-recognition.md:
--------------------------------------------------------------------------------
  1 | # Explainer: On-Device Speech Recognition for the Web Speech API
  2 | 
  3 | ## Introduction
  4 | 
  5 | The Web Speech API is a powerful browser feature that enables applications to perform speech recognition. Traditionally, this functionality relies on sending audio data to cloud-based services for recognition. While this approach is effective, it has certain drawbacks:
  6 | 
  7 | - **Privacy concerns:** Raw and transcribed audio is transmitted over the network.
  8 | - **Latency issues:** Users may experience delays due to network communication.
  9 | - **Offline limitations:** Speech recognition does not work without an internet connection.
 10 | 
 11 | To address these issues, we introduce **on-device speech recognition capabilities** as part of the Web Speech API. This enhancement allows speech recognition to run locally on user devices, providing a faster, more private, and offline-compatible experience.
 12 | 
 13 | ## Why Use On-Device Speech Recognition?
 14 |  
 15 | ### 1. **Privacy**
 16 | On-device processing ensures that neither raw audio nor transcriptions leave the user's device, enhancing data security and user trust.
 17 | 
 18 | ### 2. **Performance**
 19 | Local processing reduces latency, providing a smoother and faster user experience.
 20 | 
 21 | ### 3. **Offline Functionality**
 22 | Applications can offer speech recognition capabilities even without an active internet connection, increasing their utility in remote or low-connectivity environments.
 23 | ## New API Members
 24 | 
 25 | This enhancement introduces new members to the Web Speech API to support on-device recognition: a dictionary for configuration, an instance attribute, and static methods for managing capabilities.
 26 | 
 27 | ### `SpeechRecognitionOptions` Dictionary
 28 | 
 29 | This dictionary is used to configure speech recognition preferences, both for individual sessions and for querying or installing capabilities.
 30 | 
 31 | It includes the following members:
 32 | 
 33 | - `langs`: A required sequence of `DOMString` representing BCP-47 language tags (e.g., `['en-US']`).
 34 | - `processLocally`: A boolean that, if `true`, instructs the recognition to be performed on-device. If `false` (the default), any available recognition method (cloud-based or on-device) may be used.
 35 | 
 36 | 
 37 | ```idl
 38 | dictionary SpeechRecognitionOptions {
 39 |   required sequence<DOMString> langs; // BCP-47 language tags
 40 |   boolean processLocally = false;  // Instructs the recognition to be performed on-device. If `false` (default), any available recognition method may be used.
 41 | };
 42 | ```
 43 | 
 44 | #### Example Usage
 45 | ```javascript
 46 | const recognition = new SpeechRecognition();
 47 | recognition.options = {
 48 |   langs: ['en-US'],
 49 |   processLocally: true
 50 | };
 51 | recognition.start();
 52 | ```
 53 | 
 54 | ## Example use cases
 55 | ### 1. Company with data residency requirements
 56 | Websites with strict data residency requirements (i.e., regulatory, legal, or company policy) can ensure that audio data remains on the user's device and is not sent over the network for processing. This is particularly crucial for compliance with regulations like GDPR, which considers voice as personally identifiable information (PII) as voice recordings can reveal information about an individual's gender, ethnic origin, or even potential health conditions. On-device processing significantly enhances user privacy by minimizing the exposure of sensitive voice data.
 57 | 
 58 | ### 2. Video conferencing service with strict performance requirements (e.g. meet.google.com)
 59 | Some websites would only adopt the Web Speech API if it meets strict performance requirements. On-device speech recognition may provide better accuracy and latency as well as provide additional features (e.g. contextual biasing) that may not be available by the cloud-based service used by the user agent. In the event on-device speech recognition is not available, these websites may elect to use an alternative cloud-based speech recognition provider that meet these requirements instead of the default one provided by the user agent.
 60 | 
 61 | ### 3. Educational website (e.g. khanacademy.org)
 62 | Applications that need to function in unreliable or offline network conditions—such as voice-based productivity tools, educational software, or accessibility features—benefit from on-device speech recognition. This enables uninterrupted functionality during flights, remote travel, or in areas with limited connectivity. When on-device recognition is unavailable, a website can choose to hide the UI or gracefully degrade functionality to maintain a coherent user experience.
 63 | 
 64 | ## New API Components
 65 | 
 66 | ### 1. `static Promise<AvailabilityStatus> SpeechRecognition.available(SpeechRecognitionOptions options)`
 67 | This static method checks the availability of speech recognition capabilities matching the provided `SpeechRecognitionOptions`.
 68 | 
 69 | The method returns a `Promise` that resolves to an `AvailabilityStatus` enum string:
 70 | - `"available"`: Ready to use according to the specified options.
 71 | - `"downloadable"`: Not currently available, but resources (e.g., language packs for on-device) can be downloaded.
 72 | - `"downloading"`: Resources are currently being downloaded.
 73 | - `"unavailable"`: Not available and not downloadable.
 74 | 
 75 | #### Example Usage
 76 | ```javascript
 77 | // Check availability for on-device English (US)
 78 | const options = { langs: ['en-US'], processLocally: true };
 79 | 
 80 | SpeechRecognition.available(options).then((status) => {
 81 |     console.log(`Speech recognition status for ${options.langs.join(', ')} (on-device): ${status}.`);
 82 |     if (status === 'available') {
 83 |         console.log('Ready to use on-device speech recognition.');
 84 |     } else if (status === 'downloadable') {
 85 |         console.log('Resources are downloadable. Call install() if needed.');
 86 |     } else if (status === 'downloading') {
 87 |         console.log('Resources are currently downloading.');
 88 |     } else {
 89 |         console.log('Not available for on-device speech recognition.');
 90 |     }
 91 | });
 92 | ```
 93 | 
 94 | ### 2. `Promise<boolean> install(SpeechRecognitionOptions options)`
 95 | This method installs the resources required for speech recognition matching the provided `SpeechRecognitionOptions`. The installation process may download and configure necessary language models.
 96 | 
 97 | #### Example Usage
 98 | ```javascript
 99 | // Install on-device resources for English (US)
100 | const options = { langs: ['en-US'], processLocally: true };
101 | SpeechRecognition.install(options).then((success) => {
102 |     if (success) {
103 |         console.log(`On-device speech recognition resources for ${options.langs.join(', ')} installed successfully.`);
104 |     } else {
105 |         console.error(`Unable to install on-device speech recognition resources for ${options.langs.join(', ')}. This could be due to unsupported languages or download issues.`);
106 |     }
107 | });
108 | ```
109 | 
110 | ## Supported languages
111 | The availability of on-device speech recognition languages is user-agent dependent. As an example, Google Chrome supports the following languages for on-device recognition:
112 | * de-DE (German, Germany)
113 | * en-US (English, United States)
114 | * es-ES (Spanish, Spain)
115 | * fr-FR (French, France)
116 | * hi-IN (Hindi, India)
117 | * id-ID (Indonesian, Indonesia)
118 | * it-IT (Italian, Italy)
119 | * ja-JP (Japanese, Japan)
120 | * ko-KR (Korean, South Korea)
121 | * pl-PL (Polish, Poland)
122 | * pt-BR (Portuguese, Brazil)
123 | * ru-RU (Russian, Russia)
124 | * th-TH (Thai, Thailand)
125 | * tr-TR (Turkish, Turkey)
126 | * vi-VN (Vietnamese, Vietnam)
127 | * zh-CN (Chinese, Mandarin, Simplified)
128 | * zh-TW (Chinese, Mandarin, Traditional)
129 | 
130 | ## Privacy considerations
131 | To reduce the risk of fingerprinting, user agents must implement privacy-preserving countermeasures. The Web Speech API will employ the same masking techniques used by the [Web Translation API](https://github.com/webmachinelearning/writing-assistance-apis/pull/47).
132 | 
133 | ## Conclusion
134 | The addition of on-device speech recognition capabilities to the Web Speech API marks a significant step forward in creating more private, performant, and accessible web applications. By leveraging these new methods, developers can enhance user experiences while addressing key concerns around privacy and connectivity.


--------------------------------------------------------------------------------
/index.bs:
--------------------------------------------------------------------------------
   1 | <pre class='metadata'>
   2 | Title: Web Speech API
   3 | Level:
   4 | Status: CG-DRAFT
   5 | Group: audiocg
   6 | TR:
   7 | URL: https://webaudio.github.io/web-speech-api/
   8 | Repository: WebAudio/web-speech-api
   9 | Shortname: speech-api
  10 | Editor: Evan Liu, Google
  11 | Former Editor: André Natal, Mozilla
  12 | Former Editor: Glen Shires, Google
  13 | Former Editor: Philip Jägenstedt, Google
  14 | Former Editor: Hans Wennborg, Google
  15 | !Tests: <a href=https://github.com/web-platform-tests/wpt/tree/master/speech-api>web-platform-tests speech-api/</a> (<a href=https://github.com/web-platform-tests/wpt/labels/speech-api>ongoing work</a>)
  16 | Abstract: This specification defines a JavaScript API to enable web developers to incorporate speech recognition and synthesis into their web pages.
  17 | Abstract: It enables developers to use scripting to generate text-to-speech output and to use speech recognition as an input for forms, continuous dictation and control.
  18 | Abstract: The JavaScript API allows web pages to control activation and timing and to handle results and alternatives.
  19 | Markup Shorthands:css no, markdown yes, dfn yes
  20 | Complain about:accidental-2119 yes, missing-example-ids yes
  21 | </pre>
  22 | 
  23 | <pre class=biblio>
  24 | {
  25 |   "HTMLSPEECH": {
  26 |     "authors": [
  27 |       "Michael Bodell",
  28 |       "Björn Bringert",
  29 |       "Robert Brown",
  30 |       "Daniel C. Burnett",
  31 |       "Deborah Dahl",
  32 |       "Dan Druta",
  33 |       "Patrick Ehlen",
  34 |       "Charles Hemphill",
  35 |       "Michael Johnston",
  36 |       "Olli Pettay",
  37 |       "Satish Sampath",
  38 |       "Marc Schröder",
  39 |       "Glen Shires",
  40 |       "Raj Tumuluri",
  41 |       "Milan Young"
  42 |     ],
  43 |     "href": "https://www.w3.org/2005/Incubator/htmlspeech/XGR-htmlspeech-20111206/",
  44 |     "title": "HTML Speech Incubator Group Final Report"
  45 |   }
  46 | }
  47 | </pre>
  48 | 
  49 | <h2 id=introduction>Introduction</h2>
  50 | 
  51 | <p><em>This section is non-normative.</em></p>
  52 | 
  53 | <p>The Web Speech API aims to enable web developers to provide, in a web browser, speech-input and text-to-speech output features that are typically not available when using standard speech-recognition or screen-reader software.
  54 | The API itself is agnostic of the underlying speech recognition and synthesis implementation and can support both server-based and client-based/embedded recognition and synthesis.
  55 | The API is designed to enable both brief (one-shot) speech input and continuous speech input.
  56 | Speech recognition results are provided to the web page as a list of hypotheses, along with other relevant information for each hypothesis.</p>
  57 | 
  58 | <p>This specification is a subset of the API defined in the [[HTMLSPEECH|HTML Speech Incubator Group Final Report]].
  59 | That report is entirely informative since it is not a standards track document.
  60 | All portions of that report may be considered informative with regards to this document, and provide an informative background to this document.
  61 | This specification is a fully-functional subset of that report.
  62 | Specifically, this subset excludes the underlying transport protocol, the proposed additions to HTML markup, and it defines a simplified subset of the JavaScript API.
  63 | This subset supports the majority of use-cases and sample code in the Incubator Group Final Report.
  64 | This subset does not preclude future standardization of additions to the markup, API or underlying transport protocols, and indeed the Incubator Report defines a potential roadmap for such future work.</p>
  65 | 
  66 | 
  67 | <h2 id=use_cases>Use Cases</h2>
  68 | 
  69 | <p><em>This section is non-normative.</em></p>
  70 | 
  71 | <p>This specification supports the following use cases, as defined in [[HTMLSPEECH#use-cases|Section 4 of the Incubator Report]].</p>
  72 | 
  73 | <ul>
  74 |   <li>Voice Web Search</li>
  75 |   <li>Speech Command Interface</li>
  76 |   <li>Continuous Recognition of Open Dialog</li>
  77 |   <li>Speech UI present when no visible UI need be present</li>
  78 |   <li>Voice Activity Detection</li>
  79 |   <li>Temporal Structure of Synthesis to Provide Visual Feedback</li>
  80 |   <li>Hello World</li>
  81 |   <li>Speech Translation</li>
  82 |   <li>Speech Enabled Email Client</li>
  83 |   <li>Dialog Systems</li>
  84 |   <li>Multimodal Interaction</li>
  85 |   <li>Speech Driving Directions</li>
  86 |   <li>Multimodal Video Game</li>
  87 |   <li>Multimodal Search</li>
  88 | </ul>
  89 | 
  90 | <p>To keep the API to a minimum, this specification does not directly support the following use case.
  91 | This does not preclude adding support for this as a future API enhancement, and indeed the Incubator report provides a roadmap for doing so.</p>
  92 | 
  93 | <ul>
  94 |   <li>Rerecognition</li>
  95 | </ul>
  96 | 
  97 | <h2 id=security>Security and privacy considerations</h2>
  98 | 
  99 | <ol>
 100 |   <li>User agents must only start speech input sessions with explicit, informed user consent.
 101 |   User consent can include, for example:
 102 |   <ul>
 103 |     <li>User click on a visible speech input element which has an obvious graphical representation showing that it will start speech input.</li>
 104 |     <li>Accepting a permission prompt shown as the result of a call to {{SpeechRecognition/start()}}.</li>
 105 |     <li>Consent previously granted to always allow speech input for this web page.</li>
 106 |   </ul>
 107 |   </li>
 108 | 
 109 |   <li>User agents must give the user an obvious indication when audio is being recorded.
 110 |   <ul>
 111 |     <li>In a graphical user agent, this could be a mandatory notification displayed by the user agent as part of its chrome and not accessible by the web page.
 112 |     This could for example be a pulsating/blinking record icon as part of the browser chrome/address bar, an indication in the status bar, an audible notification, or anything else relevant and accessible to the user.
 113 |     This UI element must also allow the user to stop recording.<br>
 114 |     <img src="ui-example.png" alt="Example UI recording notification."></li>
 115 | 
 116 |     <li>In a speech-only user agent, the indication may for example take the form of the system speaking the label of the speech input element, followed by a short beep.</li>
 117 |   </ul>
 118 |   </li>
 119 | 
 120 |   <li>The user agent may also give the user a longer explanation the first time speech input is used, to let the user know what it is and how they can tune their privacy settings to disable speech recording if required.</li>
 121 | 
 122 |   <li>To mitigate the risk of fingerprinting, user agents MUST NOT personalize speech recognition when performing speech recognition on a {{MediaStreamTrack}}.</li>
 123 | </ol>
 124 | 
 125 | <h3 id="implementation-considerations">Implementation considerations</h3>
 126 | 
 127 | <p><em>This section is non-normative.</em></p>
 128 | 
 129 | <ol>
 130 |   <li>Spoken password inputs can be problematic from a security perspective, but it is up to the user to decide if they want to speak their password.</li>
 131 | 
 132 |   <li>Speech input could potentially be used to eavesdrop on users.
 133 |   Malicious webpages could use tricks such as hiding the input element or otherwise making the user believe that it has stopped recording speech while continuing to do so.
 134 |   They could also potentially style the input element to appear as something else and trick the user into clicking them.
 135 |   An example of styling the file input element can be seen at <a href="https://www.quirksmode.org/dom/inputfile.html">https://www.quirksmode.org/dom/inputfile.html</a>.
 136 |   The above recommendations are intended to reduce this risk of such attacks.</li>
 137 | </ol>
 138 | 
 139 | <h2 id="api_description">API Description</h2>
 140 | 
 141 | <p><em>This section is normative.</em></p>
 142 | 
 143 | <h3 id="speechreco-section">The SpeechRecognition Interface</h3>
 144 | 
 145 | <p>The speech recognition interface is the scripted web API for controlling a given recognition.</p>
 146 | The term "final result" indicates a {{SpeechRecognitionResult}} in which the {{SpeechRecognitionResult/isFinal}} attribute is true.
 147 | The term "interim result" indicates a {{SpeechRecognitionResult}} in which the {{SpeechRecognitionResult/isFinal}} attribute is false.
 148 | 
 149 | {{SpeechRecognition}} has the following internal slots:
 150 | 
 151 | <dl dfn-type=attribute dfn-for="SpeechRecognition">
 152 |     : <dfn>[[started]]</dfn>
 153 |     ::
 154 |         A boolean flag representing whether the speech recognition started. The initial value is <code>false</code>.
 155 | </dl>
 156 | 
 157 | <dl dfn-type=attribute dfn-for="SpeechRecognition">
 158 |     : <dfn>[[processLocally]]</dfn>
 159 |     ::
 160 |         A boolean flag indicating whether recognition <em class="rfc2119" title="MUST">MUST</em> be performed locally. The initial value is <code>false</code>.
 161 | </dl>
 162 | 
 163 | <dl dfn-type=attribute dfn-for="SpeechRecognition">
 164 |     : <dfn>[[phrases]]</dfn>
 165 |     ::
 166 |         An {{ObservableArray}} of {{SpeechRecognitionPhrase}} objects representing a list of phrases for contextual biasing. The initial value is a new empty {{ObservableArray}}.
 167 | </dl>
 168 | 
 169 | <xmp class="idl">
 170 | [SecureContext, Exposed=Window]
 171 | interface SpeechRecognition : EventTarget {
 172 |     constructor();
 173 | 
 174 |     // recognition parameters
 175 |     attribute SpeechGrammarList grammars;
 176 |     attribute DOMString lang;
 177 |     attribute boolean continuous;
 178 |     attribute boolean interimResults;
 179 |     attribute unsigned long maxAlternatives;
 180 |     attribute boolean processLocally;
 181 |     attribute ObservableArray<SpeechRecognitionPhrase> phrases;
 182 | 
 183 |     // methods to drive the speech interaction
 184 |     undefined start();
 185 |     undefined start(MediaStreamTrack audioTrack);
 186 |     undefined stop();
 187 |     undefined abort();
 188 |     static Promise<AvailabilityStatus> available(SpeechRecognitionOptions options);
 189 |     static Promise<boolean> install(SpeechRecognitionOptions options);
 190 | 
 191 |     // event methods
 192 |     attribute EventHandler onaudiostart;
 193 |     attribute EventHandler onsoundstart;
 194 |     attribute EventHandler onspeechstart;
 195 |     attribute EventHandler onspeechend;
 196 |     attribute EventHandler onsoundend;
 197 |     attribute EventHandler onaudioend;
 198 |     attribute EventHandler onresult;
 199 |     attribute EventHandler onnomatch;
 200 |     attribute EventHandler onerror;
 201 |     attribute EventHandler onstart;
 202 |     attribute EventHandler onend;
 203 | };
 204 | 
 205 | dictionary SpeechRecognitionOptions {
 206 |   required sequence<DOMString> langs;
 207 |   boolean processLocally = false;
 208 | };
 209 | 
 210 | enum SpeechRecognitionErrorCode {
 211 |     "no-speech",
 212 |     "aborted",
 213 |     "audio-capture",
 214 |     "network",
 215 |     "not-allowed",
 216 |     "service-not-allowed",
 217 |     "language-not-supported",
 218 |     "phrases-not-supported"
 219 | };
 220 | 
 221 | enum AvailabilityStatus {
 222 |     "unavailable",
 223 |     "downloadable",
 224 |     "downloading",
 225 |     "available"
 226 | };
 227 | 
 228 | [SecureContext, Exposed=Window]
 229 | interface SpeechRecognitionErrorEvent : Event {
 230 |     constructor(DOMString type, SpeechRecognitionErrorEventInit eventInitDict);
 231 |     readonly attribute SpeechRecognitionErrorCode error;
 232 |     readonly attribute DOMString message;
 233 | };
 234 | 
 235 | dictionary SpeechRecognitionErrorEventInit : EventInit {
 236 |     required SpeechRecognitionErrorCode error;
 237 |     DOMString message = "";
 238 | };
 239 | 
 240 | // Item in N-best list
 241 | [SecureContext, Exposed=Window]
 242 | interface SpeechRecognitionAlternative {
 243 |     readonly attribute DOMString transcript;
 244 |     readonly attribute float confidence;
 245 | };
 246 | 
 247 | // A complete one-shot simple response
 248 | [SecureContext, Exposed=Window]
 249 | interface SpeechRecognitionResult {
 250 |     readonly attribute unsigned long length;
 251 |     getter SpeechRecognitionAlternative item(unsigned long index);
 252 |     readonly attribute boolean isFinal;
 253 | };
 254 | 
 255 | // A collection of responses (used in continuous mode)
 256 | [SecureContext, Exposed=Window]
 257 | interface SpeechRecognitionResultList {
 258 |     readonly attribute unsigned long length;
 259 |     getter SpeechRecognitionResult item(unsigned long index);
 260 | };
 261 | 
 262 | // A full response, which could be interim or final, part of a continuous response or not
 263 | [SecureContext, Exposed=Window]
 264 | interface SpeechRecognitionEvent : Event {
 265 |     constructor(DOMString type, SpeechRecognitionEventInit eventInitDict);
 266 |     readonly attribute unsigned long resultIndex;
 267 |     readonly attribute SpeechRecognitionResultList results;
 268 | };
 269 | 
 270 | dictionary SpeechRecognitionEventInit : EventInit {
 271 |     unsigned long resultIndex = 0;
 272 |     required SpeechRecognitionResultList results;
 273 | };
 274 | 
 275 | // The object representing a speech grammar. This interface has been deprecated and exists in this spec for the sole purpose of maintaining backwards compatibility.
 276 | [Exposed=Window]
 277 | interface SpeechGrammar {
 278 |     attribute DOMString src;
 279 |     attribute float weight;
 280 | };
 281 | 
 282 | // The object representing a speech grammar collection. This interface has been deprecated and exists in this spec for the sole purpose of maintaining backwards compatibility.
 283 | [Exposed=Window]
 284 | interface SpeechGrammarList {
 285 |     constructor();
 286 |     readonly attribute unsigned long length;
 287 |     getter SpeechGrammar item(unsigned long index);
 288 |     undefined addFromURI(DOMString src,
 289 |                     optional float weight = 1.0);
 290 |     undefined addFromString(DOMString string,
 291 |                     optional float weight = 1.0);
 292 | };
 293 | 
 294 | // The object representing a phrase for contextual biasing.
 295 | [SecureContext, Exposed=Window]
 296 | interface SpeechRecognitionPhrase {
 297 |     constructor(DOMString phrase, optional float boost = 1.0);
 298 |     readonly attribute DOMString phrase;
 299 |     readonly attribute float boost;
 300 | };
 301 | </xmp>
 302 | 
 303 | <h4 id="speechreco-attributes">SpeechRecognition Attributes</h4>
 304 | 
 305 | <dl>
 306 |   <dt><dfn attribute for=SpeechRecognition>grammars</dfn> attribute</dt>
 307 |   <dd>The grammars attribute stores the collection of SpeechGrammar objects which represent the grammars that are active for this recognition. 
 308 |   This attribute does nothing and exists in this spec for the sole purpose of maintaining backwards compatibility.</dd>
 309 | 
 310 |   <dt><dfn attribute for=SpeechRecognition>lang</dfn> attribute</dt>
 311 |   <dd>This attribute will set the language of the recognition for the request, using a valid BCP 47 language tag. [[!BCP47]]
 312 |   If unset it remains unset for getting in script, but will default to use the language of the html document root element and associated hierarchy.
 313 |   This default value is computed and used when the input request opens a connection to the recognition service.</dd>
 314 | 
 315 |   <dt><dfn attribute for=SpeechRecognition>continuous</dfn> attribute</dt>
 316 |   <dd>When the continuous attribute is set to false, the user agent must return no more than one final result in response to starting recognition,
 317 |   for example a single turn pattern of interaction.
 318 |   When the continuous attribute is set to true, the user agent must return zero or more final results representing multiple consecutive recognitions in response to starting recognition,
 319 |   for example a dictation.
 320 |   The default value must be false.  Note, this attribute setting does not affect interim results.</dd>
 321 | 
 322 |   <dt><dfn attribute for=SpeechRecognition>interimResults</dfn> attribute</dt>
 323 |   <dd>Controls whether interim results are returned.
 324 |   When set to true, interim results should be returned.
 325 |   When set to false, interim results must not be returned.
 326 |   The default value must be false. Note, this attribute setting does not affect final results.</dd>
 327 | 
 328 |   <dt><dfn attribute for=SpeechRecognition>maxAlternatives</dfn> attribute</dt>
 329 |   <dd>This attribute will set the maximum number of {{SpeechRecognitionAlternative}}s per result.
 330 |   The default value is 1.</dd>
 331 | 
 332 |   <dt><dfn attribute for=SpeechRecognition>processLocally</dfn> attribute</dt>
 333 |   <dd>This attribute, when set to true, indicates a requirement that the speech recognition process <em class="rfc2119" title="MUST">MUST</em> be performed locally on the user's device.
 334 |   If set to false, the user agent can choose between local and remote processing.
 335 |   The default value is false.
 336 |   </dd>
 337 | 
 338 |   <dt><dfn attribute for=SpeechRecognition>phrases</dfn> attribute</dt>
 339 |   <dd>
 340 |     The `phrases` attribute provides a list of {{SpeechRecognitionPhrase}} objects to be used for contextual biasing. This is an {{ObservableArray}}, which can be modified like a JavaScript `Array` (e.g., using `push()`).
 341 |   </dd>
 342 |   <dd>
 343 |     The getter steps are to return the value of {{SpeechRecognition/[[phrases]]}}.
 344 |   </dd>
 345 | </dl>
 346 | 
 347 | <p class=issue>The group has discussed whether WebRTC might be used to specify selection of audio sources and remote recognizers.
 348 | See <a href="https://lists.w3.org/Archives/Public/public-speech-api/2012Sep/0072.html">Interacting with WebRTC, the Web Audio API and other external sources</a> thread on public-speech-api@w3.org.</p>
 349 | 
 350 | <h4 id="speechreco-methods">SpeechRecognition Methods</h4>
 351 | 
 352 | <dl>
 353 |   <dt><dfn method for=SpeechRecognition>start()</dfn> method</dt>
 354 |   <dd>
 355 |     Start the speech recognition process, directly from a microphone on the device.
 356 |     When invoked, run the following steps:
 357 | 
 358 |     1. Let |requestMicrophonePermission| be a boolan variable set to to `true`.
 359 |     1. Run the [=start session algorithm=] with |requestMicrophonePermission|.
 360 |   </dd>
 361 | 
 362 |   <dt><dfn method for=SpeechRecognition>start({{MediaStreamTrack}} audioTrack)</dfn> method</dt>
 363 |   <dd>
 364 |     Start the speech recognition process, using a {{MediaStreamTrack}}
 365 |     When invoked, run the following steps:
 366 | 
 367 |     1. Let |audioTrack| be the first argument.
 368 |     1. If |audioTrack|'s {{MediaStreamTrack/kind}} attribute is NOT `"audio"`,
 369 |         throw an {{InvalidStateError}} and abort these steps.
 370 |     1. If |audioTrack|'s {{MediaStreamTrack/readyState}} attribute is NOT
 371 |         `"live"`, throw an {{InvalidStateError}} and abort these steps.
 372 |     1. Let |requestMicrophonePermission| be `false`.
 373 |     1. Run the [=start session algorithm=] with |requestMicrophonePermission|.
 374 |   </dd>
 375 | 
 376 |   <dt><dfn method for=SpeechRecognition>stop()</dfn> method</dt>
 377 |   <dd>The stop method represents an instruction to the recognition service to stop listening to more audio, and to try and return a result using just the audio that it has already received for this recognition.
 378 |   A typical use of the stop method might be for a web application where the end user is doing the end pointing, similar to a walkie-talkie.
 379 |   The end user might press and hold the space bar to talk to the system and on the space down press the start call would have occurred and when the space bar is released the stop method is called to ensure that the system is no longer listening to the user.
 380 |   Once the stop method is called the speech service must not collect additional audio and must not continue to listen to the user.
 381 |   The speech service must attempt to return a recognition result (or a nomatch) based on the audio that it has already collected for this recognition.
 382 |   If the stop method is called on an object which is already stopped or being stopped (that is, start was never called on it, the <a event for=SpeechRecognition>end</a> or <a event for=SpeechRecognition>error</a> event has fired on it, or stop was previously called on it), the user agent must ignore the call.</dd>
 383 | 
 384 |   <dt><dfn method for=SpeechRecognition>abort()</dfn> method</dt>
 385 |   <dd>The abort method is a request to immediately stop listening and stop recognizing and do not return any information but that the system is done.
 386 |   When the abort method is called, the speech service must stop recognizing.
 387 |   The user agent must raise an <a event for=SpeechRecognition>end</a> event once the speech service is no longer connected.
 388 |   If the abort method is called on an object which is already stopped or aborting (that is, start was never called on it, the <a event for=SpeechRecognition>end</a> or <a event for=SpeechRecognition>error</a> event has fired on it, or abort was previously called on it), the user agent must ignore the call.</dd>
 389 | 
 390 |   <dt><dfn method for=SpeechRecognition>available({{SpeechRecognitionOptions}} options)</dfn> method</dt>
 391 |   <dd>
 392 |     The {{SpeechRecognition/available}} method returns a {{Promise}} that resolves to a {{AvailabilityStatus}} indicating the recognition availability matching the {{SpeechRecognitionOptions}} argument.
 393 |     Access to this method is gated behind the [=policy-controlled feature=] "on-device-speech-recognition", which has a [=policy-controlled feature/default allowlist=] of <code>[=default allowlist/'self'=]</code>.
 394 | 
 395 |     When invoked, run these steps:
 396 |     1. Let <var>promise</var> be <a>a new promise</a>.
 397 |     1. Run the <a>availability algorithm</a> with <var>options</var> and <var>promise</var>. If it returns an exception, throw it and abort these steps.
 398 |     1. Return <var>promise</var>.
 399 |   </dd>
 400 | 
 401 |   <dt><dfn method for=SpeechRecognition>install({{SpeechRecognitionOptions}} options)</dfn> method</dt>
 402 |   <dd>
 403 |     The {{SpeechRecognition/install}} method attempts to install speech recognition language packs for all languages specified in `options.langs`.
 404 |     It returns a {{Promise}} that resolves to a {{boolean}}.
 405 |     The promise resolves to `true` when all installation attempts for requested and supported languages succeed (or the languages were already installed).
 406 |     The promise resolves to `false` if `options.langs` is empty, if not all of the requested languages are supported, or if any installation attempt for a supported language fails.
 407 |     Access to this method is gated behind the [=policy-controlled feature=] "on-device-speech-recognition", which has a [=policy-controlled feature/default allowlist=] of <code>[=default allowlist/'self'=]</code>.
 408 | 
 409 |     When invoked, run these steps:
 410 |     1. If the [=current settings object=]'s [=relevant global object=]'s [=associated Document=] is NOT [=fully active=], throw an {{InvalidStateError}} and abort these steps.
 411 |     1. If any <var>lang</var> in {{SpeechRecognitionOptions/langs}} of <var>options</var> is not a valid [[!BCP47]] language tag, throw a {{SyntaxError}} and abort these steps.
 412 |     1. If the on-device speech recognition language pack for any <var>lang</var> in {{SpeechRecognitionOptions/langs}} of <var>options</var> is unsupported, return a resolved {{Promise}} with false and skip the rest of these steps.
 413 |     1. Let <var>promise</var> be <a>a new promise</a>.
 414 |     1. For each <var>lang</var> in {{SpeechRecognitionOptions/langs}} of <var>options</var>, initiate the download of the on-device speech recognition language for <var>lang</var>.
 415 |         <p class=note>
 416 |           Note: The user agent can prompt the user for explicit permission to download the on-device speech recognition language pack.
 417 |         </p>
 418 |     1. [=Queue a task=] on the [=relevant global object=]'s [=task queue=] to run the following step:
 419 |         - When the download of all languages specified by {{SpeechRecognitionOptions/langs}} of <var>options</var> succeeds, resolve <var>promise</var> with <code>true</code>, otherwise resolve it with <code>false</code>.
 420 |             <p class="note">
 421 |               Note: The <code>false</code> resolution of the Promise does not indicate the specific cause of failure. User agents are encouraged to provide more detailed information about the failure in developer tools console messages. However, this detailed error information is not exposed to the script.
 422 |             </p>
 423 |     1. Return <var>promise</var>.
 424 |         <p class=note>
 425 |           {{SpeechRecognitionOptions/processLocally}} of <var>options</var> is not used in this algorithm.
 426 |         </p>
 427 |   </dd>
 428 | 
 429 | </dl>
 430 | 
 431 | <h4 id="availability-status-values">AvailabilityStatus Enum Values</h4>
 432 | <p>The {{AvailabilityStatus}} enum indicates the availability of speech recognition capabilities. Its values are:</p>
 433 | <dl>
 434 |   <dt><dfn enum-value for="AvailabilityStatus">"unavailable"</dfn></dt>
 435 |   <dd>Indicates that speech recognition is not available for the specified language(s) and processing preference.
 436 |   If {{SpeechRecognitionOptions/processLocally}} of <var>options</var> is `true`, this means on-device recognition for the language is not supported by the user agent.
 437 |   If {{SpeechRecognitionOptions/processLocally}} of <var>options</var> is `false`, it means neither local nor remote recognition is available for at least one of the specified languages.</dd>
 438 | 
 439 |   <dt><dfn enum-value for="AvailabilityStatus">"downloadable"</dfn></dt>
 440 |   <dd>Indicates that on-device speech recognition for the specified language(s) is supported by the user agent but not yet installed. It can potentially be installed using the {{SpeechRecognition/install()}} method. This status is primarily relevant when {{SpeechRecognitionOptions/processLocally}} of <var>options</var> is true.</dd>
 441 | 
 442 |   <dt><dfn enum-value for="AvailabilityStatus">"downloading"</dfn></dt>
 443 |   <dd>Indicates that on-device speech recognition for the specified language(s) is currently in the process of being downloaded. This status is primarily relevant when {{SpeechRecognitionOptions/processLocally}} of <var>options</var> is true.</dd>
 444 | 
 445 |   <dt><dfn enum-value for="AvailabilityStatus">"available"</dfn></dt>
 446 |   <dd>Indicates that speech recognition is available for all specified language(s) and the given processing preference.
 447 |   If {{SpeechRecognitionOptions/processLocally}} of <var>options</var> is true, this means on-device recognition is installed and ready.
 448 |   If {{SpeechRecognitionOptions/processLocally}} of <var>options</var> is false, it means recognition (which could be local or remote) is available.</dd>
 449 | </dl>
 450 | 
 451 | <p>When the  <dfn>availability algorithm</dfn> with <var>options</var> and <var>promise</var> is invoked, the user agent MUST run the following steps:
 452 | 1. If the [=current settings object=]'s [=relevant global object=]'s [=associated Document=] is NOT [=fully active=], throw an {{InvalidStateError}} and abort these steps.
 453 | 1. Let <var>langs</var> be {{SpeechRecognitionOptions/langs}} of <var>options</var>.
 454 | 1. If any <var>lang</var> in <var>langs</var> is not a valid [[!BCP47]] language tag, throw a {{SyntaxError}} and abort these steps.
 455 | 1. If {{SpeechRecognitionOptions/processLocally}} of <var>options</var> is `false`:
 456 |     1. If <var>langs</var> is an empty sequence, let <var>status</var> be {{AvailabilityStatus/unavailable}}.
 457 |     1. Else if speech recognition (which may be remote) is available for all <var>language</var> in <var>langs</var>, let <var>status</var> be {{AvailabilityStatus/available}}.
 458 |     1. Else, let <var>status</var> be {{AvailabilityStatus/unavailable}}.
 459 | 1. If {{SpeechRecognitionOptions/processLocally}} of <var>options</var> is `true`:
 460 |       <ol type=a>
 461 |         <li>If <var>langs</var> is an empty sequence, let <var>status</var> be {{AvailabilityStatus/unavailable}}.</li>
 462 |         <li>Else:
 463 |           <ol type=i>
 464 |             <li>Let <var>finalStatus</var> be {{AvailabilityStatus/available}}.</li>
 465 |             <li>For each <var>language</var> in <var>langs</var>:
 466 |               <ol>
 467 |                 <li>Let <var>currentLanguageStatus</var>.</li>
 468 |                 <li>If on-device speech recognition for <var>language</var> is installed, set <var>currentLanguageStatus</var> to {{AvailabilityStatus/available}}.</li>
 469 |                 <li>Else if on-device speech recognition for <var>language</var> is currently being downloaded, set <var>currentLanguageStatus</var> to {{AvailabilityStatus/downloading}}.</li>
 470 |                 <li>Else if on-device speech recognition for <var>language</var> is supported by the user agent but not yet installed, set <var>currentLanguageStatus</var> to {{AvailabilityStatus/downloadable}}.</li>
 471 |                 <li>Else (on-device speech recognition for <var>language</var> is not supported), set <var>currentLanguageStatus</var> to {{AvailabilityStatus/unavailable}}.</li>
 472 |                 <li>If <var>currentLanguageStatus</var> comes after <var>finalStatus</var> in the ordered list `[{{AvailabilityStatus/available}}, {{AvailabilityStatus/downloading}}, {{AvailabilityStatus/downloadable}}, {{AvailabilityStatus/unavailable}}]`, set <var>finalStatus</var> to <var>currentLanguageStatus</var>.</li>
 473 |               </ol>
 474 |             </li>
 475 |             <li>Let <var>status</var> be <var>finalStatus</var>.</li>
 476 |           </ol>
 477 |         </li>
 478 |       </ol>
 479 | 1. [=Queue a task=] on the [=relevant global object=]'s [=task queue=] to run the following step:
 480 |     - Resolve <var>promise</var> with <var>status</var>.
 481 | 
 482 | When the <dfn>start session algorithm</dfn> with
 483 | |requestMicrophonePermission| is invoked, the user agent MUST run the
 484 | following steps:
 485 | 
 486 | 1. If the [=current settings object=]'s [=relevant global object=]'s
 487 |      [=associated Document=] is NOT [=fully active=], throw an {{InvalidStateError}}
 488 |      and abort these steps.
 489 | 1. If {{SpeechRecognition/[[started]]}} is `true` and no <a event
 490 |     for=SpeechRecognition>error</a> event or <a event for=SpeechRecognition>end</a> event
 491 |     has fired on it, throw an {{InvalidStateError}} and abort these steps.
 492 | 1. If this.{{SpeechRecognition/phrases}}'s `length` is greater than 0 and the user agent does not support contextual biasing:
 493 |     1. [=Queue a task=] to [=fire an event=] named <a event for=SpeechRecognition>error</a> at [=this=] using {{SpeechRecognitionErrorEvent}} with its {{SpeechRecognitionErrorEvent/error}} attribute initialized to `phrases-not-supported` and its {{SpeechRecognitionErrorEvent/message}} attribute set to an implementation-defined string detailing the reason.
 494 |     1. Abort these steps.
 495 | 1. If this.{{SpeechRecognition/[[processLocally]]}} is `true`:
 496 |     1. If the user agent determines that local speech recognition is not available for this.{{SpeechRecognition/lang}}, or if it cannot fulfill the local processing requirement for other reasons:
 497 |         1. [=Queue a task=] to [=fire an event=] named <a event for=SpeechRecognition>error</a> at [=this=] using {{SpeechRecognitionErrorEvent}} with its {{SpeechRecognitionErrorEvent/error}} attribute initialized to {{SpeechRecognitionErrorCode/service-not-allowed}} and its {{SpeechRecognitionErrorEvent/message}} attribute set to an implementation-defined string detailing the reason.
 498 |         1. Abort these steps.
 499 | 1. Set {{[[started]]}} to `true`.
 500 | 1. If |requestMicrophonePermission| is `true` and [=request
 501 |     permission to use=] "`microphone`" is [=permission/"denied"=]:
 502 |     1. [=Queue a task=] to [=fire an event=] named <a event for=SpeechRecognition>error</a> at [=this=] using {{SpeechRecognitionErrorEvent}} with its {{SpeechRecognitionErrorEvent/error}} attribute initialized to {{SpeechRecognitionErrorCode/not-allowed}} and its {{SpeechRecognitionErrorEvent/message}} attribute set to an implementation-defined string detailing the reason.
 503 |     1. Abort these steps.
 504 | 1. Once the system is successfully listening to the recognition, queue a task to
 505 |     [=fire an event=] named <a event for=SpeechRecognition>start</a> at [=this=].
 506 | 
 507 | <h4 id="speechreco-events">SpeechRecognition Events</h4>
 508 | 
 509 | <p>The DOM Level 2 Event Model is used for speech recognition events.
 510 | The methods in the EventTarget interface should be used for registering event listeners.
 511 | The SpeechRecognition interface also contains convenience attributes for registering a single event handler for each event type.
 512 | These events do not bubble and are not cancelable.</p>
 513 | 
 514 | <p>For all these events, the timeStamp attribute defined in the DOM Level 2 Event interface must be set to the best possible estimate of when the real-world event which the event object represents occurred.
 515 | This timestamp must be represented in the user agent's view of time, even for events where the timestamps in question could be raised on a different machine like a remote recognition service (i.e., in a <a event for=SpeechRecognition>speechend</a> event with a remote speech endpointer).</p>
 516 | 
 517 | <p>Unless specified below, the ordering of the different events is undefined.
 518 | For example, some implementations may fire <a event for=SpeechRecognition>audioend</a> before <a event for=SpeechRecognition>speechstart</a> or <a event for=SpeechRecognition>speechend</a> if the audio detector is client-side and the speech detector is server-side.</p>
 519 | 
 520 | <dl>
 521 |   <dt><dfn event for=SpeechRecognition>audiostart</dfn> event</dt>
 522 |   <dd>Fired when the user agent has started to capture audio.</dd>
 523 | 
 524 |   <dt><dfn event for=SpeechRecognition>soundstart</dfn> event</dt>
 525 |   <dd>Fired when some sound, possibly speech, has been detected.
 526 |   This must be fired with low latency, e.g. by using a client-side energy detector.
 527 |   The <a event for=SpeechRecognition>audiostart</a> event must always have been fired before the soundstart event.</dd>
 528 | 
 529 |   <dt><dfn event for=SpeechRecognition>speechstart</dfn> event</dt>
 530 |   <dd>Fired when the speech that will be used for speech recognition has started.
 531 |   The <a event for=SpeechRecognition>audiostart</a> event must always have been fired before the speechstart event.</dd>
 532 | 
 533 |   <dt><dfn event for=SpeechRecognition>speechend</dfn> event</dt>
 534 |   <dd>Fired when the speech that will be used for speech recognition has ended.
 535 |   The <a event for=SpeechRecognition>speechstart</a> event must always have been fired before speechend.</dd>
 536 | 
 537 |   <dt><dfn event for=SpeechRecognition>soundend</dfn> event</dt>
 538 |   <dd>Fired when some sound is no longer detected.
 539 |   This must be fired with low latency, e.g. by using a client-side energy detector.
 540 |   The <a event for=SpeechRecognition>soundstart</a> event must always have been fired before soundend.</dd>
 541 | 
 542 |   <dt><dfn event for=SpeechRecognition>audioend</dfn> event</dt>
 543 |   <dd>Fired when the user agent has finished capturing audio.
 544 |   The <a event for=SpeechRecognition>audiostart</a> event must always have been fired before audioend.</dd>
 545 | 
 546 |   <dt><dfn event for=SpeechRecognition>result</dfn> event</dt>
 547 |   <dd>Fired when the speech recognizer returns a result.
 548 |   The event must use the {{SpeechRecognitionEvent}} interface.
 549 |   The <a event for=SpeechRecognition>audiostart</a> event must always have been fired before the result event.</dd>
 550 | 
 551 |   <dt><dfn event for=SpeechRecognition>nomatch</dfn> event</dt>
 552 |   <dd>Fired when the speech recognizer returns a final result with no recognition hypothesis that meet or exceed the confidence threshold.
 553 |   The event must use the {{SpeechRecognitionEvent}} interface.
 554 |   The {{SpeechRecognitionEvent/results}} attribute in the event may contain speech recognition results that are below the confidence threshold or may be null.
 555 |   The {{audiostart}} event must always have been fired before the nomatch event.</dd>
 556 | 
 557 |   <dt><dfn event for=SpeechRecognition>error</dfn> event</dt>
 558 |   <dd>Fired when a speech recognition error occurs.
 559 |   The event must use the {{SpeechRecognitionErrorEvent}} interface.</dd>
 560 | 
 561 |   <dt><dfn event for=SpeechRecognition>start</dfn> event</dt>
 562 |   <dd>Fired when the recognition service has begun to listen to the audio with the intention of recognizing.
 563 | 
 564 |   </dd><dt><dfn event for=SpeechRecognition>end</dfn> event</dt>
 565 |   <dd>Fired when the service has disconnected.
 566 |   The event must always be generated when the session ends no matter the reason for the end.</dd>
 567 | </dl>
 568 | 
 569 | <h4 id="speechreco-error">SpeechRecognitionErrorEvent</h4>
 570 | 
 571 | <p>The {{SpeechRecognitionErrorEvent}} interface is used for the <a event for=SpeechRecognition>error</a> event.</p>
 572 | <dl>
 573 |   <dt><dfn attribute for=SpeechRecognitionErrorEvent>error</dfn> attribute</dt>
 574 |   <dd>The errorCode is an enumeration indicating what has gone wrong.
 575 |   The values are:
 576 |   <dl>
 577 |     <dt><dfn enum-value for=SpeechRecognitionErrorCode>"no-speech"</dfn></dt>
 578 |     <dd>No speech was detected.</dd>
 579 | 
 580 |     <dt><dfn enum-value for=SpeechRecognitionErrorCode>"aborted"</dfn></dt>
 581 |     <dd>Speech input was aborted somehow, maybe by some user-agent-specific behavior such as UI that lets the user cancel speech input.</dd>
 582 | 
 583 |     <dt><dfn enum-value for=SpeechRecognitionErrorCode>"audio-capture"</dfn></dt>
 584 |     <dd>Audio capture failed.</dd>
 585 | 
 586 |     <dt><dfn enum-value for=SpeechRecognitionErrorCode>"network"</dfn></dt>
 587 |     <dd>Some network communication that was required to complete the recognition failed.</dd>
 588 | 
 589 |     <dt><dfn enum-value for=SpeechRecognitionErrorCode>"not-allowed"</dfn></dt>
 590 |     <dd>The user agent is not allowing any speech input to occur for reasons of security, privacy or user preference.</dd>
 591 | 
 592 |     <dt><dfn enum-value for=SpeechRecognitionErrorCode>"service-not-allowed"</dfn></dt>
 593 |     <dd>The user agent is not allowing the web application requested speech service, but would allow some speech service, to be used either because the user agent doesn't support the selected one or because of reasons of security, privacy or user preference.</dd>
 594 | 
 595 |     <dt><dfn enum-value for=SpeechRecognitionErrorCode>"language-not-supported"</dfn></dt>
 596 |     <dd>The language was not supported.</dd>
 597 | 
 598 |     <dt><dfn enum-value for=SpeechRecognitionErrorCode>"phrases-not-supported"</dfn></dt>
 599 |     <dd>The speech recognition model does not support phrases for contextual biasing.</dd>
 600 |   </dl>
 601 |   </dd>
 602 | 
 603 |   <dt><dfn attribute for=SpeechRecognitionErrorEvent>message</dfn> attribute</dt>
 604 |   <dd>The message content is implementation specific.
 605 |   This attribute is primarily intended for debugging and developers should not use it directly in their application user interface.</dd>
 606 | </dl>
 607 | 
 608 | <h4 id="speechreco-alternative">SpeechRecognitionAlternative</h4>
 609 | 
 610 | <p>The SpeechRecognitionAlternative represents a simple view of the response that gets used in a n-best list.
 611 | 
 612 | <dl>
 613 |   <dt><dfn attribute for=SpeechRecognitionAlternative>transcript</dfn> attribute</dt>
 614 |   <dd>The transcript string represents the raw words that the user spoke.
 615 |   For continuous recognition, leading or trailing whitespace MUST be included where necessary such that concatenation of consecutive SpeechRecognitionResults produces a proper transcript of the session.</dd>
 616 | 
 617 |   <dt><dfn attribute for=SpeechRecognitionAlternative>confidence</dfn> attribute</dt>
 618 |   <dd>The confidence represents a numeric estimate between 0 and 1 of how confident the recognition system is that the recognition is correct.
 619 |   A higher number means the system is more confident.
 620 |   <p class=issue>The group has discussed whether confidence can be specified in a speech-recognition-engine-independent manner and whether confidence threshold and nomatch should be included, because this is not a dialog API.
 621 |   See <a href="https://lists.w3.org/Archives/Public/public-speech-api/2012Jun/0143.html">Confidence property</a> thread on public-speech-api@w3.org.</p></dd>
 622 | </dl>
 623 | 
 624 | <h4 id="speechreco-result">SpeechRecognitionResult</h4>
 625 | 
 626 | <p>The SpeechRecognitionResult object represents a single one-shot recognition match, either as one small part of a continuous recognition or as the complete return result of a non-continuous recognition.</p>
 627 | 
 628 | <dl>
 629 |   <dt><dfn attribute for=SpeechRecognitionResult>length</dfn> attribute</dt>
 630 |   <dd>The long attribute represents how many n-best alternatives are represented in the item array.</dd>
 631 | 
 632 |   <dt><dfn method for=SpeechRecognitionResult>item(<var>index</var>)</dfn> getter</dt>
 633 |   <dd>The item getter returns a SpeechRecognitionAlternative from the index into an array of n-best values.
 634 |   If index is greater than or equal to length, this returns null.
 635 |   The user agent must ensure that the length attribute is set to the number of elements in the array.
 636 |   The user agent must ensure that the n-best list is sorted in non-increasing confidence order (each element must be less than or equal to the confidence of the preceding elements).</dd>
 637 | 
 638 |   <dt><dfn attribute for=SpeechRecognitionResult>isFinal</dfn> attribute</dt>
 639 |   <dd>The final boolean must be set to true if this is the final time the speech service will return this particular index value.
 640 |   If the value is false, then this represents an interim result that could still be changed.</dd>
 641 | </dl>
 642 | 
 643 | <h4 id="speechreco-resultlist">SpeechRecognitionResultList</h4>
 644 | 
 645 | <p>The SpeechRecognitionResultList object holds a sequence of recognition results representing the complete return result of a continuous recognition.
 646 | For a non-continuous recognition it will hold only a single value.</p>
 647 | 
 648 | <dl>
 649 |   <dt><dfn attribute for=SpeechRecognitionResultList>length</dfn> attribute</dt>
 650 |   <dd>The length attribute indicates how many results are represented in the item array.</dd>
 651 | 
 652 |   <dt><dfn method for=SpeechRecognitionResultList>item(<var>index</var>)</dfn> getter</dt>
 653 |   <dd>The item getter returns a SpeechRecognitionResult from the index into an array of result values.
 654 |   If index is greater than or equal to length, this returns null.
 655 |   The user agent must ensure that the length attribute is set to the number of elements in the array.</dd>
 656 | </dl>
 657 | 
 658 | <h4 id="speechreco-event">SpeechRecognitionEvent</h4>
 659 | 
 660 | <p>The SpeechRecognitionEvent is the event that is raised each time there are any changes to interim or final results.</p>
 661 | 
 662 | <dl>
 663 |   <dt><dfn attribute for=SpeechRecognitionEvent>resultIndex</dfn> attribute</dt>
 664 |   <dd>The resultIndex must be set to the lowest index in the "results" array that has changed.</dd>
 665 | 
 666 |   <dt><dfn attribute for=SpeechRecognitionEvent>results</dfn> attribute</dt>
 667 |   <dd>The array of all current recognition results for this session.
 668 |   Specifically all final results that have been returned, followed by the current best hypothesis for all interim results.
 669 |   It must consist of zero or more final results followed by zero or more interim results.
 670 |   On subsequent SpeechRecognitionResultEvent events, interim results may be overwritten by a newer interim result or by a final result or may be removed (when at the end of the "results" array and the array length decreases).
 671 |   Final results must not be overwritten or removed.
 672 |   All entries for indexes less than resultIndex must be identical to the array that was present when the last SpeechRecognitionResultEvent was raised.
 673 |   All array entries (if any) for indexes equal or greater than resultIndex that were present in the array when the last SpeechRecognitionResultEvent was raised are removed and overwritten with new results.
 674 |   The length of the "results" array may increase or decrease, but must not be less than resultIndex.
 675 |   Note that when resultIndex equals results.length, no new results are returned, this may occur when the array length decreases to remove one or more interim results.</dd>
 676 | </dl>
 677 | 
 678 | <h4 id="speechreco-phrase">SpeechRecognitionPhrase</h4>
 679 | 
 680 | <p>The SpeechRecognitionPhrase object represents a phrase for contextual biasing and has the following internal slots:</p>
 681 | 
 682 | <dl dfn-type=attribute dfn-for="SpeechRecognitionPhrase">
 683 |     : <dfn>[[phrase]]</dfn>
 684 |     ::
 685 |         A {{DOMString}} representing the text string to be boosted. The initial value is null.
 686 |         An empty value is allowed but should be ignored by the speech recognition model.
 687 | </dl>
 688 | 
 689 | <dl dfn-type=attribute dfn-for="SpeechRecognitionPhrase">
 690 |     : <dfn>[[boost]]</dfn>
 691 |     ::
 692 |         A float representing approximately the natural log of the number of times more likely the website thinks this phrase is
 693 |         than what the speech recognition model knows.
 694 |         A valid boost must be a float value inside the range [0.0, 10.0], with a default value of 1.0 if not specified.
 695 |         A boost of 0.0 means the phrase is not boosted at all, and a higher boost means the phrase is more likely to appear.
 696 |         A boost of 10.0 means the phrase is extremely likely to appear and should be rarely set.
 697 | </dl>
 698 | 
 699 | <dl>
 700 |   <dt><dfn constructor for=SpeechRecognitionPhrase>SpeechRecognitionPhrase(|phrase|, |boost|)</dfn> constructor</dt>
 701 |   <dd>
 702 |     When this constructor is invoked, run the following steps:
 703 |     1. If |boost| is smaller than 0.0 or greater than 10.0, throw a {{SyntaxError}} and abort these steps.
 704 |     1. Let |phr| be a new object of type {{SpeechRecognitionPhrase}}.
 705 |     1. Set |phr|.{{[[phrase]]}} to be the value of |phrase|.
 706 |     1. Set |phr|.{{[[boost]]}} to be the value of |boost|.
 707 |     1. Return |phr|.
 708 |   </dd>
 709 | 
 710 |   <dt><dfn attribute for=SpeechRecognitionPhrase>phrase</dfn> attribute</dt>
 711 |   <dd>This attribute returns the value of {{[[phrase]]}}.</dd>
 712 | 
 713 |   <dt><dfn attribute for=SpeechRecognitionPhrase>boost</dfn> attribute</dt>
 714 |   <dd>This attribute returns the value of {{[[boost]]}}.</dd>
 715 | </dl>
 716 | 
 717 | <h4 id="speechreco-speechgrammar">SpeechGrammar</h4>
 718 | 
 719 | <p>The SpeechGrammar object represents a container for a grammar.</p>
 720 | <p class=note>Grammar support has been deprecated and removed. The grammar objects remain in the spec for backwards compatibility purposes only and do not affect speech recognition.</p>
 721 | <p>This structure has the following attributes:</p>
 722 | 
 723 | <dl>
 724 |   <dt><dfn attribute for=SpeechGrammar>src</dfn> attribute</dt>
 725 |   <dd>The required src attribute is the URI for the grammar.</dd>
 726 | 
 727 |   <dt><dfn attribute for=SpeechGrammar>weight</dfn> attribute</dt>
 728 |   <dd>The optional weight attribute controls the weight that the speech recognition service should use with this grammar.
 729 |   By default, a grammar has a weight of 1.
 730 |   Larger weight values positively weight the grammar while smaller weight values make the grammar weighted less strongly.</dd>
 731 | </dl>
 732 | 
 733 | <h4 id="speechreco-speechgrammarlist">SpeechGrammarList</h4>
 734 | 
 735 | <p>The SpeechGrammarList object represents a collection of SpeechGrammar objects.
 736 | This structure has the following attributes:</p>
 737 | <p class=note>Grammar support has been deprecated and removed. The grammar objects remain in the spec for backwards compatibility purposes only and do not affect speech recognition.</p>
 738 | 
 739 | <dl>
 740 |   <dt><dfn attribute for=SpeechGrammarList>length</dfn> attribute</dt>
 741 |   <dd>The length attribute represents how many grammars are currently in the array.</dd>
 742 | 
 743 |   <dt><dfn method for=SpeechGrammarList>item(<var>index</var>)</dfn> getter</dt>
 744 |   <dd>The item getter returns a SpeechGrammar from the index into an array of grammars.
 745 |   The user agent must ensure that the length attribute is set to the number of elements in the array.
 746 |   The user agent must ensure that the index order from smallest to largest matches the order in which grammars were added to the array.</dd>
 747 | 
 748 |   <dt><dfn method for=SpeechGrammarList>addFromURI(<var>src</var>, <var>weight</var>)</dfn> method</dt>
 749 |   <dd>This method appends a grammar to the grammars array parameter based on URI.
 750 |   The URI for the grammar is specified by the <var>src</var> parameter, which represents the URI for the grammar.
 751 |   Note, some services may support builtin grammars that can be specified by URI.
 752 |   The <var>weight</var> parameter represents this grammar's weight relative to the other grammar.
 753 | 
 754 |   <dt><dfn method for=SpeechGrammarList>addFromString(<var>string</var>, <var>weight</var>)</dfn> method</dt>
 755 |   <dd>This method appends a grammar to the grammars array parameter based on text.
 756 |   The content of the grammar is specified by the <var>string</var> parameter.
 757 |   This content should be encoded into a data: URI when the SpeechGrammar object is created.
 758 |   The <var>weight</var> parameter represents this grammar's weight relative to the other grammar.
 759 | </dl>
 760 | 
 761 | <h3 id="tts-section">The SpeechSynthesis Interface</h3>
 762 | 
 763 | <p>The SpeechSynthesis interface is the scripted web API for controlling a text-to-speech output.</p>
 764 | 
 765 | <pre class="idl">
 766 | [Exposed=Window]
 767 | interface SpeechSynthesis : EventTarget {
 768 |     readonly attribute boolean pending;
 769 |     readonly attribute boolean speaking;
 770 |     readonly attribute boolean paused;
 771 | 
 772 |     attribute EventHandler onvoiceschanged;
 773 | 
 774 |     undefined speak(SpeechSynthesisUtterance utterance);
 775 |     undefined cancel();
 776 |     undefined pause();
 777 |     undefined resume();
 778 |     sequence&lt;SpeechSynthesisVoice> getVoices();
 779 | };
 780 | 
 781 | partial interface Window {
 782 |     [SameObject] readonly attribute SpeechSynthesis speechSynthesis;
 783 | };
 784 | 
 785 | [Exposed=Window]
 786 | interface SpeechSynthesisUtterance : EventTarget {
 787 |     constructor(optional DOMString text);
 788 | 
 789 |     attribute DOMString text;
 790 |     attribute DOMString lang;
 791 |     attribute SpeechSynthesisVoice? voice;
 792 |     attribute float volume;
 793 |     attribute float rate;
 794 |     attribute float pitch;
 795 | 
 796 |     attribute EventHandler onstart;
 797 |     attribute EventHandler onend;
 798 |     attribute EventHandler onerror;
 799 |     attribute EventHandler onpause;
 800 |     attribute EventHandler onresume;
 801 |     attribute EventHandler onmark;
 802 |     attribute EventHandler onboundary;
 803 | };
 804 | 
 805 | [Exposed=Window]
 806 | interface SpeechSynthesisEvent : Event {
 807 |     constructor(DOMString type, SpeechSynthesisEventInit eventInitDict);
 808 |     readonly attribute SpeechSynthesisUtterance utterance;
 809 |     readonly attribute unsigned long charIndex;
 810 |     readonly attribute unsigned long charLength;
 811 |     readonly attribute float elapsedTime;
 812 |     readonly attribute DOMString name;
 813 | };
 814 | 
 815 | dictionary SpeechSynthesisEventInit : EventInit {
 816 |     required SpeechSynthesisUtterance utterance;
 817 |     unsigned long charIndex = 0;
 818 |     unsigned long charLength = 0;
 819 |     float elapsedTime = 0;
 820 |     DOMString name = "";
 821 | };
 822 | 
 823 | enum SpeechSynthesisErrorCode {
 824 |     "canceled",
 825 |     "interrupted",
 826 |     "audio-busy",
 827 |     "audio-hardware",
 828 |     "network",
 829 |     "synthesis-unavailable",
 830 |     "synthesis-failed",
 831 |     "language-unavailable",
 832 |     "voice-unavailable",
 833 |     "text-too-long",
 834 |     "invalid-argument",
 835 |     "not-allowed",
 836 | };
 837 | 
 838 | [Exposed=Window]
 839 | interface SpeechSynthesisErrorEvent : SpeechSynthesisEvent {
 840 |     constructor(DOMString type, SpeechSynthesisErrorEventInit eventInitDict);
 841 |     readonly attribute SpeechSynthesisErrorCode error;
 842 | };
 843 | 
 844 | dictionary SpeechSynthesisErrorEventInit : SpeechSynthesisEventInit {
 845 |     required SpeechSynthesisErrorCode error;
 846 | };
 847 | 
 848 | [Exposed=Window]
 849 | interface SpeechSynthesisVoice {
 850 |     readonly attribute DOMString voiceURI;
 851 |     readonly attribute DOMString name;
 852 |     readonly attribute DOMString lang;
 853 |     readonly attribute boolean localService;
 854 |     readonly attribute boolean default;
 855 | };
 856 | </pre>
 857 | 
 858 | <h4 id="tts-attributes">SpeechSynthesis Attributes</h4>
 859 | 
 860 | <dl>
 861 |   <dt><dfn attribute for=SpeechSynthesis>pending</dfn> attribute</dt>
 862 |   <dd>This attribute is true if the queue for the global SpeechSynthesis instance contains any utterances which have not started speaking.</dd>
 863 | 
 864 |   <dt><dfn attribute for=SpeechSynthesis>speaking</dfn> attribute</dt>
 865 |   <dd>This attribute is true if an utterance is being spoken.
 866 |   Specifically if an utterance has begun being spoken and has not completed being spoken.
 867 |   This is independent of whether the global SpeechSynthesis instance is in the paused state.</dd>
 868 | 
 869 |   <dt><dfn attribute for=SpeechSynthesis>paused</dfn> attribute</dt>
 870 |   <dd>This attribute is true when the global SpeechSynthesis instance is in the paused state.
 871 |   This state is independent of whether anything is in the queue.
 872 |   The default state of a the global SpeechSynthesis instance for a new window is the non-paused state.</dd>
 873 | </dl>
 874 | 
 875 | <h4 id="tts-methods">SpeechSynthesis Methods</h4>
 876 | 
 877 | <dl>
 878 |   <dt><dfn method for=SpeechSynthesis>speak(<var>utterance</var>)</dfn> method</dt>
 879 |   <dd>This method appends the SpeechSynthesisUtterance object <var>utterance</var> to the end of the queue for the global SpeechSynthesis instance.
 880 |   It does not change the paused state of the SpeechSynthesis instance.
 881 |   If the SpeechSynthesis instance is paused, it remains paused.
 882 |   If it is not paused and no other utterances are in the queue, then this utterance is spoken immediately,
 883 |   else this utterance is queued to begin speaking after the other utterances in the queue have been spoken.
 884 |   If changes are made to the SpeechSynthesisUtterance object after calling this method and prior to the corresponding <a event for=SpeechSynthesisUtterance>end</a> or <a event for=SpeechSynthesisUtterance>error</a> event,
 885 |   it is not defined whether those changes will affect what is spoken, and those changes may cause an error to be returned.
 886 |   The SpeechSynthesis object takes exclusive ownership of the SpeechSynthesisUtterance object.
 887 |   Passing it as a speak() argument to another SpeechSynthesis object should throw an exception.
 888 |   (For example, two frames may have the same origin and each will contain a SpeechSynthesis object.)</dd>
 889 | 
 890 |   <dt><dfn method for=SpeechSynthesis>cancel()</dfn> method</dt>
 891 |   <dd>This method removes all utterances from the queue.
 892 |   If an utterance is being spoken, speaking ceases immediately.
 893 |   This method does not change the paused state of the global SpeechSynthesis instance.</dd>
 894 | 
 895 |   <dt><dfn method for=SpeechSynthesis>pause()</dfn> method</dt>
 896 |   <dd>This method puts the global SpeechSynthesis instance into the paused state.
 897 |   If an utterance was being spoken, it pauses mid-utterance.
 898 |   (If called when the SpeechSynthesis instance was already in the paused state, it does nothing.)</dd>
 899 | 
 900 |   <dt><dfn method for=SpeechSynthesis>resume()</dfn> method</dt>
 901 |   <dd>This method puts the global SpeechSynthesis instance into the non-paused state.
 902 |   If an utterance was speaking, it continues speaking the utterance at the point at which it was paused, else it begins speaking the next utterance in the queue (if any).
 903 |   (If called when the SpeechSynthesis instance was already in the non-paused state, it does nothing.)</dd>
 904 | 
 905 |   <dt><dfn method for=SpeechSynthesis>getVoices()</dfn> method</dt>
 906 |   <dd>This method returns the available voices.
 907 |   It is user agent dependent which voices are available.
 908 |   If there are no voices available, or if the the list of available voices is not yet known (for example: server-side synthesis where the list is determined asynchronously),
 909 |   then this method must return a SpeechSynthesisVoiceList of length zero.</dd>
 910 | </dl>
 911 | 
 912 | <h4 id="tts-events">SpeechSynthesis Events</h4>
 913 | 
 914 | <dl>
 915 |   <dt><dfn event for=SpeechSynthesis>voiceschanged</dfn> event</dt>
 916 |   <dd>Fired when the contents of the SpeechSynthesisVoiceList, that the getVoices method will return, have changed.
 917 |   Examples include: server-side synthesis where the list is determined asynchronously, or when client-side voices are installed/uninstalled.</dd>
 918 | </dl>
 919 | 
 920 | <h4 id="utterance-attributes">SpeechSynthesisUtterance Attributes</h4>
 921 | 
 922 | <dl>
 923 |   <dt><dfn attribute for=SpeechSynthesisUtterance>text</dfn> attribute</dt>
 924 |   <dd>This attribute specifies the text to be synthesized and spoken for this utterance.
 925 |   This may be either plain text or a complete, well-formed SSML document. [[!SSML]]
 926 |   For speech synthesis engines that do not support SSML, or only support certain tags, the user agent or speech engine must strip away the tags they do not support and speak the text.
 927 |   There may be a maximum length of the text, it may be limited to 32,767 characters.</dd>
 928 | 
 929 |   <dt><dfn attribute for=SpeechSynthesisUtterance>lang</dfn> attribute</dt>
 930 |   <dd>This attribute specifies the language of the speech synthesis for the utterance, using a valid BCP 47 language tag. [[!BCP47]]
 931 |   If unset it remains unset for getting in script, but will default to use the language of the html document root element and associated hierarchy.
 932 |   This default value is computed and used when the input request opens a connection to the recognition service.</dd>
 933 | 
 934 |   <dt><dfn attribute for=SpeechSynthesisUtterance>voice</dfn> attribute</dt>
 935 |   <dd>This attribute specifies the speech synthesis voice that the web application wishes to use.
 936 |   When a {{SpeechSynthesisUtterance}} object is created this attribute must be initialized to null.
 937 |   If, at the time of the {{speak()}} method call, this attribute has been set to one of the {{SpeechSynthesisVoice}} objects returned by {{getVoices()}}, then the user agent must use that voice.
 938 |   If this attribute is unset or null at the time of the {{speak()}} method call, then the user agent must use a user agent default voice.
 939 |   The user agent default voice should support the current language (see {{SpeechSynthesisUtterance/lang}}) and can be a local or remote speech service and can incorporate end user choices via interfaces provided by the user agent such as browser configuration parameters.
 940 |   </dd>
 941 | 
 942 |   <dt><dfn attribute for=SpeechSynthesisUtterance>volume</dfn> attribute</dt>
 943 |   <dd>This attribute specifies the speaking volume for the utterance.
 944 |   It ranges between 0 and 1 inclusive, with 0 being the lowest volume and 1 the highest volume, with a default of 1.
 945 |   If SSML is used, this value will be overridden by prosody tags in the markup.</dd>
 946 | 
 947 |   <dt><dfn attribute for=SpeechSynthesisUtterance>rate</dfn> attribute</dt>
 948 |   <dd>This attribute specifies the speaking rate for the utterance.
 949 |   It is relative to the default rate for this voice.
 950 |   1 is the default rate supported by the speech synthesis engine or specific voice (which should correspond to a normal speaking rate).
 951 |   2 is twice as fast, and 0.5 is half as fast.
 952 |   Values below 0.1 or above 10 are strictly disallowed, but speech synthesis engines or specific voices may constrain the minimum and maximum rates further, for example, a particular voice may not actually speak faster than 3 times normal even if you specify a value larger than 3.
 953 |   If SSML is used, this value will be overridden by prosody tags in the markup.</dd>
 954 | 
 955 |   <dt><dfn attribute for=SpeechSynthesisUtterance>pitch</dfn> attribute</dt>
 956 |   <dd>This attribute specifies the speaking pitch for the utterance.
 957 |   It ranges between 0 and 2 inclusive, with 0 being the lowest pitch and 2 the highest pitch.
 958 |   1 corresponds to the default pitch of the speech synthesis engine or specific voice.
 959 |   Speech synthesis engines or voices may constrain the minimum and maximum rates further.
 960 |   If SSML is used, this value will be overridden by prosody tags in the markup.</dd>
 961 | </dl>
 962 | 
 963 | <h4 id="utterance-events">SpeechSynthesisUtterance Events</h4>
 964 | 
 965 | Each of these events must use the {{SpeechSynthesisEvent}} interface,
 966 | except the error event which must use the {{SpeechSynthesisErrorEvent}} interface.
 967 | These events do not bubble and are not cancelable.
 968 | 
 969 | <dl>
 970 |   <dt><dfn event for=SpeechSynthesisUtterance>start</dfn> event</dt>
 971 |   <dd>Fired when this utterance has begun to be spoken.</dd>
 972 | 
 973 |   <dt><dfn event for=SpeechSynthesisUtterance>end</dfn> event</dt>
 974 |   <dd>Fired when this utterance has completed being spoken.
 975 |   If this event fires, the <a event for=SpeechSynthesisUtterance>error</a> event must not be fired for this utterance.</dd>
 976 | 
 977 |   <dt><dfn event for=SpeechSynthesisUtterance>error</dfn> event</dt>
 978 |   <dd>Fired if there was an error that prevented successful speaking of this utterance.
 979 |   If this event fires, the <a event for=SpeechSynthesisUtterance>end</a> event must not be fired for this utterance.</dd>
 980 | 
 981 |   <dt><dfn event for=SpeechSynthesisUtterance>pause</dfn> event</dt>
 982 |   <dd>Fired when and if this utterance is paused mid-utterance.</dd>
 983 | 
 984 |   <dt><dfn event for=SpeechSynthesisUtterance>resume</dfn> event</dt>
 985 |   <dd>Fired when and if this utterance is resumed after being paused mid-utterance.
 986 |   Adding the utterance to the queue while the global SpeechSynthesis instance is in the paused state, and then calling the resume method
 987 |   does not cause the resume event to be fired, in this case the utterance's <a event for=SpeechSynthesisUtterance>start</a> event will be called when the utterance starts.</dd>
 988 | 
 989 |   <dt><dfn event for=SpeechSynthesisUtterance>mark</dfn> event</dt>
 990 |   <dd>Fired when the spoken utterance reaches a named "mark" tag in SSML. [[!SSML]]
 991 |   The user agent must fire this event if the speech synthesis engine provides the event.</dd>
 992 | 
 993 |   <dt><dfn event for=SpeechSynthesisUtterance>boundary</dfn> event</dt>
 994 |   <dd>Fired when the spoken utterance reaches a word or sentence boundary.
 995 |   The user agent must fire this event if the speech synthesis engine provides the event.</dd>
 996 | </dl>
 997 | 
 998 | <h4 id="speechsynthesisevent-attributes">SpeechSynthesisEvent Attributes</h4>
 999 | 
1000 | <dl>
1001 |   <dt><dfn attribute for=SpeechSynthesisEvent>utterance</dfn> attribute</dt>
1002 |   <dd>This attribute contains the SpeechSynthesisUtterance that triggered this event.</dd>
1003 | 
1004 |   <dt><dfn attribute for=SpeechSynthesisEvent>charIndex</dfn> attribute</dt>
1005 |   <dd>This attribute indicates the zero-based character index into the original utterance string that most closely approximates the current speaking position of the speech engine.
1006 |   No guarantee is given as to where charIndex will be with respect to word boundaries (such as at the end of the previous word or the beginning of the next word), only that all text before charIndex has already been spoken, and all text after charIndex has not yet been spoken.
1007 |   The user agent must return this value if the speech synthesis engine supports it, otherwise the user agent must return 0.</dd>
1008 | 
1009 |   <dt><dfn attribute for=SpeechSynthesisEvent>charLength</dfn> attribute</dt>
1010 |   <dd>This attribute indicates the length of the text (word or sentence) that will be spoken corresponding to this event.
1011 |   This attribute is the length, in characters, starting from this event's {{SpeechSynthesisEvent/charIndex}}.
1012 |   The user agent must return this value if the speech synthesis engine supports it or the user agent can otherwise determine it, otherwise the user agent must return 0.</dd>
1013 | 
1014 |   <dt><dfn attribute for=SpeechSynthesisEvent>elapsedTime</dfn> attribute</dt>
1015 |   <dd>This attribute indicates the time, in seconds, that this event triggered, relative to when this utterance has begun to be spoken.
1016 |   The user agent must return this value if the speech synthesis engine supports it or the user agent can otherwise determine it, otherwise the user agent must return 0.</dd>
1017 | 
1018 |   <dt><dfn attribute for=SpeechSynthesisEvent>name</dfn> attribute</dt>
1019 |   <dd>For <a event for=SpeechSynthesisUtterance>mark</a> events, this attribute indicates the name of the marker, as defined in SSML as the name attribute of a mark element. [[!SSML]]
1020 |   For <a event for=SpeechSynthesisUtterance>boundary</a> events, this attribute indicates the type of boundary that caused the event: "word" or "sentence".
1021 |   For all other events, this value should return "".</dd>
1022 | </dl>
1023 | 
1024 | <h4 id="speechsynthesiserrorevent-attributes">SpeechSynthesisErrorEvent Attributes</h4>
1025 | 
1026 | <p>The SpeechSynthesisErrorEvent is the interface used for the SpeechSynthesisUtterance <a event for=SpeechSynthesisUtterance>error</a> event.</p>
1027 | <dl>
1028 |   <dt><dfn attribute for=SpeechSynthesisErrorEvent>error</dfn> attribute</dt>
1029 |   <dd>The errorCode is an enumeration indicating what has gone wrong.
1030 |   The values are:
1031 |   <dl>
1032 |     <dt><dfn enum-value for=SpeechSynthesisErrorCode>"canceled"</dfn></dt>
1033 |     <dd>A cancel method call caused the SpeechSynthesisUtterance to be removed from the queue before it had begun being spoken.</dd>
1034 | 
1035 |     <dt><dfn enum-value for=SpeechSynthesisErrorCode>"interrupted"</dfn></dt>
1036 |     <dd>A cancel method call caused the SpeechSynthesisUtterance to be interrupted after it has begun being spoken and before it completed.</dd>
1037 | 
1038 |     <dt><dfn enum-value for=SpeechSynthesisErrorCode>"audio-busy"</dfn></dt>
1039 |     <dd>The operation cannot be completed at this time because the user-agent cannot access the audio output device.
1040 |     (For example, the user may need to correct this by closing another application.)</dd>
1041 | 
1042 |     <dt><dfn enum-value for=SpeechSynthesisErrorCode>"audio-hardware"</dfn></dt>
1043 |     <dd>The operation cannot be completed at this time because the user-agent cannot identify an audio output device.
1044 |     (For example, the user may need to connect a speaker or configure system settings.)</dd>
1045 | 
1046 |     <dt><dfn enum-value for=SpeechSynthesisErrorCode>"network"</dfn></dt>
1047 |     <dd>The operation cannot be completed at this time because some required network communication failed.</dd>
1048 | 
1049 |     <dt><dfn enum-value for=SpeechSynthesisErrorCode>"synthesis-unavailable"</dfn></dt>
1050 |     <dd>The operation cannot be completed at this time because no synthesis engine is available.
1051 |     (For example, the user may need to install or configure a synthesis engine.)</dd>
1052 | 
1053 |     <dt><dfn enum-value for=SpeechSynthesisErrorCode>"synthesis-failed"</dfn></dt>
1054 |     <dd>The operation failed because synthesis engine had an error.</dd>
1055 | 
1056 |     <dt><dfn enum-value for=SpeechSynthesisErrorCode>"language-unavailable"</dfn></dt>
1057 |     <dd>No appropriate voice is available for the language designated in SpeechSynthesisUtterance lang.</dd>
1058 | 
1059 |     <dt><dfn enum-value for=SpeechSynthesisErrorCode>"voice-unavailable"</dfn></dt>
1060 |     <dd>The voice designated in SpeechSynthesisUtterance voice attribute is not available.</dd>
1061 | 
1062 |     <dt><dfn enum-value for=SpeechSynthesisErrorCode>"text-too-long"</dfn></dt>
1063 |     <dd>The contents of the SpeechSynthesisUtterance text attribute is too long to synthesize.</dd>
1064 | 
1065 |     <dt><dfn enum-value for=SpeechSynthesisErrorCode>"invalid-argument"</dfn></dt>
1066 |     <dd>The contents of the SpeechSynthesisUtterance rate, pitch or volume attribute is not supported by synthesizer.</dd>
1067 | 
1068 |     <dt><dfn enum-value for=SpeechSynthesisErrorCode>"not-allowed"</dfn></dt>
1069 |     <dd>Synthesis was not allowed to start by the user agent or system in the current context.</dd>
1070 |   </dl>
1071 |   </dd>
1072 | </dl>
1073 | 
1074 | <h4 id="speechsynthesisvoice-attributes">SpeechSynthesisVoice Attributes</h4>
1075 | 
1076 | <dl>
1077 |   <dt><dfn attribute for=SpeechSynthesisVoice>voiceURI</dfn> attribute</dt>
1078 |   <dd>The voiceURI attribute specifies the speech synthesis voice and the location of the speech synthesis service for this voice.
1079 |   Note that the voiceURI is a generic URI and can thus point to local or remote services, either through use of a URN with meaning to the user agent or by specifying a URL that the user agent recognizes as a local service.</dd>
1080 | 
1081 |   <dt><dfn attribute for=SpeechSynthesisVoice>name</dfn> attribute</dt>
1082 |   <dd>This attribute is a human-readable name that represents the voice.
1083 |   There is no guarantee that all names returned are unique.</dd>
1084 | 
1085 |   <dt><dfn attribute for=SpeechSynthesisVoice>lang</dfn> attribute</dt>
1086 |   <dd>This attribute is a BCP 47 language tag indicating the language of the voice. [[!BCP47]]</dd>
1087 | 
1088 |   <dt><dfn attribute for=SpeechSynthesisVoice>localService</dfn> attribute</dt>
1089 |   <dd>This attribute is true for voices supplied by a local speech synthesizer, and is false for voices supplied by a remote speech synthesizer service.
1090 |   (This may be useful because remote services may imply additional latency, bandwidth or cost, whereas local voices may imply lower quality, however there is no guarantee that any of these implications are true.)</dd>
1091 | 
1092 |   <dt><dfn attribute for=SpeechSynthesisVoice>default</dfn> attribute</dt>
1093 |   <dd>This attribute is true for at most one voice per language.
1094 |   There may be a different default for each language.
1095 |   It is user agent dependent how default voices are determined.</dd>
1096 | </dl>
1097 | 
1098 | <h2 id="examples">Examples</h2>
1099 | 
1100 | <p><em>This section is non-normative.</em></p>
1101 | 
1102 | <h3 id="examples-recognition">Speech Recognition Examples</h3>
1103 | 
1104 | <div class="example" id="input-field">
1105 |   <p>Using speech recognition to fill an input-field and perform a web search.</p>
1106 | 
1107 |   <pre class="lang-html">
1108 |     &lt;script type="text/javascript"&gt;
1109 |       var recognition = new SpeechRecognition();
1110 |       recognition.onresult = function(event) {
1111 |         if (event.results.length &gt; 0) {
1112 |           q.value = event.results[0][0].transcript;
1113 |           q.form.submit();
1114 |         }
1115 |       }
1116 |     &lt;/script&gt;
1117 | 
1118 |     &lt;form action="https://www.example.com/search"&gt;
1119 |       &lt;input type="search" id="q" name="q" size=60&gt;
1120 |       &lt;input type="button" value="Click to Speak" onclick="recognition.start()"&gt;
1121 |     &lt;/form&gt;
1122 |   </pre>
1123 | </div>
1124 | 
1125 | <div class="example" id="fill-option-list">
1126 |   <p>Using speech recognition to fill an options list with alternative speech results.</p>
1127 | 
1128 |   <pre class="lang-html">
1129 |     &lt;script type="text/javascript"&gt;
1130 |       var recognition = new SpeechRecognition();
1131 |       recognition.maxAlternatives = 10;
1132 |       recognition.onresult = function(event) {
1133 |         if (event.results.length &gt; 0) {
1134 |           var result = event.results[0];
1135 |           for (var i = 0; i &lt; result.length; ++i) {
1136 |             var text = result[i].transcript;
1137 |             select.options[i] = new Option(text, text);
1138 |           }
1139 |         }
1140 |       }
1141 | 
1142 |       function start() {
1143 |         select.options.length = 0;
1144 |         recognition.start();
1145 |       }
1146 |     &lt;/script&gt;
1147 | 
1148 |     &lt;select id="select"&gt;&lt;/select&gt;
1149 |     &lt;button onclick="start()"&gt;Click to Speak&lt;/button&gt;
1150 |   </pre>
1151 | </div>
1152 | 
1153 | <div class="example" id="fill-textarea">
1154 |   <p>Using continuous speech recognition to fill a textarea.</p>
1155 | 
1156 |   <pre class="lang-html">
1157 |     &lt;textarea id="textarea" rows=10 cols=80&gt;&lt;/textarea&gt;
1158 |     &lt;button id="button" onclick="toggleStartStop()"&gt;&lt;/button&gt;
1159 | 
1160 |     &lt;script type="text/javascript"&gt;
1161 |       var recognizing;
1162 |       var recognition = new SpeechRecognition();
1163 |       recognition.continuous = true;
1164 |       reset();
1165 |       recognition.onend = reset;
1166 | 
1167 |       recognition.onresult = function (event) {
1168 |         for (var i = event.resultIndex; i &lt; event.results.length; ++i) {
1169 |           if (event.results[i].isFinal) {
1170 |             textarea.value += event.results[i][0].transcript;
1171 |           }
1172 |         }
1173 |       }
1174 | 
1175 |       function reset() {
1176 |         recognizing = false;
1177 |         button.innerHTML = "Click to Speak";
1178 |       }
1179 | 
1180 |       function toggleStartStop() {
1181 |         if (recognizing) {
1182 |           recognition.stop();
1183 |           reset();
1184 |         } else {
1185 |           recognition.start();
1186 |           recognizing = true;
1187 |           button.innerHTML = "Click to Stop";
1188 |         }
1189 |       }
1190 |     &lt;/script&gt;
1191 |   </pre>
1192 | </div>
1193 | 
1194 | <div class="example" id="continuous-recognition">
1195 |   <p>Using continuous speech recognition, showing final results in black and interim results in grey.</p>
1196 | 
1197 |   <pre class="lang-html">
1198 |     &lt;button id="button" onclick="toggleStartStop()"&gt;&lt;/button&gt;
1199 |     &lt;div style="border:dotted;padding:10px"&gt;
1200 |       &lt;span id="final_span"&gt;&lt;/span&gt;
1201 |       &lt;span id="interim_span" style="color:grey"&gt;&lt;/span&gt;
1202 |     &lt;/div&gt;
1203 | 
1204 |     &lt;script type="text/javascript"&gt;
1205 |       var recognizing;
1206 |       var recognition = new SpeechRecognition();
1207 |       recognition.continuous = true;
1208 |       recognition.interimResults = true;
1209 |       reset();
1210 |       recognition.onend = reset;
1211 | 
1212 |       recognition.onresult = function (event) {
1213 |         var final = "";
1214 |         var interim = "";
1215 |         for (var i = 0; i &lt; event.results.length; ++i) {
1216 |           if (event.results[i].isFinal) {
1217 |             final += event.results[i][0].transcript;
1218 |           } else {
1219 |             interim += event.results[i][0].transcript;
1220 |           }
1221 |         }
1222 |         final_span.innerHTML = final;
1223 |         interim_span.innerHTML = interim;
1224 |       }
1225 | 
1226 |       function reset() {
1227 |         recognizing = false;
1228 |         button.innerHTML = "Click to Speak";
1229 |       }
1230 | 
1231 |       function toggleStartStop() {
1232 |         if (recognizing) {
1233 |           recognition.stop();
1234 |           reset();
1235 |         } else {
1236 |           recognition.start();
1237 |           recognizing = true;
1238 |           button.innerHTML = "Click to Stop";
1239 |           final_span.innerHTML = "";
1240 |           interim_span.innerHTML = "";
1241 |         }
1242 |       }
1243 |     &lt;/script&gt;
1244 |   </pre>
1245 | </div>
1246 | 
1247 | <h3 id="examples-synthesis">Speech Synthesis Examples</h3>
1248 | 
1249 | <div class="example" id="speak-text">
1250 |   <p>Spoken text.</p>
1251 | 
1252 |   <pre class="lang-html">
1253 |     &lt;script type="text/javascript"&gt;
1254 |       speechSynthesis.speak(new SpeechSynthesisUtterance('Hello World'));
1255 |     &lt;/script&gt;
1256 |   </pre>
1257 | </div>
1258 | 
1259 | <div class="example" id="speak-text-advanced">
1260 |   <p>Spoken text with attributes and events.</p>
1261 | 
1262 |   <pre class="lang-html">
1263 |     &lt;script type="text/javascript"&gt;
1264 |       var u = new SpeechSynthesisUtterance();
1265 |       u.text = 'Hello World';
1266 |       u.lang = 'en-US';
1267 |       u.rate = 1.2;
1268 |       u.onend = function(event) { alert('Finished in ' + event.elapsedTime + ' seconds.'); }
1269 |       speechSynthesis.speak(u);
1270 |     &lt;/script&gt;
1271 |   </pre>
1272 | </div>
1273 | 
1274 | <h2 class="no-num" id="acknowledgments">Acknowledgments</h2>
1275 | 
1276 | <p style="white-space: pre-line">
1277 |   Adam Sobieski (Phoster)
1278 |   Björn Bringert (Google)
1279 |   Charles Pritchard
1280 |   Dominic Mazzoni (Google)
1281 |   Gerardo Capiel (Benetech)
1282 |   Jerry Carter
1283 |   Kagami Sascha Rosylight
1284 |   Marcos Cáceres (Mozilla)
1285 |   Nagesh Kharidi (Openstream)
1286 |   Olli Pettay (Mozilla)
1287 |   Peter Beverloo (Google)
1288 |   Raj Tumuluri (Openstream)
1289 |   Satish Sampath (Google)
1290 | </p>
1291 | 
1292 | <p>Also, the members of the HTML Speech Incubator Group, and the corresponding [[HTMLSPEECH|Final Report]], which created the basis for this specification.</p>
1293 | 


--------------------------------------------------------------------------------