├── .gitignore ├── .pr-preview.json ├── w3c.json ├── package.json ├── LICENSE.md ├── scripts ├── test-cddl.sh ├── validate-json-schema.js └── extract-schemas.js ├── CONTRIBUTING.md ├── schemas ├── at-driver-local.cddl ├── at-driver-remote.cddl ├── at-driver-local.json └── at-driver-remote.json ├── .github └── workflows │ └── auto-publish.yml ├── README.md └── index.bs /.gitignore: -------------------------------------------------------------------------------- 1 | index.html 2 | node_modules/ 3 | -------------------------------------------------------------------------------- /.pr-preview.json: -------------------------------------------------------------------------------- 1 | { 2 | "src_file": "index.bs", 3 | "type": "bikeshed", 4 | "params": { 5 | "force": 1 6 | } 7 | } 8 | 9 | -------------------------------------------------------------------------------- /w3c.json: -------------------------------------------------------------------------------- 1 | { 2 | "group": "wg/browser-tools-testing", 3 | "contacts": ["tidoust", "jugglinmike"], 4 | "repo-type": "rec-track" 5 | } 6 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "module", 3 | "scripts": { 4 | "extract-schemas": "node scripts/extract-schemas.js" 5 | }, 6 | "dependencies": { 7 | "ajv": "^8.11.2" 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | All Reports in this Repository are licensed by Contributors under the 2 | [W3C Software and Document License](http://www.w3.org/Consortium/Legal/2015/copyright-software-and-document). 3 | Contributions to Specifications are made under the [W3C CLA](https://www.w3.org/community/about/agreements/cla/). 
4 | -------------------------------------------------------------------------------- /scripts/test-cddl.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | set -ex 3 | 4 | SCRIPTS_DIR=$(cd $(dirname "$0") && pwd -P) 5 | ROOT=$(dirname $SCRIPTS_DIR) 6 | 7 | if ! [ -x "$(command -v cddl)" ] || [ "$1" = "--upgrade" ]; then 8 | echo 'Installing cddl' 9 | cargo install cddl 10 | fi 11 | 12 | cddl compile-cddl --cddl ${ROOT}/schemas/at-driver-local.cddl 13 | cddl compile-cddl --cddl ${ROOT}/schemas/at-driver-remote.cddl 14 | -------------------------------------------------------------------------------- /scripts/validate-json-schema.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | import {readFileSync} from 'fs'; 4 | import Ajv from 'ajv/dist/2020.js'; 5 | 6 | function validateJsonSchema(filename) { 7 | const ajv = new Ajv({strict: true}); 8 | try { 9 | const text = readFileSync(filename, 'utf-8'); 10 | const schema = JSON.parse(text); 11 | ajv.compile(schema); 12 | } catch (error) { 13 | console.error(`Error validating "${filename}": ${error}`); 14 | process.exitCode = 1; 15 | return; 16 | } 17 | console.log(`JSON Schema in "${filename}" is valid.`); 18 | } 19 | 20 | validateJsonSchema('./schemas/at-driver-local.json'); 21 | validateJsonSchema('./schemas/at-driver-remote.json'); 22 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # AT Driver 2 | 3 | This repository is being used for work in the W3C ARIA-AT Community Group, governed by the [W3C Community License 4 | Agreement (CLA)](http://www.w3.org/community/about/agreements/cla/). To make substantive contributions, 5 | you must join the CG. 
6 | 7 | If you are not the sole contributor to a contribution (pull request), please identify all 8 | contributors in the pull request comment. 9 | 10 | To add a contributor (other than yourself, that's automatic), mark them one per line as follows: 11 | 12 | ``` 13 | +@github_username 14 | ``` 15 | 16 | If you added a contributor by mistake, you can remove them in a comment with: 17 | 18 | ``` 19 | -@github_username 20 | ``` 21 | 22 | If you are making a pull request on behalf of someone else but you had no part in designing the 23 | feature, you can remove yourself with the above syntax. 24 | -------------------------------------------------------------------------------- /schemas/at-driver-local.cddl: -------------------------------------------------------------------------------- 1 | Message = ( 2 | CommandResponse // 3 | ErrorResponse // 4 | Event 5 | ) 6 | 7 | CommandResponse = { 8 | id: uint, 9 | result: ResultData, 10 | Extensible, 11 | } 12 | 13 | ErrorResponse = { 14 | id: uint / null, 15 | error: "unknown error" / "unknown command" / "invalid argument" / "session not created", 16 | message: text, 17 | ?stacktrace: text, 18 | Extensible, 19 | } 20 | 21 | ResultData = ( 22 | EmptyResult / 23 | SessionResult / 24 | SettingsResult 25 | ) 26 | 27 | EmptyResult = {} 28 | 29 | Event = { 30 | EventData, 31 | Extensible, 32 | } 33 | 34 | EventData = ( 35 | InteractionEvent 36 | ) 37 | 38 | Extensible = { 39 | *text => any 40 | } 41 | 42 | SessionResult = (SessionNewResult) 43 | 44 | CapabilitiesRequest = { 45 | ?atName: text, 46 | ?atVersion: text, 47 | ?platformName: text, 48 | Extensible, 49 | } 50 | 51 | SessionNewResult = { 52 | sessionId: text, 53 | capabilities: { 54 | atName: text, 55 | atVersion: text, 56 | platformName: text, 57 | Extensible, 58 | } 59 | } 60 | 61 | SettingsResult = { 62 | SettingsGetSettingsResult 63 | } 64 | 65 | SettingsGetSettingsResult = { 66 | settings: [1* SettingsGetSettingsResultItem ], 67 | } 68 | 69 | 
SettingsGetSettingsResultItem = { 70 | name: text, 71 | value: any, 72 | Extensible, 73 | } 74 | 75 | InteractionEvent = (InteractionCapturedOutputEvent) 76 | 77 | InteractionCapturedOutputParameters = { 78 | data: text, 79 | Extensible, 80 | } 81 | 82 | InteractionCapturedOutputEvent = { 83 | method: "interaction.capturedOutput", 84 | params: InteractionCapturedOutputParameters 85 | } 86 | -------------------------------------------------------------------------------- /scripts/extract-schemas.js: -------------------------------------------------------------------------------- 1 | import * as fs from 'node:fs/promises'; 2 | 3 | const removeIndentation = (indentation, cddl) => { 4 | if (indentation.length === 0) { 5 | return cddl; 6 | } 7 | 8 | let indentationRegexp = new RegExp(`^${indentation}`); 9 | return cddl 10 | .split('\n') 11 | .map(line => line.replace(indentationRegexp, '')) 12 | .join('\n'); 13 | } 14 | 15 | const formatCddl = cddl => cddl.join('\n\n').trim() + '\n'; 16 | 17 | const extractCddlFromSpec = async () => { 18 | const source = await fs.readFile('index.bs', { encoding: 'utf8' }); 19 | const matches = [...source.matchAll(/^([ \t]*)<(?:pre|xmp) class=['"]cddl['"] data-cddl-module=['"]((?:[a-zA-Z0-9_,-]+)+)['"]>([\s\S]*?)<\/(?:pre|xmp)>/gm)]; 20 | 21 | const [local, remote] = matches.reduce(([local, remote], match) => { 22 | const [_, indentation, cddlModules, content] = match; 23 | 24 | let isLocal = cddlModules.indexOf('local-cddl') > -1; 25 | let isRemote = cddlModules.indexOf('remote-cddl') > -1; 26 | 27 | if (!isLocal && !isRemote) { 28 | return [local, remote]; 29 | } 30 | 31 | let cddl = removeIndentation(indentation, content.trim()); 32 | 33 | if (isLocal) { 34 | local.push(cddl); 35 | } 36 | if (isRemote) { 37 | remote.push(cddl); 38 | } 39 | 40 | return [local, remote]; 41 | }, [[], []]) 42 | 43 | return [ 44 | formatCddl(local), 45 | formatCddl(remote) 46 | ] 47 | } 48 | 49 | try { 50 | await fs.mkdir('schemas'); 51 | } catch(ex) { 52 
| if (ex.code !== 'EEXIST') { 53 | throw ex; 54 | } 55 | } 56 | 57 | const [localCddl, remoteCddl] = await extractCddlFromSpec(); 58 | 59 | await fs.writeFile('schemas/at-driver-local.cddl', localCddl); 60 | await fs.writeFile('schemas/at-driver-remote.cddl', remoteCddl); 61 | -------------------------------------------------------------------------------- /.github/workflows/auto-publish.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | on: 3 | pull_request: {} 4 | push: 5 | branches: [main] 6 | jobs: 7 | main: 8 | name: Build, Validate and Deploy 9 | runs-on: ubuntu-22.04 10 | steps: 11 | - uses: actions/checkout@v4 12 | - uses: w3c/spec-prod@v2 13 | with: 14 | GH_PAGES_BRANCH: gh-pages 15 | # equivalent to the Bikeshed CLI argument, `--die-on=link-error` 16 | BUILD_FAIL_ON: link-error 17 | check-cddl-consistency: 18 | name: Verify that the generated CDDL files are in sync with the inline CDDL 19 | runs-on: ubuntu-22.04 20 | steps: 21 | - uses: actions/checkout@v4 22 | - uses: actions/setup-node@v4 23 | with: 24 | node-version: 18.x 25 | - name: Extract CDDL 26 | run: npm run extract-schemas 27 | - name: Verify that the generated files match the in-line CDDL 28 | run: git diff --exit-code 29 | check-cddl-validity: 30 | name: Verify that the generated CDDL files are valid 31 | runs-on: ubuntu-22.04 32 | steps: 33 | - uses: actions/checkout@v4 34 | - name: Get cddl version 35 | run: curl -s https://crates.io/api/v1/crates/cddl | python3 -c "import sys, json; print(json.load(sys.stdin)['crate']['max_stable_version'])" | tee cddl.txt 36 | - name: "Cache rust binaries" 37 | uses: actions/cache@v3 38 | id: cache-cddl 39 | env: 40 | cache-name: cache-cddl 41 | with: 42 | path: | 43 | ~/.cargo/bin/ 44 | ~/.cargo/git/db/ 45 | ~/.cargo/registry/cache/ 46 | ~/.cargo/registry/index/ 47 | ./target/ 48 | key: cddl-${{ hashFiles('cddl.txt') }} 49 | - uses: actions-rs/toolchain@v1 50 | with: 51 | toolchain: stable 52 | - name: 
Validate CDDL files 53 | run: ./scripts/test-cddl.sh 54 | check-json-schema-validity: 55 | name: Verify that the JSON schema files are valid 56 | runs-on: ubuntu-22.04 57 | steps: 58 | - uses: actions/checkout@v4 59 | - uses: actions/setup-node@v4 60 | with: 61 | node-version: 18 62 | - run: npm ci 63 | - name: Validate JSON Schema files 64 | run: ./scripts/validate-json-schema.js 65 | -------------------------------------------------------------------------------- /schemas/at-driver-remote.cddl: -------------------------------------------------------------------------------- 1 | Command = { 2 | id: uint, 3 | CommandData, 4 | Extensible, 5 | } 6 | 7 | CommandData = ( 8 | SessionCommand // 9 | SettingsCommand // 10 | InteractionCommand 11 | ) 12 | 13 | EmptyParams = { Extensible } 14 | 15 | Extensible = { 16 | *text => any 17 | } 18 | 19 | SessionCommand = (SessionNewCommand) 20 | 21 | CapabilitiesRequest = { 22 | ?atName: text, 23 | ?atVersion: text, 24 | ?platformName: text, 25 | Extensible, 26 | } 27 | 28 | SessionNewCommand = { 29 | method: "session.new", 30 | params: {capabilities: CapabilitiesRequestParameters}, 31 | } 32 | 33 | CapabilitiesRequestParameters = { 34 | ?alwaysMatch: CapabilitiesRequest, 35 | } 36 | 37 | SettingsCommand = { 38 | SettingsSetSettingsCommand // 39 | SettingsGetSettingsCommand // 40 | SettingsGetSupportedSettingsCommand 41 | } 42 | 43 | SettingsSetSettingsCommand = { 44 | method: "settings.setSettings", 45 | params: SettingsSetSettingsParameters 46 | } 47 | 48 | SettingsSetSettingsParameters = { 49 | settings: [1* SettingsSetSettingsParametersItem ], 50 | } 51 | 52 | SettingsSetSettingsParametersItem = { 53 | name: text, 54 | value: any, 55 | Extensible, 56 | } 57 | 58 | SettingsGetSettingsCommand = { 59 | method: "settings.getSettings", 60 | params: SettingsGetSettingsParameters 61 | } 62 | 63 | SettingsGetSettingsParameters = { 64 | settings: [1* SettingsGetSettingsParametersItem ], 65 | } 66 | 67 | 
SettingsGetSettingsParametersItem = { 68 | name: text, 69 | Extensible, 70 | } 71 | 72 | SettingsGetSupportedSettingsCommand = { 73 | method: "settings.getSupportedSettings", 74 | params: EmptyParams 75 | } 76 | 77 | InteractionCommand = (InteractionUserIntentCommand) 78 | 79 | InteractionUserIntentCommand = { 80 | method: "interaction.userIntent", 81 | params: InteractionUserIntentParameters 82 | } 83 | 84 | InteractionUserIntentParameters = ( 85 | PressKeysIntentParameters / 86 | ExtensionIntentParameters 87 | ) 88 | 89 | PressKeysIntentParameters = { 90 | "name" => "pressKeys", 91 | "keys" => KeyCombination, 92 | } 93 | 94 | KeyCombination = [ 95 | 1* text 96 | ] 97 | 98 | ExtensionIntentParameters = { 99 | "name" => text, 100 | Extensible, 101 | } 102 | -------------------------------------------------------------------------------- /schemas/at-driver-local.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://json-schema.org/draft/2020-12/schema", 3 | "$id": "TODO", 4 | "$defs": { 5 | "Message": { 6 | "oneOf": [ 7 | { "$ref": "#/$defs/CommandResponse" }, 8 | { "$ref": "#/$defs/ErrorResponse" }, 9 | { "$ref": "#/$defs/Event" } 10 | ] 11 | }, 12 | "CommandResponse": { 13 | "type": "object", 14 | "properties": { 15 | "id": { "type": "number" }, 16 | "result": { "$ref": "#/$defs/ResultData" } 17 | }, 18 | "required": ["id", "result"] 19 | }, 20 | "ErrorResponse": { 21 | "type": "object", 22 | "properties": { 23 | "id": { "type": [ "number", "null" ] }, 24 | "error": { "enum": [ "unknown error", "unknown command", "invalid argument", "session not created" ] }, 25 | "message": { "type": "string" }, 26 | "stacktrace": { "type": "string" } 27 | }, 28 | "required": ["id", "error", "message"] 29 | }, 30 | "ResultData": { 31 | "oneOf": [ 32 | { "$ref": "#/$defs/EmptyResult" }, 33 | { "$ref": "#/$defs/SessionResult" }, 34 | { "$ref": "#/$defs/SettingsResult" } 35 | ] 36 | }, 37 | "EmptyResult": { 38 | "type": 
"object", 39 | "additionalProperties": false 40 | }, 41 | "Event": { 42 | "type": "object", 43 | "allOf": [ 44 | { "$ref": "#/$defs/EventData" }, 45 | {} 46 | ] 47 | }, 48 | "EventData": { 49 | "oneOf": [ 50 | { "$ref": "#/$defs/InteractionEvent" } 51 | ] 52 | }, 53 | "SessionResult": { 54 | "oneOf": [ 55 | { "$ref": "#/$defs/SessionNewResult" } 56 | ] 57 | }, 58 | "CapabilitiesRequest": { 59 | "type": "object", 60 | "properties": { 61 | "atName": { "type": "string" }, 62 | "atVersion": { "type": "string" }, 63 | "platformName": { "type": "string" } 64 | } 65 | }, 66 | "SessionNewResult": { 67 | "type": "object", 68 | "properties": { 69 | "sessionId": { "type": "string" }, 70 | "capabilities": { 71 | "type": "object", 72 | "properties": { 73 | "atName": { "type": "string" }, 74 | "atVersion": { "type": "string" }, 75 | "platformName": { "type": "string" } 76 | }, 77 | "required": [ "atName", "atVersion", "platformName" ] 78 | } 79 | }, 80 | "required": ["sessionId", "capabilities"], 81 | "additionalProperties": false 82 | }, 83 | "SettingsResult": { 84 | "oneOf": [ 85 | { "$ref": "#/$defs/SettingsGetSettingsResult" } 86 | ] 87 | }, 88 | "SettingsGetSettingsResult": { 89 | "type": "object", 90 | "properties": { 91 | "settings": { 92 | "type": "array", 93 | "minItems": 1, 94 | "items": { "$ref": "#/$defs/SettingsGetSettingsResultItem" } 95 | } 96 | }, 97 | "required": ["settings"], 98 | "additionalProperties": false 99 | }, 100 | "SettingsGetSettingsResultItem": { 101 | "type": "object", 102 | "properties": { 103 | "name": { "type": "string" }, 104 | "value": { "oneOf": [ {"type": "null"}, {"type": "boolean"}, {"type": "object"}, {"type": "array"}, {"type": "number"}, {"type": "string"} ] } 105 | }, 106 | "required": [ "name", "value" ] 107 | }, 108 | "InteractionEvent": { 109 | "oneOf": [ 110 | { "$ref": "#/$defs/InteractionCapturedOutputEvent" } 111 | ] 112 | }, 113 | "InteractionCapturedOutputPameters": { 114 | "type": "object", 115 | "properties": { 116 | "data": 
{ "type": "string" } 117 | }, 118 | "required": [ "data" ] 119 | }, 120 | "InteractionCapturedOutputEvent": { 121 | "type": "object", 122 | "properties": { 123 | "method": { "enum": [ "interaction.capturedOutput" ] }, 124 | "params": { "$ref": "#/$defs/InteractionCapturedOutputPameters" } 125 | }, 126 | "required": [ "method", "params" ], 127 | "additionalProperties": false 128 | } 129 | }, 130 | "$ref": "#/$defs/Message" 131 | } 132 | -------------------------------------------------------------------------------- /schemas/at-driver-remote.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://json-schema.org/draft/2020-12/schema", 3 | "$id": "TODO", 4 | "$defs": { 5 | "Command": { 6 | "allOf": [ 7 | { 8 | "type": "object", 9 | "properties": { 10 | "id": { "type": "number" } 11 | }, 12 | "required": ["id"] 13 | }, 14 | { "$ref": "#/$defs/CommandData" } 15 | ] 16 | }, 17 | "CommandData": { 18 | "oneOf": [ 19 | { "$ref": "#/$defs/SessionCommand" }, 20 | { "$ref": "#/$defs/SettingsCommand" }, 21 | { "$ref": "#/$defs/InteractionCommand" } 22 | ] 23 | }, 24 | "EmptyParams": {}, 25 | "SessionCommand": { 26 | "oneOf": [ 27 | { "$ref": "#/$defs/SessionNewCommand" } 28 | ] 29 | }, 30 | "CapabilitiesRequest": { 31 | "type": "object", 32 | "properties": { 33 | "atName": { "type": "string" }, 34 | "atVersion": { "type": "string" }, 35 | "platformName": { "type": "string" } 36 | } 37 | }, 38 | "SessionNewCommand": { 39 | "type": "object", 40 | "properties": { 41 | "method": { "enum": [ "session.new" ] }, 42 | "params": { 43 | "type": "object", 44 | "properties": { 45 | "capabilities": { "$ref": "#/$defs/CapabilitiesRequestParameters" } 46 | }, 47 | "required": ["capabilities"], 48 | "additionalProperties": false 49 | } 50 | }, 51 | "required": ["method", "params"], 52 | "additionalProperties": false 53 | }, 54 | "CapabilitiesRequestParameters": { 55 | "type": "object", 56 | "properties": { 57 | "alwaysMatch": { 
"$ref": "#/$defs/CapabilitiesRequest" } 58 | }, 59 | "additionalProperties": false 60 | }, 61 | "SettingsCommand": { 62 | "oneOf": [ 63 | { "$ref": "#/$defs/SettingsSetSettingsCommand" }, 64 | { "$ref": "#/$defs/SettingsGetSettingsCommand" }, 65 | { "$ref": "#/$defs/SettingsGetSupportedSettingsCommand" } 66 | ] 67 | }, 68 | "SettingsSetSettingsCommand": { 69 | "type": "object", 70 | "properties": { 71 | "method": { "enum": [ "settings.setSettings" ] }, 72 | "params": { "$ref": "#/$defs/SettingsSetSettingsParameters" } 73 | }, 74 | "required": [ "method", "params" ], 75 | "additionalProperties": false 76 | }, 77 | "SettingsSetSettingsParameters": { 78 | "type": "object", 79 | "properties": { 80 | "settings": { 81 | "type": "array", 82 | "minItems": 1, 83 | "items": { "$ref": "#/$defs/SettingsSetSettingsParametersItem" } 84 | } 85 | }, 86 | "required": [ "settings" ], 87 | "additionalProperties": false 88 | }, 89 | "SettingsSetSettingsParametersItem": { 90 | "type": "object", 91 | "properties": { 92 | "name": { "type": "string" }, 93 | "value": { "oneOf": [ {"type": "null"}, {"type": "boolean"}, {"type": "object"}, {"type": "array"}, {"type": "number"}, {"type": "string"} ] } 94 | }, 95 | "required": [ "name", "value" ] 96 | }, 97 | "SettingsGetSettingsCommand": { 98 | "type": "object", 99 | "properties": { 100 | "method": { "enum": [ "settings.getSettings" ] }, 101 | "params": { "$ref": "#/$defs/SettingsGetSettingsParameters" } 102 | }, 103 | "required": [ "method", "params" ], 104 | "additionalProperties": false 105 | }, 106 | "SettingsGetSettingsParameters": { 107 | "type": "object", 108 | "properties": { 109 | "settings": { 110 | "type": "array", 111 | "minItems": 1, 112 | "items": { "$ref": "#/$defs/SettingsGetSettingsParametersItem" } 113 | } 114 | }, 115 | "required": [ "settings" ], 116 | "additionalProperties": false 117 | }, 118 | "SettingsGetSettingsParametersItem": { 119 | "type": "object", 120 | "properties": { 121 | "name": { "type": "string" } 122 | }, 
123 | "required": [ "name" ] 124 | }, 125 | "SettingsGetSupportedSettingsCommand": { 126 | "type": "object", 127 | "properties": { 128 | "method": { "enum": [ "settings.getSupportedSettings" ] }, 129 | "params": { "$ref": "#/$defs/EmptyParams" } 130 | }, 131 | "required": [ "method", "params" ], 132 | "additionalProperties": false 133 | }, 134 | "InteractionCommand": { 135 | "oneOf": [ 136 | { "$ref": "#/$defs/InteractionPressKeysCommand" } 137 | ] 138 | }, 139 | "InteractionPressKeysCommand": { 140 | "type": "object", 141 | "properties": { 142 | "method": { "enum": [ "interaction.pressKeys" ] }, 143 | "params": { "$ref": "#/$defs/InteractionPressKeysParameters" } 144 | }, 145 | "required": [ "method", "params" ], 146 | "additionalProperties": false 147 | }, 148 | "InteractionPressKeysParameters": { 149 | "type": "object", 150 | "properties": { 151 | "keys": { "$ref": "#/$defs/KeyCombination" } 152 | }, 153 | "required": [ "keys" ] 154 | }, 155 | "KeyCombination": { 156 | "type": "array", 157 | "minItems": 1, 158 | "items": { "type": "string" } 159 | } 160 | }, 161 | "$ref": "#/$defs/Command" 162 | } 163 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AT Driver API Explainer 2 | 3 | 4 | ## Overview 5 | 6 | [The WebDriver protocol](https://w3c.github.io/webdriver/#extensions-0) is a [W3C](https://www.w3.org/) standard created for automating web browsers. The goal of [the ARIA-AT project](https://aria-at.w3.org/) is to enable the automation of screen readers and web browsers, and for that purpose, WebDriver is insufficient. 7 | 8 | We review the needs of the ARIA-AT project’s automated tests through the lens of the WebDriver protocol. We identify which needs are already met by WebDriver and which needs require new infrastructure. Finally, we outline alternative proposals that we have considered. 
9 | 10 | 11 | ## Motivating Use Cases 12 | 13 | * It is difficult for web developers to know whether a particular design pattern or web platform feature is supported by all of the accessibility stack (browser, operating system, screen reader) without manually testing. 14 | * It is difficult for screen reader implementers to compare their product’s compliance to web standards to their competitors’ products without manually testing. 15 | 16 | 17 | ## Goals 18 | 19 | * Enable automation of screen reader and web browser combinations. 20 | * Ability to start and quit the screen reader. 21 | * Ability to change settings in the screen reader in a robust way. 22 | * Ability to access the spoken output of the screen reader. 23 | * Ability to access the internal state of the screen reader, e.g. virtual focus position, mode (interaction mode vs. reading mode). 24 | * Define an API that can be consistently implemented by all screen readers. 25 | * Enable open experimentation across a large set of platforms and by a diverse group of consumers. 26 | * Do not duplicate functionality where WebDriver is already suitable. 27 | 28 | 29 | ## Requirements 30 | 31 | Web developers will need to trigger specific behaviors in web browsers and screen readers. This section enumerates a minimal set of such behaviors. It differentiates the features which are already available through standard interfaces (as of 2023) from those which can only be accessed through proprietary interfaces (if at all). 32 | 33 | 34 | ### Start browser (already possible today) 35 | 36 | Web developers will need to create web browser instances before testing can begin. 37 | 38 | The WebDriver protocol already provides this feature through [its "New Session" command](https://w3c.github.io/webdriver/#new-session). 39 | 40 | 41 | ### Visit web page (already possible today) 42 | 43 | Web developers will need to navigate web browser instances to documents designed to demonstrate accessibility behaviors. 
44 | 45 | The WebDriver protocol already provides this feature through [its "Navigate To" command](https://w3c.github.io/webdriver/#navigate-to). 46 | 47 | 48 | ### Quit browser (already possible today) 49 | 50 | Web developers will need to destroy web browser instances after testing is complete. 51 | 52 | The WebDriver protocol already provides this feature through [its "Delete Session" command](https://w3c.github.io/webdriver/#delete-session). 53 | 54 | 55 | ### Configure screen reader 56 | 57 | Web developers will need to set initial conditions for the screen reader under test, e.g. instructing the screen reader to convey mode changes in speech instead of via sound files. 58 | 59 | The WebDriver protocol already provides a mechanism for altering characteristics of the testing session via [its "Capabilities" mechanism](https://w3c.github.io/webdriver/#capabilities), but screen reader settings are not included in the set of capabilities. 60 | 61 | 62 | ### Press keyboard keys 63 | 64 | Web developers will need to simulate keyboard key presses which can be received by a screen reader. 65 | 66 | Although the WebDriver protocol already provides two commands for simulating key presses (["Element Send Keys"](https://w3c.github.io/webdriver/#element-send-keys) and ["Perform Actions"](https://w3c.github.io/webdriver/#dfn-perform-actions)), those commands are unsuitable for the purposes of AT Driver because they operate by simulating input within the web browser instance. The operating system and screen reader cannot observe keyboard interaction simulated using these commands. 67 | 68 | ### Inspect screen reader internal state 69 | 70 | Web developers will need to verify certain aspects of screen readers' status which are not directly observable by end users. 
The properties of interest have not yet been identified (nor have they been standardized across screen readers), but they may include, for instance, whether [so-called "modal" screen readers](https://github.com/w3c/aria-at/wiki/Screen-Reader-Terminology-Translation) are in "interaction mode" or "reading mode." 71 | 72 | WebDriver currently does not provide a mechanism for retrieving this information. 73 | 74 | 75 | ### Observe spoken text 76 | 77 | Web developers will need to verify the correctness of the words that the screen reader vocalizes to the end user. The screen reader under test may attempt to vocalize at any moment (e.g. due to [ARIA live regions](https://www.w3.org/TR/wai-aria/#dfn-live-region) or due to screen reader implementation bugs), and this unique aspect warrants special consideration when evaluating potential paths forward. 78 | 79 | WebDriver currently does not provide a mechanism for retrieving this information. 80 | 81 | 82 | ## Proposal: Specify a new service to complement WebDriver 83 | 84 | The required functionality which is _not_ available in WebDriver could be provided by a new service. In the immediate term, this service could be implemented as a standalone process, similar to the “WebDriver servers” which operate in parallel to the web browsers under test. In contrast to the alternatives described below, the software architecture would not necessarily be restricted by the design of the WebDriver standard, potentially reducing the overall complexity. 85 | 86 | As the solution matures, it could become a part of the assistive technology itself, obviating the need for an additional process. In the longer term, developer ergonomics could be further improved by [extending the WebDriver standard](https://w3c.github.io/webdriver/#extensions-0) with commands to integrate with this new specification. 87 | 88 | We feel this is the most promising direction for a standardization effort. 
89 | 90 | 91 | ## Considered alternatives 92 | 93 | 94 | ### Extend WebDriver with desired functionality 95 | 96 | The required functionality could be specified in terms of the WebDriver standard. Initially, [the WebDriver standard’s built-in extension mechanism](https://w3c.github.io/webdriver/#extensions-0) could be used to publish normative text in a distinct document. As the text matures and becomes implemented, it might be moved into the WebDriver standard itself. 97 | 98 | We have chosen not to proceed in this direction because it extends the responsibilities of the WebDriver server in a way that the maintainers are unlikely to support. In addition to integrating with their respective web browser, implementations like [GeckoDriver](https://github.com/mozilla/geckodriver) and [ChromeDriver](https://chromedriver.chromium.org/) would need to integrate with every available screen reader. 99 | 100 | A second hurdle to this approach concerns the state of the relevant standard. The particular needs of spoken text retrieval could not be met by the WebDriver standard in its current form. Standardizing this feature would require extending [WebDriver BiDi, the bi-directional version of WebDriver](https://w3c.github.io/webdriver-bidi/). WebDriver BiDi is still being designed in 2023, making it a more volatile basis for extension. 101 | 102 | 103 | ### Extend one or more existing WebDriver servers 104 | 105 | The required functionality could be built into an existing WebDriver server (e.g. [GeckoDriver](https://github.com/mozilla/geckodriver) or [ChromeDriver](https://chromedriver.chromium.org/)) using appropriate vendor prefixes. This would limit the number of subsystems involved, and like any reduction in complexity, it would help mitigate bugs. 
106 | 107 | We have chosen not to proceed in this direction because while we are interested in testing multiple browsers, integrating with multiple browsers directly (instead of building "on top" of multiple browsers) will increase the development effort required to achieve that goal. Further, none of the participants in this effort have the expertise necessary to rapidly implement a solution so tightly-coupled to the existing technology. 108 | 109 | 110 | ### Build a tool which integrates with operating systems to observe and control screen readers 111 | 112 | Some of the required functionality could be provided by a tool that does not integrate with the screen reader directly. Such a tool could give instructions to the screen reader by simulating keyboard key presses at the level of the operating system (OS). Also at the OS-level, the tool could implement a text-to-speech "voice" which exposes the vocalizations as a stream of textual data. A general audience could benefit from this work if the source code and documentation were published under a free-and-open-source-software license. 113 | 114 | It is unclear whether some requirements (namely, configuring screen readers and observing their state) could be satisfied using this approach because there are no consistent operating-system-level facilities for these features. Even within the subset of required capabilities which can be realized via these means, the absence of a standard would undermine stability, and the commitment to a concrete implementation would limit adoption. While we recognize that this approach may yield helpful implementation experience in advance of consensus around a standard (see [the at-driver-driver project](https://github.com/w3c/at-driver-driver)), we believe that it is fundamentally insufficient. 
115 | 116 | 117 | ### Promote nascent standard for introspecting accessibility properties 118 | 119 | [The Accessibility Object Model](https://wicg.github.io/aom/) is an effort whose goal is "to create a JavaScript API to allow developers to modify (and eventually explore) the accessibility tree for an HTML page." Developers empowered in this way could validate their code in terms of the data structure which the browser provides to the screen reader, giving them some confidence about the accessibility of their work. 120 | 121 | We have chosen not to proceed in this direction because we believe developers would be well-served by being able to observe the complete user experience *alongside* the low-level accessibility primitives. Assistive technologies play a critical role in shaping user experience, and this proposal's "end-to-end" nature (which encompasses the assistive technology in addition to the developer's code and the web browser) will give developers insight into that experience. Also, because this proposal exposes information about the end-user's experience (rather than a diagnostic data structure), we expect more people will be able to participate in the design and maintenance of systems built on it. 122 | 123 | 124 | ## References 125 | 126 | * [ARIA-AT](https://aria-at.w3.org/) 127 | * [WebDriver](https://w3c.github.io/webdriver/) 128 | 129 | ## Implementations 130 | 131 | **[Generic Driver](https://github.com/w3c/aria-at-automation-driver)** 132 | A partial implementation which integrates with operating system APIs rather than any particular assistive technology. 133 | 134 | **[NVDA AT Automation](https://github.com/Prime-Access-Consulting/nvda-at-automation)** 135 | An implementation which integrates with [the NVDA screen reader](https://www.nvaccess.org/about-nvda/) via its "addon" system. 
136 | -------------------------------------------------------------------------------- /index.bs: -------------------------------------------------------------------------------- 1 | 2 | Title: AT Driver 3 | Shortname: at-driver 4 | Level: 1 5 | Status: ED 6 | Group: browser-testing-tools 7 | Repository: w3c/at-driver 8 | URL: https://w3c.github.io/at-driver/ 9 | Editor: Mike Pennisi, Bocoup https://bocoup.com, mike@bocoup.com 10 | Former Editor: Simon Pieters, Bocoup https://bocoup.com, simon@bocoup.com 11 | Abstract: A protocol for introspection and remote control of assistive technology software 12 | Markup Shorthands: markdown yes 13 | 14 | 15 | 16 | spec:infra; type:dfn; for:/; text:set 17 | spec:infra; type:dfn; text:list 18 | 19 | 20 | 27 | 28 | 32 | 33 | 47 | 48 | Introduction {#intro} 49 | ===================== 50 | 51 | AT Driver defines a protocol for introspection and remote control of assistive technology software, using a bidirectional communication channel. 52 | 53 | Explainer {#explainer} 54 | ====================== 55 | 56 | Specify a protocol using WebSocket that maximally reuses concepts and conventions from [WebDriver BiDi](https://w3c.github.io/webdriver-bidi/). 57 | 58 | A connection has two endpoints: remote and local. The remote end can control and read from the screen reader, which can either be implemented as a standalone application or be implemented as part of the AT software. The local end is what the test interfaces with, usually in the form of language-specific libraries providing an API. 59 | 60 | There should only be the WebSocket form of communication -- as in [BiDi-only sessions for WebDriver BiDi](https://w3c.github.io/webdriver-bidi/#supports-bidi-only-sessions). 61 | 62 | A connection can have 0 or more [=sessions=]. Each session corresponds to an instance of an AT. We may limit the maximum number of sessions per AT to 1 initially. 
63 | 64 | When a remote end supports multiple sessions, it does not necessarily mean that there will be multiple ATs running at the same time in the same instance of an OS. Some ATs might not be able to function properly if there are other ATs running at the same time. The AT Driver [=session=] concept can still be used by having the remote end run in a separate environment, with each AT running in its own OS instance (for example in a virtual machine) and the remote end proxying messages in some fashion. 65 | 66 | Commands are grouped into [=modules=]. The modules could be: Sessions, Settings, Actions. 67 | 68 | Message transport is provided using the WebSocket protocol. 69 | 70 | The protocol is defined using a Concise Data Definition Language (CDDL) definition. The serialization is JSON. 71 | 72 | Example {#explainer-example} 73 | ---------------------------- 74 | 75 | First, the local end would establish a WebSocket connection. 76 | 77 | The local end then creates a session by sending 78 | 79 | ```json 80 | {"method":"session.new","params":{...}} 81 | ``` 82 | 83 | The local end can then send commands to change settings or send key press actions for that session. The local end assigns a command id (which is included in the message). The remote end sends a message back with the result and the command id, so the local end knows which command the message applies to. 84 | 85 | When the screen reader speaks, the remote end will send a message to the local end with the spoken text. This could be in the form of an event, which is not tied to any particular command. 86 | 87 | Infrastructure {#infra} 88 | ======================= 89 | 90 | This specification depends on the Infra Standard. [[!INFRA]] 91 | 92 | Network protocol messages are defined using CDDL. [[!RFC8610]] 93 | 94 | A Universally Unique Identifier (UUID) is a 128 bits long URN that requires no central registration process.
Generating a UUID means creating a UUID Version 4 value and converting it to the string representation. [[!RFC9562]] 95 | 96 | Where algorithms that return values are fallible, they are written in terms of returning either success or error. A [=success=] value has an associated data field which encapsulates the value returned, whereas an [=error=] response has an associated [=error code=]. 97 | 98 | When calling a fallible algorithm, the construct "Let |result| be the result of trying to call |algorithm|" is equivalent to: 99 | 100 | 1. Let |temp| be the result of calling |algorithm|. 101 | 2. If |temp| is an [=error=], then return |temp|. Otherwise, let |result| be |temp|'s data field. 102 | 103 | Note: This means that errors are propagated upwards when using "trying". 104 | 105 | Nodes {#nodes} 106 | ============== 107 | 108 | The AT Driver protocol consists of communication between: 109 | 110 | : local end 111 | :: The local end represents the client side of the protocol, which is usually in the form of language-specific libraries providing an API on top of the AT Driver protocol. This specification does not place any restrictions on the details of those libraries above the level of the wire protocol. 112 | : remote end 113 | :: The remote end hosts the server side of the protocol. The remote end is responsible for driving and listening to the assistive technology and sending information to the local end as defined in this specification. 114 | 115 | Protocol {#protocol} 116 | ==================== 117 | 118 | This section defines the basic concepts of the AT Driver protocol. These terms are distinct from their representation at the transport layer. 119 | 120 | The protocol is defined using a CDDL definition. 
For the convenience of implementors two separate CDDL definitions are defined; the remote end definition which defines the format of messages produced on the [=local end=] and consumed on the [=remote end=], and the local end definition which defines the format of messages produced on the [=remote end=] and consumed on the [=local end=]. 121 | 122 | 123 | Definition {#protocol-definition} 124 | --------------------------------- 125 | 126 | This section gives the initial contents of the remote end definition and local end definition. These are augmented by the definition fragments defined in the remainder of the specification. 127 | 128 | {^Remote end definition^} 129 | 130 | 131 | Command = { 132 | id: uint, 133 | CommandData, 134 | Extensible, 135 | } 136 | 137 | CommandData = ( 138 | SessionCommand // 139 | SettingsCommand // 140 | InteractionCommand 141 | ) 142 | 143 | EmptyParams = { Extensible } 144 | 145 | 146 | {^Local end definition^}: 147 | 148 | 149 | Message = ( 150 | CommandResponse // 151 | ErrorResponse // 152 | Event 153 | ) 154 | 155 | CommandResponse = { 156 | id: uint, 157 | result: ResultData, 158 | Extensible, 159 | } 160 | 161 | ErrorResponse = { 162 | id: uint / null, 163 | error: "unknown error" / "unknown command" / "invalid argument" / "session not created", 164 | message: text, 165 | ?stacktrace: text, 166 | Extensible, 167 | } 168 | 169 | ResultData = ( 170 | EmptyResult / 171 | SessionResult / 172 | SettingsResult 173 | ) 174 | 175 | EmptyResult = {} 176 | 177 | Event = { 178 | EventData, 179 | Extensible, 180 | } 181 | 182 | EventData = ( 183 | InteractionEvent 184 | ) 185 | 186 | 187 | {^Remote end definition^} and {^local end definition^}: 188 | 189 | 190 | Extensible = { 191 | *text => any 192 | } 193 | 194 | 195 | Capabilities {#protocol-capabilities} 196 | ------------------------------------- 197 | 198 | Capabilities are used to communicate the features supported by a given implementation. 
The [=local end=] may use capabilities to define which features it requires the [=remote end=] to satisfy when creating a new [=session=]. Likewise, the [=remote end=] uses capabilities to describe the full feature set for a [=session=]. 199 | 200 | The following table of standard capabilities enumerates the capabilities each implementation must support. 201 | 202 | 203 | 204 | 205 | 210 | 215 | 220 |
Standard capabilities
Capability 206 | Key 207 | Value type 208 | Description 209 |
AT name 211 | "`atName`" 212 | [=string=] 213 | Identifies the assistive technology. 214 |
AT version 216 | "`atVersion`" 217 | [=string=] 218 | Identifies the version of the assistive technology. 219 |
Platform 221 | "`platformName`" 222 | [=string=] 223 | Identifies the operating system of the [=remote end=]. 224 |
225 | 226 | [=Remote ends=] may introduce extension capabilities that are extra capabilities used to provide configuration or fulfill other vendor-specific needs. Extension capabilities' key must contain a "`:`" (colon) character, denoting an implementation specific namespace. The value can be arbitrary JSON types. 227 | 228 |
229 | 230 | To process capabilities with argument |parameters|, the [=remote end=] must: 231 | 232 | 1. If |parameters|["`capabilities`"] [=map/exists=] and |parameters|["`capabilities`"]["`alwaysMatch`"] [=map/exists=]: 233 | 1. Let |required capabilities| be |parameters|["`capabilities`"]["`alwaysMatch`"]. 234 | 2. Otherwise: 235 | 1. Let |required capabilities| be a new [=map=]. 236 | 3. Return the result of [=match capabilities=] given |required capabilities|. 237 | 238 |
239 | 240 |
241 | 242 | To match capabilities given |requested capabilities|, the [=remote end=] must: 243 | 244 | 1. Let |matched capabilities| be a [=map=] with the following entries: 245 | : "`atName`" 246 | :: [=ASCII lowercase=] name of the assistive technology as a [=string=]. 247 | : "`atVersion`" 248 | :: The assistive technology version, as a [=string=]. 249 | : "`platformName`" 250 | :: [=ASCII lowercase=] name of the current platform as a [=string=]. 251 | 2. Optionally add [=extension capabilities=] as entries to |matched capabilities|. 252 | 3. [=map/For each=] |key| → |value| of |requested capabilities|: 253 | 1. Let |match value| be |value|. 254 | 2. Switch on |key|: 255 | : "`atName`" 256 | :: If |value| is not equal to |matched capabilities|["`atName`"], then return [=success=] with data null. 257 | : "`atVersion`" 258 | :: Compare |value| to |matched capabilities|["`atVersion`"] using an [=implementation-defined=] comparison algorithm. The comparison is to accept a value that places constraints on the version using the "`<`", "`<=`", "`>`", and "`>=`" operators. If the two values do not match, return [=success=] with data null. 259 | : "`platformName`" 260 | :: If |value| is not equal to |matched capabilities|["`platformName`"], then return [=success=] with data null. 261 | : Otherwise 262 | :: If |key| is the key of an [=extension capability=], set |match value| to the result of [=trying=] implementation-specific steps to match on |key| with |value|. If the match is not successful, return [=success=] with data null. 263 | 3. [=map/Set=] |matched capabilities|[|key|] to |match value|. 264 | 4. Return [=success=] with data |matched capabilities|. 265 | 266 |
267 | 268 | Session {#protocol-session} 269 | --------------------------- 270 | 271 | A session represents the connection between a [=local end=] and a specific [=remote end=]. 272 | 273 | A [=remote end=] has an associated list of active sessions, which is a list of all [=sessions=] that are currently started. A remote end has at most one [=active session=] at a given time. 274 | 275 | A [=session=] has an associated session ID (a string representation of a [=UUID=]) used to uniquely identify this session. Unless stated otherwise it is null. 276 | 277 | 278 | Modules {#protocol-modules} 279 | --------------------------- 280 | 281 | The AT Driver protocol is organized into modules. 282 | 283 | Each module represents a collection of related [=commands=] and [=events=] pertaining to a certain aspect of the assistive technology. 284 | 285 | Each module has a module name which is a string. The [=command name=] and [=event name=] for commands and events defined in the module start with the [=module name=] followed by a period "`.`". 286 | 287 | Modules which contain [=commands=] define {^remote end definition^} fragments. 288 | 289 | An implementation may define extension modules. These must have a module name that contains a single colon "`:`" character. The part before the colon is the prefix; this is typically the same for all extension modules specific to a given implementation and should be unique for a given implementation. Such modules extend the {^local end definition^} and {^remote end definition^} providing additional groups as choices for the defined [=commands=] and [=events=]. 290 | 291 | Commands {#protocol-commands} 292 | ----------------------------- 293 | 294 | A command is an asynchronous operation, requested by the [=local end=] and run on the [=remote end=], resulting in either a [=success=] or an [=error=] being returned to the [=local end=]. Multiple commands can run at the same time, and commands can potentially be long-running. 
As a consequence, commands can finish out-of-order. 295 | 296 | Each [=command=] is defined by: 297 | 298 | * A command type which is defined by a {^remote end definition^} fragment containing a group. Each such group has two fields: 299 | * `method` which is a string literal in the form `[module name].[method name]`. This is the command name. 300 | * `params` which defines a mapping containing data to be passed into the command. The populated value of this map is the command parameters. 301 | * A result type, which is defined by the {^local end definition^} fragment. 302 | * A set of remote end steps which define the actions to take for a command given a [=session=] and [=command parameters=] and return an instance of the command [=result type=]. 303 | 304 | A command that can run without an active session is a static command. Commands are not static commands unless stated in their definition. 305 | 306 | When commands are sent from the [=local end=] they have a command id. This is an identifier used by the [=local end=] to identify the response from a particular command. From the point of view of the [=remote end=] this identifier is opaque and cannot be used internally to identify the command. 307 | 308 | The set of all command names is a [=set=] containing all the defined [=command names=], including any belonging to [=extension modules=]. 309 | 310 | Events {#protocol-events} 311 | ------------------------- 312 | 313 | An event is a notification, sent by the [=remote end=] to the [=local end=], signaling that something of interest has occurred on the [=remote end=]. 314 | 315 | * An event type is defined by a {^local end definition^} fragment containing a group. Each such group has two fields: 316 | * `method` which is a string literal of the form `[module name].[event name]`. This is the event name. 317 | * `params` which defines a mapping containing event data. The populated value of this map is the event parameters.
318 | * A remote end event trigger which defines when the event is triggered and steps to construct the [=event type=] data. 319 | 320 | Errors {#protocol-errors} 321 | ------------------------- 322 | 323 | The following table lists each error code, its associated JSON `error` code, and a non-normative description of the error. 324 | 325 | 326 | 327 | 328 | 332 | 336 | 340 | 344 | 348 | 352 | 356 |
Error codes
Error code 329 | JSON error code 330 | Description 331 |
invalid argument 333 | `invalid argument` 334 | The arguments passed to a [=command=] are either invalid or malformed. 335 |
invalid session id 337 | `invalid session id` 338 | The [=session=] either does not exist or it's not active. 339 |
unknown command 341 | `unknown command` 342 | A [=command=] could not be executed because the [=remote end=] is not aware of it. 343 |
session not created 345 | `session not created` 346 | A new [=session=] could not be created. 347 |
unknown user intent 349 | `unknown user intent` 350 | The remote end does not support a user intent with the provided name. 351 |
cannot simulate keyboard interaction 353 | `cannot simulate keyboard interaction` 354 | The [=remote end=] cannot simulate keyboard interaction. 355 |
invalid OS focus state 357 | `invalid OS focus state` 358 | The application that currently has OS focus is not one of the expected applications. 359 |
360 | 361 | Security checks {#security-checks} 362 | ---------------------------------- 363 | 364 | In order to mitigate security risks when using this API, there are some security checks for certain commands. 365 | 366 | To check that keyboard interaction can be simulated: 367 | 368 | 1. If the remote end cannot simulate keyboard interaction for any [=implementation-defined=] reason, then return an [=error=] with [=error code=] [=cannot simulate keyboard interaction=]. 369 | 2. Return [=success=] with data null. 370 | 371 | To check that one of the expected applications has focus: 372 | 373 | 1. If the application that currently has OS focus (and so could act on simulated key presses from this API) is not one of the expected applications, then return an error with error code invalid OS focus state. Which applications are expected is implementation-defined. 374 | 375 | Issue(77): Is the "OS focus" check a viable security restriction for "send keys"? 376 | 2. Return success with data null. 377 | 378 | To determine whether a string |text| should be withheld: 379 | 380 | 1. If the [=remote end=] determines that it is unsafe to expose |text| to an external process for any [=implementation-defined=] reason: 381 | 1. Return true. 382 | 2. Return false. 383 | 384 | Transport {#transport} 385 | ====================== 386 | 387 | Message transport is provided using the WebSocket protocol. [[!RFC6455]] 388 | 389 | A WebSocket listener is a network endpoint that is able to accept incoming WebSocket connections. 390 | 391 | A [=WebSocket listener=] has a host, 392 | a port, 393 | and a secure flag. 394 | 395 | When a [=WebSocket listener=] |listener| is created, a [=remote end=] must start to listen for WebSocket connections on the host and port given by |listener|'s host and port. If |listener|'s [=secure flag=] is set, then connections established from |listener| must be TLS encrypted.
396 | 397 | A [=remote end=] has a [=set=] of [=WebSocket listeners=] active listeners, which is initially empty. 398 | 399 | A [=remote end=] has a [=set=] of WebSocket connections not associated with a session, which is initially empty. 400 | 401 | A WebSocket connection is a network connection that follows the requirements of the WebSocket protocol. [[!RFC6455]] 402 | 403 | A [=session=] has a [=set=] of session WebSocket connections whose elements are [=WebSocket connections=]. This is initially empty. 404 | 405 | A [=session=] |session| is associated with connection |connection| if |session|'s [=session WebSocket connections=] contains |connection|. 406 | 407 | Note: Each [=WebSocket connection=] is associated with at most one [=session=]. 408 | 409 | When a client [establishes a WebSocket connection](https://tools.ietf.org/html/rfc6455#section-4.1) |connection| by connecting to one of the set of [=active listeners=] |listener|, the implementation must proceed according to the [WebSocket server-side requirements](https://tools.ietf.org/html/rfc6455#section-4.2), with the following steps run when deciding whether to accept the incoming connection: 410 | 411 | 1. Let |resource name| be the resource name from [reading the client's opening handshake](https://tools.ietf.org/html/rfc6455#section-4.2.1). If |resource name| is not "`/session`", then stop running these steps and act as if the requested service is not available. 412 | 2. Run any other [=implementation-defined=] steps to decide if the connection should be accepted, and if it is not stop running these steps and act as if the requested service is not available. 413 | 3. Add the connection to the set of [=WebSocket connections not associated with a session=]. 
414 | 415 | When a [WebSocket message has been received](https://tools.ietf.org/html/rfc6455#section-6.2) for a [=WebSocket connection=] |connection| with type |type| and data |data|, a [=remote end=] must [=handle an incoming message=] given |connection|, |type| and |data|. 416 | 417 | When the [WebSocket closing handshake is started](https://tools.ietf.org/html/rfc6455#section-7.1.3) or when the [WebSocket connection is closed](https://tools.ietf.org/html/rfc6455#section-7.1.4) for a [=WebSocket connection=] |connection|, a [=remote end=] must [=handle a connection closing=] given |connection|. 418 | 419 | Note: Both conditions are needed because it is possible for a WebSocket connection to be closed without a closing handshake. 420 | 421 | To start listening for a WebSocket connection: 422 | 423 | 1. Let |listener| be a new [=WebSocket listener=] with [=implementation-defined=] host, port, and secure flag. 424 | 2. Append |listener| to the [=remote end=]'s [=active listeners=]. 425 | 3. Return |listener|. 426 | 427 | Note: a future iteration of this specification may allow multiple connections, to support [intermediary nodes like in WebDriver](https://w3c.github.io/webdriver/#dfn-intermediary-nodes). 428 | 429 |
430 | 431 | To handle an incoming message given a [=WebSocket connection=] |connection|, type |type| and data |data|: 432 | 433 | 1. If |type| is not [text](https://tools.ietf.org/html/rfc6455#section-5.2): 434 | 1. [=Send an error response=] given |connection|, null, and invalid argument. 435 | 2. Return. 436 | 2. [=Assert=]: |data| is a [=scalar value string=], because the [WebSocket handling errors in UTF-8-encoded data](https://tools.ietf.org/html/rfc6455#section-8.1) would already have [failed the WebSocket connection](https://tools.ietf.org/html/rfc6455#section-7.1.7) otherwise. 437 | 3. If there is a [=session=] [=associated with connection=] |connection|, let |session| be that session. Otherwise if |connection| is in the set of [=WebSocket connections not associated with a session=], let |session| be null. Otherwise, return. 438 | 4. Let |parsed| be the result of [=parsing JSON into Infra values=] given |data|. If this throws an exception, then [=send an error response=] given |connection|, null, and invalid argument, and finally return. 439 | 5. Match |parsed| against the {^remote end definition^}. If this results in a match: 440 | 1. Let |matched| be the map representing the matched data. 441 | 2. [=Assert=]: |matched| contains "`id`", "`method`", and "`params`". 442 | 3. Let |command id| be |matched|["`id`"]. 443 | 4. Let |method| be |matched|["`method`"]. 444 | 5. Let |command| be the command with [=command name=] |method|. 445 | 6. If |session| is null and |command| is not a [=static command=]: 446 | 1. [=Send an error response=] given |connection|, |command id|, and [=invalid session id=]. 447 | 2. Return. 448 | 7. Run the following steps [=in parallel=]: 449 | 1. Let |result| be the result of running the [=remote end steps=] for |command| given |session| and [=command parameters=] |matched|["`params`"]. 450 | 2. If |result| is an [=error=]: 451 | 1. [=Send an error response=] given |connection|, |command id|, and |result|'s [=error code=]. 452 | 2. 
Return. 453 | 3. Let |value| be |result|'s data. 454 | 4. [=Assert=]: |value| matches the definition for the [=result type=] corresponding to the command with [=command name=] |method|. 455 | 5. If |method| is "`session.new`": 456 | 1. Let |session| be the entry in the list of [=active sessions=] whose [=session ID=] is equal to the "`sessionId`" property of |value|. 457 | 2. [=set/Append=] |connection| to |session|'s [=session WebSocket connections=]. 458 | 3. Remove |connection| from the set of [=WebSocket connections not associated with a session=]. 459 | 6. Let |response| be a new map matching the `CommandResponse` production in the {^local end definition^} with the `id` field set to |command id| and the `result` field set to |value|. 460 | 7. Let |serialized| be the result of [=serialize an infra value to JSON bytes=] given |response|. 461 | 8. [Send a WebSocket message](https://tools.ietf.org/html/rfc6455#section-6.1) comprised of |serialized| over |connection|. 462 | 6. Otherwise: 463 | 1. Let |command id| be null. 464 | 2. If |parsed| is a [=map=] and |parsed|["`id`"] exists and is an integer greater than or equal to zero, set |command id| to that integer. 465 | 3. Let |error code| be invalid argument. 466 | 4. If |parsed| is a [=map=] and |parsed|["`method`"] exists and is a string, but |parsed|["`method`"] is not in the [=set of all command names=], set |error code| to [=unknown command=]. 467 | 5. [=Send an error response=] given |connection|, |command id|, and |error code|. 468 | 469 |
470 | 471 |
472 | 473 | To emit an event given |session|, and |body|: 474 | 475 | 1. [=Assert=]: |body| has size 2 and contains "`method`" and "`params`". 476 | 2. Let |serialized| be the result of [=serialize an infra value to JSON bytes=] given |body|. 477 | 3. [=list/For each=] |connection| in |session|'s [=session WebSocket connections=]: 478 | 1. [Send a WebSocket message](https://tools.ietf.org/html/rfc6455#section-6.1) comprised of |serialized| over |connection|. 479 | 480 |
481 | 482 |
483 | 484 | To send an error response given a [=WebSocket connection=] |connection|, |command id|, and |error code|: 485 | 486 | 1. Let |error data| be a new [=map=] matching the `ErrorResponse` production in the {^local end definition^}, with the `id` field set to |command id|, the `error` field set to |error code|, the `message` field set to an [=implementation-defined=] string containing a human-readable definition of the error that occurred and the `stacktrace` field optionally set to an [=implementation-defined=] string containing a stack trace report of the active stack frames at the time when the error occurred. 487 | 2. Let |response| be the result of [=serialize an infra value to JSON bytes=] given |error data|. 488 | 489 | Note: |command id| can be null, in which case the `id` field will also be set to null, not omitted from |response|. 490 | 3. [Send a WebSocket message](https://tools.ietf.org/html/rfc6455#section-6.1) comprised of |response| over |connection|. 491 | 492 |
493 | 494 |
495 | 496 | To handle a connection closing given a [=WebSocket connection=] |connection|: 497 | 498 | 1. If there is a [=session=] [=associated with connection=] |connection|: 499 | 1. Let |session| be the [=session=] [=associated with connection=] |connection|. 500 | 2. Remove |connection| from |session|'s [=session WebSocket connections=]. 501 | 3. If |session|'s [=session WebSocket connections=] is [=list/empty=]: 502 | 1. Remove |session| from [=active sessions=]. 503 | 2. Otherwise, if the set of [=WebSocket connections not associated with a session=] contains |connection|, remove |connection| from that set. 504 | 505 |
506 | 507 | Establishing a Connection {#transport-establishing} 508 | ---------------------------------------------------- 509 | 510 | The URL to the WebSocket server is communicated out-of-band. When an implementation is ready to accept requests to start an AT Driver session, it must: 511 | 512 | 1. [=Start listening for a WebSocket connection=]. 513 | 514 | 515 | Modules {#modules} 516 | ================== 517 | 518 | The session Module {#module-session} 519 | ------------------------------------ 520 | 521 | ### Definition ### {#module-session-definition} 522 | 523 | {^Remote end definition^}: 524 | 525 | 526 | SessionCommand = (SessionNewCommand) 527 | 528 | 529 | {^Local end definition^} 530 | 531 | 532 | SessionResult = (SessionNewResult) 533 | 534 | 535 | ### Types ### {#module-session-types} 536 | 537 | #### The session.CapabilitiesRequest Type #### {#module-session-CapabilitiesRequest} 538 | 539 | {^Remote end definition^} and {^local end definition^}: 540 | 541 | 542 | CapabilitiesRequest = { 543 | ?atName: text, 544 | ?atVersion: text, 545 | ?platformName: text, 546 | Extensible, 547 | } 548 | 549 | 550 | The `CapabilitiesRequest` type represents capabilities requested for a session. 551 | 552 | ### Commands ###{#module-session-commands} 553 | 554 | #### The session.new Command #### {#module-session-new} 555 | 556 | The session.new command allows creating a new [=session=]. This is a [=static command=]. 557 | 558 |
559 |
[=Command Type=] 560 |
561 | 562 | 563 | SessionNewCommand = { 564 | method: "session.new", 565 | params: {capabilities: CapabilitiesRequestParameters}, 566 | } 567 | 568 | CapabilitiesRequestParameters = { 569 | ?alwaysMatch: CapabilitiesRequest, 570 | } 571 | 572 | 573 | Note: `firstMatch` is not included currently to reduce complexity. 574 | 575 |
[=Result Type=] 576 |
577 | 578 | 579 | SessionNewResult = { 580 | sessionId: text, 581 | capabilities: { 582 | atName: text, 583 | atVersion: text, 584 | platformName: text, 585 | Extensible, 586 | } 587 | } 588 | 589 | 590 |
591 | 592 |
593 | 594 | The [=remote end steps=] given |session| and |command parameters| are: 595 | 596 | 1. If |session| is not null, return an [=error=] with error code [=session not created=]. 597 | 2. If the list of [=active sessions=] is not empty, then return [=error=] with error code [=session not created=]. 598 | 3. If the implementation is unable to start a new session for any reason, return an [=error=] with error code [=session not created=]. 599 | 4. Let |capabilities| be the result of [=trying=] to [=process capabilities=] with |command parameters|. 600 | 5. If |capabilities| is null, return [=error=] with error code [=session not created=]. 601 | 6. Let |session id| be the result of [=generating a UUID=]. 602 | 7. Let |session| be a new [=session=] with the [=session ID=] of |session id|. 603 | 8. Append |session| to [=active sessions=]. 604 | 9. Start an instance of the appropriate assistive technology, given |capabilities|. 605 | 10. Let |body| be a new [=map=] matching the `SessionNewResult` production, with the `sessionId` field set to |session|'s [=session ID=], and the `capabilities` field set to |capabilities|. 606 | 11. Return [=success=] with data |body|. 607 | 608 |
609 | 610 |
611 | A [=local end=] that wishes to create a new [=Session=] which is furnished with 612 | an assistive technology named `NVDA` might send a Command such as: 613 | 614 | ```json 615 | { 616 | "id": 24601, 617 | "method": "session.new", 618 | "params": { 619 | "capabilities": { 620 | "alwaysMatch": { 621 | "atName": "NVDA", 622 | "user-defined property": "user-defined value" 623 | } 624 | } 625 | } 626 | } 627 | ``` 628 | 629 | And the [=remote end=] which receives that Command could respond with the 630 | following Message: 631 | 632 | ```json 633 | { 634 | "id": 24601, 635 | "result": { 636 | "sessionId": "4f974515-cac5-4e45-9604-95d918116e83", 637 | "capabilities": { 638 | "atName": "NVDA", 639 | "atVersion": "2025.1", 640 | "platformName": "Windows" 641 | } 642 | } 643 | } 644 | ``` 645 |
646 | 647 | The settings Module {#module-settings} 648 | -------------------------------------- 649 | 650 | Currently, there are no standardized settings. Implementations are strongly encouraged to review the security implications of each setting they offer to end users, and only expose the settings that they deem safe. This specification does not define what constitutes a setting, but the settings module is designed to control user preferences such as the default voice, or the default rate of speech. 651 | 652 | A [=remote end=] has an associated set of supported settings, which is either null or a [=set=] of strings which contains the name of every setting that may be referenced by this [=module=]. 653 | 654 |
655 | 656 | To validate setting name given string |name|: 657 | 658 | 1. If [=supported settings=] is null: 659 | 1. Return `"unknown"`. 660 | 2. If [=supported settings=] [=list/contains=] |name|: 661 | 1. Return `"valid"`. 662 | 3. Return `"invalid"`. 663 | 664 |
665 | 666 |
667 | 668 | To get settings given a [=list=] of strings |names|: 669 | 670 | 1. Let |items| be a new [=list=]. 671 | 2. [=list/For each=] |name| of |names|: 672 | 1. If [=validate setting name=] given |name| is `"invalid"`: 673 | 1. Return an [=error=] with [=error code=] invalid argument. 674 | 2. Let |value| be the value of the setting named |name|. 675 | 3. Let |item| be a new [=map=] matching the `SettingsGetSettingsResultItem` production in the {^local end definition^} with the `name` field set to |name| and the `value` field set to |value|. 676 | 4. [=list/Append=] |item| to |items|. 677 | 3. Let |body| be a new [=map=] matching the `SettingsGetSettingsResult` production, with the `settings` field set to |items|. 678 | 4. Return [=success=] with data |body|. 679 | 680 |
681 | 682 |
683 | 684 | To modify setting given string |name| and |value|: 685 | 686 | 1. If [=validate setting name=] given |name| is `"invalid"`: 687 | 1. Return an [=error=] with [=error code=] invalid argument. 688 | 2. Take any [=implementation-defined=] steps to change the [=remote end=] setting named |name| to the value |value|. 689 | 3. If there is any [=implementation-defined=] indication that the setting named |name| does not hold the value |value|: 690 | 1. Return an [=error=] with [=error code=] invalid argument. 691 | 4. Return [=success=] with data null. 692 | 693 |
694 | 695 | Note: Today's implementations may not be able to detect invalid setting names or values, limiting their ability to report when operations do not model authentic interactions with the internal state. The algorithms in this [=module=] are designed to reflect this. 696 | 697 | Issue: Require implementations to maintain a static list of supported settings. 698 | 699 | ### Definition ### {#module-settings-definition} 700 | 701 | {^Remote end definition^} 702 | 703 | 704 | SettingsCommand = { 705 | SettingsSetSettingsCommand // 706 | SettingsGetSettingsCommand // 707 | SettingsGetSupportedSettingsCommand 708 | } 709 | 710 | 711 | {^Local end definition^} 712 | 713 | 714 | SettingsResult = { 715 | SettingsGetSettingsResult 716 | } 717 | 718 | 719 | ### Types ### {#module-settings-types} 720 | 721 | #### The SettingsGetSettingsResult type #### {#module-settings-get-settings-result} 722 | 723 | {^Local end definition^}: 724 | 725 | 726 | SettingsGetSettingsResult = { 727 | settings: [1* SettingsGetSettingsResultItem ], 728 | } 729 | 730 | SettingsGetSettingsResultItem = { 731 | name: text, 732 | value: any, 733 | Extensible, 734 | } 735 | 736 | 737 | The `SettingsGetSettingsResult` type contains a list of settings and their values. 738 | 739 | ### Commands ### {#module-settings-commands} 740 | 741 | #### The settings.setSettings Command #### {#command-settings-set-settings} 742 | 743 | The settings.setSettings command sets the values of one or more settings. 744 | 745 | Note: Today's implementations may not be able to detect failed modification operations. [=settings.setSettings=] is designed to reflect that reality. Clients should therefore interpret [=successes=] with some skepticism as such results do not necessarily indicate that the referenced setting has the desired value. 746 | 747 | Issue: Require implementations to report failures in settings modification operations. 748 | 749 |
750 |
[=Command Type=]
751 |
752 | 753 | SettingsSetSettingsCommand = { 754 | method: "settings.setSettings", 755 | params: SettingsSetSettingsParameters 756 | } 757 | 758 | SettingsSetSettingsParameters = { 759 | settings: [1* SettingsSetSettingsParametersItem ], 760 | } 761 | 762 | SettingsSetSettingsParametersItem = { 763 | name: text, 764 | value: any, 765 | Extensible, 766 | } 767 | 768 |
769 |
[=Result Type=]
770 |
771 | 772 | EmptyResult 773 | 774 |
775 |
776 | 777 |
778 | 779 | The [=remote end steps=] given session and |command parameters| are: 780 | 781 | 1. Let |settings| be the value of the settings field of |command parameters|. 782 | 2. [=list/For each=] |setting| of |settings|: 783 | 1. Let |name| be the value of the name field of |setting|. 784 | 2. Let |value| be the value of the value field of |setting|. 785 | 3. [=Try=] to [=modify setting=] with |name| and |value|. 786 | 3. Let |body| be a new [=map=]. 787 | 4. Return [=success=] with data |body|. 788 | 789 |
790 | 791 |
792 | A [=local end=] that wishes to modify the value of settings named 793 | `readEntirePage` and `speed` might send a Command such as: 794 | 795 | ```json 796 | { 797 | "id": 1776, 798 | "method": "settings.setSettings", 799 | "params": { 800 | "settings": [ 801 | { 802 | "name": "readEntirePage", 803 | "value": false 804 | }, 805 | { 806 | "name": "speed", 807 | "value": 0.8 808 | } 809 | ] 810 | } 811 | } 812 | ``` 813 | 814 | And the [=remote end=] which receives that Command could respond with the 815 | following Message: 816 | 817 | ```json 818 | { 819 | "id": 1776, 820 | "result": {} 821 | } 822 | ``` 823 |
824 | 825 | #### The settings.getSettings Command #### {#command-settings-get-settings} 826 | 827 | The settings.getSettings command returns a list of the requested settings and their values. 828 | 829 |
830 |
[=Command Type=]
831 |
832 | 833 | SettingsGetSettingsCommand = { 834 | method: "settings.getSettings", 835 | params: SettingsGetSettingsParameters 836 | } 837 | 838 | SettingsGetSettingsParameters = { 839 | settings: [1* SettingsGetSettingsParametersItem ], 840 | } 841 | 842 | SettingsGetSettingsParametersItem = { 843 | name: text, 844 | Extensible, 845 | } 846 | 847 |
848 |
[=Result Type=]
849 |
850 | 851 | SettingsGetSettingsResult 852 | 853 |
854 |
855 | 856 |
857 | 858 | The remote end steps given session and |command parameters| are: 859 | 860 | 1. Let |names| be a [=list=] containing, in order, the value of the name field of each [=list/item=] in the settings field of |command parameters|. 861 | 2. Return the result of [=get settings=] with |names|. 862 | 863 | 
864 | 865 |
866 | A [=local end=] that wishes to retrieve the value of settings named 867 | `virtualBuffers.passThroughAudioIndication` and `cursor` might send a Command 868 | such as: 869 | 870 | ```json 871 | { 872 | "id": 8675309, 873 | "method": "settings.getSettings", 874 | "params": { 875 | "settings": [ 876 | { 877 | "name": "virtualBuffers.passThroughAudioIndication" 878 | }, 879 | { 880 | "name": "cursor" 881 | } 882 | ] 883 | } 884 | } 885 | ``` 886 | 887 | And the [=remote end=] which receives that Command could respond with the 888 | following Message: 889 | 890 | ```json 891 | { 892 | "id": 8675309, 893 | "result": { 894 | "settings": [ 895 | { 896 | "name": "virtualBuffers.passThroughAudioIndication", 897 | "value": true 898 | }, 899 | { 900 | "name": "cursor", 901 | "value": "VPC" 902 | } 903 | ] 904 | } 905 | } 906 | ``` 907 |
908 | 909 | #### The settings.getSupportedSettings Command #### {#command-settings-get-supported-settings} 910 | 911 | The settings.getSupportedSettings command returns a list of all settings that the [=remote end=] supports, and their values. 912 | 913 |
914 |
[=Command Type=]
915 |
916 | 917 | SettingsGetSupportedSettingsCommand = { 918 | method: "settings.getSupportedSettings", 919 | params: EmptyParams 920 | } 921 | 922 |
923 |
[=Result Type=]
924 |
925 | 926 | SettingsGetSettingsResult 927 | 928 |
929 |
930 | 931 | 932 |
933 | 934 | The remote end steps given session and command parameters are: 935 | 936 | 1. If [=supported settings=] is null: 937 | 1. Let |names| be a new [=list=]. 938 | 2. Otherwise: 939 | 1. Let |names| be [=supported settings=]. 940 | 3. Let |result| be the result of [=get settings=] with |names|. 941 | 4. [=Assert=]: |result| is a [=success=] value. 942 | 5. Return |result|. 943 | 944 |
945 | 946 |
947 | A [=local end=] that wishes to discover the settings available on a given 948 | [=remote end=] might send a Command such as: 949 | 950 | ```json 951 | { 952 | "id": 781, 953 | "method": "settings.getSupportedSettings", 954 | "params": {} 955 | } 956 | ``` 957 | 958 | And the [=remote end=] which receives that Command could respond with the 959 | following Message: 960 | 961 | ```json 962 | { 963 | "id": 781, 964 | "result": { 965 | "settings": [ 966 | { 967 | "name": "virtualBuffers.passThroughAudioIndication", 968 | "value": true 969 | }, 970 | { 971 | "name": "cursor", 972 | "value": "VPC" 973 | } 974 | ] 975 | } 976 | } 977 | ``` 978 |
979 | 980 | ### Events ### {#module-settings-events} 981 | 982 | Issue: Do we need a "setting changed" event? 983 | 984 | The Interaction Module {#module-interaction} 985 | -------------------------------------------- 986 | 987 | The following table of standard user intents enumerates the user intents each implementation must support. 988 | 989 | 990 | 991 | 992 | 995 |
Standard user intents
Name 993 | Algorithm 994 |
"`pressKeys`" 996 | [=press keys=] 997 |
998 | 999 | A [=remote end=] has a table of extension user intents, which is a mapping from zero or more string names of user intents to algorithms for simulating the named intents. Extension user intents' names must contain a "`:`" (colon) character, denoting an implementation-specific namespace. 1000 | 1001 | 
1002 | 1003 | Note: Each string in `KeyCombination` represents a "raw key" consisting of a 1004 | single code point with the same meaning as in WebDriver's keyboard 1006 | actions. For example, `["\uE008", "a"]` means holding the left shift key 1007 | and pressing "a", and then releasing the left shift key. [[WEBDRIVER]] 1008 | 1009 | Issue(34): This algorithm does not yet have a means for indicating a screen-reader specific modifier key (or keys). 1010 | 1011 | Issue(51): This algorithm only supports one specific kind of press/release sequence, and it is not clear if that is sufficient to express all keyboard commands in all implementations. 1012 | 1013 | To press keys given |command parameters|: 1014 | 1015 | 1. [=Try=] to [=check that keyboard interaction can be simulated=]. 1016 | 2. [=Try=] to [=check that one of the expected applications has focus=]. 1017 | 3. Let |keys| be the value of the keys field of |command 1018 | parameters|. 1019 | 4. [=list/For each=] |key| of |keys|: 1020 | 1. Run [=implementation-defined=] steps to simulate depressing |key|. 1021 | 5. [=list/For each=] |key| of |keys| in reverse [=List=] order: 1022 | 1. Run [=implementation-defined=] steps to simulate releasing |key|. 1023 | 6. Let |body| be a new [=map=]. 1024 | 7. Return [=success=] with data |body|. 1025 | 1026 |
1027 | 1028 | ### Definition ### {#module-interaction-definition} 1029 | 1030 | {^Remote end definition^}: 1031 | 1032 | 1033 | InteractionCommand = (InteractionUserIntentCommand) 1034 | 1035 | 1036 | {^Local end definition^}: 1037 | 1038 | 1039 | 1040 | InteractionEvent = (InteractionCapturedOutputEvent) 1041 | 1042 | 1043 | 1044 | ### Types ### {#module-interaction-types} 1045 | 1046 | 1047 | InteractionCapturedOutputParameters = { 1048 | data: text, 1049 | Extensible, 1050 | } 1051 | 1052 | 1053 | ### Commands ### {#module-interaction-commands} 1054 | 1055 | #### The interaction.userIntent Command #### {#module-interaction-userintent} 1056 | 1057 | The interaction.userIntent command simulates a named user intent, such as pressing a key combination on a keyboard. 1058 | 1059 | 
1060 |
[=Command Type=] 1061 |
1062 | 1063 |
1064 |   InteractionUserIntentCommand = {
1065 |     method: "interaction.userIntent",
1066 |     params: InteractionUserIntentParameters
1067 |   }
1068 | 
1069 |   InteractionUserIntentParameters = (
1070 |     PressKeysIntentParameters /
1071 |     ExtensionIntentParameters
1072 |   )
1073 | 
1074 |   PressKeysIntentParameters = {
1075 |     "name" => "pressKeys",
1076 |     "keys" => KeyCombination,
1077 |   }
1078 | 
1079 |   KeyCombination = [
1080 |     1* text
1081 |   ]
1082 | 
1083 |   ExtensionIntentParameters = {
1084 |     "name" => text,
1085 |     Extensible,
1086 |   }
1087 |   
1088 | 1089 |
Result Type 1090 |
1091 | 1092 | 1093 | EmptyResult 1094 | 1095 | 1096 |
1097 | 1098 |
1099 | 1100 | The [=remote end steps=] given session and |command parameters| are: 1101 | 1102 | 1. Let |name| be the value of the name field of |command 1103 | parameters|. 1104 | 2. If there is an entry in the [=table of standard user intents=] with name |name|: 1105 | 1. Let |algorithm| be the algorithm associated with user intent |name| in the [=table of standard user intents=]. 1106 | 3. Otherwise, if there is an entry in the [=table of extension user intents=] with name |name|: 1107 | 1. Let |algorithm| be the algorithm associated with user intent |name| in the [=table of extension user intents=]. 1108 | 4. Otherwise: 1109 | 1. Return an [=error=] with [=error code=] [=unknown user intent=]. 1110 | 5. Return the result of evaluating |algorithm| given |command parameters|. 1111 | 1112 |
1113 | 1114 |
1115 | A [=local end=] that wishes to simulate pressing 1116 | Control+h (or, stated more formally: 1117 | depressing the Control key, depressing the h key, 1118 | releasing the h key, and finally releasing the Control 1119 | key) might send a Command such as: 1120 | 1121 | ```json 1122 | { 1123 | "id": 19860814, 1124 | "method": "interaction.userIntent", 1125 | "params": { 1126 | "name": "pressKeys", 1127 | "keys": ["\uE009", "h"] 1128 | } 1129 | } 1130 | ``` 1131 | 1132 | And the [=remote end=] which receives that Command could respond with the 1133 | following Message: 1134 | 1135 | ```json 1136 | { 1137 | "id": 19860814, 1138 | "result": {} 1139 | } 1140 | ``` 1141 |
1142 | 1143 | ### Events ### {#module-interaction-events} 1144 | 1145 | #### The interaction.capturedOutput Event #### {#event-interaction-capturedOutput} 1146 | 1147 |
1148 |
Event Type 1149 |
1150 | 1151 | 1152 | InteractionCapturedOutputEvent = { 1153 | method: "interaction.capturedOutput", 1154 | params: InteractionCapturedOutputParameters 1155 | } 1156 | 1157 | 1158 |
1159 | 1160 |
1161 | 1162 | The [=remote end event trigger=] is: 1163 | 1164 | When the assistive technology would send some text |data| (a string, without speech-specific markup or annotations) to the Text-To-Speech system, or equivalent for non-speech assistive technology software, run these steps: 1165 | 1166 | 1. If |data| [=should be withheld=]: 1167 | 1. Return. 1168 | 2. Let |params| be a [=map=] matching the `InteractionCapturedOutputParameters` production with the `data` field set to |data|. 1169 | 3. Let |body| be a [=map=] matching the `InteractionCapturedOutputEvent` production with the `params` field set to |params|. 1170 | 4. [=list/For each=] |session| of [=active sessions=]: 1171 | 1. [=Emit an event=] with |session| and |body|. 1172 | 1173 |
1174 | 1175 | Privacy {#privacy} 1176 | ================== 1177 | 1178 | It is advisable that [=remote ends=] create a new profile when creating a new session. This prevents potentially sensitive session data from being accessible to new sessions, both protecting privacy and preventing state from bleeding through to the next session. 1179 | 1180 | Security {#security} 1181 | ==================== 1182 | 1183 | An assistive technology can rely on a command-line flag or a configuration option to test whether to enable AT Driver, or alternatively make the assistive technology initiate or confirm the connection through a privileged content document or control widget, in case the assistive technology does not directly implement the WebSocket endpoints. 1184 | 1185 | It is strongly suggested that assistive technology require users to take explicit action to enable AT Driver, and that AT Driver remains disabled in publicly consumed versions of the assistive technology. 1186 | 1187 | To prevent arbitrary machines on the network from connecting and creating sessions, it is suggested that only connections from loopback devices are allowed by default. 1188 | 1189 | The remote end can include a configuration option to limit the accepted IP range allowed to connect and make requests. The default setting for this might be to limit connections to the IPv4 localhost CIDR range 127.0.0.0/8 and the IPv6 localhost address ::1. [[RFC4632]] 1190 | 1191 | It is also suggested that assistive technologies make an effort to indicate that a session is under control of AT Driver. The indication should also be accessible to non-visual users. For example, this can be done through an OS-level notification or alert dialog. 1192 | 1193 | Issue: TODO sandbox (limit availability to information that apps usually can't access, e.g. login screen). 1194 | 1195 | Issue: TODO no HID level simulated keypresses. 1196 | 1197 | Issue: TODO exclude access to any security-sensitive settings. 
1198 | 1199 | Issue: TODO exclude access to any security-sensitive commands. 1200 | 1201 | Appendix A: Schemas {#schemas} 1202 | ============================== 1203 | 1204 | The {^remote end definition^} and {^local end definition^} are available as non-normative CDDL and JSON Schema schemas: 1205 | 1206 | - [at-driver-remote.cddl](schemas/at-driver-remote.cddl) 1207 | - [at-driver-local.cddl](schemas/at-driver-local.cddl) 1208 | - [at-driver-remote.json](schemas/at-driver-remote.json) 1209 | - [at-driver-local.json](schemas/at-driver-local.json) 1210 | 1211 | Issue(23): The JSON Schema files are not yet generated from the CDDL and so might be out of date. 1212 | --------------------------------------------------------------------------------