├── .gitignore ├── .prettierignore ├── package.json └── spec.txt /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | -------------------------------------------------------------------------------- /.prettierignore: -------------------------------------------------------------------------------- 1 | *.json 2 | *.md 3 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "common-markup-state-machine", 3 | "version": "0.0.0", 4 | "license": "MIT", 5 | "repository": "wooorm/common-markup-state-machine", 6 | "devDependencies": { 7 | "github-slugger": "^1.0.0", 8 | "mdast-util-to-string": "^1.0.0", 9 | "prettier": "^2.0.0", 10 | "remark-cli": "^8.0.0", 11 | "remark-parse": "^8.0.0", 12 | "remark-preset-wooorm": "^7.0.0", 13 | "remark-slug": "^6.0.0", 14 | "remark-stringify": "^8.0.0", 15 | "remark-toc": "^7.0.0", 16 | "to-vfile": "^6.0.0", 17 | "unified": "^9.0.0", 18 | "unist-util-visit-parents": "^3.0.0", 19 | "vfile-reporter": "^6.0.0", 20 | "xo": "^0.33.0" 21 | }, 22 | "scripts": { 23 | "build": "node build", 24 | "format": "remark . -qo && prettier . 
--write && xo --fix", 25 | "test": "npm run build && npm run format" 26 | }, 27 | "prettier": { 28 | "tabWidth": 2, 29 | "useTabs": false, 30 | "singleQuote": true, 31 | "bracketSpacing": false, 32 | "semi": false, 33 | "trailingComma": "none" 34 | }, 35 | "xo": { 36 | "prettier": true, 37 | "esnext": false 38 | }, 39 | "remarkConfig": { 40 | "settings": { 41 | "commonmark": true 42 | }, 43 | "plugins": [ 44 | "preset-wooorm", 45 | [ 46 | "lint-no-unneeded-full-reference-link", 47 | false 48 | ], 49 | [ 50 | "lint-no-html", 51 | false 52 | ], 53 | [ 54 | "lint-maximum-line-length", 55 | false 56 | ], 57 | [ 58 | "lint-maximum-heading-length", 59 | false 60 | ], 61 | [ 62 | "lint-no-undefined-references", 63 | false 64 | ], 65 | [ 66 | "validate-links", 67 | false 68 | ] 69 | ] 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /spec.txt: -------------------------------------------------------------------------------- 1 | # CMSM 2 | 3 | > 🪦 **Archived**: this document is not maintained. 4 | > This document was made jointly with `micromark`, 5 | > which was later also turned into `markdown-rs`. 6 | > At present, 7 | > I don’t have the bandwidth to maintain 2 reference parsers 8 | > *and* a spec. 9 | 10 | *** 11 | 12 | > Common markup state machine. 13 | 14 | Together, the parsing rules described below define what is referred to as a 15 | Common Markup parser. 16 | 17 | > This document is currently in progress. 18 | > It is developed jointly with a reference parser: 19 | > [`micromark`](https://github.com/micromark/micromark). 20 | > Contributions are welcome. 
21 | > 22 | > Some parts that are still in progress: 23 | > 24 | > * Adapters 25 | > * Define the regular constructs 26 | > * Adapter for rich text to check whether emphasis, strong, resource, or 27 | > reference sequences make up syntax or text 28 | > * Tokenizing the input stream in reverse (GFM allows `asd@asd.com`, so it 29 | > seems we need to somehow allow to match the `@` and parse backwards) 30 | > * Add an appendix of extensions 31 | 32 | ## Table of contents 33 | 34 | ## Background 35 | 36 | The common markup parser parses a markup language that is commonly known as 37 | *Markdown*. 38 | 39 | The first definition of this format gave several examples of how it worked, 40 | showing input Markdown and output HTML, and came with a reference implementation 41 | (known as Markdown.pl). 42 | When new implementations followed, they mostly followed the first definition, 43 | but deviated from the first implementation, thus making the format a family of 44 | formats. 45 | 46 | Some years later, an attempt was made to standardize the differences between 47 | implementations, by specifying how several edge cases should be handled, through 48 | more input and output examples. 49 | This attempt is known as CommonMark, and many implementations now follow it. 50 | 51 | This document defines a more formal format, based on CommonMark, by documenting 52 | how to parse it, instead of documenting input and output examples. 
53 | This format is: 54 | 55 | * **strict**, as it defines a state machine, which leaves significantly less 56 | room for interpretation 57 | * **agnostic** of HTML, as it does not show examples of output, which lets 58 | the format be used in new ways 59 | * **streaming**, because coupling with HTML is what requires a whole stream to 60 | be buffered as references can resolve to later definitions 61 | * **complete**, as it defines different types of tokens and how they are 62 | grouped, which allows the format to be represented as a concrete syntax tree 63 | * **extensible**, because the format is often used in combination with new 64 | and custom constructs 65 | 66 | The origin story of Markdown is similar to that of HTML, which at a time was 67 | also a family of formats. 68 | Through incredible efforts of the WHATWG, a Living Standard was created on how 69 | to parse the format, by defining a state machine. 70 | 71 | ## Overview 72 | 73 | The common markup parser receives input, typically coming over the network or 74 | from the local file system. 75 | This input is represented as characters in the input stream. 76 | Depending on a character, certain effects occur, such as that a new token is 77 | created, one state is switched to another, or something is labelled. 78 | Each line is made up of tokens, such as whitespace, markers, sequences, and 79 | content, and labels, that are both enqueued. 80 | At a certain point, it is known what to do with the queue, whether to discard it 81 | or to use it, in which case it is adapted. 82 | 83 | The parser parses in three stages: flow, content, and text, respectively coming 84 | with their own state machines ([flow state machine][], [content state machine][], 85 | [text state machine][]), and their own adapters. 
86 | 87 | ## Infra 88 | 89 | > *This section defines the fundamental concepts upon which this document is 90 | > built.* 91 | 92 | A variable is declared in the shared state with `let`, cleared with `unset`, or 93 | changed with `set`, `increment`, `decrement`, `append` or `prepend`. 94 | 95 | ## Characters 96 | 97 | A character is a Unicode code point and is represented as a four to six digit 98 | hexadecimal number, prefixed with `U+` (**\[UNICODE]**). 99 | 100 | ### Character groups 101 | 102 | An [ASCII digit](#) is a character in the inclusive range c:0 to c:9. 103 | 104 | An [ASCII upper hex digit](#) is a character in the inclusive range c:A to c:F. 105 | 106 | An [ASCII lower hex digit](#) is a character in the inclusive range c:a to c:f. 107 | 108 | An [ASCII hex digit](#) is an [ASCII digit][], [ASCII upper hex digit][], or an 109 | [ASCII lower hex digit][]. 110 | 111 | An [ASCII upper alpha](#) is a character in the inclusive range c:A to c:Z. 112 | 113 | An [ASCII lower alpha](#) is a character in the inclusive range c:a to c:z. 114 | 115 | An [ASCII alpha](#) is an [ASCII upper alpha][] or [ASCII lower alpha][]. 116 | 117 | An [ASCII alphanumeric](#) is an [ASCII digit][] or [ASCII alpha][]. 118 | 119 | An [ASCII punctuation](#) is a character in the inclusive ranges c:! to c:/, c:: 120 | to c:@, c:[ to c:`, or c:{ to c:~. 121 | 122 | An [ASCII control](#) is a character in the inclusive range c:NUL to c:US, or 123 | c:DEL. 124 | 125 | A [Unicode whitespace](#) is a character in the Unicode `Zs` (Separator, Space) 126 | category, or c:HT, c:LF, c:FF, or c:CR (**\[UNICODE]**). 127 | 128 | A [Unicode punctuation](#) is a character in the Unicode `Pc` (Punctuation, 129 | Connector), `Pd` (Punctuation, Dash), `Pe` (Punctuation, Close), `Pf` 130 | (Punctuation, Final quote), `Pi` (Punctuation, Initial quote), `Po` 131 | (Punctuation, Other), or `Ps` (Punctuation, Open) categories, or an [ASCII 132 | punctuation][] (**\[UNICODE]**). 
133 | 134 | An [atext](#) is an [ASCII alphanumeric][], or a character in the inclusive 135 | ranges c:# to c:', c:*, c:+, c:-, c:/, c:=, c:?, c:^ to c:`, or c:{ to c:~ 136 | (**\[RFC5322]**). 137 | 138 | To [ASCII-lowercase](#) a character, is to increase it by `0x20`, if it is an 139 | [ASCII upper alpha][]. 140 | 141 | To [digitize](#) a character, is to decrease it by `0x30`, `0x37`, or `0x57`, 142 | if it is an [ASCII digit][], [ASCII upper hex digit][], or 143 | [ASCII lower hex digit][], respectively. 144 | 145 | ### Conceptual characters 146 | 147 | A [c:VS](#) character is a conceptual character representing an expanded column 148 | size of a c:HT. 149 | 150 | An [c:EOL](#) character is a conceptual character representing a break between 151 | two lines. 152 | 153 | An [c:EOF](#) character is a conceptual character representing the end of the 154 | input. 155 | 156 | c:VS, c:EOL, and c:EOF are not real characters, but rather represent an 157 | increase in the size of a character, a break between characters, or the lack of any 158 | further characters. 159 | 160 | ### Tabs 161 | 162 | Tabs (c:HT) are typically not expanded into spaces, but do behave as if they 163 | were replaced by spaces with a tab stop of 4 characters. 164 | These character increments are represented by [c:VS] characters. 165 | 166 | For the following markup (where `␉` represents a tab): 167 | 168 | ```markdown 169 | >␉␉a 170 | ``` 171 | 172 | We have the characters: c:>, c:HT, c:VS, c:VS, c:HT, c:VS, c:VS, c:VS, and c:a. 173 | 174 | When transforming to an output format, tab characters that are not part of 175 | syntax should be present in the output format. 176 | When the tab itself (and zero or more c:VS characters) are part of syntax, but 177 | some c:VS characters are not, the remaining c:VS characters should be considered 178 | a prefix of the content. 179 | 180 | ## Input stream 181 | 182 | The [input stream](#) consists of the characters pushed into it. 
183 | 184 | The [input character](#) is the first character in the [input stream][] that has 185 | not yet been consumed. 186 | Initially, the input character is the first character in the input. 187 | When the last character in a line is consumed, the input character is an 188 | [c:EOL][]. 189 | Finally, when all characters are consumed, the input character is an [c:EOF][]. 190 | 191 | Any occurrence of c:HT in the [input stream][] is represented by that 192 | character and 0-3 [c:VS][] characters. 193 | 194 | ### Preprocessing the input stream 195 | 196 | The [input stream][] consists of the characters pushed into it as the input is 197 | decoded. 198 | 199 | The input, when decoded, is preprocessed and pushed into the input stream as 200 | described in the following algorithm: 201 | 202 | * Let `tabSize` be `4` 203 | * Let `line` be `1` 204 | * Let `column` be `1` 205 | * Let `offset` be `0` 206 | * *Check*: 207 | 208 | If `offset` is equal to the length of the document, push an c:EOF into the 209 | [input stream][] representing the lack of any further characters, and return 210 | 211 | Otherwise, if the current character is: 212 | 213 | * ↪ **c:NUL** 214 | 215 | Increment `offset` by `1`, increment `column` by `1`, push a c:� into 216 | the [input stream][], and go to the step labelled *check* 217 | * ↪ **c:HT** 218 | 219 | Set `count` to the result of calculating `(tabSize - 1) - ((column - 1) % 220 | tabSize)`. 221 | Increment `offset` by `1`, increment `column` by `1`, and push the 222 | character into the [input stream][]. 223 | 224 | Perform the following steps `count` times: increment `column` by `1` and 225 | push a c:VS into the [input stream][] representing the size increase. 
226 | 227 | Finally, go to the step labelled *check* 228 | * ↪ **c:LF** 229 | 230 | Increment `offset` by `1`, increment `line` by `1`, and set `column` to 231 | `1`, push an c:EOL into the [input stream][] representing the character, 232 | and go to the step labelled *check* 233 | * ↪ **c:CR** 234 | 235 | Increment `offset` by `1`, increment `line` by `1`, set `column` to `1`, 236 | and go to the step labelled *carriage return check* 237 | * ↪ **Anything else** 238 | 239 | Increment `offset` by `1`, increment `column` by `1`, push the character 240 | into the [input stream][], and go to the step labelled *check* 241 | * *Carriage return check*: if the current character is: 242 | 243 | * ↪ **c:LF** 244 | 245 | Increment `offset` by `1` and push an c:EOL into the [input stream][] 246 | representing the previous and current characters 247 | * ↪ **Anything else** 248 | 249 | Push an c:EOL into the [input stream][] representing the previous 250 | character and perform the step labelled *check* on the current character 251 | 252 | ## Parsing 253 | 254 | The states of state machines have certain effects, such as that they create 255 | items in the [queue][] (tokens and labels). 256 | The queue is used by tree adapters, in case a valid construct is found. 257 | After using the queue, or when a bogus construct is found, the queue is 258 | discarded. 259 | 260 | The [shared space][] is accessed and mutated by both the tree adapter and the 261 | states of the state machine. 262 | 263 | [Construct][]s are registered by hooking a case (one or more characters or 264 | character groups) into certain states. 265 | Upon registration, they define the states used to parse a construct, and the 266 | adapter used to handle the construct. 267 | 268 | ### Tokenization 269 | 270 | Implementations must act as if they use several state machines to tokenize 271 | common markup. 
272 | The [flow state machine][] is used to tokenize the line constructs that make up 273 | the structure of the document. 274 | The [content state machine][] is used to tokenize the inline constructs part of 275 | content blocks. 276 | The [text state machine][] is used to tokenize the inline constructs part of 277 | rich or plain text. 278 | 279 | Most states [consume][] the [input character][], and either remain in the state 280 | to consume the next character, [reconsume][] the input character in a different 281 | state, or [switch][] to a different state to consume the next character. 282 | States [enqueue][] tokens and labels. 283 | 284 | ### State 285 | 286 | The [shared space](#) is a map of key/value pairs. 287 | 288 | The [queue](#) is a list of tokens and labels that are enqueued. 289 | The [current token](#) is the last token in the [queue][]. 290 | 291 | ### Constructs 292 | 293 | Markup is parsed per [construct](#). 294 | Some constructs are considered regular (those from CommonMark, such as ATX 295 | headings) and other constructs are extensions (such as YAML frontmatter or MDX). 296 | 297 | > ❗️ Define constructs. 298 | 299 | ### Effects 300 | 301 | #### Switch 302 | 303 | To [switch](#) to a state is to wait for the next character in the given state. 304 | 305 | #### Consume 306 | 307 | To [consume](#) the [input character][] affects the [current token][]. 308 | Due to the nature of the state machine, it is not possible to consume if there 309 | is no current token. 310 | 311 | #### Reconsume 312 | 313 | To [reconsume](#) is to [switch][] to the given state, and [consume][] the 314 | [input character][] there. 315 | 316 | #### Enqueue 317 | 318 | To [enqueue](#) a label is to mark a point between two tokens with a semantic 319 | name, at which point there is no [current token][]. 320 | 321 | To enqueue a token is to add a new token of the given type to the [queue][], 322 | making it the new [current token][]. 
323 | 324 | #### Ensure 325 | 326 | To ensure a token is to enqueue that token if the [current token][] is not of 327 | the given type, and otherwise do nothing. 328 | 329 | ## Flow state machine 330 | 331 | The [flow state machine](#) is used to tokenize the line constructs that make up 332 | the structure of the document (such as headings or thematic breaks) and must 333 | start in the s:flow-prefix-start. 334 | 335 | ### Flow prefix start state 336 | 337 | * ↪ **[c:VS][]**\ 338 | ↪ **c:HT**\ 339 | ↪ **c:SP** 340 | 341 | Ensure a t:whitespace and consume 342 | * ↪ **Anything else** 343 | 344 | Reconsume in the s:flow-start 345 | 346 | ### Flow start state 347 | 348 | > **Hookable**, there are no regular hooks 349 | 350 | * ↪ **Anything else** 351 | 352 | Reconsume in the s:flow-initial 353 | 354 | ### Flow prefix initial state 355 | 356 | * ↪ **[c:VS][]**\ 357 | ↪ **c:HT**\ 358 | ↪ **c:SP** 359 | 360 | Ensure a t:whitespace and consume 361 | * ↪ **Anything else** 362 | 363 | Reconsume in the s:flow-initial 364 | 365 | ### Flow initial state 366 | 367 | > ❗️ Todo: Indented code v.s. 
content 368 | 369 | > **Hookable**, the regular hooks are: 370 | > 371 | > * [x] **[c:EOL][]**: s:blank-line 372 | > * [x] **c:#**: s:atx-heading-start 373 | > * [x] **c:***: s:thematic-break-asterisk-start 374 | > * [ ] **c:***: 375 | > * [ ] **c:+**: 376 | > * [x] **c:-**: s:setext-heading-underline-dash-start 377 | > * [x] **c:-**: s:thematic-break-dash-start 378 | > * [ ] **c:-**: 379 | > * [x] **c:<**: s:flow-html-start 380 | > * [x] **c:=**: s:setext-heading-underline-equals-to-start 381 | > * [ ] **c:>**: 382 | > * [x] **c:_**: s:thematic-break-underscore-start 383 | > * [x] **c:`**: s:fenced-code-grave-accent-start 384 | > * [x] **c:~**: s:fenced-code-tilde-start 385 | > * [ ] **[ASCII digit][]**: 386 | 387 | > ❗️ Todo, continuation: 388 | > * [x] s:flow-html-continuation 389 | > * [x] s:fenced-code-grave-accent-continuation 390 | > * [x] s:fenced-code-tilde-continuation 391 | 392 | * ↪ **[c:EOF][]** 393 | 394 | Enqueue an t:end-of-file 395 | * ↪ **Anything else** 396 | 397 | Reconsume in the s:flow-content 398 | 399 | ### Blank line state 400 | 401 | * ↪ **[c:EOL][]** 402 | 403 | Enqueue a l:blank-line-end, enqueue an t:end-of-line, and consume 404 | * ↪ **Anything else** 405 | 406 | Enqueue a l:nok 407 | 408 | ### ATX heading start state 409 | 410 | * ↪ **c:#** 411 | 412 | Let `sizeFence` be `1`, enqueue an l:atx-heading-start, enqueue an 413 | l:atx-heading-fence-start, enqueue a t:sequence, consume, and switch to the 414 | s:atx-heading-fence-open-inside 415 | * ↪ **Anything else** 416 | 417 | Enqueue a l:nok 418 | 419 | ### ATX heading fence open inside state 420 | 421 | * ↪ **[c:EOF][]**\ 422 | ↪ **[c:EOL][]**\ 423 | ↪ **[c:VS][]**\ 424 | ↪ **c:HT**\ 425 | ↪ **c:SP** 426 | 427 | Unset `sizeFence`, enqueue an l:atx-heading-fence-end and reconsume in the 428 | s:atx-heading-inside 429 | * ↪ **c:#** 430 | 431 | If `sizeFence` is not `6`, increment `sizeFence` by `1` and consume 432 | 433 | Otherwise, treat it as per the “anything else” entry below 434 | 
* ↪ **Anything else** 435 | 436 | Unset `sizeFence` and enqueue a l:nok 437 | 438 | ### ATX heading inside state 439 | 440 | * ↪ **[c:EOF][]** 441 | 442 | Enqueue an l:atx-heading-end and enqueue an t:end-of-file 443 | * ↪ **[c:EOL][]** 444 | 445 | Enqueue an l:atx-heading-end, enqueue an t:end-of-line, consume, and switch 446 | to the s:flow-prefix-initial 447 | * ↪ **[c:VS][]**\ 448 | ↪ **c:HT**\ 449 | ↪ **c:SP** 450 | 451 | Ensure a t:whitespace and consume 452 | * ↪ **c:#** 453 | 454 | Ensure a t:sequence and consume 455 | * ↪ **Anything else** 456 | 457 | Ensure a t:content and consume 458 | 459 | ### Thematic break asterisk start state 460 | 461 | * ↪ **c:*** 462 | 463 | Let `sizeTotalSequence` be `1`, enqueue a l:thematic-break-start, enqueue a 464 | t:sequence, consume, and switch to the s:thematic-break-asterisk-inside 465 | * ↪ **Anything else** 466 | 467 | Enqueue a l:nok 468 | 469 | ### Thematic break asterisk inside state 470 | 471 | * ↪ **[c:EOF][]** 472 | 473 | If `sizeTotalSequence` is greater than or equal to `3`, unset 474 | `sizeTotalSequence`, enqueue a l:thematic-break-end, and enqueue an 475 | t:end-of-file 476 | 477 | Otherwise, treat it as per the “anything else” entry below 478 | * ↪ **[c:EOL][]** 479 | 480 | If `sizeTotalSequence` is greater than or equal to `3`, unset 481 | `sizeTotalSequence`, enqueue a l:thematic-break-end, enqueue an 482 | t:end-of-line, consume, and switch to the s:flow-prefix-initial 483 | 484 | Otherwise, treat it as per the “anything else” entry below 485 | * ↪ **[c:VS][]**\ 486 | ↪ **c:HT**\ 487 | ↪ **c:SP** 488 | 489 | Ensure a t:whitespace and consume 490 | * ↪ **c:*** 491 | 492 | Increment `sizeTotalSequence` by `1`, ensure a t:sequence, and consume 493 | * ↪ **Anything else** 494 | 495 | Unset `sizeTotalSequence` and enqueue a l:nok 496 | 497 | ### Setext heading underline dash start state 498 | 499 | > ❗️ Todo: exit if not preceded by content 500 | 501 | * ↪ **c:-** 502 | 503 | Enqueue a 
l:setext-heading-underline-start, enqueue a t:sequence, consume, 504 | and switch to the s:setext-heading-underline-dash-inside 505 | * ↪ **Anything else** 506 | 507 | Enqueue a l:nok 508 | 509 | ### Setext heading underline dash inside state 510 | 511 | * ↪ **[c:EOF][]**\ 512 | ↪ **[c:EOL][]**\ 513 | ↪ **[c:VS][]**\ 514 | ↪ **c:HT**\ 515 | ↪ **c:SP** 516 | 517 | Reconsume in the s:setext-heading-underline-dash-after 518 | * ↪ **c:-** 519 | 520 | Consume 521 | * ↪ **Anything else** 522 | 523 | Enqueue a l:nok 524 | 525 | ### Setext heading underline dash after state 526 | 527 | > ❗️ Todo: Close content if ok, create a new content if nok 528 | 529 | * ↪ **[c:EOF][]** 530 | 531 | Enqueue a l:setext-heading-underline-end and enqueue an t:end-of-file 532 | * ↪ **[c:EOL][]** 533 | 534 | Enqueue a l:setext-heading-underline-end, enqueue an t:end-of-line, consume, 535 | and switch to the s:flow-prefix-initial 536 | * ↪ **[c:VS][]**\ 537 | ↪ **c:HT**\ 538 | ↪ **c:SP** 539 | 540 | Ensure a t:whitespace and consume 541 | * ↪ **Anything else** 542 | 543 | Enqueue a l:nok 544 | 545 | ### Thematic break dash start state 546 | 547 | * ↪ **c:-** 548 | 549 | Let `sizeTotalSequence` be `1`, enqueue a l:thematic-break-start, enqueue a 550 | t:sequence, consume, and switch to the s:thematic-break-dash-inside 551 | * ↪ **Anything else** 552 | 553 | Enqueue a l:nok 554 | 555 | ### Thematic break dash inside state 556 | 557 | * ↪ **[c:EOF][]** 558 | 559 | If `sizeTotalSequence` is greater than or equal to `3`, unset 560 | `sizeTotalSequence`, enqueue a l:thematic-break-end, and enqueue an 561 | t:end-of-file 562 | 563 | Otherwise, treat it as per the “anything else” entry below 564 | * ↪ **[c:EOL][]** 565 | 566 | If `sizeTotalSequence` is greater than or equal to `3`, unset 567 | `sizeTotalSequence`, enqueue a l:thematic-break-end, enqueue an 568 | t:end-of-line, consume, and switch to the s:flow-prefix-initial 569 | 570 | Otherwise, treat it as per the “anything else” entry below 571 | 
* ↪ **[c:VS][]**\ 572 | ↪ **c:HT**\ 573 | ↪ **c:SP** 574 | 575 | Ensure a t:whitespace and consume 576 | * ↪ **c:-** 577 | 578 | Increment `sizeTotalSequence` by `1`, ensure a t:sequence, and consume 579 | * ↪ **Anything else** 580 | 581 | Unset `sizeTotalSequence` and enqueue a l:nok 582 | 583 | ### Flow HTML start state 584 | 585 | * ↪ **c:<** 586 | 587 | Let `kind` be `0`, let `endTag` be `null`, let `tagName` be the empty 588 | string, enqueue a t:content, consume, and switch to the s:flow-html-tag-open 589 | * ↪ **Anything else** 590 | 591 | Enqueue a l:nok 592 | 593 | ### Flow HTML tag open state 594 | 595 | * ↪ **c:!** 596 | 597 | Consume and switch to the s:flow-html-markup-declaration-open 598 | * ↪ **c:/** 599 | 600 | Set `endTag` to `true`, consume, and switch to the s:flow-html-end-tag-open 601 | * ↪ **c:?** 602 | 603 | Set `kind` to `3`, unset `endTag`, consume, and switch to the 604 | s:flow-html-continuation-declaration-before 605 | * ↪ **[ASCII alpha][]** 606 | 607 | Append the [ASCII-lowercase][]d character to `tagName`, consume, and switch 608 | to the s:flow-html-tag-name 609 | * ↪ **Anything else** 610 | 611 | Unset `kind`, `endTag`, and `tagName`, and enqueue a l:nok 612 | 613 | ### Flow HTML markup declaration open state 614 | 615 | * ↪ **`--` (two c:- characters)** 616 | 617 | Set `kind` to `2`, unset `endTag`, consume, and switch to the 618 | s:flow-html-continuation-declaration-before 619 | * ↪ **`[CDATA[` (the five upper letters “CDATA” with a c:[ before and 620 | after)** 621 | 622 | Set `kind` to `5`, unset `endTag`, consume, and switch to the 623 | s:flow-html-continuation 624 | * ↪ **[ASCII alpha][]** 625 | 626 | Set `kind` to `4`, unset `endTag`, consume, and switch to the 627 | s:flow-html-continuation 628 | * ↪ **Anything else** 629 | 630 | Unset `kind`, `endTag`, and `tagName`, and enqueue a l:nok 631 | 632 | ### Flow HTML end tag open state 633 | 634 | * ↪ **[ASCII alpha][]** 635 | 636 | Append the [ASCII-lowercase][]d character 
to `tagName`, consume, and switch 637 | to the s:flow-html-tag-name 638 | * ↪ **Anything else** 639 | 640 | Unset `kind`, `endTag`, and `tagName`, and enqueue a l:nok 641 | 642 | ### Flow HTML tag name state 643 | 644 | * ↪ **[c:EOF][]**\ 645 | ↪ **[c:EOL][]** 646 | 647 | If `tagName` is a [raw tag][] and `endTag` is not `true`, set `kind` to `1`, 648 | unset `tagName`, unset `endTag`, and reconsume in the 649 | s:flow-html-continuation 650 | 651 | Otherwise, if `tagName` is a [basic tag][], set `kind` to `6`, unset 652 | `tagName`, unset `endTag`, and reconsume in the s:flow-html-continuation 653 | 654 | Otherwise, treat it as per the “anything else” entry below 655 | * ↪ **[c:VS][]**\ 656 | ↪ **c:HT**\ 657 | ↪ **c:SP** 658 | 659 | If `tagName` is a [raw tag][] and `endTag` is not `true`, set `kind` to `1`, 660 | unset `tagName`, unset `endTag`, and switch to the s:flow-html-continuation 661 | 662 | > ❗️ Todo: ignore this check if interrupting content. 663 | 664 | Otherwise, if `tagName` is not a [raw tag][], unset `tagName`, consume, and 665 | switch to the s:flow-html-complete-attribute-name-before 666 | 667 | Otherwise, treat it as per the “anything else” entry below 668 | * ↪ **c:-** 669 | 670 | Append the character to `tagName` and consume 671 | * ↪ **c:/** 672 | 673 | If `tagName` is a [basic tag][], unset `tagName`, consume, and switch to the 674 | s:flow-html-basic-self-closing 675 | 676 | > ❗️ Todo: ignore this check if interrupting content. 
677 | 678 | Otherwise, if `tagName` is not a [raw tag][] and `endTag` is not `true`, 679 | unset `tagName`, consume, and switch to the 680 | s:flow-html-complete-self-closing 681 | 682 | Otherwise, treat it as per the “anything else” entry below 683 | * ↪ **c:>** 684 | 685 | If `tagName` is a [raw tag][] and `endTag` is not `true`, set `kind` to `1`, 686 | unset `tagName`, unset `endTag`, consume, and switch to the 687 | s:flow-html-continuation 688 | 689 | Otherwise, if `tagName` is a [basic tag][], set `kind` to `6`, unset 690 | `tagName`, unset `endTag`, and reconsume in the s:flow-html-continuation 691 | 692 | > ❗️ Todo: ignore this check if interrupting content. 693 | 694 | Otherwise, if `tagName` is not a [raw tag][], unset `tagName`, consume, and 695 | switch to the s:flow-html-complete-tag-after 696 | 697 | Otherwise, treat it as per the “anything else” entry below 698 | * ↪ **[ASCII alphanumeric][]** 699 | 700 | Append the [ASCII-lowercase][]d character to `tagName` and consume 701 | * ↪ **Anything else** 702 | 703 | Unset `kind`, `endTag`, and `tagName`, and enqueue a l:nok 704 | 705 | ### Flow HTML basic self closing state 706 | 707 | * ↪ **c:>** 708 | 709 | Set `kind` to `6`, unset `endTag`, consume, and switch to the 710 | s:flow-html-continuation 711 | * ↪ **Anything else** 712 | 713 | Unset `kind` and `endTag` and enqueue a l:nok 714 | 715 | ### Flow HTML complete attribute name before state 716 | 717 | * ↪ **[c:VS][]**\ 718 | ↪ **c:HT**\ 719 | ↪ **c:SP** 720 | 721 | Consume 722 | * ↪ **c:/** 723 | 724 | If `endTag` is not `true`, consume and switch to the 725 | s:flow-html-complete-self-closing 726 | 727 | Otherwise, treat it as per the “anything else” entry below 728 | * ↪ **c::**\ 729 | ↪ **c:_**\ 730 | ↪ **[ASCII alpha][]** 731 | 732 | If `endTag` is not `true`, consume and switch to the 733 | s:flow-html-complete-attribute-name 734 | 735 | Otherwise, treat it as per the “anything else” entry below 736 | * ↪ **c:>** 737 | 738 | Consume and switch 
to the s:flow-html-complete-tag-after 739 | * ↪ **Anything else** 740 | 741 | Unset `kind` and `endTag` and enqueue a l:nok 742 | 743 | ### Flow HTML complete attribute name state 744 | 745 | * ↪ **c:-**\ 746 | ↪ **c:.**\ 747 | ↪ **c::**\ 748 | ↪ **c:_**\ 749 | ↪ **[ASCII alphanumeric][]** 750 | 751 | Consume 752 | * ↪ **Anything else** 753 | 754 | Reconsume in the s:flow-html-complete-attribute-name-after 755 | 756 | ### Flow HTML complete attribute name after state 757 | 758 | * ↪ **[c:VS][]**\ 759 | ↪ **c:HT**\ 760 | ↪ **c:SP** 761 | 762 | Consume 763 | * ↪ **c:/** 764 | 765 | If `endTag` is not `true`, consume and switch to the 766 | s:flow-html-complete-self-closing 767 | 768 | Otherwise, treat it as per the “anything else” entry below 769 | * ↪ **c:=** 770 | 771 | Consume and switch to the s:flow-html-complete-attribute-value-before 772 | * ↪ **c:>** 773 | 774 | Consume and switch to the s:flow-html-complete-tag-after 775 | * ↪ **Anything else** 776 | 777 | Unset `kind` and `endTag` and enqueue a l:nok 778 | 779 | ### Flow HTML complete attribute value before state 780 | 781 | * ↪ **[c:EOF][]**\ 782 | ↪ **[c:EOL][]**\ 783 | ↪ **c:<**\ 784 | ↪ **c:=**\ 785 | ↪ **c:>**\ 786 | ↪ **c:`** 787 | 788 | Unset `kind` and `endTag` and enqueue a l:nok 789 | * ↪ **[c:VS][]**\ 790 | ↪ **c:HT**\ 791 | ↪ **c:SP** 792 | 793 | Consume 794 | * ↪ **c:"** 795 | 796 | Consume and switch to the s:flow-html-complete-attribute-value-double-quoted 797 | * ↪ **c:'** 798 | 799 | Consume and switch to the s:flow-html-complete-attribute-value-single-quoted 800 | * ↪ **Anything else** 801 | 802 | Consume and switch to the s:flow-html-complete-attribute-value-unquoted 803 | 804 | ### Flow HTML complete attribute value double quoted state 805 | 806 | * ↪ **[c:EOF][]**\ 807 | ↪ **[c:EOL][]** 808 | 809 | Unset `kind` and `endTag` and enqueue a l:nok 810 | * ↪ **c:"** 811 | 812 | Consume and switch to the s:flow-html-complete-attribute-name-before 813 | * ↪ **Anything else** 814 | 815 | 
Consume 816 | 817 | ### Flow HTML complete attribute value single quoted state 818 | 819 | * ↪ **[c:EOF][]**\ 820 | ↪ **[c:EOL][]** 821 | 822 | Unset `kind` and `endTag` and enqueue a l:nok 823 | * ↪ **c:'** 824 | 825 | Consume and switch to the s:flow-html-complete-attribute-name-before 826 | * ↪ **Anything else** 827 | 828 | Consume 829 | 830 | ### Flow HTML complete attribute value unquoted state 831 | 832 | * ↪ **[c:EOF][]**\ 833 | ↪ **[c:EOL][]**\ 834 | ↪ **[c:VS][]**\ 835 | ↪ **c:HT**\ 836 | ↪ **c:SP**\ 837 | ↪ **c:"**\ 838 | ↪ **c:'**\ 839 | ↪ **c:<**\ 840 | ↪ **c:=**\ 841 | ↪ **c:>**\ 842 | ↪ **c:`** 843 | 844 | Reconsume in the s:flow-html-complete-attribute-name-after 845 | * ↪ **Anything else** 846 | 847 | Consume 848 | 849 | ### Flow HTML complete self closing state 850 | 851 | * ↪ **c:>** 852 | 853 | Consume and switch to the s:flow-html-complete-tag-after 854 | * ↪ **Anything else** 855 | 856 | Unset `kind` and `endTag` and enqueue a l:nok 857 | 858 | ### Flow HTML complete tag after state 859 | 860 | * ↪ **[c:EOF][]**\ 861 | ↪ **[c:EOL][]**\ 862 | ↪ **[c:VS][]**\ 863 | ↪ **c:HT**\ 864 | ↪ **c:SP** 865 | 866 | Set `kind` to `7`, unset `endTag`, and reconsume in the 867 | s:flow-html-continuation 868 | * ↪ **Anything else** 869 | 870 | Unset `kind` and `endTag` and enqueue a l:nok 871 | 872 | ### Flow HTML continuation state 873 | 874 | * ↪ **[c:EOF][]** 875 | 876 | Unset `kind`, enqueue an l:html-end, and enqueue an t:end-of-file 877 | * ↪ **[c:EOL][]** 878 | 879 | Enqueue an t:end-of-line, consume, and switch to the s:flow-prefix-initial 880 | * ↪ **c:-** 881 | 882 | If `kind` is `2`, consume and switch to the 883 | s:flow-html-continuation-comment-inside 884 | 885 | Otherwise, treat it as per the “anything else” entry below 886 | * ↪ **c:<** 887 | 888 | If `kind` is `1`, consume and switch to the 889 | s:flow-html-continuation-raw-tag-open 890 | 891 | Otherwise, treat it as per the “anything else” entry below 892 | * ↪ **c:>** 893 | 894 | If `kind` 
is `4`, consume and switch to the s:flow-html-continuation-close 895 | 896 | Otherwise, treat it as per the “anything else” entry below 897 | * ↪ **c:?** 898 | 899 | If `kind` is `3`, consume and switch to the 900 | s:flow-html-continuation-declaration-before 901 | 902 | Otherwise, treat it as per the “anything else” entry below 903 | * ↪ **c:]** 904 | 905 | If `kind` is `5`, consume and switch to the 906 | s:flow-html-continuation-character-data-inside 907 | 908 | Otherwise, treat it as per the “anything else” entry below 909 | * ↪ **Anything else** 910 | 911 | Consume 912 | 913 | ### Flow HTML continuation comment inside state 914 | 915 | * ↪ **c:-** 916 | 917 | Consume and switch to the s:flow-html-continuation-declaration-before 918 | * ↪ **Anything else** 919 | 920 | Reconsume in the s:flow-html-continuation 921 | 922 | ### Flow HTML continuation raw tag open state 923 | 924 | * ↪ **c:/** 925 | 926 | Let `tagName` be the empty string, consume, and switch to the 927 | s:flow-html-continuation-raw-end-tag 928 | * ↪ **Anything else** 929 | 930 | Reconsume in the s:flow-html-continuation 931 | 932 | ### Flow HTML continuation raw end tag state 933 | 934 | > **Note**: This state can be optimized by either imposing a maximum size (the 935 | > size of the longest possible raw tag name) or by using a trie of the possible 936 | > raw tag names.
937 | 938 | * ↪ **[ASCII alphanumeric][]** 939 | 940 | Append the [ASCII-lowercase][]d character to `tagName` and consume 941 | * ↪ **c:>** 942 | 943 | If `tagName` is a [raw tag][], unset `tagName`, consume, and switch to the 944 | s:flow-html-continuation-close 945 | 946 | Otherwise, treat it as per the “anything else” entry below 947 | * ↪ **Anything else** 948 | 949 | Unset `tagName` and reconsume in the s:flow-html-continuation 950 | 951 | ### Flow HTML continuation character data inside state 952 | 953 | * ↪ **c:]** 954 | 955 | Consume and switch to the s:flow-html-continuation-declaration-before 956 | * ↪ **Anything else** 957 | 958 | Reconsume in the s:flow-html-continuation 959 | 960 | ### Flow HTML continuation declaration before state 961 | 962 | * ↪ **c:>** 963 | 964 | Consume and switch to the s:flow-html-continuation-close 965 | * ↪ **Anything else** 966 | 967 | Reconsume in the s:flow-html-continuation 968 | 969 | ### Flow HTML continuation close state 970 | 971 | * ↪ **[c:EOF][]** 972 | 973 | Unset `kind`, enqueue an l:html-end, and enqueue an t:end-of-file 974 | * ↪ **[c:EOL][]** 975 | 976 | Unset `kind`, enqueue an l:html-end, and enqueue an t:end-of-line, consume, 977 | and switch to the s:flow-prefix-initial 978 | * ↪ **Anything else** 979 | 980 | Consume 981 | 982 | ### Setext heading underline equals to start state 983 | 984 | > ❗️ Todo: exit if not preceded by content 985 | 986 | * ↪ **c:=** 987 | 988 | Enqueue a l:setext-heading-underline-start, enqueue a t:sequence, consume, 989 | and switch to the s:setext-heading-underline-equals-to-inside 990 | * ↪ **Anything else** 991 | 992 | Enqueue a l:nok 993 | 994 | ### Setext heading underline equals to inside state 995 | 996 | * ↪ **[c:EOF][]**\ 997 | ↪ **[c:EOL][]**\ 998 | ↪ **[c:VS][]**\ 999 | ↪ **c:HT**\ 1000 | ↪ **c:SP** 1001 | 1002 | Reconsume in the s:setext-heading-underline-equals-to-after 1003 | * ↪ **c:=** 1004 | 1005 | Consume 1006 | * ↪ **Anything else** 1007 | 1008 | Enqueue a l:nok
1009 | 1010 | ### Setext heading underline equals to after state 1011 | 1012 | > ❗️ Todo: Close content if ok, create a new content if nok 1013 | 1014 | * ↪ **[c:EOF][]** 1015 | 1016 | Enqueue a l:setext-heading-underline-end and enqueue an t:end-of-file 1017 | * ↪ **[c:EOL][]** 1018 | 1019 | Enqueue a l:setext-heading-underline-end, enqueue an t:end-of-line, consume, 1020 | and switch to the s:flow-prefix-initial 1021 | * ↪ **[c:VS][]**\ 1022 | ↪ **c:HT**\ 1023 | ↪ **c:SP** 1024 | 1025 | Ensure a t:whitespace and consume 1026 | * ↪ **Anything else** 1027 | 1028 | Enqueue a l:nok 1029 | 1030 | ### Thematic break underscore start state 1031 | 1032 | * ↪ **c:_** 1033 | 1034 | Let `sizeTotalSequence` be `1`, enqueue a l:thematic-break-start, enqueue a 1035 | t:sequence, consume, and switch to the s:thematic-break-underscore-inside 1036 | * ↪ **Anything else** 1037 | 1038 | Enqueue a l:nok 1039 | 1040 | ### Thematic break underscore inside state 1041 | 1042 | * ↪ **[c:EOF][]** 1043 | 1044 | If `sizeTotalSequence` is greater than or equal to `3`, unset 1045 | `sizeTotalSequence`, enqueue a l:thematic-break-end, and enqueue an 1046 | t:end-of-file 1047 | 1048 | Otherwise, treat it as per the “anything else” entry below 1049 | * ↪ **[c:EOL][]** 1050 | 1051 | If `sizeTotalSequence` is greater than or equal to `3`, unset 1052 | `sizeTotalSequence`, enqueue a l:thematic-break-end, enqueue an 1053 | t:end-of-line, consume, and switch to the s:flow-prefix-initial 1054 | 1055 | Otherwise, treat it as per the “anything else” entry below 1056 | * ↪ **[c:VS][]**\ 1057 | ↪ **c:HT**\ 1058 | ↪ **c:SP** 1059 | 1060 | Ensure a t:whitespace and consume 1061 | * ↪ **c:_** 1062 | 1063 | Increment `sizeTotalSequence` by `1`, ensure a t:sequence, and consume 1064 | * ↪ **Anything else** 1065 | 1066 | Unset `sizeTotalSequence` and enqueue a l:nok 1067 | 1068 | ### Fenced code grave accent start state 1069 | 1070 | * ↪ **c:`** 1071 | 1072 | Let `sizeOpen` be `1`, enqueue a 
l:fenced-code-start, enqueue a 1073 | l:fenced-code-fence-start, enqueue a l:fenced-code-fence-sequence-start, 1074 | enqueue a t:sequence, consume, and switch to the 1075 | s:fenced-code-grave-accent-open-fence-inside 1076 | * ↪ **Anything else** 1077 | 1078 | Enqueue a l:nok 1079 | 1080 | ### Fenced code grave accent open fence inside state 1081 | 1082 | * ↪ **[c:EOF][]**\ 1083 | ↪ **[c:EOL][]**\ 1084 | ↪ **[c:VS][]**\ 1085 | ↪ **c:HT**\ 1086 | ↪ **c:SP** 1087 | 1088 | If `sizeOpen` is greater than or equal to `3`, enqueue a 1089 | l:fenced-code-fence-sequence-end, and reconsume in the 1090 | s:fenced-code-grave-accent-open-fence-after 1091 | 1092 | Otherwise, treat it as per the “anything else” entry below 1093 | * ↪ **c:`** 1094 | 1095 | Increment `sizeOpen` by `1` and consume 1096 | * ↪ **Anything else** 1097 | 1098 | Unset `sizeOpen` and enqueue a l:nok 1099 | 1100 | ### Fenced code grave accent open fence after state 1101 | 1102 | * ↪ **[c:EOF][]** 1103 | 1104 | Enqueue a l:fenced-code-fence-end and enqueue an t:end-of-file 1105 | * ↪ **[c:EOL][]** 1106 | 1107 | Enqueue a l:fenced-code-fence-end, enqueue an t:end-of-line, consume, and 1108 | switch to the s:flow-prefix-initial 1109 | * ↪ **[c:VS][]**\ 1110 | ↪ **c:HT**\ 1111 | ↪ **c:SP** 1112 | 1113 | Ensure a t:whitespace and consume 1114 | * ↪ **c:`** 1115 | 1116 | Unset `sizeOpen` and enqueue a l:nok 1117 | * ↪ **Anything else** 1118 | 1119 | Ensure a t:content and consume 1120 | 1121 | ### Fenced code grave accent continuation state 1122 | 1123 | * ↪ **c:`** 1124 | 1125 | Let `sizeClose` be `1`, enqueue a l:fenced-code-fence-sequence-start, 1126 | enqueue a t:sequence, consume, and switch to the 1127 | s:fenced-code-grave-accent-close-fence-inside 1128 | * ↪ **Anything else** 1129 | 1130 | Reconsume in the s:fenced-code-grave-accent-continuation-inside 1131 | 1132 | ### Fenced code grave accent close fence inside state 1133 | 1134 | * ↪ **[c:EOF][]**\ 1135 | ↪ **[c:EOL][]**\ 1136 | ↪ **[c:VS][]**\ 1137 | 
↪ **c:HT**\ 1138 | ↪ **c:SP** 1139 | 1140 | If `sizeClose` is greater than or equal to `sizeOpen`, enqueue a 1141 | l:fenced-code-fence-sequence-end and reconsume in the 1142 | s:fenced-code-grave-accent-close-fence-after 1143 | 1144 | Otherwise, treat it as per the “anything else” entry below 1145 | * ↪ **c:`** 1146 | 1147 | Increment `sizeClose` by `1` and consume 1148 | * ↪ **Anything else** 1149 | 1150 | Unset `sizeClose` and reconsume in the 1151 | s:fenced-code-grave-accent-continuation-inside 1152 | 1153 | ### Fenced code grave accent close fence after state 1154 | 1155 | * ↪ **[c:EOF][]** 1156 | 1157 | Unset `sizeOpen`, unset `sizeClose`, enqueue a l:fenced-code-fence-end, 1158 | enqueue a l:fenced-code-end, and enqueue an t:end-of-file 1159 | * ↪ **[c:EOL][]** 1160 | 1161 | Unset `sizeOpen`, unset `sizeClose`, enqueue a l:fenced-code-fence-end, 1162 | enqueue a l:fenced-code-end, enqueue an t:end-of-line, consume, and switch 1163 | to the s:flow-prefix-initial 1164 | * ↪ **[c:VS][]**\ 1165 | ↪ **c:HT**\ 1166 | ↪ **c:SP** 1167 | 1168 | Ensure a t:whitespace and consume 1169 | * ↪ **Anything else** 1170 | 1171 | Reconsume in the s:fenced-code-grave-accent-continuation-inside 1172 | 1173 | ### Fenced code grave accent continuation inside state 1174 | 1175 | * ↪ **[c:EOF][]** 1176 | 1177 | Unset `sizeOpen`, unset `sizeClose`, enqueue a l:fenced-code-fence-end, 1178 | enqueue a l:fenced-code-end, and enqueue an t:end-of-file 1179 | * ↪ **[c:EOL][]** 1180 | 1181 | Enqueue an t:end-of-line, consume, and switch to the s:flow-prefix-initial 1182 | * ↪ **Anything else** 1183 | 1184 | Ensure a t:content and consume 1185 | 1186 | ### Fenced code tilde start state 1187 | 1188 | * ↪ **c:~** 1189 | 1190 | Let `sizeOpen` be `1`, enqueue a l:fenced-code-start, enqueue a 1191 | l:fenced-code-fence-start, enqueue a l:fenced-code-fence-sequence-start, 1192 | enqueue a t:sequence, consume, and switch to the 1193 | s:fenced-code-tilde-open-fence-inside 1194 | * ↪ **Anything 
else** 1195 | 1196 | Enqueue a l:nok 1197 | 1198 | ### Fenced code tilde open fence inside state 1199 | 1200 | * ↪ **[c:EOF][]**\ 1201 | ↪ **[c:EOL][]**\ 1202 | ↪ **[c:VS][]**\ 1203 | ↪ **c:HT**\ 1204 | ↪ **c:SP** 1205 | 1206 | If `sizeOpen` is greater than or equal to `3`, enqueue a 1207 | l:fenced-code-fence-sequence-end, and reconsume in the 1208 | s:fenced-code-tilde-open-fence-after 1209 | 1210 | Otherwise, treat it as per the “anything else” entry below 1211 | * ↪ **c:~** 1212 | 1213 | Increment `sizeOpen` by `1` and consume 1214 | * ↪ **Anything else** 1215 | 1216 | Unset `sizeOpen` and enqueue a l:nok 1217 | 1218 | ### Fenced code tilde open fence after state 1219 | 1220 | * ↪ **[c:EOF][]** 1221 | 1222 | Enqueue a l:fenced-code-fence-end and enqueue an t:end-of-file 1223 | * ↪ **[c:EOL][]** 1224 | 1225 | Enqueue a l:fenced-code-fence-end, enqueue an t:end-of-line, consume, and 1226 | switch to the s:flow-prefix-initial 1227 | * ↪ **[c:VS][]**\ 1228 | ↪ **c:HT**\ 1229 | ↪ **c:SP** 1230 | 1231 | Ensure a t:whitespace and consume 1232 | * ↪ **Anything else** 1233 | 1234 | Ensure a t:content and consume 1235 | 1236 | ### Fenced code tilde continuation state 1237 | 1238 | * ↪ **c:~** 1239 | 1240 | Let `sizeClose` be `1`, enqueue a l:fenced-code-fence-sequence-start, 1241 | enqueue a t:sequence, consume, and switch to the 1242 | s:fenced-code-tilde-close-fence-inside 1243 | * ↪ **Anything else** 1244 | 1245 | Reconsume in the s:fenced-code-tilde-continuation-inside 1246 | 1247 | ### Fenced code tilde close fence inside state 1248 | 1249 | * ↪ **[c:EOF][]**\ 1250 | ↪ **[c:EOL][]**\ 1251 | ↪ **[c:VS][]**\ 1252 | ↪ **c:HT**\ 1253 | ↪ **c:SP** 1254 | 1255 | If `sizeClose` is greater than or equal to `sizeOpen`, enqueue a 1256 | l:fenced-code-fence-sequence-end, and reconsume in the 1257 | s:fenced-code-tilde-close-fence-after 1258 | 1259 | Otherwise, treat it as per the “anything else” entry below 1260 | * ↪ **c:~** 1261 | 1262 | Increment `sizeClose` by `1` and 
consume 1263 | * ↪ **Anything else** 1264 | 1265 | Unset `sizeClose` and reconsume in the 1266 | s:fenced-code-tilde-continuation-inside 1267 | 1268 | ### Fenced code tilde close fence after state 1269 | 1270 | * ↪ **[c:EOF][]** 1271 | 1272 | Unset `sizeOpen`, unset `sizeClose`, enqueue a l:fenced-code-fence-end, 1273 | enqueue a l:fenced-code-end, and enqueue an t:end-of-file 1274 | * ↪ **[c:EOL][]** 1275 | 1276 | Unset `sizeOpen`, unset `sizeClose`, enqueue a l:fenced-code-fence-end, 1277 | enqueue a l:fenced-code-end, enqueue an t:end-of-line, consume, and switch 1278 | to the s:flow-prefix-initial 1279 | * ↪ **[c:VS][]**\ 1280 | ↪ **c:HT**\ 1281 | ↪ **c:SP** 1282 | 1283 | Ensure a t:whitespace and consume 1284 | * ↪ **Anything else** 1285 | 1286 | Reconsume in the s:fenced-code-tilde-continuation-inside 1287 | 1288 | ### Fenced code tilde continuation inside state 1289 | 1290 | * ↪ **[c:EOF][]** 1291 | 1292 | Unset `sizeOpen`, unset `sizeClose`, enqueue a l:fenced-code-fence-end, 1293 | enqueue a l:fenced-code-end, and enqueue an t:end-of-file 1294 | * ↪ **[c:EOL][]** 1295 | 1296 | Enqueue an t:end-of-line, consume, and switch to the s:flow-prefix-initial 1297 | * ↪ **Anything else** 1298 | 1299 | Ensure a t:content and consume 1300 | 1301 | ### Flow content state 1302 | 1303 | * ↪ **[c:EOF][]** 1304 | 1305 | Enqueue an t:end-of-file 1306 | * ↪ **[c:EOL][]** 1307 | 1308 | Enqueue an t:end-of-line, consume, and switch to the s:flow-prefix-initial 1309 | * ↪ **Anything else** 1310 | 1311 | Consume 1312 | 1313 | ## Content state machine 1314 | 1315 | The [content state machine](#) is used to tokenize the inline constructs part of 1316 | content blocks in a document (such as regular definitions and phrasing) and must 1317 | start in the s:content-start. 
1318 | 1319 | ### Content start state 1320 | 1321 | > **Hookable**, the regular hooks are: 1322 | > 1323 | > * **c:[**: s:definition-label-start 1324 | 1325 | * ↪ **Anything else** 1326 | 1327 | Reconsume in the s:content-initial 1328 | 1329 | ### Content initial state 1330 | 1331 | > **Hookable**, there are no regular hooks. 1332 | 1333 | * ↪ **Anything else** 1334 | 1335 | Reconsume in the s:phrasing-content 1336 | 1337 | ### Definition label start state 1338 | 1339 | * ↪ **c:[** 1340 | 1341 | Enqueue a l:content-definition-start, enqueue a 1342 | l:content-definition-label-start, enqueue a t:marker, consume, enqueue a 1343 | l:content-definition-label-open, and switch to the s:definition-label-before 1344 | * ↪ **Anything else** 1345 | 1346 | Enqueue a l:nok 1347 | 1348 | ### Definition label before state 1349 | 1350 | * ↪ **[c:EOF][]** 1351 | 1352 | Enqueue a l:nok 1353 | * ↪ **[c:EOL][]** 1354 | 1355 | Enqueue an t:end-of-line and consume 1356 | * ↪ **[c:VS][]**\ 1357 | ↪ **c:HT**\ 1358 | ↪ **c:SP** 1359 | 1360 | Ensure a t:whitespace and consume 1361 | * ↪ **c:]** 1362 | 1363 | Enqueue a l:nok 1364 | * ↪ **Anything else** 1365 | 1366 | Enqueue a t:content, consume, and switch to the s:definition-label-inside 1367 | 1368 | ### Definition label inside state 1369 | 1370 | * ↪ **[c:EOF][]**\ 1371 | ↪ **[c:EOL][]**\ 1372 | ↪ **[c:VS][]**\ 1373 | ↪ **c:HT**\ 1374 | ↪ **c:SP** 1375 | 1376 | Reconsume in the s:definition-label-between 1377 | * ↪ **U+005C BACKSLASH (`\`)** 1378 | 1379 | Ensure a t:content, consume, and switch to the s:definition-label-escape 1380 | * ↪ **c:]** 1381 | 1382 | Enqueue a l:content-definition-label-close, enqueue a t:marker, consume, 1383 | enqueue a l:content-definition-label-end, and switch to the 1384 | s:definition-label-after 1385 | * ↪ **Anything else** 1386 | 1387 | Ensure a t:content and consume 1388 | 1389 | ### Definition label between state 1390 | 1391 | * ↪ **[c:EOF][]** 1392 | 1393 | Enqueue a l:nok 1394 | * ↪ **[c:EOL][]** 
1395 | 1396 | Enqueue an t:end-of-line and consume 1397 | * ↪ **[c:VS][]**\ 1398 | ↪ **c:HT**\ 1399 | ↪ **c:SP** 1400 | 1401 | Ensure a t:whitespace and consume 1402 | * ↪ **Anything else** 1403 | 1404 | Reconsume in the s:definition-label-inside 1405 | 1406 | ### Definition label escape state 1407 | 1408 | * ↪ **U+005C BACKSLASH (`\`)**\ 1409 | ↪ **c:]** 1410 | 1411 | Consume and switch to the s:definition-label-inside 1412 | * ↪ **Anything else** 1413 | 1414 | Reconsume in the s:definition-label-inside 1415 | 1416 | ### Definition label after state 1417 | 1418 | * ↪ **c::** 1419 | 1420 | Enqueue a t:marker, consume, and switch to the 1421 | s:definition-destination-before 1422 | * ↪ **Anything else** 1423 | 1424 | Enqueue a l:nok 1425 | 1426 | ### Definition destination before state 1427 | 1428 | * ↪ **[c:EOF][]** 1429 | 1430 | Enqueue a l:nok 1431 | * ↪ **[c:EOL][]** 1432 | 1433 | Enqueue an t:end-of-line and consume 1434 | * ↪ **[c:VS][]**\ 1435 | ↪ **c:HT**\ 1436 | ↪ **c:SP** 1437 | 1438 | Ensure a t:whitespace and consume 1439 | * ↪ **c:<** 1440 | 1441 | Enqueue a l:content-definition-destination-start, enqueue a t:marker, 1442 | enqueue a l:content-definition-destination-quoted-open, consume, and switch 1443 | to the s:definition-destination-quoted-inside 1444 | * ↪ **[ASCII control][]** 1445 | 1446 | Enqueue a l:nok 1447 | * ↪ **Anything else** 1448 | 1449 | Let `balance` be `0`, enqueue a l:content-definition-destination-start, 1450 | enqueue a l:content-definition-destination-unquoted-open, enqueue a 1451 | t:content, and reconsume in the s:definition-destination-unquoted-inside 1452 | 1453 | ### Definition destination quoted inside state 1454 | 1455 | * ↪ **[c:EOF][]**\ 1456 | ↪ **[c:EOL][]**\ 1457 | ↪ **c:<** 1458 | 1459 | Enqueue a l:nok 1460 | * ↪ **c:>** 1461 | 1462 | Enqueue a l:content-definition-destination-quoted-close, enqueue a t:marker, 1463 | consume, enqueue a l:content-definition-destination-end, and switch to the 1464 | 
s:definition-destination-after 1465 | * ↪ **U+005C BACKSLASH (`\`)** 1466 | 1467 | Ensure a t:content, consume, and switch to the 1468 | s:definition-destination-quoted-escape 1469 | * ↪ **Anything else** 1470 | 1471 | Ensure a t:content and consume 1472 | 1473 | ### Definition destination quoted escape state 1474 | 1475 | * ↪ **c:<**\ 1476 | ↪ **c:>**\ 1477 | ↪ **U+005C BACKSLASH (`\`)** 1478 | 1479 | Consume and switch to the s:definition-destination-quoted-inside 1480 | * ↪ **Anything else** 1481 | 1482 | Reconsume in the s:definition-destination-quoted-inside 1483 | 1484 | ### Definition destination unquoted inside state 1485 | 1486 | * ↪ **[c:EOF][]**\ 1487 | ↪ **[c:EOL][]**\ 1488 | ↪ **[c:VS][]**\ 1489 | ↪ **c:HT**\ 1490 | ↪ **c:SP** 1491 | 1492 | Unset `balance`, enqueue a l:content-definition-destination-unquoted-close, 1493 | enqueue a l:content-definition-destination-end, and reconsume in the 1494 | s:definition-title-before 1495 | * ↪ **c:(** 1496 | 1497 | Increment `balance` by `1`, ensure a t:content, and consume 1498 | * ↪ **c:)** 1499 | 1500 | If `balance` is `0`, treat it as per the “ASCII control” entry below 1501 | 1502 | Otherwise, decrement `balance` by `1`, ensure a t:content, and consume 1503 | * ↪ **U+005C BACKSLASH (`\`)** 1504 | 1505 | Ensure a t:content, consume, and switch to the 1506 | s:definition-destination-unquoted-escape 1507 | * ↪ **[ASCII control][]** 1508 | 1509 | Unset `balance` and enqueue a l:nok 1510 | * ↪ **Anything else** 1511 | 1512 | Ensure a t:content and consume 1513 | 1514 | ### Definition destination unquoted escape state 1515 | 1516 | * ↪ **c:(**\ 1517 | ↪ **c:)**\ 1518 | ↪ **U+005C BACKSLASH (`\`)** 1519 | 1520 | Consume and switch to the s:definition-destination-unquoted-inside 1521 | * ↪ **Anything else** 1522 | 1523 | Reconsume in the s:definition-destination-unquoted-inside 1524 | 1525 | ### Definition destination after state 1526 | 1527 | * ↪ **[c:EOF][]**\ 1528 | ↪ **[c:EOL][]**\ 1529 | ↪ **[c:VS][]**\ 1530 | 
↪ **c:HT**\ 1531 | ↪ **c:SP** 1532 | 1533 | Reconsume in the s:definition-title-before 1534 | * ↪ **Anything else** 1535 | 1536 | Enqueue a l:nok 1537 | 1538 | ### Definition title before state 1539 | 1540 | * ↪ **[c:EOL][]** 1541 | 1542 | Enqueue a l:content-definition-partial, enqueue an t:end-of-line, consume, 1543 | and switch to the s:definition-title-or-label-before 1544 | * ↪ **[c:VS][]**\ 1545 | ↪ **c:HT**\ 1546 | ↪ **c:SP** 1547 | 1548 | Ensure a t:whitespace and consume 1549 | * ↪ **c:[** 1550 | 1551 | Enqueue a l:nok 1552 | * ↪ **Anything else** 1553 | 1554 | Reconsume in the s:definition-title-or-label-before 1555 | 1556 | ### Definition title or label before state 1557 | 1558 | * ↪ **[c:EOF][]** 1559 | 1560 | Enqueue a l:content-definition-end and enqueue an t:end-of-file 1561 | * ↪ **[c:EOL][]** 1562 | 1563 | Enqueue an t:end-of-line and consume 1564 | * ↪ **[c:VS][]**\ 1565 | ↪ **c:HT**\ 1566 | ↪ **c:SP** 1567 | 1568 | Ensure a t:whitespace and consume 1569 | * ↪ **c:"** 1570 | 1571 | Enqueue a l:content-definition-title-start, enqueue a t:marker, consume, 1572 | enqueue a l:content-definition-title-open, and switch to the 1573 | s:definition-title-double-quoted 1574 | * ↪ **c:'** 1575 | 1576 | Enqueue a l:content-definition-title-start, enqueue a t:marker, consume, 1577 | enqueue a l:content-definition-title-open, and switch to the 1578 | s:definition-title-single-quoted 1579 | * ↪ **c:(** 1580 | 1581 | Enqueue a l:content-definition-title-start, enqueue a t:marker, consume, 1582 | enqueue a l:content-definition-title-open, and switch to the 1583 | s:definition-title-paren-quoted 1584 | * ↪ **Anything else** 1585 | 1586 | Reconsume in the s:content-start 1587 | 1588 | ### Definition title double quoted state 1589 | 1590 | * ↪ **[c:EOF][]**\ 1591 | ↪ **[c:EOL][]**\ 1592 | ↪ **[c:VS][]**\ 1593 | ↪ **c:HT**\ 1594 | ↪ **c:SP** 1595 | 1596 | Reconsume in the s:definition-title-double-quoted-between 1597 | * ↪ **c:"** 1598 | 1599 | Enqueue a 
l:content-definition-title-close, enqueue a t:marker, consume, 1600 | enqueue a l:content-definition-title-end, and switch to the 1601 | s:definition-title-after 1602 | * ↪ **U+005C BACKSLASH (`\`)** 1603 | 1604 | Ensure a t:content, consume, and switch to the 1605 | s:definition-title-double-quoted-escape 1606 | * ↪ **Anything else** 1607 | 1608 | Ensure a t:content and consume 1609 | 1610 | ### Definition title double quoted between state 1611 | 1612 | * ↪ **[c:EOF][]** 1613 | 1614 | Enqueue a l:nok 1615 | * ↪ **[c:EOL][]** 1616 | 1617 | Enqueue an t:end-of-line and consume 1618 | * ↪ **[c:VS][]**\ 1619 | ↪ **c:HT**\ 1620 | ↪ **c:SP** 1621 | 1622 | Ensure a t:whitespace and consume 1623 | * ↪ **Anything else** 1624 | 1625 | Reconsume in the s:definition-title-double-quoted 1626 | 1627 | ### Definition title double quoted escape state 1628 | 1629 | * ↪ **c:"**\ 1630 | ↪ **U+005C BACKSLASH (`\`)** 1631 | 1632 | Consume and switch to the s:definition-title-double-quoted 1633 | * ↪ **Anything else** 1634 | 1635 | Reconsume in the s:definition-title-double-quoted 1636 | 1637 | ### Definition title single quoted state 1638 | 1639 | * ↪ **[c:EOF][]**\ 1640 | ↪ **[c:EOL][]**\ 1641 | ↪ **[c:VS][]**\ 1642 | ↪ **c:HT**\ 1643 | ↪ **c:SP** 1644 | 1645 | Reconsume in the s:definition-title-single-quoted-between 1646 | * ↪ **c:'** 1647 | 1648 | Enqueue a l:content-definition-title-close, enqueue a t:marker, consume, 1649 | enqueue a l:content-definition-title-end, and switch to the 1650 | s:definition-title-after 1651 | * ↪ **U+005C BACKSLASH (`\`)** 1652 | 1653 | Ensure a t:content, consume, and switch to the 1654 | s:definition-title-single-quoted-escape 1655 | * ↪ **Anything else** 1656 | 1657 | Ensure a t:content and consume 1658 | 1659 | ### Definition title single quoted between state 1660 | 1661 | * ↪ **[c:EOF][]** 1662 | 1663 | Enqueue a l:nok 1664 | * ↪ **[c:EOL][]** 1665 | 1666 | Enqueue an t:end-of-line and consume 1667 | * ↪ **[c:VS][]**\ 1668 | ↪ **c:HT**\ 1669 | ↪ 
**c:SP** 1670 | 1671 | Ensure a t:whitespace and consume 1672 | * ↪ **Anything else** 1673 | 1674 | Reconsume in the s:definition-title-single-quoted 1675 | 1676 | ### Definition title single quoted escape state 1677 | 1678 | * ↪ **c:'**\ 1679 | ↪ **U+005C BACKSLASH (`\`)** 1680 | 1681 | Consume and switch to the s:definition-title-single-quoted 1682 | * ↪ **Anything else** 1683 | 1684 | Reconsume in the s:definition-title-single-quoted 1685 | 1686 | ### Definition title paren quoted state 1687 | 1688 | * ↪ **[c:EOF][]**\ 1689 | ↪ **[c:EOL][]**\ 1690 | ↪ **[c:VS][]**\ 1691 | ↪ **c:HT**\ 1692 | ↪ **c:SP** 1693 | 1694 | Reconsume in the s:definition-title-paren-quoted-between 1695 | * ↪ **c:)** 1696 | 1697 | Enqueue a l:content-definition-title-close, enqueue a t:marker, consume, 1698 | enqueue a l:content-definition-title-end, and switch to the 1699 | s:definition-title-after 1700 | * ↪ **U+005C BACKSLASH (`\`)** 1701 | 1702 | Ensure a t:content, consume, and switch to the 1703 | s:definition-title-paren-quoted-escape 1704 | * ↪ **Anything else** 1705 | 1706 | Ensure a t:content and consume 1707 | 1708 | ### Definition title paren quoted between state 1709 | 1710 | * ↪ **[c:EOF][]** 1711 | 1712 | Enqueue a l:nok 1713 | * ↪ **[c:EOL][]** 1714 | 1715 | Enqueue an t:end-of-line and consume 1716 | * ↪ **[c:VS][]**\ 1717 | ↪ **c:HT**\ 1718 | ↪ **c:SP** 1719 | 1720 | Ensure a t:whitespace and consume 1721 | * ↪ **Anything else** 1722 | 1723 | Reconsume in the s:definition-title-paren-quoted 1724 | 1725 | ### Definition title paren quoted escape state 1726 | 1727 | * ↪ **c:)**\ 1728 | ↪ **U+005C BACKSLASH (`\`)** 1729 | 1730 | Consume and switch to the s:definition-title-paren-quoted 1731 | * ↪ **Anything else** 1732 | 1733 | Reconsume in the s:definition-title-paren-quoted 1734 | 1735 | ### Definition title after state 1736 | 1737 | * ↪ **[c:EOF][]** 1738 | 1739 | Enqueue a l:content-definition-end and enqueue an t:end-of-file 1740 | * ↪ **[c:EOL][]** 1741 | 1742 | 
Enqueue a l:content-definition-end, enqueue an t:end-of-line, consume, and 1743 | switch to the s:content-start 1744 | * ↪ **[c:VS][]**\ 1745 | ↪ **c:HT**\ 1746 | ↪ **c:SP** 1747 | 1748 | Ensure a t:whitespace and consume 1749 | * ↪ **Anything else** 1750 | 1751 | Enqueue a l:nok 1752 | 1753 | ### Phrasing content state 1754 | 1755 | * ↪ **[c:EOF][]** 1756 | 1757 | Enqueue an t:end-of-file 1758 | * ↪ **[c:EOL][]** 1759 | 1760 | Enqueue an t:end-of-line, consume, and switch to the s:content-initial 1761 | * ↪ **Anything else** 1762 | 1763 | Consume 1764 | 1765 | ## Text state machine 1766 | 1767 | The [text state machine](#) is used to tokenize the inline constructs part of 1768 | rich text (such as regular resources and emphasis) or plain text (such as 1769 | regular character escapes or character references) in a document and must start 1770 | in the s:text-start. 1771 | 1772 | If text is parsed as plain text, the s:text-start, s:text-initial, and s:text 1773 | all forward to the s:plain-text. 1774 | 1775 | If text is parsed as rich text, an additional variable `prev` must be tracked. 1776 | Initially set to [c:EOF][], it must be set to the [input character][] right before 1777 | a character is consumed. 1778 | 1779 | ### Text start state 1780 | 1781 | > **Hookable**, there are no regular hooks.
1790 | 1791 | * ↪ **Anything else** 1792 | 1793 | Reconsume in the s:text 1794 | 1795 | ### Text state 1796 | 1797 | > **Hookable**, the regular hooks are: 1798 | > 1799 | > * **[c:EOL][]**: s:end-of-line 1800 | > * **c:!**: s:image-label-start 1801 | > * **c:&**: s:character-reference 1802 | > * **c:***: s:delimiter-run-asterisk-start 1803 | > * **c:<**: s:autolink 1804 | > * **c:<**: s:html 1805 | > * **c:[**: s:link-label-start 1806 | > * **U+005C BACKSLASH (`\`)**: s:character-escape 1807 | > * **U+005C BACKSLASH (`\`)**: s:break-escape 1808 | > * **c:]**: s:label-resource-close 1809 | > * **c:]**: s:label-reference-close 1810 | > * **c:]**: s:label-reference-shortcut-close 1811 | > * **c:_**: s:delimiter-run-underscore-start 1812 | > * **c:`**: s:code-start 1813 | 1814 | * ↪ **[c:EOF][]** 1815 | 1816 | Enqueue an t:end-of-file 1817 | * ↪ **Anything else** 1818 | 1819 | Ensure a t:content and consume 1820 | 1821 | ### Plain text state 1822 | 1823 | > **Hookable**, the regular hooks are: 1824 | > 1825 | > * **[c:EOL][]**: s:plain-end-of-line 1826 | > * **c:&**: s:character-reference 1827 | > * **U+005C BACKSLASH (`\`)**: s:character-escape 1828 | 1829 | * ↪ **[c:EOF][]** 1830 | 1831 | Enqueue an t:end-of-file 1832 | * ↪ **Anything else** 1833 | 1834 | Ensure a t:content and consume 1835 | 1836 | ### End-of-line state 1837 | 1838 | * ↪ **[c:EOL][]** 1839 | 1840 | If the break represented by the character starts with two or more of 1841 | [c:VS][], c:HT, or c:SP, enqueue a l:hard-break, enqueue an t:end-of-line, 1842 | consume, and switch to the s:text-initial 1843 | 1844 | Otherwise, enqueue a l:soft-break, enqueue an t:end-of-line, consume, 1845 | and switch to the s:text-initial 1846 | * ↪ **Anything else** 1847 | 1848 | Enqueue a l:nok 1849 | 1850 | ### Plain end-of-line state 1851 | 1852 | * ↪ **[c:EOL][]** 1853 | 1854 | Enqueue a l:soft-break, enqueue an t:end-of-line, consume, and switch to the 1855 | s:text-initial 1856 | * ↪ **Anything else** 1857 | 1858 
| Enqueue a l:nok 1859 | 1860 | ### Image label start state 1861 | 1862 | * ↪ **c:!** 1863 | 1864 | Enqueue an l:image-label-start, enqueue a t:marker, consume, and switch to 1865 | the s:image-label-start-after 1866 | * ↪ **Anything else** 1867 | 1868 | Enqueue a l:nok 1869 | 1870 | ### Image label start after state 1871 | 1872 | * ↪ **c:[** 1873 | 1874 | Enqueue a t:marker, consume, enqueue an l:image-label-open, and switch to 1875 | the s:text 1876 | * ↪ **Anything else** 1877 | 1878 | Enqueue a l:nok 1879 | 1880 | ### Character reference state 1881 | 1882 | * ↪ **c:&** 1883 | 1884 | Enqueue a l:character-reference-start, enqueue a t:marker, consume, and 1885 | switch to the s:character-reference-start-after 1886 | * ↪ **Anything else** 1887 | 1888 | Enqueue a l:nok 1889 | 1890 | ### Character reference start after state 1891 | 1892 | * ↪ **c:#** 1893 | 1894 | Enqueue a t:marker, consume, and switch to the s:character-reference-numeric 1895 | * ↪ **[ASCII alphanumeric][]** 1896 | 1897 | Let `entityName` be the empty string, append the character to `entityName`, 1898 | enqueue a t:content, consume, and switch to the s:character-reference-named 1899 | * ↪ **Anything else** 1900 | 1901 | Enqueue a l:nok 1902 | 1903 | ### Character reference named state 1904 | 1905 | > **Note**: This state can be optimized by either imposing a maximum size (the 1906 | > size of the longest possible named character reference) or by using a trie of 1907 | > the possible named character references. 
1908 | 1909 | * ↪ **c:;** 1910 | 1911 | If `entityName` is a [character reference name][], unset `entityName`, 1912 | enqueue a t:marker, consume, enqueue a l:character-reference-end, and switch 1913 | to the s:text 1914 | 1915 | Otherwise, treat it as per the “anything else” entry below 1916 | * ↪ **[ASCII alphanumeric][]** 1917 | 1918 | Append the character to `entityName` and consume 1919 | * ↪ **Anything else** 1920 | 1921 | Unset `entityName` and enqueue a l:nok 1922 | 1923 | ### Character reference numeric state 1924 | 1925 | * ↪ **c:X**\ 1926 | ↪ **c:x** 1927 | 1928 | Let `characterReferenceCode` be `0`, enqueue a t:marker, consume, and switch 1929 | to the s:character-reference-hexadecimal-start 1930 | * ↪ **[ASCII digit][]** 1931 | 1932 | Let `characterReferenceCode` be `0`, enqueue a t:content and reconsume in 1933 | the s:character-reference-decimal 1934 | * ↪ **Anything else** 1935 | 1936 | Enqueue a l:nok 1937 | 1938 | ### Character reference hexadecimal start state 1939 | 1940 | * ↪ **[ASCII hex digit][]** 1941 | 1942 | Enqueue a t:content and reconsume in the s:character-reference-hexadecimal 1943 | * ↪ **Anything else** 1944 | 1945 | Unset `characterReferenceCode` and enqueue a l:nok 1946 | 1947 | ### Character reference hexadecimal state 1948 | 1949 | > **Note**: This state can be optimized by imposing a maximum size (the size of 1950 | > the longest possible valid hexadecimal character reference, 6). 
1951 | 1952 | * ↪ **c:;** 1953 | 1954 | Unset `characterReferenceCode`, enqueue a t:marker, consume, enqueue a 1955 | l:character-reference-end, and switch to the s:text 1956 | * ↪ **[ASCII digit][]** 1957 | 1958 | Multiply `characterReferenceCode` by `0x10`, add the [digitize][]d 1959 | [input character][] to `characterReferenceCode`, and consume 1960 | * ↪ **[ASCII upper hex digit][]** 1961 | 1962 | Multiply `characterReferenceCode` by `0x10`, add the [digitize][]d 1963 | [input character][] to `characterReferenceCode`, and consume 1964 | * ↪ **[ASCII lower hex digit][]** 1965 | 1966 | Multiply `characterReferenceCode` by `0x10`, add the [digitize][]d 1967 | [input character][] to `characterReferenceCode`, and consume 1968 | * ↪ **Anything else** 1969 | 1970 | Unset `characterReferenceCode` and enqueue a l:nok 1971 | 1972 | ### Character reference decimal state 1973 | 1974 | > **Note**: This state can be optimized by imposing a maximum size (the size of 1975 | > the longest possible valid decimal character reference, 7). 
1976 | 1977 | * ↪ **c:;** 1978 | 1979 | Unset `characterReferenceCode`, enqueue a t:marker, consume, enqueue a 1980 | l:character-reference-end, and switch to the s:text 1981 | * ↪ **[ASCII digit][]** 1982 | 1983 | Multiply `characterReferenceCode` by `10`, add the [digitize][]d 1984 | [input character][] to `characterReferenceCode`, and consume 1985 | * ↪ **Anything else** 1986 | 1987 | Unset `characterReferenceCode` and enqueue a l:nok 1988 | 1989 | ### Delimiter run asterisk start state 1990 | 1991 | * ↪ **c:*** 1992 | 1993 | Let `delimiterRunAfter` be `null` and let `delimiterRunBefore` be 1994 | `'whitespace'` if `prev` is [c:EOF][], [c:EOL][], or [Unicode whitespace][], 1995 | `'punctuation'` if it is [Unicode punctuation][], or `null` otherwise 1996 | 1997 | Enqueue a l:delimiter-run-start, enqueue a t:sequence, consume, and switch 1998 | to the s:delimiter-run-asterisk 1999 | * ↪ **Anything else** 2000 | 2001 | Enqueue a l:nok 2002 | 2003 | ### Delimiter run asterisk state 2004 | 2005 | * ↪ **[c:EOF][]**\ 2006 | ↪ **[c:EOL][]**\ 2007 | ↪ **[Unicode whitespace][]** 2008 | 2009 | Let `delimiterRunAfter` be `'whitespace'` and treat it as per the “anything 2010 | else” entry below 2011 | * ↪ **c:*** 2012 | 2013 | Consume 2014 | * ↪ **[Unicode punctuation][]** 2015 | 2016 | Let `delimiterRunAfter` be `'punctuation'` and treat it as per the “anything 2017 | else” entry below 2018 | * ↪ **Anything else** 2019 | 2020 | Let `leftFlanking` be whether `delimiterRunAfter` is not 2021 | `'whitespace'` and either `delimiterRunAfter` is not `'punctuation'` 2022 | or `delimiterRunBefore` is not `null` 2023 | 2024 | Let `rightFlanking` be whether `delimiterRunBefore` is not 2025 | `'whitespace'` and either `delimiterRunBefore` is not `'punctuation'` 2026 | or `delimiterRunAfter` is not `null` 2027 | 2028 | Unset `delimiterRunBefore`, unset `delimiterRunAfter`, enqueue a 2029 | l:delimiter-run-end, and reconsume in the s:text 2030 | 2031 | ###
Autolink state 2032 | 2033 | * ↪ **c:<** 2034 | 2035 | Enqueue an l:autolink-start, enqueue a t:marker, consume, enqueue an 2036 | l:autolink-open, and switch to the s:autolink-open 2037 | * ↪ **Anything else** 2038 | 2039 | Enqueue a l:nok 2040 | 2041 | ### Autolink open state 2042 | 2043 | * ↪ **[ASCII alpha][]** 2044 | 2045 | Consume, let `sizeScheme` be `1`, and switch to the 2046 | s:autolink-scheme-or-email-atext 2047 | * ↪ **[atext][]**\ 2048 | ↪ **c:.** 2049 | 2050 | Consume and switch to the s:autolink-email-atext 2051 | * ↪ **Anything else** 2052 | 2053 | Enqueue a l:nok 2054 | 2055 | ### Autolink email atext state 2056 | 2057 | * ↪ **c:@** 2058 | 2059 | Enqueue a t:marker, consume, let `sizeLabel` be `1`, enqueue a t:content, 2060 | and switch to the s:autolink-email-at-sign-or-dot 2061 | * ↪ **[atext][]**\ 2062 | ↪ **c:.** 2063 | 2064 | Consume 2065 | * ↪ **Anything else** 2066 | 2067 | Enqueue a l:nok 2068 | 2069 | ### Autolink email label state 2070 | 2071 | * ↪ **c:-** 2072 | 2073 | If `sizeLabel` is not `63`, increment `sizeLabel` by `1`, consume, and 2074 | switch to the s:autolink-email-dash 2075 | 2076 | Otherwise, treat it as per the “anything else” entry below 2077 | * ↪ **c:.** 2078 | 2079 | If `sizeLabel` is not `63`, increment `sizeLabel` by `1`, consume, and 2080 | switch to the s:autolink-email-at-sign-or-dot 2081 | 2082 | Otherwise, treat it as per the “anything else” entry below 2083 | * ↪ **c:>** 2084 | 2085 | Unset `sizeLabel`, enqueue an l:autolink-email-close, enqueue a t:marker, 2086 | consume, enqueue an l:autolink-email-end, and switch to the s:text 2087 | * ↪ **[ASCII alphanumeric][]** 2088 | 2089 | If `sizeLabel` is not `63`, increment `sizeLabel` by `1` and consume 2090 | 2091 | Otherwise, treat it as per the “anything else” entry below 2092 | * ↪ **Anything else** 2093 | 2094 | Unset `sizeLabel` and enqueue a l:nok 2095 | 2096 | ### Autolink email at sign or dot state 2097 | 2098 | * ↪ **[ASCII alphanumeric][]** 2099 | 2100 | 
If `sizeLabel` is not `63`, increment `sizeLabel` by `1`, consume, and 2101 | switch to the s:autolink-email-label 2102 | 2103 | Otherwise, treat it as per the “anything else” entry below 2104 | * ↪ **Anything else** 2105 | 2106 | Unset `sizeLabel` and enqueue a l:nok 2107 | 2108 | ### Autolink email dash state 2109 | 2110 | * ↪ **c:-** 2111 | 2112 | If `sizeLabel` is not `63`, increment `sizeLabel` by `1` and consume 2113 | 2114 | Otherwise, treat it as per the “anything else” entry below 2115 | * ↪ **[ASCII alphanumeric][]** 2116 | 2117 | If `sizeLabel` is not `63`, increment `sizeLabel` by `1`, consume, and 2118 | switch to the s:autolink-email-label 2119 | 2120 | Otherwise, treat it as per the “anything else” entry below 2121 | * ↪ **Anything else** 2122 | 2123 | Unset `sizeLabel` and enqueue a l:nok 2124 | 2125 | ### Autolink scheme or email atext state 2126 | 2127 | * ↪ **c:+**\ 2128 | ↪ **c:.**\ 2129 | ↪ **c:-**\ 2130 | ↪ **[ASCII alphanumeric][]** 2131 | 2132 | Increment `sizeScheme` by `1`, consume, and switch to the 2133 | s:autolink-scheme-inside-or-email-atext 2134 | * ↪ **[atext][]** 2135 | 2136 | Unset `sizeScheme`, consume, and switch to the s:autolink-email-atext 2137 | * ↪ **Anything else** 2138 | 2139 | Unset `sizeScheme` and enqueue a l:nok 2140 | 2141 | ### Autolink scheme inside or email atext state 2142 | 2143 | * ↪ **c::** 2144 | 2145 | Unset `sizeScheme`, enqueue a t:marker, consume, enqueue a t:content, and 2146 | switch to the s:autolink-uri-inside 2147 | * ↪ **c:@** 2148 | 2149 | Unset `sizeScheme`, enqueue a t:marker, consume, let `sizeLabel` be `1`, 2150 | enqueue a t:content, and switch to the s:autolink-email-at-sign-or-dot 2151 | * ↪ **c:+**\ 2152 | ↪ **c:.**\ 2153 | ↪ **c:-**\ 2154 | ↪ **[ASCII alphanumeric][]** 2155 | 2156 | If `sizeScheme` is not `32`, increment `sizeScheme` by `1` and consume 2157 | 2158 | Otherwise, treat it as per the “atext” entry below 2159 | * ↪ **[atext][]** 2160 | 2161 | Unset `sizeScheme`, consume, and 
switch to the s:autolink-email-atext 2162 | * ↪ **Anything else** 2163 | 2164 | Unset `sizeScheme` and enqueue a l:nok 2165 | 2166 | ### Autolink URI inside state 2167 | 2168 | * ↪ **[c:EOF][]**\ 2169 | ↪ **[c:EOL][]**\ 2170 | ↪ **[ASCII control][]**\ 2171 | ↪ **c:SP**\ 2172 | ↪ **c:<** 2173 | 2174 | Enqueue a l:nok 2175 | * ↪ **c:>** 2176 | 2177 | Enqueue an l:autolink-uri-close, enqueue a t:marker, consume, enqueue an 2178 | l:autolink-uri-end, and switch to the s:text 2179 | * ↪ **Anything else** 2180 | 2181 | Consume 2182 | 2183 | ### HTML state 2184 | 2185 | * ↪ **c:<** 2186 | 2187 | Enqueue an l:html-start, enqueue a t:content, consume, and switch to the 2188 | s:html-open 2189 | * ↪ **Anything else** 2190 | 2191 | Enqueue a l:nok 2192 | 2193 | ### HTML open state 2194 | 2195 | * ↪ **c:!** 2196 | 2197 | Consume and switch to the s:html-declaration-start 2198 | * ↪ **c:/** 2199 | 2200 | Consume and switch to the s:html-tag-close-start 2201 | * ↪ **c:?** 2202 | 2203 | Consume and switch to the s:html-instruction-inside 2204 | * ↪ **[ASCII alpha][]** 2205 | 2206 | Consume and switch to the s:html-tag-open-inside 2207 | * ↪ **Anything else** 2208 | 2209 | Enqueue a l:nok 2210 | 2211 | ### HTML declaration start state 2212 | 2213 | * ↪ **c:-** 2214 | 2215 | Consume and switch to the s:html-comment-open-inside 2216 | * ↪ **`[CDATA[` (the five upper letters “CDATA” with a c:[ before and 2217 | after)** 2218 | 2219 | Consume and switch to the s:html-cdata-inside 2220 | * ↪ **[ASCII alpha][]** 2221 | 2222 | Reconsume in the s:html-declaration-inside 2223 | * ↪ **Anything else** 2224 | 2225 | Enqueue a l:nok 2226 | 2227 | ### HTML comment open inside state 2228 | 2229 | * ↪ **c:-** 2230 | 2231 | Consume and switch to the s:html-comment-inside 2232 | * ↪ **Anything else** 2233 | 2234 | Enqueue a l:nok 2235 | 2236 | ### HTML comment inside state 2237 | 2238 | * ↪ **[c:EOF][]** 2239 | 2240 | Enqueue a l:nok 2241 | * ↪ **[c:EOL][]** 2242 | 2243 | Enqueue a l:soft-break, 
enqueue an t:end-of-line, consume, and enqueue a 2244 | t:content 2245 | * ↪ **c:-** 2246 | 2247 | Consume and switch to the s:html-comment-close-inside 2248 | * ↪ **Anything else** 2249 | 2250 | Consume 2251 | 2252 | ### HTML comment close inside state 2253 | 2254 | * ↪ **c:-** 2255 | 2256 | Consume and switch to the s:html-comment-close 2257 | * ↪ **Anything else** 2258 | 2259 | Reconsume in the s:html-comment-inside 2260 | 2261 | ### HTML comment close state 2262 | 2263 | > **Note**: a CM comment may not contain two dashes (`--`), and may not end in a 2264 | > dash (which would result in `--->`). 2265 | > Here we have seen two dashes, so we can either be at the end of a comment, or 2266 | > no longer in a comment. 2267 | 2268 | * ↪ **c:>** 2269 | 2270 | Consume, enqueue an l:html-end, and switch to the s:text 2271 | * ↪ **Anything else** 2272 | 2273 | Enqueue a l:nok 2274 | 2275 | ### HTML CDATA inside state 2276 | 2277 | * ↪ **[c:EOF][]** 2278 | 2279 | Enqueue a l:nok 2280 | * ↪ **[c:EOL][]** 2281 | 2282 | Enqueue a l:soft-break, enqueue an t:end-of-line, consume, and enqueue a 2283 | t:content 2284 | * ↪ **`]]>` (two of c:], with a c:> after)** 2285 | 2286 | Consume, enqueue an l:html-end, and switch to the s:text 2287 | * ↪ **Anything else** 2288 | 2289 | Consume 2290 | 2291 | ### HTML declaration inside state 2292 | 2293 | * ↪ **[c:EOF][]** 2294 | 2295 | Enqueue a l:nok 2296 | * ↪ **[c:EOL][]** 2297 | 2298 | Enqueue a l:soft-break, enqueue an t:end-of-line, consume, and enqueue a 2299 | t:content 2300 | * ↪ **c:>** 2301 | 2302 | Consume, enqueue an l:html-end, and switch to the s:text 2303 | * ↪ **Anything else** 2304 | 2305 | Consume 2306 | 2307 | ### HTML instruction inside state 2308 | 2309 | * ↪ **[c:EOF][]** 2310 | 2311 | Enqueue a l:nok 2312 | * ↪ **[c:EOL][]** 2313 | 2314 | Enqueue a l:soft-break, enqueue an t:end-of-line, consume, and enqueue a 2315 | t:content 2316 | * ↪ **c:?** 2317 | 2318 | Consume and switch to the s:html-instruction-close 2319 | 
* ↪ **Anything else** 2320 | 2321 | Consume 2322 | 2323 | ### HTML instruction close state 2324 | 2325 | * ↪ **c:>** 2326 | 2327 | Consume, enqueue an l:html-end, and switch to the s:text 2328 | * ↪ **Anything else** 2329 | 2330 | Reconsume in the s:html-instruction-inside 2331 | 2332 | ### HTML tag close start state 2333 | 2334 | * ↪ **[ASCII alpha][]** 2335 | 2336 | Consume and switch to the s:html-tag-close-inside 2337 | * ↪ **Anything else** 2338 | 2339 | Enqueue a l:nok 2340 | 2341 | ### HTML tag close inside state 2342 | 2343 | * ↪ **[c:EOL][]**\ 2344 | ↪ **[c:VS][]**\ 2345 | ↪ **c:HT**\ 2346 | ↪ **c:SP** 2347 | 2348 | Reconsume in the s:html-tag-close-between 2349 | * ↪ **c:>** 2350 | 2351 | Consume, enqueue an l:html-end, and switch to the s:text 2352 | * ↪ **[ASCII alphanumeric][]**\ 2353 | ↪ **c:-** 2354 | 2355 | Consume 2356 | * ↪ **Anything else** 2357 | 2358 | Enqueue a l:nok 2359 | 2360 | ### HTML tag close between state 2361 | 2362 | > **Note**: an EOL is technically allowed here, but as a `>` after an EOL would 2363 | > start a blockquote, practically it’s not possible. 
2364 | 2365 | * ↪ **[c:EOL][]** 2366 | 2367 | Enqueue a l:soft-break, enqueue an t:end-of-line, consume, and enqueue a 2368 | t:content 2369 | * ↪ **[c:VS][]**\ 2370 | ↪ **c:HT**\ 2371 | ↪ **c:SP** 2372 | 2373 | Consume 2374 | * ↪ **c:>** 2375 | 2376 | Consume, enqueue an l:html-end, and switch to the s:text 2377 | * ↪ **Anything else** 2378 | 2379 | Enqueue a l:nok 2380 | 2381 | ### HTML tag open inside state 2382 | 2383 | * ↪ **[c:EOL][]**\ 2384 | ↪ **[c:VS][]**\ 2385 | ↪ **c:HT**\ 2386 | ↪ **c:SP** 2387 | 2388 | Reconsume in the s:html-tag-open-between 2389 | * ↪ **c:>** 2390 | 2391 | Consume, enqueue an l:html-end, and switch to the s:text 2392 | * ↪ **[ASCII alphanumeric][]**\ 2393 | ↪ **c:-** 2394 | 2395 | Consume 2396 | * ↪ **Anything else** 2397 | 2398 | Enqueue a l:nok 2399 | 2400 | ### HTML tag open between state 2401 | 2402 | * ↪ **[c:EOL][]** 2403 | 2404 | Enqueue a l:soft-break, enqueue an t:end-of-line, consume, and enqueue a 2405 | t:content 2406 | * ↪ **[c:VS][]**\ 2407 | ↪ **c:HT**\ 2408 | ↪ **c:SP** 2409 | 2410 | Consume 2411 | * ↪ **c:/** 2412 | 2413 | Consume and switch to the s:html-tag-open-self-closing 2414 | * ↪ **c:>** 2415 | 2416 | Consume, enqueue an l:html-end, and switch to the s:text 2417 | * ↪ **[ASCII alpha][]**\ 2418 | ↪ **c::**\ 2419 | ↪ **c:_** 2420 | 2421 | Consume and switch to the s:html-tag-open-attribute-name 2422 | * ↪ **Anything else** 2423 | 2424 | Enqueue a l:nok 2425 | 2426 | ### HTML tag open self closing state 2427 | 2428 | * ↪ **c:>** 2429 | 2430 | Consume, enqueue an l:html-end, and switch to the s:text 2431 | * ↪ **Anything else** 2432 | 2433 | Enqueue a l:nok 2434 | 2435 | ### HTML tag open attribute name state 2436 | 2437 | * ↪ **[ASCII alphanumeric][]**\ 2438 | ↪ **c:-**\ 2439 | ↪ **c:.**\ 2440 | ↪ **c::**\ 2441 | ↪ **c:_** 2442 | 2443 | Consume 2444 | * ↪ **Anything else** 2445 | 2446 | Reconsume in the s:html-tag-open-attribute-name-after 2447 | 2448 | ### HTML tag open attribute name after state 2449 | 2450 | 
* ↪ **[c:EOL][]** 2451 | 2452 | Enqueue a l:soft-break, enqueue an t:end-of-line, consume, and enqueue a 2453 | t:content 2454 | * ↪ **[c:VS][]**\ 2455 | ↪ **c:HT**\ 2456 | ↪ **c:SP** 2457 | 2458 | Consume 2459 | * ↪ **c:/** 2460 | 2461 | Consume and switch to the s:html-tag-open-self-closing 2462 | * ↪ **c:=** 2463 | 2464 | Consume and switch to the s:html-tag-open-attribute-before 2465 | * ↪ **c:>** 2466 | 2467 | Consume, enqueue an l:html-end, and switch to the s:text 2468 | * ↪ **Anything else** 2469 | 2470 | Enqueue a l:nok 2471 | 2472 | ### HTML tag open attribute before state 2473 | 2474 | * ↪ **[c:EOL][]** 2475 | 2476 | Enqueue a l:soft-break, enqueue an t:end-of-line, consume, and enqueue a 2477 | t:content 2478 | * ↪ **[c:VS][]**\ 2479 | ↪ **c:HT**\ 2480 | ↪ **c:SP** 2481 | 2482 | Consume 2483 | * ↪ **c:"** 2484 | 2485 | Consume and switch to the s:html-tag-open-double-quoted-attribute 2486 | * ↪ **c:'** 2487 | 2488 | Consume and switch to the s:html-tag-open-single-quoted-attribute 2489 | * ↪ **c:<**\ 2490 | ↪ **c:=**\ 2491 | ↪ **c:>**\ 2492 | ↪ **c:`** 2493 | 2494 | Enqueue a l:nok 2495 | * ↪ **Anything else** 2496 | 2497 | Consume and switch to the s:html-tag-open-unquoted-attribute 2498 | 2499 | ### HTML tag open double quoted attribute state 2500 | 2501 | * ↪ **[c:EOF][]** 2502 | 2503 | Enqueue a l:nok 2504 | * ↪ **[c:EOL][]** 2505 | 2506 | Enqueue a l:soft-break, enqueue an t:end-of-line, consume, and enqueue a 2507 | t:content 2508 | * ↪ **c:"** 2509 | 2510 | Consume and switch to the s:html-tag-open-between 2511 | * ↪ **Anything else** 2512 | 2513 | Consume 2514 | 2515 | ### HTML tag open single quoted attribute state 2516 | 2517 | * ↪ **[c:EOF][]** 2518 | 2519 | Enqueue a l:nok 2520 | * ↪ **[c:EOL][]** 2521 | 2522 | Enqueue a l:soft-break, enqueue an t:end-of-line, consume, and enqueue a 2523 | t:content 2524 | * ↪ **c:'** 2525 | 2526 | Consume and switch to the s:html-tag-open-between 2527 | * ↪ **Anything else** 2528 | 2529 | Consume 2530 | 
2531 | ### HTML tag open unquoted attribute state 2532 | 2533 | * ↪ **[c:EOF][]**\ 2534 | ↪ **c:"**\ 2535 | ↪ **c:'**\ 2536 | ↪ **c:<**\ 2537 | ↪ **c:=**\ 2538 | ↪ **c:`** 2539 | 2540 | Enqueue a l:nok 2541 | * ↪ **[c:EOL][]**\ 2542 | ↪ **[c:VS][]**\ 2543 | ↪ **c:HT**\ 2544 | ↪ **c:SP**\ 2545 | ↪ **c:>** 2546 | 2547 | Reconsume in the s:html-tag-open-between 2548 | * ↪ **Anything else** 2549 | 2550 | Consume 2551 | 2552 | ### Link label start state 2553 | 2554 | * ↪ **c:[** 2555 | 2556 | Enqueue a l:link-label-start, enqueue a t:marker, consume, enqueue a 2557 | l:link-label-open, and switch to the s:text 2558 | * ↪ **Anything else** 2559 | 2560 | Enqueue a l:nok 2561 | 2562 | ### Character escape state 2563 | 2564 | * ↪ **U+005C BACKSLASH (`\`)** 2565 | 2566 | Enqueue a l:character-escape-start, enqueue a t:marker, consume, and switch 2567 | to the s:character-escape-after 2568 | * ↪ **Anything else** 2569 | 2570 | Enqueue a l:nok 2571 | 2572 | ### Character escape after state 2573 | 2574 | * ↪ **[ASCII punctuation][]** 2575 | 2576 | Enqueue a t:content, consume, enqueue a l:character-escape-end, and switch 2577 | to the s:text 2578 | * ↪ **Anything else** 2579 | 2580 | Enqueue a l:nok 2581 | 2582 | ### Break escape state 2583 | 2584 | * ↪ **U+005C BACKSLASH (`\`)** 2585 | 2586 | Enqueue a l:break-escape-start, enqueue a l:hard-break, enqueue a t:marker, 2587 | consume, and switch to the s:break-escape-after 2588 | * ↪ **Anything else** 2589 | 2590 | Enqueue a l:nok 2591 | 2592 | ### Break escape after state 2593 | 2594 | * ↪ **[c:EOL][]** 2595 | 2596 | If the break represented by the character does not start with a [c:VS][], 2597 | c:HT, or c:SP, enqueue an t:end-of-line, consume, enqueue a 2598 | l:break-escape-end, and switch to the s:text-initial 2599 | 2600 | Otherwise, treat it as per the “anything else” entry below 2601 | * ↪ **Anything else** 2602 | 2603 | Enqueue a l:nok 2604 | 2605 | ### Label resource close state 2606 | 2607 | * ↪ **c:]** 2608 | 2609 | 
Enqueue a l:label-close, enqueue a t:marker, consume, enqueue a l:label-end, 2610 | and switch to the s:label-resource-end-after 2611 | * ↪ **Anything else** 2612 | 2613 | Enqueue a l:nok 2614 | 2615 | ### Label resource end after state 2616 | 2617 | * ↪ **c:(** 2618 | 2619 | Enqueue a l:resource-information-start, enqueue a t:marker, consume, enqueue 2620 | a l:resource-information-open, and switch to the s:resource-information-open 2621 | * ↪ **Anything else** 2622 | 2623 | Enqueue a l:nok 2624 | 2625 | ### Resource information open state 2626 | 2627 | * ↪ **[c:EOF][]** 2628 | 2629 | Enqueue a l:nok 2630 | * ↪ **[c:EOL][]** 2631 | 2632 | Enqueue a l:soft-break, enqueue an t:end-of-line, and consume 2633 | * ↪ **[c:VS][]**\ 2634 | ↪ **c:HT**\ 2635 | ↪ **c:SP** 2636 | 2637 | Ensure a t:whitespace and consume 2638 | * ↪ **c:)** 2639 | 2640 | Enqueue a l:resource-information-close, enqueue a t:marker, consume, enqueue 2641 | a l:resource-information-end, and switch to the s:text 2642 | * ↪ **c:<** 2643 | 2644 | Enqueue a l:resource-information-destination-quoted-start, enqueue a 2645 | t:marker, consume, enqueue a l:resource-information-destination-quoted-open, 2646 | and switch to the s:resource-information-destination-quoted-inside 2647 | * ↪ **[ASCII control][]** 2648 | 2649 | Enqueue a l:nok 2650 | * ↪ **Anything else** 2651 | 2652 | Enqueue a l:resource-information-destination-unquoted-start, enqueue a 2653 | t:content, consume, enqueue a 2654 | l:resource-information-destination-unquoted-open, and switch to the 2655 | s:resource-information-destination-unquoted-inside 2656 | 2657 | ### Resource information destination quoted inside state 2658 | 2659 | * ↪ **[c:EOF][]**\ 2660 | ↪ **[c:EOL][]**\ 2661 | ↪ **c:<** 2662 | 2663 | Enqueue a l:nok 2664 | * ↪ **c:>** 2665 | 2666 | Enqueue a l:resource-information-destination-quoted-close, enqueue a 2667 | t:marker, consume, enqueue a l:resource-information-destination-quoted-end, 2668 | and switch to the 
s:resource-information-destination-quoted-after 2669 | * ↪ **U+005C BACKSLASH (`\`)** 2670 | 2671 | Ensure a t:content, consume, and switch to the 2672 | s:resource-information-destination-quoted-escape 2673 | * ↪ **Anything else** 2674 | 2675 | Ensure a t:content and consume 2676 | 2677 | ### Resource information destination quoted escape state 2678 | 2679 | * ↪ **c:<**\ 2680 | ↪ **c:>**\ 2681 | ↪ **U+005C BACKSLASH (`\`)** 2682 | 2683 | Consume and switch to the s:resource-information-destination-quoted-inside 2684 | * ↪ **Anything else** 2685 | 2686 | Reconsume in the s:resource-information-destination-quoted-inside 2687 | 2688 | ### Resource information destination quoted after state 2689 | 2690 | * ↪ **[c:EOL][]**\ 2691 | ↪ **[c:VS][]**\ 2692 | ↪ **c:HT**\ 2693 | ↪ **c:SP**\ 2694 | ↪ **c:>** 2695 | 2696 | Reconsume in the s:resource-information-between 2697 | * ↪ **c:)** 2698 | 2699 | Enqueue a l:resource-information-close, enqueue a t:marker, consume, enqueue 2700 | a l:resource-information-end, and switch to the s:text 2701 | * ↪ **Anything else** 2702 | 2703 | Enqueue a l:nok 2704 | 2705 | ### Resource information destination unquoted inside state 2706 | 2707 | * ↪ **[c:EOF][]** 2708 | 2709 | Unset `balance` and enqueue a l:nok 2710 | * ↪ **[c:EOL][]**\ 2711 | ↪ **[c:VS][]**\ 2712 | ↪ **c:HT**\ 2713 | ↪ **c:SP**\ 2714 | ↪ **c:>** 2715 | 2716 | Enqueue a l:resource-information-destination-unquoted-close, enqueue a 2717 | l:resource-information-destination-unquoted-end, and reconsume in the 2718 | s:resource-information-between 2719 | * ↪ **c:(** 2720 | 2721 | Increment `balance` by `1` and consume 2722 | * ↪ **c:)** 2723 | 2724 | If `balance` is `0`, unset `balance`, enqueue a 2725 | l:resource-information-destination-unquoted-close, enqueue a 2726 | l:resource-information-destination-unquoted-end, and reconsume in the 2727 | s:resource-information-between 2728 | 2729 | Otherwise, decrement `balance` by `1`, and consume 2730 | * ↪ **U+005C BACKSLASH (`\`)** 
2731 | 2732 | Consume and switch to the s:resource-information-destination-unquoted-escape 2733 | * ↪ **[ASCII control][]** 2734 | 2735 | Unset `balance` and enqueue a l:nok 2736 | * ↪ **Anything else** 2737 | 2738 | Consume 2739 | 2740 | ### Resource information destination unquoted escape state 2741 | 2742 | * ↪ **c:(**\ 2743 | ↪ **c:)**\ 2744 | ↪ **U+005C BACKSLASH (`\`)** 2745 | 2746 | Consume and switch to the s:resource-information-destination-unquoted-inside 2747 | * ↪ **Anything else** 2748 | 2749 | Reconsume in the s:resource-information-destination-unquoted-inside 2750 | 2751 | ### Resource information between state 2752 | 2753 | * ↪ **[c:EOL][]** 2754 | 2755 | Enqueue a l:soft-break, enqueue an t:end-of-line, and consume 2756 | * ↪ **[c:VS][]**\ 2757 | ↪ **c:HT**\ 2758 | ↪ **c:SP** 2759 | 2760 | Ensure a t:whitespace and consume 2761 | * ↪ **c:"** 2762 | 2763 | Enqueue a l:resource-information-title-start, enqueue a t:marker, consume, 2764 | enqueue a l:resource-information-title-open, and switch to the 2765 | s:resource-information-title-double-quoted-inside 2766 | * ↪ **c:'** 2767 | 2768 | Enqueue a l:resource-information-title-start, enqueue a t:marker, consume, 2769 | enqueue a l:resource-information-title-open, and switch to the 2770 | s:resource-information-title-single-quoted-inside 2771 | * ↪ **c:(** 2772 | 2773 | Enqueue a l:resource-information-title-start, enqueue a t:marker, consume, 2774 | enqueue a l:resource-information-title-open, and switch to the 2775 | s:resource-information-title-paren-quoted-inside 2776 | * ↪ **c:)** 2777 | 2778 | Enqueue a l:resource-information-close, enqueue a t:marker, consume, enqueue 2779 | a l:resource-information-end, and switch to the s:text 2780 | * ↪ **Anything else** 2781 | 2782 | Enqueue a l:nok 2783 | 2784 | ### Resource information title double quoted inside state 2785 | 2786 | * ↪ **[c:EOF][]** 2787 | 2788 | Enqueue a l:nok 2789 | * ↪ **[c:EOL][]** 2790 | 2791 | Enqueue a l:soft-break, enqueue an 
t:end-of-line, and consume 2792 | * ↪ **c:"** 2793 | 2794 | Enqueue a l:resource-information-title-close, enqueue a t:marker, consume, 2795 | enqueue a l:resource-information-title-end, and switch to the 2796 | s:resource-information-title-after 2797 | * ↪ **U+005C BACKSLASH (`\`)** 2798 | 2799 | Ensure a t:content, consume, and switch to the 2800 | s:resource-information-title-double-quoted-escape 2801 | * ↪ **Anything else** 2802 | 2803 | Ensure a t:content and consume 2804 | 2805 | ### Resource information title double quoted escape state 2806 | 2807 | * ↪ **c:"**\ 2808 | ↪ **U+005C BACKSLASH (`\`)** 2809 | 2810 | Consume and switch to the s:resource-information-title-double-quoted-inside 2811 | * ↪ **Anything else** 2812 | 2813 | Reconsume in the s:resource-information-title-double-quoted-inside 2814 | 2815 | ### Resource information title single quoted inside state 2816 | 2817 | * ↪ **[c:EOF][]** 2818 | 2819 | Enqueue a l:nok 2820 | * ↪ **[c:EOL][]** 2821 | 2822 | Enqueue a l:soft-break, enqueue an t:end-of-line, and consume 2823 | * ↪ **c:'** 2824 | 2825 | Enqueue a l:resource-information-title-close, enqueue a t:marker, consume, 2826 | enqueue a l:resource-information-title-end, and switch to the 2827 | s:resource-information-title-after 2828 | * ↪ **U+005C BACKSLASH (`\`)** 2829 | 2830 | Ensure a t:content, consume, and switch to the 2831 | s:resource-information-title-single-quoted-escape 2832 | * ↪ **Anything else** 2833 | 2834 | Ensure a t:content and consume 2835 | 2836 | ### Resource information title single quoted escape state 2837 | 2838 | * ↪ **c:'**\ 2839 | ↪ **U+005C BACKSLASH (`\`)** 2840 | 2841 | Consume and switch to the s:resource-information-title-single-quoted-inside 2842 | * ↪ **Anything else** 2843 | 2844 | Reconsume in the s:resource-information-title-single-quoted-inside 2845 | 2846 | ### Resource information title paren quoted inside state 2847 | 2848 | * ↪ **[c:EOF][]** 2849 | 2850 | Enqueue a l:nok 2851 | * ↪ **[c:EOL][]** 2852 | 2853 
| Enqueue a l:soft-break, enqueue an t:end-of-line, and consume 2854 | * ↪ **c:)** 2855 | 2856 | Enqueue a l:resource-information-title-close, enqueue a t:marker, consume, 2857 | enqueue a l:resource-information-title-end, and switch to the 2858 | s:resource-information-title-after 2859 | * ↪ **U+005C BACKSLASH (`\`)** 2860 | 2861 | Ensure a t:content, consume, and switch to the 2862 | s:resource-information-title-paren-quoted-escape 2863 | * ↪ **Anything else** 2864 | 2865 | Ensure a t:content and consume 2866 | 2867 | ### Resource information title paren quoted escape state 2868 | 2869 | * ↪ **c:)**\ 2870 | ↪ **U+005C BACKSLASH (`\`)** 2871 | 2872 | Consume and switch to the s:resource-information-title-paren-quoted-inside 2873 | * ↪ **Anything else** 2874 | 2875 | Reconsume in the s:resource-information-title-paren-quoted-inside 2876 | 2877 | ### Resource information title after state 2878 | 2879 | * ↪ **[c:EOL][]** 2880 | 2881 | Enqueue a l:soft-break, enqueue an t:end-of-line, and consume 2882 | * ↪ **[c:VS][]**\ 2883 | ↪ **c:HT**\ 2884 | ↪ **c:SP** 2885 | 2886 | Ensure a t:whitespace and consume 2887 | * ↪ **c:)** 2888 | 2889 | Enqueue a l:resource-information-close, enqueue a t:marker, consume, enqueue 2890 | a l:resource-information-end, and switch to the s:text 2891 | * ↪ **Anything else** 2892 | 2893 | Enqueue a l:nok 2894 | 2895 | ### Label reference close state 2896 | 2897 | * ↪ **c:]** 2898 | 2899 | Enqueue a l:label-close, enqueue a t:marker, consume, enqueue a l:label-end, 2900 | and switch to the s:label-reference-end-after 2901 | * ↪ **Anything else** 2902 | 2903 | Enqueue a l:nok 2904 | 2905 | ### Label reference end after state 2906 | 2907 | * ↪ **c:[** 2908 | 2909 | Enqueue a l:reference-label-start, enqueue a t:marker, consume, enqueue a 2910 | l:reference-label-open, and switch to the s:reference-label-open 2911 | * ↪ **Anything else** 2912 | 2913 | Enqueue a l:nok 2914 | 2915 | ### Reference label open state 2916 | 2917 | * ↪ **[c:EOF][]** 
2918 | 2919 | Enqueue a l:nok 2920 | * ↪ **[c:EOL][]** 2921 | 2922 | Enqueue a l:soft-break, enqueue an t:end-of-line, and consume 2923 | * ↪ **[c:VS][]**\ 2924 | ↪ **c:HT**\ 2925 | ↪ **c:SP** 2926 | 2927 | Ensure a t:whitespace and consume 2928 | * ↪ **c:]** 2929 | 2930 | Enqueue a l:reference-label-collapsed-close, enqueue a t:marker, consume, 2931 | enqueue a l:reference-label-end, and switch to the s:text 2932 | * ↪ **Anything else** 2933 | 2934 | Enqueue a t:content, consume, and switch to the s:reference-label-inside 2935 | 2936 | ### Reference label inside state 2937 | 2938 | * ↪ **[c:EOF][]**\ 2939 | ↪ **[c:EOL][]**\ 2940 | ↪ **[c:VS][]**\ 2941 | ↪ **c:HT**\ 2942 | ↪ **c:SP** 2943 | 2944 | Reconsume in the s:reference-label-between 2945 | * ↪ **U+005C BACKSLASH (`\`)** 2946 | 2947 | Ensure a t:content, consume, and switch to the s:reference-label-escape 2948 | * ↪ **c:]** 2949 | 2950 | Enqueue a l:reference-label-full-close, enqueue a t:marker, consume, enqueue 2951 | a l:reference-label-end, and switch to the s:text 2952 | * ↪ **Anything else** 2953 | 2954 | Ensure a t:content and consume 2955 | 2956 | ### Reference label between state 2957 | 2958 | * ↪ **[c:EOF][]** 2959 | 2960 | Enqueue a l:nok 2961 | * ↪ **[c:EOL][]** 2962 | 2963 | Enqueue a l:soft-break, enqueue an t:end-of-line, and consume 2964 | * ↪ **[c:VS][]**\ 2965 | ↪ **c:HT**\ 2966 | ↪ **c:SP** 2967 | 2968 | Ensure a t:whitespace and consume 2969 | * ↪ **Anything else** 2970 | 2971 | Reconsume in the s:reference-label-inside 2972 | 2973 | ### Reference label escape state 2974 | 2975 | * ↪ **U+005C BACKSLASH (`\`)**\ 2976 | ↪ **c:]** 2977 | 2978 | Consume and switch to the s:reference-label-inside 2979 | * ↪ **Anything else** 2980 | 2981 | Reconsume in the s:reference-label-inside 2982 | 2983 | ### Label reference shortcut close state 2984 | 2985 | * ↪ **c:]** 2986 | 2987 | Enqueue a l:label-close, enqueue a t:marker, consume, enqueue a l:label-end, 2988 | and switch to the s:text 2989 | * ↪ 
**Anything else** 2990 | 2991 | Enqueue a l:nok 2992 | 2993 | ### Delimiter run underscore start state 2994 | 2995 | * ↪ **c:_** 2996 | 2997 | Let `delimiterRunAfter` be `null` and let `delimiterRunBefore` be 2998 | `'whitespace'` if `prev` is [c:EOF][], [c:EOL][], or [Unicode whitespace][], 2999 | `'punctuation'` if it is [Unicode punctuation][], or `null` otherwise 3000 | 3001 | Enqueue a l:delimiter-run-start, enqueue a t:sequence, consume, and switch 3002 | to the s:delimiter-run-underscore 3003 | * ↪ **Anything else** 3004 | 3005 | Enqueue a l:nok 3006 | 3007 | ### Delimiter run underscore state 3008 | 3009 | * ↪ **[c:EOF][]**\ 3010 | ↪ **[c:EOL][]**\ 3011 | ↪ **[Unicode whitespace][]** 3012 | 3013 | Let `delimiterRunAfter` be `'whitespace'` and treat it as per the “anything 3014 | else” entry below 3015 | * ↪ **c:_** 3016 | 3017 | Consume 3018 | * ↪ **[Unicode punctuation][]** 3019 | 3020 | Let `delimiterRunAfter` be `'punctuation'` and treat it as per the “anything 3021 | else” entry below 3022 | * ↪ **Anything else** 3023 | 3024 | Let `leftFlanking` be whether `delimiterRunAfter` is not 3025 | `'whitespace'` and either `delimiterRunAfter` is not `'punctuation'` 3026 | or `delimiterRunBefore` is not `null` 3027 | 3028 | Let `rightFlanking` be whether `delimiterRunBefore` is not 3029 | `'whitespace'` and either `delimiterRunBefore` is not `'punctuation'` 3030 | or `delimiterRunAfter` is not `null` 3031 | 3032 | Unset `delimiterRunBefore`, unset `delimiterRunAfter`, enqueue a 3033 | l:delimiter-run-end, and reconsume in the s:text 3034 | 3035 | ### Code start state 3036 | 3037 | * ↪ **c:`** 3038 | 3039 | Let `sizeOpen` be `1`, enqueue a l:code-start, enqueue a l:code-fence-start, 3040 | enqueue a t:marker, consume, and switch to the s:code-open-fence-inside 3041 | * ↪ **Anything else** 3042 | 3043 | Enqueue a l:nok 3044 | 3045 | ### Code open fence inside state 3046 | 3047 | * ↪ **[c:EOF][]**\ 3048 | ↪ **[c:EOL][]**\ 3049 | ↪
**[c:VS][]**\ 3050 | ↪ **c:HT**\ 3051 | ↪ **c:SP** 3052 | 3053 | Enqueue a l:code-fence-end and reconsume in the s:code-between 3054 | * ↪ **c:`** 3055 | 3056 | Increment `sizeOpen` by `1` and consume 3057 | * ↪ **Anything else** 3058 | 3059 | Enqueue a l:code-fence-end, enqueue a t:content, consume, and switch to the 3060 | s:code-inside 3061 | 3062 | ### Code between state 3063 | 3064 | * ↪ **[c:EOF][]** 3065 | 3066 | Unset `sizeOpen` and enqueue a l:nok 3067 | * ↪ **[c:EOL][]** 3068 | 3069 | Enqueue a l:soft-break, enqueue an t:end-of-line, and consume 3070 | * ↪ **[c:VS][]**\ 3071 | ↪ **c:HT**\ 3072 | ↪ **c:SP** 3073 | 3074 | Ensure a t:whitespace and consume 3075 | * ↪ **Anything else** 3076 | 3077 | Reconsume in the s:code-inside 3078 | 3079 | ### Code inside state 3080 | 3081 | * ↪ **[c:EOF][]**\ 3082 | ↪ **[c:EOL][]**\ 3083 | ↪ **[c:VS][]**\ 3084 | ↪ **c:HT**\ 3085 | ↪ **c:SP** 3086 | 3087 | Reconsume in the s:code-between 3088 | * ↪ **c:`** 3089 | 3090 | Let `sizeClose` be `1`, enqueue a l:code-fence-start, enqueue a t:marker, 3091 | consume, and switch to the s:code-close-fence-inside 3092 | * ↪ **Anything else** 3093 | 3094 | Ensure a t:content and consume 3095 | 3096 | ### Code close fence inside state 3097 | 3098 | * ↪ **c:`** 3099 | 3100 | Increment `sizeClose` by `1` and consume 3101 | * ↪ **Anything else** 3102 | 3103 | If `sizeOpen` is `sizeClose`, unset `sizeOpen`, unset `sizeClose`, enqueue a 3104 | l:code-fence-end, enqueue a l:code-end, and reconsume in the s:text 3105 | 3106 | Otherwise, unset `sizeClose` and reconsume in the s:code-inside 3107 | 3108 | ## Labels 3109 | 3110 | ### NOK label 3111 | 3112 | ### Blank line end label 3113 | 3114 | ### ATX heading start label 3115 | 3116 | ### ATX heading fence start label 3117 | 3118 | ### ATX heading fence end label 3119 | 3120 | ### ATX heading end label 3121 | 3122 | ### Thematic break start label 3123 | 3124 | ### Thematic break end label 3125 | 3126 | ### Setext heading underline start label 
3127 | 3128 | ### Setext heading underline end label 3129 | 3130 | ### Fenced code start label 3131 | 3132 | ### Fenced code fence start label 3133 | 3134 | ### Fenced code fence sequence start label 3135 | 3136 | ### Fenced code fence sequence end label 3137 | 3138 | ### Fenced code fence end label 3139 | 3140 | ### Fenced code end label 3141 | 3142 | 3143 | ### Content definition partial label 3144 | 3145 | ### Content definition start label 3146 | 3147 | ### Content definition label start label 3148 | 3149 | ### Content definition label open label 3150 | 3151 | ### Content definition label close label 3152 | 3153 | ### Content definition label end label 3154 | 3155 | ### Content definition destination start label 3156 | 3157 | ### Content definition destination quoted open label 3158 | 3159 | ### Content definition destination quoted close label 3160 | 3161 | ### Content definition destination unquoted open label 3162 | 3163 | ### Content definition destination unquoted close label 3164 | 3165 | ### Content definition destination end label 3166 | 3167 | ### Content definition title start label 3168 | 3169 | ### Content definition title open label 3170 | 3171 | ### Content definition title close label 3172 | 3173 | ### Content definition title end label 3174 | 3175 | ### Content definition end label 3176 | 3177 | ### Hard break label 3178 | 3179 | ### Soft break label 3180 | 3181 | ### Image label start label 3182 | 3183 | ### Image label open label 3184 | 3185 | ### Character reference start label 3186 | 3187 | ### Character reference end label 3188 | 3189 | ### Delimiter run start label 3190 | 3191 | ### Delimiter run end label 3192 | 3193 | ### Autolink start label 3194 | 3195 | ### Autolink open label 3196 | 3197 | ### Autolink email close label 3198 | 3199 | ### Autolink email end label 3200 | 3201 | ### Autolink uri close label 3202 | 3203 | ### Autolink uri end label 3204 | 3205 | ### HTML start label 3206 | 3207 | ### HTML end label 3208 | 3209 | ### Link 
label start label 3210 | 3211 | ### Link label open label 3212 | 3213 | ### Character escape start label 3214 | 3215 | ### Character escape end label 3216 | 3217 | ### Break escape start label 3218 | 3219 | ### Break escape end label 3220 | 3221 | ### Label close label 3222 | 3223 | ### Label end label 3224 | 3225 | ### Resource information start label 3226 | 3227 | ### Resource information open label 3228 | 3229 | ### Resource information destination quoted start label 3230 | 3231 | ### Resource information destination quoted open label 3232 | 3233 | ### Resource information destination quoted close label 3234 | 3235 | ### Resource information destination quoted end label 3236 | 3237 | ### Resource information destination unquoted start label 3238 | 3239 | ### Resource information destination unquoted open label 3240 | 3241 | ### Resource information destination unquoted close label 3242 | 3243 | ### Resource information destination unquoted end label 3244 | 3245 | ### Resource information title start label 3246 | 3247 | ### Resource information title open label 3248 | 3249 | ### Resource information title close label 3250 | 3251 | ### Resource information title end label 3252 | 3253 | ### Resource information close label 3254 | 3255 | ### Resource information end label 3256 | 3257 | ### Reference label start label 3258 | 3259 | ### Reference label open label 3260 | 3261 | ### Reference label collapsed close label 3262 | 3263 | ### Reference label full close label 3264 | 3265 | ### Reference label end label 3266 | 3267 | ### Code start label 3268 | 3269 | ### Code fence start label 3270 | 3271 | ### Code fence end label 3272 | 3273 | ### Code end label 3274 | 3275 | ## Tokens 3276 | 3277 | ### Whitespace token 3278 | 3279 | A t:whitespace represents inline whitespace that is part of syntax instead 3280 | of content. 
3281 | 3282 | ```idl 3283 | interface Whitespace <: Token { 3284 | size: number 3285 | used: number 3286 | characters: [Character] 3287 | } 3288 | ``` 3289 | 3290 | ```js 3291 | { 3292 | type: 'whitespace', 3293 | characters: [9], 3294 | size: 3, 3295 | used: 0 3296 | } 3297 | ``` 3298 | 3299 | ### Line terminator token 3300 | 3301 | A t:line-terminator represents a line break. 3302 | 3303 | ```idl 3304 | interface LineEnding <: Token {} 3305 | ``` 3306 | 3307 | ```js 3308 | {type: 'lineEnding'} 3309 | ``` 3310 | 3311 | ### End-of-file token 3312 | 3313 | An t:end-of-file represents the end of the syntax. 3314 | 3315 | ```idl 3316 | interface EndOfFile <: Token {} 3317 | ``` 3318 | 3319 | ```js 3320 | {type: 'endOfFile'} 3321 | ``` 3322 | 3323 | ### End-of-line token 3324 | 3325 | An t:end-of-line represents a point between two runs of text in content. 3326 | 3327 | ```idl 3328 | interface EndOfLine <: Token {} 3329 | ``` 3330 | 3331 | ```js 3332 | {type: 'endOfLine'} 3333 | ``` 3334 | 3335 | ### Marker token 3336 | 3337 | A t:marker represents one punctuation character that is part of syntax instead 3338 | of content. 3339 | 3340 | ```idl 3341 | interface Marker <: Token {} 3342 | ``` 3343 | 3344 | ```js 3345 | {type: 'marker'} 3346 | ``` 3347 | 3348 | ### Sequence token 3349 | 3350 | A t:sequence represents one or more of the same punctuation characters that are 3351 | part of syntax instead of content. 3352 | 3353 | ```idl 3354 | interface Sequence <: Token { 3355 | size: number 3356 | } 3357 | ``` 3358 | 3359 | ```js 3360 | {type: 'sequence', size: 3} 3361 | ``` 3362 | 3363 | ### Content token 3364 | 3365 | A t:content represents content. 3366 | 3367 | ```idl 3368 | interface Content <: Token { 3369 | prefix: string 3370 | } 3371 | ``` 3372 | 3373 | ```js 3374 | {type: 'content', prefix: ' '} 3375 | ``` 3376 | 3377 | ## Appendix 3378 | 3379 | ### Raw tags 3380 | 3381 | A [raw tag](#) is one of: `script`, `pre`, and `style`. 
3382 | 3383 | ### Basic tags 3384 | 3385 | A [basic tag](#) is one of: `address`, `article`, `aside`, `base`, `basefont`, 3386 | `blockquote`, `body`, `caption`, `center`, `col`, `colgroup`, `dd`, `details`, 3387 | `dialog`, `dir`, `div`, `dl`, `dt`, `fieldset`, `figcaption`, `figure`, 3388 | `footer`, `form`, `frame`, `frameset`, `h1`, `h2`, `h3`, `h4`, `h5`, `h6`, 3389 | `head`, `header`, `hr`, `html`, `iframe`, `legend`, `li`, `link`, `main`, 3390 | `menu`, `menuitem`, `nav`, `noframes`, `ol`, `optgroup`, `option`, `p`, 3391 | `param`, `section`, `source`, `summary`, `table`, `tbody`, `td`, `tfoot`, `th`, 3392 | `thead`, `title`, `tr`, `track`, and `ul`. 3393 | 3394 | ### Named character references 3395 | 3396 | A [character reference name](#) is one of: 3397 | `AEli`, `AElig`, `AM`, `AMP`, `Aacut`, `Aacute`, 3398 | `Abreve`, `Acir`, `Acirc`, `Acy`, `Afr`, `Agrav`, `Agrave`, `Alpha`, `Amacr`, 3399 | `And`, `Aogon`, `Aopf`, `ApplyFunction`, `Arin`, `Aring`, `Ascr`, `Assign`, 3400 | `Atild`, `Atilde`, `Aum`, `Auml`, `Backslash`, `Barv`, `Barwed`, `Bcy`, 3401 | `Because`, `Bernoullis`, `Beta`, `Bfr`, `Bopf`, `Breve`, `Bscr`, `Bumpeq`, 3402 | `CHcy`, `COP`, `COPY`, `Cacute`, `Cap`, `CapitalDifferentialD`, `Cayleys`, 3403 | `Ccaron`, `Ccedi`, `Ccedil`, `Ccirc`, `Cconint`, `Cdot`, `Cedilla`, `CenterDot`, 3404 | `Cfr`, `Chi`, `CircleDot`, `CircleMinus`, `CirclePlus`, `CircleTimes`, 3405 | `ClockwiseContourIntegral`, `CloseCurlyDoubleQuote`, `CloseCurlyQuote`, `Colon`, 3406 | `Colone`, `Congruent`, `Conint`, `ContourIntegral`, `Copf`, `Coproduct`, 3407 | `CounterClockwiseContourIntegral`, `Cross`, `Cscr`, `Cup`, `CupCap`, `DD`, 3408 | `DDotrahd`, `DJcy`, `DScy`, `DZcy`, `Dagger`, `Darr`, `Dashv`, `Dcaron`, `Dcy`, 3409 | `Del`, `Delta`, `Dfr`, `DiacriticalAcute`, `DiacriticalDot`, 3410 | `DiacriticalDoubleAcute`, `DiacriticalGrave`, `DiacriticalTilde`, `Diamond`, 3411 | `DifferentialD`, `Dopf`, `Dot`, `DotDot`, `DotEqual`, `DoubleContourIntegral`, 3412 | `DoubleDot`, 
`DoubleDownArrow`, `DoubleLeftArrow`, `DoubleLeftRightArrow`, 3413 | `DoubleLeftTee`, `DoubleLongLeftArrow`, `DoubleLongLeftRightArrow`, 3414 | `DoubleLongRightArrow`, `DoubleRightArrow`, `DoubleRightTee`, `DoubleUpArrow`, 3415 | `DoubleUpDownArrow`, `DoubleVerticalBar`, `DownArrow`, `DownArrowBar`, 3416 | `DownArrowUpArrow`, `DownBreve`, `DownLeftRightVector`, `DownLeftTeeVector`, 3417 | `DownLeftVector`, `DownLeftVectorBar`, `DownRightTeeVector`, `DownRightVector`, 3418 | `DownRightVectorBar`, `DownTee`, `DownTeeArrow`, `Downarrow`, `Dscr`, `Dstrok`, 3419 | `ENG`, `ET`, `ETH`, `Eacut`, `Eacute`, `Ecaron`, `Ecir`, `Ecirc`, `Ecy`, `Edot`, 3420 | `Efr`, `Egrav`, `Egrave`, `Element`, `Emacr`, `EmptySmallSquare`, 3421 | `EmptyVerySmallSquare`, `Eogon`, `Eopf`, `Epsilon`, `Equal`, `EqualTilde`, 3422 | `Equilibrium`, `Escr`, `Esim`, `Eta`, `Eum`, `Euml`, `Exists`, `ExponentialE`, 3423 | `Fcy`, `Ffr`, `FilledSmallSquare`, `FilledVerySmallSquare`, `Fopf`, `ForAll`, 3424 | `Fouriertrf`, `Fscr`, `G`, `GJcy`, `GT`, `Gamma`, `Gammad`, `Gbreve`, `Gcedil`, 3425 | `Gcirc`, `Gcy`, `Gdot`, `Gfr`, `Gg`, `Gopf`, `GreaterEqual`, `GreaterEqualLess`, 3426 | `GreaterFullEqual`, `GreaterGreater`, `GreaterLess`, `GreaterSlantEqual`, 3427 | `GreaterTilde`, `Gscr`, `Gt`, `HARDcy`, `Hacek`, `Hat`, `Hcirc`, `Hfr`, 3428 | `HilbertSpace`, `Hopf`, `HorizontalLine`, `Hscr`, `Hstrok`, `HumpDownHump`, 3429 | `HumpEqual`, `IEcy`, `IJlig`, `IOcy`, `Iacut`, `Iacute`, `Icir`, `Icirc`, `Icy`, 3430 | `Idot`, `Ifr`, `Igrav`, `Igrave`, `Im`, `Imacr`, `ImaginaryI`, `Implies`, `Int`, 3431 | `Integral`, `Intersection`, `InvisibleComma`, `InvisibleTimes`, `Iogon`, `Iopf`, 3432 | `Iota`, `Iscr`, `Itilde`, `Iukcy`, `Ium`, `Iuml`, `Jcirc`, `Jcy`, `Jfr`, `Jopf`, 3433 | `Jscr`, `Jsercy`, `Jukcy`, `KHcy`, `KJcy`, `Kappa`, `Kcedil`, `Kcy`, `Kfr`, 3434 | `Kopf`, `Kscr`, `L`, `LJcy`, `LT`, `Lacute`, `Lambda`, `Lang`, `Laplacetrf`, 3435 | `Larr`, `Lcaron`, `Lcedil`, `Lcy`, `LeftAngleBracket`, `LeftArrow`, 3436 | 
`LeftArrowBar`, `LeftArrowRightArrow`, `LeftCeiling`, `LeftDoubleBracket`, 3437 | `LeftDownTeeVector`, `LeftDownVector`, `LeftDownVectorBar`, `LeftFloor`, 3438 | `LeftRightArrow`, `LeftRightVector`, `LeftTee`, `LeftTeeArrow`, `LeftTeeVector`, 3439 | `LeftTriangle`, `LeftTriangleBar`, `LeftTriangleEqual`, `LeftUpDownVector`, 3440 | `LeftUpTeeVector`, `LeftUpVector`, `LeftUpVectorBar`, `LeftVector`, 3441 | `LeftVectorBar`, `Leftarrow`, `Leftrightarrow`, `LessEqualGreater`, 3442 | `LessFullEqual`, `LessGreater`, `LessLess`, `LessSlantEqual`, `LessTilde`, 3443 | `Lfr`, `Ll`, `Lleftarrow`, `Lmidot`, `LongLeftArrow`, `LongLeftRightArrow`, 3444 | `LongRightArrow`, `Longleftarrow`, `Longleftrightarrow`, `Longrightarrow`, 3445 | `Lopf`, `LowerLeftArrow`, `LowerRightArrow`, `Lscr`, `Lsh`, `Lstrok`, `Lt`, 3446 | `Map`, `Mcy`, `MediumSpace`, `Mellintrf`, `Mfr`, `MinusPlus`, `Mopf`, `Mscr`, 3447 | `Mu`, `NJcy`, `Nacute`, `Ncaron`, `Ncedil`, `Ncy`, `NegativeMediumSpace`, 3448 | `NegativeThickSpace`, `NegativeThinSpace`, `NegativeVeryThinSpace`, 3449 | `NestedGreaterGreater`, `NestedLessLess`, `NewLine`, `Nfr`, `NoBreak`, 3450 | `NonBreakingSpace`, `Nopf`, `Not`, `NotCongruent`, `NotCupCap`, 3451 | `NotDoubleVerticalBar`, `NotElement`, `NotEqual`, `NotEqualTilde`, `NotExists`, 3452 | `NotGreater`, `NotGreaterEqual`, `NotGreaterFullEqual`, `NotGreaterGreater`, 3453 | `NotGreaterLess`, `NotGreaterSlantEqual`, `NotGreaterTilde`, `NotHumpDownHump`, 3454 | `NotHumpEqual`, `NotLeftTriangle`, `NotLeftTriangleBar`, `NotLeftTriangleEqual`, 3455 | `NotLess`, `NotLessEqual`, `NotLessGreater`, `NotLessLess`, `NotLessSlantEqual`, 3456 | `NotLessTilde`, `NotNestedGreaterGreater`, `NotNestedLessLess`, `NotPrecedes`, 3457 | `NotPrecedesEqual`, `NotPrecedesSlantEqual`, `NotReverseElement`, 3458 | `NotRightTriangle`, `NotRightTriangleBar`, `NotRightTriangleEqual`, 3459 | `NotSquareSubset`, `NotSquareSubsetEqual`, `NotSquareSuperset`, 3460 | `NotSquareSupersetEqual`, `NotSubset`, `NotSubsetEqual`, 
`NotSucceeds`, 3461 | `NotSucceedsEqual`, `NotSucceedsSlantEqual`, `NotSucceedsTilde`, `NotSuperset`, 3462 | `NotSupersetEqual`, `NotTilde`, `NotTildeEqual`, `NotTildeFullEqual`, 3463 | `NotTildeTilde`, `NotVerticalBar`, `Nscr`, `Ntild`, `Ntilde`, `Nu`, `OElig`, 3464 | `Oacut`, `Oacute`, `Ocir`, `Ocirc`, `Ocy`, `Odblac`, `Ofr`, `Ograv`, `Ograve`, 3465 | `Omacr`, `Omega`, `Omicron`, `Oopf`, `OpenCurlyDoubleQuote`, `OpenCurlyQuote`, 3466 | `Or`, `Oscr`, `Oslas`, `Oslash`, `Otild`, `Otilde`, `Otimes`, `Oum`, `Ouml`, 3467 | `OverBar`, `OverBrace`, `OverBracket`, `OverParenthesis`, `PartialD`, `Pcy`, 3468 | `Pfr`, `Phi`, `Pi`, `PlusMinus`, `Poincareplane`, `Popf`, `Pr`, `Precedes`, 3469 | `PrecedesEqual`, `PrecedesSlantEqual`, `PrecedesTilde`, `Prime`, `Product`, 3470 | `Proportion`, `Proportional`, `Pscr`, `Psi`, `QUO`, `QUOT`, `Qfr`, `Qopf`, 3471 | `Qscr`, `RBarr`, `RE`, `REG`, `Racute`, `Rang`, `Rarr`, `Rarrtl`, `Rcaron`, 3472 | `Rcedil`, `Rcy`, `Re`, `ReverseElement`, `ReverseEquilibrium`, 3473 | `ReverseUpEquilibrium`, `Rfr`, `Rho`, `RightAngleBracket`, `RightArrow`, 3474 | `RightArrowBar`, `RightArrowLeftArrow`, `RightCeiling`, `RightDoubleBracket`, 3475 | `RightDownTeeVector`, `RightDownVector`, `RightDownVectorBar`, `RightFloor`, 3476 | `RightTee`, `RightTeeArrow`, `RightTeeVector`, `RightTriangle`, 3477 | `RightTriangleBar`, `RightTriangleEqual`, `RightUpDownVector`, 3478 | `RightUpTeeVector`, `RightUpVector`, `RightUpVectorBar`, `RightVector`, 3479 | `RightVectorBar`, `Rightarrow`, `Ropf`, `RoundImplies`, `Rrightarrow`, `Rscr`, 3480 | `Rsh`, `RuleDelayed`, `SHCHcy`, `SHcy`, `SOFTcy`, `Sacute`, `Sc`, `Scaron`, 3481 | `Scedil`, `Scirc`, `Scy`, `Sfr`, `ShortDownArrow`, `ShortLeftArrow`, 3482 | `ShortRightArrow`, `ShortUpArrow`, `Sigma`, `SmallCircle`, `Sopf`, `Sqrt`, 3483 | `Square`, `SquareIntersection`, `SquareSubset`, `SquareSubsetEqual`, 3484 | `SquareSuperset`, `SquareSupersetEqual`, `SquareUnion`, `Sscr`, `Star`, `Sub`, 3485 | `Subset`, `SubsetEqual`, 
`Succeeds`, `SucceedsEqual`, `SucceedsSlantEqual`, 3486 | `SucceedsTilde`, `SuchThat`, `Sum`, `Sup`, `Superset`, `SupersetEqual`, 3487 | `Supset`, `THOR`, `THORN`, `TRADE`, `TSHcy`, `TScy`, `Tab`, `Tau`, `Tcaron`, 3488 | `Tcedil`, `Tcy`, `Tfr`, `Therefore`, `Theta`, `ThickSpace`, `ThinSpace`, 3489 | `Tilde`, `TildeEqual`, `TildeFullEqual`, `TildeTilde`, `Topf`, `TripleDot`, 3490 | `Tscr`, `Tstrok`, `Uacut`, `Uacute`, `Uarr`, `Uarrocir`, `Ubrcy`, `Ubreve`, 3491 | `Ucir`, `Ucirc`, `Ucy`, `Udblac`, `Ufr`, `Ugrav`, `Ugrave`, `Umacr`, `UnderBar`, 3492 | `UnderBrace`, `UnderBracket`, `UnderParenthesis`, `Union`, `UnionPlus`, `Uogon`, 3493 | `Uopf`, `UpArrow`, `UpArrowBar`, `UpArrowDownArrow`, `UpDownArrow`, 3494 | `UpEquilibrium`, `UpTee`, `UpTeeArrow`, `Uparrow`, `Updownarrow`, 3495 | `UpperLeftArrow`, `UpperRightArrow`, `Upsi`, `Upsilon`, `Uring`, `Uscr`, 3496 | `Utilde`, `Uum`, `Uuml`, `VDash`, `Vbar`, `Vcy`, `Vdash`, `Vdashl`, `Vee`, 3497 | `Verbar`, `Vert`, `VerticalBar`, `VerticalLine`, `VerticalSeparator`, 3498 | `VerticalTilde`, `VeryThinSpace`, `Vfr`, `Vopf`, `Vscr`, `Vvdash`, `Wcirc`, 3499 | `Wedge`, `Wfr`, `Wopf`, `Wscr`, `Xfr`, `Xi`, `Xopf`, `Xscr`, `YAcy`, `YIcy`, 3500 | `YUcy`, `Yacut`, `Yacute`, `Ycirc`, `Ycy`, `Yfr`, `Yopf`, `Yscr`, `Yuml`, 3501 | `ZHcy`, `Zacute`, `Zcaron`, `Zcy`, `Zdot`, `ZeroWidthSpace`, `Zeta`, `Zfr`, 3502 | `Zopf`, `Zscr`, `aacut`, `aacute`, `abreve`, `ac`, `acE`, `acd`, `acir`, 3503 | `acirc`, `acut`, `acute`, `acy`, `aeli`, `aelig`, `af`, `afr`, `agrav`, 3504 | `agrave`, `alefsym`, `aleph`, `alpha`, `am`, `amacr`, `amalg`, `amp`, `and`, 3505 | `andand`, `andd`, `andslope`, `andv`, `ang`, `ange`, `angle`, `angmsd`, 3506 | `angmsdaa`, `angmsdab`, `angmsdac`, `angmsdad`, `angmsdae`, `angmsdaf`, 3507 | `angmsdag`, `angmsdah`, `angrt`, `angrtvb`, `angrtvbd`, `angsph`, `angst`, 3508 | `angzarr`, `aogon`, `aopf`, `ap`, `apE`, `apacir`, `ape`, `apid`, `apos`, 3509 | `approx`, `approxeq`, `arin`, `aring`, `ascr`, `ast`, `asymp`, `asympeq`, 
3510 | `atild`, `atilde`, `aum`, `auml`, `awconint`, `awint`, `bNot`, `backcong`, 3511 | `backepsilon`, `backprime`, `backsim`, `backsimeq`, `barvee`, `barwed`, 3512 | `barwedge`, `bbrk`, `bbrktbrk`, `bcong`, `bcy`, `bdquo`, `becaus`, `because`, 3513 | `bemptyv`, `bepsi`, `bernou`, `beta`, `beth`, `between`, `bfr`, `bigcap`, 3514 | `bigcirc`, `bigcup`, `bigodot`, `bigoplus`, `bigotimes`, `bigsqcup`, `bigstar`, 3515 | `bigtriangledown`, `bigtriangleup`, `biguplus`, `bigvee`, `bigwedge`, `bkarow`, 3516 | `blacklozenge`, `blacksquare`, `blacktriangle`, `blacktriangledown`, 3517 | `blacktriangleleft`, `blacktriangleright`, `blank`, `blk12`, `blk14`, `blk34`, 3518 | `block`, `bne`, `bnequiv`, `bnot`, `bopf`, `bot`, `bottom`, `bowtie`, `boxDL`, 3519 | `boxDR`, `boxDl`, `boxDr`, `boxH`, `boxHD`, `boxHU`, `boxHd`, `boxHu`, `boxUL`, 3520 | `boxUR`, `boxUl`, `boxUr`, `boxV`, `boxVH`, `boxVL`, `boxVR`, `boxVh`, `boxVl`, 3521 | `boxVr`, `boxbox`, `boxdL`, `boxdR`, `boxdl`, `boxdr`, `boxh`, `boxhD`, `boxhU`, 3522 | `boxhd`, `boxhu`, `boxminus`, `boxplus`, `boxtimes`, `boxuL`, `boxuR`, `boxul`, 3523 | `boxur`, `boxv`, `boxvH`, `boxvL`, `boxvR`, `boxvh`, `boxvl`, `boxvr`, `bprime`, 3524 | `breve`, `brvba`, `brvbar`, `bscr`, `bsemi`, `bsim`, `bsime`, `bsol`, `bsolb`, 3525 | `bsolhsub`, `bull`, `bullet`, `bump`, `bumpE`, `bumpe`, `bumpeq`, `cacute`, 3526 | `cap`, `capand`, `capbrcup`, `capcap`, `capcup`, `capdot`, `caps`, `caret`, 3527 | `caron`, `ccaps`, `ccaron`, `ccedi`, `ccedil`, `ccirc`, `ccups`, `ccupssm`, 3528 | `cdot`, `cedi`, `cedil`, `cemptyv`, `cen`, `cent`, `centerdot`, `cfr`, `chcy`, 3529 | `check`, `checkmark`, `chi`, `cir`, `cirE`, `circ`, `circeq`, `circlearrowleft`, 3530 | `circlearrowright`, `circledR`, `circledS`, `circledast`, `circledcirc`, 3531 | `circleddash`, `cire`, `cirfnint`, `cirmid`, `cirscir`, `clubs`, `clubsuit`, 3532 | `colon`, `colone`, `coloneq`, `comma`, `commat`, `comp`, `compfn`, `complement`, 3533 | `complexes`, `cong`, `congdot`, `conint`, 
`cop`, `copf`, `coprod`, `copy`, 3534 | `copysr`, `crarr`, `cross`, `cscr`, `csub`, `csube`, `csup`, `csupe`, `ctdot`, 3535 | `cudarrl`, `cudarrr`, `cuepr`, `cuesc`, `cularr`, `cularrp`, `cup`, `cupbrcap`, 3536 | `cupcap`, `cupcup`, `cupdot`, `cupor`, `cups`, `curarr`, `curarrm`, 3537 | `curlyeqprec`, `curlyeqsucc`, `curlyvee`, `curlywedge`, `curre`, `curren`, 3538 | `curvearrowleft`, `curvearrowright`, `cuvee`, `cuwed`, `cwconint`, `cwint`, 3539 | `cylcty`, `dArr`, `dHar`, `dagger`, `daleth`, `darr`, `dash`, `dashv`, 3540 | `dbkarow`, `dblac`, `dcaron`, `dcy`, `dd`, `ddagger`, `ddarr`, `ddotseq`, `de`, 3541 | `deg`, `delta`, `demptyv`, `dfisht`, `dfr`, `dharl`, `dharr`, `diam`, `diamond`, 3542 | `diamondsuit`, `diams`, `die`, `digamma`, `disin`, `div`, `divid`, `divide`, 3543 | `divideontimes`, `divonx`, `djcy`, `dlcorn`, `dlcrop`, `dollar`, `dopf`, `dot`, 3544 | `doteq`, `doteqdot`, `dotminus`, `dotplus`, `dotsquare`, `doublebarwedge`, 3545 | `downarrow`, `downdownarrows`, `downharpoonleft`, `downharpoonright`, `drbkarow`, 3546 | `drcorn`, `drcrop`, `dscr`, `dscy`, `dsol`, `dstrok`, `dtdot`, `dtri`, `dtrif`, 3547 | `duarr`, `duhar`, `dwangle`, `dzcy`, `dzigrarr`, `eDDot`, `eDot`, `eacut`, 3548 | `eacute`, `easter`, `ecaron`, `ecir`, `ecirc`, `ecolon`, `ecy`, `edot`, 3549 | `ee`, `efDot`, `efr`, `eg`, `egrav`, `egrave`, `egs`, `egsdot`, `el`, 3550 | `elinters`, `ell`, `els`, `elsdot`, `emacr`, `empty`, `emptyset`, `emptyv`, 3551 | `emsp`, `emsp13`, `emsp14`, `eng`, `ensp`, `eogon`, `eopf`, `epar`, `eparsl`, 3552 | `eplus`, `epsi`, `epsilon`, `epsiv`, `eqcirc`, `eqcolon`, `eqsim`, `eqslantgtr`, 3553 | `eqslantless`, `equals`, `equest`, `equiv`, `equivDD`, `eqvparsl`, `erDot`, 3554 | `erarr`, `escr`, `esdot`, `esim`, `et`, `eta`, `eth`, `eum`, `euml`, `euro`, 3555 | `excl`, `exist`, `expectation`, `exponentiale`, `fallingdotseq`, `fcy`, 3556 | `female`, `ffilig`, `fflig`, `ffllig`, `ffr`, `filig`, `fjlig`, `flat`, `fllig`, 3557 | `fltns`, `fnof`, `fopf`,
`forall`, `fork`, `forkv`, `fpartint`, `frac1`, 3558 | `frac12`, `frac13`, `frac14`, `frac15`, `frac16`, `frac18`, `frac23`, 3559 | `frac25`, `frac3`, `frac34`, `frac35`, `frac38`, `frac45`, `frac56`, `frac58`, 3560 | `frac78`, `frasl`, `frown`, `fscr`, `g`, `gE`, `gEl`, `gacute`, `gamma`, 3561 | `gammad`, `gap`, `gbreve`, `gcirc`, `gcy`, `gdot`, `ge`, `gel`, `geq`, `geqq`, 3562 | `geqslant`, `ges`, `gescc`, `gesdot`, `gesdoto`, `gesdotol`, `gesl`, `gesles`, 3563 | `gfr`, `gg`, `ggg`, `gimel`, `gjcy`, `gl`, `glE`, `gla`, `glj`, `gnE`, `gnap`, 3564 | `gnapprox`, `gne`, `gneq`, `gneqq`, `gnsim`, `gopf`, `grave`, `gscr`, `gsim`, 3565 | `gsime`, `gsiml`, `gt`, `gtcc`, `gtcir`, `gtdot`, `gtlPar`, `gtquest`, 3566 | `gtrapprox`, `gtrarr`, `gtrdot`, `gtreqless`, `gtreqqless`, `gtrless`, `gtrsim`, 3567 | `gvertneqq`, `gvnE`, `hArr`, `hairsp`, `half`, `hamilt`, `hardcy`, `harr`, 3568 | `harrcir`, `harrw`, `hbar`, `hcirc`, `hearts`, `heartsuit`, `hellip`, `hercon`, 3569 | `hfr`, `hksearow`, `hkswarow`, `hoarr`, `homtht`, `hookleftarrow`, 3570 | `hookrightarrow`, `hopf`, `horbar`, `hscr`, `hslash`, `hstrok`, `hybull`, 3571 | `hyphen`, `iacut`, `iacute`, `ic`, `icir`, `icirc`, `icy`, `iecy`, `iexc`, 3572 | `iexcl`, `iff`, `ifr`, `igrav`, `igrave`, `ii`, `iiiint`, `iiint`, `iinfin`, 3573 | `iiota`, `ijlig`, `imacr`, `image`, `imagline`, `imagpart`, `imath`, `imof`, 3574 | `imped`, `in`, `incare`, `infin`, `infintie`, `inodot`, `int`, `intcal`, 3575 | `integers`, `intercal`, `intlarhk`, `intprod`, `iocy`, `iogon`, `iopf`, `iota`, 3576 | `iprod`, `iques`, `iquest`, `iscr`, `isin`, `isinE`, `isindot`, `isins`, 3577 | `isinsv`, `isinv`, `it`, `itilde`, `iukcy`, `ium`, `iuml`, `jcirc`, `jcy`, 3578 | `jfr`, `jmath`, `jopf`, `jscr`, `jsercy`, `jukcy`, `kappa`, `kappav`, `kcedil`, 3579 | `kcy`, `kfr`, `kgreen`, `khcy`, `kjcy`, `kopf`, `kscr`, `l`, `lAarr`, `lArr`, 3580 | `lAtail`, `lBarr`, `lE`, `lEg`, `lHar`, `lacute`, `laemptyv`, `lagran`, 3581 | `lambda`, `lang`, `langd`,
`langle`, `lap`, `laqu`, `laquo`, `larr`, `larrb`, 3582 | `larrbfs`, `larrfs`, `larrhk`, `larrlp`, `larrpl`, `larrsim`, `larrtl`, `lat`, 3583 | `latail`, `late`, `lates`, `lbarr`, `lbbrk`, `lbrace`, `lbrack`, `lbrke`, 3584 | `lbrksld`, `lbrkslu`, `lcaron`, `lcedil`, `lceil`, `lcub`, `lcy`, `ldca`, 3585 | `ldquo`, `ldquor`, `ldrdhar`, `ldrushar`, `ldsh`, `le`, `leftarrow`, 3586 | `leftarrowtail`, `leftharpoondown`, `leftharpoonup`, `leftleftarrows`, 3587 | `leftrightarrow`, `leftrightarrows`, `leftrightharpoons`, `leftrightsquigarrow`, 3588 | `leftthreetimes`, `leg`, `leq`, `leqq`, `leqslant`, `les`, `lescc`, `lesdot`, 3589 | `lesdoto`, `lesdotor`, `lesg`, `lesges`, `lessapprox`, `lessdot`, `lesseqgtr`, 3590 | `lesseqqgtr`, `lessgtr`, `lesssim`, `lfisht`, `lfloor`, `lfr`, `lg`, `lgE`, 3591 | `lhard`, `lharu`, `lharul`, `lhblk`, `ljcy`, `ll`, `llarr`, `llcorner`, 3592 | `llhard`, `lltri`, `lmidot`, `lmoust`, `lmoustache`, `lnE`, `lnap`, `lnapprox`, 3593 | `lne`, `lneq`, `lneqq`, `lnsim`, `loang`, `loarr`, `lobrk`, `longleftarrow`, 3594 | `longleftrightarrow`, `longmapsto`, `longrightarrow`, `looparrowleft`, 3595 | `looparrowright`, `lopar`, `lopf`, `loplus`, `lotimes`, `lowast`, `lowbar`, 3596 | `loz`, `lozenge`, `lozf`, `lpar`, `lparlt`, `lrarr`, `lrcorner`, `lrhar`, 3597 | `lrhard`, `lrm`, `lrtri`, `lsaquo`, `lscr`, `lsh`, `lsim`, `lsime`, `lsimg`, 3598 | `lsqb`, `lsquo`, `lsquor`, `lstrok`, `lt`, `ltcc`, `ltcir`, `ltdot`, `lthree`, 3599 | `ltimes`, `ltlarr`, `ltquest`, `ltrPar`, `ltri`, `ltrie`, `ltrif`, `lurdshar`, 3600 | `luruhar`, `lvertneqq`, `lvnE`, `mDDot`, `mac`, `macr`, `male`, `malt`, 3601 | `maltese`, `map`, `mapsto`, `mapstodown`, `mapstoleft`, `mapstoup`, `marker`, 3602 | `mcomma`, `mcy`, `mdash`, `measuredangle`, `mfr`, `mho`, `micr`, `micro`, 3603 | `mid`, `midast`, `midcir`, `middo`, `middot`, `minus`, `minusb`, `minusd`, 3604 | `minusdu`, `mlcp`, `mldr`, `mnplus`, `models`, `mopf`, `mp`, `mscr`, `mstpos`, 3605 | `mu`, `multimap`, `mumap`, `nGg`, 
`nGt`, `nGtv`, `nLeftarrow`, 3606 | `nLeftrightarrow`, `nLl`, `nLt`, `nLtv`, `nRightarrow`, `nVDash`, `nVdash`, 3607 | `nabla`, `nacute`, `nang`, `nap`, `napE`, `napid`, `napos`, `napprox`, `natur`, 3608 | `natural`, `naturals`, `nbs`, `nbsp`, `nbump`, `nbumpe`, `ncap`, `ncaron`, 3609 | `ncedil`, `ncong`, `ncongdot`, `ncup`, `ncy`, `ndash`, `ne`, `neArr`, `nearhk`, 3610 | `nearr`, `nearrow`, `nedot`, `nequiv`, `nesear`, `nesim`, `nexist`, `nexists`, 3611 | `nfr`, `ngE`, `nge`, `ngeq`, `ngeqq`, `ngeqslant`, `nges`, `ngsim`, `ngt`, 3612 | `ngtr`, `nhArr`, `nharr`, `nhpar`, `ni`, `nis`, `nisd`, `niv`, `njcy`, `nlArr`, 3613 | `nlE`, `nlarr`, `nldr`, `nle`, `nleftarrow`, `nleftrightarrow`, `nleq`, 3614 | `nleqq`, `nleqslant`, `nles`, `nless`, `nlsim`, `nlt`, `nltri`, `nltrie`, 3615 | `nmid`, `no`, `nopf`, `not`, `notin`, `notinE`, `notindot`, `notinva`, 3616 | `notinvb`, `notinvc`, `notni`, `notniva`, `notnivb`, `notnivc`, `npar`, 3617 | `nparallel`, `nparsl`, `npart`, `npolint`, `npr`, `nprcue`, `npre`, `nprec`, 3618 | `npreceq`, `nrArr`, `nrarr`, `nrarrc`, `nrarrw`, `nrightarrow`, `nrtri`, 3619 | `nrtrie`, `nsc`, `nsccue`, `nsce`, `nscr`, `nshortmid`, `nshortparallel`, 3620 | `nsim`, `nsime`, `nsimeq`, `nsmid`, `nspar`, `nsqsube`, `nsqsupe`, `nsub`, 3621 | `nsubE`, `nsube`, `nsubset`, `nsubseteq`, `nsubseteqq`, `nsucc`, `nsucceq`, 3622 | `nsup`, `nsupE`, `nsupe`, `nsupset`, `nsupseteq`, `nsupseteqq`, `ntgl`, `ntild`, 3623 | `ntilde`, `ntlg`, `ntriangleleft`, `ntrianglelefteq`, `ntriangleright`, 3624 | `ntrianglerighteq`, `nu`, `num`, `numero`, `numsp`, `nvDash`, `nvHarr`, `nvap`, 3625 | `nvdash`, `nvge`, `nvgt`, `nvinfin`, `nvlArr`, `nvle`, `nvlt`, `nvltrie`, 3626 | `nvrArr`, `nvrtrie`, `nvsim`, `nwArr`, `nwarhk`, `nwarr`, `nwarrow`, `nwnear`, 3627 | `oS`, `oacut`, `oacute`, `oast`, `ocir`, `ocirc`, `ocy`, `odash`, 3628 | `odblac`, `odiv`, `odot`, `odsold`, `oelig`, `ofcir`, `ofr`, `ogon`, `ograv`, 3629 | `ograve`, `ogt`, `ohbar`, `ohm`, `oint`, `olarr`,
`olcir`, `olcross`, `oline`, 3630 | `olt`, `omacr`, `omega`, `omicron`, `omid`, `ominus`, `oopf`, `opar`, `operp`, 3631 | `oplus`, `or`, `orarr`, `ord`, `order`, `orderof`, `ordf`, `ordm`, 3632 | `origof`, `oror`, `orslope`, `orv`, `oscr`, `oslas`, `oslash`, `osol`, `otild`, 3633 | `otilde`, `otimes`, `otimesas`, `oum`, `ouml`, `ovbar`, `par`, `para`, 3634 | `parallel`, `parsim`, `parsl`, `part`, `pcy`, `percnt`, `period`, `permil`, 3635 | `perp`, `pertenk`, `pfr`, `phi`, `phiv`, `phmmat`, `phone`, `pi`, `pitchfork`, 3636 | `piv`, `planck`, `planckh`, `plankv`, `plus`, `plusacir`, `plusb`, `pluscir`, 3637 | `plusdo`, `plusdu`, `pluse`, `plusm`, `plusmn`, `plussim`, `plustwo`, `pm`, 3638 | `pointint`, `popf`, `poun`, `pound`, `pr`, `prE`, `prap`, `prcue`, `pre`, 3639 | `prec`, `precapprox`, `preccurlyeq`, `preceq`, `precnapprox`, `precneqq`, 3640 | `precnsim`, `precsim`, `prime`, `primes`, `prnE`, `prnap`, `prnsim`, `prod`, 3641 | `profalar`, `profline`, `profsurf`, `prop`, `propto`, `prsim`, `prurel`, `pscr`, 3642 | `psi`, `puncsp`, `qfr`, `qint`, `qopf`, `qprime`, `qscr`, `quaternions`, 3643 | `quatint`, `quest`, `questeq`, `quo`, `quot`, `rAarr`, `rArr`, `rAtail`, 3644 | `rBarr`, `rHar`, `race`, `racute`, `radic`, `raemptyv`, `rang`, `rangd`, 3645 | `range`, `rangle`, `raqu`, `raquo`, `rarr`, `rarrap`, `rarrb`, `rarrbfs`, 3646 | `rarrc`, `rarrfs`, `rarrhk`, `rarrlp`, `rarrpl`, `rarrsim`, `rarrtl`, `rarrw`, 3647 | `ratail`, `ratio`, `rationals`, `rbarr`, `rbbrk`, `rbrace`, `rbrack`, `rbrke`, 3648 | `rbrksld`, `rbrkslu`, `rcaron`, `rcedil`, `rceil`, `rcub`, `rcy`, `rdca`, 3649 | `rdldhar`, `rdquo`, `rdquor`, `rdsh`, `re`, `real`, `realine`, `realpart`, 3650 | `reals`, `rect`, `reg`, `rfisht`, `rfloor`, `rfr`, `rhard`, `rharu`, `rharul`, 3651 | `rho`, `rhov`, `rightarrow`, `rightarrowtail`, `rightharpoondown`, 3652 | `rightharpoonup`, `rightleftarrows`, `rightleftharpoons`, `rightrightarrows`, 3653 | `rightsquigarrow`, `rightthreetimes`, `ring`,
`risingdotseq`, `rlarr`, `rlhar`, 3654 | `rlm`, `rmoust`, `rmoustache`, `rnmid`, `roang`, `roarr`, `robrk`, `ropar`, 3655 | `ropf`, `roplus`, `rotimes`, `rpar`, `rpargt`, `rppolint`, `rrarr`, `rsaquo`, 3656 | `rscr`, `rsh`, `rsqb`, `rsquo`, `rsquor`, `rthree`, `rtimes`, `rtri`, `rtrie`, 3657 | `rtrif`, `rtriltri`, `ruluhar`, `rx`, `sacute`, `sbquo`, `sc`, `scE`, `scap`, 3658 | `scaron`, `sccue`, `sce`, `scedil`, `scirc`, `scnE`, `scnap`, `scnsim`, 3659 | `scpolint`, `scsim`, `scy`, `sdot`, `sdotb`, `sdote`, `seArr`, `searhk`, 3660 | `searr`, `searrow`, `sec`, `sect`, `semi`, `seswar`, `setminus`, `setmn`, 3661 | `sext`, `sfr`, `sfrown`, `sh`, `sharp`, `shchcy`, `shcy`, `shortmid`, 3662 | `shortparallel`, `shy`, `sigma`, `sigmaf`, `sigmav`, `sim`, `simdot`, `sime`, 3663 | `simeq`, `simg`, `simgE`, `siml`, `simlE`, `simne`, `simplus`, `simrarr`, 3664 | `slarr`, `smallsetminus`, `smashp`, `smeparsl`, `smid`, `smile`, `smt`, `smte`, 3665 | `smtes`, `softcy`, `sol`, `solb`, `solbar`, `sopf`, `spades`, `spadesuit`, 3666 | `spar`, `sqcap`, `sqcaps`, `sqcup`, `sqcups`, `sqsub`, `sqsube`, `sqsubset`, 3667 | `sqsubseteq`, `sqsup`, `sqsupe`, `sqsupset`, `sqsupseteq`, `squ`, `square`, 3668 | `squarf`, `squf`, `srarr`, `sscr`, `ssetmn`, `ssmile`, `sstarf`, `star`, 3669 | `starf`, `straightepsilon`, `straightphi`, `strns`, `sub`, `subE`, `subdot`, 3670 | `sube`, `subedot`, `submult`, `subnE`, `subne`, `subplus`, `subrarr`, `subset`, 3671 | `subseteq`, `subseteqq`, `subsetneq`, `subsetneqq`, `subsim`, `subsub`, 3672 | `subsup`, `succ`, `succapprox`, `succcurlyeq`, `succeq`, `succnapprox`, 3673 | `succneqq`, `succnsim`, `succsim`, `sum`, `sung`, `sup`, 3674 | `sup1`, `sup2`, `sup3`, `supE`, `supdot`, `supdsub`, `supe`, `supedot`, 3675 | `suphsol`, `suphsub`, `suplarr`, `supmult`, `supnE`, `supne`, `supplus`, 3676 | `supset`, `supseteq`, `supseteqq`, `supsetneq`, `supsetneqq`, `supsim`, 3677 | `supsub`, `supsup`, `swArr`, `swarhk`, `swarr`, `swarrow`, `swnwar`,
`szli`, 3678 | `szlig`, `target`, `tau`, `tbrk`, `tcaron`, `tcedil`, `tcy`, `tdot`, `telrec`, 3679 | `tfr`, `there4`, `therefore`, `theta`, `thetasym`, `thetav`, `thickapprox`, 3680 | `thicksim`, `thinsp`, `thkap`, `thksim`, `thor`, `thorn`, `tilde`, `time`, 3681 | `times`, `timesb`, `timesbar`, `timesd`, `tint`, `toea`, `top`, `topbot`, 3682 | `topcir`, `topf`, `topfork`, `tosa`, `tprime`, `trade`, `triangle`, 3683 | `triangledown`, `triangleleft`, `trianglelefteq`, `triangleq`, `triangleright`, 3684 | `trianglerighteq`, `tridot`, `trie`, `triminus`, `triplus`, `trisb`, `tritime`, 3685 | `trpezium`, `tscr`, `tscy`, `tshcy`, `tstrok`, `twixt`, `twoheadleftarrow`, 3686 | `twoheadrightarrow`, `uArr`, `uHar`, `uacut`, `uacute`, `uarr`, `ubrcy`, 3687 | `ubreve`, `ucir`, `ucirc`, `ucy`, `udarr`, `udblac`, `udhar`, `ufisht`, `ufr`, 3688 | `ugrav`, `ugrave`, `uharl`, `uharr`, `uhblk`, `ulcorn`, `ulcorner`, `ulcrop`, 3689 | `ultri`, `um`, `umacr`, `uml`, `uogon`, `uopf`, `uparrow`, `updownarrow`, 3690 | `upharpoonleft`, `upharpoonright`, `uplus`, `upsi`, `upsih`, `upsilon`, 3691 | `upuparrows`, `urcorn`, `urcorner`, `urcrop`, `uring`, `urtri`, `uscr`, `utdot`, 3692 | `utilde`, `utri`, `utrif`, `uuarr`, `uum`, `uuml`, `uwangle`, `vArr`, `vBar`, 3693 | `vBarv`, `vDash`, `vangrt`, `varepsilon`, `varkappa`, `varnothing`, `varphi`, 3694 | `varpi`, `varpropto`, `varr`, `varrho`, `varsigma`, `varsubsetneq`, 3695 | `varsubsetneqq`, `varsupsetneq`, `varsupsetneqq`, `vartheta`, `vartriangleleft`, 3696 | `vartriangleright`, `vcy`, `vdash`, `vee`, `veebar`, `veeeq`, `vellip`, 3697 | `verbar`, `vert`, `vfr`, `vltri`, `vnsub`, `vnsup`, `vopf`, `vprop`, `vrtri`, 3698 | `vscr`, `vsubnE`, `vsubne`, `vsupnE`, `vsupne`, `vzigzag`, `wcirc`, `wedbar`, 3699 | `wedge`, `wedgeq`, `weierp`, `wfr`, `wopf`, `wp`, `wr`, `wreath`, `wscr`, 3700 | `xcap`, `xcirc`, `xcup`, `xdtri`, `xfr`, `xhArr`, `xharr`, `xi`, `xlArr`, 3701 | `xlarr`, `xmap`, `xnis`, `xodot`, `xopf`, `xoplus`, `xotime`, `xrArr`, 
`xrarr`, 3702 | `xscr`, `xsqcup`, `xuplus`, `xutri`, `xvee`, `xwedge`, `yacut`, `yacute`, 3703 | `yacy`, `ycirc`, `ycy`, `ye`, `yen`, `yfr`, `yicy`, `yopf`, `yscr`, `yucy`, 3704 | `yum`, `yuml`, `zacute`, `zcaron`, `zcy`, `zdot`, `zeetrf`, `zeta`, `zfr`, 3705 | `zhcy`, `zigrarr`, `zopf`, `zscr`, `zwj`, or `zwnj`. 3706 | 3707 | ## References 3708 | 3709 | * **\[HTML]**: 3710 | [HTML Standard](https://html.spec.whatwg.org/multipage/). 3711 | A. van Kesteren, et al. 3712 | WHATWG. 3713 | * **\[RFC20]**: 3714 | [ASCII format for network interchange](https://tools.ietf.org/html/rfc20). 3715 | V.G. Cerf. 3716 | October 1969. 3717 | IETF. 3718 | * **\[RFC5322]**: 3719 | [Internet Message Format](https://tools.ietf.org/html/rfc5322). 3720 | P. Resnick. 3721 | IETF. 3722 | * **\[UNICODE]**: 3723 | [The Unicode Standard](https://www.unicode.org/versions/). 3724 | Unicode Consortium. 3725 | 3726 | ## Acknowledgments 3727 | 3728 | Thanks to John Gruber for inventing Markdown. 3729 | 3730 | Thanks to John MacFarlane for defining CommonMark. 3731 | 3732 | Thanks to ZEIT, Inc., Gatsby, Inc., Netlify, Inc., Holloway, Inc., and the many 3733 | organizations and individuals for financial support through 3734 | [OpenCollective](https://opencollective.com/unified). 3735 | 3736 | ## License 3737 | 3738 | Copyright © 2019 Titus Wormer. 3739 | This work is licensed under a 3740 | [Creative Commons Attribution 4.0 International License](https://creativecommons.org/licenses/by/4.0/). 3741 | --------------------------------------------------------------------------------