├── LICENSE ├── NOTES ├── README.md ├── ast.go ├── ast_test.go ├── doc.go ├── parser.go ├── parser_test.go ├── printer.go ├── printer_test.go ├── scanner.go ├── scanner_test.go ├── walk.go └── walk_test.go /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2013 Ben Johnson 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so, 10 | subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /NOTES: -------------------------------------------------------------------------------- 1 | Types 2 | ===== 3 | 4 | Stylesheet 5 | QualifiedRule 6 | AtRule 7 | 8 | 9 | 10 | § 2. Description of CSS's Syntax 11 | 12 | - CSS document is a series of qualified rules and at-rules. 13 | - Qualified rule: prelude followed by block. 14 | - For style rules, prelude is a series of selectors. 
15 | - Declarations: a name followed by a colon followed by a value; semicolon separated. 16 | - At-rules have a basic structure: "@" + name. 17 | - Some end with a semicolon. 18 | - Some end with a block. 19 | - Names are always identifiers: start with [-a-z] followed by [-_a-z0-9] or escaped codepoints. 20 | 21 | 22 | § 2.1. Escaping 23 | 24 | - Starts with \ 25 | - Followed by code point that is not a hex digit or newline. 26 | - 1 - 6 hex digits followed by optional whitespace. 27 | 28 | 29 | § 2.2 Error Handling 30 | 31 | - Recover gracefully, only throw away a minimum amount of content. 32 | - At top-level, "@" starts an at-rule, anything else is a qualified rule. 33 | - Once an at-rule starts, nothing is invalid. 34 | - Everything before semicolon or block is prelude. 35 | - Block is parsed according to at-rule's own grammar. 36 | - Qualified rule is similar except semicolons don't end them. 37 | - First block is parsed as list of declarations. 38 | - When parsing declarations, unknown syntax causes parser to move to next semicolon. 39 | - Stylesheets ending with open rule, decl, function, string, etc simply closes everything. 40 | - Does not make them invalid. 41 | 42 | 43 | § 3. Tokenizing and Parsing CSS 44 | 45 | - Error handling for user agents is well defined. 46 | - Must abort at first error they do not wish to apply the rules below. 47 | - Output is a CSSStyleSheet object. 48 | 49 | § 3.2. The input byte stream 50 | 51 | - Stream of bytes. 52 | - Encoding based on: 53 | 1. HTTP protocol specifying it. 54 | 2. Read first 1024 bytes and check for: @chartset "..." 55 | • If 'utf-16be' or 'utf-16le' then use utf-8 56 | • Otherwise use value specified. 57 | 58 | § 3.3. Preprocessing the input stream 59 | 60 | - Must replace CR, FF, or CRLF to a single LF. 61 | - Replace NULL with U+FFFD 62 | 63 | 64 | § 4. Tokenization 65 | 66 | - Each scan returns a single token. 
67 | - Types: 68 | * IDENT 69 | * FUNCTION 70 | * ATKEYWORD 71 | * HASH 72 | * STRING 73 | * BADSTRING 74 | * URL 75 | * BADURL 76 | * DELIM 77 | * NUMBER 78 | * PERCENTAGE 79 | * DIMENSION 80 | * UNICODERANGE 81 | * INCLUDEMATCH 82 | * DASHMATCH 83 | * PREFIXMATCH 84 | * SUFFIXMATCH 85 | * SUBSTRINGMATCH 86 | * COLUMN 87 | * WHITESPACE 88 | * CDO 89 | * CDC 90 | * COLON 91 | * SEMICOLON 92 | * COMMA 93 | * LBRACKET 94 | * RBRACKET 95 | * LPAREN 96 | * RPAREN 97 | * LBRACE 98 | * RBRACE 99 | - IDENT, FUNCTION, ATKEYWORD, HASH, STRING, URL have value with 0..* code points. 100 | - HASH has a type flag set to "id" or "restricted". Defaults to "restricted". 101 | - DELIM has a value with 1 code point. 102 | - NUMBER, PERCENTAGE, DIMENSION have 1..* code points and a numeric value. 103 | - NUMBER, DIMENSION have a flag set to "integer" (default) or "number". 104 | - DIMENSION has a unit with 1..* code points. 105 | - UNICODERANGE has a start and end pair of integers. 106 | 107 | - Tokenizer requires LL(3)!!! 108 | - Produces tokens designed to allow selectors to be parsed with LL(1). 109 | 110 | 111 | $ 4.1 Token Railroad diagrams 112 | 113 | * comment: "/*" + (anything but */) + "*/" 114 | * newline: \n | \r\n | \r | \f 115 | * whitespace: " " | \t | newline 116 | * hex-digit: [0-9a-fA-F] 117 | * escape: "\" + (^newline | ^hex-digit) 118 | "\" + hex-digit{1,6} + whitespace{0,1} 119 | * whitespace-token: whitespace+ 120 | * ws*: whitespace-token* 121 | * ident-token: -{0,1} + (a-zA-Z_|non-ASCII|escape) + (a-zA-Z_|0-9|non-ASCII|escape) 122 | * function-token: ident-token + "(" 123 | * at-keyword-token: "@" + ident-token 124 | * hash-token: "#" + (a-zA-Z_|0-9|non-ASCII|escape)* 125 | * string-token: "\"" + (^"|^\n|escape|\+newline) + "\"" (or single quotes) 126 | * url-token: ident-token=="url" + "(" + ws* + (string-token|url-unquoted) + ws* + ")" 127 | * url-unquoted: (^"'()\|^whitespace|^non-printable|escape) 128 | * number-token: ("+"|"-") + digit+ + "." 
+ digit+ + (e|E)+(+|-) + digit+ 129 | * dimension-token: number-token ident-token 130 | * percentage-token: number-token + "%" 131 | * unicode-range-token: (u|U) + "+" + hex-digit{1,6} (or range) 132 | * include-match-token: "~=" 133 | * dash-match-token: "|=" 134 | * prefix-match-token: "^=" 135 | * suffix-match-token: "$=" 136 | * substring-match-token: "*=" 137 | * column-match-token: "||" 138 | * CDO-token: "" 140 | 141 | § 4.2. Definitions 142 | 143 | * code point: Unicode code point. 144 | * next input code point: the first unconsumed code point from the input stream. 145 | * current input code point: the last code point to be consumed. 146 | * reconsume the current input code point: push current to the front of the stream. 147 | * EOF code point: conceptual code point representing the end-of-stream. 148 | * digit: code point between U+0030-U+0039 149 | * hex digi: digit or code point in range of U+0041-U+0046 or U+0061-U+0066 150 | * uppercase letter: code point between "A" - "Z" 151 | * lowercase letter: code point between "a" - "z" 152 | * letter: uppercase or lowercase letter 153 | * non-ASCII code point: code point greater than U+0080 154 | * name-start code point: letter, non-ASCII code point or LOW LINE (_) 155 | * name code point: name code point, digit or HYPHEN-MINUS (-). 156 | * non-printable code point: U+0000-U+0008, U+000B, U+000E-U+001F, U+007F 157 | * newline: U+000A 158 | * whitespace: newline, U+0009 (tab), U+0020 (space) 159 | * surrogate code point: U+D800-U+DFFF inclusive 160 | * maximum allowed code point: U+10FFFF 161 | * identifier: portion of CSS with same syntax as ident-token. Has the "id" type. 162 | 163 | § 4.3. Tokenizer Algorithms 164 | 165 | Transforms a stream of code points into a stream of tokens. 166 | 167 | §4.3.1. Consume a token 168 | 169 | * whitespace: consume as much as possible and return a whitespace-token. 170 | * ": Consume a string-token. 
171 | * #: If next code point is a name code point or next two are a valid escape and 172 | the 3 code points would start an identifier then return a hash token with 173 | type flag set to "id" and name set to the identifier. 174 | Otherwise return delim-token. 175 | * $: If next code point is "=" then return suffix-match-token. 176 | Otherwise return delim-token. 177 | * ': Consume a string-token. 178 | * (: Return a (-token 179 | * ): Return a )-token 180 | * *: If next code point is "=" then return substring-match-token. 181 | Otherwise return delim-token. 182 | * +: If next code point is a number, return a numeric-token. 183 | Otherwise return delim-token. 184 | * ,: Return comma-token. 185 | * -: If next code point is a number, return numeric-token. 186 | If next code point is an identifier, return ident-like token. 187 | If next 2 code points are "->", return a CDC-token. 188 | Otherwise return delim-token. 189 | * .: If next code point is a number, return numeric-token. 190 | Otherwise return delim-token. 191 | * /: If next code point is "*", consume it and all code points up to "*/" or EOF. 192 | Otherwise return delim-token. 193 | * :: Return colon-token. 194 | * ;: Return semicolon-token. 195 | * <: If the next 3 code points are "!--" return CDO. 196 | Otherwise return delim-token. 197 | * @: If next 3 code points make an identifier, return at-keyword-token. 198 | Otherwise return delim-token. 199 | * [: Return [-token. 200 | * \: If followed by valid escape, return ident-like token. 201 | Otherwise this is a PARSE ERROR. Return delim-token. 202 | * ]: Return ]-token. 203 | * ^: If next code point is "=", return prefix-match-token. 204 | Otherwise return delim-token. 205 | * {: Return {-token. 206 | * }: Return }-token. 207 | * u|U: If next 2 code points are "+" + hex-digit or "?", return unicode-range-token. 208 | Otherwise return ident-like token. 209 | * |: If next code point is "=", return dash-match-token. 
210 | If next code point is "|", return column-token. 211 | Otherwise return delim-token. 212 | * ~: If next code point is "=", return include-match-token. 213 | Otherwise return delim-token. 214 | * EOF: Return eof-token. 215 | 216 | Return a delim-token for anything else. 217 | 218 | 219 | § 4.3.2. Consume a numeric token 220 | 221 | Includes number-token, percentage-token, or dimension-token. 222 | 223 | 1. Consume a number. 224 | 2. If next 3 code points would start an identifier: 225 | a. Create dimension-token 226 | b. Consume a name, set to units. 227 | c. Return dimension-token. 228 | 3. Otherwise if next code point is "%" then return percentage-token. 229 | 4. Otherwise return number-token. 230 | 231 | 232 | § 4.3.3. Consume an ident-like token 233 | 234 | Includes ident-token, function-token, url-token, bad-url-token. 235 | 236 | 1. Consume a name. 237 | 2. If value is case-insensitive "url" followed by a "(", consume a url-token 238 | and return it. 239 | 3. Otherwise if next code point is "(" create a function-token and return it. 240 | 4. Otherwise return an ident-token. 241 | 242 | 243 | § 4.3.4. Consume a string token 244 | 245 | Includes a string-token or bad-string-token. 246 | 247 | Must set the ending code point that ends the string. 248 | 249 | 1. Create a string-token. 250 | 2. Repeatedly consume: 251 | EOF: Return the string-token. 252 | newline: This is a PARSE ERROR. Return bad-string-token. 253 | \: If next code point is EOF, do nothing. 254 | If next code point is newline then consume it. 255 | If starts valid escape, append escaped code point. 256 | Anything else: Append code point. 257 | 258 | 259 | § 4.3.5. Consume a URL token 260 | 261 | Include url-token and bad-url-token. 262 | 263 | Assumes initial "url(" has been consumed. 264 | 265 | 1. Create url-token. 266 | 2. Consume whitespace. 267 | 3. If next code point is EOF, return url-token. 268 | 4. If next code point is "\"", consume a string token. 
269 | If bad-string-token returned, consume remenants of bad-url, and return bad-url-token. 270 | Set url-token's value to string-token value. 271 | Consume whitespace. 272 | If next code point is EOF or ) then consume it and return url-token. Otherwise 273 | return remenants of bad url and return bad-url-token. 274 | 5. Repeatedly consume: 275 | ) or EOF: return url-token. 276 | whitespace: consume! if next code point is ) or EOF return url-token. otherwise 277 | consume remenants and return bad-url-token. 278 | " or ' or ( or non-printable: PARSE ERROR! Consume remenants, return bad-url. 279 | \: if valid escape, append escaped code point. Otherwise parse error. Consume remenants, return bad-url. 280 | anything else: append 281 | 282 | 283 | § 4.3.6. Consume unicode-range token 284 | 285 | Includes unicode-range-token. 286 | 287 | Assumes initial "u+" has been consumed and next digit is hex-digit or ? 288 | 289 | 1. Consume up to 6 hex digits. If less than 6, consume ? until chars totals 6. 290 | 291 | If any ? were consumed: 292 | 293 | a. Interpret value as hex number, replace ? with 0. This is the start of the range. 294 | 295 | b. Interpret value as hex number, replace ? with F. This is the end of the range. 296 | 297 | c. Return new unicode range. 298 | 299 | Otherwise interpret as start of the range. 300 | 301 | 2. If next 2 code points are - and hex digit, consume up to 6 hex digits. 302 | This is the end of the range. 303 | 304 | 3. Otherwise the end of the range is equal to the start. 305 | 306 | 4. Return unicode-range-token. 307 | 308 | 309 | § 4.3.7. Consume an escaped code point 310 | 311 | Assumes that \ is already consumed and next char is not a newline. 312 | 313 | Consume next code point. 314 | 315 | hex digit: Consume up to 6 hex digits. If next code point is whitespace then 316 | consume it too. Interpret as hex number. If value is 0, or a 317 | surrogate code point, or greater than max, return U+FFFD. 
318 | 319 | EOF: Return U+FFFD 320 | 321 | Anything else: return code point 322 | 323 | 324 | § 4.3.8. Check if two code points are a valid escape. 325 | 326 | If first code point is not \, return false. 327 | 328 | If second code point is newline, return false. 329 | 330 | Otherwise return true. 331 | 332 | 333 | § 4.3.9. Check if three code points would start an identifier 334 | 335 | Look at first code point: 336 | 337 | -: If 2nd is a name-start code point or 2nd and 3rd are a valid escape, return true. 338 | Otherwise return false. 339 | 340 | name-start code point: return true 341 | 342 | \: If 1st and 2nd are valid escape, return true. Otherwise return false. 343 | 344 | 345 | § 4.3.10. Check if three code points would start a number 346 | 347 | Look at first code point: 348 | 349 | + or -: If 2nd is digit, return true. 350 | If 2nd is . and 3rd is digit, return true. 351 | Otherwise return false. 352 | 353 | .: If 2nd is digit return true. Otherwise return false. 354 | 355 | digit: return true 356 | 357 | anything else: return false 358 | 359 | 360 | § 4.3.11. Consume a name 361 | 362 | Does not verify that code points constitute an ident-token. 363 | 364 | Repeatedly consume: 365 | 366 | name code point: append to result 367 | 368 | stream start with valid escape: consume escaped code point and append. 369 | 370 | anything else: return result 371 | 372 | 373 | § 4.3.12. Consume a number 374 | 375 | Returns 3-tuple of string representation, numeric value, and type. 376 | 377 | This does not verify that the first few code points make a number. 378 | 379 | 1. Set *repr* to empty string and type to "integer". 380 | 381 | 2. If next code point is + or -, consume it and append to repr. 382 | 383 | 3. Consume digits and append to repr. 384 | 385 | 4. If next 2 code points are . and digit then: 386 | consume, append to repr, set type to "number", consume digits. 387 | 388 | 5. 
If next 2 or 3 code points are (e|E) optionally followed by +/-, then a digit: 389 | consume, append to repr, set type to "number", consume digits. 390 | 391 | 6. Convert repr to number and set value. 392 | 393 | 7. Return 3-tuple. 394 | 395 | 396 | § 4.3.13. Convert a string to a number 397 | 398 | ... 399 | 400 | 401 | § 4.3.14. Consume remnants of a bad url 402 | 403 | This returns nothing. Only consume enough input to recover tokenizer. 404 | 405 | Repeatedly consume: 406 | 407 | ) or EOF: Return 408 | 409 | valid escape: consume escaped code point 410 | 411 | anything else: do nothing. 412 | 413 | 414 | 415 | § 5. Parsing 416 | 417 | * at-rule: name, prelude, and optional block. 418 | 419 | * qualified rule: prelude and {} block. 420 | 421 | * declaration: name, value, and important flag. 422 | can be *properties* or *descriptors* 423 | 424 | * component value: preserved tokens, a function, or a simple block. 425 | 426 | * preserved token: any token except function-token, {-token, (-token, or [-token. 427 | }, ), ], bad-string, bad-url are always parse errors but are preserved for higher level error handling. 428 | 429 | * function: name and value (list of component values) 430 | 431 | * simple block: has token ([, (, or {) and a value (list of component values) 432 | 433 | 434 | § 5.1. Parser Railroad Diagrams 435 | 436 | Stylesheet: (at-rule | qualified rule | whitespace-token | CDO | CDC)* 437 | 438 | Rule list: (at-rule | qualified rule | whitespace-token)* 439 | 440 | at-rule: at-keyword-token component-value* ({}-block | ;) 441 | 442 | qualified rule: component-value* {}-block 443 | 444 | declaration list: ws* at-rule declaration-list 445 | declaration 446 | declaration; declaration-list 447 | 448 | declaration: ident-token ws* : component-value* !important 449 | 450 | !important: ! 
ws* ident-token("important") ws* 451 | 452 | component-value: preserved-token 453 | {}-block 454 | ()-block 455 | []-block 456 | Function block 457 | 458 | {}-block: { component-value* } 459 | 460 | ()-block: ( component-value* ) 461 | 462 | []-block: [ component-value* ] 463 | 464 | function-block: function-token component-value* ) 465 | 466 | 467 | § 5.3. Parser Entry Points 468 | 469 | * Parse stylesheet 470 | * Parse rule list 471 | * Parse rule 472 | * Parse declaration 473 | * Parse declaration list 474 | * Parse component value 475 | * Parse component value list 476 | 477 | 478 | § 5.3.1 Parse stylesheet 479 | 480 | 1. Create stylesheet 481 | 2. Consume list of rules, set "top level" flag. 482 | 3. Return stylesheet. 483 | 484 | § 5.3.2. Parse list of rules 485 | 486 | 1. Consume list of rules from stream of tokens with top-level flag unset. 487 | 2. Return list. 488 | 489 | § 5.3.3. Parse a rule 490 | 491 | 1. Consume next token 492 | 2. While token is whitespace-token, consume next token. 493 | 3. If input token is EOF, return syntax error. 494 | If token is at-keyword-token, consume an at-rule and let "rule" be the return value. 495 | Otherwise consume qualified rule and let rule be the return value. 496 | If nothing was returned the return a syntax error. 497 | 4. While input token is whitespace-token, consume next token. 498 | 5. If current token is EOF, return rule. Otherwise return syntax error. 499 | 500 | § 5.3.4. Parse a declaration 501 | 502 | 1. Consume next input token. 503 | 2. While whitespace, consume next token. 504 | 3. If current token is not ident-token, return syntax error. 505 | 4. Consume a declaration. If anything was returned, return it. 506 | Otherwise syntax error. 507 | 508 | § 5.3.5. Parse a list of declarations 509 | 510 | This mixes declarations and at-rules. 511 | 512 | 1. Consume a list of declarations. 513 | 2. Return the returned list. 514 | 515 | § 5.3.6. Parse a component value 516 | 517 | 1. Consume next input token. 
518 | 2. While whitespace-token, consume next token. 519 | 3. If current token is EOF, return syntax error. 520 | 4. Unread token. Consume component value and return value. If nothing returned, syntax error! 521 | 5. While whitespace-token, consume next token. 522 | 6. If current token is EOF, return value. Otherwise syntax error. 523 | 524 | § 5.3.7. Parse list of component values. 525 | 526 | 1. Repeatedly consume component value until EOF. 527 | 528 | 529 | § 5.4. Parser Algorithms 530 | 531 | Algorithms return EOF at their end. 532 | 533 | § 5.4.1. Consume a list of rules 534 | 535 | Repeatedly consume next token: 536 | 537 | whitespace-token: do nothing 538 | EOF: return list of rules 539 | CDO, CDC: If top-level set, do nothing. 540 | Otherwise, unread. Consume a qualified rule. Append return value. 541 | at-keyword-token: Unread. Consume an at-rule. Append return value. 542 | anything else: Unread. Consume a qualified rule. Append return value. 543 | 544 | § 5.4.2. Consume an at-rule 545 | 546 | Create new at-rule with name set to current token. 547 | 548 | Repeatedly consume: 549 | 550 | semicolon-token, EOF: Return at-rule 551 | {: Consume simple block, assign to at-rule's block, return at-rule. 552 | simple-block with {-token: Assign block to at-rule, return at-rule. 553 | anything else: Unread. Consume component value. Append to prelude. 554 | 555 | § 5.4.3. Consume a qualified rule 556 | 557 | Create new qualified rule. 558 | 559 | Repeatedly consume: 560 | 561 | EOF: parse error, return nothing. 562 | {-token: Consume simple block and assign to rule. Return rule. 563 | simple block with {-token: Assign to rule. Return rule. 564 | Anything else: unread. Consume component value. Append to prelude. 565 | 566 | § 5.4.4. Consume list of declarations 567 | 568 | Create empty list. 569 | 570 | Repeatedly consume: 571 | 572 | whitespace-token, semicolon-token: do nothing. 573 | EOF: return list. 574 | at-keyword-token: Consume at-rule. Append to list. 
575 | ident-token: Initialize temporary list with current. 576 | Repeatedly append while not semicolon-token or EOF. 577 | Consume declaration from temporary list. Append result to list. 578 | anything else: parse error. Repeatedly consume input until semicolon or EOF. 579 | 580 | 581 | § 5.4.5. Consume a declaration 582 | 583 | Create new declaration with name set to current token. 584 | 585 | 1. Consume next token. 586 | 2. Consume while current is whitespace. 587 | 3. If current is not colon-token, parse error. Return nothing. 588 | Otherwise consume next token. 589 | 4. Append tokens until EOF. 590 | 5. If last two non-whitespace tokens are are delim-token followed by 591 | delim-token, remove from value and set important flag. 592 | 6. Return declaration. 593 | 594 | 595 | § 5.4.6. Consume a component value 596 | 597 | Consume token. 598 | 599 | If current is {, [, or ( then consume a simple block and return it. 600 | 601 | Otherwise if function-token then consume function and return it. 602 | 603 | Otherwise return current. 604 | 605 | 606 | § 5.4.7. Consume simple block 607 | 608 | Ending token is mirror variant of current token. 609 | 610 | Repeatedly consume: 611 | 612 | EOF, ending token: Return block. 613 | anything else: Unread. Consume component value and append to block. 614 | 615 | 616 | § 5.4.8. Consume a function 617 | 618 | Create function with name equal to current. 619 | 620 | Repeatedly consume: 621 | 622 | EOF, )-token: Return function. 623 | anything else: unread. Consume component value and append to function. 624 | 625 | 626 | 627 | § 6. An+B Microsyntax 628 | 629 | * "even" and "odd" keywords allowed. 630 | * A and B can be negative but only positive results are used. 631 | * If A==0 && B==0 then no elements are matched. 632 | 633 | § 6.1. Informal Syntax Description 634 | 635 | * If A==0 then An can be omitted. 636 | * If An is omitted then + can be omitted. 637 | * 1n+0 == n+0 == n 638 | 639 | § 6.2. 
The type 640 | 641 | * Use regular CSS tokens. 642 | 643 | 644 | 645 | § 7. Defining Grammars for Rules and Other Values 646 | 647 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | css [![Build Status](https://drone.io/github.com/benbjohnson/css/status.png)](https://drone.io/github.com/benbjohnson/css/latest) [![Coverage Status](https://coveralls.io/repos/benbjohnson/css/badge.png?branch=master)](https://coveralls.io/r/benbjohnson/css?branch=master) [![GoDoc](https://godoc.org/github.com/benbjohnson/css?status.png)](https://godoc.org/github.com/benbjohnson/css) ![Project status](http://img.shields.io/status/alpha.png?color=red) 2 | === 3 | 4 | This package provides a CSS parser and scanner in pure Go. It is an 5 | implementation as specified in the W3C's [CSS Syntax Module Level 3][css3-syntax]. 6 | 7 | For documentation on how to use this package, please see the [godoc][godoc]. 8 | 9 | [css3-syntax]: http://www.w3.org/TR/css3-syntax/ 10 | [godoc]: https://godoc.org/github.com/benbjohnson/css 11 | 12 | 13 | ## Project Status 14 | 15 | The scanner and parser are fully compliant with the CSS3 specification. 16 | The printer will print nodes generated from the scanner and parser, however, 17 | it is not fully compliant with the [CSS3 serialization][serialization] spec. 18 | Additionally, the printer does not provide an option to collapse whitespace 19 | although that will be added in the future. 20 | 21 | This project has 100% test coverage, however, it is still a new project. 22 | Please report any bugs you experience or let me know where the documentation 23 | can be clearer. 24 | 25 | [serialization]: http://www.w3.org/TR/css3-syntax/#serialization 26 | 27 | 28 | ## Caveats 29 | 30 | The CSS scanner in this package only supports UTF-8 encoding. The @charset 31 | directive will be ignored. 
If you need to scan a different encoding then 32 | please convert it to UTF-8 first using a tool such as [iconv][iconv]. 33 | 34 | [iconv]: http://en.wikipedia.org/wiki/Iconv 35 | -------------------------------------------------------------------------------- /ast.go: -------------------------------------------------------------------------------- 1 | package css 2 | 3 | import "fmt" 4 | 5 | // Node represents a node in the CSS3 abstract syntax tree. 6 | type Node interface { 7 | node() 8 | } 9 | 10 | func (_ *StyleSheet) node() {} 11 | func (_ Rules) node() {} 12 | func (_ *AtRule) node() {} 13 | func (_ *QualifiedRule) node() {} 14 | func (_ Declarations) node() {} 15 | func (_ *Declaration) node() {} 16 | func (_ ComponentValues) node() {} 17 | func (_ *SimpleBlock) node() {} 18 | func (_ *Function) node() {} 19 | func (_ *Token) node() {} 20 | 21 | // StyleSheet represents a top-level CSS3 stylesheet. 22 | type StyleSheet struct { 23 | Rules Rules 24 | } 25 | 26 | // Rules represents a list of rules. 27 | type Rules []Rule 28 | 29 | // Rule represents a qualified rule or at-rule. 30 | type Rule interface { 31 | Node 32 | rule() 33 | } 34 | 35 | func (_ *AtRule) rule() {} 36 | func (_ *QualifiedRule) rule() {} 37 | 38 | // AtRule represents a rule starting with an "@" symbol. 39 | type AtRule struct { 40 | Name string 41 | Prelude ComponentValues 42 | Block *SimpleBlock 43 | Pos Pos 44 | } 45 | 46 | // QualifiedRule represents an unnamed rule that includes a prelude and block. 47 | type QualifiedRule struct { 48 | Prelude ComponentValues 49 | Block *SimpleBlock 50 | Pos Pos 51 | } 52 | 53 | // Declarations represents a list of declarations or at-rules. 54 | type Declarations []Node 55 | 56 | // Declaration represents a name/value pair. 57 | type Declaration struct { 58 | Name string 59 | Values ComponentValues 60 | Important bool 61 | Pos Pos 62 | } 63 | 64 | // ComponentValues represents a list of component values. 
65 | type ComponentValues []ComponentValue 66 | 67 | // nonwhitespace returns the list of values without whitespace characters. 68 | func (a ComponentValues) nonwhitespace() ComponentValues { 69 | var tmp ComponentValues 70 | for _, v := range a { 71 | if v, ok := v.(*Token); ok && v.Tok == WhitespaceToken { 72 | continue 73 | } 74 | tmp = append(tmp, v) 75 | } 76 | return tmp 77 | } 78 | 79 | // ComponentValue represents a component value. 80 | type ComponentValue interface { 81 | Node 82 | componentValue() 83 | } 84 | 85 | func (_ *SimpleBlock) componentValue() {} 86 | func (_ *Function) componentValue() {} 87 | func (_ *Token) componentValue() {} 88 | 89 | // SimpleBlock represents a {-block, [-block, or (-block. 90 | type SimpleBlock struct { 91 | Token *Token 92 | Values ComponentValues 93 | Pos Pos 94 | } 95 | 96 | // Function represents a function call with a list of arguments. 97 | type Function struct { 98 | Name string 99 | Values ComponentValues 100 | Pos Pos 101 | } 102 | 103 | // Token represents a lexical token. 104 | type Token struct { 105 | // The type of token. 106 | Tok Tok 107 | 108 | // A flag set for ident-like tokens to either "id" or "unrestricted". 109 | // Also set for numeric tokens to either "integer" or "number" 110 | Type string 111 | 112 | // The literal value of the token as parsed. 113 | Value string 114 | 115 | // The rune used to close the token. Used for string tokens. 116 | Ending rune 117 | 118 | // The numeric value and unit used for numeric tokens. 119 | Number float64 120 | Unit string 121 | 122 | // Beginning and ending range for a unicode-range token. 123 | Start int 124 | End int 125 | 126 | // Position of the token in the source document. 127 | Pos Pos 128 | } 129 | 130 | // Tok represents a lexical token type. 
131 | type Tok int 132 | 133 | const ( 134 | IdentToken Tok = iota + 1 135 | FunctionToken 136 | AtKeywordToken 137 | HashToken 138 | StringToken 139 | BadStringToken 140 | URLToken 141 | BadURLToken 142 | DelimToken 143 | NumberToken 144 | PercentageToken 145 | DimensionToken 146 | UnicodeRangeToken 147 | IncludeMatchToken 148 | DashMatchToken 149 | PrefixMatchToken 150 | SuffixMatchToken 151 | SubstringMatchToken 152 | ColumnToken 153 | WhitespaceToken 154 | CDOToken 155 | CDCToken 156 | ColonToken 157 | SemicolonToken 158 | CommaToken 159 | LBrackToken 160 | RBrackToken 161 | LParenToken 162 | RParenToken 163 | LBraceToken 164 | RBraceToken 165 | EOFToken 166 | ) 167 | 168 | // Pos specifies the line and character position of a token. 169 | // The Char and Line are both zero-based indexes. 170 | type Pos struct { 171 | Char int 172 | Line int 173 | } 174 | 175 | // Position returns the position for a given Node. 176 | func Position(n Node) Pos { 177 | switch n := n.(type) { 178 | case *StyleSheet: 179 | return Position(n.Rules) 180 | case Rules: 181 | if len(n) > 0 { 182 | return Position(n[0]) 183 | } 184 | case *AtRule: 185 | return n.Pos 186 | case *QualifiedRule: 187 | return n.Pos 188 | case Declarations: 189 | if len(n) > 0 { 190 | return Position(n[0]) 191 | } 192 | case *Declaration: 193 | return n.Pos 194 | case ComponentValues: 195 | if len(n) > 0 { 196 | return Position(n[0]) 197 | } 198 | case *SimpleBlock: 199 | return n.Pos 200 | case *Function: 201 | return n.Pos 202 | case *Token: 203 | return n.Pos 204 | } 205 | return Pos{} 206 | } 207 | 208 | // Error represents a syntax error. 209 | type Error struct { 210 | Message string 211 | Pos Pos 212 | } 213 | 214 | // Error returns the formatted string error message. 215 | func (e *Error) Error() string { 216 | return e.Message 217 | } 218 | 219 | // ErrorList represents a list of syntax errors. 220 | type ErrorList []error 221 | 222 | // Error returns the formatted string error message. 
223 | func (a ErrorList) Error() string { 224 | switch len(a) { 225 | case 0: 226 | return "no errors" 227 | case 1: 228 | return a[0].Error() 229 | } 230 | return fmt.Sprintf("%s (and %d more errors)", a[0], len(a)-1) 231 | } 232 | -------------------------------------------------------------------------------- /ast_test.go: -------------------------------------------------------------------------------- 1 | package css 2 | 3 | import ( 4 | "reflect" 5 | "testing" 6 | ) 7 | 8 | // Ensure that all nodes implement the Node interface. 9 | func TestNode(t *testing.T) { 10 | var a []Node 11 | a = append(a, &StyleSheet{}, &AtRule{}, &QualifiedRule{}, &Declaration{}) 12 | a = append(a, &SimpleBlock{}, &Function{}, &Token{}) 13 | a = append(a, Rules{}, Declarations{}, ComponentValues{}) 14 | for _, n := range a { 15 | n.node() 16 | } 17 | } 18 | 19 | // Ensure that all rules implement the Rule interface. 20 | func TestRule(t *testing.T) { 21 | a := []Rule{&AtRule{}, &QualifiedRule{}} 22 | for _, r := range a { 23 | r.rule() 24 | } 25 | } 26 | 27 | // Ensure that all component values implement the ComponentValue interface. 28 | func TestComponentValue(t *testing.T) { 29 | a := []ComponentValue{&SimpleBlock{}, &Function{}, &Token{}} 30 | for _, v := range a { 31 | v.componentValue() 32 | } 33 | } 34 | 35 | // Ensure that node positions can be retrieved. 
36 | func TestPosition(t *testing.T) { 37 | var tests = []struct { 38 | in Node 39 | pos Pos 40 | }{ 41 | {in: &StyleSheet{Rules: Rules{&QualifiedRule{Pos: Pos{1, 2}}}}, pos: Pos{1, 2}}, 42 | {in: Rules{&AtRule{Pos: Pos{1, 2}}}, pos: Pos{1, 2}}, 43 | {in: Rules{}, pos: Pos{}}, 44 | {in: &QualifiedRule{Pos: Pos{1, 2}}, pos: Pos{1, 2}}, 45 | {in: &AtRule{Pos: Pos{1, 2}}, pos: Pos{1, 2}}, 46 | {in: Declarations{&AtRule{Pos: Pos{1, 2}}}, pos: Pos{1, 2}}, 47 | {in: Declarations{&Declaration{Pos: Pos{1, 2}}}, pos: Pos{1, 2}}, 48 | {in: Declarations{}, pos: Pos{}}, 49 | {in: ComponentValues{&SimpleBlock{Pos: Pos{1, 2}}}, pos: Pos{1, 2}}, 50 | {in: ComponentValues{&Function{Pos: Pos{1, 2}}}, pos: Pos{1, 2}}, 51 | {in: ComponentValues{&Token{Pos: Pos{1, 2}}}, pos: Pos{1, 2}}, 52 | {in: ComponentValues{}, pos: Pos{}}, 53 | {in: &SimpleBlock{Pos: Pos{1, 2}}, pos: Pos{1, 2}}, 54 | {in: &Function{Pos: Pos{1, 2}}, pos: Pos{1, 2}}, 55 | {in: &Token{Pos: Pos{1, 2}}, pos: Pos{1, 2}}, 56 | } 57 | 58 | for _, tt := range tests { 59 | if pos := Position(tt.in); !reflect.DeepEqual(tt.pos, pos) { 60 | t.Errorf("expected: %#v, got: %#v", tt.pos, pos) 61 | } 62 | } 63 | } 64 | 65 | // Ensure that an error list can be properly formatted. 
66 | func TestErrorList_Error(t *testing.T) { 67 | var tests = []struct { 68 | in ErrorList 69 | s string 70 | }{ 71 | {in: nil, s: "no errors"}, 72 | {in: ErrorList{}, s: "no errors"}, 73 | {in: ErrorList{&Error{Message: "foo"}}, s: "foo"}, 74 | {in: ErrorList{&Error{Message: "foo"}, &Error{Message: "bar"}}, s: "foo (and 1 more errors)"}, 75 | } 76 | 77 | for _, tt := range tests { 78 | if s := tt.in.Error(); tt.s != s { 79 | t.Errorf("expected: %s, got: %s", tt.s, s) 80 | } 81 | } 82 | 83 | } 84 | 85 | // TODO(benbjohnson): TestPosition_* 86 | -------------------------------------------------------------------------------- /doc.go: -------------------------------------------------------------------------------- 1 | /* 2 | Package css implements a CSS3 compliant scanner and parser. This is meant to 3 | be a low-level library for extracting a CSS3 abstract syntax tree from raw 4 | CSS text. 5 | 6 | This package can be used for building tools to validate, optimize and format 7 | CSS text. 8 | 9 | 10 | Basics 11 | 12 | CSS parsing occurs in two steps. First the scanner breaks up a stream of code 13 | points (runes) into tokens. These tokens represent the most basic units of 14 | the CSS syntax tree such as identifiers, whitespace, and strings. The second 15 | step is to feed these tokens into the parser which creates the abstract syntax 16 | tree (AST) based on the context of the tokens. 17 | 18 | Unlike many language parsers, the abstract syntax tree for CSS saves many of the 19 | original tokens in the stream so they can be reparsed at different levels. For 20 | example, parsing a @media query will save off the raw tokens found in the 21 | {-block so they can be reparsed as a full style sheet. This package doesn't 22 | understand the specifics of how to parse different types of at-rules (such as 23 | @media queries) so it defers that to the user to handle parsing. 24 | 25 | 26 | Abstract Syntax Tree 27 | 28 | The CSS3 syntax defines a syntax tree of several types. 
At the top-level there 29 | is a StyleSheet. The style sheet is simply a collection of Rules. A Rule can be 30 | either an AtRule or a QualifiedRule. 31 | 32 | An AtRule is defined as a rule starting with an "@" symbol and an identifier, 33 | then it's followed by zero or more component values and finally ends with either 34 | a {-block or a semicolon. The block is parsed simply as a collection of tokens 35 | and it is up to the user to define the exact grammar. 36 | 37 | A QualifiedRule is defined as a rule starting with one or more component values 38 | and ending with a {-block. 39 | 40 | Inside the {-blocks are a list of declarations. Despite the name, a list of 41 | declarations can be either an AtRule or a Declaration. A Declaration is an 42 | identifier followed by a colon followed by one or more component values. The 43 | declaration can also have it's Important flag set if the last two non-whitespace 44 | tokens are a case-insensitive "!important". 45 | 46 | ComponentValues are the basic unit inside rules and declarations. A 47 | ComponentValue can be either a SimpleBlock, a Function, or a Token. A simple 48 | block starts with either a {, [, or (, has zero or more component values, and 49 | then ends with the mirror of the starting token (}, ], or )). A Function is 50 | an identifier immediately followed by a left parenthesis, then zero or more 51 | component values, and then ending with a right parenthesis. 52 | 53 | 54 | */ 55 | package css 56 | -------------------------------------------------------------------------------- /parser.go: -------------------------------------------------------------------------------- 1 | package css 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | ) 7 | 8 | // Parser represents a CSS3 parser. 9 | type Parser struct { 10 | Errors ErrorList 11 | } 12 | 13 | // ParseStyleSheet parses an input stream into a stylesheet. 
14 | func (p *Parser) ParseStyleSheet(s *Scanner) *StyleSheet { 15 | ss := &StyleSheet{} 16 | ss.Rules = p.ConsumeRules(&scanner{s}, true) 17 | return ss 18 | } 19 | 20 | // ParseRule parses a list of rules. 21 | func (p *Parser) ParseRules(s *Scanner) Rules { 22 | return p.ConsumeRules(&scanner{s}, false) 23 | } 24 | 25 | // ParseRule parses a qualified rule or at-rule. 26 | func (p *Parser) ParseRule(s *Scanner) Rule { 27 | var r Rule 28 | 29 | // Skip over initial whitespace. 30 | p.skipWhitespace(&scanner{s}) 31 | 32 | // If the next token is EOF, return syntax error. 33 | // If the next token is at-keyword, consume an at-rule. 34 | // Otherwise consume a qualified rule. If nothing is returned, return error. 35 | tok := s.Scan() 36 | if tok.Tok == EOFToken { 37 | p.Errors = append(p.Errors, &Error{Message: "unexpected EOF", Pos: Position(s.current())}) 38 | return nil 39 | } else if tok.Tok == AtKeywordToken { 40 | r = p.ConsumeAtRule(&scanner{s}) 41 | } else { 42 | s.unscan() 43 | r = p.ConsumeQualifiedRule(&scanner{s}) 44 | } 45 | 46 | // Skip over trailing whitespace. 47 | p.skipWhitespace(&scanner{s}) 48 | 49 | if tok := s.Scan(); tok.Tok != EOFToken { 50 | p.Errors = append(p.Errors, &Error{Message: fmt.Sprintf("expected EOF, got %s", print(s.current())), Pos: Position(s.current())}) 51 | return nil 52 | } 53 | 54 | return r 55 | } 56 | 57 | // ParseDeclaration parses a name/value declaration. 58 | func (p *Parser) ParseDeclaration(s *Scanner) *Declaration { 59 | // Skip over initial whitespace. 60 | p.skipWhitespace(&scanner{s}) 61 | 62 | // If the next token is not an ident then return an error. 63 | if tok := s.Scan(); tok.Tok != IdentToken { 64 | p.Errors = append(p.Errors, &Error{Message: fmt.Sprintf("expected ident, got %s", print(s.current())), Pos: Position(s.current())}) 65 | return nil 66 | } 67 | s.unscan() 68 | 69 | // Consume a declaration. 
70 | return p.ConsumeDeclaration(&scanner{s}) 71 | } 72 | 73 | // ParseDeclarations parses a list of declarations and at-rules. 74 | func (p *Parser) ParseDeclarations(s *Scanner) Declarations { 75 | return p.ConsumeDeclarations(&scanner{s}) 76 | } 77 | 78 | // ParseComponentValue parses a component value. 79 | func (p *Parser) ParseComponentValue(s *Scanner) ComponentValue { 80 | // Skip over initial whitespace. 81 | p.skipWhitespace(&scanner{s}) 82 | 83 | // If the next token is EOF then return an error. 84 | if tok := s.Scan(); tok.Tok == EOFToken { 85 | p.Errors = append(p.Errors, &Error{Message: "unexpected EOF", Pos: Position(s.current())}) 86 | return nil 87 | } 88 | s.unscan() 89 | 90 | // Consume component value. 91 | v := p.ConsumeComponentValue(&scanner{s}) 92 | 93 | // Skip over any trailing whitespace. 94 | p.skipWhitespace(&scanner{s}) 95 | 96 | // If we're not at EOF then return a syntax error. 97 | if tok := s.Scan(); tok.Tok != EOFToken { 98 | s.unscan() 99 | p.Errors = append(p.Errors, &Error{Message: fmt.Sprintf("expected EOF, got %s", print(s.current())), Pos: Position(s.current())}) 100 | return nil 101 | } 102 | 103 | return v 104 | } 105 | 106 | // ParseComponentValues parses a list of component values. 107 | func (p *Parser) ParseComponentValues(s *Scanner) ComponentValues { 108 | var a ComponentValues 109 | 110 | // Repeatedly consume a component value until EOF. 111 | for { 112 | v := p.ConsumeComponentValue(&scanner{s}) 113 | 114 | // If the value is an EOF, then exit. 115 | if tok, ok := v.(*Token); ok && tok.Tok == EOFToken { 116 | break 117 | } 118 | 119 | // Otherwise append to list of component values. 120 | a = append(a, v) 121 | } 122 | 123 | return a 124 | } 125 | 126 | // ConsumeRules consumes a list of rules from a token stream. 
127 | func (p *Parser) ConsumeRules(s ComponentValueScanner, topLevel bool) Rules { 128 | var a Rules 129 | for { 130 | tok := s.Scan() 131 | switch tok := tok.(type) { 132 | case *Token: 133 | switch tok.Tok { 134 | case WhitespaceToken: 135 | continue // nop 136 | case EOFToken: 137 | return a 138 | case CDOToken, CDCToken: 139 | if !topLevel { 140 | s.Unscan() 141 | if r := p.ConsumeQualifiedRule(s); r != nil { 142 | a = append(a, r) 143 | } 144 | continue 145 | } 146 | case AtKeywordToken: 147 | if r := p.ConsumeAtRule(s); r != nil { 148 | a = append(a, r) 149 | } 150 | continue 151 | } 152 | } 153 | 154 | // Otherwise consume a qualified rule. 155 | s.Unscan() 156 | if r := p.ConsumeQualifiedRule(s); r != nil { 157 | a = append(a, r) 158 | } 159 | } 160 | } 161 | 162 | // ConsumeAtRule consumes a single at-rule. 163 | func (p *Parser) ConsumeAtRule(s ComponentValueScanner) *AtRule { 164 | var r AtRule 165 | 166 | // Set the name to the value of the current token. 167 | // TODO(benbjohnson): Validate first token. 168 | r.Name = s.Current().(*Token).Value 169 | 170 | // Repeatedly consume the next token. 171 | for { 172 | tok := s.Scan() 173 | switch tok := tok.(type) { 174 | case *Token: 175 | switch tok.Tok { 176 | case SemicolonToken, EOFToken: 177 | return &r 178 | case LBraceToken: 179 | r.Block = p.ConsumeSimpleBlock(s) 180 | return &r 181 | } 182 | case *SimpleBlock: 183 | if tok.Token.Tok == LBraceToken { 184 | r.Block = tok 185 | return &r 186 | } 187 | } 188 | 189 | // Otherwise consume a component value. 190 | s.Unscan() 191 | v := p.ConsumeComponentValue(s) 192 | r.Prelude = append(r.Prelude, v) 193 | } 194 | } 195 | 196 | // ConsumeQualifiedRule consumes a single qualified rule. 197 | func (p *Parser) ConsumeQualifiedRule(s ComponentValueScanner) *QualifiedRule { 198 | var r QualifiedRule 199 | 200 | // Repeatedly consume the next token. 
201 | for { 202 | tok := s.Scan() 203 | switch tok := tok.(type) { 204 | case *Token: 205 | switch tok.Tok { 206 | case EOFToken: 207 | p.Errors = append(p.Errors, &Error{Message: "unexpected EOF", Pos: tok.Pos}) 208 | return nil 209 | case LBraceToken: 210 | r.Block = p.ConsumeSimpleBlock(s) 211 | return &r 212 | } 213 | case *SimpleBlock: 214 | if tok.Token.Tok == LBraceToken { 215 | r.Block = tok 216 | return &r 217 | } 218 | } 219 | s.Unscan() 220 | r.Prelude = append(r.Prelude, p.ConsumeComponentValue(s)) 221 | } 222 | } 223 | 224 | // ConsumeDeclarations consumes a list of declarations. 225 | func (p *Parser) ConsumeDeclarations(s ComponentValueScanner) Declarations { 226 | var a Declarations 227 | 228 | // Repeatedly consume the next token. 229 | for { 230 | tok := s.Scan() 231 | 232 | if tok, ok := tok.(*Token); ok { 233 | switch tok.Tok { 234 | case WhitespaceToken, SemicolonToken: 235 | continue // nop 236 | case EOFToken: 237 | return a 238 | case AtKeywordToken: 239 | a = append(a, p.ConsumeAtRule(s)) 240 | continue 241 | case IdentToken: 242 | // Generate a list of tokens up to the next semicolon or EOF. 243 | s.Unscan() 244 | values := p.consumeDeclarationValues(s) 245 | 246 | // Consume declaration using temporary list of tokens. 247 | if d := p.ConsumeDeclaration(NewComponentValueScanner(values)); d != nil { 248 | a = append(a, d) 249 | } 250 | continue 251 | } 252 | } 253 | 254 | // Any other token is a syntax error. 255 | p.Errors = append(p.Errors, &Error{Message: fmt.Sprintf("unexpected: %s", print(tok)), Pos: Position(tok)}) 256 | 257 | // Repeatedly consume a component values until semicolon or EOF. 258 | p.skipComponentValues(s) 259 | } 260 | } 261 | 262 | // ConsumeDeclaration consumes a single declaration. 263 | func (p *Parser) ConsumeDeclaration(s ComponentValueScanner) *Declaration { 264 | var d Declaration 265 | 266 | // The first token must be an ident. 267 | // TODO(benbjohnson): Validate initial token. 
268 | d.Name = s.Scan().(*Token).Value 269 | 270 | // Skip over whitespace. 271 | p.skipWhitespace(s) 272 | 273 | // The next token must be a colon. 274 | if tok := s.Scan().(*Token); tok.Tok != ColonToken { 275 | p.Errors = append(p.Errors, &Error{Message: fmt.Sprintf("expected colon, got %s", print(s.Current())), Pos: Position(s.Current())}) 276 | return nil 277 | } 278 | 279 | // Consume the declaration value until EOF. 280 | for { 281 | tok := s.Scan() 282 | if tok, ok := tok.(*Token); ok && tok.Tok == EOFToken { 283 | break 284 | } 285 | d.Values = append(d.Values, tok) 286 | } 287 | 288 | // Check last two non-whitespace tokens for "!important". 289 | d.Values, d.Important = cleanImportantFlag(d.Values) 290 | 291 | return &d 292 | } 293 | 294 | // Checks if the last two non-whitespace tokens are a case-insensitive "!important". 295 | // If so, it removes them and returns the "important" flag set to true. 296 | func cleanImportantFlag(values ComponentValues) (ComponentValues, bool) { 297 | a := values.nonwhitespace() 298 | if len(a) < 2 { 299 | return values, false 300 | } 301 | 302 | // Check last two tokens for "!important". 303 | if tok, ok := a[len(a)-2].(*Token); !ok || tok.Tok != DelimToken || tok.Value != "!" { 304 | return values, false 305 | } 306 | if tok, ok := a[len(a)-1].(*Token); !ok || tok.Tok != IdentToken || strings.ToLower(tok.Value) != "important" { 307 | return values, false 308 | } 309 | 310 | // Trim "!important" tokens off values. 311 | for i, v := range values { 312 | if v == a[len(a)-2] { 313 | values = values[:i] 314 | break 315 | } 316 | } 317 | 318 | return values, true 319 | } 320 | 321 | // ConsumeComponentValue consumes a single component value. 
(§5.4.6) 322 | func (p *Parser) ConsumeComponentValue(s ComponentValueScanner) ComponentValue { 323 | tok := s.Scan() 324 | if tok, ok := tok.(*Token); ok { 325 | switch tok.Tok { 326 | case LBraceToken, LBrackToken, LParenToken: 327 | return p.ConsumeSimpleBlock(s) 328 | case FunctionToken: 329 | return p.ConsumeFunction(s) 330 | } 331 | } 332 | return tok 333 | } 334 | 335 | // ConsumeSimpleBlock consumes a simple block. (§5.4.7) 336 | func (p *Parser) ConsumeSimpleBlock(s ComponentValueScanner) *SimpleBlock { 337 | b := &SimpleBlock{} 338 | 339 | // Set the block's associated token to the current token. 340 | // TODO(benbjohnson): Validate first token. 341 | b.Token = s.Current().(*Token) 342 | 343 | for { 344 | tok := s.Scan() 345 | 346 | // If this token is EOF or the mirror of the starting token then return. 347 | if tok, ok := tok.(*Token); ok { 348 | switch tok.Tok { 349 | case EOFToken: 350 | return b 351 | case RBrackToken: 352 | if b.Token.Tok == LBrackToken { 353 | return b 354 | } 355 | case RBraceToken: 356 | if b.Token.Tok == LBraceToken { 357 | return b 358 | } 359 | case RParenToken: 360 | if b.Token.Tok == LParenToken { 361 | return b 362 | } 363 | } 364 | } 365 | 366 | // Otherwise consume a component value. 367 | s.Unscan() 368 | b.Values = append(b.Values, p.ConsumeComponentValue(s)) 369 | } 370 | } 371 | 372 | // ConsumeFunction consumes a function. 373 | func (p *Parser) ConsumeFunction(s ComponentValueScanner) *Function { 374 | f := &Function{} 375 | 376 | // Set the name to the first token. 377 | // TODO(benbjohnson): Validate first token. 378 | f.Name = s.Current().(*Token).Value 379 | 380 | for { 381 | tok := s.Scan() 382 | 383 | // If this token is EOF or the mirror of the starting token then return. 384 | if tok, ok := tok.(*Token); ok && (tok.Tok == EOFToken || tok.Tok == RParenToken) { 385 | return f 386 | } 387 | 388 | // Otherwise consume a component value. 
389 | s.Unscan() 390 | f.Values = append(f.Values, p.ConsumeComponentValue(s)) 391 | } 392 | } 393 | 394 | // consumeDeclarationTokens collects contiguous non-semicolon and non-EOF tokens. 395 | func (p *Parser) consumeDeclarationValues(s ComponentValueScanner) ComponentValues { 396 | var a ComponentValues 397 | for { 398 | tok := s.Scan() 399 | if tok, ok := tok.(*Token); ok && (tok.Tok == SemicolonToken || tok.Tok == EOFToken) { 400 | s.Unscan() 401 | return a 402 | } 403 | a = append(a, tok) 404 | } 405 | } 406 | 407 | // skipComponentValues consumes all component values until a semicolon or EOF. 408 | func (p *Parser) skipComponentValues(s ComponentValueScanner) { 409 | for { 410 | v := p.ConsumeComponentValue(s) 411 | if tok, ok := v.(*Token); ok { 412 | switch tok.Tok { 413 | case SemicolonToken, EOFToken: 414 | return 415 | } 416 | } 417 | } 418 | } 419 | 420 | // skipWhitespace skips over all contiguous whitespace tokes. 421 | func (p *Parser) skipWhitespace(s ComponentValueScanner) { 422 | for { 423 | if tok, ok := s.Scan().(*Token); ok && tok.Tok != WhitespaceToken { 424 | s.Unscan() 425 | return 426 | } 427 | } 428 | } 429 | 430 | // ComponentValueScanner represents a type that can retrieve the next component value. 431 | type ComponentValueScanner interface { 432 | Current() ComponentValue 433 | Scan() ComponentValue 434 | Unscan() 435 | } 436 | 437 | // NewComponentValueScanner returns a scanner for a fixed list of component values. 438 | // This can be used with nodes which have blocks such as at-rules. For example, 439 | // a @media query can have a full ruleset inside its block. This block can be 440 | // further parsed using the consume functions on the Parser. 441 | func NewComponentValueScanner(values ComponentValues) ComponentValueScanner { 442 | return &componentValueScanner{i: -1, values: values} 443 | } 444 | 445 | // componentValueScanner represents a scanner for a fixed list of component values. 
446 | type componentValueScanner struct { 447 | i int 448 | values ComponentValues 449 | } 450 | 451 | // Current returns the current component value. 452 | func (s *componentValueScanner) Current() ComponentValue { 453 | if s.i >= len(s.values) { 454 | return &Token{Tok: EOFToken} 455 | } 456 | return s.values[s.i] 457 | } 458 | 459 | // Scan returns the next component value. 460 | func (s *componentValueScanner) Scan() ComponentValue { 461 | if s.i < len(s.values) { 462 | s.i++ 463 | } 464 | return s.Current() 465 | } 466 | 467 | // Unscan moves back one component value. 468 | func (s *componentValueScanner) Unscan() { 469 | if s.i > -1 { 470 | s.i-- 471 | } 472 | } 473 | -------------------------------------------------------------------------------- /parser_test.go: -------------------------------------------------------------------------------- 1 | package css_test 2 | 3 | import ( 4 | "bytes" 5 | "strings" 6 | "testing" 7 | 8 | "github.com/benbjohnson/css" 9 | ) 10 | 11 | // Ensure that a stylesheet can be parsed into an AST. 12 | func TestParser_ParseStyleSheet(t *testing.T) { 13 | var tests = []ParserTest{ 14 | {in: `foo { padding: 10px; } @bar;`, out: `foo { padding: 10px; } @bar;`}, 15 | } 16 | 17 | for _, tt := range tests { 18 | var p css.Parser 19 | v := p.ParseStyleSheet(css.NewScanner(strings.NewReader(tt.in))) 20 | tt.Assert(t, v, p.Errors) 21 | } 22 | } 23 | 24 | // Ensure that a list of rules can be parsed into an AST. 
25 | func TestParser_ParseRules(t *testing.T) { 26 | var tests = []ParserTest{ 27 | {in: `foo { padding: 10px; }`, out: `foo { padding: 10px; }`}, 28 | {in: `@import url(/css/screen.css) screen, projection;`, out: `@import url(/css/screen.css) screen, projection;`}, 29 | {in: `@xxx; foo { padding: 10 0; }`, out: `@xxx; foo { padding: 10 0; }`}, 30 | {in: ` foo { }`, out: ` foo { }`}, 31 | } 32 | 33 | for _, tt := range tests { 34 | var p css.Parser 35 | v := p.ParseRules(css.NewScanner(strings.NewReader(tt.in))) 36 | tt.Assert(t, v, p.Errors) 37 | } 38 | } 39 | 40 | // Ensure that a rule can be parsed into an AST. 41 | func TestParser_ParseRule(t *testing.T) { 42 | var tests = []ParserTest{ 43 | {in: `foo { padding: 10px; }`, out: `foo { padding: 10px; }`}, 44 | {in: `foo { padding: 10px; `, out: `foo { padding: 10px; }`}, 45 | {in: ` #foo bar, .baz bat {} `, out: `#foo bar, .baz bat {}`}, 46 | {in: `@media (max-width: 600px) { .nav { display: none; }}`, out: `@media (max-width: 600px) { .nav { display: none; }}`}, 47 | 48 | {in: ``, err: `unexpected EOF`}, 49 | {in: ` `, err: `unexpected EOF`}, 50 | {in: `foo {} bar`, err: `expected EOF, got bar`}, 51 | } 52 | 53 | for _, tt := range tests { 54 | var p css.Parser 55 | v := p.ParseRule(css.NewScanner(strings.NewReader(tt.in))) 56 | tt.Assert(t, v, p.Errors) 57 | } 58 | } 59 | 60 | // Ensure that a declaration can be parsed into an AST. 61 | func TestParser_ParseDeclaration(t *testing.T) { 62 | var tests = []ParserTest{ 63 | {in: `foo: bar`, out: `foo: bar`}, 64 | {in: `color: #FFFFFF !important`, out: `color: #FFFFFF !important`}, 65 | {in: `color: #FFFFFF ! important `, out: `color: #FFFFFF !important`}, 66 | {in: `color: !important `, out: `color: !important`}, 67 | {in: `color: $ important`, out: `color: $ important`}, 68 | {in: `color: ! importante`, out: `color: ! 
importante`}, 69 | 70 | {in: ``, err: `expected ident, got EOF`}, 71 | {in: ` foo bar`, err: `expected colon, got bar`}, 72 | } 73 | 74 | for _, tt := range tests { 75 | var p css.Parser 76 | v := p.ParseDeclaration(css.NewScanner(strings.NewReader(tt.in))) 77 | tt.Assert(t, v, p.Errors) 78 | } 79 | } 80 | 81 | // Ensure that a list of declarations can be parsed into an AST. 82 | func TestParser_ParseDeclarations(t *testing.T) { 83 | var tests = []ParserTest{ 84 | {in: `foo: bar`, out: `foo: bar;`}, 85 | {in: `font-size: 20px; font-weight:bold`, out: `font-size: 20px; font-weight:bold;`}, 86 | {in: `font-weight: bold; @page { margin: 1in; };`, out: `font-weight: bold; @page { margin: 1in; };`}, 87 | {in: `@page { margin: 1in; }; font-weight: bold;`, out: `@page { margin: 1in; }; font-weight: bold;`}, 88 | {in: `100; foo: bar`, out: `foo: bar;`, err: `unexpected: 100`}, 89 | } 90 | 91 | for _, tt := range tests { 92 | var p css.Parser 93 | v := p.ParseDeclarations(css.NewScanner(strings.NewReader(tt.in))) 94 | tt.Assert(t, v, p.Errors) 95 | } 96 | } 97 | 98 | // Ensure that component values can be parsed into the correct AST. 
99 | func TestParser_ParseComponentValue(t *testing.T) { 100 | var tests = []ParserTest{ 101 | {in: `foo`, out: `foo`}, 102 | {in: ` :`, out: `:`}, 103 | {in: ` : `, out: `:`}, 104 | {in: `{}`, out: `{}`}, 105 | {in: `{foo: bar}`, out: `{foo: bar}`}, 106 | {in: `{foo: {bar}}`, out: `{foo: {bar}}`}, 107 | {in: ` [12.34]`, out: `[12.34]`}, 108 | {in: ` [12.34]`, out: `[12.34]`}, 109 | {in: ` fun(12, 34, "foo")`, out: `fun(12, 34, "foo")`}, 110 | {in: ` fun("hello"`, out: `fun("hello")`}, 111 | 112 | {in: ``, err: `unexpected EOF`}, 113 | {in: ` foo bar`, err: `expected EOF, got bar`}, 114 | } 115 | 116 | for _, tt := range tests { 117 | var p css.Parser 118 | v := p.ParseComponentValue(css.NewScanner(strings.NewReader(tt.in))) 119 | tt.Assert(t, v, p.Errors) 120 | } 121 | } 122 | 123 | // Ensure that a list of component values can be parsed into the correct AST. 124 | func TestParser_ParseComponentValues(t *testing.T) { 125 | var tests = []ParserTest{ 126 | {in: `foo bar`, out: `foo bar`}, 127 | {in: `foo func(bar) { baz }`, out: `foo func(bar) { baz }`}, 128 | } 129 | 130 | for _, tt := range tests { 131 | var p css.Parser 132 | v := p.ParseComponentValues(css.NewScanner(strings.NewReader(tt.in))) 133 | tt.Assert(t, v, p.Errors) 134 | } 135 | } 136 | 137 | // Ensure that a ruleset can be parsed from a list of component values. 138 | func TestParser_ConsumeRules(t *testing.T) { 139 | var tests = []ParserTest{ 140 | {in: `@media (max-width: 600px) { @test xxx { width: 100 } .nav { display: none; } }`, out: `@test xxx { width: 100 } .nav { display: none; }`}, 141 | } 142 | 143 | for _, tt := range tests { 144 | var p css.Parser 145 | r := p.ParseRule(css.NewScanner(strings.NewReader(tt.in))) 146 | s := css.NewComponentValueScanner(r.(*css.AtRule).Block.Values) 147 | v := p.ConsumeRules(s, false) 148 | tt.Assert(t, v, p.Errors) 149 | } 150 | } 151 | 152 | // Ensure that consuming an empty string as a qualified rule returns an error. 
153 | func TestParser_ConsumeQualifiedRule_ErrUnexpectedEOF(t *testing.T) { 154 | var p css.Parser 155 | if v := p.ConsumeQualifiedRule(css.NewComponentValueScanner(nil)); v != nil { 156 | t.Errorf("unexpected value: %s", print(v)) 157 | } else if p.Errors.Error() != "unexpected EOF" { 158 | t.Errorf("expected error msg: %s", p.Errors.Error()) 159 | } 160 | } 161 | 162 | // ParserTest represents a generic framework for table tests against the parser. 163 | type ParserTest struct { 164 | in string // input CSS 165 | out string // matches against generated CSS 166 | err string // stringified error, empty string if no error. 167 | } 168 | 169 | // Assert validates the node against the output CSS and checks for errors. 170 | func (tt *ParserTest) Assert(t *testing.T, n css.Node, errors css.ErrorList) { 171 | var errstring string 172 | if len(errors) > 0 { 173 | errstring = errors.Error() 174 | } 175 | 176 | if (tt.err != "" || errstring != "") && tt.err != errstring { 177 | t.Errorf("<%q> error: exp=%q, got=%q", tt.in, tt.err, errstring) 178 | } else if n == nil && tt.out != "" { 179 | t.Errorf("<%q> expected value", tt.in) 180 | } else if print(n) != tt.out { 181 | t.Errorf("<%q>\n\nexp: %s\n\ngot: %s", tt.in, tt.out, print(n)) 182 | } 183 | } 184 | 185 | // print pretty prints an AST node to a string using the default configuration. 186 | func print(n css.Node) string { 187 | var buf bytes.Buffer 188 | var p css.Printer 189 | _ = p.Print(&buf, n) 190 | return buf.String() 191 | } 192 | -------------------------------------------------------------------------------- /printer.go: -------------------------------------------------------------------------------- 1 | package css 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "io" 7 | ) 8 | 9 | // TODO(benbjohnson): Allow collapsing whitespace. 10 | 11 | // Printer represents a configurable CSS printer. 
12 | type Printer struct{} 13 | 14 | func (p *Printer) Print(w io.Writer, n Node) (err error) { 15 | switch n := n.(type) { 16 | case *StyleSheet: 17 | if n == nil { 18 | return nil 19 | } 20 | for i, r := range n.Rules { 21 | if i > 0 { 22 | _, err = w.Write([]byte{' '}) 23 | } 24 | _ = p.Print(w, r) 25 | } 26 | 27 | case Rules: 28 | if n == nil { 29 | return nil 30 | } 31 | for i, r := range n { 32 | if i > 0 { 33 | _, _ = w.Write([]byte{' '}) 34 | } 35 | err = p.Print(w, r) 36 | } 37 | 38 | case *AtRule: 39 | if n == nil { 40 | return nil 41 | } 42 | _, _ = w.Write([]byte{'@'}) 43 | _, _ = w.Write([]byte(n.Name)) 44 | if len(n.Prelude) > 0 { 45 | _ = p.Print(w, n.Prelude) 46 | } 47 | if n.Block != nil { 48 | err = p.Print(w, n.Block) 49 | } else { 50 | _, err = w.Write([]byte{';'}) 51 | } 52 | 53 | case *QualifiedRule: 54 | if n == nil { 55 | return nil 56 | } 57 | _ = p.Print(w, n.Prelude) 58 | err = p.Print(w, n.Block) 59 | 60 | case *Declaration: 61 | if n == nil { 62 | return nil 63 | } 64 | _, _ = w.Write([]byte(n.Name)) 65 | _, _ = w.Write([]byte{':'}) 66 | err = p.Print(w, n.Values) 67 | if n.Important { 68 | _, err = w.Write([]byte("!important")) 69 | } 70 | 71 | case Declarations: 72 | if n == nil { 73 | return nil 74 | } 75 | for i, v := range n { 76 | if i > 0 { 77 | _, _ = w.Write([]byte{' '}) 78 | } 79 | _ = p.Print(w, v) 80 | _, err = w.Write([]byte{';'}) 81 | } 82 | 83 | case ComponentValues: 84 | if n == nil { 85 | return nil 86 | } 87 | for _, v := range n { 88 | err = p.Print(w, v) 89 | } 90 | 91 | case *SimpleBlock: 92 | if n == nil { 93 | return nil 94 | } 95 | switch n.Token.Tok { 96 | case LBraceToken: 97 | _, _ = w.Write([]byte{'{'}) 98 | case LBrackToken: 99 | _, _ = w.Write([]byte{'['}) 100 | case LParenToken: 101 | _, _ = w.Write([]byte{'('}) 102 | } 103 | 104 | _ = p.Print(w, n.Values) 105 | 106 | switch n.Token.Tok { 107 | case LBraceToken: 108 | _, _ = w.Write([]byte{'}'}) 109 | case LBrackToken: 110 | _, _ = w.Write([]byte{']'}) 111 
| case LParenToken: 112 | _, _ = w.Write([]byte{')'}) 113 | } 114 | 115 | case *Function: 116 | if n == nil { 117 | return nil 118 | } 119 | _, _ = w.Write([]byte(n.Name)) 120 | _, _ = w.Write([]byte{'('}) 121 | _ = p.Print(w, n.Values) 122 | _, err = w.Write([]byte{')'}) 123 | 124 | case *Token: 125 | if n == nil { 126 | return nil 127 | } 128 | switch n.Tok { 129 | case IdentToken: 130 | _, err = w.Write([]byte(n.Value)) 131 | case FunctionToken: 132 | _, err = w.Write([]byte(n.Value + "(")) 133 | case AtKeywordToken: 134 | _, err = w.Write([]byte("@" + n.Value)) 135 | case HashToken: 136 | _, err = w.Write([]byte("#" + n.Value)) 137 | case StringToken: 138 | _, err = w.Write([]byte(string(n.Ending) + n.Value + string(n.Ending))) 139 | case BadStringToken: 140 | _, err = w.Write([]byte("''")) 141 | case URLToken: 142 | _, err = w.Write([]byte("url(" + n.Value + ")")) 143 | case BadURLToken: 144 | _, err = w.Write([]byte("url()")) 145 | case DelimToken, NumberToken, PercentageToken, DimensionToken, WhitespaceToken: 146 | _, err = w.Write([]byte(n.Value)) 147 | case UnicodeRangeToken: 148 | if n.Start == n.End { 149 | _, err = fmt.Fprintf(w, "U+%06x", n.Start) 150 | } else { 151 | _, err = fmt.Fprintf(w, "U+%06x-U+%06x", n.Start, n.End) 152 | } 153 | case IncludeMatchToken: 154 | _, err = w.Write([]byte("~=")) 155 | case DashMatchToken: 156 | _, err = w.Write([]byte("|=")) 157 | case PrefixMatchToken: 158 | _, err = w.Write([]byte("^=")) 159 | case SuffixMatchToken: 160 | _, err = w.Write([]byte("$=")) 161 | case SubstringMatchToken: 162 | _, err = w.Write([]byte("*=")) 163 | case ColumnToken: 164 | _, err = w.Write([]byte("||")) 165 | case CDOToken: 166 | _, err = w.Write([]byte("")) 169 | case ColonToken: 170 | _, err = w.Write([]byte{':'}) 171 | case SemicolonToken: 172 | _, err = w.Write([]byte{';'}) 173 | case CommaToken: 174 | _, err = w.Write([]byte{','}) 175 | case LBrackToken: 176 | _, err = w.Write([]byte{'['}) 177 | case RBrackToken: 178 | _, err = 
w.Write([]byte{']'}) 179 | case LParenToken: 180 | _, err = w.Write([]byte{'('}) 181 | case RParenToken: 182 | _, err = w.Write([]byte{')'}) 183 | case LBraceToken: 184 | _, err = w.Write([]byte{'{'}) 185 | case RBraceToken: 186 | _, err = w.Write([]byte{'}'}) 187 | case EOFToken: 188 | _, err = w.Write([]byte("EOF")) 189 | } 190 | } 191 | 192 | return 193 | } 194 | 195 | // print pretty prints an AST node to a string using the default configuration. 196 | func print(n Node) string { 197 | var p Printer 198 | var buf bytes.Buffer 199 | _ = p.Print(&buf, n) 200 | return buf.String() 201 | } 202 | -------------------------------------------------------------------------------- /printer_test.go: -------------------------------------------------------------------------------- 1 | package css_test 2 | 3 | import ( 4 | "bytes" 5 | "testing" 6 | 7 | "github.com/benbjohnson/css" 8 | ) 9 | 10 | // Ensure than the printer prints nodes correctly. 11 | func TestPrinter_Print(t *testing.T) { 12 | var tests = []struct { 13 | in css.Node 14 | s string 15 | }{ 16 | // 0. Full stylesheet with multiple rules. 17 | {in: &css.StyleSheet{ 18 | Rules: []css.Rule{ 19 | &css.QualifiedRule{ 20 | Prelude: []css.ComponentValue{ 21 | &css.Token{Tok: css.IdentToken, Value: "foo"}, 22 | &css.Token{Tok: css.WhitespaceToken, Value: " "}, 23 | &css.Token{Tok: css.IdentToken, Value: "bar"}, 24 | }, 25 | Block: &css.SimpleBlock{ 26 | Token: &css.Token{Tok: css.LBraceToken}, 27 | Values: []css.ComponentValue{ 28 | &css.Token{Tok: css.IdentToken, Value: "font-size"}, 29 | &css.Token{Tok: css.ColonToken}, 30 | &css.Token{Tok: css.IdentToken, Value: "10px"}, 31 | }, 32 | }, 33 | }, 34 | &css.AtRule{ 35 | Name: "baz", 36 | Prelude: []css.ComponentValue{ 37 | &css.Token{Tok: css.WhitespaceToken, Value: " "}, 38 | &css.Token{Tok: css.IdentToken, Value: "my-rule"}, 39 | }, 40 | }, 41 | }, 42 | }, s: `foo bar{font-size:10px} @baz my-rule;`}, 43 | 44 | // Test that nil values are safe to print. 
45 | {in: (*css.StyleSheet)(nil), s: ``}, // 1 46 | {in: (css.Rules)(nil), s: ``}, // 2 47 | {in: (*css.AtRule)(nil), s: ``}, // 3 48 | {in: (*css.QualifiedRule)(nil), s: ``}, // 4 49 | {in: (css.Declarations)(nil), s: ``}, // 5 50 | {in: (*css.Declaration)(nil), s: ``}, // 6 51 | {in: (css.ComponentValues)(nil), s: ``}, // 7 52 | {in: (*css.SimpleBlock)(nil), s: ``}, // 8 53 | {in: (*css.Function)(nil), s: ``}, // 9 54 | {in: (*css.Token)(nil), s: ``}, // 10 55 | 56 | // Test individual tokens. 57 | {in: &css.Token{Tok: css.IdentToken, Value: "foo"}, s: `foo`}, // 11 58 | {in: &css.Token{Tok: css.FunctionToken, Value: "foo"}, s: `foo(`}, // 12 59 | {in: &css.Token{Tok: css.AtKeywordToken, Value: "☃"}, s: `@☃`}, // 13 60 | {in: &css.Token{Tok: css.HashToken, Value: "foo"}, s: `#foo`}, // 14 61 | {in: &css.Token{Tok: css.StringToken, Value: "foo", Ending: '"'}, s: `"foo"`}, // 15 62 | {in: &css.Token{Tok: css.StringToken, Value: "foo", Ending: '\''}, s: `'foo'`}, // 16 63 | {in: &css.Token{Tok: css.BadStringToken}, s: `''`}, // 17 64 | {in: &css.Token{Tok: css.URLToken, Value: "foo"}, s: `url(foo)`}, // 18 65 | {in: &css.Token{Tok: css.BadURLToken, Value: "foo"}, s: `url()`}, // 19 66 | {in: &css.Token{Tok: css.DelimToken, Value: "."}, s: `.`}, // 20 67 | {in: &css.Token{Tok: css.NumberToken, Value: "-20.3E2"}, s: `-20.3E2`}, // 21 68 | {in: &css.Token{Tok: css.PercentageToken, Value: "100%"}, s: `100%`}, // 22 69 | {in: &css.Token{Tok: css.DimensionToken, Value: "10cm"}, s: `10cm`}, // 23 70 | {in: &css.Token{Tok: css.WhitespaceToken, Value: " "}, s: ` `}, // 24 71 | {in: &css.Token{Tok: css.DelimToken, Value: "."}, s: `.`}, // 25 72 | {in: &css.Token{Tok: css.IncludeMatchToken}, s: `~=`}, // 26 73 | {in: &css.Token{Tok: css.DashMatchToken}, s: `|=`}, // 27 74 | {in: &css.Token{Tok: css.PrefixMatchToken}, s: `^=`}, // 28 75 | {in: &css.Token{Tok: css.SuffixMatchToken}, s: `$=`}, // 29 76 | {in: &css.Token{Tok: css.SubstringMatchToken}, s: `*=`}, // 30 77 | {in: 
&css.Token{Tok: css.ColumnToken}, s: `||`}, // 11 78 | {in: &css.Token{Tok: css.CDOToken}, s: ``}, // 11 80 | {in: &css.Token{Tok: css.ColonToken}, s: `:`}, // 11 81 | {in: &css.Token{Tok: css.SemicolonToken}, s: `;`}, // 11 82 | {in: &css.Token{Tok: css.CommaToken}, s: `,`}, // 11 83 | {in: &css.Token{Tok: css.LBrackToken}, s: `[`}, // 11 84 | {in: &css.Token{Tok: css.RBrackToken}, s: `]`}, // 11 85 | {in: &css.Token{Tok: css.LParenToken}, s: `(`}, // 11 86 | {in: &css.Token{Tok: css.RParenToken}, s: `)`}, // 11 87 | {in: &css.Token{Tok: css.LBraceToken}, s: `{`}, // 11 88 | {in: &css.Token{Tok: css.RBraceToken}, s: `}`}, // 11 89 | 90 | {in: &css.Token{Tok: css.UnicodeRangeToken, Start: 10, End: 10}, s: `U+00000a`}, // 11 91 | {in: &css.Token{Tok: css.UnicodeRangeToken, Start: 10, End: 20}, s: `U+00000a-U+000014`}, // 11 92 | 93 | {in: &css.Token{Tok: css.EOFToken}, s: `EOF`}, // 11 94 | } 95 | 96 | for i, tt := range tests { 97 | var buf bytes.Buffer 98 | var p css.Printer 99 | err := p.Print(&buf, tt.in) 100 | 101 | if err != nil { 102 | t.Errorf("%d. unexpected error: %s", i, tt.s) 103 | } else if tt.s != buf.String() { 104 | t.Errorf("%d. \n\nexp: %s\n\ngot: %s\n\n", i, tt.s, buf.String()) 105 | } 106 | } 107 | } 108 | 109 | // TODO(benbjohnson): Example: Printer.Print() 110 | -------------------------------------------------------------------------------- /scanner.go: -------------------------------------------------------------------------------- 1 | package css 2 | 3 | import ( 4 | "bufio" 5 | "bytes" 6 | "fmt" 7 | "io" 8 | "strconv" 9 | "strings" 10 | ) 11 | 12 | // eof represents an EOF file byte. 13 | var eof rune = -1 14 | 15 | // Scanner implements a CSS3 standard compliant tokenizer. 16 | // 17 | // This implementation only allows UTF-8 encoding. 18 | // @charset directives will be ignored. 19 | type Scanner struct { 20 | // Errors contains a list of all errors that occur during scanning. 
21 | Errors []*Error 22 | 23 | rd io.RuneReader 24 | 25 | tokbuf *Token // last token read from the scanner. 26 | tokbufn bool // whether the token buffer is in use. 27 | 28 | buf [4]rune // circular buffer for runes 29 | bufpos [4]Pos // circular buffer for position 30 | bufi int // circular buffer index 31 | bufn int // number of buffered characters 32 | } 33 | 34 | // NewScanner returns a new instance of Scanner. 35 | func NewScanner(r io.Reader) *Scanner { 36 | return &Scanner{rd: bufio.NewReader(r)} 37 | } 38 | 39 | // Scan returns the next token from the reader. 40 | func (s *Scanner) Scan() *Token { 41 | // If unscan was the last call then return the previous token again. 42 | if s.tokbufn { 43 | s.tokbufn = false 44 | return s.tokbuf 45 | } 46 | 47 | // Otherwise read from the reader and save the token. 48 | tok := s.scan() 49 | s.tokbuf = tok 50 | return tok 51 | } 52 | 53 | func (s *Scanner) scan() *Token { 54 | for { 55 | // Read next code point. 56 | ch := s.read() 57 | pos := s.pos() 58 | 59 | // If whitespace code point found, then consume all contiguous whitespace. 60 | if isWhitespace(ch) { 61 | return s.scanWhitespace() 62 | } 63 | 64 | // If a digit is found, consume a number. 65 | if isDigit(ch) { 66 | s.unread(1) 67 | return s.scanNumeric(pos) 68 | } 69 | 70 | // If a u or U is found, attempt to scan a unicode range. 71 | // If it's the beginning of a name then consume an identifier. 72 | if ch == 'u' || ch == 'U' { 73 | // Peek "+[0-9a-f]" or "+?", consume next code point, consume unicode-range. 74 | ch1, ch2 := s.read(), s.read() 75 | if ch1 == '+' && (isHexDigit(ch2) || ch2 == '?') { 76 | s.unread(1) 77 | return s.scanUnicodeRange() 78 | } 79 | // Otherwise reconsume as ident. 80 | s.unread(2) 81 | return s.scanIdent() 82 | } else if isNameStart(ch) { 83 | return s.scanIdent() 84 | } 85 | 86 | // Check against individual code points next. 
87 | switch ch { 88 | case eof: 89 | return &Token{Tok: EOFToken, Pos: pos} 90 | case '"', '\'': 91 | return s.scanString() 92 | case '#': 93 | return s.scanHash() 94 | 95 | case '$': 96 | if next := s.read(); next == '=' { 97 | return &Token{Tok: SuffixMatchToken, Pos: pos} 98 | } 99 | s.unread(1) 100 | return &Token{Tok: DelimToken, Value: string(ch), Pos: pos} 101 | 102 | case '*': 103 | if next := s.read(); next == '=' { 104 | return &Token{Tok: SubstringMatchToken, Pos: pos} 105 | } 106 | s.unread(1) 107 | return &Token{Tok: DelimToken, Value: string(ch), Pos: pos} 108 | 109 | case '^': 110 | if next := s.read(); next == '=' { 111 | return &Token{Tok: PrefixMatchToken, Pos: pos} 112 | } 113 | s.unread(1) 114 | return &Token{Tok: DelimToken, Value: string(ch), Pos: pos} 115 | 116 | case '~': 117 | if next := s.read(); next == '=' { 118 | return &Token{Tok: IncludeMatchToken, Pos: pos} 119 | } 120 | s.unread(1) 121 | return &Token{Tok: DelimToken, Value: string(ch), Pos: pos} 122 | 123 | case ',': 124 | return &Token{Tok: CommaToken, Pos: pos} 125 | 126 | case '-': 127 | // Check for a number or identifier. 128 | if s.peekNumber() { 129 | s.unread(1) 130 | return s.scanNumeric(pos) 131 | } else if s.peekIdent() { 132 | return s.scanIdent() 133 | } 134 | 135 | // Scan next two code points to see if we have a CDC (-->). 136 | ch1, ch2 := s.read(), s.read() 137 | if ch1 == '-' && ch2 == '>' { 138 | return &Token{Tok: CDCToken, Pos: pos} 139 | } 140 | s.unread(2) 141 | 142 | // Otherwise return the hyphen by itself. 143 | return &Token{Tok: DelimToken, Value: "-", Pos: pos} 144 | 145 | case '/': 146 | // Comments are ignored by the scanner so restart the loop from 147 | // the end of the comment and get the next token. 
148 | if ch1 := s.read(); ch1 == '*' { 149 | s.scanComment() 150 | continue 151 | } 152 | s.unread(1) 153 | return &Token{Tok: DelimToken, Value: "/", Pos: pos} 154 | 155 | case ':': 156 | return &Token{Tok: ColonToken, Pos: pos} 157 | case ';': 158 | return &Token{Tok: SemicolonToken, Pos: pos} 159 | 160 | case '<': 161 | // Attempt to read a comment open ("