├── LICENSE ├── NOTES ├── README.md ├── ast.go ├── ast_test.go ├── doc.go ├── parser.go ├── parser_test.go ├── printer.go ├── printer_test.go ├── scanner.go ├── scanner_test.go ├── walk.go └── walk_test.go /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2013 Ben Johnson 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so, 10 | subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /NOTES: -------------------------------------------------------------------------------- 1 | Types 2 | ===== 3 | 4 | Stylesheet 5 | QualifiedRule 6 | AtRule 7 | 8 | 9 | 10 | § 2. Description of CSS's Syntax 11 | 12 | - CSS document is a series of qualified rules and at-rules. 13 | - Qualified rule: prelude followed by block. 14 | - For style rules, prelude is a series of selectors. 
15 | - Declarations: a name followed by a colon followed by a value; semicolon separated. 16 | - At-rules have a basic structure: "@" + name. 17 | - Some end with a semicolon. 18 | - Some end with a block. 19 | - Names are always identifiers: start with [-a-z] followed by [-_a-z0-9] or escaped codepoints. 20 | 21 | 22 | § 2.1. Escaping 23 | 24 | - Starts with \ 25 | - Followed by code point that is not a hex digit or newline. 26 | - 1 - 6 hex digits followed by optional whitespace. 27 | 28 | 29 | § 2.2 Error Handling 30 | 31 | - Recover gracefully, only throw away a minimum amount of content. 32 | - At top-level, "@" starts an at-rule, anything else is a qualified rule. 33 | - Once an at-rule starts, nothing is invalid. 34 | - Everything before semicolon or block is prelude. 35 | - Block is parsed according to at-rule's own grammar. 36 | - Qualified rule is similar except semicolons don't end them. 37 | - First block is parsed as list of declarations. 38 | - When parsing declarations, unknown syntax causes parser to move to next semicolon. 39 | - Stylesheets ending with open rule, decl, function, string, etc simply closes everything. 40 | - Does not make them invalid. 41 | 42 | 43 | § 3. Tokenizing and Parsing CSS 44 | 45 | - Error handling for user agents is well defined. 46 | - Must abort at first error they do not wish to apply the rules below. 47 | - Output is a CSSStyleSheet object. 48 | 49 | § 3.2. The input byte stream 50 | 51 | - Stream of bytes. 52 | - Encoding based on: 53 | 1. HTTP protocol specifying it. 54 | 2. Read first 1024 bytes and check for: @chartset "..." 55 | • If 'utf-16be' or 'utf-16le' then use utf-8 56 | • Otherwise use value specified. 57 | 58 | § 3.3. Preprocessing the input stream 59 | 60 | - Must replace CR, FF, or CRLF to a single LF. 61 | - Replace NULL with U+FFFD 62 | 63 | 64 | § 4. Tokenization 65 | 66 | - Each scan returns a single token. 
67 | - Types: 68 | * IDENT 69 | * FUNCTION 70 | * ATKEYWORD 71 | * HASH 72 | * STRING 73 | * BADSTRING 74 | * URL 75 | * BADURL 76 | * DELIM 77 | * NUMBER 78 | * PERCENTAGE 79 | * DIMENSION 80 | * UNICODERANGE 81 | * INCLUDEMATCH 82 | * DASHMATCH 83 | * PREFIXMATCH 84 | * SUFFIXMATCH 85 | * SUBSTRINGMATCH 86 | * COLUMN 87 | * WHITESPACE 88 | * CDO 89 | * CDC 90 | * COLON 91 | * SEMICOLON 92 | * COMMA 93 | * LBRACKET 94 | * RBRACKET 95 | * LPAREN 96 | * RPAREN 97 | * LBRACE 98 | * RBRACE 99 | - IDENT, FUNCTION, ATKEYWORD, HASH, STRING, URL have value with 0..* code points. 100 | - HASH has a type flag set to "id" or "restricted". Defaults to "restricted". 101 | - DELIM has a value with 1 code point. 102 | - NUMBER, PERCENTAGE, DIMENSION have 1..* code points and a numeric value. 103 | - NUMBER, DIMENSION have a flag set to "integer" (default) or "number". 104 | - DIMENSION has a unit with 1..* code points. 105 | - UNICODERANGE has a start and end pair of integers. 106 | 107 | - Tokenizer requires LL(3)!!! 108 | - Produces tokens designed to allow selectors to be parsed with LL(1). 109 | 110 | 111 | $ 4.1 Token Railroad diagrams 112 | 113 | * comment: "/*" + (anything but */) + "*/" 114 | * newline: \n | \r\n | \r | \f 115 | * whitespace: " " | \t | newline 116 | * hex-digit: [0-9a-fA-F] 117 | * escape: "\" + (^newline | ^hex-digit) 118 | "\" + hex-digit{1,6} + whitespace{0,1} 119 | * whitespace-token: whitespace+ 120 | * ws*: whitespace-token* 121 | * ident-token: -{0,1} + (a-zA-Z_|non-ASCII|escape) + (a-zA-Z_|0-9|non-ASCII|escape) 122 | * function-token: ident-token + "(" 123 | * at-keyword-token: "@" + ident-token 124 | * hash-token: "#" + (a-zA-Z_|0-9|non-ASCII|escape)* 125 | * string-token: "\"" + (^"|^\n|escape|\+newline) + "\"" (or single quotes) 126 | * url-token: ident-token=="url" + "(" + ws* + (string-token|url-unquoted) + ws* + ")" 127 | * url-unquoted: (^"'()\|^whitespace|^non-printable|escape) 128 | * number-token: ("+"|"-") + digit+ + "." 
+ digit+ + (e|E)+(+|-) + digit+ 129 | * dimension-token: number-token ident-token 130 | * percentage-token: number-token + "%" 131 | * unicode-range-token: (u|U) + "+" + hex-digit{1,6} (or range) 132 | * include-match-token: "~=" 133 | * dash-match-token: "|=" 134 | * prefix-match-token: "^=" 135 | * suffix-match-token: "$=" 136 | * substring-match-token: "*=" 137 | * column-match-token: "||" 138 | * CDO-token: "" 140 | 141 | § 4.2. Definitions 142 | 143 | * code point: Unicode code point. 144 | * next input code point: the first unconsumed code point from the input stream. 145 | * current input code point: the last code point to be consumed. 146 | * reconsume the current input code point: push current to the front of the stream. 147 | * EOF code point: conceptual code point representing the end-of-stream. 148 | * digit: code point between U+0030-U+0039 149 | * hex digi: digit or code point in range of U+0041-U+0046 or U+0061-U+0066 150 | * uppercase letter: code point between "A" - "Z" 151 | * lowercase letter: code point between "a" - "z" 152 | * letter: uppercase or lowercase letter 153 | * non-ASCII code point: code point greater than U+0080 154 | * name-start code point: letter, non-ASCII code point or LOW LINE (_) 155 | * name code point: name code point, digit or HYPHEN-MINUS (-). 156 | * non-printable code point: U+0000-U+0008, U+000B, U+000E-U+001F, U+007F 157 | * newline: U+000A 158 | * whitespace: newline, U+0009 (tab), U+0020 (space) 159 | * surrogate code point: U+D800-U+DFFF inclusive 160 | * maximum allowed code point: U+10FFFF 161 | * identifier: portion of CSS with same syntax as ident-token. Has the "id" type. 162 | 163 | § 4.3. Tokenizer Algorithms 164 | 165 | Transforms a stream of code points into a stream of tokens. 166 | 167 | §4.3.1. Consume a token 168 | 169 | * whitespace: consume as much as possible and return a whitespace-token. 170 | * ": Consume a string-token. 
171 | * #: If next code point is a name code point or next two are a valid escape and 172 | the 3 code points would start an identifier then return a hash token with 173 | type flag set to "id" and name set to the identifier. 174 | Otherwise return delim-token. 175 | * $: If next code point is "=" then return suffix-match-token. 176 | Otherwise return delim-token. 177 | * ': Consume a string-token. 178 | * (: Return a (-token 179 | * ): Return a )-token 180 | * *: If next code point is "=" then return substring-match-token. 181 | Otherwise return delim-token. 182 | * +: If next code point is a number, return a numeric-token. 183 | Otherwise return delim-token. 184 | * ,: Return comma-token. 185 | * -: If next code point is a number, return numeric-token. 186 | If next code point is an identifier, return ident-like token. 187 | If next 2 code points are "->", return a CDC-token. 188 | Otherwise return delim-token. 189 | * .: If next code point is a number, return numeric-token. 190 | Otherwise return delim-token. 191 | * /: If next code point is "*", consume it and all code points up to "*/" or EOF. 192 | Otherwise return delim-token. 193 | * :: Return colon-token. 194 | * ;: Return semicolon-token. 195 | * <: If the next 3 code points are "!--" return CDO. 196 | Otherwise return delim-token. 197 | * @: If next 3 code points make an identifier, return at-keyword-token. 198 | Otherwise return delim-token. 199 | * [: Return [-token. 200 | * \: If followed by valid escape, return ident-like token. 201 | Otherwise this is a PARSE ERROR. Return delim-token. 202 | * ]: Return ]-token. 203 | * ^: If next code point is "=", return prefix-match-token. 204 | Otherwise return delim-token. 205 | * {: Return {-token. 206 | * }: Return }-token. 207 | * u|U: If next 2 code points are "+" + hex-digit or "?", return unicode-range-token. 208 | Otherwise return ident-like token. 209 | * |: If next code point is "=", return dash-match-token. 
210 | If next code point is "|", return column-token. 211 | Otherwise return delim-token. 212 | * ~: If next code point is "=", return include-match-token. 213 | Otherwise return delim-token. 214 | * EOF: Return eof-token. 215 | 216 | Return a delim-token for anything else. 217 | 218 | 219 | § 4.3.2. Consume a numeric token 220 | 221 | Includes number-token, percentage-token, or dimension-token. 222 | 223 | 1. Consume a number. 224 | 2. If next 3 code points would start an identifier: 225 | a. Create dimension-token 226 | b. Consume a name, set to units. 227 | c. Return dimension-token. 228 | 3. Otherwise if next code point is "%" then return percentage-token. 229 | 4. Otherwise return number-token. 230 | 231 | 232 | § 4.3.3. Consume an ident-like token 233 | 234 | Includes ident-token, function-token, url-token, bad-url-token. 235 | 236 | 1. Consume a name. 237 | 2. If value is case-insensitive "url" followed by a "(", consume a url-token 238 | and return it. 239 | 3. Otherwise if next code point is "(" create a function-token and return it. 240 | 4. Otherwise return an ident-token. 241 | 242 | 243 | § 4.3.4. Consume a string token 244 | 245 | Includes a string-token or bad-string-token. 246 | 247 | Must set the ending code point that ends the string. 248 | 249 | 1. Create a string-token. 250 | 2. Repeatedly consume: 251 | EOF: Return the string-token. 252 | newline: This is a PARSE ERROR. Return bad-string-token. 253 | \: If next code point is EOF, do nothing. 254 | If next code point is newline then consume it. 255 | If starts valid escape, append escaped code point. 256 | Anything else: Append code point. 257 | 258 | 259 | § 4.3.5. Consume a URL token 260 | 261 | Include url-token and bad-url-token. 262 | 263 | Assumes initial "url(" has been consumed. 264 | 265 | 1. Create url-token. 266 | 2. Consume whitespace. 267 | 3. If next code point is EOF, return url-token. 268 | 4. If next code point is "\"", consume a string token. 
269 | If bad-string-token returned, consume remenants of bad-url, and return bad-url-token. 270 | Set url-token's value to string-token value. 271 | Consume whitespace. 272 | If next code point is EOF or ) then consume it and return url-token. Otherwise 273 | return remenants of bad url and return bad-url-token. 274 | 5. Repeatedly consume: 275 | ) or EOF: return url-token. 276 | whitespace: consume! if next code point is ) or EOF return url-token. otherwise 277 | consume remenants and return bad-url-token. 278 | " or ' or ( or non-printable: PARSE ERROR! Consume remenants, return bad-url. 279 | \: if valid escape, append escaped code point. Otherwise parse error. Consume remenants, return bad-url. 280 | anything else: append 281 | 282 | 283 | § 4.3.6. Consume unicode-range token 284 | 285 | Includes unicode-range-token. 286 | 287 | Assumes initial "u+" has been consumed and next digit is hex-digit or ? 288 | 289 | 1. Consume up to 6 hex digits. If less than 6, consume ? until chars totals 6. 290 | 291 | If any ? were consumed: 292 | 293 | a. Interpret value as hex number, replace ? with 0. This is the start of the range. 294 | 295 | b. Interpret value as hex number, replace ? with F. This is the end of the range. 296 | 297 | c. Return new unicode range. 298 | 299 | Otherwise interpret as start of the range. 300 | 301 | 2. If next 2 code points are - and hex digit, consume up to 6 hex digits. 302 | This is the end of the range. 303 | 304 | 3. Otherwise the end of the range is equal to the start. 305 | 306 | 4. Return unicode-range-token. 307 | 308 | 309 | § 4.3.7. Consume an escaped code point 310 | 311 | Assumes that \ is already consumed and next char is not a newline. 312 | 313 | Consume next code point. 314 | 315 | hex digit: Consume up to 6 hex digits. If next code point is whitespace then 316 | consume it too. Interpret as hex number. If value is 0, or a 317 | surrogate code point, or greater than max, return U+FFFD. 
318 | 319 | EOF: Return U+FFFD 320 | 321 | Anything else: return code point 322 | 323 | 324 | § 4.3.8. Check if two code points are a valid escape. 325 | 326 | If first code point is not \, return false. 327 | 328 | If second code point is newline, return false. 329 | 330 | Otherwise return true. 331 | 332 | 333 | § 4.3.9. Check if three code points would start an identifier 334 | 335 | Look at first code point: 336 | 337 | -: If 2nd is a name-start code point or 2nd and 3rd are a valid escape, return true. 338 | Otherwise return false. 339 | 340 | name-start code point: return true 341 | 342 | \: If 1st and 2nd are valid escape, return true. Otherwise return false. 343 | 344 | 345 | § 4.3.10. Check if three code points would start a number 346 | 347 | Look at first code point: 348 | 349 | + or -: If 2nd is digit, return true. 350 | If 2nd is . and 3rd is digit, return true. 351 | Otherwise return false. 352 | 353 | .: If 2nd is digit return true. Otherwise return false. 354 | 355 | digit: return true 356 | 357 | anything else: return false 358 | 359 | 360 | § 4.3.11. Consume a name 361 | 362 | Does not verify that code points constitute an ident-token. 363 | 364 | Repeatedly consume: 365 | 366 | name code point: append to result 367 | 368 | stream start with valid escape: consume escaped code point and append. 369 | 370 | anything else: return result 371 | 372 | 373 | § 4.3.12. Consume a number 374 | 375 | Returns 3-tuple of string representation, numeric value, and type. 376 | 377 | This does not verify that the first few code points make a number. 378 | 379 | 1. Set *repr* to empty string and type to "integer". 380 | 381 | 2. If next code point is + or -, consume it and append to repr. 382 | 383 | 3. Consume digits and append to repr. 384 | 385 | 4. If next 2 code points are . and digit then: 386 | consume, append to repr, set type to "number", consume digits. 387 | 388 | 5. 
If next 2 or 3 code points are (e|E) optionally followed by +/-, then a digit: 389 | consume, append to repr, set type to "number", consume digits. 390 | 391 | 6. Convert repr to number and set value. 392 | 393 | 7. Return 3-tuple. 394 | 395 | 396 | § 4.3.13. Convert a string to a number 397 | 398 | ... 399 | 400 | 401 | § 4.3.14. Consume remnants of a bad url 402 | 403 | This returns nothing. Only consume enough input to recover tokenizer. 404 | 405 | Repeatedly consume: 406 | 407 | ) or EOF: Return 408 | 409 | valid escape: consume escaped code point 410 | 411 | anything else: do nothing. 412 | 413 | 414 | 415 | § 5. Parsing 416 | 417 | * at-rule: name, prelude, and optional block. 418 | 419 | * qualified rule: prelude and {} block. 420 | 421 | * declaration: name, value, and important flag. 422 | can be *properties* or *descriptors* 423 | 424 | * component value: preserved tokens, a function, or a simple block. 425 | 426 | * preserved token: any token except function-token, {-token, (-token, or [-token. 427 | }, ), ], bad-string, bad-url are always parse errors but are preserved for higher level error handling. 428 | 429 | * function: name and value (list of component values) 430 | 431 | * simple block: has token ([, (, or {) and a value (list of component values) 432 | 433 | 434 | § 5.1. Parser Railroad Diagrams 435 | 436 | Stylesheet: (at-rule | qualified rule | whitespace-token | CDO | CDC)* 437 | 438 | Rule list: (at-rule | qualified rule | whitespace-token)* 439 | 440 | at-rule: at-keyword-token component-value* ({}-block | ;) 441 | 442 | qualified rule: component-value* {}-block 443 | 444 | declaration list: ws* at-rule declaration-list 445 | declaration 446 | declaration; declaration-list 447 | 448 | declaration: ident-token ws* : component-value* !important 449 | 450 | !important: ! 
ws* ident-token("important") ws* 451 | 452 | component-value: preserved-token 453 | {}-block 454 | ()-block 455 | []-block 456 | Function block 457 | 458 | {}-block: { component-value* } 459 | 460 | ()-block: ( component-value* ) 461 | 462 | []-block: [ component-value* ] 463 | 464 | function-block: function-token component-value* ) 465 | 466 | 467 | § 5.3. Parser Entry Points 468 | 469 | * Parse stylesheet 470 | * Parse rule list 471 | * Parse rule 472 | * Parse declaration 473 | * Parse declaration list 474 | * Parse component value 475 | * Parse component value list 476 | 477 | 478 | § 5.3.1 Parse stylesheet 479 | 480 | 1. Create stylesheet 481 | 2. Consume list of rules, set "top level" flag. 482 | 3. Return stylesheet. 483 | 484 | § 5.3.2. Parse list of rules 485 | 486 | 1. Consume list of rules from stream of tokens with top-level flag unset. 487 | 2. Return list. 488 | 489 | § 5.3.3. Parse a rule 490 | 491 | 1. Consume next token 492 | 2. While token is whitespace-token, consume next token. 493 | 3. If input token is EOF, return syntax error. 494 | If token is at-keyword-token, consume an at-rule and let "rule" be the return value. 495 | Otherwise consume qualified rule and let rule be the return value. 496 | If nothing was returned the return a syntax error. 497 | 4. While input token is whitespace-token, consume next token. 498 | 5. If current token is EOF, return rule. Otherwise return syntax error. 499 | 500 | § 5.3.4. Parse a declaration 501 | 502 | 1. Consume next input token. 503 | 2. While whitespace, consume next token. 504 | 3. If current token is not ident-token, return syntax error. 505 | 4. Consume a declaration. If anything was returned, return it. 506 | Otherwise syntax error. 507 | 508 | § 5.3.5. Parse a list of declarations 509 | 510 | This mixes declarations and at-rules. 511 | 512 | 1. Consume a list of declarations. 513 | 2. Return the returned list. 514 | 515 | § 5.3.6. Parse a component value 516 | 517 | 1. Consume next input token. 
518 | 2. While whitespace-token, consume next token. 519 | 3. If current token is EOF, return syntax error. 520 | 4. Unread token. Consume component value and return value. If nothing returned, syntax error! 521 | 5. While whitespace-token, consume next token. 522 | 6. If current token is EOF, return value. Otherwise syntax error. 523 | 524 | § 5.3.7. Parse list of component values. 525 | 526 | 1. Repeatedly consume component value until EOF. 527 | 528 | 529 | § 5.4. Parser Algorithms 530 | 531 | Algorithms return EOF at their end. 532 | 533 | § 5.4.1. Consume a list of rules 534 | 535 | Repeatedly consume next token: 536 | 537 | whitespace-token: do nothing 538 | EOF: return list of rules 539 | CDO, CDC: If top-level set, do nothing. 540 | Otherwise, unread. Consume a qualified rule. Append return value. 541 | at-keyword-token: Unread. Consume an at-rule. Append return value. 542 | anything else: Unread. Consume a qualified rule. Append return value. 543 | 544 | § 5.4.2. Consume an at-rule 545 | 546 | Create new at-rule with name set to current token. 547 | 548 | Repeatedly consume: 549 | 550 | semicolon-token, EOF: Return at-rule 551 | {: Consume simple block, assign to at-rule's block, return at-rule. 552 | simple-block with {-token: Assign block to at-rule, return at-rule. 553 | anything else: Unread. Consume component value. Append to prelude. 554 | 555 | § 5.4.3. Consume a qualified rule 556 | 557 | Create new qualified rule. 558 | 559 | Repeatedly consume: 560 | 561 | EOF: parse error, return nothing. 562 | {-token: Consume simple block and assign to rule. Return rule. 563 | simple block with {-token: Assign to rule. Return rule. 564 | Anything else: unread. Consume component value. Append to prelude. 565 | 566 | § 5.4.4. Consume list of declarations 567 | 568 | Create empty list. 569 | 570 | Repeatedly consume: 571 | 572 | whitespace-token, semicolon-token: do nothing. 573 | EOF: return list. 574 | at-keyword-token: Consume at-rule. Append to list. 
575 | ident-token: Initialize temporary list with current. 576 | Repeatedly append while not semicolon-token or EOF. 577 | Consume declaration from temporary list. Append result to list. 578 | anything else: parse error. Repeatedly consume input until semicolon or EOF. 579 | 580 | 581 | § 5.4.5. Consume a declaration 582 | 583 | Create new declaration with name set to current token. 584 | 585 | 1. Consume next token. 586 | 2. Consume while current is whitespace. 587 | 3. If current is not colon-token, parse error. Return nothing. 588 | Otherwise consume next token. 589 | 4. Append tokens until EOF. 590 | 5. If last two non-whitespace tokens are are delim-token followed by 591 | delim-token, remove from value and set important flag. 592 | 6. Return declaration. 593 | 594 | 595 | § 5.4.6. Consume a component value 596 | 597 | Consume token. 598 | 599 | If current is {, [, or ( then consume a simple block and return it. 600 | 601 | Otherwise if function-token then consume function and return it. 602 | 603 | Otherwise return current. 604 | 605 | 606 | § 5.4.7. Consume simple block 607 | 608 | Ending token is mirror variant of current token. 609 | 610 | Repeatedly consume: 611 | 612 | EOF, ending token: Return block. 613 | anything else: Unread. Consume component value and append to block. 614 | 615 | 616 | § 5.4.8. Consume a function 617 | 618 | Create function with name equal to current. 619 | 620 | Repeatedly consume: 621 | 622 | EOF, )-token: Return function. 623 | anything else: unread. Consume component value and append to function. 624 | 625 | 626 | 627 | § 6. An+B Microsyntax 628 | 629 | * "even" and "odd" keywords allowed. 630 | * A and B can be negative but only positive results are used. 631 | * If A==0 && B==0 then no elements are matched. 632 | 633 | § 6.1. Informal Syntax Description 634 | 635 | * If A==0 then An can be omitted. 636 | * If An is omitted then + can be omitted. 637 | * 1n+0 == n+0 == n 638 | 639 | § 6.2. 
The type 640 | 641 | * Use regular CSS tokens. 642 | 643 | 644 | 645 | § 7. Defining Grammars for Rules and Other Values 646 | 647 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | css [![Build Status](https://drone.io/github.com/benbjohnson/css/status.png)](https://drone.io/github.com/benbjohnson/css/latest) [![Coverage Status](https://coveralls.io/repos/benbjohnson/css/badge.png?branch=master)](https://coveralls.io/r/benbjohnson/css?branch=master) [![GoDoc](https://godoc.org/github.com/benbjohnson/css?status.png)](https://godoc.org/github.com/benbjohnson/css) ![Project status](http://img.shields.io/status/alpha.png?color=red) 2 | === 3 | 4 | This package provides a CSS parser and scanner in pure Go. It is an 5 | implementation as specified in the W3C's [CSS Syntax Module Level 3][css3-syntax]. 6 | 7 | For documentation on how to use this package, please see the [godoc][godoc]. 8 | 9 | [css3-syntax]: http://www.w3.org/TR/css3-syntax/ 10 | [godoc]: https://godoc.org/github.com/benbjohnson/css 11 | 12 | 13 | ## Project Status 14 | 15 | The scanner and parser are fully compliant with the CSS3 specification. 16 | The printer will print nodes generated from the scanner and parser, however, 17 | it is not fully compliant with the [CSS3 serialization][serialization] spec. 18 | Additionally, the printer does not provide an option to collapse whitespace 19 | although that will be added in the future. 20 | 21 | This project has 100% test coverage, however, it is still a new project. 22 | Please report any bugs you experience or let me know where the documentation 23 | can be clearer. 24 | 25 | [serialization]: http://www.w3.org/TR/css3-syntax/#serialization 26 | 27 | 28 | ## Caveats 29 | 30 | The CSS scanner in this package only supports UTF-8 encoding. The @charset 31 | directive will be ignored. 
If you need to scan a different encoding then 32 | please convert it to UTF-8 first using a tool such as [iconv][iconv]. 33 | 34 | [iconv]: http://en.wikipedia.org/wiki/Iconv 35 | -------------------------------------------------------------------------------- /ast.go: -------------------------------------------------------------------------------- 1 | package css 2 | 3 | import "fmt" 4 | 5 | // Node represents a node in the CSS3 abstract syntax tree. 6 | type Node interface { 7 | node() 8 | } 9 | 10 | func (_ *StyleSheet) node() {} 11 | func (_ Rules) node() {} 12 | func (_ *AtRule) node() {} 13 | func (_ *QualifiedRule) node() {} 14 | func (_ Declarations) node() {} 15 | func (_ *Declaration) node() {} 16 | func (_ ComponentValues) node() {} 17 | func (_ *SimpleBlock) node() {} 18 | func (_ *Function) node() {} 19 | func (_ *Token) node() {} 20 | 21 | // StyleSheet represents a top-level CSS3 stylesheet. 22 | type StyleSheet struct { 23 | Rules Rules 24 | } 25 | 26 | // Rules represents a list of rules. 27 | type Rules []Rule 28 | 29 | // Rule represents a qualified rule or at-rule. 30 | type Rule interface { 31 | Node 32 | rule() 33 | } 34 | 35 | func (_ *AtRule) rule() {} 36 | func (_ *QualifiedRule) rule() {} 37 | 38 | // AtRule represents a rule starting with an "@" symbol. 39 | type AtRule struct { 40 | Name string 41 | Prelude ComponentValues 42 | Block *SimpleBlock 43 | Pos Pos 44 | } 45 | 46 | // QualifiedRule represents an unnamed rule that includes a prelude and block. 47 | type QualifiedRule struct { 48 | Prelude ComponentValues 49 | Block *SimpleBlock 50 | Pos Pos 51 | } 52 | 53 | // Declarations represents a list of declarations or at-rules. 54 | type Declarations []Node 55 | 56 | // Declaration represents a name/value pair. 57 | type Declaration struct { 58 | Name string 59 | Values ComponentValues 60 | Important bool 61 | Pos Pos 62 | } 63 | 64 | // ComponentValues represents a list of component values. 
65 | type ComponentValues []ComponentValue 66 | 67 | // nonwhitespace returns the list of values without whitespace characters. 68 | func (a ComponentValues) nonwhitespace() ComponentValues { 69 | var tmp ComponentValues 70 | for _, v := range a { 71 | if v, ok := v.(*Token); ok && v.Tok == WhitespaceToken { 72 | continue 73 | } 74 | tmp = append(tmp, v) 75 | } 76 | return tmp 77 | } 78 | 79 | // ComponentValue represents a component value. 80 | type ComponentValue interface { 81 | Node 82 | componentValue() 83 | } 84 | 85 | func (_ *SimpleBlock) componentValue() {} 86 | func (_ *Function) componentValue() {} 87 | func (_ *Token) componentValue() {} 88 | 89 | // SimpleBlock represents a {-block, [-block, or (-block. 90 | type SimpleBlock struct { 91 | Token *Token 92 | Values ComponentValues 93 | Pos Pos 94 | } 95 | 96 | // Function represents a function call with a list of arguments. 97 | type Function struct { 98 | Name string 99 | Values ComponentValues 100 | Pos Pos 101 | } 102 | 103 | // Token represents a lexical token. 104 | type Token struct { 105 | // The type of token. 106 | Tok Tok 107 | 108 | // A flag set for ident-like tokens to either "id" or "unrestricted". 109 | // Also set for numeric tokens to either "integer" or "number" 110 | Type string 111 | 112 | // The literal value of the token as parsed. 113 | Value string 114 | 115 | // The rune used to close the token. Used for string tokens. 116 | Ending rune 117 | 118 | // The numeric value and unit used for numeric tokens. 119 | Number float64 120 | Unit string 121 | 122 | // Beginning and ending range for a unicode-range token. 123 | Start int 124 | End int 125 | 126 | // Position of the token in the source document. 127 | Pos Pos 128 | } 129 | 130 | // Tok represents a lexical token type. 
131 | type Tok int 132 | 133 | const ( 134 | IdentToken Tok = iota + 1 135 | FunctionToken 136 | AtKeywordToken 137 | HashToken 138 | StringToken 139 | BadStringToken 140 | URLToken 141 | BadURLToken 142 | DelimToken 143 | NumberToken 144 | PercentageToken 145 | DimensionToken 146 | UnicodeRangeToken 147 | IncludeMatchToken 148 | DashMatchToken 149 | PrefixMatchToken 150 | SuffixMatchToken 151 | SubstringMatchToken 152 | ColumnToken 153 | WhitespaceToken 154 | CDOToken 155 | CDCToken 156 | ColonToken 157 | SemicolonToken 158 | CommaToken 159 | LBrackToken 160 | RBrackToken 161 | LParenToken 162 | RParenToken 163 | LBraceToken 164 | RBraceToken 165 | EOFToken 166 | ) 167 | 168 | // Pos specifies the line and character position of a token. 169 | // The Char and Line are both zero-based indexes. 170 | type Pos struct { 171 | Char int 172 | Line int 173 | } 174 | 175 | // Position returns the position for a given Node. 176 | func Position(n Node) Pos { 177 | switch n := n.(type) { 178 | case *StyleSheet: 179 | return Position(n.Rules) 180 | case Rules: 181 | if len(n) > 0 { 182 | return Position(n[0]) 183 | } 184 | case *AtRule: 185 | return n.Pos 186 | case *QualifiedRule: 187 | return n.Pos 188 | case Declarations: 189 | if len(n) > 0 { 190 | return Position(n[0]) 191 | } 192 | case *Declaration: 193 | return n.Pos 194 | case ComponentValues: 195 | if len(n) > 0 { 196 | return Position(n[0]) 197 | } 198 | case *SimpleBlock: 199 | return n.Pos 200 | case *Function: 201 | return n.Pos 202 | case *Token: 203 | return n.Pos 204 | } 205 | return Pos{} 206 | } 207 | 208 | // Error represents a syntax error. 209 | type Error struct { 210 | Message string 211 | Pos Pos 212 | } 213 | 214 | // Error returns the formatted string error message. 215 | func (e *Error) Error() string { 216 | return e.Message 217 | } 218 | 219 | // ErrorList represents a list of syntax errors. 220 | type ErrorList []error 221 | 222 | // Error returns the formatted string error message. 
223 | func (a ErrorList) Error() string { 224 | switch len(a) { 225 | case 0: 226 | return "no errors" 227 | case 1: 228 | return a[0].Error() 229 | } 230 | return fmt.Sprintf("%s (and %d more errors)", a[0], len(a)-1) 231 | } 232 | -------------------------------------------------------------------------------- /ast_test.go: -------------------------------------------------------------------------------- 1 | package css 2 | 3 | import ( 4 | "reflect" 5 | "testing" 6 | ) 7 | 8 | // Ensure that all nodes implement the Node interface. 9 | func TestNode(t *testing.T) { 10 | var a []Node 11 | a = append(a, &StyleSheet{}, &AtRule{}, &QualifiedRule{}, &Declaration{}) 12 | a = append(a, &SimpleBlock{}, &Function{}, &Token{}) 13 | a = append(a, Rules{}, Declarations{}, ComponentValues{}) 14 | for _, n := range a { 15 | n.node() 16 | } 17 | } 18 | 19 | // Ensure that all rules implement the Rule interface. 20 | func TestRule(t *testing.T) { 21 | a := []Rule{&AtRule{}, &QualifiedRule{}} 22 | for _, r := range a { 23 | r.rule() 24 | } 25 | } 26 | 27 | // Ensure that all component values implement the ComponentValue interface. 28 | func TestComponentValue(t *testing.T) { 29 | a := []ComponentValue{&SimpleBlock{}, &Function{}, &Token{}} 30 | for _, v := range a { 31 | v.componentValue() 32 | } 33 | } 34 | 35 | // Ensure that node positions can be retrieved. 
36 | func TestPosition(t *testing.T) { 37 | var tests = []struct { 38 | in Node 39 | pos Pos 40 | }{ 41 | {in: &StyleSheet{Rules: Rules{&QualifiedRule{Pos: Pos{1, 2}}}}, pos: Pos{1, 2}}, 42 | {in: Rules{&AtRule{Pos: Pos{1, 2}}}, pos: Pos{1, 2}}, 43 | {in: Rules{}, pos: Pos{}}, 44 | {in: &QualifiedRule{Pos: Pos{1, 2}}, pos: Pos{1, 2}}, 45 | {in: &AtRule{Pos: Pos{1, 2}}, pos: Pos{1, 2}}, 46 | {in: Declarations{&AtRule{Pos: Pos{1, 2}}}, pos: Pos{1, 2}}, 47 | {in: Declarations{&Declaration{Pos: Pos{1, 2}}}, pos: Pos{1, 2}}, 48 | {in: Declarations{}, pos: Pos{}}, 49 | {in: ComponentValues{&SimpleBlock{Pos: Pos{1, 2}}}, pos: Pos{1, 2}}, 50 | {in: ComponentValues{&Function{Pos: Pos{1, 2}}}, pos: Pos{1, 2}}, 51 | {in: ComponentValues{&Token{Pos: Pos{1, 2}}}, pos: Pos{1, 2}}, 52 | {in: ComponentValues{}, pos: Pos{}}, 53 | {in: &SimpleBlock{Pos: Pos{1, 2}}, pos: Pos{1, 2}}, 54 | {in: &Function{Pos: Pos{1, 2}}, pos: Pos{1, 2}}, 55 | {in: &Token{Pos: Pos{1, 2}}, pos: Pos{1, 2}}, 56 | } 57 | 58 | for _, tt := range tests { 59 | if pos := Position(tt.in); !reflect.DeepEqual(tt.pos, pos) { 60 | t.Errorf("expected: %#v, got: %#v", tt.pos, pos) 61 | } 62 | } 63 | } 64 | 65 | // Ensure that an error list can be properly formatted. 
66 | func TestErrorList_Error(t *testing.T) { 67 | var tests = []struct { 68 | in ErrorList 69 | s string 70 | }{ 71 | {in: nil, s: "no errors"}, 72 | {in: ErrorList{}, s: "no errors"}, 73 | {in: ErrorList{&Error{Message: "foo"}}, s: "foo"}, 74 | {in: ErrorList{&Error{Message: "foo"}, &Error{Message: "bar"}}, s: "foo (and 1 more errors)"}, 75 | } 76 | 77 | for _, tt := range tests { 78 | if s := tt.in.Error(); tt.s != s { 79 | t.Errorf("expected: %s, got: %s", tt.s, s) 80 | } 81 | } 82 | 83 | } 84 | 85 | // TODO(benbjohnson): TestPosition_* 86 | -------------------------------------------------------------------------------- /doc.go: -------------------------------------------------------------------------------- 1 | /* 2 | Package css implements a CSS3 compliant scanner and parser. This is meant to 3 | be a low-level library for extracting a CSS3 abstract syntax tree from raw 4 | CSS text. 5 | 6 | This package can be used for building tools to validate, optimize and format 7 | CSS text. 8 | 9 | 10 | Basics 11 | 12 | CSS parsing occurs in two steps. First the scanner breaks up a stream of code 13 | points (runes) into tokens. These tokens represent the most basic units of 14 | the CSS syntax tree such as identifiers, whitespace, and strings. The second 15 | step is to feed these tokens into the parser which creates the abstract syntax 16 | tree (AST) based on the context of the tokens. 17 | 18 | Unlike many language parsers, the abstract syntax tree for CSS saves many of the 19 | original tokens in the stream so they can be reparsed at different levels. For 20 | example, parsing a @media query will save off the raw tokens found in the 21 | {-block so they can be reparsed as a full style sheet. This package doesn't 22 | understand the specifics of how to parse different types of at-rules (such as 23 | @media queries) so it defers that to the user to handle parsing. 24 | 25 | 26 | Abstract Syntax Tree 27 | 28 | The CSS3 syntax defines a syntax tree of several types. 
At the top-level there 29 | is a StyleSheet. The style sheet is simply a collection of Rules. A Rule can be 30 | either an AtRule or a QualifiedRule. 31 | 32 | An AtRule is defined as a rule starting with an "@" symbol and an identifier, 33 | then it's followed by zero or more component values and finally ends with either 34 | a {-block or a semicolon. The block is parsed simply as a collection of tokens 35 | and it is up to the user to define the exact grammar. 36 | 37 | A QualifiedRule is defined as a rule starting with one or more component values 38 | and ending with a {-block. 39 | 40 | Inside the {-blocks are a list of declarations. Despite the name, a list of 41 | declarations can be either an AtRule or a Declaration. A Declaration is an 42 | identifier followed by a colon followed by one or more component values. The 43 | declaration can also have it's Important flag set if the last two non-whitespace 44 | tokens are a case-insensitive "!important". 45 | 46 | ComponentValues are the basic unit inside rules and declarations. A 47 | ComponentValue can be either a SimpleBlock, a Function, or a Token. A simple 48 | block starts with either a {, [, or (, has zero or more component values, and 49 | then ends with the mirror of the starting token (}, ], or )). A Function is 50 | an identifier immediately followed by a left parenthesis, then zero or more 51 | component values, and then ending with a right parenthesis. 52 | 53 | 54 | */ 55 | package css 56 | -------------------------------------------------------------------------------- /parser.go: -------------------------------------------------------------------------------- 1 | package css 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | ) 7 | 8 | // Parser represents a CSS3 parser. 9 | type Parser struct { 10 | Errors ErrorList 11 | } 12 | 13 | // ParseStyleSheet parses an input stream into a stylesheet. 
14 | func (p *Parser) ParseStyleSheet(s *Scanner) *StyleSheet { 15 | ss := &StyleSheet{} 16 | ss.Rules = p.ConsumeRules(&scanner{s}, true) 17 | return ss 18 | } 19 | 20 | // ParseRule parses a list of rules. 21 | func (p *Parser) ParseRules(s *Scanner) Rules { 22 | return p.ConsumeRules(&scanner{s}, false) 23 | } 24 | 25 | // ParseRule parses a qualified rule or at-rule. 26 | func (p *Parser) ParseRule(s *Scanner) Rule { 27 | var r Rule 28 | 29 | // Skip over initial whitespace. 30 | p.skipWhitespace(&scanner{s}) 31 | 32 | // If the next token is EOF, return syntax error. 33 | // If the next token is at-keyword, consume an at-rule. 34 | // Otherwise consume a qualified rule. If nothing is returned, return error. 35 | tok := s.Scan() 36 | if tok.Tok == EOFToken { 37 | p.Errors = append(p.Errors, &Error{Message: "unexpected EOF", Pos: Position(s.current())}) 38 | return nil 39 | } else if tok.Tok == AtKeywordToken { 40 | r = p.ConsumeAtRule(&scanner{s}) 41 | } else { 42 | s.unscan() 43 | r = p.ConsumeQualifiedRule(&scanner{s}) 44 | } 45 | 46 | // Skip over trailing whitespace. 47 | p.skipWhitespace(&scanner{s}) 48 | 49 | if tok := s.Scan(); tok.Tok != EOFToken { 50 | p.Errors = append(p.Errors, &Error{Message: fmt.Sprintf("expected EOF, got %s", print(s.current())), Pos: Position(s.current())}) 51 | return nil 52 | } 53 | 54 | return r 55 | } 56 | 57 | // ParseDeclaration parses a name/value declaration. 58 | func (p *Parser) ParseDeclaration(s *Scanner) *Declaration { 59 | // Skip over initial whitespace. 60 | p.skipWhitespace(&scanner{s}) 61 | 62 | // If the next token is not an ident then return an error. 63 | if tok := s.Scan(); tok.Tok != IdentToken { 64 | p.Errors = append(p.Errors, &Error{Message: fmt.Sprintf("expected ident, got %s", print(s.current())), Pos: Position(s.current())}) 65 | return nil 66 | } 67 | s.unscan() 68 | 69 | // Consume a declaration. 
70 | return p.ConsumeDeclaration(&scanner{s}) 71 | } 72 | 73 | // ParseDeclarations parses a list of declarations and at-rules. 74 | func (p *Parser) ParseDeclarations(s *Scanner) Declarations { 75 | return p.ConsumeDeclarations(&scanner{s}) 76 | } 77 | 78 | // ParseComponentValue parses a component value. 79 | func (p *Parser) ParseComponentValue(s *Scanner) ComponentValue { 80 | // Skip over initial whitespace. 81 | p.skipWhitespace(&scanner{s}) 82 | 83 | // If the next token is EOF then return an error. 84 | if tok := s.Scan(); tok.Tok == EOFToken { 85 | p.Errors = append(p.Errors, &Error{Message: "unexpected EOF", Pos: Position(s.current())}) 86 | return nil 87 | } 88 | s.unscan() 89 | 90 | // Consume component value. 91 | v := p.ConsumeComponentValue(&scanner{s}) 92 | 93 | // Skip over any trailing whitespace. 94 | p.skipWhitespace(&scanner{s}) 95 | 96 | // If we're not at EOF then return a syntax error. 97 | if tok := s.Scan(); tok.Tok != EOFToken { 98 | s.unscan() 99 | p.Errors = append(p.Errors, &Error{Message: fmt.Sprintf("expected EOF, got %s", print(s.current())), Pos: Position(s.current())}) 100 | return nil 101 | } 102 | 103 | return v 104 | } 105 | 106 | // ParseComponentValues parses a list of component values. 107 | func (p *Parser) ParseComponentValues(s *Scanner) ComponentValues { 108 | var a ComponentValues 109 | 110 | // Repeatedly consume a component value until EOF. 111 | for { 112 | v := p.ConsumeComponentValue(&scanner{s}) 113 | 114 | // If the value is an EOF, then exit. 115 | if tok, ok := v.(*Token); ok && tok.Tok == EOFToken { 116 | break 117 | } 118 | 119 | // Otherwise append to list of component values. 120 | a = append(a, v) 121 | } 122 | 123 | return a 124 | } 125 | 126 | // ConsumeRules consumes a list of rules from a token stream. 
127 | func (p *Parser) ConsumeRules(s ComponentValueScanner, topLevel bool) Rules { 128 | var a Rules 129 | for { 130 | tok := s.Scan() 131 | switch tok := tok.(type) { 132 | case *Token: 133 | switch tok.Tok { 134 | case WhitespaceToken: 135 | continue // nop 136 | case EOFToken: 137 | return a 138 | case CDOToken, CDCToken: 139 | if !topLevel { 140 | s.Unscan() 141 | if r := p.ConsumeQualifiedRule(s); r != nil { 142 | a = append(a, r) 143 | } 144 | continue 145 | } 146 | case AtKeywordToken: 147 | if r := p.ConsumeAtRule(s); r != nil { 148 | a = append(a, r) 149 | } 150 | continue 151 | } 152 | } 153 | 154 | // Otherwise consume a qualified rule. 155 | s.Unscan() 156 | if r := p.ConsumeQualifiedRule(s); r != nil { 157 | a = append(a, r) 158 | } 159 | } 160 | } 161 | 162 | // ConsumeAtRule consumes a single at-rule. 163 | func (p *Parser) ConsumeAtRule(s ComponentValueScanner) *AtRule { 164 | var r AtRule 165 | 166 | // Set the name to the value of the current token. 167 | // TODO(benbjohnson): Validate first token. 168 | r.Name = s.Current().(*Token).Value 169 | 170 | // Repeatedly consume the next token. 171 | for { 172 | tok := s.Scan() 173 | switch tok := tok.(type) { 174 | case *Token: 175 | switch tok.Tok { 176 | case SemicolonToken, EOFToken: 177 | return &r 178 | case LBraceToken: 179 | r.Block = p.ConsumeSimpleBlock(s) 180 | return &r 181 | } 182 | case *SimpleBlock: 183 | if tok.Token.Tok == LBraceToken { 184 | r.Block = tok 185 | return &r 186 | } 187 | } 188 | 189 | // Otherwise consume a component value. 190 | s.Unscan() 191 | v := p.ConsumeComponentValue(s) 192 | r.Prelude = append(r.Prelude, v) 193 | } 194 | } 195 | 196 | // ConsumeQualifiedRule consumes a single qualified rule. 197 | func (p *Parser) ConsumeQualifiedRule(s ComponentValueScanner) *QualifiedRule { 198 | var r QualifiedRule 199 | 200 | // Repeatedly consume the next token. 
201 | for { 202 | tok := s.Scan() 203 | switch tok := tok.(type) { 204 | case *Token: 205 | switch tok.Tok { 206 | case EOFToken: 207 | p.Errors = append(p.Errors, &Error{Message: "unexpected EOF", Pos: tok.Pos}) 208 | return nil 209 | case LBraceToken: 210 | r.Block = p.ConsumeSimpleBlock(s) 211 | return &r 212 | } 213 | case *SimpleBlock: 214 | if tok.Token.Tok == LBraceToken { 215 | r.Block = tok 216 | return &r 217 | } 218 | } 219 | s.Unscan() 220 | r.Prelude = append(r.Prelude, p.ConsumeComponentValue(s)) 221 | } 222 | } 223 | 224 | // ConsumeDeclarations consumes a list of declarations. 225 | func (p *Parser) ConsumeDeclarations(s ComponentValueScanner) Declarations { 226 | var a Declarations 227 | 228 | // Repeatedly consume the next token. 229 | for { 230 | tok := s.Scan() 231 | 232 | if tok, ok := tok.(*Token); ok { 233 | switch tok.Tok { 234 | case WhitespaceToken, SemicolonToken: 235 | continue // nop 236 | case EOFToken: 237 | return a 238 | case AtKeywordToken: 239 | a = append(a, p.ConsumeAtRule(s)) 240 | continue 241 | case IdentToken: 242 | // Generate a list of tokens up to the next semicolon or EOF. 243 | s.Unscan() 244 | values := p.consumeDeclarationValues(s) 245 | 246 | // Consume declaration using temporary list of tokens. 247 | if d := p.ConsumeDeclaration(NewComponentValueScanner(values)); d != nil { 248 | a = append(a, d) 249 | } 250 | continue 251 | } 252 | } 253 | 254 | // Any other token is a syntax error. 255 | p.Errors = append(p.Errors, &Error{Message: fmt.Sprintf("unexpected: %s", print(tok)), Pos: Position(tok)}) 256 | 257 | // Repeatedly consume a component values until semicolon or EOF. 258 | p.skipComponentValues(s) 259 | } 260 | } 261 | 262 | // ConsumeDeclaration consumes a single declaration. 263 | func (p *Parser) ConsumeDeclaration(s ComponentValueScanner) *Declaration { 264 | var d Declaration 265 | 266 | // The first token must be an ident. 267 | // TODO(benbjohnson): Validate initial token. 
268 | d.Name = s.Scan().(*Token).Value 269 | 270 | // Skip over whitespace. 271 | p.skipWhitespace(s) 272 | 273 | // The next token must be a colon. 274 | if tok := s.Scan().(*Token); tok.Tok != ColonToken { 275 | p.Errors = append(p.Errors, &Error{Message: fmt.Sprintf("expected colon, got %s", print(s.Current())), Pos: Position(s.Current())}) 276 | return nil 277 | } 278 | 279 | // Consume the declaration value until EOF. 280 | for { 281 | tok := s.Scan() 282 | if tok, ok := tok.(*Token); ok && tok.Tok == EOFToken { 283 | break 284 | } 285 | d.Values = append(d.Values, tok) 286 | } 287 | 288 | // Check last two non-whitespace tokens for "!important". 289 | d.Values, d.Important = cleanImportantFlag(d.Values) 290 | 291 | return &d 292 | } 293 | 294 | // Checks if the last two non-whitespace tokens are a case-insensitive "!important". 295 | // If so, it removes them and returns the "important" flag set to true. 296 | func cleanImportantFlag(values ComponentValues) (ComponentValues, bool) { 297 | a := values.nonwhitespace() 298 | if len(a) < 2 { 299 | return values, false 300 | } 301 | 302 | // Check last two tokens for "!important". 303 | if tok, ok := a[len(a)-2].(*Token); !ok || tok.Tok != DelimToken || tok.Value != "!" { 304 | return values, false 305 | } 306 | if tok, ok := a[len(a)-1].(*Token); !ok || tok.Tok != IdentToken || strings.ToLower(tok.Value) != "important" { 307 | return values, false 308 | } 309 | 310 | // Trim "!important" tokens off values. 311 | for i, v := range values { 312 | if v == a[len(a)-2] { 313 | values = values[:i] 314 | break 315 | } 316 | } 317 | 318 | return values, true 319 | } 320 | 321 | // ConsumeComponentValue consumes a single component value. 
(§5.4.6) 322 | func (p *Parser) ConsumeComponentValue(s ComponentValueScanner) ComponentValue { 323 | tok := s.Scan() 324 | if tok, ok := tok.(*Token); ok { 325 | switch tok.Tok { 326 | case LBraceToken, LBrackToken, LParenToken: 327 | return p.ConsumeSimpleBlock(s) 328 | case FunctionToken: 329 | return p.ConsumeFunction(s) 330 | } 331 | } 332 | return tok 333 | } 334 | 335 | // ConsumeSimpleBlock consumes a simple block. (§5.4.7) 336 | func (p *Parser) ConsumeSimpleBlock(s ComponentValueScanner) *SimpleBlock { 337 | b := &SimpleBlock{} 338 | 339 | // Set the block's associated token to the current token. 340 | // TODO(benbjohnson): Validate first token. 341 | b.Token = s.Current().(*Token) 342 | 343 | for { 344 | tok := s.Scan() 345 | 346 | // If this token is EOF or the mirror of the starting token then return. 347 | if tok, ok := tok.(*Token); ok { 348 | switch tok.Tok { 349 | case EOFToken: 350 | return b 351 | case RBrackToken: 352 | if b.Token.Tok == LBrackToken { 353 | return b 354 | } 355 | case RBraceToken: 356 | if b.Token.Tok == LBraceToken { 357 | return b 358 | } 359 | case RParenToken: 360 | if b.Token.Tok == LParenToken { 361 | return b 362 | } 363 | } 364 | } 365 | 366 | // Otherwise consume a component value. 367 | s.Unscan() 368 | b.Values = append(b.Values, p.ConsumeComponentValue(s)) 369 | } 370 | } 371 | 372 | // ConsumeFunction consumes a function. 373 | func (p *Parser) ConsumeFunction(s ComponentValueScanner) *Function { 374 | f := &Function{} 375 | 376 | // Set the name to the first token. 377 | // TODO(benbjohnson): Validate first token. 378 | f.Name = s.Current().(*Token).Value 379 | 380 | for { 381 | tok := s.Scan() 382 | 383 | // If this token is EOF or the mirror of the starting token then return. 384 | if tok, ok := tok.(*Token); ok && (tok.Tok == EOFToken || tok.Tok == RParenToken) { 385 | return f 386 | } 387 | 388 | // Otherwise consume a component value. 
389 | s.Unscan() 390 | f.Values = append(f.Values, p.ConsumeComponentValue(s)) 391 | } 392 | } 393 | 394 | // consumeDeclarationTokens collects contiguous non-semicolon and non-EOF tokens. 395 | func (p *Parser) consumeDeclarationValues(s ComponentValueScanner) ComponentValues { 396 | var a ComponentValues 397 | for { 398 | tok := s.Scan() 399 | if tok, ok := tok.(*Token); ok && (tok.Tok == SemicolonToken || tok.Tok == EOFToken) { 400 | s.Unscan() 401 | return a 402 | } 403 | a = append(a, tok) 404 | } 405 | } 406 | 407 | // skipComponentValues consumes all component values until a semicolon or EOF. 408 | func (p *Parser) skipComponentValues(s ComponentValueScanner) { 409 | for { 410 | v := p.ConsumeComponentValue(s) 411 | if tok, ok := v.(*Token); ok { 412 | switch tok.Tok { 413 | case SemicolonToken, EOFToken: 414 | return 415 | } 416 | } 417 | } 418 | } 419 | 420 | // skipWhitespace skips over all contiguous whitespace tokes. 421 | func (p *Parser) skipWhitespace(s ComponentValueScanner) { 422 | for { 423 | if tok, ok := s.Scan().(*Token); ok && tok.Tok != WhitespaceToken { 424 | s.Unscan() 425 | return 426 | } 427 | } 428 | } 429 | 430 | // ComponentValueScanner represents a type that can retrieve the next component value. 431 | type ComponentValueScanner interface { 432 | Current() ComponentValue 433 | Scan() ComponentValue 434 | Unscan() 435 | } 436 | 437 | // NewComponentValueScanner returns a scanner for a fixed list of component values. 438 | // This can be used with nodes which have blocks such as at-rules. For example, 439 | // a @media query can have a full ruleset inside its block. This block can be 440 | // further parsed using the consume functions on the Parser. 441 | func NewComponentValueScanner(values ComponentValues) ComponentValueScanner { 442 | return &componentValueScanner{i: -1, values: values} 443 | } 444 | 445 | // componentValueScanner represents a scanner for a fixed list of component values. 
446 | type componentValueScanner struct { 447 | i int 448 | values ComponentValues 449 | } 450 | 451 | // Current returns the current component value. 452 | func (s *componentValueScanner) Current() ComponentValue { 453 | if s.i >= len(s.values) { 454 | return &Token{Tok: EOFToken} 455 | } 456 | return s.values[s.i] 457 | } 458 | 459 | // Scan returns the next component value. 460 | func (s *componentValueScanner) Scan() ComponentValue { 461 | if s.i < len(s.values) { 462 | s.i++ 463 | } 464 | return s.Current() 465 | } 466 | 467 | // Unscan moves back one component value. 468 | func (s *componentValueScanner) Unscan() { 469 | if s.i > -1 { 470 | s.i-- 471 | } 472 | } 473 | -------------------------------------------------------------------------------- /parser_test.go: -------------------------------------------------------------------------------- 1 | package css_test 2 | 3 | import ( 4 | "bytes" 5 | "strings" 6 | "testing" 7 | 8 | "github.com/benbjohnson/css" 9 | ) 10 | 11 | // Ensure that a stylesheet can be parsed into an AST. 12 | func TestParser_ParseStyleSheet(t *testing.T) { 13 | var tests = []ParserTest{ 14 | {in: `foo { padding: 10px; } @bar;`, out: `foo { padding: 10px; } @bar;`}, 15 | } 16 | 17 | for _, tt := range tests { 18 | var p css.Parser 19 | v := p.ParseStyleSheet(css.NewScanner(strings.NewReader(tt.in))) 20 | tt.Assert(t, v, p.Errors) 21 | } 22 | } 23 | 24 | // Ensure that a list of rules can be parsed into an AST. 
25 | func TestParser_ParseRules(t *testing.T) { 26 | var tests = []ParserTest{ 27 | {in: `foo { padding: 10px; }`, out: `foo { padding: 10px; }`}, 28 | {in: `@import url(/css/screen.css) screen, projection;`, out: `@import url(/css/screen.css) screen, projection;`}, 29 | {in: `@xxx; foo { padding: 10 0; }`, out: `@xxx; foo { padding: 10 0; }`}, 30 | {in: ` foo { }`, out: ` foo { }`}, 31 | } 32 | 33 | for _, tt := range tests { 34 | var p css.Parser 35 | v := p.ParseRules(css.NewScanner(strings.NewReader(tt.in))) 36 | tt.Assert(t, v, p.Errors) 37 | } 38 | } 39 | 40 | // Ensure that a rule can be parsed into an AST. 41 | func TestParser_ParseRule(t *testing.T) { 42 | var tests = []ParserTest{ 43 | {in: `foo { padding: 10px; }`, out: `foo { padding: 10px; }`}, 44 | {in: `foo { padding: 10px; `, out: `foo { padding: 10px; }`}, 45 | {in: ` #foo bar, .baz bat {} `, out: `#foo bar, .baz bat {}`}, 46 | {in: `@media (max-width: 600px) { .nav { display: none; }}`, out: `@media (max-width: 600px) { .nav { display: none; }}`}, 47 | 48 | {in: ``, err: `unexpected EOF`}, 49 | {in: ` `, err: `unexpected EOF`}, 50 | {in: `foo {} bar`, err: `expected EOF, got bar`}, 51 | } 52 | 53 | for _, tt := range tests { 54 | var p css.Parser 55 | v := p.ParseRule(css.NewScanner(strings.NewReader(tt.in))) 56 | tt.Assert(t, v, p.Errors) 57 | } 58 | } 59 | 60 | // Ensure that a declaration can be parsed into an AST. 61 | func TestParser_ParseDeclaration(t *testing.T) { 62 | var tests = []ParserTest{ 63 | {in: `foo: bar`, out: `foo: bar`}, 64 | {in: `color: #FFFFFF !important`, out: `color: #FFFFFF !important`}, 65 | {in: `color: #FFFFFF ! important `, out: `color: #FFFFFF !important`}, 66 | {in: `color: !important `, out: `color: !important`}, 67 | {in: `color: $ important`, out: `color: $ important`}, 68 | {in: `color: ! importante`, out: `color: ! 
importante`}, 69 | 70 | {in: ``, err: `expected ident, got EOF`}, 71 | {in: ` foo bar`, err: `expected colon, got bar`}, 72 | } 73 | 74 | for _, tt := range tests { 75 | var p css.Parser 76 | v := p.ParseDeclaration(css.NewScanner(strings.NewReader(tt.in))) 77 | tt.Assert(t, v, p.Errors) 78 | } 79 | } 80 | 81 | // Ensure that a list of declarations can be parsed into an AST. 82 | func TestParser_ParseDeclarations(t *testing.T) { 83 | var tests = []ParserTest{ 84 | {in: `foo: bar`, out: `foo: bar;`}, 85 | {in: `font-size: 20px; font-weight:bold`, out: `font-size: 20px; font-weight:bold;`}, 86 | {in: `font-weight: bold; @page { margin: 1in; };`, out: `font-weight: bold; @page { margin: 1in; };`}, 87 | {in: `@page { margin: 1in; }; font-weight: bold;`, out: `@page { margin: 1in; }; font-weight: bold;`}, 88 | {in: `100; foo: bar`, out: `foo: bar;`, err: `unexpected: 100`}, 89 | } 90 | 91 | for _, tt := range tests { 92 | var p css.Parser 93 | v := p.ParseDeclarations(css.NewScanner(strings.NewReader(tt.in))) 94 | tt.Assert(t, v, p.Errors) 95 | } 96 | } 97 | 98 | // Ensure that component values can be parsed into the correct AST. 
99 | func TestParser_ParseComponentValue(t *testing.T) { 100 | var tests = []ParserTest{ 101 | {in: `foo`, out: `foo`}, 102 | {in: ` :`, out: `:`}, 103 | {in: ` : `, out: `:`}, 104 | {in: `{}`, out: `{}`}, 105 | {in: `{foo: bar}`, out: `{foo: bar}`}, 106 | {in: `{foo: {bar}}`, out: `{foo: {bar}}`}, 107 | {in: ` [12.34]`, out: `[12.34]`}, 108 | {in: ` [12.34]`, out: `[12.34]`}, 109 | {in: ` fun(12, 34, "foo")`, out: `fun(12, 34, "foo")`}, 110 | {in: ` fun("hello"`, out: `fun("hello")`}, 111 | 112 | {in: ``, err: `unexpected EOF`}, 113 | {in: ` foo bar`, err: `expected EOF, got bar`}, 114 | } 115 | 116 | for _, tt := range tests { 117 | var p css.Parser 118 | v := p.ParseComponentValue(css.NewScanner(strings.NewReader(tt.in))) 119 | tt.Assert(t, v, p.Errors) 120 | } 121 | } 122 | 123 | // Ensure that a list of component values can be parsed into the correct AST. 124 | func TestParser_ParseComponentValues(t *testing.T) { 125 | var tests = []ParserTest{ 126 | {in: `foo bar`, out: `foo bar`}, 127 | {in: `foo func(bar) { baz }`, out: `foo func(bar) { baz }`}, 128 | } 129 | 130 | for _, tt := range tests { 131 | var p css.Parser 132 | v := p.ParseComponentValues(css.NewScanner(strings.NewReader(tt.in))) 133 | tt.Assert(t, v, p.Errors) 134 | } 135 | } 136 | 137 | // Ensure that a ruleset can be parsed from a list of component values. 138 | func TestParser_ConsumeRules(t *testing.T) { 139 | var tests = []ParserTest{ 140 | {in: `@media (max-width: 600px) { @test xxx { width: 100 } .nav { display: none; } }`, out: `@test xxx { width: 100 } .nav { display: none; }`}, 141 | } 142 | 143 | for _, tt := range tests { 144 | var p css.Parser 145 | r := p.ParseRule(css.NewScanner(strings.NewReader(tt.in))) 146 | s := css.NewComponentValueScanner(r.(*css.AtRule).Block.Values) 147 | v := p.ConsumeRules(s, false) 148 | tt.Assert(t, v, p.Errors) 149 | } 150 | } 151 | 152 | // Ensure that consuming an empty string as a qualified rule returns an error. 
153 | func TestParser_ConsumeQualifiedRule_ErrUnexpectedEOF(t *testing.T) { 154 | var p css.Parser 155 | if v := p.ConsumeQualifiedRule(css.NewComponentValueScanner(nil)); v != nil { 156 | t.Errorf("unexpected value: %s", print(v)) 157 | } else if p.Errors.Error() != "unexpected EOF" { 158 | t.Errorf("expected error msg: %s", p.Errors.Error()) 159 | } 160 | } 161 | 162 | // ParserTest represents a generic framework for table tests against the parser. 163 | type ParserTest struct { 164 | in string // input CSS 165 | out string // matches against generated CSS 166 | err string // stringified error, empty string if no error. 167 | } 168 | 169 | // Assert validates the node against the output CSS and checks for errors. 170 | func (tt *ParserTest) Assert(t *testing.T, n css.Node, errors css.ErrorList) { 171 | var errstring string 172 | if len(errors) > 0 { 173 | errstring = errors.Error() 174 | } 175 | 176 | if (tt.err != "" || errstring != "") && tt.err != errstring { 177 | t.Errorf("<%q> error: exp=%q, got=%q", tt.in, tt.err, errstring) 178 | } else if n == nil && tt.out != "" { 179 | t.Errorf("<%q> expected value", tt.in) 180 | } else if print(n) != tt.out { 181 | t.Errorf("<%q>\n\nexp: %s\n\ngot: %s", tt.in, tt.out, print(n)) 182 | } 183 | } 184 | 185 | // print pretty prints an AST node to a string using the default configuration. 186 | func print(n css.Node) string { 187 | var buf bytes.Buffer 188 | var p css.Printer 189 | _ = p.Print(&buf, n) 190 | return buf.String() 191 | } 192 | -------------------------------------------------------------------------------- /printer.go: -------------------------------------------------------------------------------- 1 | package css 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "io" 7 | ) 8 | 9 | // TODO(benbjohnson): Allow collapsing whitespace. 10 | 11 | // Printer represents a configurable CSS printer. 
12 | type Printer struct{} 13 | 14 | func (p *Printer) Print(w io.Writer, n Node) (err error) { 15 | switch n := n.(type) { 16 | case *StyleSheet: 17 | if n == nil { 18 | return nil 19 | } 20 | for i, r := range n.Rules { 21 | if i > 0 { 22 | _, err = w.Write([]byte{' '}) 23 | } 24 | _ = p.Print(w, r) 25 | } 26 | 27 | case Rules: 28 | if n == nil { 29 | return nil 30 | } 31 | for i, r := range n { 32 | if i > 0 { 33 | _, _ = w.Write([]byte{' '}) 34 | } 35 | err = p.Print(w, r) 36 | } 37 | 38 | case *AtRule: 39 | if n == nil { 40 | return nil 41 | } 42 | _, _ = w.Write([]byte{'@'}) 43 | _, _ = w.Write([]byte(n.Name)) 44 | if len(n.Prelude) > 0 { 45 | _ = p.Print(w, n.Prelude) 46 | } 47 | if n.Block != nil { 48 | err = p.Print(w, n.Block) 49 | } else { 50 | _, err = w.Write([]byte{';'}) 51 | } 52 | 53 | case *QualifiedRule: 54 | if n == nil { 55 | return nil 56 | } 57 | _ = p.Print(w, n.Prelude) 58 | err = p.Print(w, n.Block) 59 | 60 | case *Declaration: 61 | if n == nil { 62 | return nil 63 | } 64 | _, _ = w.Write([]byte(n.Name)) 65 | _, _ = w.Write([]byte{':'}) 66 | err = p.Print(w, n.Values) 67 | if n.Important { 68 | _, err = w.Write([]byte("!important")) 69 | } 70 | 71 | case Declarations: 72 | if n == nil { 73 | return nil 74 | } 75 | for i, v := range n { 76 | if i > 0 { 77 | _, _ = w.Write([]byte{' '}) 78 | } 79 | _ = p.Print(w, v) 80 | _, err = w.Write([]byte{';'}) 81 | } 82 | 83 | case ComponentValues: 84 | if n == nil { 85 | return nil 86 | } 87 | for _, v := range n { 88 | err = p.Print(w, v) 89 | } 90 | 91 | case *SimpleBlock: 92 | if n == nil { 93 | return nil 94 | } 95 | switch n.Token.Tok { 96 | case LBraceToken: 97 | _, _ = w.Write([]byte{'{'}) 98 | case LBrackToken: 99 | _, _ = w.Write([]byte{'['}) 100 | case LParenToken: 101 | _, _ = w.Write([]byte{'('}) 102 | } 103 | 104 | _ = p.Print(w, n.Values) 105 | 106 | switch n.Token.Tok { 107 | case LBraceToken: 108 | _, _ = w.Write([]byte{'}'}) 109 | case LBrackToken: 110 | _, _ = w.Write([]byte{']'}) 111 
| case LParenToken: 112 | _, _ = w.Write([]byte{')'}) 113 | } 114 | 115 | case *Function: 116 | if n == nil { 117 | return nil 118 | } 119 | _, _ = w.Write([]byte(n.Name)) 120 | _, _ = w.Write([]byte{'('}) 121 | _ = p.Print(w, n.Values) 122 | _, err = w.Write([]byte{')'}) 123 | 124 | case *Token: 125 | if n == nil { 126 | return nil 127 | } 128 | switch n.Tok { 129 | case IdentToken: 130 | _, err = w.Write([]byte(n.Value)) 131 | case FunctionToken: 132 | _, err = w.Write([]byte(n.Value + "(")) 133 | case AtKeywordToken: 134 | _, err = w.Write([]byte("@" + n.Value)) 135 | case HashToken: 136 | _, err = w.Write([]byte("#" + n.Value)) 137 | case StringToken: 138 | _, err = w.Write([]byte(string(n.Ending) + n.Value + string(n.Ending))) 139 | case BadStringToken: 140 | _, err = w.Write([]byte("''")) 141 | case URLToken: 142 | _, err = w.Write([]byte("url(" + n.Value + ")")) 143 | case BadURLToken: 144 | _, err = w.Write([]byte("url()")) 145 | case DelimToken, NumberToken, PercentageToken, DimensionToken, WhitespaceToken: 146 | _, err = w.Write([]byte(n.Value)) 147 | case UnicodeRangeToken: 148 | if n.Start == n.End { 149 | _, err = fmt.Fprintf(w, "U+%06x", n.Start) 150 | } else { 151 | _, err = fmt.Fprintf(w, "U+%06x-U+%06x", n.Start, n.End) 152 | } 153 | case IncludeMatchToken: 154 | _, err = w.Write([]byte("~=")) 155 | case DashMatchToken: 156 | _, err = w.Write([]byte("|=")) 157 | case PrefixMatchToken: 158 | _, err = w.Write([]byte("^=")) 159 | case SuffixMatchToken: 160 | _, err = w.Write([]byte("$=")) 161 | case SubstringMatchToken: 162 | _, err = w.Write([]byte("*=")) 163 | case ColumnToken: 164 | _, err = w.Write([]byte("||")) 165 | case CDOToken: 166 | _, err = w.Write([]byte("")) 169 | case ColonToken: 170 | _, err = w.Write([]byte{':'}) 171 | case SemicolonToken: 172 | _, err = w.Write([]byte{';'}) 173 | case CommaToken: 174 | _, err = w.Write([]byte{','}) 175 | case LBrackToken: 176 | _, err = w.Write([]byte{'['}) 177 | case RBrackToken: 178 | _, err = 
w.Write([]byte{']'}) 179 | case LParenToken: 180 | _, err = w.Write([]byte{'('}) 181 | case RParenToken: 182 | _, err = w.Write([]byte{')'}) 183 | case LBraceToken: 184 | _, err = w.Write([]byte{'{'}) 185 | case RBraceToken: 186 | _, err = w.Write([]byte{'}'}) 187 | case EOFToken: 188 | _, err = w.Write([]byte("EOF")) 189 | } 190 | } 191 | 192 | return 193 | } 194 | 195 | // print pretty prints an AST node to a string using the default configuration. 196 | func print(n Node) string { 197 | var p Printer 198 | var buf bytes.Buffer 199 | _ = p.Print(&buf, n) 200 | return buf.String() 201 | } 202 | -------------------------------------------------------------------------------- /printer_test.go: -------------------------------------------------------------------------------- 1 | package css_test 2 | 3 | import ( 4 | "bytes" 5 | "testing" 6 | 7 | "github.com/benbjohnson/css" 8 | ) 9 | 10 | // Ensure than the printer prints nodes correctly. 11 | func TestPrinter_Print(t *testing.T) { 12 | var tests = []struct { 13 | in css.Node 14 | s string 15 | }{ 16 | // 0. Full stylesheet with multiple rules. 17 | {in: &css.StyleSheet{ 18 | Rules: []css.Rule{ 19 | &css.QualifiedRule{ 20 | Prelude: []css.ComponentValue{ 21 | &css.Token{Tok: css.IdentToken, Value: "foo"}, 22 | &css.Token{Tok: css.WhitespaceToken, Value: " "}, 23 | &css.Token{Tok: css.IdentToken, Value: "bar"}, 24 | }, 25 | Block: &css.SimpleBlock{ 26 | Token: &css.Token{Tok: css.LBraceToken}, 27 | Values: []css.ComponentValue{ 28 | &css.Token{Tok: css.IdentToken, Value: "font-size"}, 29 | &css.Token{Tok: css.ColonToken}, 30 | &css.Token{Tok: css.IdentToken, Value: "10px"}, 31 | }, 32 | }, 33 | }, 34 | &css.AtRule{ 35 | Name: "baz", 36 | Prelude: []css.ComponentValue{ 37 | &css.Token{Tok: css.WhitespaceToken, Value: " "}, 38 | &css.Token{Tok: css.IdentToken, Value: "my-rule"}, 39 | }, 40 | }, 41 | }, 42 | }, s: `foo bar{font-size:10px} @baz my-rule;`}, 43 | 44 | // Test that nil values are safe to print. 
45 | {in: (*css.StyleSheet)(nil), s: ``}, // 1 46 | {in: (css.Rules)(nil), s: ``}, // 2 47 | {in: (*css.AtRule)(nil), s: ``}, // 3 48 | {in: (*css.QualifiedRule)(nil), s: ``}, // 4 49 | {in: (css.Declarations)(nil), s: ``}, // 5 50 | {in: (*css.Declaration)(nil), s: ``}, // 6 51 | {in: (css.ComponentValues)(nil), s: ``}, // 7 52 | {in: (*css.SimpleBlock)(nil), s: ``}, // 8 53 | {in: (*css.Function)(nil), s: ``}, // 9 54 | {in: (*css.Token)(nil), s: ``}, // 10 55 | 56 | // Test individual tokens. 57 | {in: &css.Token{Tok: css.IdentToken, Value: "foo"}, s: `foo`}, // 11 58 | {in: &css.Token{Tok: css.FunctionToken, Value: "foo"}, s: `foo(`}, // 12 59 | {in: &css.Token{Tok: css.AtKeywordToken, Value: "☃"}, s: `@☃`}, // 13 60 | {in: &css.Token{Tok: css.HashToken, Value: "foo"}, s: `#foo`}, // 14 61 | {in: &css.Token{Tok: css.StringToken, Value: "foo", Ending: '"'}, s: `"foo"`}, // 15 62 | {in: &css.Token{Tok: css.StringToken, Value: "foo", Ending: '\''}, s: `'foo'`}, // 16 63 | {in: &css.Token{Tok: css.BadStringToken}, s: `''`}, // 17 64 | {in: &css.Token{Tok: css.URLToken, Value: "foo"}, s: `url(foo)`}, // 18 65 | {in: &css.Token{Tok: css.BadURLToken, Value: "foo"}, s: `url()`}, // 19 66 | {in: &css.Token{Tok: css.DelimToken, Value: "."}, s: `.`}, // 20 67 | {in: &css.Token{Tok: css.NumberToken, Value: "-20.3E2"}, s: `-20.3E2`}, // 21 68 | {in: &css.Token{Tok: css.PercentageToken, Value: "100%"}, s: `100%`}, // 22 69 | {in: &css.Token{Tok: css.DimensionToken, Value: "10cm"}, s: `10cm`}, // 23 70 | {in: &css.Token{Tok: css.WhitespaceToken, Value: " "}, s: ` `}, // 24 71 | {in: &css.Token{Tok: css.DelimToken, Value: "."}, s: `.`}, // 25 72 | {in: &css.Token{Tok: css.IncludeMatchToken}, s: `~=`}, // 26 73 | {in: &css.Token{Tok: css.DashMatchToken}, s: `|=`}, // 27 74 | {in: &css.Token{Tok: css.PrefixMatchToken}, s: `^=`}, // 28 75 | {in: &css.Token{Tok: css.SuffixMatchToken}, s: `$=`}, // 29 76 | {in: &css.Token{Tok: css.SubstringMatchToken}, s: `*=`}, // 30 77 | {in: 
&css.Token{Tok: css.ColumnToken}, s: `||`}, // 11 78 | {in: &css.Token{Tok: css.CDOToken}, s: ``}, // 11 80 | {in: &css.Token{Tok: css.ColonToken}, s: `:`}, // 11 81 | {in: &css.Token{Tok: css.SemicolonToken}, s: `;`}, // 11 82 | {in: &css.Token{Tok: css.CommaToken}, s: `,`}, // 11 83 | {in: &css.Token{Tok: css.LBrackToken}, s: `[`}, // 11 84 | {in: &css.Token{Tok: css.RBrackToken}, s: `]`}, // 11 85 | {in: &css.Token{Tok: css.LParenToken}, s: `(`}, // 11 86 | {in: &css.Token{Tok: css.RParenToken}, s: `)`}, // 11 87 | {in: &css.Token{Tok: css.LBraceToken}, s: `{`}, // 11 88 | {in: &css.Token{Tok: css.RBraceToken}, s: `}`}, // 11 89 | 90 | {in: &css.Token{Tok: css.UnicodeRangeToken, Start: 10, End: 10}, s: `U+00000a`}, // 11 91 | {in: &css.Token{Tok: css.UnicodeRangeToken, Start: 10, End: 20}, s: `U+00000a-U+000014`}, // 11 92 | 93 | {in: &css.Token{Tok: css.EOFToken}, s: `EOF`}, // 11 94 | } 95 | 96 | for i, tt := range tests { 97 | var buf bytes.Buffer 98 | var p css.Printer 99 | err := p.Print(&buf, tt.in) 100 | 101 | if err != nil { 102 | t.Errorf("%d. unexpected error: %s", i, tt.s) 103 | } else if tt.s != buf.String() { 104 | t.Errorf("%d. \n\nexp: %s\n\ngot: %s\n\n", i, tt.s, buf.String()) 105 | } 106 | } 107 | } 108 | 109 | // TODO(benbjohnson): Example: Printer.Print() 110 | -------------------------------------------------------------------------------- /scanner.go: -------------------------------------------------------------------------------- 1 | package css 2 | 3 | import ( 4 | "bufio" 5 | "bytes" 6 | "fmt" 7 | "io" 8 | "strconv" 9 | "strings" 10 | ) 11 | 12 | // eof represents an EOF file byte. 13 | var eof rune = -1 14 | 15 | // Scanner implements a CSS3 standard compliant tokenizer. 16 | // 17 | // This implementation only allows UTF-8 encoding. 18 | // @charset directives will be ignored. 19 | type Scanner struct { 20 | // Errors contains a list of all errors that occur during scanning. 
21 | Errors []*Error 22 | 23 | rd io.RuneReader 24 | 25 | tokbuf *Token // last token read from the scanner. 26 | tokbufn bool // whether the token buffer is in use. 27 | 28 | buf [4]rune // circular buffer for runes 29 | bufpos [4]Pos // circular buffer for position 30 | bufi int // circular buffer index 31 | bufn int // number of buffered characters 32 | } 33 | 34 | // NewScanner returns a new instance of Scanner. 35 | func NewScanner(r io.Reader) *Scanner { 36 | return &Scanner{rd: bufio.NewReader(r)} 37 | } 38 | 39 | // Scan returns the next token from the reader. 40 | func (s *Scanner) Scan() *Token { 41 | // If unscan was the last call then return the previous token again. 42 | if s.tokbufn { 43 | s.tokbufn = false 44 | return s.tokbuf 45 | } 46 | 47 | // Otherwise read from the reader and save the token. 48 | tok := s.scan() 49 | s.tokbuf = tok 50 | return tok 51 | } 52 | 53 | func (s *Scanner) scan() *Token { 54 | for { 55 | // Read next code point. 56 | ch := s.read() 57 | pos := s.pos() 58 | 59 | // If whitespace code point found, then consume all contiguous whitespace. 60 | if isWhitespace(ch) { 61 | return s.scanWhitespace() 62 | } 63 | 64 | // If a digit is found, consume a number. 65 | if isDigit(ch) { 66 | s.unread(1) 67 | return s.scanNumeric(pos) 68 | } 69 | 70 | // If a u or U is found, attempt to scan a unicode range. 71 | // If it's the beginning of a name then consume an identifier. 72 | if ch == 'u' || ch == 'U' { 73 | // Peek "+[0-9a-f]" or "+?", consume next code point, consume unicode-range. 74 | ch1, ch2 := s.read(), s.read() 75 | if ch1 == '+' && (isHexDigit(ch2) || ch2 == '?') { 76 | s.unread(1) 77 | return s.scanUnicodeRange() 78 | } 79 | // Otherwise reconsume as ident. 80 | s.unread(2) 81 | return s.scanIdent() 82 | } else if isNameStart(ch) { 83 | return s.scanIdent() 84 | } 85 | 86 | // Check against individual code points next. 
87 | switch ch { 88 | case eof: 89 | return &Token{Tok: EOFToken, Pos: pos} 90 | case '"', '\'': 91 | return s.scanString() 92 | case '#': 93 | return s.scanHash() 94 | 95 | case '$': 96 | if next := s.read(); next == '=' { 97 | return &Token{Tok: SuffixMatchToken, Pos: pos} 98 | } 99 | s.unread(1) 100 | return &Token{Tok: DelimToken, Value: string(ch), Pos: pos} 101 | 102 | case '*': 103 | if next := s.read(); next == '=' { 104 | return &Token{Tok: SubstringMatchToken, Pos: pos} 105 | } 106 | s.unread(1) 107 | return &Token{Tok: DelimToken, Value: string(ch), Pos: pos} 108 | 109 | case '^': 110 | if next := s.read(); next == '=' { 111 | return &Token{Tok: PrefixMatchToken, Pos: pos} 112 | } 113 | s.unread(1) 114 | return &Token{Tok: DelimToken, Value: string(ch), Pos: pos} 115 | 116 | case '~': 117 | if next := s.read(); next == '=' { 118 | return &Token{Tok: IncludeMatchToken, Pos: pos} 119 | } 120 | s.unread(1) 121 | return &Token{Tok: DelimToken, Value: string(ch), Pos: pos} 122 | 123 | case ',': 124 | return &Token{Tok: CommaToken, Pos: pos} 125 | 126 | case '-': 127 | // Check for a number or identifier. 128 | if s.peekNumber() { 129 | s.unread(1) 130 | return s.scanNumeric(pos) 131 | } else if s.peekIdent() { 132 | return s.scanIdent() 133 | } 134 | 135 | // Scan next two code points to see if we have a CDC (-->). 136 | ch1, ch2 := s.read(), s.read() 137 | if ch1 == '-' && ch2 == '>' { 138 | return &Token{Tok: CDCToken, Pos: pos} 139 | } 140 | s.unread(2) 141 | 142 | // Otherwise return the hyphen by itself. 143 | return &Token{Tok: DelimToken, Value: "-", Pos: pos} 144 | 145 | case '/': 146 | // Comments are ignored by the scanner so restart the loop from 147 | // the end of the comment and get the next token. 
148 | if ch1 := s.read(); ch1 == '*' { 149 | s.scanComment() 150 | continue 151 | } 152 | s.unread(1) 153 | return &Token{Tok: DelimToken, Value: "/", Pos: pos} 154 | 155 | case ':': 156 | return &Token{Tok: ColonToken, Pos: pos} 157 | case ';': 158 | return &Token{Tok: SemicolonToken, Pos: pos} 159 | 160 | case '<': 161 | // Attempt to read a comment open ("