├── .github └── workflows │ └── test.yaml ├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── css.go ├── css_test.go ├── fuzz_test.go ├── go.mod ├── go.sum ├── lex.go ├── lex_test.go ├── parse.go ├── parse_test.go ├── queue.go ├── queue_test.go └── testdata └── fuzz ├── FuzzParse ├── 771e938e4458e983a736261a702e27c7a414fd660a15b63034f290b146d2f217 ├── 78c19c977ebc4bbd1a8d570c4661a0bd5803195e4c805f7e32109d930b1ac85b └── ee189d6aaeb573cafd396e75348f04c44b83416d61cfca61c8446c5a41317cf8 └── FuzzSelector └── 00e15d22123489fd /.github/workflows/test.yaml: -------------------------------------------------------------------------------- 1 | name: test 2 | on: 3 | push: 4 | branches: 5 | - main 6 | pull_request: 7 | branches: 8 | - main 9 | 10 | jobs: 11 | test: 12 | strategy: 13 | matrix: 14 | os: [ubuntu-latest] 15 | go-version: [1.22.x, 1.23.x] 16 | runs-on: ${{ matrix.os }} 17 | steps: 18 | - name: Install Go 19 | uses: actions/setup-go@v2 20 | with: 21 | go-version: ${{ matrix.go-version }} 22 | - name: Checkout code 23 | uses: actions/checkout@v2 24 | - name: Build 25 | run: go build ./... 26 | - name: Test 27 | run: go test -v ./... 
28 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | bin/ 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2017 Eric Chiang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: cover 2 | cover: 3 | go test -coverprofile=bin/coverage.out 4 | go tool cover -html=bin/coverage.out 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CSS selectors in Go 2 | 3 | [![Go Reference](https://pkg.go.dev/badge/github.com/ericchiang/css.svg)](https://pkg.go.dev/github.com/ericchiang/css) 4 | 5 | This package implements a CSS selector compiler for Go's HTML parsing package golang.org/x/net/html. 6 | 7 | ```go 8 | package main 9 | 10 | import ( 11 | "fmt" 12 | "os" 13 | "strings" 14 | 15 | "github.com/ericchiang/css" 16 | "golang.org/x/net/html" 17 | ) 18 | 19 | var data = ` 20 |

21 |

a header

22 |

another header

23 |

` 24 | 25 | func main() { 26 | sel, err := css.Parse("h2#foo") 27 | if err != nil { 28 | panic(err) 29 | } 30 | node, err := html.Parse(strings.NewReader(data)) 31 | if err != nil { 32 | panic(err) 33 | } 34 | for _, ele := range sel.Select(node) { 35 | html.Render(os.Stdout, ele) 36 | } 37 | fmt.Println() 38 | } 39 | ``` 40 | 41 | ``` 42 | $ go run example/css.go 43 |

a header

44 | ``` 45 | -------------------------------------------------------------------------------- /css.go: -------------------------------------------------------------------------------- 1 | // Package css implements CSS selector HTML search. 2 | // 3 | // Selectors compiled by this package search through golang.org/x/net/html nodes and should 4 | // be used in conjunction with that package. 5 | // 6 | // data := `

7 | //

a header

8 | //

another header

9 | //

` 10 | // 11 | // sel, err := css.Parse("h2#foo") 12 | // if err != nil { 13 | // // handle error 14 | // } 15 | // node, err := html.Parse(strings.NewReader(data)) 16 | // if err != nil { 17 | // // handle error 18 | // } 19 | // elements := sel.Select(node) 20 | // 21 | // This package aims to support Selectors Level 4 https://www.w3.org/TR/selectors-4/. 22 | // 23 | // The universal selector (*) is supported, along with: 24 | // 25 | // a // Type selector 26 | // ns|a // Type selector with namespace 27 | // .red // Class selector 28 | // #demo // ID selector 29 | // [attr] // Attribute selector 30 | // [attr=value] // Attribute selector value 31 | // [attr~=value] // Attribute selector element of list 32 | // [attr|=value] // Attribute selector value or "{value}-" prefix 33 | // [attr^=value] // Attribute selector prefix 34 | // [attr$=value] // Attribute selector suffix 35 | // [attr*=value] // Attribute selector contains value 36 | // [attr=value i] // Attribute selector case insensitive modifier 37 | // foo, bar // Selector list 38 | // foo bar // Descendant combinator 39 | // foo > bar // Child combinator 40 | // foo ~ bar // General sibling combinator 41 | // foo + bar // Adjacent sibling combinator 42 | // :empty // Element with no children 43 | // :first-child // First child of parent 44 | // :first-of-type // First child of its type of parent 45 | // :last-child // Last child of parent 46 | // :last-of-type // Last child of its type of parent 47 | // :only-child // Only child of parent 48 | // :only-of-type // Only child of its type parent 49 | // :root // Root element 50 | // :nth-child(An+B) // Positional child matcher 51 | // :nth-last-child(An+B) // Reverse positional child matcher 52 | // :nth-last-of-type(An+B) // Reverse positional child matcher of type 53 | // :nth-of-type(An+B) // Positional child matcher of type 54 | package css 55 | 56 | import ( 57 | "errors" 58 | "fmt" 59 | "strings" 60 | 61 | "golang.org/x/net/html" 62 | 
"golang.org/x/net/html/atom" 63 | ) 64 | 65 | // ParseError is returned indicating an lex, parse, or compilation error with 66 | // the associated position in the string the error occurred. 67 | type ParseError struct { 68 | Pos int 69 | Msg string 70 | } 71 | 72 | // Error returns a formatted version of the error. 73 | func (p *ParseError) Error() string { 74 | return fmt.Sprintf("css: %s at position %d", p.Msg, p.Pos) 75 | } 76 | 77 | func errorf(pos int, msg string, v ...interface{}) error { 78 | return &ParseError{pos, fmt.Sprintf(msg, v...)} 79 | } 80 | 81 | // Selector is a compiled CSS selector. 82 | type Selector struct { 83 | s []*selector 84 | } 85 | 86 | // Select returns any matches from a parsed HTML document. 87 | func (s *Selector) Select(n *html.Node) []*html.Node { 88 | selected := []*html.Node{} 89 | for _, sel := range s.s { 90 | selected = append(selected, sel.find(n)...) 91 | } 92 | return selected 93 | } 94 | 95 | func findAll(n *html.Node, fn func(n *html.Node) bool) []*html.Node { 96 | var m []*html.Node 97 | if fn(n) { 98 | m = append(m, n) 99 | } 100 | for c := n.FirstChild; c != nil; c = c.NextSibling { 101 | if c.Type != html.ElementNode { 102 | continue 103 | } 104 | m = append(m, findAll(c, fn)...) 105 | } 106 | return m 107 | } 108 | 109 | // MustParse is like Parse but panics on errors. 110 | func MustParse(s string) *Selector { 111 | sel, err := Parse(s) 112 | if err != nil { 113 | panic(err) 114 | } 115 | return sel 116 | } 117 | 118 | // Parse compiles a complex selector list from a string. The parser supports 119 | // Selectors Level 4. 120 | // 121 | // Multiple selectors are supported through comma separated values. For example 122 | // "h1, h2". 123 | // 124 | // Parse reports the first error hit when compiling. 
125 | func Parse(s string) (*Selector, error) { 126 | p := newParser(s) 127 | list, err := p.parse() 128 | if err != nil { 129 | var perr *parseErr 130 | if errors.As(err, &perr) { 131 | return nil, &ParseError{perr.t.pos, perr.msg} 132 | } 133 | var lerr *lexErr 134 | if errors.As(err, &lerr) { 135 | return nil, &ParseError{lerr.last, lerr.msg} 136 | } 137 | return nil, err 138 | } 139 | sel := &Selector{} 140 | 141 | c := compiler{maxErrs: 1} 142 | for _, s := range list { 143 | m := c.compile(&s) 144 | if m == nil { 145 | continue 146 | } 147 | sel.s = append(sel.s, m) 148 | } 149 | if err := c.err(); err != nil { 150 | return nil, err 151 | } 152 | return sel, nil 153 | } 154 | 155 | type compiler struct { 156 | sels []complexSelector 157 | maxErrs int 158 | errs []error 159 | } 160 | 161 | func (c *compiler) err() error { 162 | if len(c.errs) == 0 { 163 | return nil 164 | } 165 | return c.errs[0] 166 | } 167 | 168 | func (c *compiler) errorf(pos int, msg string, v ...interface{}) bool { 169 | err := &ParseError{pos, fmt.Sprintf(msg, v...)} 170 | c.errs = append(c.errs, err) 171 | if len(c.errs) >= c.maxErrs { 172 | return true 173 | } 174 | return false 175 | } 176 | 177 | type combinator interface { 178 | find(n *html.Node) []*html.Node 179 | } 180 | 181 | type selector struct { 182 | m *compoundSelectorMatcher 183 | 184 | combinators []combinator 185 | } 186 | 187 | func (s selector) find(n *html.Node) []*html.Node { 188 | nodes := findAll(n, s.m.match) 189 | for _, c := range s.combinators { 190 | var ns []*html.Node 191 | for _, n := range nodes { 192 | ns = append(ns, c.find(n)...) 
193 | } 194 | nodes = ns 195 | } 196 | return nodes 197 | } 198 | 199 | type descendantCombinator struct { 200 | m *compoundSelectorMatcher 201 | } 202 | 203 | func (c *descendantCombinator) find(n *html.Node) []*html.Node { 204 | var nodes []*html.Node 205 | for n := n.FirstChild; n != nil; n = n.NextSibling { 206 | if n.Type != html.ElementNode { 207 | continue 208 | } 209 | nodes = append(nodes, findAll(n, c.m.match)...) 210 | } 211 | return nodes 212 | } 213 | 214 | type childCombinator struct { 215 | m *compoundSelectorMatcher 216 | } 217 | 218 | func (c *childCombinator) find(n *html.Node) []*html.Node { 219 | var nodes []*html.Node 220 | for n := n.FirstChild; n != nil; n = n.NextSibling { 221 | if n.Type != html.ElementNode { 222 | continue 223 | } 224 | if c.m.match(n) { 225 | nodes = append(nodes, n) 226 | } 227 | } 228 | return nodes 229 | } 230 | 231 | type adjacentCombinator struct { 232 | m *compoundSelectorMatcher 233 | } 234 | 235 | func (c *adjacentCombinator) find(n *html.Node) []*html.Node { 236 | var ( 237 | nodes []*html.Node 238 | prev *html.Node 239 | next *html.Node 240 | ) 241 | for prev = n.PrevSibling; prev != nil; prev = prev.PrevSibling { 242 | if prev.Type == html.ElementNode { 243 | break 244 | } 245 | } 246 | for next = n.NextSibling; next != nil; next = next.NextSibling { 247 | if next.Type == html.ElementNode { 248 | break 249 | } 250 | } 251 | if prev != nil && c.m.match(prev) { 252 | nodes = append(nodes, prev) 253 | } 254 | if next != nil && c.m.match(next) { 255 | nodes = append(nodes, next) 256 | } 257 | return nodes 258 | } 259 | 260 | type siblingCombinator struct { 261 | m *compoundSelectorMatcher 262 | } 263 | 264 | func (c *siblingCombinator) find(n *html.Node) []*html.Node { 265 | var nodes []*html.Node 266 | for n := n.PrevSibling; n != nil; n = n.PrevSibling { 267 | if n.Type != html.ElementNode { 268 | continue 269 | } 270 | if c.m.match(n) { 271 | nodes = append(nodes, n) 272 | } 273 | } 274 | for n := n.NextSibling; 
n != nil; n = n.NextSibling { 275 | if n.Type != html.ElementNode { 276 | continue 277 | } 278 | if c.m.match(n) { 279 | nodes = append(nodes, n) 280 | } 281 | } 282 | return nodes 283 | } 284 | 285 | func (c *compiler) compile(s *complexSelector) *selector { 286 | m := &selector{ 287 | m: c.compoundSelector(&s.sel), 288 | } 289 | curr := s 290 | for { 291 | if curr.next == nil { 292 | return m 293 | } 294 | sel := c.compoundSelector(&curr.next.sel) 295 | comb := curr.combinator 296 | 297 | curr = curr.next 298 | 299 | var cm combinator 300 | switch comb { 301 | case "": 302 | cm = &descendantCombinator{sel} 303 | case ">": 304 | cm = &childCombinator{sel} 305 | case "+": 306 | cm = &adjacentCombinator{sel} 307 | case "~": 308 | cm = &siblingCombinator{sel} 309 | default: 310 | c.errorf(curr.pos, "unexpected combinator: %s", comb) 311 | continue 312 | } 313 | m.combinators = append(m.combinators, cm) 314 | } 315 | return m 316 | } 317 | 318 | type compoundSelectorMatcher struct { 319 | m *typeSelectorMatcher 320 | scm []subclassSelectorMatcher 321 | } 322 | 323 | func (c *compoundSelectorMatcher) match(n *html.Node) bool { 324 | if c.m != nil { 325 | if !c.m.match(n) { 326 | return false 327 | } 328 | } 329 | for _, m := range c.scm { 330 | if !m.match(n) { 331 | return false 332 | } 333 | } 334 | return true 335 | } 336 | 337 | func (c *compiler) compoundSelector(s *compoundSelector) *compoundSelectorMatcher { 338 | m := &compoundSelectorMatcher{} 339 | if s.typeSelector != nil { 340 | m.m = c.typeSelector(s.typeSelector) 341 | } 342 | for _, sc := range s.subClasses { 343 | scm := c.subclassSelector(&sc) 344 | if scm != nil { 345 | m.scm = append(m.scm, *scm) 346 | } 347 | } 348 | if len(s.pseudoSelectors) != 0 { 349 | // It's not clear that it makes sense for us to support pseudo elements, 350 | // since this is more about modifying added elements than selecting elements. 
351 | // 352 | // https://developer.mozilla.org/en-US/docs/Web/CSS/Pseudo-elements 353 | if c.errorf(s.pos, "pseudo element selectors not supported") { 354 | return nil 355 | } 356 | } 357 | return m 358 | } 359 | 360 | type subclassSelectorMatcher struct { 361 | idSelector string 362 | classSelector string 363 | attributeSelector *attributeSelectorMatcher 364 | pseudoSelector func(*html.Node) bool 365 | } 366 | 367 | func (s *subclassSelectorMatcher) match(n *html.Node) bool { 368 | if s.idSelector != "" { 369 | for _, a := range n.Attr { 370 | if a.Key == "id" && a.Val == s.idSelector { 371 | return true 372 | } 373 | } 374 | return false 375 | } 376 | 377 | if s.classSelector != "" { 378 | for _, a := range n.Attr { 379 | if a.Key == "class" { 380 | for _, val := range strings.Fields(a.Val) { 381 | if val == s.classSelector { 382 | return true 383 | } 384 | } 385 | } 386 | } 387 | return false 388 | } 389 | 390 | if s.attributeSelector != nil { 391 | return s.attributeSelector.match(n) 392 | } 393 | 394 | if s.pseudoSelector != nil { 395 | return s.pseudoSelector(n) 396 | } 397 | return false 398 | } 399 | 400 | func (c *compiler) subclassSelector(s *subclassSelector) *subclassSelectorMatcher { 401 | m := &subclassSelectorMatcher{ 402 | idSelector: s.idSelector, 403 | classSelector: s.classSelector, 404 | } 405 | if s.attributeSelector != nil { 406 | m.attributeSelector = c.attributeSelector(s.attributeSelector) 407 | } 408 | if s.pseudoClassSelector != nil { 409 | m.pseudoSelector = c.pseudoClassSelector(s.pseudoClassSelector) 410 | } 411 | return m 412 | } 413 | 414 | type pseudoClassSelectorMatcher struct { 415 | matcher func(*html.Node) bool 416 | } 417 | 418 | func (c *compiler) pseudoClassSelector(s *pseudoClassSelector) func(*html.Node) bool { 419 | // https://developer.mozilla.org/en-US/docs/Web/CSS/Pseudo-classes 420 | switch s.ident { 421 | case "empty": 422 | return emptyMatcher 423 | case "first-child": 424 | return firstChildMatcher 425 | case 
"first-of-type": 426 | return firstOfTypeMatcher 427 | case "last-child": 428 | return lastChildMatcher 429 | case "last-of-type": 430 | return lastOfTypeMatcher 431 | case "only-child": 432 | return onlyChildMatcher 433 | case "only-of-type": 434 | return onlyOfTypeMatcher 435 | case "root": 436 | return rootMatcher 437 | case "": 438 | default: 439 | c.errorf(s.pos, "unsupported pseudo-class selector: %s", s.ident) 440 | return nil 441 | } 442 | 443 | switch s.function { 444 | case "nth-child(": 445 | return c.nthChild(s) 446 | case "nth-last-child(": 447 | return c.nthLastChild(s) 448 | case "nth-last-of-type(": 449 | return c.nthLastOfType(s) 450 | case "nth-of-type(": 451 | return c.nthOfType(s) 452 | default: 453 | c.errorf(s.pos, "unsupported pseudo-class selector: %s", s.function) 454 | return nil 455 | } 456 | 457 | return nil 458 | } 459 | 460 | // https://developer.mozilla.org/en-US/docs/Web/CSS/:nth-child 461 | func (c *compiler) nthChild(s *pseudoClassSelector) func(n *html.Node) bool { 462 | nth := c.compileNth(s) 463 | if nth == nil { 464 | return nil 465 | } 466 | return func(n *html.Node) bool { 467 | var i int64 = 1 468 | for s := n.PrevSibling; s != nil; s = s.PrevSibling { 469 | if s.Type == html.ElementNode { 470 | i++ 471 | } 472 | } 473 | return nth.matches(i) 474 | } 475 | } 476 | 477 | // https://developer.mozilla.org/en-US/docs/Web/CSS/:nth-of-type 478 | func (c *compiler) nthOfType(s *pseudoClassSelector) func(n *html.Node) bool { 479 | nth := c.compileNth(s) 480 | if nth == nil { 481 | return nil 482 | } 483 | return func(n *html.Node) bool { 484 | var i int64 = 1 485 | for s := n.PrevSibling; s != nil; s = s.PrevSibling { 486 | if s.Type == html.ElementNode && s.DataAtom == n.DataAtom { 487 | i++ 488 | } 489 | } 490 | return nth.matches(i) 491 | } 492 | } 493 | 494 | // https://developer.mozilla.org/en-US/docs/Web/CSS/:nth-last-child 495 | func (c *compiler) nthLastChild(s *pseudoClassSelector) func(n *html.Node) bool { 496 | nth := 
c.compileNth(s) 497 | if nth == nil { 498 | return nil 499 | } 500 | return func(n *html.Node) bool { 501 | var i int64 = 1 502 | for s := n.NextSibling; s != nil; s = s.NextSibling { 503 | if s.Type == html.ElementNode { 504 | i++ 505 | } 506 | } 507 | return nth.matches(i) 508 | } 509 | } 510 | 511 | // https://developer.mozilla.org/en-US/docs/Web/CSS/:nth-last-of-type 512 | func (c *compiler) nthLastOfType(s *pseudoClassSelector) func(n *html.Node) bool { 513 | nth := c.compileNth(s) 514 | if nth == nil { 515 | return nil 516 | } 517 | return func(n *html.Node) bool { 518 | var i int64 = 1 519 | for s := n.NextSibling; s != nil; s = s.NextSibling { 520 | if s.Type == html.ElementNode && n.DataAtom == s.DataAtom { 521 | i++ 522 | } 523 | } 524 | return nth.matches(i) 525 | } 526 | } 527 | 528 | // nth holds a computed An+B value for :nth-child() and its associated selectors. 529 | type nth struct { 530 | a int64 531 | b int64 532 | } 533 | 534 | func (nth nth) matches(val int64) bool { 535 | // Is there a value for "n" given "An+B=val" where "n" is non-negative? 
536 | 537 | // An + B = val 538 | // An = val - B 539 | // n = (val - B) / A 540 | if nth.a == 0 { 541 | return val == nth.b 542 | } 543 | return (val-nth.b)%nth.a == 0 && (val-nth.b)/nth.a >= 0 544 | } 545 | 546 | func (c *compiler) compileNth(s *pseudoClassSelector) *nth { 547 | p := newParserFromTokens(s.args) 548 | a, err := p.aNPlusB() 549 | if err != nil { 550 | c.errorf(s.pos, "failed to parse expression: %v", err) 551 | return nil 552 | } 553 | if err := p.expectWhitespaceOrEOF(); err != nil { 554 | c.errorf(s.pos, "failed to parse expression: %v", err) 555 | return nil 556 | } 557 | return a 558 | } 559 | 560 | // https://developer.mozilla.org/en-US/docs/Web/CSS/:empty 561 | func emptyMatcher(n *html.Node) bool { 562 | for c := n.FirstChild; c != nil; c = c.NextSibling { 563 | if c.Type == html.ElementNode { 564 | return false 565 | } 566 | } 567 | return true 568 | } 569 | 570 | // https://developer.mozilla.org/en-US/docs/Web/CSS/:first-child 571 | func firstChildMatcher(n *html.Node) bool { 572 | for s := n.PrevSibling; s != nil; s = s.PrevSibling { 573 | if s.Type == html.ElementNode { 574 | return false 575 | } 576 | } 577 | return true 578 | } 579 | 580 | // https://developer.mozilla.org/en-US/docs/Web/CSS/:first-of-type 581 | func firstOfTypeMatcher(n *html.Node) bool { 582 | for s := n.PrevSibling; s != nil; s = s.PrevSibling { 583 | if s.Type != html.ElementNode { 584 | continue 585 | } 586 | if s.DataAtom == n.DataAtom { 587 | return false 588 | } 589 | } 590 | return true 591 | } 592 | 593 | // https://developer.mozilla.org/en-US/docs/Web/CSS/:last-child 594 | func lastChildMatcher(n *html.Node) bool { 595 | for s := n.NextSibling; s != nil; s = s.NextSibling { 596 | if s.Type == html.ElementNode { 597 | return false 598 | } 599 | } 600 | return true 601 | } 602 | 603 | // https://developer.mozilla.org/en-US/docs/Web/CSS/:last-of-type 604 | func lastOfTypeMatcher(n *html.Node) bool { 605 | for s := n.NextSibling; s != nil; s = s.NextSibling { 606 
| if s.Type != html.ElementNode { 607 | continue 608 | } 609 | if s.DataAtom == n.DataAtom { 610 | return false 611 | } 612 | } 613 | return true 614 | } 615 | 616 | // https://developer.mozilla.org/en-US/docs/Web/CSS/:only-child 617 | func onlyChildMatcher(n *html.Node) bool { 618 | return firstChildMatcher(n) && lastChildMatcher(n) 619 | } 620 | 621 | // https://developer.mozilla.org/en-US/docs/Web/CSS/:only-of-type 622 | func onlyOfTypeMatcher(n *html.Node) bool { 623 | return firstOfTypeMatcher(n) && lastOfTypeMatcher(n) 624 | } 625 | 626 | // https://developer.mozilla.org/en-US/docs/Web/CSS/:root 627 | func rootMatcher(n *html.Node) bool { 628 | return n.Parent == nil 629 | } 630 | 631 | type attributeSelectorMatcher struct { 632 | ns namespaceMatcher 633 | fn func(key, val string) bool 634 | } 635 | 636 | func (a *attributeSelectorMatcher) match(n *html.Node) bool { 637 | for _, attr := range n.Attr { 638 | if a.ns.match(attr.Namespace) && a.fn(attr.Key, attr.Val) { 639 | return true 640 | } 641 | } 642 | return false 643 | } 644 | 645 | func (c *compiler) attributeSelector(s *attributeSelector) *attributeSelectorMatcher { 646 | m := &attributeSelectorMatcher{ 647 | ns: newNamespaceMatcher(s.wqName.hasPrefix, s.wqName.prefix), 648 | } 649 | key := s.wqName.value 650 | val := s.val 651 | 652 | if s.modifier { 653 | key = strings.ToLower(key) 654 | val = strings.ToLower(val) 655 | } 656 | 657 | // https://developer.mozilla.org/en-US/docs/Web/CSS/Attribute_selectors 658 | switch s.matcher { 659 | case "=": 660 | m.fn = func(k, v string) bool { return k == key && v == val } 661 | case "~=": 662 | m.fn = func(k, v string) bool { 663 | if k != key { 664 | return false 665 | } 666 | for _, f := range strings.Fields(v) { 667 | if f == val { 668 | return true 669 | } 670 | } 671 | return false 672 | } 673 | case "|=": 674 | // "Represents elements with an attribute name of attr whose value can be 675 | // exactly value or can begin with value immediately followed by a 
hyphen, 676 | // - (U+002D). It is often used for language subcode matches." 677 | m.fn = func(k, v string) bool { 678 | return k == key && (v == val || strings.HasPrefix(v, val+"-")) 679 | } 680 | case "^=": 681 | m.fn = func(k, v string) bool { 682 | return k == key && strings.HasPrefix(v, val) 683 | } 684 | case "$=": 685 | m.fn = func(k, v string) bool { 686 | return k == key && strings.HasSuffix(v, val) 687 | } 688 | case "*=": 689 | m.fn = func(k, v string) bool { 690 | return k == key && strings.Contains(v, val) 691 | } 692 | case "": 693 | m.fn = func(k, v string) bool { return k == key } 694 | default: 695 | c.errorf(s.pos, "unsupported attribute matcher: %s", s.matcher) 696 | return nil 697 | } 698 | if s.modifier { 699 | fn := m.fn 700 | m.fn = func(k, v string) bool { 701 | k = strings.ToLower(k) 702 | v = strings.ToLower(v) 703 | return fn(k, v) 704 | } 705 | } 706 | return m 707 | } 708 | 709 | // namespaceMatcher performs matching for elements and attributes. 710 | type namespaceMatcher struct { 711 | noNamespace bool 712 | namespace string 713 | } 714 | 715 | func newNamespaceMatcher(hasPrefix bool, prefix string) namespaceMatcher { 716 | if !hasPrefix { 717 | return namespaceMatcher{} 718 | } 719 | if prefix == "" { 720 | return namespaceMatcher{noNamespace: true} 721 | } 722 | if prefix == "*" { 723 | return namespaceMatcher{} 724 | } 725 | return namespaceMatcher{namespace: prefix} 726 | } 727 | 728 | func (n *namespaceMatcher) match(ns string) bool { 729 | if n.noNamespace { 730 | return ns == "" 731 | } 732 | if n.namespace == "" { 733 | return true 734 | } 735 | return n.namespace == ns 736 | } 737 | 738 | type typeSelectorMatcher struct { 739 | allAtoms bool 740 | atom atom.Atom 741 | ns namespaceMatcher 742 | } 743 | 744 | func (t *typeSelectorMatcher) match(n *html.Node) (ok bool) { 745 | if !(t.allAtoms || t.atom == n.DataAtom) { 746 | return false 747 | } 748 | return t.ns.match(n.Namespace) 749 | } 750 | 751 | func (c *compiler) 
typeSelector(s *typeSelector) *typeSelectorMatcher { 752 | m := &typeSelectorMatcher{} 753 | if s.value == "*" { 754 | m.allAtoms = true 755 | } else { 756 | a := atom.Lookup([]byte(s.value)) 757 | if a == 0 { 758 | if c.errorf(s.pos, "unrecognized node name: %s", s.value) { 759 | return nil 760 | } 761 | } 762 | m.atom = a 763 | } 764 | m.ns = newNamespaceMatcher(s.hasPrefix, s.prefix) 765 | return m 766 | } 767 | -------------------------------------------------------------------------------- /css_test.go: -------------------------------------------------------------------------------- 1 | package css 2 | 3 | import ( 4 | "bytes" 5 | "errors" 6 | "fmt" 7 | "reflect" 8 | "strings" 9 | "testing" 10 | 11 | "github.com/google/go-cmp/cmp" 12 | "golang.org/x/net/html" 13 | ) 14 | 15 | func (s *Selector) String() string { 16 | var b strings.Builder 17 | formatValue(reflect.ValueOf(s), &b, "") 18 | return b.String() 19 | } 20 | 21 | func formatValue(v reflect.Value, b *strings.Builder, ident string) { 22 | switch v.Kind() { 23 | case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: 24 | fmt.Fprintf(b, "%d", v.Int()) 25 | case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: 26 | fmt.Fprintf(b, "%d", v.Uint()) 27 | case reflect.Float32, reflect.Float64: 28 | fmt.Fprintf(b, "%f", v.Float()) 29 | case reflect.Bool: 30 | fmt.Fprintf(b, "%t", v.Bool()) 31 | case reflect.Array, reflect.Slice: 32 | if v.IsNil() { 33 | b.WriteString("[]") 34 | return 35 | } 36 | fmt.Fprintf(b, "[\n") 37 | for i := 0; i < v.Len(); i++ { 38 | b.WriteString(ident) 39 | b.WriteString("\t") 40 | formatValue(v.Index(i), b, ident+"\t") 41 | fmt.Fprintf(b, ",\n") 42 | } 43 | b.WriteString(ident) 44 | b.WriteString("]") 45 | case reflect.Func: 46 | if v.IsNil() { 47 | b.WriteString("") 48 | return 49 | } 50 | fmt.Fprintf(b, "") 51 | case reflect.Interface: 52 | if v.IsNil() { 53 | b.WriteString("") 54 | return 55 | } 56 | formatValue(v.Elem(), b, ident) 
57 | case reflect.Map: 58 | if v.IsNil() { 59 | b.WriteString("") 60 | return 61 | } 62 | iter := v.MapRange() 63 | fmt.Fprintf(b, "{\n") 64 | for iter.Next() { 65 | b.WriteString(ident) 66 | formatValue(iter.Key(), b, ident+"\n") 67 | fmt.Fprintf(b, ", ") 68 | formatValue(iter.Value(), b, ident) 69 | } 70 | fmt.Fprintf(b, "}") 71 | case reflect.Ptr: 72 | if v.IsNil() { 73 | b.WriteString("") 74 | return 75 | } 76 | fmt.Fprintf(b, "*") 77 | formatValue(reflect.Indirect(v), b, ident) 78 | case reflect.String: 79 | fmt.Fprintf(b, "%q", v.String()) 80 | case reflect.Struct: 81 | t := v.Type() 82 | fmt.Fprintf(b, "%s{\n", t.Name()) 83 | for i := 0; i < v.NumField(); i++ { 84 | b.WriteString(ident + "\t") 85 | b.WriteString(t.Field(i).Name) 86 | b.WriteString(": ") 87 | formatValue(v.Field(i), b, ident+"\t") 88 | b.WriteString(",\n") 89 | } 90 | b.WriteString(ident) 91 | b.WriteString("}") 92 | } 93 | } 94 | 95 | type selectorTest struct { 96 | sel string 97 | in string 98 | want []string 99 | } 100 | 101 | var selectorTests = []selectorTest{ 102 | { 103 | "a", 104 | `

`, 105 | []string{``}, 106 | }, 107 | { 108 | "body", 109 | `

`, 110 | []string{`

`}, 111 | }, 112 | { 113 | "body *", 114 | `

`, 115 | []string{`

`, ``}, 116 | }, 117 | { 118 | "body > *", 119 | `

`, 120 | []string{`

`}, 121 | }, 122 | { 123 | "div", 124 | `

`, 125 | []string{ 126 | `
`, 127 | `
`, 128 | }, 129 | }, 130 | { 131 | "div", 132 | `

`, 133 | []string{`
`, `
`}, 134 | }, 135 | { 136 | ".foo", 137 | `

`, 138 | []string{ 139 | `

`, 140 | `
`, 141 | }, 142 | }, 143 | { 144 | ".foo", 145 | `

`, 146 | []string{ 147 | `

`, 148 | `
`, 149 | }, 150 | }, 151 | { 152 | "div.foo", 153 | `

`, 154 | []string{`
`}, 155 | }, 156 | { 157 | "#foo", 158 | `

`, 159 | []string{`
`}, 160 | }, 161 | { 162 | "div#foo", 163 | `

`, 164 | []string{`
`}, 165 | }, 166 | { 167 | "a", 168 | `
`, 169 | []string{``}, 170 | }, 171 | { 172 | "*|a", 173 | `
`, 174 | []string{``}, 175 | }, 176 | { 177 | "svg|a", 178 | `
`, 179 | []string{``}, 180 | }, 181 | { 182 | "|a", 183 | `
`, 184 | []string{}, 185 | }, 186 | { 187 | "other|a", 188 | `
`, 189 | []string{}, 190 | }, 191 | { 192 | "svg|*", 193 | `
`, 194 | []string{ 195 | ``, 196 | ``, 197 | }, 198 | }, 199 | { 200 | "div[class=foo]", 201 | `

`, 202 | []string{ 203 | `
`, 204 | }, 205 | }, 206 | { 207 | "div[class*=o]", 208 | `

`, 209 | []string{ 210 | `
`, 211 | }, 212 | }, 213 | { 214 | "div[class~=foo]", 215 | `

`, 216 | []string{ 217 | `
`, 218 | }, 219 | }, 220 | { 221 | "div[class|=foo]", 222 | `

`, 223 | []string{ 224 | `
`, 225 | `
`, 226 | }, 227 | }, 228 | { 229 | "div[class^=foo]", 230 | `

`, 231 | []string{ 232 | `
`, 233 | `
`, 234 | }, 235 | }, 236 | { 237 | "div[class$=foo]", 238 | `

`, 239 | []string{ 240 | `
`, 241 | `
`, 242 | }, 243 | }, 244 | { 245 | "div[class]", 246 | `

`, 247 | []string{ 248 | `
`, 249 | `
`, 250 | `
`, 251 | }, 252 | }, 253 | { 254 | "div[class^=foO i]", 255 | `

`, 256 | []string{ 257 | `
`, 258 | `
`, 259 | }, 260 | }, 261 | { 262 | "div a", 263 | ` 264 |

265 |
266 | 267 |
268 |
269 |
270 | 271 |
272 |
273 | 274 |

275 | `, 276 | []string{ 277 | ``, 278 | ``, 279 | ``, 280 | }, 281 | }, 282 | { 283 | "div > a", 284 | ` 285 |

286 |
287 | 288 |
289 |
290 |
291 | 292 |
293 |
294 | 295 |

296 | `, 297 | []string{ 298 | ``, 299 | ``, 300 | }, 301 | }, 302 | { 303 | "div + a", 304 | ` 305 |

306 |
307 | 308 |
309 | 310 |

311 | 312 |

313 | `, 314 | []string{ 315 | ``, 316 | }, 317 | }, 318 | { 319 | "div ~ a", 320 | ` 321 |

322 |
323 | 324 |
325 | 326 |

327 | 328 |

329 | `, 330 | []string{ 331 | ``, 332 | ``, 333 | }, 334 | }, 335 | { 336 | "body p em", // https://github.com/ericchiang/css/issues/7 337 | ` 338 | 339 | 340 |

341 | 342 |

343 | 344 | 345 | `, 346 | []string{""}, 347 | }, 348 | { 349 | "div:empty", 350 | ` 351 |

352 |
353 | `, 354 | []string{`
`}, 355 | }, 356 | { 357 | ":root", 358 | ``, 359 | []string{``}, 360 | }, 361 | { 362 | "div:first-child", 363 | ` 364 |

365 |
366 |

367 |
368 |
369 |

370 | `, 371 | []string{ 372 | `

`, 373 | `
`, 374 | }, 375 | }, 376 | { 377 | "div:last-child", 378 | ` 379 |

380 |
381 |

382 |
383 |
384 |

385 | `, 386 | []string{ 387 | `
`, 388 | `
`, 389 | }, 390 | }, 391 | { 392 | "div:only-child", 393 | ` 394 |

395 |
396 |

397 |
398 |
399 |

400 | `, 401 | []string{ 402 | `
`, 403 | }, 404 | }, 405 | { 406 | ".test:first-of-type", 407 | ` 408 |

409 |
410 |

411 |
412 |
413 |

414 |

415 |
416 |

417 | `, 418 | []string{ 419 | `

`, 420 | `
`, 421 | `

`, 422 | }, 423 | }, 424 | { 425 | ".test:last-of-type", 426 | ` 427 |

428 |
429 |

430 |
431 |
432 |

433 |

434 |
435 |

436 | `, 437 | []string{ 438 | `
`, 439 | `

`, 440 | `

`, 441 | }, 442 | }, 443 | { 444 | ".test:only-of-type", 445 | ` 446 |

447 |
448 |

449 |
450 |
451 |

452 |

453 |
454 |

455 | `, 456 | []string{ 457 | `

`, 458 | }, 459 | }, 460 | { 461 | "li:nth-child(2)", 462 | ` 463 |
    464 |
  • 1
  • 465 |
  • 2
  • 466 |
  • 3
  • 467 |
  • 4
  • 468 |
  • 5
  • 469 |
  • 6
  • 470 |
  • 7
  • 471 |
  • 8
  • 472 |
473 | `, 474 | []string{ 475 | `
  • 2
  • `, 476 | }, 477 | }, 478 | { 479 | "li:nth-child(1n+2)", 480 | ` 481 |
      482 |
    • 1
    • 483 |
    • 2
    • 484 |
    • 3
    • 485 |
    • 4
    • 486 |
    • 5
    • 487 |
    • 6
    • 488 |
    • 7
    • 489 |
    • 8
    • 490 |
    491 | `, 492 | []string{ 493 | `
  • 2
  • `, 494 | `
  • 3
  • `, 495 | `
  • 4
  • `, 496 | `
  • 5
  • `, 497 | `
  • 6
  • `, 498 | `
  • 7
  • `, 499 | `
  • 8
  • `, 500 | }, 501 | }, 502 | { 503 | "li:nth-child(3n)", 504 | ` 505 |
      506 |
    • 1
    • 507 |
    • 2
    • 508 |
    • 3
    • 509 |
    • 4
    • 510 |
    • 5
    • 511 |
    • 6
    • 512 |
    • 7
    • 513 |
    • 8
    • 514 |
    515 | `, 516 | []string{ 517 | `
  • 3
  • `, 518 | `
  • 6
  • `, 519 | }, 520 | }, 521 | { 522 | "li:nth-child(3n+2)", 523 | ` 524 |
      525 |
    • 1
    • 526 |
    • 2
    • 527 |
    • 3
    • 528 |
    • 4
    • 529 |
    • 5
    • 530 |
    • 6
    • 531 |
    • 7
    • 532 |
    • 8
    • 533 |
    534 | `, 535 | []string{ 536 | `
  • 2
  • `, 537 | `
  • 5
  • `, 538 | `
  • 8
  • `, 539 | }, 540 | }, 541 | { 542 | "li:nth-child(3n+ 2)", 543 | ` 544 |
      545 |
    • 1
    • 546 |
    • 2
    • 547 |
    • 3
    • 548 |
    • 4
    • 549 |
    • 5
    • 550 |
    • 6
    • 551 |
    • 7
    • 552 |
    • 8
    • 553 |
    554 | `, 555 | []string{ 556 | `
  • 2
  • `, 557 | `
  • 5
  • `, 558 | `
  • 8
  • `, 559 | }, 560 | }, 561 | { 562 | "li:nth-child(3n - 2)", 563 | ` 564 |
      565 |
    • 1
    • 566 |
    • 2
    • 567 |
    • 3
    • 568 |
    • 4
    • 569 |
    • 5
    • 570 |
    • 6
    • 571 |
    • 7
    • 572 |
    • 8
    • 573 |
    574 | `, 575 | []string{ 576 | `
  • 1
  • `, 577 | `
  • 4
  • `, 578 | `
  • 7
  • `, 579 | }, 580 | }, 581 | { 582 | "li:nth-child(even)", 583 | ` 584 |
      585 |
    • 1
    • 586 |
    • 2
    • 587 |
    • 3
    • 588 |
    • 4
    • 589 |
    • 5
    • 590 |
    • 6
    • 591 |
    • 7
    • 592 |
    • 8
    • 593 |
    594 | `, 595 | []string{ 596 | `
  • 2
  • `, 597 | `
  • 4
  • `, 598 | `
  • 6
  • `, 599 | `
  • 8
  • `, 600 | }, 601 | }, 602 | { 603 | "li:nth-child(odd)", 604 | ` 605 |
      606 |
    • 1
    • 607 |
    • 2
    • 608 |
    • 3
    • 609 |
    • 4
    • 610 |
    • 5
    • 611 |
    • 6
    • 612 |
    • 7
    • 613 |
    • 8
    • 614 |
    615 | `, 616 | []string{ 617 | `
  • 1
  • `, 618 | `
  • 3
  • `, 619 | `
  • 5
  • `, 620 | `
  • 7
  • `, 621 | }, 622 | }, 623 | { 624 | "li:nth-last-child(2)", 625 | ` 626 |
      627 |
    • 1
    • 628 |
    • 2
    • 629 |
    • 3
    • 630 |
    • 4
    • 631 |
    • 5
    • 632 |
    • 6
    • 633 |
    • 7
    • 634 |
    • 8
    • 635 |
    636 | `, 637 | []string{ 638 | `
  • 7
  • `, 639 | }, 640 | }, 641 | { 642 | "li:nth-last-child(1n+2)", 643 | ` 644 |
      645 |
    • 1
    • 646 |
    • 2
    • 647 |
    • 3
    • 648 |
    • 4
    • 649 |
    • 5
    • 650 |
    • 6
    • 651 |
    • 7
    • 652 |
    • 8
    • 653 |
    654 | `, 655 | []string{ 656 | `
  • 1
  • `, 657 | `
  • 2
  • `, 658 | `
  • 3
  • `, 659 | `
  • 4
  • `, 660 | `
  • 5
  • `, 661 | `
  • 6
  • `, 662 | `
  • 7
  • `, 663 | }, 664 | }, 665 | { 666 | "li:nth-last-child(3n)", 667 | ` 668 |
      669 |
    • 1
    • 670 |
    • 2
    • 671 |
    • 3
    • 672 |
    • 4
    • 673 |
    • 5
    • 674 |
    • 6
    • 675 |
    • 7
    • 676 |
    • 8
    • 677 |
    678 | `, 679 | []string{ 680 | `
  • 3
  • `, 681 | `
  • 6
  • `, 682 | }, 683 | }, 684 | { 685 | "li:nth-last-child(3n+2)", 686 | ` 687 |
      688 |
    • 1
    • 689 |
    • 2
    • 690 |
    • 3
    • 691 |
    • 4
    • 692 |
    • 5
    • 693 |
    • 6
    • 694 |
    • 7
    • 695 |
    • 8
    • 696 |
    697 | `, 698 | []string{ 699 | `
  • 1
  • `, 700 | `
  • 4
  • `, 701 | `
  • 7
  • `, 702 | }, 703 | }, 704 | { 705 | "li:nth-last-child(3n+2)", 706 | ` 707 |
      708 |
    • 1
    • 709 |
    • 2
    • 710 |
    • 3
    • 711 |
    • 4
    • 712 |
    • 5
    • 713 |
    • 6
    • 714 |
    • 7
    • 715 |
    • 8
    • 716 |
    717 | `, 718 | []string{ 719 | `
  • 1
  • `, 720 | `
  • 4
  • `, 721 | `
  • 7
  • `, 722 | }, 723 | }, 724 | { 725 | "ul :nth-of-type(3n+2)", 726 | ` 727 |
      728 |

      729 |
    • 1
    • 730 |

      731 |
    • 2
    • 732 |
    • 3
    • 733 |
    • 4
    • 734 |
    • 5
    • 735 |
    • 6
    • 736 |
    • 7
    • 737 |
    • 8
    • 738 |
    739 | `, 740 | []string{ 741 | `

    `, 742 | `
  • 2
  • `, 743 | `
  • 5
  • `, 744 | `
  • 8
  • `, 745 | }, 746 | }, 747 | { 748 | "ul :nth-last-of-type(3n+2)", 749 | ` 750 |
      751 |

      752 |
    • 1
    • 753 |

      754 |
    • 2
    • 755 |
    • 3
    • 756 |
    • 4
    • 757 |
    • 5
    • 758 |
    • 6
    • 759 |
    • 7
    • 760 |
    • 8
    • 761 |
    762 | `, 763 | []string{ 764 | `

    `, 765 | `
  • 1
  • `, 766 | `
  • 4
  • `, 767 | `
  • 7
  • `, 768 | }, 769 | }, 770 | } 771 | 772 | func TestSelector(t *testing.T) { 773 | for _, test := range selectorTests { 774 | s, err := Parse(test.sel) 775 | if err != nil { 776 | t.Errorf("Parse(%q) failed %v", test.sel, err) 777 | continue 778 | } 779 | root, err := html.Parse(strings.NewReader(test.in)) 780 | if err != nil { 781 | t.Errorf("html.Parse(%q) failed %v", test.in, err) 782 | continue 783 | } 784 | 785 | // Re-render test case in case the parser is interpeting it differently than 786 | // we expect. 787 | b := &bytes.Buffer{} 788 | if err := html.Render(b, root); err != nil { 789 | t.Errorf("Re-rendering input %s: %v", test.in, err) 790 | continue 791 | } 792 | in := b.String() 793 | 794 | got := []string{} 795 | for _, n := range s.Select(root) { 796 | b := &bytes.Buffer{} 797 | if err := html.Render(b, n); err != nil { 798 | t.Errorf("Failed to render result of selecting %q from %s: %v", test.sel, in, err) 799 | continue 800 | } 801 | got = append(got, b.String()) 802 | } 803 | if diff := cmp.Diff(test.want, got); diff != "" { 804 | t.Errorf("Selecting %q (%s) from %s returned diff (-want, +got): %s", test.sel, s, in, diff) 805 | } 806 | } 807 | } 808 | 809 | func TestBadSelector(t *testing.T) { 810 | tests := []struct { 811 | sel string 812 | pos int 813 | }{ 814 | {":nth-child(3+4n)", 0}, 815 | } 816 | for _, test := range tests { 817 | _, err := Parse(test.sel) 818 | if err == nil { 819 | t.Errorf("Expected parsing %s to return error", test.sel) 820 | continue 821 | } 822 | var perr *ParseError 823 | if !errors.As(err, &perr) { 824 | t.Errorf("Expected parsing %s to return error of type *ParseError, got %T: %v", test.sel, err, err) 825 | continue 826 | } 827 | if test.pos != perr.Pos { 828 | t.Errorf("Parsing %s returned unexpected position, got=%d, want=%d", test.sel, perr.Pos, test.pos) 829 | } 830 | } 831 | } 832 | -------------------------------------------------------------------------------- /fuzz_test.go: 
-------------------------------------------------------------------------------- 1 | package css 2 | 3 | import ( 4 | "strings" 5 | "testing" 6 | 7 | "golang.org/x/net/html" 8 | ) 9 | 10 | func FuzzParse(f *testing.F) { 11 | corpus := []string{ 12 | "*", 13 | "a", 14 | "ns|a", 15 | ".red", 16 | "#demo", 17 | "[attr]", 18 | "[attr=value]", 19 | "[herf~=foo]", 20 | "[herf|=foo]", 21 | "[herf^=foo]", 22 | "[herf$=foo]", 23 | "[herf*=foo]", 24 | "[herf=foo i]", 25 | "h1 a", 26 | "h1, a", 27 | "h1 > a", 28 | "h1 ~ a", 29 | "h1 + a", 30 | "h1:empty", 31 | "h1:first-child", 32 | "h1:first-of-type", 33 | "h1:last-child", 34 | "h1:last-of-type", 35 | "h1:only-child", 36 | "h1:only-of-type", 37 | "h1:root", 38 | "h1:nth-child(1n + 3)", 39 | "h1:nth-child(odd)", 40 | "h1:nth-child(even)", 41 | "h1:nth-child(1n)", 42 | "h1:nth-child(3)", 43 | "h1:nth-child(+3)", 44 | "h1:last-child(1n + 3)", 45 | "h1:last-of-type(1n + 3)", 46 | "h1:nth-of-type(1n + 3)", 47 | } 48 | for _, s := range corpus { 49 | f.Add(s) 50 | } 51 | f.Fuzz(func(t *testing.T, s string) { 52 | Parse(s) 53 | }) 54 | } 55 | 56 | func FuzzSelector(f *testing.F) { 57 | for _, test := range selectorTests { 58 | f.Add(test.sel, test.in) 59 | } 60 | f.Fuzz(func(t *testing.T, sel, in string) { 61 | s, err := Parse(sel) 62 | if err != nil { 63 | t.Skip() 64 | } 65 | root, err := html.Parse(strings.NewReader(in)) 66 | if err != nil { 67 | t.Skip() 68 | } 69 | s.Select(root) 70 | }) 71 | } 72 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/ericchiang/css 2 | 3 | go 1.15 4 | 5 | require ( 6 | github.com/google/go-cmp v0.5.6 7 | golang.org/x/net v0.0.0-20211216030914-fe4d6282115f 8 | ) 9 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | 
github.com/google/go-cmp v0.5.6 h1:BKbKCqvP6I+rmFHt06ZmyQtvB8xAkWdhFyr0ZUNZcxQ= 2 | github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= 3 | golang.org/x/net v0.0.0-20211216030914-fe4d6282115f h1:hEYJvxw1lSnWIl8X9ofsYMklzaDs90JI2az5YMd4fPM= 4 | golang.org/x/net v0.0.0-20211216030914-fe4d6282115f/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= 5 | golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 6 | golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 7 | golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= 8 | golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 9 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 10 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= 11 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 12 | -------------------------------------------------------------------------------- /lex.go: -------------------------------------------------------------------------------- 1 | package css 2 | 3 | import ( 4 | "fmt" 5 | "strconv" 6 | "strings" 7 | "unicode/utf8" 8 | ) 9 | 10 | // lexer implements tokenization for CSS selectors. The algorithm follows the 11 | // spec recommentations. 
//
// https://www.w3.org/TR/css-syntax-3/#tokenization
type lexer struct {
	s    string // input being tokenized
	last int    // byte offset in s where the token being built starts
	pos  int    // byte offset in s of the next rune to read
}

// newLexer returns a lexer positioned at the start of s.
func newLexer(s string) *lexer {
	return &lexer{s: s}
}

// eof is the sentinel rune returned by peek and pop when no further rune can
// be read: either the input is exhausted or the remaining bytes are not valid
// UTF-8.
const eof = 0

// peek returns the next rune without consuming it, or eof if none can be
// read.
func (l *lexer) peek() rune {
	if len(l.s) <= l.pos {
		return eof
	}
	r, n := utf8.DecodeRuneInString(l.s[l.pos:])
	if r == utf8.RuneError && n == 1 {
		// Width 1 marks a malformed encoding. A literal U+FFFD decodes with
		// width 3 and is an ordinary code point, not the end of the input.
		return eof
	}
	return r
}

// peekN returns the rune n positions after the next one without consuming
// any input, so peekN(0) is equivalent to peek().
func (l *lexer) peekN(n int) rune {
	start := l.pos
	var r rune
	for i := 0; i <= n; i++ {
		r = l.pop()
	}
	l.pos = start
	return r
}

// push is the equivalent of "reconsume the current input code point". r must
// be the rune most recently returned by pop; the position is moved back by
// its encoded width.
func (l *lexer) push(r rune) {
	l.pos -= utf8.RuneLen(r)
}

// pop consumes and returns the next rune. It returns eof, without advancing,
// when the input is exhausted or the next bytes are not valid UTF-8.
func (l *lexer) pop() rune {
	if len(l.s) <= l.pos {
		return eof
	}
	r, n := utf8.DecodeRuneInString(l.s[l.pos:])
	if r == utf8.RuneError && n == 1 {
		// See peek: only a malformed encoding ends the input, a literal
		// U+FFFD is consumed like any other rune.
		return eof
	}
	l.pos += n
	return r
}

// popN consumes up to n runes, discarding them.
func (l *lexer) popN(n int) {
	for i := 0; i < n; i++ {
		l.pop()
	}
}

// tokenType identifies the kind of a token.
type tokenType int

// Create a shorter type alias so links to csswg.org don't wrap.
73 | type tt = tokenType 74 | 75 | const ( 76 | _ tt = iota 77 | tokenAtKeyword // https://drafts.csswg.org/css-syntax-3/#typedef-at-keyword-token 78 | tokenBracketClose // https://drafts.csswg.org/css-syntax-3/#tokendef-close-square 79 | tokenBracketOpen // https://drafts.csswg.org/css-syntax-3/#tokendef-open-square 80 | tokenCDC // https://drafts.csswg.org/css-syntax-3/#typedef-cdc-token 81 | tokenCDO // https://drafts.csswg.org/css-syntax-3/#typedef-cdo-token 82 | tokenColon // https://drafts.csswg.org/css-syntax-3/#typedef-colon-token 83 | tokenComma // https://drafts.csswg.org/css-syntax-3/#typedef-comma-token 84 | tokenCurlyClose // https://drafts.csswg.org/css-syntax-3/#tokendef-close-curly 85 | tokenCurlyOpen // https://drafts.csswg.org/css-syntax-3/#tokendef-open-curly 86 | tokenDelim // https://drafts.csswg.org/css-syntax-3/#typedef-delim-token 87 | tokenDimension // https://drafts.csswg.org/css-syntax-3/#typedef-dimension-token 88 | tokenEOF // https://drafts.csswg.org/css-syntax-3/#typedef-eof-token 89 | tokenFunction // https://drafts.csswg.org/css-syntax-3/#typedef-function-token 90 | tokenHash // https://drafts.csswg.org/css-syntax-3/#typedef-hash-token 91 | tokenIdent // https://www.w3.org/TR/css-syntax-3/#typedef-ident-token 92 | tokenNumber // https://drafts.csswg.org/css-syntax-3/#typedef-number-token 93 | tokenParenClose // https://drafts.csswg.org/css-syntax-3/#tokendef-close-paren 94 | tokenParenOpen // https://drafts.csswg.org/css-syntax-3/#tokendef-open-paren 95 | tokenPercent // https://drafts.csswg.org/css-syntax-3/#typedef-percentage-token 96 | tokenSemicolon // https://drafts.csswg.org/css-syntax-3/#typedef-semicolon-token 97 | tokenString // https://drafts.csswg.org/css-syntax-3/#typedef-string-token 98 | tokenURL // https://drafts.csswg.org/css-syntax-3/#typedef-url-token 99 | tokenWhitespace // https://drafts.csswg.org/css-syntax-3/#typedef-whitespace-token 100 | ) 101 | 102 | var tokenTypeString = map[tokenType]string{ 103 | 
tokenAtKeyword: "", 104 | tokenBracketClose: "<]-token>", 105 | tokenBracketOpen: "<[-token>", 106 | tokenCDC: "", 107 | tokenCDO: "", 108 | tokenColon: "", 109 | tokenComma: "", 110 | tokenCurlyClose: "<}-token>", 111 | tokenCurlyOpen: "<{-token>", 112 | tokenDelim: "", 113 | tokenDimension: "", 114 | tokenEOF: "", 115 | tokenFunction: "", 116 | tokenHash: "", 117 | tokenIdent: "", 118 | tokenNumber: "", 119 | tokenParenClose: "<)-token>", 120 | tokenParenOpen: "<(-token>", 121 | tokenPercent: "", 122 | tokenSemicolon: "", 123 | tokenString: "", 124 | tokenURL: "", 125 | tokenWhitespace: "", 126 | } 127 | 128 | func (t tokenType) String() string { 129 | if s, ok := tokenTypeString[t]; ok { 130 | return s 131 | } 132 | return fmt.Sprintf("<0x%x-token>", int(t)) 133 | } 134 | 135 | type token struct { 136 | typ tokenType 137 | raw string 138 | s string 139 | pos int 140 | flag tokenFlag 141 | dim string // dimension value, set by 142 | } 143 | 144 | func (t token) withDim(dim string) token { 145 | t.dim = dim 146 | return t 147 | } 148 | 149 | func (t token) withString(s string) token { 150 | t.s = s 151 | return t 152 | } 153 | 154 | func (t token) withFlag(flag tokenFlag) token { 155 | t.flag = flag 156 | return t 157 | } 158 | 159 | // tokenFlag holds "type flag" information about the token. 
160 | type tokenFlag int 161 | 162 | const ( 163 | tokenFlagNone tokenFlag = iota 164 | tokenFlagInteger 165 | tokenFlagID 166 | tokenFlagNumber 167 | tokenFlagUnrestricted 168 | ) 169 | 170 | var tokenFlagString = map[tokenFlag]string{ 171 | tokenFlagNone: "(no flag set)", 172 | tokenFlagInteger: "type=integer", 173 | tokenFlagID: "type=id", 174 | tokenFlagNumber: "type=number", 175 | tokenFlagUnrestricted: "type=unrestricted", 176 | } 177 | 178 | func (t tokenFlag) String() string { 179 | if s, ok := tokenFlagString[t]; ok { 180 | return s 181 | } 182 | return fmt.Sprintf("tokenFlag(0x%x)", int(t)) 183 | } 184 | 185 | func (t token) String() string { 186 | return fmt.Sprintf("%s %q pos=%d", t.typ, t.s, t.pos) 187 | } 188 | 189 | func (t token) isDelim(s string) bool { 190 | return t.typ == tokenDelim && t.s == s 191 | } 192 | 193 | func (t token) isIdent(s string) bool { 194 | return t.typ == tokenIdent && t.s == s 195 | } 196 | 197 | type lexErr struct { 198 | msg string 199 | last int 200 | pos int 201 | } 202 | 203 | func (l *lexErr) Error() string { 204 | return l.msg 205 | } 206 | 207 | func (l *lexer) errorf(format string, v ...interface{}) error { 208 | return &lexErr{fmt.Sprintf(format, v...), l.last, l.pos} 209 | } 210 | 211 | func (l *lexer) token(typ tokenType) token { 212 | s := l.s[l.last:l.pos] 213 | t := token{typ, s, s, l.last, 0, ""} 214 | l.last = l.pos 215 | return t 216 | } 217 | 218 | // https://www.w3.org/TR/css-syntax-3/#consume-token 219 | func (l *lexer) next() (token, error) { 220 | r := l.pop() 221 | 222 | if isWhitespace(r) { 223 | for isWhitespace(l.peek()) { 224 | l.pop() 225 | } 226 | return l.token(tokenWhitespace), nil 227 | } 228 | 229 | if isDigit(r) { 230 | l.push(r) 231 | return l.numericToken() 232 | } 233 | 234 | if isNameStart(r) { 235 | l.push(r) 236 | return l.identLikeToken() 237 | } 238 | 239 | switch r { 240 | case '"', '\'': 241 | return l.string(r) 242 | case eof: 243 | return l.token(tokenEOF), nil 244 | case '#': 
245 | if isName(l.peek()) || isValidEscape(l.peek(), l.peekN(1)) { 246 | var b strings.Builder 247 | b.WriteRune(r) 248 | if err := l.consumeName(&b); err != nil { 249 | return token{}, err 250 | } 251 | return l.token(tokenHash).withString(b.String()).withFlag(tokenFlagID), nil 252 | } 253 | return l.token(tokenDelim), nil 254 | case '(': 255 | return l.token(tokenParenOpen), nil 256 | case ')': 257 | return l.token(tokenParenClose), nil 258 | case '+': 259 | if isNumStart(r, l.peek(), l.peekN(1)) { 260 | l.push(r) 261 | return l.numericToken() 262 | } 263 | return l.token(tokenDelim), nil 264 | case ',': 265 | return l.token(tokenComma), nil 266 | case '-': 267 | if isNumStart(r, l.peek(), l.peekN(1)) { 268 | l.push(r) 269 | return l.numericToken() 270 | } 271 | if l.peek() == '-' && l.peekN(1) == '>' { 272 | l.popN(2) 273 | return l.token(tokenCDC), nil 274 | } 275 | if isIdentStart(r, l.peek(), l.peekN(1)) { 276 | l.push(r) 277 | return l.identLikeToken() 278 | } 279 | return l.token(tokenDelim), nil 280 | case '.': 281 | if isNumStart(r, l.peek(), l.peekN(1)) { 282 | l.push(r) 283 | return l.numericToken() 284 | } 285 | return l.token(tokenDelim), nil 286 | case ':': 287 | return l.token(tokenColon), nil 288 | case ';': 289 | return l.token(tokenSemicolon), nil 290 | case '<': 291 | if l.peek() == '!' 
&& l.peekN(1) == '-' && l.peekN(2) == '-' { 292 | l.popN(3) 293 | return l.token(tokenCDO), nil 294 | } 295 | return l.token(tokenDelim), nil 296 | case '@': 297 | if isIdentStart(l.peek(), l.peekN(1), l.peekN(2)) { 298 | var b strings.Builder 299 | b.WriteRune(r) 300 | if err := l.consumeName(&b); err != nil { 301 | return token{}, err 302 | } 303 | return l.token(tokenAtKeyword).withString(b.String()), nil 304 | } 305 | return l.token(tokenDelim), nil 306 | case '[': 307 | return l.token(tokenBracketOpen), nil 308 | case '\\': 309 | if !isValidEscape(r, l.peek()) { 310 | return token{}, l.errorf("invalid escape character") 311 | } 312 | l.push(r) 313 | return l.identLikeToken() 314 | case ']': 315 | return l.token(tokenBracketClose), nil 316 | case '{': 317 | return l.token(tokenCurlyOpen), nil 318 | case '}': 319 | return l.token(tokenCurlyClose), nil 320 | } 321 | return l.token(tokenDelim), nil 322 | } 323 | 324 | // https://www.w3.org/TR/css-syntax-3/#consume-a-string-token 325 | func (l *lexer) string(quote rune) (token, error) { 326 | var b strings.Builder 327 | for { 328 | switch r := l.pop(); r { 329 | case quote: 330 | return l.token(tokenString).withString(b.String()), nil 331 | case eof: 332 | return token{}, l.errorf("unexpected eof parsing string") 333 | case '\n': 334 | return token{}, l.errorf("unexpected newline parsing string") 335 | case '\\': 336 | switch l.peek() { 337 | case eof: 338 | case '\n': 339 | return token{}, l.errorf("unexpected newline after '\\' parsing string") 340 | default: 341 | if err := l.consumeEscape(&b); err != nil { 342 | return token{}, l.errorf("parsing string: %v", err) 343 | } 344 | } 345 | default: 346 | b.WriteRune(r) 347 | } 348 | } 349 | } 350 | 351 | // https://www.w3.org/TR/css-syntax-3/#consume-an-escaped-code-point 352 | func (l *lexer) consumeEscape(b *strings.Builder) error { 353 | r := l.pop() 354 | if r == eof { 355 | return l.errorf("unexpected newline after escape sequence") 356 | } 357 | if !isHex(r) { 
358 | b.WriteRune(r) 359 | return nil 360 | } 361 | 362 | var hexRune strings.Builder 363 | n := 0 364 | for { 365 | r := l.peek() 366 | if isHex(r) { 367 | l.pop() 368 | n++ 369 | if n > 5 { 370 | return l.errorf("too many hex digits consuming escape sequence") 371 | } 372 | hexRune.WriteRune(r) 373 | continue 374 | } 375 | 376 | if isWhitespace(r) { 377 | l.pop() 378 | continue 379 | } 380 | 381 | s := hexRune.String() 382 | val, err := strconv.ParseUint(s, 16, 64) 383 | if err != nil { 384 | return l.errorf("failed to parse hex escape sequence %s: %v", s, err) 385 | } 386 | b.WriteRune(rune(val)) 387 | return nil 388 | } 389 | } 390 | 391 | // https://www.w3.org/TR/css-syntax-3/#consume-a-name 392 | func (l *lexer) consumeName(b *strings.Builder) error { 393 | for { 394 | r := l.peek() 395 | if isName(r) { 396 | b.WriteRune(l.pop()) 397 | continue 398 | } 399 | 400 | if isValidEscape(r, l.peekN(1)) { 401 | l.pop() 402 | if err := l.consumeEscape(b); err != nil { 403 | return err 404 | } 405 | continue 406 | } 407 | return nil 408 | } 409 | } 410 | 411 | // https://www.w3.org/TR/css-syntax-3/#consume-a-numeric-token 412 | func (l *lexer) numericToken() (token, error) { 413 | var b strings.Builder 414 | f := l.consumeNumber(&b) 415 | 416 | if isIdentStart(l.peek(), l.peekN(1), l.peekN(2)) { 417 | var dim strings.Builder 418 | if err := l.consumeName(&dim); err != nil { 419 | return token{}, err 420 | } 421 | return l.token(tokenDimension). 422 | withString(b.String()). 423 | withFlag(f). 
424 | withDim(dim.String()), nil 425 | } 426 | 427 | if l.peek() == '%' { 428 | b.WriteRune(l.pop()) 429 | return l.token(tokenPercent).withString(b.String()).withFlag(tokenFlagNumber), nil 430 | } 431 | return l.token(tokenNumber).withString(b.String()).withFlag(f), nil 432 | } 433 | 434 | // https://www.w3.org/TR/css-syntax-3/#consume-an-ident-like-token 435 | func (l *lexer) identLikeToken() (token, error) { 436 | var b strings.Builder 437 | if l.startsURL(&b) { 438 | return l.consumeURL(&b) 439 | } 440 | 441 | if err := l.consumeName(&b); err != nil { 442 | return token{}, err 443 | } 444 | 445 | if l.peek() == '(' { 446 | b.WriteRune(l.pop()) 447 | return l.token(tokenFunction).withString(b.String()), nil 448 | } 449 | 450 | return l.token(tokenIdent).withString(b.String()), nil 451 | } 452 | 453 | func (l *lexer) startsURL(b *strings.Builder) bool { 454 | if !(l.peek() == 'u' || l.peek() == 'U') { 455 | return false 456 | } 457 | if !(l.peekN(1) == 'r' || l.peekN(1) == 'R') { 458 | return false 459 | } 460 | if !(l.peekN(2) == 'l' || l.peekN(2) == 'L') { 461 | return false 462 | } 463 | if l.peekN(3) != '(' { 464 | return false 465 | } 466 | 467 | // Consume up to two characters of whitespace. 
468 | n := 4 469 | for i := 0; i < 2; i++ { 470 | if !isWhitespace(l.peekN(n)) { 471 | break 472 | } 473 | n++ 474 | } 475 | 476 | r1 := l.peekN(n) 477 | r2 := l.peekN(n + 1) 478 | 479 | r := r1 480 | if isWhitespace(r1) { 481 | r = r2 482 | } 483 | if r == '\'' || r == '"' { 484 | return false 485 | } 486 | 487 | for i := 0; i < 4; i++ { 488 | b.WriteRune(l.pop()) 489 | } 490 | return true 491 | } 492 | 493 | // https://www.w3.org/TR/css-syntax-3/#consume-a-url-token 494 | func (l *lexer) consumeURL(b *strings.Builder) (token, error) { 495 | for isWhitespace(l.peek()) { 496 | b.WriteRune(l.pop()) 497 | } 498 | 499 | for { 500 | r := l.pop() 501 | switch { 502 | case r == ')': 503 | b.WriteRune(r) 504 | return l.token(tokenURL).withString(b.String()), nil 505 | case r == eof: 506 | return token{}, l.errorf("unexpected eof parsing URL") 507 | case isWhitespace(r): 508 | b.WriteRune(r) 509 | for isWhitespace(l.peek()) { 510 | b.WriteRune(l.pop()) 511 | } 512 | r := l.pop() 513 | b.WriteRune(r) 514 | if r == ')' { 515 | return l.token(tokenURL).withString(b.String()), nil 516 | } 517 | return token{}, l.errorf("unexpected character parsing URL: %c", r) 518 | case r == '\'', r == '"', r == '(', isNonPrintable(r): 519 | return token{}, l.errorf("invalid character parsing URL: %c", r) 520 | case r == '\\': 521 | if !isValidEscape(r, l.peek()) { 522 | return token{}, l.errorf("invalid '\\' parsing URL") 523 | } 524 | if err := l.consumeEscape(b); err != nil { 525 | return token{}, l.errorf("invalid escape parsing URL: %v", err) 526 | } 527 | default: 528 | b.WriteRune(r) 529 | } 530 | } 531 | } 532 | 533 | // https://www.w3.org/TR/css-syntax-3/#consume-a-number 534 | func (l *lexer) consumeNumber(b *strings.Builder) tokenFlag { 535 | // 1. Initially set type to "integer". Let repr be the empty string. 536 | f := tokenFlagInteger 537 | 538 | // 2. If the next input code point is U+002B PLUS SIGN (+) or U+002D 539 | // HYPHEN-MINUS (-), consume it and append it to repr. 
540 | if l.peek() == '+' || l.peek() == '-' { 541 | b.WriteRune(l.pop()) 542 | } 543 | 544 | // 3. While the next input code point is a digit, consume it and append 545 | // it to repr. 546 | for isDigit(l.peek()) { 547 | b.WriteRune(l.pop()) 548 | } 549 | 550 | // 4. If the next 2 input code points are U+002E FULL STOP (.) followed 551 | // by a digit, then: 552 | if l.peek() == '.' && isDigit(l.peekN(1)) { 553 | // Consume them. 554 | // Append them to repr. 555 | b.WriteRune(l.pop()) 556 | b.WriteRune(l.pop()) 557 | f = tokenFlagNumber 558 | 559 | // While the next input code point is a digit, consume it and append 560 | // it to repr. 561 | for isDigit(l.peek()) { 562 | b.WriteRune(l.pop()) 563 | } 564 | } 565 | 566 | r1 := l.peek() 567 | r2 := l.peekN(1) 568 | r3 := l.peekN(2) 569 | 570 | // 5. If the next 2 or 3 input code points are U+0045 LATIN CAPITAL LETTER 571 | // E (E) or U+0065 LATIN SMALL LETTER E (e), optionally followed by U+002D 572 | // HYPHEN-MINUS (-) or U+002B PLUS SIGN (+), followed by a digit, then: 573 | if r1 == 'E' || r1 == 'e' { 574 | // Set type to "number". 
575 | f = tokenFlagNumber 576 | if isDigit(r2) { 577 | b.WriteRune(l.pop()) 578 | b.WriteRune(l.pop()) 579 | 580 | for isDigit(l.peek()) { 581 | b.WriteRune(l.pop()) 582 | } 583 | } else if (r2 == '+' || r2 == '-') && isDigit(r3) { 584 | b.WriteRune(l.pop()) 585 | b.WriteRune(l.pop()) 586 | b.WriteRune(l.pop()) 587 | 588 | for isDigit(l.peek()) { 589 | b.WriteRune(l.pop()) 590 | } 591 | } 592 | } 593 | return f 594 | } 595 | 596 | // https://www.w3.org/TR/css-syntax-3/#whitespace 597 | func isWhitespace(r rune) bool { 598 | switch r { 599 | case '\n', '\t', ' ': 600 | return true 601 | default: 602 | return false 603 | } 604 | } 605 | 606 | // https://www.w3.org/TR/css-syntax-3/#hex-digit 607 | func isHex(r rune) bool { 608 | return isDigit(r) || ('A' <= r && r <= 'F') || ('a' <= r && r <= 'f') 609 | } 610 | 611 | // https://www.w3.org/TR/css-syntax-3/#digit 612 | func isDigit(r rune) bool { 613 | return '0' <= r && r <= '9' 614 | } 615 | 616 | // https://www.w3.org/TR/css-syntax-3/#letter 617 | func isLetter(r rune) bool { 618 | return ('A' <= r && r <= 'Z') || ('a' <= r && r <= 'z') 619 | } 620 | 621 | // https://www.w3.org/TR/css-syntax-3/#non-ascii-code-point 622 | func isNonASCII(r rune) bool { 623 | return r > 0x80 624 | } 625 | 626 | // https://www.w3.org/TR/css-syntax-3/#name-code-point 627 | func isName(r rune) bool { 628 | return isNameStart(r) || isDigit(r) || r == '-' 629 | } 630 | 631 | // https://www.w3.org/TR/css-syntax-3/#name-start-code-point 632 | func isNameStart(r rune) bool { 633 | return isLetter(r) || isNonASCII(r) || r == '_' 634 | } 635 | 636 | // https://www.w3.org/TR/css-syntax-3/#check-if-three-code-points-would-start-a-number 637 | func isNumStart(r1, r2, r3 rune) bool { 638 | if r1 == '+' || r1 == '-' { 639 | if isDigit(r2) { 640 | return true 641 | } 642 | if r2 == '.' && isDigit(r3) { 643 | return true 644 | } 645 | return false 646 | } 647 | 648 | if r1 == '.' 
{ 649 | return isDigit(r2) 650 | } 651 | return isDigit(r1) 652 | } 653 | 654 | // https://www.w3.org/TR/css-syntax-3/#check-if-two-code-points-are-a-valid-escape 655 | func isValidEscape(r1, r2 rune) bool { 656 | if r1 != '\\' { 657 | return false 658 | } 659 | if r2 == '\n' || r2 == eof { 660 | return false 661 | } 662 | return true 663 | } 664 | 665 | // https://www.w3.org/TR/css-syntax-3/#check-if-three-code-points-would-start-an-identifier 666 | func isIdentStart(r1, r2, r3 rune) bool { 667 | if r1 == '-' { 668 | if isNameStart(r2) { 669 | return true 670 | } 671 | if isValidEscape(r2, r3) { 672 | return true 673 | } 674 | } 675 | if isNameStart(r1) { 676 | return true 677 | } 678 | if r1 == '\\' && isValidEscape(r1, r2) { 679 | return true 680 | } 681 | return false 682 | } 683 | 684 | func isNonPrintable(r rune) bool { 685 | if 0x0 <= r && r <= 0x8 { 686 | return true 687 | } 688 | if r == '\t' { 689 | return true 690 | } 691 | if 0xe <= r && r <= 0x1f { 692 | return true 693 | } 694 | if r == 0x7F { 695 | return true 696 | } 697 | return false 698 | } 699 | -------------------------------------------------------------------------------- /lex_test.go: -------------------------------------------------------------------------------- 1 | package css 2 | 3 | import ( 4 | "reflect" 5 | "testing" 6 | ) 7 | 8 | func tok(typ tokenType, s ...string) token { 9 | switch len(s) { 10 | case 1: 11 | return token{typ: typ, raw: s[0], s: s[0]} 12 | case 2: 13 | return token{typ: typ, raw: s[0], s: s[1]} 14 | } 15 | panic("invalid number of arguments") 16 | } 17 | 18 | func TestLexer(t *testing.T) { 19 | tests := []struct { 20 | s string 21 | want []token 22 | }{ 23 | { 24 | " ", 25 | []token{ 26 | tok(tokenWhitespace, " "), 27 | }, 28 | }, 29 | { 30 | " \t\n", 31 | []token{ 32 | tok(tokenWhitespace, " \t\n"), 33 | }, 34 | }, 35 | { 36 | " \"hello\" ", 37 | []token{ 38 | tok(tokenWhitespace, " "), 39 | tok(tokenString, "\"hello\"", "hello"), 40 | tok(tokenWhitespace, " "), 
41 | }, 42 | }, 43 | { 44 | ` "\{" `, 45 | []token{ 46 | tok(tokenWhitespace, " "), 47 | tok(tokenString, `"\{"`, "{"), 48 | tok(tokenWhitespace, " "), 49 | }, 50 | }, 51 | { 52 | ` "\0af" `, 53 | []token{ 54 | tok(tokenWhitespace, " "), 55 | tok(tokenString, `"\0af"`, "¯"), 56 | tok(tokenWhitespace, " "), 57 | }, 58 | }, 59 | { 60 | ` "\0a f" `, 61 | []token{ 62 | tok(tokenWhitespace, " "), 63 | tok(tokenString, `"\0a f"`, "¯"), 64 | tok(tokenWhitespace, " "), 65 | }, 66 | }, 67 | { 68 | `# "foo"`, 69 | []token{ 70 | tok(tokenDelim, "#"), 71 | tok(tokenWhitespace, " "), 72 | tok(tokenString, `"foo"`, "foo"), 73 | }, 74 | }, 75 | { 76 | `#foo`, 77 | []token{ 78 | tok(tokenHash, "#foo").withFlag(tokenFlagID), 79 | }, 80 | }, 81 | { 82 | `#\0100`, 83 | []token{ 84 | tok(tokenHash, `#\0100`, "#Ā").withFlag(tokenFlagID), 85 | }, 86 | }, 87 | { 88 | `#foo()`, 89 | []token{ 90 | tok(tokenHash, "#foo").withFlag(tokenFlagID), 91 | tok(tokenParenOpen, "("), 92 | tok(tokenParenClose, ")"), 93 | }, 94 | }, 95 | { 96 | `+`, 97 | []token{ 98 | tok(tokenDelim, "+"), 99 | }, 100 | }, 101 | { 102 | `+1`, 103 | []token{ 104 | tok(tokenNumber, "+1").withFlag(tokenFlagInteger), 105 | }, 106 | }, 107 | { 108 | `+1.1 +1.11e11 +1.11e+11 +`, 109 | []token{ 110 | tok(tokenNumber, "+1.1").withFlag(tokenFlagNumber), 111 | tok(tokenWhitespace, " "), 112 | tok(tokenNumber, "+1.11e11").withFlag(tokenFlagNumber), 113 | tok(tokenWhitespace, " "), 114 | tok(tokenNumber, "+1.11e+11").withFlag(tokenFlagNumber), 115 | tok(tokenWhitespace, " "), 116 | tok(tokenDelim, "+"), 117 | }, 118 | }, 119 | { 120 | `+1cm`, 121 | []token{ 122 | tok(tokenDimension, "+1cm").withString("+1").withDim("cm").withFlag(tokenFlagInteger), 123 | }, 124 | }, 125 | { 126 | `+50%`, 127 | []token{ 128 | tok(tokenPercent, "+50%").withFlag(tokenFlagNumber), 129 | }, 130 | }, 131 | { 132 | `,`, 133 | []token{ 134 | tok(tokenComma, ","), 135 | }, 136 | }, 137 | { 138 | `-1.1 -1.11e11 --> -1.11e-11 -`, 139 | []token{ 140 | 
tok(tokenNumber, "-1.1").withFlag(tokenFlagNumber), 141 | tok(tokenWhitespace, " "), 142 | tok(tokenNumber, "-1.11e11").withFlag(tokenFlagNumber), 143 | tok(tokenWhitespace, " "), 144 | tok(tokenCDC, "-->"), 145 | tok(tokenWhitespace, " "), 146 | tok(tokenNumber, "-1.11e-11").withFlag(tokenFlagNumber), 147 | tok(tokenWhitespace, " "), 148 | tok(tokenDelim, "-"), 149 | }, 150 | }, 151 | { 152 | `.1 .11e11 .11e-11 .`, 153 | []token{ 154 | tok(tokenNumber, ".1").withFlag(tokenFlagNumber), 155 | tok(tokenWhitespace, " "), 156 | tok(tokenNumber, ".11e11").withFlag(tokenFlagNumber), 157 | tok(tokenWhitespace, " "), 158 | tok(tokenNumber, ".11e-11").withFlag(tokenFlagNumber), 159 | tok(tokenWhitespace, " "), 160 | tok(tokenDelim, "."), 161 | }, 162 | }, 163 | { 164 | `:;`, 165 | []token{ 166 | tok(tokenColon, ":"), 167 | tok(tokenSemicolon, ";"), 168 | }, 169 | }, 170 | { 171 | `<