├── script └── cibuild ├── go.mod ├── wildmatch_casefold.go ├── wildmatch_nocasefold.go ├── .github ├── dependabot.yml └── workflows │ └── ci.yml ├── SECURITY.md ├── README.md ├── LICENSE.md ├── package.go ├── wildmatch_test.go └── wildmatch.go /script/cibuild: -------------------------------------------------------------------------------- 1 | #!/bin/sh -e 2 | 3 | go test ./... 4 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/git-lfs/wildmatch/v2 2 | 3 | go 1.15 4 | -------------------------------------------------------------------------------- /wildmatch_casefold.go: -------------------------------------------------------------------------------- 1 | // +build windows darwin 2 | 3 | package wildmatch 4 | 5 | func init() { 6 | SystemCase = CaseFold 7 | } 8 | -------------------------------------------------------------------------------- /wildmatch_nocasefold.go: -------------------------------------------------------------------------------- 1 | // +build !windows,!darwin 2 | 3 | package wildmatch 4 | 5 | func init() { 6 | SystemCase = func(w *Wildmatch) {} 7 | } 8 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | --- 2 | version: 2 3 | updates: 4 | - package-ecosystem: "github-actions" 5 | directory: "/" 6 | schedule: 7 | interval: "monthly" 8 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | Please see 2 | [SECURITY.md](https://github.com/git-lfs/git-lfs/blob/main/SECURITY.md) 3 | in the main Git LFS repository for information on how to report security 4 | vulnerabilities in this package. 
5 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | on: [push, pull_request] 3 | env: 4 | GOTOOLCHAIN: local 5 | 6 | jobs: 7 | build-go: 8 | name: Default build 9 | strategy: 10 | matrix: 11 | go: ['1.20.x', '1.21.x'] 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v5 15 | - uses: actions/setup-go@v6 16 | with: 17 | go-version: ${{ matrix.go }} 18 | - run: script/cibuild 19 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # wildmatch 2 | 3 | [![CI status][ci_badge]][ci_url] 4 | 5 | [ci_badge]: https://github.com/git-lfs/wildmatch/workflows/CI/badge.svg 6 | [ci_url]: https://github.com/git-lfs/wildmatch/actions?query=workflow%3ACI 7 | 8 | package `wildmatch` is a reimplementation of Git's `wildmatch.c`-style filepath pattern matching. 9 | 10 | For more information, see the [godoc][1]. 11 | 12 | [1]: https://godoc.org/github.com/git-lfs/wildmatch 13 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018- GitHub, Inc. 
and Git LFS contributors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /package.go: -------------------------------------------------------------------------------- 1 | // package Wildmatch is an implementation of Git's wildmatch.c-style pattern 2 | // matching. 3 | // 4 | // Wildmatch patterns are comprised of any combination of the following three 5 | // components: 6 | // 7 | // - String literals. A string literal is "foo", or "foo\*" (matching "foo", 8 | // and "foo\", respectively). In general, string literals match their exact 9 | // contents in a filepath, and cannot match over directories unless they 10 | // include the operating system-specific path separator. 11 | // 12 | // - Wildcards. There are three types of wildcards: 13 | // 14 | // - Single-asterisk ('*'): matches any combination of characters, any 15 | // number of times. 
Does not match path separators. 16 | // 17 | // - Single-question mark ('?'): matches any single character, but not a 18 | // path separator. 19 | // 20 | // - Double-asterisk ('**'): greedily matches any number of directories. 21 | // For example, '**/foo' matches '/foo', 'bar/baz/woot/foot', but not 22 | // 'foo/bar'. Double-asterisks must be separated by filepath separators 23 | // on either side. 24 | // 25 | // - Character groups. A character group is composed of a set of included and 26 | // excluded character types. The set of included character types begins the 27 | // character group, and a '^' or '!' separates it from the set of excluded 28 | // character types. 29 | // 30 | // A character type can be one of the following: 31 | // 32 | // - Character literal: a single character, i.e., 'c'. 33 | // 34 | // - Character group: a group of characters, i.e., '[:alnum:]', etc. 35 | // 36 | // - Character range: a range of characters, i.e., 'a-z'. 37 | // 38 | // A Wildmatch pattern can be any combination of the above components, in any 39 | // ordering, and repeated any number of times. 40 | package wildmatch 41 | -------------------------------------------------------------------------------- /wildmatch_test.go: -------------------------------------------------------------------------------- 1 | package wildmatch 2 | 3 | import ( 4 | "runtime" 5 | "testing" 6 | ) 7 | 8 | type Case struct { 9 | Pattern string 10 | Subject string 11 | Match bool 12 | Opts []opt 13 | MatchOpts MatchOpts 14 | } 15 | 16 | func (c *Case) Assert(t *testing.T) { 17 | defer func() { 18 | if err := recover(); err != nil { 19 | if c.Match { 20 | t.Errorf("could not parse: %s (%s)", c.Pattern, err) 21 | } 22 | } 23 | }() 24 | 25 | p := NewWildmatch(c.Pattern, c.Opts...) 
26 | if (c.MatchOpts != MatchOpts{} && p.MatchWithOpts(c.Subject, c.MatchOpts) != c.Match) || 27 | (c.MatchOpts == MatchOpts{} && p.Match(c.Subject) != c.Match) { 28 | if c.Match { 29 | t.Errorf("expected match: %s, %s", c.Pattern, c.Subject) 30 | } else { 31 | t.Errorf("unexpected match: %s, %s", c.Pattern, c.Subject) 32 | } 33 | } 34 | } 35 | 36 | var Cases = []*Case{ 37 | { 38 | Pattern: `foo`, 39 | Subject: `foo`, 40 | Match: true, 41 | }, 42 | { 43 | Pattern: `bar`, 44 | Subject: `foo`, 45 | Match: false, 46 | }, 47 | { 48 | Pattern: `???`, 49 | Subject: `foo`, 50 | Match: true, 51 | }, 52 | { 53 | Pattern: `??`, 54 | Subject: `foo`, 55 | Match: false, 56 | }, 57 | { 58 | Pattern: `*`, 59 | Subject: `foo`, 60 | Match: true, 61 | }, 62 | { 63 | Pattern: `f*`, 64 | Subject: `foo`, 65 | Match: true, 66 | }, 67 | { 68 | Pattern: `*f`, 69 | Subject: `foo`, 70 | Match: false, 71 | }, 72 | { 73 | Pattern: `*foo*`, 74 | Subject: `foo`, 75 | Match: true, 76 | }, 77 | { 78 | Pattern: `*ob*a*r*`, 79 | Subject: `foobar`, 80 | Match: true, 81 | }, 82 | { 83 | Pattern: `*ab`, 84 | Subject: `aaaaaaabababab`, 85 | Match: true, 86 | }, 87 | { 88 | Pattern: `foo\*`, 89 | Subject: `foo*`, 90 | Match: true, 91 | }, 92 | { 93 | Pattern: `foo\*bar`, 94 | Subject: `foobar`, 95 | Match: false, 96 | }, 97 | { 98 | Pattern: `f\\oo`, 99 | Subject: `f\oo`, 100 | Match: true, 101 | }, 102 | { 103 | Pattern: `*[al]?`, 104 | Subject: `ball`, 105 | Match: true, 106 | }, 107 | { 108 | Pattern: `[ten]`, 109 | Subject: `ten`, 110 | Match: false, 111 | }, 112 | { 113 | Pattern: `**[!te]`, 114 | Subject: `ten`, 115 | Match: true, 116 | }, 117 | { 118 | Pattern: `**[!ten]`, 119 | Subject: `ten`, 120 | Match: false, 121 | }, 122 | { 123 | Pattern: `t[a-g]n`, 124 | Subject: `ten`, 125 | Match: true, 126 | }, 127 | { 128 | Pattern: `t[!a-g]n`, 129 | Subject: `ten`, 130 | Match: false, 131 | }, 132 | { 133 | Pattern: `t[!a-g]n`, 134 | Subject: `ton`, 135 | Match: true, 136 | }, 137 | { 138 | Pattern: 
`t[^a-g]n`, 139 | Subject: `ton`, 140 | Match: true, 141 | }, 142 | { 143 | Pattern: `]`, 144 | Subject: `]`, 145 | Match: true, 146 | }, 147 | { 148 | Pattern: `foo*bar`, 149 | Subject: `foo/baz/bar`, 150 | Match: false, 151 | }, 152 | { 153 | Pattern: `foo?bar`, 154 | Subject: `foo/bar`, 155 | Match: false, 156 | }, 157 | { 158 | Pattern: `foo[/]bar`, 159 | Subject: `foo/bar`, 160 | Match: false, 161 | }, 162 | { 163 | Pattern: `f[^eiu][^eiu][^eiu][^eiu][^eiu]r`, 164 | Subject: `foo/bar`, 165 | Match: false, 166 | }, 167 | { 168 | Pattern: `f[^eiu][^eiu][^eiu][^eiu][^eiu]r`, 169 | Subject: `foo-bar`, 170 | Match: true, 171 | }, 172 | { 173 | Pattern: `**/foo`, 174 | Subject: `foo`, 175 | Match: true, 176 | }, 177 | { 178 | Pattern: `**/foo`, 179 | Subject: `/foo`, 180 | Match: true, 181 | }, 182 | { 183 | Pattern: `**/foo`, 184 | Subject: `bar/baz/foo`, 185 | Match: true, 186 | }, 187 | { 188 | Pattern: `*/foo`, 189 | Subject: `bar/baz/foo`, 190 | Match: false, 191 | }, 192 | { 193 | Pattern: `**/bar*`, 194 | Subject: `foo/bar/baz`, 195 | Match: false, 196 | }, 197 | { 198 | Pattern: `**/bar/*`, 199 | Subject: `deep/foo/bar/baz`, 200 | Match: true, 201 | }, 202 | { 203 | Pattern: `**/bar/*`, 204 | Subject: `deep/foo/bar/baz/`, 205 | Match: true, 206 | }, 207 | { 208 | Pattern: `**/bar/**`, 209 | Subject: `deep/foo/bar/baz/`, 210 | Match: true, 211 | }, 212 | { 213 | Pattern: `**/bar/*`, 214 | Subject: `deep/foo/bar`, 215 | Match: false, 216 | }, 217 | { 218 | Pattern: `**/bar/**`, 219 | Subject: `deep/foo/bar/`, 220 | Match: true, 221 | }, 222 | { 223 | Pattern: `**/bar/**`, 224 | Subject: `deep/foo/bar`, 225 | Match: false, 226 | }, 227 | { 228 | Pattern: `**/bar/**/*`, 229 | Subject: `deep/foo/bar/`, 230 | Match: true, 231 | }, 232 | { 233 | Pattern: `**/bar/**/*`, 234 | Subject: `deep/foo/bar`, 235 | Match: false, 236 | }, 237 | { 238 | Pattern: `**/bar/**/*`, 239 | Subject: `deep/bar/bar`, 240 | Match: false, 241 | }, 242 | { 243 | Pattern: `*/bar/**`, 244 | 
Subject: `foo/bar/baz/x`, 245 | Match: true, 246 | }, 247 | { 248 | Pattern: `*/bar/**`, 249 | Subject: `deep/foo/bar/baz/x`, 250 | Match: false, 251 | }, 252 | { 253 | Pattern: `**/bar/*/*`, 254 | Subject: `deep/foo/bar/baz/x`, 255 | Match: true, 256 | }, 257 | { 258 | Pattern: `*.txt`, 259 | Subject: `foo/bar/baz.txt`, 260 | Match: false, 261 | }, 262 | { 263 | Pattern: `*.txt`, 264 | Subject: `你好-世界.txt`, 265 | Match: true, 266 | }, 267 | { 268 | Pattern: `你好-世界.txt`, 269 | Subject: `你好-世界.txt`, 270 | Match: true, 271 | }, 272 | { 273 | Pattern: `foo*`, 274 | Subject: `foobar`, 275 | Match: true, 276 | }, 277 | { 278 | Pattern: `*foo*`, 279 | Subject: `somethingfoobar`, 280 | Match: true, 281 | }, 282 | { 283 | Pattern: `*foo`, 284 | Subject: `barfoo`, 285 | Match: true, 286 | }, 287 | { 288 | Pattern: `a[c-c]st`, 289 | Subject: `acrt`, 290 | Match: false, 291 | }, 292 | { 293 | Pattern: `a[c-c]rt`, 294 | Subject: `acrt`, 295 | Match: true, 296 | }, 297 | { 298 | Pattern: `\`, 299 | Subject: `''`, 300 | Match: false, 301 | }, 302 | { 303 | Pattern: `\`, 304 | Subject: `\`, 305 | Match: false, 306 | }, 307 | { 308 | Pattern: `*/\`, 309 | Subject: `/\`, 310 | Match: false, 311 | }, 312 | { 313 | Pattern: `foo`, 314 | Subject: `foo`, 315 | Match: true, 316 | }, 317 | { 318 | Pattern: `@foo`, 319 | Subject: `@foo`, 320 | Match: true, 321 | }, 322 | { 323 | Pattern: `@foo`, 324 | Subject: `foo`, 325 | Match: false, 326 | }, 327 | { 328 | Pattern: `\[ab]`, 329 | Subject: `[ab]`, 330 | Match: true, 331 | }, 332 | { 333 | Pattern: `[[]ab]`, 334 | Subject: `[ab]`, 335 | Match: true, 336 | }, 337 | { 338 | Pattern: `[[:]ab]`, 339 | Subject: `[ab]`, 340 | Match: true, 341 | }, 342 | { 343 | Pattern: `[[::]ab]`, 344 | Subject: `[ab]`, 345 | Match: false, 346 | }, 347 | { 348 | Pattern: `[[:digit]ab]`, 349 | Subject: `[ab]`, 350 | Match: false, 351 | }, 352 | { 353 | Pattern: `[\[:]ab]`, 354 | Subject: `[ab]`, 355 | Match: true, 356 | }, 357 | { 358 | Pattern: `\??\?b`, 359 
| Subject: `?a?b`, 360 | Match: true, 361 | }, 362 | { 363 | Pattern: `''`, 364 | Subject: `foo`, 365 | Match: false, 366 | }, 367 | { 368 | Pattern: `**/t[o]`, 369 | Subject: `foo/bar/baz/to`, 370 | Match: true, 371 | }, 372 | { 373 | Pattern: `[[:alpha:]][[:digit:]][[:upper:]]`, 374 | Subject: `a1B`, 375 | Match: true, 376 | }, 377 | { 378 | Pattern: `[[:digit:][:upper:][:space:]]`, 379 | Subject: `a`, 380 | Match: false, 381 | }, 382 | { 383 | Pattern: `[[:digit:][:upper:][:space:]]`, 384 | Subject: `A`, 385 | Match: true, 386 | }, 387 | { 388 | Pattern: `[[:digit:][:upper:][:space:]]`, 389 | Subject: `1`, 390 | Match: true, 391 | }, 392 | { 393 | Pattern: `[[:digit:][:upper:][:spaci:]]`, 394 | Subject: `1`, 395 | Match: false, 396 | }, 397 | { 398 | Pattern: `'`, 399 | Subject: `'`, 400 | Match: true, 401 | }, 402 | { 403 | Pattern: `[[:digit:][:upper:][:space:]]`, 404 | Subject: `.`, 405 | Match: false, 406 | }, 407 | { 408 | Pattern: `[[:digit:][:punct:][:space:]]`, 409 | Subject: `.`, 410 | Match: true, 411 | }, 412 | { 413 | Pattern: `[[:xdigit:]]`, 414 | Subject: `5`, 415 | Match: true, 416 | }, 417 | { 418 | Pattern: `[[:xdigit:]]`, 419 | Subject: `f`, 420 | Match: true, 421 | }, 422 | { 423 | Pattern: `[[:xdigit:]]`, 424 | Subject: `D`, 425 | Match: true, 426 | }, 427 | { 428 | Pattern: `[[:alnum:][:alpha:][:blank:][:cntrl:][:digit:][:graph:][:lower:][:print:][:punct:][:space:][:upper:][:xdigit:]]`, 429 | Subject: `_`, 430 | Match: true, 431 | }, 432 | { 433 | Pattern: `[^[:alnum:][:alpha:][:blank:][:cntrl:][:digit:][:lower:][:space:][:upper:][:xdigit:]]`, 434 | Subject: `.`, 435 | Match: true, 436 | }, 437 | { 438 | Pattern: `[a-c[:digit:]x-z]`, 439 | Subject: `5`, 440 | Match: true, 441 | }, 442 | { 443 | Pattern: `[a-c[:digit:]x-z]`, 444 | Subject: `b`, 445 | Match: true, 446 | }, 447 | { 448 | Pattern: `[a-c[:digit:]x-z]`, 449 | Subject: `y`, 450 | Match: true, 451 | }, 452 | { 453 | Pattern: `[a-c[:digit:]x-z]`, 454 | Subject: `q`, 455 | Match: 
false, 456 | }, 457 | { 458 | Pattern: `[\\-^]`, 459 | Subject: `]`, 460 | Match: true, 461 | }, 462 | { 463 | Pattern: `[\\-^]`, 464 | Subject: `[`, 465 | Match: false, 466 | }, 467 | { 468 | Pattern: `a[]b`, 469 | Subject: `ab`, 470 | Match: false, 471 | }, 472 | { 473 | Pattern: `a[]b`, 474 | Subject: `a[]b`, 475 | Match: false, 476 | }, 477 | { 478 | Pattern: `[!`, 479 | Subject: `ab`, 480 | Match: false, 481 | }, 482 | { 483 | Pattern: `[-`, 484 | Subject: `ab`, 485 | Match: false, 486 | }, 487 | { 488 | Pattern: `[-]`, 489 | Subject: `-`, 490 | Match: true, 491 | }, 492 | { 493 | Pattern: `[a-`, 494 | Subject: `-`, 495 | Match: false, 496 | }, 497 | { 498 | Pattern: `[!a-`, 499 | Subject: `-`, 500 | Match: false, 501 | }, 502 | { 503 | Pattern: `'`, 504 | Subject: `'`, 505 | Match: true, 506 | }, 507 | { 508 | Pattern: `'[`, 509 | Subject: `0`, 510 | Match: false, 511 | }, 512 | { 513 | Pattern: `[---]`, 514 | Subject: `-`, 515 | Match: true, 516 | }, 517 | { 518 | Pattern: `[------]`, 519 | Subject: `-`, 520 | Match: true, 521 | }, 522 | { 523 | Pattern: `[!------]`, 524 | Subject: `a`, 525 | Match: true, 526 | }, 527 | { 528 | Pattern: `[a^bc]`, 529 | Subject: `^`, 530 | Match: true, 531 | }, 532 | { 533 | Pattern: `[\]`, 534 | Subject: `\`, 535 | Match: false, 536 | }, 537 | { 538 | Pattern: `[\\]`, 539 | Subject: `\`, 540 | Match: true, 541 | }, 542 | { 543 | Pattern: `[!\\]`, 544 | Subject: `\`, 545 | Match: false, 546 | }, 547 | { 548 | Pattern: `[A-\\]`, 549 | Subject: `G`, 550 | Match: true, 551 | }, 552 | { 553 | Pattern: `b*a`, 554 | Subject: `aaabbb`, 555 | Match: false, 556 | }, 557 | { 558 | Pattern: `*ba*`, 559 | Subject: `aabcaa`, 560 | Match: false, 561 | }, 562 | { 563 | Pattern: `[,]`, 564 | Subject: `,`, 565 | Match: true, 566 | }, 567 | { 568 | Pattern: `[\\,]`, 569 | Subject: `,`, 570 | Match: true, 571 | }, 572 | { 573 | Pattern: `[\\,]`, 574 | Subject: `\`, 575 | Match: true, 576 | }, 577 | { 578 | Pattern: `[,-.]`, 579 | Subject: `-`, 
580 | Match: true, 581 | }, 582 | { 583 | Pattern: `[,-.]`, 584 | Subject: `+`, 585 | Match: false, 586 | }, 587 | { 588 | Pattern: `[,-.]`, 589 | Subject: `-.]`, 590 | Match: false, 591 | }, 592 | { 593 | Pattern: `-*-*-*-*-*-*-12-*-*-*-m-*-*-*`, 594 | Subject: `-adobe-courier-bold-o-normal--12-120-75-75-m-70-iso8859-1`, 595 | Match: true, 596 | }, 597 | { 598 | Pattern: `-*-*-*-*-*-*-12-*-*-*-m-*-*-*`, 599 | Subject: `-adobe-courier-bold-o-normal--12-120-75-75-X-70-iso8859-1`, 600 | Match: false, 601 | }, 602 | { 603 | Pattern: `-*-*-*-*-*-*-12-*-*-*-m-*-*-*`, 604 | Subject: `-adobe-courier-bold-o-normal--12-120-75-75-/-70-iso8859-1`, 605 | Match: false, 606 | }, 607 | { 608 | Pattern: `**/*a*b*g*n*t`, 609 | Subject: `abcd/abcdefg/abcdefghijk/abcdefghijklmnop.txt`, 610 | Match: true, 611 | }, 612 | { 613 | Pattern: `**/*a*b*g*n*t`, 614 | Subject: `abcd/abcdefg/abcdefghijk/abcdefghijklmnop.txtz`, 615 | Match: false, 616 | }, 617 | { 618 | Pattern: `file[[:space:]]with[[:space:]]spaces.\#`, 619 | Subject: `file with spaces.#`, 620 | Match: true, 621 | }, 622 | { 623 | Pattern: `foo`, 624 | Subject: `FOO`, 625 | Match: false, 626 | }, 627 | { 628 | Pattern: `foo`, 629 | Subject: `FOO`, 630 | Opts: []opt{CaseFold}, 631 | Match: true, 632 | }, 633 | { 634 | Pattern: `**/a*.txt`, 635 | Subject: `foo-a.txt`, 636 | Match: false, 637 | }, 638 | { 639 | Pattern: `*.txt`, 640 | Subject: `file.txt`, 641 | Opts: []opt{Basename}, 642 | Match: true, 643 | }, 644 | { 645 | Pattern: `file.txt`, 646 | Subject: `file.txt`, 647 | Opts: []opt{Basename, Contents}, 648 | Match: true, 649 | }, 650 | { 651 | Pattern: `*.txt`, 652 | Subject: `path/to/file.txt`, 653 | Opts: []opt{Basename}, 654 | Match: true, 655 | }, 656 | { 657 | Pattern: `path/to/*.txt`, 658 | Subject: `path/to/file.txt`, 659 | Opts: []opt{Basename}, 660 | Match: true, 661 | }, 662 | { 663 | Pattern: `path/to/*.txt`, 664 | Subject: `path/to/file.txt`, 665 | Match: true, 666 | }, 667 | { 668 | Pattern: `path/to/*.txt`, 
669 | Subject: `outside/of/path/to/file.txt`, 670 | Opts: []opt{Basename}, 671 | Match: false, 672 | }, 673 | { 674 | Pattern: `path/to/*.txt`, 675 | Subject: `path/to/some/intermediaries/to/file.txt`, 676 | Opts: []opt{Basename}, 677 | Match: false, 678 | }, 679 | { 680 | Pattern: `path/`, 681 | Subject: `path/to/some/intermediaries/to/file.txt`, 682 | Match: false, 683 | }, 684 | { 685 | // GitAttribute-style matching directory. 686 | // false becalse gitattribute never matches directories. 687 | Pattern: `anotherfile.txt/`, 688 | Subject: `anotherfile.txt`, 689 | Opts: []opt{GitAttributes}, 690 | MatchOpts: MatchOpts{IsDirectory: true}, 691 | Match: false, 692 | }, 693 | { 694 | // gitAttribute-style matching normal file. 695 | // false as gitattribute matches ending in '/' indicate 696 | // trying to match directory but gitattribute never matches directory 697 | Pattern: `anotherfile1.txt/`, 698 | Subject: `anotherfile1.txt`, 699 | Opts: []opt{GitAttributes}, 700 | Match: false, 701 | }, 702 | { 703 | // gitignore-style matching directory. 
704 | Pattern: `anotherfile2.txt/`, 705 | Subject: `anotherfile2.txt`, 706 | MatchOpts: MatchOpts{IsDirectory: true}, 707 | Match: true, 708 | }, 709 | { 710 | Pattern: `anotherfile3.txt/`, 711 | Subject: `anotherfile3.txt`, 712 | Match: false, 713 | }, 714 | { 715 | Pattern: `anotherfile4.txt`, 716 | Subject: `anotherfile4.txt/`, 717 | Opts: []opt{GitAttributes}, 718 | Match: false, 719 | }, 720 | { 721 | Pattern: `**/pdfkit.frameworks/pdfkit/**`, 722 | Subject: `MyFolder/libs/pdfkit.frameworks/pdfkit`, 723 | Match: false, 724 | }, 725 | { 726 | Pattern: `foo/`, 727 | Subject: `bar/baz/foo`, 728 | MatchOpts: MatchOpts{IsDirectory: true}, 729 | Match: true, 730 | }, 731 | { 732 | Pattern: `foo/`, 733 | Subject: `foo`, 734 | MatchOpts: MatchOpts{IsDirectory: true}, 735 | Match: true, 736 | }, 737 | { 738 | Pattern: `foo/`, 739 | Subject: `foo/`, 740 | Match: true, 741 | }, 742 | { 743 | Pattern: `/foo/`, 744 | Subject: `foo/`, 745 | Match: true, 746 | }, 747 | { 748 | Pattern: `big/b`, 749 | Subject: `big/b/b1`, 750 | Opts: []opt{Contents}, 751 | Match: true, 752 | }, 753 | { 754 | Pattern: `big`, 755 | Subject: `big/b/b1`, 756 | Opts: []opt{Contents}, 757 | Match: true, 758 | }, 759 | { 760 | Pattern: `b`, 761 | Subject: `big/b/b1`, 762 | Opts: []opt{Contents}, 763 | Match: true, 764 | }, 765 | { 766 | Pattern: `/foo/`, 767 | Subject: `foo/`, 768 | Opts: []opt{Contents}, 769 | Match: true, 770 | }, 771 | { 772 | Pattern: `/foo/`, 773 | Subject: `foo/`, 774 | Opts: []opt{Basename, Contents}, 775 | Match: true, 776 | }, 777 | { 778 | Pattern: `/foo`, 779 | Subject: `foo`, 780 | Match: true, 781 | }, 782 | { 783 | Pattern: `/foo/filename.txt`, 784 | Subject: `foo/filename.txt`, 785 | Match: true, 786 | }, 787 | { 788 | Pattern: `/foo/filename.txt`, 789 | Subject: `bar/foo/filename.txt`, 790 | Match: false, 791 | }, 792 | { 793 | Pattern: `/foo/*.txt`, 794 | Subject: `foo/filename.txt`, 795 | Match: true, 796 | }, 797 | { 798 | Pattern: `/*.txt`, 799 | Subject: 
`foo/filename.txt`, 800 | Match: false, 801 | }, 802 | { 803 | Pattern: `/foo/*.txt`, 804 | Subject: `bar/foo/filename.txt`, 805 | Match: false, 806 | }, 807 | { 808 | Pattern: `/foo/`, 809 | Subject: `foo`, 810 | MatchOpts: MatchOpts{IsDirectory: true}, 811 | Match: true, 812 | }, 813 | { 814 | Pattern: `/foo/`, 815 | Subject: `foo/filename.txt`, 816 | Opts: []opt{Contents}, 817 | Match: true, 818 | }, 819 | { 820 | Pattern: `/foo/**`, 821 | Subject: `foo/filename.txt`, 822 | Match: true, 823 | }, 824 | { 825 | Pattern: `path/`, 826 | Subject: `path/to/some/intermediaries/to/file.txt`, 827 | Opts: []opt{Contents}, 828 | Match: true, 829 | }, 830 | { 831 | Pattern: `to/`, 832 | Subject: `path/to/some/intermediaries/to/file.txt`, 833 | Opts: []opt{Contents}, 834 | Match: true, 835 | }, 836 | { 837 | Pattern: `nonexistent/`, 838 | Subject: `path/to/some/intermediaries/to/file.txt`, 839 | Opts: []opt{Contents}, 840 | Match: false, 841 | }, 842 | } 843 | 844 | func TestWildmatch(t *testing.T) { 845 | for _, c := range Cases { 846 | c.Assert(t) 847 | } 848 | } 849 | 850 | type SlashCase struct { 851 | Given string 852 | Expect string 853 | } 854 | 855 | func (c *SlashCase) Assert(t *testing.T) { 856 | got := slashEscape(c.Given) 857 | 858 | if c.Expect != got { 859 | t.Errorf("wildmatch: expected slashEscape(\"%s\") -> %s, got: %s", 860 | c.Given, 861 | c.Expect, 862 | got, 863 | ) 864 | } 865 | } 866 | 867 | func TestSlashEscape(t *testing.T) { 868 | for _, c := range []*SlashCase{ 869 | {Given: ``, Expect: ``}, 870 | {Given: `foo/bar`, Expect: `foo/bar`}, 871 | {Given: `foo\bar`, Expect: `foo/bar`}, 872 | {Given: `foo\*bar`, Expect: `foo\*bar`}, 873 | {Given: `foo\?bar`, Expect: `foo\?bar`}, 874 | {Given: `foo\[bar`, Expect: `foo\[bar`}, 875 | {Given: `foo\]bar`, Expect: `foo\]bar`}, 876 | {Given: `foo\#bar`, Expect: `foo\#bar`}, 877 | } { 878 | c.Assert(t) 879 | } 880 | } 881 | 882 | func TestCaseFold(t *testing.T) { 883 | m := NewWildmatch("*.bin", SystemCase) 884 | 
if runtime.GOOS == "windows" || runtime.GOOS == "darwin" { 885 | if !m.Match("UPCASE.BIN") { 886 | t.Errorf("wildmatch: expected system case to be folding") 887 | } 888 | } else if m.Match("UPCASE.BIN") { 889 | t.Errorf("wildmatch: expected system case to be non-folding") 890 | } 891 | } 892 | -------------------------------------------------------------------------------- /wildmatch.go: -------------------------------------------------------------------------------- 1 | package wildmatch 2 | 3 | import ( 4 | "fmt" 5 | "path/filepath" 6 | "strings" 7 | "unicode" 8 | "unicode/utf8" 9 | ) 10 | 11 | // opt is an option type for configuring a new Wildmatch instance. 12 | type opt func(w *Wildmatch) 13 | 14 | var ( 15 | // Basename allows the receiving Wildmatch to match paths where the 16 | // pattern matches only the basename of the path when the pattern does 17 | // not contain directory separators. 18 | // 19 | // If the pattern contains directory separators, or if this option is 20 | // not given, the entire path will be matched. 21 | Basename opt = func(w *Wildmatch) { 22 | w.basename = true 23 | } 24 | 25 | // CaseFold allows the receiving Wildmatch to match paths with 26 | // different case structuring as in the pattern. 27 | CaseFold opt = func(w *Wildmatch) { 28 | w.caseFold = true 29 | } 30 | 31 | // GitAttributes augments the functionality of the matching algorithm 32 | // to match behavior of git when working with .gitattributes files. 33 | GitAttributes opt = func(w *Wildmatch) { 34 | w.gitattributes = true 35 | } 36 | 37 | // Contents indicates that if a pattern matches a directory that is a 38 | // parent of a path, then that path is included. This is the behavior 39 | // of patterns for .gitignore. 
// MatchOpts carries per-call information about the path being matched. It is
// passed to MatchWithOpts; Match behaves as if the zero value had been given.
type MatchOpts struct {
	// IsDirectory indicates that the path being matched names a directory,
	// even when it is not written with a trailing '/'. When set, the matcher
	// appends the trailing '/' itself before splitting the path into
	// components.
	IsDirectory bool
}
const (
	// escapes lists every byte that, when preceded by a backslash, forms an
	// escape sequence in a pattern.
	escapes = "\\[]*?#"
)

// slashEscape normalizes the separators in the pattern "p" to forward slashes,
// independent of which separator the host machine uses.
//
// slashEscape respects escape sequences: a backslash followed by one of the
// bytes in "escapes" is copied through unchanged together with that byte, so
// `foo\*bar` is not rewritten to `foo/*bar`. Every other backslash, including
// one at the very end of "p", is treated as a path separator and becomes '/'.
//
// The input is processed byte-wise; all bytes other than '\' are copied as-is,
// so multi-byte UTF-8 sequences pass through untouched.
func slashEscape(p string) string {
	// Fast path: without a backslash there is nothing to rewrite.
	if strings.IndexByte(p, '\\') < 0 {
		return p
	}

	// The output is never longer than the input, so a single allocation of
	// len(p) bytes suffices (the previous `pp +=` loop re-allocated on
	// every byte).
	var b strings.Builder
	b.Grow(len(p))

	for i := 0; i < len(p); i++ {
		c := p[i]

		switch {
		case c != '\\':
			b.WriteByte(c)
		case i+1 < len(p) && escapable(p[i+1]):
			// Keep the escape sequence intact and skip the escaped byte.
			b.WriteByte('\\')
			b.WriteByte(p[i+1])
			i++
		default:
			// A backslash that escapes nothing is a path separator.
			b.WriteByte('/')
		}
	}

	return b.String()
}

// escapable returns whether the given "c" is escapable, i.e. whether it is one
// of the bytes listed in "escapes".
func escapable(c byte) bool {
	return strings.IndexByte(escapes, c) > -1
}
156 | func (w *Wildmatch) parseTokens(dirs []string) []token { 157 | if len(dirs) == 0 { 158 | return make([]token, 0) 159 | } 160 | 161 | var finalComponents []token 162 | 163 | if !w.gitattributes { 164 | trailingIsEmpty := len(dirs) > 1 && dirs[len(dirs)-1] == "" 165 | numNonEmptyDirs := len(dirs) 166 | if trailingIsEmpty { 167 | numNonEmptyDirs -= 1 168 | } 169 | if w.contents { 170 | finalComponents = []token{&trailingComponents{}} 171 | if trailingIsEmpty { 172 | // Strip off the trailing empty string. 173 | dirs = dirs[:numNonEmptyDirs] 174 | } 175 | } 176 | // If we have one component, ignoring trailing empty 177 | // components and we know that a directory is permissible… 178 | if numNonEmptyDirs == 1 && (trailingIsEmpty || w.contents) { 179 | // We don't have a slash in the middle, so this can go 180 | // anywhere in the hierarchy. If there had been a slash 181 | // here, it would have been anchored at the root. 182 | rest := w.parseTokensSimple(dirs) 183 | tokens := append([]token{&unanchoredDirectory{ 184 | Until: rest[0], 185 | }}) 186 | // If we're not matching all contents, then do include 187 | // the empty component so we don't match 188 | // non-directories. 189 | if finalComponents == nil && len(rest) > 1 { 190 | finalComponents = rest[1:] 191 | } 192 | return append(tokens, finalComponents...) 193 | } 194 | } 195 | components := w.parseTokensSimple(dirs) 196 | return append(components, finalComponents...) 197 | } 198 | 199 | func (w *Wildmatch) parseTokensSimple(dirs []string) []token { 200 | if len(dirs) == 0 { 201 | return make([]token, 0) 202 | } 203 | 204 | switch dirs[0] { 205 | case "": 206 | if len(dirs) == 1 { 207 | return []token{&component{fns: []componentFn{substring("")}}} 208 | } 209 | return w.parseTokensSimple(dirs[1:]) 210 | case "**": 211 | rest := w.parseTokensSimple(dirs[1:]) 212 | if len(rest) == 0 { 213 | // If there are no remaining tokens, return a lone 214 | // doubleStar token. 
215 | return []token{&doubleStar{ 216 | Until: nil, 217 | }} 218 | } 219 | 220 | // Otherwise, return a doubleStar token that will match greedily 221 | // until the first component in the remainder of the pattern, 222 | // and then the remainder of the pattern. 223 | return append([]token{&doubleStar{ 224 | Until: rest[0], 225 | }}, rest[1:]...) 226 | default: 227 | // Ordinarily, simply return the appropriate component, and 228 | // continue on. 229 | return append([]token{&component{ 230 | fns: parseComponent(dirs[0]), 231 | }}, w.parseTokensSimple(dirs[1:])...) 232 | } 233 | } 234 | 235 | // nonEmpty returns the non-empty strings in "all". 236 | func nonEmpty(all []string) (ne []string) { 237 | for _, x := range all { 238 | if len(x) > 0 { 239 | ne = append(ne, x) 240 | } 241 | } 242 | return ne 243 | } 244 | 245 | // Match returns true if and only if the pattern matched by the receiving 246 | // Wildmatch matches the entire filepath "t". 247 | func (w *Wildmatch) Match(t string) bool { 248 | dirs, ok := w.consume(t, MatchOpts{}) 249 | if !ok { 250 | return false 251 | } 252 | return len(dirs) == 0 253 | } 254 | 255 | func (w *Wildmatch) MatchWithOpts(t string, opt MatchOpts) bool { 256 | dirs, ok := w.consume(t, opt) 257 | if !ok { 258 | return false 259 | } 260 | return len(dirs) == 0 261 | } 262 | 263 | // consume performs the inner match of "t" against the receiver's pattern, and 264 | // returns a slice of remaining directory paths, and whether or not there was a 265 | // disagreement while matching. 266 | func (w *Wildmatch) consume(t string, opt MatchOpts) ([]string, bool) { 267 | if w.basename { 268 | // If the receiving Wildmatch has basename set, the pattern 269 | // matches only the basename of the given "t". 270 | t = filepath.Base(t) 271 | } 272 | 273 | if w.caseFold { 274 | // If the receiving Wildmatch is case insensitive, the pattern 275 | // "w.p" will be lower-case. 276 | // 277 | // To preserve insensitivity, lower the given path "t", as well. 
278 | t = strings.ToLower(t) 279 | } 280 | 281 | var isDir bool 282 | if opt.IsDirectory { 283 | isDir = true 284 | // Standardize the formation of subject string so directories always 285 | // end with '/' 286 | if !strings.HasSuffix(t, "/") { 287 | t = t + "/" 288 | } 289 | } else { 290 | isDir = strings.HasSuffix(t, string(sep)) 291 | } 292 | 293 | dirs := strings.Split(t, string(sep)) 294 | 295 | // Git-attribute style matching can never match a directory 296 | if w.gitattributes && isDir { 297 | return dirs, false 298 | } 299 | 300 | // Match each directory token-wise, allowing each token to consume more 301 | // than one directory in the case of the '**' pattern. 302 | for _, tok := range w.ts { 303 | var ok bool 304 | 305 | dirs, ok = tok.Consume(dirs, isDir) 306 | if !ok { 307 | // If a pattern could not match the remainder of the 308 | // filepath, return so immediately, along with the paths 309 | // that we did successfully manage to match. 310 | return dirs, false 311 | } 312 | } 313 | // If this is a directory that we've otherwise matched and all we have 314 | // left is an empty path component, then this is a match. 315 | if isDir && len(dirs) == 1 && len(dirs[0]) == 0 { 316 | return nil, true 317 | } 318 | return dirs, true 319 | } 320 | 321 | // String implements fmt.Stringer and returns the receiver's pattern in the format 322 | // specified above. 323 | func (w *Wildmatch) String() string { 324 | return w.p 325 | } 326 | 327 | // token matches zero, one, or more directory components. 328 | type token interface { 329 | // Consume matches zero, one, or more directory components. 
330 | // 331 | // Consider the following examples: 332 | // 333 | // (["foo", "bar", "baz"]) -> (["oo", "bar", baz"], true) 334 | // (["foo", "bar", "baz"]) -> (["bar", baz"], true) 335 | // (["foo", "bar", "baz"]) -> (["baz"], true) 336 | // (["foo", "bar", "baz"]) -> ([], true) 337 | // (["foo", "bar", "baz"]) -> (["foo", "bar", "baz"], false) 338 | // (["foo", "bar", "baz"]) -> (["oo", "bar", "baz"], false) 339 | // (["foo", "bar", "baz"]) -> (["bar", "baz"], false) 340 | // 341 | // The Consume operation can reduce the size of a single entry in the 342 | // slice (see: example (1) above), or remove it entirely, (see: examples 343 | // (2), (3), and (4) above). It can also refuse to match forward after 344 | // making any amount of progress (see: examples (5), (6), and (7) 345 | // above). 346 | // 347 | // Consume accepts a slice representing a path-delimited filepath on 348 | // disk, and a bool indicating whether the given path is a directory 349 | // (i.e., "foo/bar/" is, but "foo/bar" isn't). 350 | Consume(path []string, isDir bool) ([]string, bool) 351 | 352 | // String returns the string representation this component of the 353 | // pattern; i.e., a string that, when parsed, would form the same token. 354 | String() string 355 | } 356 | 357 | // doubleStar is an implementation of the Token interface which greedily matches 358 | // one-or-more path components until a successor token. 359 | type doubleStar struct { 360 | Until token 361 | EmptyPath bool 362 | } 363 | 364 | // Consume implements token.Consume as above. 365 | func (d *doubleStar) Consume(path []string, isDir bool) ([]string, bool) { 366 | if len(path) == 0 { 367 | return path, d.EmptyPath 368 | } 369 | 370 | // If there are no remaining tokens to match, allow matching the entire 371 | // path. 
372 | if d.Until == nil { 373 | return nil, true 374 | } 375 | 376 | for i := len(path); i > 0; i-- { 377 | rest, ok := d.Until.Consume(path[i:], false) 378 | if ok { 379 | return rest, ok 380 | } 381 | } 382 | 383 | // If no match has been found, we assume that the '**' token matches the 384 | // empty string, and defer pattern matching to the rest of the path. 385 | return d.Until.Consume(path, isDir) 386 | } 387 | 388 | // String implements Component.String. 389 | func (d *doubleStar) String() string { 390 | if d.Until == nil { 391 | return "**" 392 | } 393 | return fmt.Sprintf("**/%s", d.Until.String()) 394 | } 395 | 396 | // unanchoredDirectory is an implementation of the Token interface which 397 | // greedily matches one-or-more path components until a successor token. 398 | type unanchoredDirectory struct { 399 | Until token 400 | } 401 | 402 | // Consume implements token.Consume as above. 403 | func (d *unanchoredDirectory) Consume(path []string, isDir bool) ([]string, bool) { 404 | // This matches the same way as a doubleStar, so just use that 405 | // implementation. 406 | s := &doubleStar{Until: d.Until} 407 | return s.Consume(path, isDir) 408 | } 409 | 410 | // String implements Component.String. 411 | func (d *unanchoredDirectory) String() string { 412 | return fmt.Sprintf("%s/", d.Until.String()) 413 | } 414 | 415 | // trailingComponents is an implementation of the Token interface which 416 | // greedily matches any trailing components, even if empty. 417 | type trailingComponents struct { 418 | } 419 | 420 | // Consume implements token.Consume as above. 421 | func (d *trailingComponents) Consume(path []string, isDir bool) ([]string, bool) { 422 | // This matches the same way as a doubleStar, so just use that 423 | // implementation. 424 | s := &doubleStar{Until: nil, EmptyPath: true} 425 | return s.Consume(path, isDir) 426 | } 427 | 428 | // String implements Component.String. 
429 | func (d *trailingComponents) String() string { 430 | return "" 431 | } 432 | 433 | // componentFn is a functional type designed to match a single component of a 434 | // directory structure by reducing the unmatched part, and returning whether or 435 | // not a match was successful. 436 | type componentFn interface { 437 | Apply(s string) (rest string, ok bool) 438 | String() string 439 | } 440 | 441 | // cfn is a wrapper type for the Component interface that includes an applicable 442 | // function, and a string that represents it. 443 | type cfn struct { 444 | fn func(s string) (rest string, ok bool) 445 | str string 446 | } 447 | 448 | // Apply executes the component function as described above. 449 | func (c *cfn) Apply(s string) (rest string, ok bool) { 450 | return c.fn(s) 451 | } 452 | 453 | // String returns the string representation of this component. 454 | func (c *cfn) String() string { 455 | return c.str 456 | } 457 | 458 | // component is an implementation of the Token interface, which matches a single 459 | // component at the front of a tree structure by successively applying 460 | // implementations of the componentFn type. 461 | type component struct { 462 | // fns is the list of componentFn implementations to be successively 463 | // applied. 464 | fns []componentFn 465 | } 466 | 467 | // parseComponent parses a single component from its string representation, 468 | // including wildcards, character classes, string literals, and escape 469 | // sequences. 470 | func parseComponent(s string) []componentFn { 471 | if len(s) == 0 { 472 | // The empty string represents the absence of componentFn's. 473 | return make([]componentFn, 0) 474 | } 475 | 476 | switch s[0] { 477 | case '\\': 478 | // If the first character is a '\', the following character is a 479 | // part of an escape sequence, or it is unclosed. 
480 | if len(s) < 2 { 481 | panic("wildmatch: unclosed escape sequence") 482 | } 483 | 484 | literal := substring(string(s[1])) 485 | 486 | var rest []componentFn 487 | if len(s) > 2 { 488 | // If there is more to follow, i.e., "\*foo", then parse 489 | // the remainder. 490 | rest = parseComponent(s[2:]) 491 | } 492 | return cons(literal, rest) 493 | case '[': 494 | var ( 495 | // i will denote the currently-inspected index of the character 496 | // group. 497 | i int = 1 498 | // include will denote the list of included runeFn's 499 | // composing the character group. 500 | include []runeFn 501 | // exclude will denote the list of excluded runeFn's 502 | // composing the character group. 503 | exclude []runeFn 504 | // run is the current run of strings (to either compose 505 | // a range, or select "any") 506 | run string 507 | // neg is whether we have seen a negation marker. 508 | neg bool 509 | ) 510 | 511 | for i < len(s) { 512 | if s[i] == '^' || s[i] == '!' { 513 | // Once a '^' or '!' character has been seen, 514 | // anything following it will be negated. 515 | neg = !neg 516 | i = i + 1 517 | } else if strings.HasPrefix(s[i:], "[:") { 518 | close := strings.Index(s[i:], ":]") 519 | if close < 0 { 520 | panic("unclosed character class") 521 | } 522 | 523 | if close == 1 { 524 | // The case "[:]" has a prefix "[:", and 525 | // a suffix ":]", but the atom refers to 526 | // a character group including the 527 | // literal ":", not an ill-formed 528 | // character class. 529 | // 530 | // Parse it as such; increment one 531 | // _less_ than expected, to terminate 532 | // the group. 533 | run += "[:]" 534 | i = i + 2 535 | continue 536 | } 537 | 538 | // Find the associated character class. 
539 | name := strings.TrimPrefix( 540 | strings.ToLower(s[i:i+close]), "[:") 541 | fn, ok := classes[name] 542 | if !ok { 543 | panic(fmt.Sprintf("wildmatch: unknown class: %q", name)) 544 | } 545 | 546 | include, exclude = appendMaybe(!neg, include, exclude, fn) 547 | // Advance to the first index beyond the closing 548 | // ":]". 549 | i = i + close + 2 550 | } else if s[i] == '-' { 551 | if i < len(s) { 552 | // If there is a range marker at the 553 | // non-final position, construct a range 554 | // and an optional "any" match: 555 | var start, end byte 556 | if len(run) > 0 { 557 | // If there is at least one 558 | // character in the run, use it 559 | // as the starting point of the 560 | // range, and remove it from the 561 | // run. 562 | start = run[len(run)-1] 563 | run = run[:len(run)-1] 564 | } 565 | end = s[i+1] 566 | 567 | if len(run) > 0 { 568 | // If there is still information 569 | // in the run, construct a rune 570 | // function matching any 571 | // characters in the run. 572 | cfn := anyRune(run) 573 | 574 | include, exclude = appendMaybe(!neg, include, exclude, cfn) 575 | run = "" 576 | } 577 | 578 | // Finally, construct the rune range and 579 | // add it appropriately. 580 | bfn := between(rune(start), rune(end)) 581 | include, exclude = appendMaybe(!neg, 582 | include, exclude, bfn) 583 | 584 | i = i + 2 585 | } else { 586 | // If this is in the final position, add 587 | // it to the run and exit the loop. 588 | run = run + "-" 589 | i = i + 2 590 | } 591 | } else if s[i] == '\\' { 592 | // If we encounter an escape sequence in the 593 | // group, check its bounds and add it to the 594 | // run. 595 | if i+1 >= len(s) { 596 | panic("wildmatch: unclosed escape") 597 | } 598 | run = run + string(s[i+1]) 599 | i = i + 2 600 | } else if s[i] == ']' { 601 | // If we encounter a closing ']', then stop 602 | // parsing the group. 603 | break 604 | } else { 605 | // Otherwise, add the character to the run and 606 | // advance forward. 
607 | run = run + string(s[i]) 608 | i = i + 1 609 | } 610 | } 611 | 612 | if len(run) > 0 { 613 | fn := anyRune(run) 614 | include, exclude = appendMaybe(!neg, include, exclude, fn) 615 | } 616 | 617 | var rest string 618 | if i+1 < len(s) { 619 | rest = s[i+1:] 620 | } 621 | // Assemble a character class, and cons it in front of the 622 | // remainder of the component pattern. 623 | return cons(charClass(include, exclude), parseComponent(rest)) 624 | case '?': 625 | return []componentFn{wildcard(1, parseComponent(s[1:]))} 626 | case '*': 627 | return []componentFn{wildcard(-1, parseComponent(s[1:]))} 628 | default: 629 | // Advance forward until we encounter a special character 630 | // (either '*', '[', '*', or '?') and parse across the divider. 631 | var i int 632 | for ; i < len(s); i++ { 633 | if s[i] == '[' || 634 | s[i] == '*' || 635 | s[i] == '?' || 636 | s[i] == '\\' { 637 | break 638 | } 639 | } 640 | 641 | return cons(substring(s[:i]), parseComponent(s[i:])) 642 | } 643 | } 644 | 645 | // appendMaybe appends the value "x" to either "a" or "b" depending on "yes". 646 | func appendMaybe(yes bool, a, b []runeFn, x runeFn) (ax, bx []runeFn) { 647 | if yes { 648 | return append(a, x), b 649 | } 650 | return a, append(b, x) 651 | } 652 | 653 | // cons prepends the "head" componentFn to the "tail" of componentFn's. 654 | func cons(head componentFn, tail []componentFn) []componentFn { 655 | return append([]componentFn{head}, tail...) 656 | } 657 | 658 | // Consume implements token.Consume as above by applying the above set of 659 | // componentFn's in succession to the first element of the path tree. 660 | func (c *component) Consume(path []string, isDir bool) ([]string, bool) { 661 | if len(path) == 0 { 662 | return path, false 663 | } 664 | 665 | head := path[0] 666 | for _, fn := range c.fns { 667 | var ok bool 668 | 669 | // Apply successively the component functions to make progress 670 | // matching the head. 
671 | if head, ok = fn.Apply(head); !ok { 672 | // If any of the functions failed to match, there are 673 | // no other paths to match success, so return a failure 674 | // immediately. 675 | return path, false 676 | } 677 | } 678 | 679 | if len(head) > 0 { 680 | return append([]string{head}, path[1:]...), false 681 | } 682 | 683 | if len(path) == 1 { 684 | // Components can not match directories. If we were matching the 685 | // last path in a tree structure, we can only match if it 686 | // _wasn't_ a directory. 687 | return path[1:], true 688 | } 689 | 690 | return path[1:], true 691 | } 692 | 693 | // String implements token.String. 694 | func (c *component) String() string { 695 | var str string 696 | 697 | for _, fn := range c.fns { 698 | str += fn.String() 699 | } 700 | return str 701 | } 702 | 703 | // substring returns a componentFn that matches a prefix of "sub". 704 | func substring(sub string) componentFn { 705 | return &cfn{ 706 | fn: func(s string) (rest string, ok bool) { 707 | if !strings.HasPrefix(s, sub) { 708 | return s, false 709 | } 710 | return s[len(sub):], true 711 | }, 712 | str: sub, 713 | } 714 | } 715 | 716 | // wildcard returns a componentFn that greedily matches until a set of other 717 | // component functions no longer matches. 
//
// A negative "n" yields the '*' wildcard, which may skip any amount of its
// input. A non-negative "n" skips exactly "n" characters (runes); n == 1 is
// the '?' wildcard. In both cases the component functions "fns" must then
// match everything that is left.
func wildcard(n int, fns []componentFn) componentFn {
	// until reports whether "fns", applied in order, consume all of "s".
	until := func(s string) (string, bool) {
		head := s
		for _, fn := range fns {
			var ok bool

			if head, ok = fn.Apply(head); !ok {
				return s, false
			}
		}

		if len(head) > 0 {
			return s, false
		}
		return "", true
	}

	// Render the wildcard the way it would be written in a pattern: "*"
	// for the unbounded form, and one "?" per skipped character otherwise.
	prefix := "*"
	if n > -1 {
		prefix = strings.Repeat("?", n)
	}

	var str strings.Builder
	str.WriteString(prefix)
	for _, fn := range fns {
		str.WriteString(fn.String())
	}

	return &cfn{
		fn: func(s string) (rest string, ok bool) {
			if n > -1 {
				// Skip exactly "n" whole runes, so that a
				// multi-byte character counts as one character,
				// just as it does for a character class.
				skipped := s
				for k := 0; k < n; k++ {
					if len(skipped) == 0 {
						return "", false
					}
					_, size := utf8.DecodeRuneInString(skipped)
					skipped = skipped[size:]
				}
				return until(skipped)
			}

			// Try skipping as much as possible first, then
			// progressively less.
			for i := len(s); i > 0; i-- {
				if rest, ok = until(s[i:]); ok {
					return rest, ok
				}
			}
			return until(s)
		},
		str: str.String(),
	}
}

// charClass returns a component function emulating a character class, i.e.,
// that a single character can match if and only if it is included in one of the
// includes (or true if there were no includes) and none of the excludes.
//
// On success the function consumes exactly one rune of its input. Its string
// representation is empty.
func charClass(include, exclude []runeFn) componentFn {
	return &cfn{
		fn: func(s string) (rest string, ok bool) {
			if len(s) == 0 {
				return s, false
			}

			// Find "r", the first rune in the string "s".
			r, l := utf8.DecodeRuneInString(s)

			var match bool
			for _, ifn := range include {
				// Attempt to find a match on "r" with "ifn".
				if ifn(r) {
					match = true
					break
				}
			}

			// If there wasn't a match and there were some including
			// patterns, return a failure to match. Otherwise, continue on
			// to make sure that no patterns exclude the rune "r".
			if !match && len(include) != 0 {
				return s, false
			}

			for _, efn := range exclude {
				// Attempt to find a negative match on "r" with "efn".
				if efn(r) {
					return s, false
				}
			}

			// If we progressed this far, return the remainder of the
			// string.
			return s[l:], true
		},
		str: "",
	}
}

// runeFn matches a single rune.
type runeFn func(rune) bool

var (
	// classes is a mapping from character class name to a rune function
	// that implements its behavior.
	classes = map[string]runeFn{
		"alnum": func(r rune) bool {
			return unicode.In(r, unicode.Number, unicode.Letter)
		},
		"alpha": unicode.IsLetter,
		"blank": func(r rune) bool {
			return r == ' ' || r == '\t'
		},
		"cntrl": unicode.IsControl,
		"digit": unicode.IsDigit,
		// "graph" is every visible character: unicode.IsGraphic also
		// accepts space characters, which must not be part of the class.
		"graph": func(r rune) bool {
			return unicode.IsGraphic(r) && !unicode.IsSpace(r)
		},
		"lower": unicode.IsLower,
		"print": unicode.IsPrint,
		// "punct" covers symbols such as '+', '$', '<', '^' and '~' as
		// well, which Unicode files under "symbol", not "punctuation".
		"punct": func(r rune) bool {
			return unicode.IsPunct(r) || unicode.IsSymbol(r)
		},
		"space": unicode.IsSpace,
		"upper": unicode.IsUpper,
		"xdigit": func(r rune) bool {
			return unicode.IsDigit(r) ||
				('a' <= r && r <= 'f') ||
				('A' <= r && r <= 'F')
		},
	}
)

// anyRune returns a runeFn that is true so long as the rune "r" appears in
// the string "s".
func anyRune(s string) runeFn {
	return func(r rune) bool {
		return strings.IndexRune(s, r) > -1
	}
}

// between returns a runeFn that is true so long as the rune "r" lies between
// "a" and "b", inclusive. The bounds may be given in either order.
func between(a, b rune) runeFn {
	if b < a {
		a, b = b, a
	}

	return func(r rune) bool {
		return a <= r && r <= b
	}
}
852 | --------------------------------------------------------------------------------